@oberion/wildo 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,232 @@
1
+ /** Pipeline variant for a run; Orchestrator declares a matching private runGreenfield / runFeature / runBugfix member for each value. */ type PipelineMode = "greenfield" | "feature" | "bugfix";
2
+ /** Overall verdict carried by a ReviewResult. */ type ReviewVerdict = "PASS" | "FAIL";
3
+ /** Category recorded on an EscalationReport. NOTE(review): what triggers each value is decided by the orchestrator implementation, which is not visible in this declaration file. */ type EscalationType = "spec_issue" | "blocked" | "user";
4
+ /** Result of a single review round. */ interface ReviewResult {
5
+ verdict: ReviewVerdict;
6
+ rawOutput: string; // presumably the reviewer's unparsed response text — confirm against the implementation
7
+ blockingIssues: string[];
8
+ roundNumber: number; // NOTE(review): whether rounds are 0- or 1-based is not visible here
9
+ }
10
+ /** Escalation record: its type, the phase, the number of rounds completed, the review results and a diagnosis string. */ interface EscalationReport {
11
+ escalationType: EscalationType;
12
+ phase: string;
13
+ roundsCompleted: number;
14
+ history: ReviewResult[];
15
+ diagnosis: string;
16
+ }
17
+ /** Token-usage bookkeeping for one named agent. */ interface SessionUsage {
18
+ agentName: string;
19
+ contextWindow: number; // NOTE(review): presumably the model's context size in tokens — confirm
20
+ totalInputTokens: number;
21
+ totalOutputTokens: number;
22
+ seenMessageIds: Set<string>; // presumably used to avoid counting the same message twice — confirm
23
+ }
24
+ /** Agent roles in the pipeline; ModelConfig has one entry per role plus an "orchestrator" entry. */ type AgentRole = "architect" | "reviewer" | "reporter" | "developer" | "tester";
25
+ /** Settings for one agent: system prompt, tool names and model identifier. */ interface AgentConfig {
26
+ systemPrompt: string;
27
+ tools: string[];
28
+ model: string;
29
+ }
30
+ /** Lifecycle state stored in RunManifest.status. */ type RunStatus = "running" | "completed" | "stopped" | "failed" | "crashed";
31
+ /** Metadata describing one pipeline run. */ interface RunManifest {
32
+ runId: string;
33
+ mode: PipelineMode;
34
+ branch: string;
35
+ language: string;
36
+ status: RunStatus;
37
+ workingDir: string;
38
+ requirement: string;
39
+ acknowledged?: boolean; // NOTE(review): semantics not visible here; Orchestrator declares a private acknowledgeRun member
40
+ }
41
+ /** Keep-or-discard decision for a single run, identified by runId. */ interface CleanupDecision {
42
+ runId: string;
43
+ action: "keep" | "discard";
44
+ branch?: string;
45
+ }
46
+ /** How a provider authenticates: via a subscription or via an API key (see ProviderAuth.apiKey). */ type AuthMode = "subscription" | "api_key";
47
+ /** Auth settings for one provider. */ interface ProviderAuth {
48
+ authMode: AuthMode;
49
+ apiKey?: string; // optional even in "api_key" mode: the bundled setup wizard stores it only when the user types a new key
50
+ }
51
+ /** Auth settings for each provider (Claude and Codex). */ interface AuthConfig {
52
+ claude: ProviderAuth;
53
+ codex: ProviderAuth;
54
+ }
55
+ /** Model identifier for each AgentRole, plus an "orchestrator" entry. */ interface ModelConfig {
56
+ architect: string;
57
+ reviewer: string;
58
+ reporter: string;
59
+ developer: string;
60
+ tester: string;
61
+ orchestrator: string;
62
+ }
63
+ /** Resolved pipeline configuration, as returned by loadConfig(). */ interface PipelineConfig {
64
+ auth: AuthConfig;
65
+ models: ModelConfig;
66
+ maxAgentTurns: number;
67
+ maxReviewRounds: number;
68
+ contextHandoffThreshold: number; // NOTE(review): unit (tokens vs. fraction of the context window) is not visible here
69
+ codexIdleTimeout: number; // NOTE(review): time unit is not visible here
70
+ permissionMode: string;
71
+ }
72
+ /** Discriminator values for PipelineEvent.type. */ type PipelineEventType = "agent_start" | "agent_done" | "content_block" | "timeline" | "confirm_request" | "cleanup_request" | "pipeline_done" | "pipeline_error";
73
+ /** Event delivered to the handler registered with Orchestrator.setEventHandler. */ interface PipelineEvent {
74
+ type: PipelineEventType;
75
+ agent?: string;
76
+ phase?: string;
77
+ data?: unknown; // untyped payload; narrow before use
78
+ }
79
+ /** Inputs of a run plus a hasExistingFiles flag (presumably about workingDir — confirm against the implementation). */ interface PreflightSummary {
80
+ workingDir: string;
81
+ mode: PipelineMode;
82
+ language: string;
83
+ requirement: string;
84
+ hasExistingFiles: boolean;
85
+ }
86
+
87
+ /**
88
+ * Orchestrator: TypeScript control flow driving the multi-agent pipeline.
89
+ *
90
+ * Agents are workers invoked by deterministic code. State transitions depend on
91
+ * agent outputs (reviewer PASS/FAIL, escalation diagnosis), not on LLM decisions.
92
+ *
93
+ * Claude agents use @anthropic-ai/claude-agent-sdk (direct query sessions).
94
+ * Codex agents use @openai/codex-sdk (direct API, no MCP dispatcher).
95
+ */
96
+
97
+ /** Error subclass with a no-argument constructor; the bundled web server reports it to clients as "pipeline_stopped" when Orchestrator.run() rejects with it. */ declare class PipelineStopped extends Error {
98
+ constructor();
99
+ }
100
+ /** Error carrying the phase and a diagnosis; the bundled web server reports it to clients as "pipeline_paused" together with both fields. */ declare class UserInterventionRequired extends Error {
101
+ phase: string;
102
+ diagnosis: string;
103
+ constructor(phase: string, diagnosis: string, reportsDir?: string); // reportsDir is accepted here but is not declared as a property
104
+ }
105
+ /** Drives one pipeline run: construct it, register callbacks with the set* methods, then await run(). Private members are listed by name only; their types and behaviour are not visible in this declaration. */ declare class Orchestrator {
106
+ readonly requirement: string;
107
+ readonly mode: PipelineMode;
108
+ readonly lang: string;
109
+ readonly workingDir: string;
110
+ private config;
111
+ private sessions;
112
+ private reviewHistories;
113
+ private lastQueryTime;
114
+ private originalBranch;
115
+ private workBranch;
116
+ private onEvent;
117
+ private stopped;
118
+ private workflowRoot;
119
+ private knowledgeDir;
120
+ private modulesDir;
121
+ private runsDir;
122
+ private runId;
123
+ private runDir;
124
+ private specsDir;
125
+ private plansDir;
126
+ private reportsDir;
127
+ private handoffsDir;
128
+ private roundsDir;
129
+ private confirmCallback;
130
+ private cleanupCallback;
131
+ constructor(requirement: string, workingDir: string, mode?: PipelineMode, lang?: string); // mode and lang are optional; their defaults are not visible here
132
+ setEventHandler(handler: (event: PipelineEvent) => void): void; // registers the receiver for PipelineEvent notifications
133
+ setConfirmCallback(cb: (summary: string, warning: string) => Promise<boolean>): void; // cb resolves to the user's yes/no answer
134
+ setCleanupCallback(cb: (crashed: RunInfo[]) => Promise<Record<string, "keep" | "discard">>): void; // cb resolves to a keep/discard map; keys are presumably run ids — confirm
135
+ stop(): void; // NOTE(review): how stop() differs from abort() is not visible here
136
+ abort(): void;
137
+ /** Apply API keys from config to environment so SDKs pick them up. */
138
+ private applyAuth;
139
+ run(): Promise<string>; // the bundled web server forwards the resolved string to clients as the pipeline report
140
+ private emit;
141
+ private checkStopped;
142
+ private phase;
143
+ private describeTool;
144
+ private emitContentBlocks;
145
+ private timeline;
146
+ private get langInstruction();
147
+ private cooldown;
148
+ private markQueryDone;
149
+ private initRunDir;
150
+ private writeManifest;
151
+ private updateRunsIndex;
152
+ private finalizeRun;
153
+ private scanCrashedRuns;
154
+ private collectCleanupDecisions;
155
+ private applyCleanupDecisions;
156
+ private askCleanup;
157
+ private discardRun;
158
+ private acknowledgeRun;
159
+ private removeFromIndex;
160
+ private updateIndexStatus;
161
+ private preflightConfirm;
162
+ private callClaude;
163
+ private handoffClaude;
164
+ /**
165
+ * Call a producer agent (developer/tester) using the SDK matching its
166
+ * configured model. Claude models → Claude Agent SDK, OpenAI models → Codex SDK.
167
+ */
168
+ private callAgent;
169
+ private callCodexAgent;
170
+ private getOrCreateSession;
171
+ private archiveRound;
172
+ private reviewLoop;
173
+ private rollbackToArchitect;
174
+ private formatReviewHistory;
175
+ private report;
176
+ private extractExperience;
177
+ private hasKnowledge;
178
+ private initKnowledge;
179
+ private loadKnowledgeContext;
180
+ private updateKnowledge;
181
+ private runImplementation;
182
+ private finalize;
183
+ private runGreenfield;
184
+ private runFeature;
185
+ private runBugfix;
186
+ }
187
+ /** Run summary handed to the cleanup callback (see Orchestrator.setCleanupCallback). mode and status are plain strings here, not PipelineMode / RunStatus. */ interface RunInfo {
188
+ runId: string;
189
+ mode: string;
190
+ branch: string;
191
+ requirement: string;
192
+ status: string;
193
+ }
194
+
195
+ /** Shape accepted by saveConfig(); every field is optional. */ interface UserConfigFile {
196
+ lang?: string;
197
+ theme?: "dark" | "light" | "auto";
198
+ auth?: {
199
+ claude?: Partial<ProviderAuth>;
200
+ codex?: Partial<ProviderAuth>;
201
+ };
202
+ models?: Partial<ModelConfig>;
203
+ maxAgentTurns?: number;
204
+ maxReviewRounds?: number;
205
+ contextHandoffThreshold?: number;
206
+ codexIdleTimeout?: number;
207
+ permissionMode?: string;
208
+ }
209
+ /** Detect available auth sources for each provider. */
210
+ declare function detectAuthSources(): { // one entry per provider, both with the same shape
211
+ claude: {
212
+ hasEnvKey: boolean;
213
+ hasCli: boolean;
214
+ envKey?: string; // NOTE(review): the setup wizard prints this value and /api/setup/status returns it — confirm it is a variable name, not the secret itself
215
+ };
216
+ codex: {
217
+ hasEnvKey: boolean;
218
+ hasCli: boolean;
219
+ envKey?: string; // NOTE(review): same caveat as claude.envKey
220
+ };
221
+ };
222
+ /** Check if first-run setup has been completed. */
223
+ declare function isConfigured(): boolean;
224
+ /** Save config to ~/.wildo/config.json. */
225
+ declare function saveConfig(config: UserConfigFile): void;
226
+ /**
227
+ * Load pipeline config: defaults ← ~/.wildo/config.json ← env vars.
228
+ * Later sources override earlier ones.
229
+ */
230
+ declare function loadConfig(): Promise<PipelineConfig>;
231
+
232
+ export { type AgentConfig, type AgentRole, type AuthConfig, type AuthMode, type CleanupDecision, type EscalationReport, type EscalationType, type ModelConfig, Orchestrator, type PipelineConfig, type PipelineEvent, type PipelineEventType, type PipelineMode, PipelineStopped, type PreflightSummary, type ReviewResult, type ReviewVerdict, type RunInfo, type RunManifest, type RunStatus, type SessionUsage, type UserConfigFile, UserInterventionRequired, detectAuthSources, isConfigured, loadConfig, saveConfig }; // RunInfo and UserConfigFile are exported because exported signatures (Orchestrator.setCleanupCallback, saveConfig) mention them
package/dist/index.js ADDED
@@ -0,0 +1 @@
1
+ import{a as o,b as r,c as p}from"./chunk-JXNI22FR.js";import{b as e,c as i,h as t,i as n}from"./chunk-TMJX67JD.js";import"./chunk-TOAEOZEP.js";export{p as Orchestrator,o as PipelineStopped,r as UserInterventionRequired,e as detectAuthSources,i as isConfigured,n as loadConfig,t as saveConfig};
package/dist/server.js ADDED
@@ -0,0 +1 @@
1
+ import{a as w,b as S,c as v}from"./chunk-JXNI22FR.js";import{b as f,c as g,h as y,k as h}from"./chunk-TMJX67JD.js";import"./chunk-TOAEOZEP.js";import{createServer as x}from"http";import{execFile as R}from"child_process";import{readFileSync as j,existsSync as k}from"fs";import{join as P,extname as I}from"path";import{WebSocketServer as N,WebSocket as T}from"ws";var c=8420,J=P(import.meta.dirname,"..","static"),r=null,d=!1,u=new Set,a=null,l=null,b=null;function i(t){let e=JSON.stringify(t);for(let n of u)n.readyState===T.OPEN&&n.send(e)}var H={".html":"text/html; charset=utf-8",".css":"text/css",".js":"application/javascript",".json":"application/json",".png":"image/png",".svg":"image/svg+xml",".ico":"image/x-icon"},m=x((t,e)=>{if(t.url==="/api/setup/status"){e.writeHead(200,{"Content-Type":"application/json"}),e.end(JSON.stringify({configured:g(),sources:f()}));return}if(t.url==="/api/setup/save"&&t.method==="POST"){let p="";t.on("data",s=>{p+=s}),t.on("end",()=>{try{let s=JSON.parse(p);y({auth:s.auth,lang:s.lang}),e.writeHead(200,{"Content-Type":"application/json"}),e.end(JSON.stringify({ok:!0}))}catch{e.writeHead(400,{"Content-Type":"application/json"}),e.end(JSON.stringify({error:"Invalid JSON"}))}});return}if(t.url?.startsWith("/api/i18n")){let s=new URL(t.url,"http://localhost").searchParams.get("lang")??"en";e.writeHead(200,{"Content-Type":"application/json"}),e.end(JSON.stringify(h(s)));return}if(t.url==="/api/config"){e.writeHead(200,{"Content-Type":"application/json"}),e.end(JSON.stringify(b??{}));return}let n=t.url==="/"?"/index.html":t.url??"/index.html";if(n=n.split("?")[0],n=P(J,n),!k(n)){e.writeHead(404),e.end("Not Found");return}let o=I(n),C=H[o]??"application/octet-stream",O=j(n);e.writeHead(200,{"Content-Type":C}),e.end(O)}),_=new N({server:m});_.on("connection",t=>{u.add(t),t.on("close",()=>u.delete(t)),t.on("message",e=>{let n;try{n=JSON.parse(e.toString())}catch{return}let 
o=n.action;o==="start"?W(n):o==="confirm"?a&&(a(n.confirmed??!1),a=null):o==="cleanup"?l&&(l(n.decisions??{}),l=null):o==="stop"?r?.stop():o==="abort"&&r?.abort()})});async function M(t,e){return i({type:"confirm_request",summary:t,warning:e}),new Promise(n=>{a=n})}async function U(t){return i({type:"cleanup_request",crashed:t.map(e=>({run_id:e.runId,mode:e.mode,branch:e.branch,requirement:e.requirement,status:e.status}))}),new Promise(e=>{l=e})}async function W(t){if(d){i({type:"error",message:"Pipeline already running"});return}let e=t.mode??"greenfield";r=new v(t.requirement??"",t.working_dir??".",e,t.lang??"en"),r.setEventHandler(n=>i(n)),r.setConfirmCallback(M),r.setCleanupCallback(U),d=!0;try{let n=await r.run();i({type:"pipeline_done",report:n})}catch(n){n instanceof w?i({type:"pipeline_stopped"}):n instanceof S?i({type:"pipeline_paused",phase:n.phase,diagnosis:n.diagnosis}):i({type:"error",message:String(n)})}finally{d=!1}}function D(t){let e=process.platform==="darwin"?"open":process.platform==="win32"?"cmd":"xdg-open",n=process.platform==="win32"?["/c","start","",t]:[t];R(e,n,()=>{})}function K(t){b=t,m.listen(c,()=>{let e=`http://localhost:${c}`;console.log(`wildo pipeline UI: ${e}`),D(e)})}import.meta.url===`file://${process.argv[1]}`&&m.listen(c,()=>{console.log(`wildo pipeline UI: http://localhost:${c}`)});export{m as httpServer,K as startWebUI};
@@ -0,0 +1,14 @@
1
+ import{b as x,e as L,h as y,j as t}from"./chunk-TMJX67JD.js";import{a as r,d as k,e as A,f,g as l,h as $,i as d,j as M,k as w,m as D}from"./chunk-TOAEOZEP.js";async function P(e,s,o){f(s),process.stdout.write(`
2
+ `),o.hasCli&&d(`${r.green}${t(e,"setup_cli_found")}${r.reset}`),o.hasEnvKey&&d(`${r.green}${t(e,"setup_env_found")}${r.reset} ${r.darkGray}(${o.envKey})${r.reset}`),!o.hasCli&&!o.hasEnvKey&&w(t(e,"setup_no_auth")),process.stdout.write(`
3
+ `);let n=[];o.hasCli&&n.push({value:"subscription",label:t(e,"setup_opt_sub"),hint:t(e,"setup_opt_sub_hint")}),o.hasEnvKey&&n.push({value:"env_key",label:t(e,"setup_opt_env"),hint:o.envKey}),n.push({value:"new_key",label:t(e,"setup_opt_new")});let u=await l({message:`${s} auth`,options:n});if(u==="subscription")return{authMode:"subscription"};if(u==="env_key")return{authMode:"api_key"};let i=await $({message:t(e,"setup_enter_key"),placeholder:"sk-...",password:!0});return i?{authMode:"api_key",apiKey:i}:(w(t(e,"setup_no_key")),{authMode:"api_key"})}var O=["opus","sonnet","haiku"],S=["gpt-5.4"],z=[...O,...S],H={architect:"opus",reviewer:"sonnet",reporter:"haiku",developer:"gpt-5.4",tester:"gpt-5.4"},I={architect:"opus",reviewer:"sonnet",reporter:"haiku",developer:"opus",tester:"opus"},N={architect:"gpt-5.4",reviewer:"gpt-5.4",reporter:"gpt-5.4",developer:"gpt-5.4",tester:"gpt-5.4"},R={en:{architect:"Architect",reviewer:"Reviewer",reporter:"Reporter",developer:"Developer",tester:"Tester"},zh:{architect:"\u67B6\u6784\u5E08",reviewer:"\u8BC4\u5BA1\u5458",reporter:"\u62A5\u544A\u5458",developer:"\u5F00\u53D1\u8005",tester:"\u6D4B\u8BD5\u5458"}};async function X(e="en",s){let o=L(),n=s==="claude_only"?I:s==="codex_only"?N:H,u=s==="claude_only"?O:s==="codex_only"?S:z;process.stdout.write(`
4
+ `),f(t(e,"setup_model_title")),process.stdout.write(`
5
+ `);let i=["architect","reviewer","reporter","developer","tester"],p={},_=t(e,"setup_model_default"),g=t(e,"setup_model_current");for(let a of i){let K=(R[e]??R.en)[a]??a,b=n[a],h=o[a],U=h??b,v=[];for(let c of u){let C=h&&c===h,E=c===b,T=C&&E?`${_} \xB7 ${g}`:C?g:E?_:void 0;v.push({value:c,label:c,hint:T})}let m=v.findIndex(c=>c.value===U),F=await l({message:t(e,"setup_model_msg",{role:K}),options:v,initial:m>=0?m:0});p[a]=F}y({models:p}),process.stdout.write(`
6
+ `),M(t(e,"setup_model_done"))}async function G(){process.stdout.write(`
7
+ `),k();let e=await l({message:"Language / \u8BED\u8A00",options:[{value:"en",label:"English"},{value:"zh",label:"\u4E2D\u6587"}]});e==="zh"&&(process.stdout.write(`
8
+ `),A()),process.stdout.write(`
9
+ `),d(t(e,"setup_intro")),process.stdout.write(`
10
+ `);let s=await l({message:t(e,"setup_provider_msg"),options:[{value:"both",label:t(e,"setup_provider_both"),hint:t(e,"setup_provider_both_hint")},{value:"claude_only",label:t(e,"setup_provider_claude_only"),hint:t(e,"setup_provider_claude_only_hint")},{value:"codex_only",label:t(e,"setup_provider_codex_only"),hint:t(e,"setup_provider_codex_only_hint")}]});process.stdout.write(`
11
+ `);let o=x(),n=s!=="codex_only",u=s!=="claude_only",i={authMode:"subscription"},p={authMode:"subscription"};n&&(i=await P(e,"Claude",o.claude),process.stdout.write(`
12
+ `)),u&&(p=await P(e,"Codex",o.codex),process.stdout.write(`
13
+ `)),y({lang:e,auth:{claude:i,codex:p}}),await X(e,s==="both"?void 0:s),process.stdout.write(`
14
+ `),D(t(e,"setup_saved"),`${r.gray}${t(e,"setup_reconfig")}${r.reset}`)}export{G as runSetup,X as setupModels};
@@ -0,0 +1 @@
1
+ import{a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u}from"./chunk-TOAEOZEP.js";export{b as ROLE_COLORS,p as agentCard,o as agentStatus,d as banner,e as bannerZh,a as c,t as configSummary,f as divider,l as error,i as info,m as note,n as phaseHeader,s as pipelineDone,q as reviewRound,c as roleColor,g as select,u as spinner,r as statusLine,j as success,h as text,k as warn};
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "@oberion/wildo",
3
+ "version": "0.6.4",
4
+ "description": "Will do! Multi-agent pipeline — AI agents at your service",
5
+ "type": "module",
6
+ "bin": {
7
+ "wildo": "./dist/cli.js"
8
+ },
9
+ "main": "./dist/index.js",
10
+ "types": "./dist/index.d.ts",
11
+ "files": [
12
+ "dist",
13
+ "personas",
14
+ "static"
15
+ ],
16
+ "scripts": {
17
+ "build": "tsup",
18
+ "dev": "tsup --watch",
19
+ "start": "node dist/cli.js"
20
+ },
21
+ "keywords": [
22
+ "wildo",
23
+ "ai",
24
+ "agents",
25
+ "claude",
26
+ "codex",
27
+ "pipeline"
28
+ ],
29
+ "license": "MIT",
30
+ "engines": {
31
+ "node": ">=18.0.0"
32
+ },
33
+ "dependencies": {
34
+ "@anthropic-ai/claude-agent-sdk": "^0.2.0",
35
+ "@openai/codex-sdk": "^0.120.0",
36
+ "commander": "^13.0.0",
37
+ "ws": "^8.18.0"
38
+ },
39
+ "devDependencies": {
40
+ "@types/node": "^22.0.0",
41
+ "@types/ws": "^8.5.0",
42
+ "tsup": "^8.0.0",
43
+ "typescript": "^5.7.0"
44
+ }
45
+ }
@@ -0,0 +1,74 @@
1
+ # Architect Playbook
2
+
3
+ You are a senior software architect. Your job is to turn user requirements
4
+ into clear, actionable specification documents.
5
+
6
+ ## Spec Structure
7
+
8
+ Your spec must define:
9
+ - Functional requirements (what the system does)
10
+ - Non-functional requirements (performance, security, scalability)
11
+ - System components and their interfaces
12
+ - Data models and API contracts
13
+ - Acceptance criteria for each feature
14
+
15
+ Output spec to: `workflow/specs/spec.md`
16
+
17
+ ## Design Process
18
+
19
+ ### 1. Requirements Analysis
20
+ - Decompose the requirement into functional and non-functional aspects
21
+ - Identify implicit requirements the user didn't state but will expect
22
+ - Define acceptance criteria that are testable and unambiguous
23
+
24
+ ### 2. Component Design
25
+ - Define clear component boundaries and responsibilities
26
+ - Specify interfaces and contracts precisely — ambiguity causes cascading errors downstream
27
+ - Document data flow between components
28
+
29
+ ### 3. Trade-Off Analysis
30
+ For each significant design decision, document:
31
+ - **Decision**: What was chosen
32
+ - **Rationale**: Why this option
33
+ - **Alternatives considered**: What else was evaluated
34
+ - **Trade-offs**: What we gain vs. what we give up
35
+
36
+ ## System Design Checklist
37
+
38
+ ### Functional
39
+ - [ ] All user stories documented
40
+ - [ ] API contracts defined (endpoints, request/response shapes)
41
+ - [ ] Data models specified (entities, relationships, constraints)
42
+ - [ ] Error cases and edge conditions covered
43
+
44
+ ### Non-Functional
45
+ - [ ] Performance targets defined (latency, throughput)
46
+ - [ ] Security requirements identified (auth, data protection)
47
+ - [ ] Scalability considerations noted (if relevant)
48
+
49
+ ### Technical
50
+ - [ ] Component responsibilities defined
51
+ - [ ] Data flow documented
52
+ - [ ] Integration points identified
53
+ - [ ] Testing strategy outlined (what to test, how)
54
+
55
+ ### Operations
56
+ - [ ] Deployment considerations noted
57
+ - [ ] Monitoring requirements identified (if applicable)
58
+
59
+ ## Rules
60
+
61
+ - Define WHAT to build, not HOW to implement it. Leave implementation decisions to the developer.
62
+ - Be precise on interfaces and contracts — ambiguity causes cascading errors downstream.
63
+ - Every requirement must have a testable acceptance criterion.
64
+ - When you receive review feedback, address each point specifically. If you disagree, explain your reasoning rather than silently ignoring it.
65
+
66
+ ## Red Flags — Avoid These
67
+
68
+ - **Over-specification**: Prescribing frameworks, algorithms, or implementation details
69
+ - **Vague criteria**: "should be fast", "user-friendly", "scalable" without measurable targets
70
+ - **Missing error cases**: Only describing happy paths
71
+ - **Assumed context**: Relying on knowledge the developer won't have
72
+ - **God component**: One component doing everything
73
+ - **Tight coupling**: Components with circular or deep dependencies
74
+ - **Premature optimization**: Adding complexity for hypothetical scale
@@ -0,0 +1,19 @@
1
+ # Developer Playbook
2
+
3
+ You are a senior software developer. Your job is to implement production-quality
4
+ code based on specifications and architecture plans.
5
+
6
+ ## Approach
7
+
8
+ - Read the spec and plan thoroughly before writing code
9
+ - Follow the project's existing conventions (language, style, structure)
10
+ - Write clean, well-structured code — no dead code, no TODO placeholders
11
+ - Handle errors at system boundaries; trust internal invariants
12
+ - Use existing dependencies; only add new ones when justified by the spec
13
+
14
+ ## Output
15
+
16
+ - Create/modify files as specified in the plan
17
+ - Ensure the code compiles/runs without errors
18
+ - Run any build commands to verify your work
19
+ - If a test suite exists, make sure existing tests still pass
@@ -0,0 +1,38 @@
1
+ # Reporter Playbook
2
+
3
+ You are a technical report writer. After each development phase completes,
4
+ you produce a concise phase report.
5
+
6
+ ## Report Structure
7
+
8
+ ```
9
+ # Phase Report: [Phase Name]
10
+ Date: [current date]
11
+
12
+ ## Summary
13
+ [2-3 sentences: what was accomplished]
14
+
15
+ ## Key Decisions
16
+ - [Decision]: [Rationale]
17
+
18
+ ## Artifacts Produced
19
+ - [file path]: [description]
20
+
21
+ ## Review Rounds
22
+ - Round N: [PASS/FAIL] - [key feedback points]
23
+
24
+ ## Issues & Risks
25
+ - [Any unresolved concerns]
26
+
27
+ ## Next Phase
28
+ - [What comes next]
29
+ ```
30
+
31
+ Output reports to: `workflow/reports/`
32
+
33
+ ## Rules
34
+
35
+ - Be factual, not promotional. Report what happened, not what should have happened.
36
+ - Include review round history — how many rounds, what changed.
37
+ - Keep it under 100 lines.
38
+ - Use the actual review history data provided. Do not fabricate round details.
@@ -0,0 +1,109 @@
1
+ # Reviewer Playbook
2
+
3
+ You are a rigorous technical reviewer. You review artifacts produced by other
4
+ agents at each phase of the development lifecycle.
5
+
6
+ ## Review Process
7
+
8
+ 1. Read the artifact being reviewed
9
+ 2. Read the relevant spec/plan for context: `workflow/specs/`, `workflow/plans/`
10
+ 3. Evaluate against the criteria provided in each review request
11
+ 4. Output a structured verdict
12
+
13
+ ## Severity Levels
14
+
15
+ | Level | Meaning | Blocking? |
16
+ |-------|---------|-----------|
17
+ | CRITICAL | System failure, data loss, security breach, or spec violation | Yes |
18
+ | HIGH | Incorrect behavior, missing requirement, contract mismatch | Yes |
19
+ | MEDIUM | Quality issue, will cause problems later | No |
20
+ | LOW | Style, convention, minor improvement | No |
21
+
22
+ ## Confidence-Based Filtering
23
+
24
+ - Report a finding only when you are more than 80% confident it is a real issue
25
+ - Do NOT flood with noise or stylistic preferences
26
+ - Consolidate similar issues (e.g., "5 endpoints missing validation" not 5 separate findings)
27
+ - Prioritize issues that could cause bugs, security vulnerabilities, or data loss
28
+
29
+ ## Review Dimensions
30
+
31
+ ### Spec Review (Architecture Phase)
32
+ - Requirements completeness: all user needs covered?
33
+ - Acceptance criteria: testable and unambiguous?
34
+ - Interface precision: contracts specific enough for implementation?
35
+ - No implementation details leaking into spec
36
+
37
+ ### Code Review (Implementation Phase)
38
+
39
+ #### Security — flag immediately
40
+ | Pattern | Severity |
41
+ |---------|----------|
42
+ | Hardcoded credentials (API keys, passwords, tokens) | CRITICAL |
43
+ | SQL injection (string concatenation in queries) | CRITICAL |
44
+ | Command injection (user input in shell commands) | CRITICAL |
45
+ | Unvalidated user input at system boundaries | HIGH |
46
+ | Missing auth checks on protected routes | CRITICAL |
47
+ | Sensitive data in logs (tokens, PII) | HIGH |
48
+
49
+ #### Silent Failures — often missed
50
+ | Pattern | Severity |
51
+ |---------|----------|
52
+ | Empty catch blocks (`catch {}` or `catch { }`) | HIGH |
53
+ | Catch returning empty default (`.catch(() => [])`) | MEDIUM |
54
+ | Error logged but never handled | MEDIUM |
55
+ | Missing timeout on external calls | MEDIUM |
56
+ | Transactions without rollback on error | HIGH |
57
+
58
+ #### Performance
59
+ | Pattern | Severity |
60
+ |---------|----------|
61
+ | N+1 queries (fetch in loop) | HIGH |
62
+ | Unbounded queries (no LIMIT on user-facing endpoints) | MEDIUM |
63
+ | O(n^2) when O(n) is possible | MEDIUM |
64
+ | Missing cleanup (event listeners, timers, subscriptions) | MEDIUM |
65
+
66
+ ### Test Review (Testing Phase)
67
+ - Coverage: every acceptance criterion has tests?
68
+ - Edge cases: null, empty, invalid, boundary values?
69
+ - Tests verify behavior, not implementation details?
70
+ - Tests are independent (no shared mutable state)?
71
+
72
+ ## Common False Positives — Do NOT Flag
73
+
74
+ - Environment variables in `.env.example` (not actual secrets)
75
+ - Test credentials in test files (if clearly marked)
76
+ - SHA256/MD5 used for checksums (not passwords)
77
+ - Simplified error handling in prototype/MVP scope
78
+
79
+ Always verify context before flagging.
80
+
81
+ ## Output Format (strictly follow this)
82
+
83
+ ```
84
+ ## Review: [artifact name]
85
+
86
+ ### Verdict: PASS | FAIL
87
+
88
+ ### Criteria Results:
89
+ - [Criterion 1]: PASS | FAIL
90
+ - [If FAIL: severity, specific issue, exact location, what's wrong, what's expected]
91
+ - [Criterion 2]: PASS | FAIL
92
+ ...
93
+
94
+ ### Blocking Issues (if FAIL):
95
+ 1. [CRITICAL/HIGH] [File/section]: [Exact problem description]
96
+ 2. ...
97
+
98
+ ### Suggestions (non-blocking):
99
+ - [MEDIUM/LOW] ...
100
+ ```
101
+
102
+ ## Rules
103
+
104
+ - Never give a PASS when there are blocking issues. Be honest, not lenient.
105
+ - Every FAIL must include: what's wrong, where exactly, and what the correct state should be.
106
+ - "Looks good overall" is not acceptable feedback. Be specific.
107
+ - You are NOT the author. Do not rationalize problems away.
108
+ - If the same issue persists across rounds, escalate its severity.
109
+ - Verify that the artifact actually fulfills the criteria; don't just check its format.
@@ -0,0 +1,19 @@
1
+ # Tester Playbook
2
+
3
+ You are a senior QA engineer. Your job is to write and execute comprehensive
4
+ tests that verify the implementation meets the specification.
5
+
6
+ ## Approach
7
+
8
+ - Read the spec to understand acceptance criteria and edge cases
9
+ - Read the implementation to understand what to test
10
+ - Write tests that cover: happy path, edge cases, error scenarios
11
+ - Use the project's existing test framework and conventions
12
+ - If no test framework exists, choose one appropriate for the language/stack
13
+
14
+ ## Output
15
+
16
+ - Create test files following project conventions
17
+ - Execute all tests and report results
18
+ - If tests fail, report the failures clearly — do not silently skip them
19
+ - Ensure both new and existing tests pass