@vpxa/aikit 0.1.53 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/package.json +2 -1
  2. package/packages/cli/dist/commands/init/templates.js +5 -1
  3. package/packages/dashboard/dist/assets/{index-L06q8M9R.js → index-Cdke3KDK.js} +3 -3
  4. package/packages/dashboard/dist/assets/{index-L06q8M9R.js.map → index-Cdke3KDK.js.map} +1 -1
  5. package/packages/dashboard/dist/index.html +1 -1
  6. package/packages/present/dist/index.html +256 -256
  7. package/packages/server/dist/tools/graph.tool.js +20 -1
  8. package/packages/server/dist/tools/status.tool.js +1 -1
  9. package/scaffold/adapters/copilot.mjs +2 -1
  10. package/scaffold/definitions/agents.mjs +40 -0
  11. package/scaffold/definitions/bodies.mjs +72 -4
  12. package/scaffold/definitions/protocols.mjs +77 -0
  13. package/scaffold/general/agents/Architect-Reviewer-Alpha.agent.md +44 -0
  14. package/scaffold/general/agents/Architect-Reviewer-Beta.agent.md +44 -0
  15. package/scaffold/general/agents/Code-Reviewer-Alpha.agent.md +23 -0
  16. package/scaffold/general/agents/Code-Reviewer-Beta.agent.md +23 -0
  17. package/scaffold/general/agents/Frontend.agent.md +31 -0
  18. package/scaffold/general/agents/Implementer.agent.md +14 -0
  19. package/scaffold/general/agents/Orchestrator.agent.md +4 -2
  20. package/scaffold/general/agents/Refactor.agent.md +21 -0
  21. package/scaffold/general/agents/Researcher-Alpha.agent.md +20 -0
  22. package/scaffold/general/agents/Researcher-Beta.agent.md +21 -0
  23. package/scaffold/general/agents/Researcher-Delta.agent.md +22 -0
  24. package/scaffold/general/agents/Researcher-Gamma.agent.md +21 -0
  25. package/scaffold/general/agents/_shared/architect-reviewer-base.md +44 -0
  26. package/scaffold/general/agents/_shared/code-reviewer-base.md +23 -0
  27. package/scaffold/general/agents/_shared/researcher-base.md +10 -0
  28. package/scaffold/general/skills/aikit/SKILL.md +22 -0
@@ -1,4 +1,23 @@
1
- import{getToolMeta as e}from"../tool-metadata.js";import{GraphOutputSchema as t}from"../output-schemas.js";import{z as n}from"zod";import{createLogger as r,serializeError as i}from"../../../core/dist/index.js";import{graphQuery as a}from"../../../tools/dist/index.js";const o=r(`tools`),s=n.object({id:n.string().optional().describe(`Node ID (auto-generated if omitted)`),type:n.string().describe(`Node type (entity, service, api, concept, decision)`),name:n.string().describe(`Display name`),properties:n.record(n.string(),n.unknown()).optional().describe(`Arbitrary properties`),sourceRecordId:n.string().optional().describe(`Back-link to knowledge record ID`),sourcePath:n.string().optional().describe(`Source file path`)}),c=n.object({id:n.string().optional().describe(`Edge ID (auto-generated if omitted)`),fromId:n.string().describe(`Source node ID`),toId:n.string().describe(`Target node ID`),type:n.string().describe(`Relationship type (depends-on, implements, calls, affects)`),weight:n.number().min(0).max(1).optional().describe(`Relationship weight`),properties:n.record(n.string(),n.unknown()).optional().describe(`Arbitrary properties`)});function l(r,l){let u=e(`graph`);r.registerTool(`graph`,{title:u.title,description:`Query and manage the knowledge graph. Find nodes/edges, traverse connections, add entities and relationships, detect communities, trace processes, and inspect full symbol context. The graph captures structural relationships between concepts discovered in the codebase.`,outputSchema:t,inputSchema:{action:n.enum([`find_nodes`,`find_edges`,`neighbors`,`traverse`,`stats`,`validate`,`add`,`delete`,`clear`,`detect_communities`,`set_community`,`trace_process`,`list_processes`,`delete_process`,`depth_traverse`,`cohesion`,`symbol360`]).describe(`Action: find_nodes (search nodes), find_edges (search edges), neighbors (direct connections), traverse (multi-hop), stats (graph overview), validate (check graph integrity), add (insert nodes/edges), delete (remove nodes), clear (remove all), detect_communities (find clusters), set_community (label a node), trace_process (persist execution flow), list_processes (list traced flows), delete_process (remove a flow), depth_traverse (bucket traversal by depth), cohesion (score a community), symbol360 (full node context)`),node_type:n.string().optional().describe(`Node type filter (for find_nodes)`),name_pattern:n.string().optional().describe(`Name substring match (for find_nodes)`),source_path:n.string().optional().describe(`Source path filter`),node_id:n.string().optional().describe(`Node ID (for neighbors, traverse, delete)`),edge_type:n.string().optional().describe(`Edge type filter`),from_id:n.string().optional().describe(`Source node ID (for find_edges)`),to_id:n.string().optional().describe(`Target node ID (for find_edges)`),direction:n.enum([`outgoing`,`incoming`,`both`]).default(`both`).describe(`Traversal direction`),max_depth:n.number().min(1).max(5).default(2).describe(`Max traversal depth`),limit:n.number().min(1).max(100).default(50).describe(`Max results`),nodes:n.array(s).max(500).optional().describe(`Nodes to add (for action=add)`),edges:n.array(c).max(500).optional().describe(`Edges to add (for action=add)`),community:n.string().optional().describe(`Community label (for set_community, cohesion)`),process_id:n.string().optional().describe(`Process ID (for delete_process)`),label:n.string().optional().describe(`Label for process tracing (for trace_process)`)},annotations:u.annotations},async e=>{try{let t=await a(l,{action:e.action,nodeType:e.node_type,namePattern:e.name_pattern,sourcePath:e.source_path,nodeId:e.node_id,edgeType:e.edge_type,fromId:e.from_id,toId:e.to_id,direction:e.direction,maxDepth:e.max_depth,limit:e.limit,nodes:e.nodes,edges:e.edges,community:e.community,processId:e.process_id,label:e.label}),n=[t.summary];if(t.nodes&&t.nodes.length>0){n.push(`
1
+ import{getToolMeta as e}from"../tool-metadata.js";import{GraphOutputSchema as t}from"../output-schemas.js";import{z as n}from"zod";import{createLogger as r,serializeError as i}from"../../../core/dist/index.js";import{graphQuery as a}from"../../../tools/dist/index.js";const o=r(`tools`),s=n.object({id:n.string().optional().describe(`Node ID (auto-generated if omitted)`),type:n.string().describe(`Node type (entity, service, api, concept, decision)`),name:n.string().describe(`Display name`),properties:n.record(n.string(),n.unknown()).optional().describe(`Arbitrary properties`),sourceRecordId:n.string().optional().describe(`Back-link to knowledge record ID`),sourcePath:n.string().optional().describe(`Source file path`)}),c=n.object({id:n.string().optional().describe(`Edge ID (auto-generated if omitted)`),fromId:n.string().describe(`Source node ID`),toId:n.string().describe(`Target node ID`),type:n.string().describe(`Relationship type (depends-on, implements, calls, affects)`),weight:n.number().min(0).max(1).optional().describe(`Relationship weight`),properties:n.record(n.string(),n.unknown()).optional().describe(`Arbitrary properties`)});function l(r,l){let u=e(`graph`);r.registerTool(`graph`,{title:u.title,description:`Query the auto-populated code knowledge graph (modules, symbols, imports) to answer
2
+ structural questions vector search cannot.
3
+
4
+ Common flow (when you don't have a node_id yet — TWO STEPS):
5
+ 1) Discover: graph({action:'find_nodes', name_pattern:'<module-or-symbol>'})
6
+ 2) Explore: graph({action:'neighbors', node_id, direction:'incoming'|'outgoing'})
7
+
8
+ Single-step read actions:
9
+ • symbol360 — full context (definition + references + imports) for a named symbol
10
+ • traverse — walk N hops from a node
11
+ • depth_traverse — bounded-depth traversal
12
+ • find_edges — list edges matching criteria
13
+ • cohesion — module cohesion score
14
+ • stats — graph size/health
15
+ • validate — integrity check
16
+
17
+ Mutating actions (use deliberately): add, delete, clear, set_community,
18
+ detect_communities, trace_process, delete_process.
19
+
20
+ Complements: \`symbol\` (single lookup), \`trace\` (call-chain AST), \`blast_radius\` (change impact).`,outputSchema:t,inputSchema:{action:n.enum([`find_nodes`,`find_edges`,`neighbors`,`traverse`,`stats`,`validate`,`add`,`delete`,`clear`,`detect_communities`,`set_community`,`trace_process`,`list_processes`,`delete_process`,`depth_traverse`,`cohesion`,`symbol360`]).describe(`Action: find_nodes (search nodes), find_edges (search edges), neighbors (direct connections), traverse (multi-hop), stats (graph overview), validate (check graph integrity), add (insert nodes/edges), delete (remove nodes), clear (remove all), detect_communities (find clusters), set_community (label a node), trace_process (persist execution flow), list_processes (list traced flows), delete_process (remove a flow), depth_traverse (bucket traversal by depth), cohesion (score a community), symbol360 (full node context)`),node_type:n.string().optional().describe(`Node type filter (for find_nodes)`),name_pattern:n.string().optional().describe(`Name substring match (for find_nodes)`),source_path:n.string().optional().describe(`Source path filter`),node_id:n.string().optional().describe(`Node ID (for neighbors, traverse, delete)`),edge_type:n.string().optional().describe(`Edge type filter`),from_id:n.string().optional().describe(`Source node ID (for find_edges)`),to_id:n.string().optional().describe(`Target node ID (for find_edges)`),direction:n.enum([`outgoing`,`incoming`,`both`]).default(`both`).describe(`Traversal direction`),max_depth:n.number().min(1).max(5).default(2).describe(`Max traversal depth`),limit:n.number().min(1).max(100).default(50).describe(`Max results`),nodes:n.array(s).max(500).optional().describe(`Nodes to add (for action=add)`),edges:n.array(c).max(500).optional().describe(`Edges to add (for action=add)`),community:n.string().optional().describe(`Community label (for set_community, cohesion)`),process_id:n.string().optional().describe(`Process ID (for delete_process)`),label:n.string().optional().describe(`Label for process tracing (for trace_process)`)},annotations:u.annotations},async e=>{try{let t=await a(l,{action:e.action,nodeType:e.node_type,namePattern:e.name_pattern,sourcePath:e.source_path,nodeId:e.node_id,edgeType:e.edge_type,fromId:e.from_id,toId:e.to_id,direction:e.direction,maxDepth:e.max_depth,limit:e.limit,nodes:e.nodes,edges:e.edges,community:e.community,processId:e.process_id,label:e.label}),n=[t.summary];if(t.nodes&&t.nodes.length>0){n.push(`
2
21
  ### Nodes`);for(let e of t.nodes){let t=Object.keys(e.properties).length>0?` — ${JSON.stringify(e.properties)}`:``;n.push(`- **${e.name}** (${e.type}, id: \`${e.id}\`)${t}`)}}if(t.edges&&t.edges.length>0){n.push(`
3
22
  ### Edges`);for(let e of t.edges)n.push(`- \`${e.fromId}\` —[${e.type}]→ \`${e.toId}\`${e.weight===1?``:` (weight: ${e.weight})`}`)}if(t.stats&&(n.push(`\nNode types: ${JSON.stringify(t.stats.nodeTypes)}`),n.push(`Edge types: ${JSON.stringify(t.stats.edgeTypes)}`)),t.validation){if(n.push(`
4
23
  ### Validation`),n.push(`- **Valid**: ${t.validation.valid?`yes`:`no`}`),t.validation.danglingEdges.length>0){n.push(`- **Dangling edges**:`);for(let e of t.validation.danglingEdges)n.push(` - \`${e.edgeId}\` references missing node \`${e.missingNodeId}\``)}t.validation.orphanNodes.length>0&&n.push(`- **Orphan nodes**: ${t.validation.orphanNodes.map(e=>`\`${e}\``).join(`, `)}`)}if(t.communities){n.push(`
@@ -1,3 +1,3 @@
1
1
  import{getGcStatus as e}from"../auto-gc.js";import{getToolTelemetry as t}from"../replay-interceptor.js";import{getToolMeta as n}from"../tool-metadata.js";import{StatusOutputSchema as r}from"../output-schemas.js";import{autoUpgradeScaffold as i,getCurrentVersion as a,getUpgradeState as o}from"../version-check.js";import{existsSync as s,readFileSync as c,statSync as l}from"node:fs";import{resolve as u}from"node:path";import{homedir as d}from"node:os";import{AIKIT_PATHS as f,createLogger as p,serializeError as m}from"../../../core/dist/index.js";import{WasmRuntime as h}from"../../../chunker/dist/index.js";const g=p(`tools`);function _(e,t,n,r=15e3){let i,a=new Promise(e=>{i=setTimeout(()=>{g.warn(`Status sub-operation "${n}" timed out after ${r}ms`),e({value:t,timedOut:!0})},r)});return Promise.race([e.then(e=>(clearTimeout(i),{value:e,timedOut:!1}),e=>(clearTimeout(i),g.warn(`Status sub-operation "${n}" failed: ${e instanceof Error?e.message:String(e)}`),{value:t,timedOut:!1})),a])}const v=5*6e4;let y=null,b=null;function x(e,t,n,r){let i=Math.min(e/2e4,1),a=Math.min((t+n)/5e4,1),o=Math.min(r/200,1);return Math.round(i*40+a*35+o*25)}function S(){let e=Date.now();if(y&&e-y.ts<v)return y.value;try{let t=u(d(),`.copilot`,`.aikit-scaffold.json`);if(!s(t))return y={value:null,ts:e},null;let n=JSON.parse(c(t,`utf-8`)).version??null;return y={value:n,ts:e},n}catch{return y={value:null,ts:e},null}}function C(){let e=Date.now();if(b&&e-b.ts<v)return b.value;try{let t=u(process.cwd(),`.github`,`.aikit-scaffold.json`);if(!s(t))return b={value:null,ts:e},null;let n=JSON.parse(c(t,`utf-8`)).version??null;return b={value:n,ts:e},n}catch{return b={value:null,ts:e},null}}function w(e){let t=n(`status`);e.registerTool(`status`,{title:t.title,description:`Get the current status and statistics of the AI Kit index.`,outputSchema:r,annotations:t.annotations},async()=>{let e=a(),t=S(),n=C(),r=t!=null&&t!==e,s=n!=null&&n!==e,c=[`## AI Kit Status`,``,`⏳ **AI Kit is initializing** — index stats will be available shortly.`,``,`### Runtime`,`- **Tree-sitter (WASM)**: ${h.get()?`✅ Available (AST analysis)`:`⚠ Unavailable (regex fallback)`}`,``,`### Version`,`- **Server**: ${e}`,`- **Scaffold (user)**: ${t??`not installed`}`,`- **Scaffold (workspace)**: ${n??`not installed`}`];if(r||s){let a=o(),l=[];r&&l.push(`user scaffold v${t}`),s&&l.push(`workspace scaffold v${n}`);let u=l.join(`, `);a.state===`success`?c.push(``,`### ✅ Upgrade Applied`,`- Server v${e} — ${u} auto-upgraded successfully.`,`- _Restart the MCP server to use the updated version._`):a.state===`pending`?c.push(``,`### ⏳ Upgrade In Progress`,`- Server v${e} ≠ ${u}`,`- Auto-upgrade is running in the background…`):a.state===`failed`?(i(),c.push(``,`### ⬆ Upgrade Available (auto-upgrade failed, retrying)`,`- Server v${e} ≠ ${u}`,`- Error: ${a.error??`unknown`}`)):(i(),c.push(``,`### ⬆ Upgrade Available`,`- Server v${e} ≠ ${u}`,`- Auto-upgrade triggered — check again shortly.`))}let l={totalRecords:0,totalFiles:0,lastIndexedAt:null,onboarded:!1,onboardDir:``,contentTypes:{},wasmAvailable:!!h.get(),graphStats:null,curatedCount:0,serverVersion:e,scaffoldVersion:t??null,workspaceScaffoldVersion:n??null,upgradeAvailable:r||s,contextPressure:0};return{content:[{type:`text`,text:c.join(`
2
2
  `)}],structuredContent:l}})}function T(c,d,p,v,y,b,w,T){let E=n(`status`);c.registerTool(`status`,{title:E.title,description:`Get the current status and statistics of the AI Kit index.`,outputSchema:r,annotations:E.annotations},async()=>{let n=[];try{let[r,c]=await Promise.all([_(d.getStats(),{totalRecords:0,totalFiles:0,lastIndexedAt:null,contentTypeBreakdown:{}},`store.getStats`),_(d.listSourcePaths(),[],`store.listSourcePaths`)]),m=r.value;r.timedOut&&n.push(`⚠ Index stats timed out — values may be incomplete`);let g=c.value;c.timedOut&&n.push(`⚠ File listing timed out`);let E=null,D=0,O=[`## AI Kit Status`,``,`- **Total Records**: ${m.totalRecords}`,`- **Total Files**: ${m.totalFiles}`,`- **Last Indexed**: ${m.lastIndexedAt??`Never`}`,``,`### Content Types`,...Object.entries(m.contentTypeBreakdown).map(([e,t])=>`- ${e}: ${t}`),``,`### Indexed Files`,...g.slice(0,50).map(e=>`- ${e}`),g.length>50?`\n... and ${g.length-50} more files`:``];if(p)try{let e=await _(p.getStats(),{nodeCount:0,edgeCount:0,nodeTypes:{},edgeTypes:{}},`graphStore.getStats`);if(e.timedOut)n.push(`⚠ Graph stats timed out`),O.push(``,`### Knowledge Graph`,`- Graph stats timed out`);else{let t=e.value;E={nodes:t.nodeCount,edges:t.edgeCount},O.push(``,`### Knowledge Graph`,`- **Nodes**: ${t.nodeCount}`,`- **Edges**: ${t.edgeCount}`,...Object.entries(t.nodeTypes).map(([e,t])=>` - ${e}: ${t}`));try{let e=await _(p.validate(),{valid:!0,danglingEdges:[],orphanNodes:[],stats:{nodeCount:0,edgeCount:0,nodeTypes:{},edgeTypes:{}}},`graphStore.validate`);if(!e.timedOut){let t=e.value;t.valid||O.push(`- **⚠ Integrity Issues**: ${t.danglingEdges.length} dangling edges`),t.orphanNodes.length>0&&O.push(`- **Orphan nodes**: ${t.orphanNodes.length}`)}}catch{}}}catch{O.push(``,`### Knowledge Graph`,`- Graph store unavailable`)}let k=b?.onboardDir??u(process.cwd(),f.aiContext),A=s(k),j=y?.onboardComplete||A;if(O.push(``,`### Onboard Status`,j?`- ✅ Complete${y?.onboardTimestamp?` (last: ${y.onboardTimestamp})`:``}`:'- ❌ Not run — call `onboard({ path: "." })` to analyze the codebase',`- **Onboard Directory**: \`${k}\``),v)try{let e=await _(v.list(),[],`curated.list`);if(e.timedOut)n.push(`⚠ Curated knowledge listing timed out`),O.push(``,`### Curated Knowledge`,`- Listing timed out`);else{let t=e.value;D=t.length,O.push(``,`### Curated Knowledge`,t.length>0?`- ${t.length} entries`:"- Empty — use `remember()` to persist decisions")}}catch{O.push(``,`### Curated Knowledge`,`- Unable to read curated entries`)}let M=x(m.totalRecords,E?.nodes??0,E?.edges??0,D);O.push(``),O.push(`## 📊 Context Pressure: ${M}/100`),M>=80?O.push(`⚠️ High pressure — consider pruning stale entries or compacting context.`):M>=50?O.push(`ℹ️ Moderate pressure — knowledge base is well-populated.`):O.push(`✅ Low pressure — plenty of headroom for more content.`);let N=0;if(m.lastIndexedAt){N=new Date(m.lastIndexedAt).getTime();let e=(Date.now()-N)/(1e3*60*60);O.push(``,`### Index Freshness`,e>24?w===`smart`?`- ⚠ Last indexed ${Math.floor(e)}h ago — smart indexing will refresh automatically`:`- ⚠ Last indexed ${Math.floor(e)}h ago — may be stale. Run \`reindex({})\``:`- ✅ Last indexed ${e<1?`less than 1h`:`${Math.floor(e)}h`} ago`)}if(w===`smart`)if(O.push(``,`### Smart Indexing`),T){let e=T();e?O.push(`- **Mode**: Smart (trickle)`,`- **Status**: ${e.running?`✅ Running`:`⏸ Stopped`}`,`- **Queue**: ${e.queueSize} files pending`,`- **Changed files**: ${e.changedFilesSize} detected`,`- **Interval**: ${Math.round(e.intervalMs/1e3)}s per batch of ${e.batchSize}`):O.push(`- **Mode**: Smart (trickle)`,`- **Status**: scheduler state unavailable (init may have failed)`)}else O.push(`- **Mode**: Smart (trickle) — scheduler state unavailable`);{try{let e=u(process.cwd(),f.data,`stash`);if(s(e)){let t=l(e).mtimeMs;t>N&&(N=t)}}catch{}let e=[];if(v)try{let t=D>0?await v.list():[];for(let e of t){let t=new Date(e.updated||e.created).getTime();t>N&&(N=t)}e.push(...t.sort((e,t)=>new Date(t.updated).getTime()-new Date(e.updated).getTime()).slice(0,5))}catch{}let t=N>0?Date.now()-N:0;if(t>=144e5){let n=Math.floor(t/36e5);if(O.push(``,`### 🌅 Session Briefing`,`_${n}+ hours since last activity — here's what to pick up:_`,``),e.length>0){O.push(`**Recent decisions/notes:**`);for(let t of e)O.push(`- **${t.title}** (${t.category??`note`}) — ${(t.contentPreview??``).slice(0,80)}…`)}O.push(``,`**Suggested next steps:**`,'- `search({ query: "SESSION CHECKPOINT", origin: "curated" })` — find your last checkpoint',"- `restore({})` — resume from a saved checkpoint","- `list()` — browse all stored knowledge")}}O.push(``,`### Runtime`,`- **Tree-sitter (WASM)**: ${h.get()?`✅ Available (AST analysis)`:`⚠ Unavailable (regex fallback)`}`);let P=S(),F=C(),I=a(),L=P!=null&&P!==I,R=F!=null&&F!==I;if(L||R){let e=o(),t=[];L&&t.push(`user scaffold v${P}`),R&&t.push(`workspace scaffold v${F}`);let n=t.join(`, `);e.state===`success`?O.push(``,`### ✅ Upgrade Applied`,`- Server v${I} — ${n} auto-upgraded successfully.`,`- _Restart the MCP server to use the updated version._`):e.state===`pending`?O.push(``,`### ⏳ Upgrade In Progress`,`- Server v${I} ≠ ${n}`,`- Auto-upgrade is running in the background…`):e.state===`failed`?(i(),O.push(``,`### ⬆ Upgrade Available (auto-upgrade failed, retrying)`,`- Server v${I} ≠ ${n}`,`- Error: ${e.error??`unknown`}`)):(i(),O.push(``,`### ⬆ Upgrade Available`,`- Server v${I} ≠ ${n}`,`- Auto-upgrade triggered — check again shortly.`))}n.length>0&&O.push(``,`### ⚠ Warnings`,...n.map(e=>`- ${e}`));let z=t();if(z.length>0){let e=z.sort((e,t)=>t.callCount-e.callCount);O.push(``,`### Tool Usage This Session`,``),O.push(`| Tool | Calls | Tokens In | Tokens Out | Errors | Avg Latency |`),O.push(`|------|-------|-----------|------------|--------|-------------|`);for(let t of e.slice(0,15)){let e=Math.round(t.totalInputChars/4),n=Math.round(t.totalOutputChars/4),r=Math.round(t.totalDurationMs/t.callCount);O.push(`| ${t.tool} | ${t.callCount} | ${e.toLocaleString()} | ${n.toLocaleString()} | ${t.errorCount} | ${r}ms |`)}}let B=e();if(B.bufferSize>=10){let e=B.state===`healthy`?`🟢`:B.state===`degraded`?`🔴`:`🟡`;O.push(``,`### Auto-GC: ${e} ${B.state}`),O.push(`- p95 latency: ${B.p95}ms | buffer: ${B.bufferSize} samples`),B.gcCount>0&&O.push(`- GC cycles triggered: ${B.gcCount}`)}let V=O.join(`
3
- `),H={totalRecords:m.totalRecords,totalFiles:m.totalFiles,lastIndexedAt:m.lastIndexedAt??null,onboarded:j,onboardDir:k,contentTypes:m.contentTypeBreakdown,wasmAvailable:!!h.get(),graphStats:E,curatedCount:D,serverVersion:I,scaffoldVersion:P??null,workspaceScaffoldVersion:F??null,upgradeAvailable:L||R,contextPressure:M};return{content:[{type:`text`,text:V+(w===`smart`?"\n\n---\n_Next: Use `search` to query indexed content or `graph(stats)` to explore the knowledge graph. Smart indexing handles updates automatically._":"\n\n---\n_Next: Use `search` to query indexed content, `graph(stats)` to explore the knowledge graph, or `reindex` to refresh the index._")}],structuredContent:H}}catch(e){return g.error(`Status failed`,m(e)),{content:[{type:`text`,text:`Status check failed: ${e instanceof Error?e.message:String(e)}`}],isError:!0}}})}export{S as getScaffoldVersion,C as getWorkspaceScaffoldVersion,w as registerEarlyStatusTool,T as registerStatusTool};
3
+ `),H={totalRecords:m.totalRecords,totalFiles:m.totalFiles,lastIndexedAt:m.lastIndexedAt??null,onboarded:j,onboardDir:k,contentTypes:m.contentTypeBreakdown,wasmAvailable:!!h.get(),graphStats:E,curatedCount:D,serverVersion:I,scaffoldVersion:P??null,workspaceScaffoldVersion:F??null,upgradeAvailable:L||R,contextPressure:M};return{content:[{type:`text`,text:V+(w===`smart`?"\n\n---\n_Next: Use `search` to query indexed content or `graph({action:'find_nodes', name_pattern:'<top-level-module>'})` then `graph({action:'neighbors', node_id})` for relationships. Smart indexing handles updates automatically._":"\n\n---\n_Next: Use `search` to query indexed content, `graph({action:'find_nodes', name_pattern:'<top-level-module>'})` then `graph({action:'neighbors', node_id})` for relationships, or `reindex` to refresh the index._")}],structuredContent:H}}catch(e){return g.error(`Status failed`,m(e)),{content:[{type:`text`,text:`Status check failed: ${e instanceof Error?e.message:String(e)}`}],isError:!0}}})}export{S as getScaffoldVersion,C as getWorkspaceScaffoldVersion,w as registerEarlyStatusTool,T as registerStatusTool};
@@ -121,6 +121,7 @@ function generateVariantAgent(roleName, suffix, def) {
121
121
  def.sharedBase && PROTOCOLS[def.sharedBase] ? `\n\n${PROTOCOLS[def.sharedBase]}` : '';
122
122
 
123
123
  const extra = def.extraBody ? `\n\n${def.extraBody}` : '';
124
+ const variantAddendum = varDef.bodyAddendum ? `\n\n${varDef.bodyAddendum}` : '';
124
125
 
125
126
  const skillsSection = def.skills?.length
126
127
  ? `\n\n## Skills (load on demand)\n\n| Skill | When to load |\n|-------|--------------|\n${def.skills.map(([s, w]) => `| ${s} | ${w} |`).join('\n')}`
@@ -137,7 +138,7 @@ model: ${model}
137
138
  # ${fullName} - ${title}
138
139
 
139
140
  You are **${fullName}**${identity}${extra}
140
- ${sharedContent}${skillsSection}
141
+ ${sharedContent}${variantAddendum}${skillsSection}
141
142
 
142
143
  ${FLOWS_SECTION}
143
144
  `;
@@ -155,22 +155,62 @@ export const AGENTS = {
155
155
  description: 'Primary deep research agent — also serves as default Researcher',
156
156
  identity:
157
157
  ', the primary deep research agent. During multi-model decision sessions, you provide deep reasoning and nuanced system design.',
158
+ bodyAddendum: `## Required Output Section — \`## Depth Analysis\`
159
+
160
+ Your final report MUST contain a \`## Depth Analysis\` section with:
161
+ - Deep-dive into ONE chosen subsystem (most structurally central to the question)
162
+ - Full evidence chain: file:line citations for every structural claim
163
+ - At least 2 \`compact\`/\`file_summary\` extracts woven into the narrative
164
+
165
+ You are the DEFAULT researcher. When the Orchestrator needs breadth + depth, they
166
+ dispatch you alone. Your lens: thorough, evidence-first, exhaustive.`,
158
167
  },
159
168
  Beta: {
160
169
  description:
161
170
  'Research variant — pragmatic analysis with focus on trade-offs and edge cases',
162
171
  identity:
163
172
  ', a variant of the Researcher agent optimized for **pragmatic analysis**. Focus on trade-offs, edge cases, and practical constraints. Challenge assumptions and highlight risks the primary researcher may overlook.',
173
+ bodyAddendum: `## Required Output Section — \`## Failure Modes & Counter-Evidence\`
174
+
175
+ Your final report MUST contain a \`## Failure Modes & Counter-Evidence\` section with:
176
+ - At least 3 adversarial claims challenging your own primary finding
177
+ - For each counter-claim: the condition under which it would be TRUE, and the
178
+ evidence (file:line or search receipt) that currently falsifies it
179
+ - Any unresolved counter-evidence flagged as \`⚠ UNRESOLVED\`
180
+
181
+ Your lens: pragmatic skepticism. Mark competing claims as \`A\` (Assumed) by default;
182
+ challenge before promoting to \`V\`.`,
164
183
  },
165
184
  Gamma: {
166
185
  description: 'Research variant — broad pattern matching across domains and technologies',
167
186
  identity:
168
187
  ', a variant of the Researcher agent optimized for **cross-domain pattern matching**. Draw connections from other domains, frameworks, and industries. Bring breadth where Alpha brings depth.',
188
+ bodyAddendum: `## Required Output Section — \`## Cross-Domain Analogies\`
189
+
190
+ Your final report MUST contain a \`## Cross-Domain Analogies\` section with:
191
+ - At least 2 patterns from other tools/frameworks/domains that apply to the question
192
+ - For each: the external source (cite via \`web_search\` or \`web_fetch\` receipt) and
193
+ how it maps to our codebase
194
+ - One "missing pattern we should adopt" recommendation
195
+
196
+ Your lens: cross-domain pattern matching. Weight \`web_search\` + \`web_fetch\` higher
197
+ than peers. Assume the LLM's training data is stale — verify with fresh searches.`,
169
198
  },
170
199
  Delta: {
171
200
  description: 'Research variant — implementation feasibility and performance implications',
172
201
  identity:
173
202
  ', a variant of the Researcher agent optimized for **implementation feasibility**. Focus on performance implications, scaling concerns, and concrete implementation paths. Ground theoretical proposals in practical reality.',
203
+ bodyAddendum: `## Required Output Section — \`## Implementation Cost & Feasibility\`
204
+
205
+ Your final report MUST contain a \`## Implementation Cost & Feasibility\` section with:
206
+ - Complexity snapshot: you MUST call \`measure({ path })\` on any file ≥ 50 LOC in the
207
+ target subsystem at least once and quote the \`cognitiveComplexity\` result
208
+ - Blast radius estimate: \`blast_radius({ changed_files })\` on the proposed edits
209
+ - Time/risk table: | Change | Lines | Risk | Effort |
210
+ - Feasibility verdict: SAFE / RISKY / INFEASIBLE with one-line justification
211
+
212
+ Your lens: implementation feasibility. Prefer \`measure\` + \`blast_radius\` + \`analyze_patterns\`
213
+ over abstract reasoning.`,
174
214
  },
175
215
  },
176
216
  },
@@ -27,7 +27,7 @@ ${agentTable}
27
27
  ## FORGE Protocol
28
28
 
29
29
  1. \`forge_classify({ task, files })\` → determine tier (Floor/Standard/Critical)
30
- 2. Pass tier to subagents: \`FORGE Context: Tier = {tier}. Evidence: {requirements}.\`
30
+ 2. Pass tier + task_id to subagents: \`FORGE Context: Tier = {tier}. Task ID = {task_id}. Evidence: {requirements}. Reviewers add CRITICAL/HIGH claims into your task_id; never create their own.\`
31
31
  3. After review: \`evidence_map({ action: "gate", task_id })\` → YIELD/HOLD/HARD_BLOCK
32
32
  4. Auto-upgrade tier if unknowns reveal contract/security issues
33
33
 
@@ -116,7 +116,7 @@ Batch 2 (after batch 1):
116
116
  2. **Goal** — acceptance criteria, testable
117
117
  3. **Arch Context** — code snippets from \`compact()\`/\`digest()\`
118
118
  4. **Constraints** — patterns, conventions
119
- 5. **FORGE** — tier + evidence requirements
119
+ 5. **FORGE** — tier + task_id + evidence requirements (reviewers add CRITICAL/HIGH claims into your task_id; never create their own)
120
120
  6. **Self-Review** — checklist before declaring status
121
121
 
122
122
  **Subagent status protocol:** \`DONE\` | \`DONE_WITH_CONCERNS\` | \`NEEDS_CONTEXT\` | \`BLOCKED\`
@@ -128,6 +128,7 @@ Batch 2 (after batch 1):
128
128
  - Use the subagent prompt template for every dispatch so step-specific flow instructions are grounded in actual code context
129
129
 
130
130
  **Per-step review cycle:** Dispatch → Code Review (Alpha+Beta) → Arch Review (if boundary changes) → Security (if applicable) → \`evidence_map\` gate → **🛑 STOP — present results**
131
+ Reviewers add findings to the Orchestrator's existing \`evidence_map\` \`task_id\` and do NOT run the gate themselves.
131
132
 
132
133
  ### Flow MCP Tools
133
134
 
@@ -188,6 +189,7 @@ When subagents complete, their visual outputs (from \`present\`) are NOT visible
188
189
  6. **Always use flows** — every task goes through a flow; design decisions happen in the flow's design step
189
190
  7. **Never proceed without user approval** at 🛑 stops
190
191
  8. **Max 2 retries** then escalate to user
192
+ - **Graph discovery** — when exploring relationships use \`graph({action:'find_nodes', name_pattern})\` then \`graph({action:'neighbors', node_id})\`. Never use \`shortest_path\` (doesn't exist).
191
193
 
192
194
  ## Delegation Enforcement
193
195
 
@@ -352,7 +354,21 @@ When subagents complete, their visual outputs (from \`present\`) are NOT visible
352
354
  - **Never modify tests to make them pass** — Fix the implementation instead
353
355
  - **Run \`check\` after every change** — Catch errors early
354
356
  - **Loop-break** — If the same test fails 3 times with the same error after your fixes, STOP. Re-read the error from scratch, check your assumptions with \`trace\` or \`symbol\`, and try a fundamentally different approach. Do not attempt a 4th fix in the same direction
355
- - **Think-first for complex tasks** — If a task involves 3+ files or non-obvious logic, outline your approach before writing code. Check existing patterns with \`search\` first. Design, then implement`,
357
+ - **Think-first for complex tasks** — If a task involves 3+ files or non-obvious logic, outline your approach before writing code. Check existing patterns with \`search\` first. Design, then implement
358
+
359
+ ## Pre-Edit Checklist (before modifying any file)
360
+
361
+ 1. **Understand consumers** — \`graph({action:'find_nodes', name_pattern:'<target>'})\` → \`graph({action:'neighbors', node_id, direction:'incoming'})\`. See who calls/imports before changing a contract.
362
+ 2. **Compress, don't raw-read** — \`file_summary\` then \`compact({path, query})\` for the specific area. Only \`read_file\` when you need exact lines for \`replace_string_in_file\`.
363
+ 3. **Snapshot risky edits** — \`checkpoint({action:'save', label:'pre-<scope>'})\` before cross-cutting changes. \`checkpoint({action:'restore', ...})\` if \`check\`/\`test_run\` fails.
364
+ 4. **Estimate blast radius** — \`blast_radius({changed_files:[...]})\` BEFORE editing when changing a public/shared symbol; re-run AFTER to confirm actual impact matches.
365
+ 5. **TDD when tests exist** — write/extend the failing test first, then minimum code to pass.
366
+
367
+ ## Post-Edit Checklist
368
+
369
+ 1. \`check({})\` — typecheck + lint must pass clean
370
+ 2. \`test_run({})\` — full suite or targeted pattern
371
+ 3. If Orchestrator passed a \`task_id\`: \`evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})\` for each verified contract/acceptance claim. Do NOT run the gate — Orchestrator owns it.`,
356
372
 
357
373
  Frontend: `**Read \`AGENTS.md\`** in the workspace root for project conventions and AI Kit protocol.
358
374
 
@@ -369,7 +385,38 @@ When subagents complete, their visual outputs (from \`present\`) are NOT visible
369
385
  - **Accessibility first** — ARIA attributes, keyboard navigation, screen reader support
370
386
  - **Follow design system** — Use existing tokens, don't create one-off values
371
387
  - **Responsive by default** — Mobile-first, test all breakpoints
372
- - **Test-first** — Component tests before implementation`,
388
+ - **Test-first** — Component tests before implementation
389
+
390
+ ## Frontend Exploration Mode
391
+
392
+ | Need | Tool |
393
+ |------|------|
394
+ | Component dependency graph | \`graph({action:'neighbors', node_id:'src/components/X.tsx', direction:'incoming'})\` |
395
+ | Stale / unused components | \`dead_symbols({ path:'src/components' })\` |
396
+ | React / a11y / library API research | \`web_search({ query })\`, \`web_fetch({ urls })\` |
397
+ | Component complexity hotspots | \`measure({ path:'src/components' })\` |
398
+ | Verify a component's callers | \`graph({action:'find_nodes', name_pattern})\` → \`neighbors\` |
399
+
400
+ ## Visual Validation Protocol (post \`test_run\`)
401
+
402
+ **Pre-flight (MANDATORY before any browser step):**
403
+ 1. Read \`package.json\` scripts — identify dev command (e.g. \`dev\`, \`start\`, \`vite\`)
404
+ 2. Determine default port (check script args, \`vite.config.*\`, or env)
405
+ 3. Check if dev server already running on port (attempt \`http({ url:'http://localhost:<port>' })\`)
406
+ 4. If NOT running, delegate to a helper or use \`createAndRunTask\` to start \`npm run dev\`
407
+ in the background; wait for ready signal
408
+ 5. Capture the base URL
409
+
410
+ **Validation:**
411
+ 6. \`open_browser_page({ url })\` — render target component page
412
+ 7. \`screenshot_page\` + \`read_page\` — capture visual + DOM
413
+ 8. Keyboard-only navigation check: simulate Tab/Enter/Escape via \`type_in_page\` —
414
+ verify focus ring, activation, dismiss
415
+ 9. Compare against design tokens / Figma URL if supplied
416
+ 10. Fail fast if color contrast < 4.5:1 (WCAG AA) or focus indicator missing
417
+
418
+ If the pre-flight dev server cannot be started (e.g. sandbox), fall back to
419
+ \`compact\` inspection of the component source + describe expected visual behavior.`,
373
420
 
374
421
  Debugger: `**Read \`AGENTS.md\`** in the workspace root for project conventions and AI Kit protocol.
375
422
 
@@ -411,6 +458,27 @@ When subagents complete, their visual outputs (from \`present\`) are NOT visible
411
458
  - **Follow existing patterns** — Consolidate toward established conventions
412
459
  - **Don't refactor what isn't asked** — Scope discipline
413
460
 
461
+ ## Reversible Refactor Protocol
462
+
463
+ Refactors modify the canonical source, so use \`checkpoint\` (NOT \`lane\`) for safety:
464
+
465
+ 1. **Before starting:** \`checkpoint({ action:'save', label:'pre-refactor-<scope>' })\`
466
+ — captures a snapshot of the relevant files
467
+ 2. **Baseline metrics:** \`measure({ path })\` on target files — record
468
+ \`cognitiveComplexity\` values BEFORE refactor
469
+ 3. **Apply changes** — use \`rename({ old, new })\` for symbol rename (dry_run first),
470
+ or \`codemod({ pattern, replacement })\` for structural transforms (dry_run first).
471
+ Never hand-edit what \`rename\`/\`codemod\` can do safely.
472
+ 4. **Verify:** \`check({})\` + \`test_run({})\` must both pass with zero new failures
473
+ 5. **Post-metrics:** \`measure({ path })\` again — confirm cognitive complexity
474
+ delta is negative (or justify if zero)
475
+ 6. **If validation fails:** \`checkpoint({ action:'restore', label:'pre-refactor-<scope>' })\`
476
+
477
+ For multi-approach uncertainty (A vs B), do NOT create lanes. Instead:
478
+ - Delegate to \`Researcher-Delta\` with a feasibility question — they can use \`lane\`
479
+ for read-only exploration and return a recommendation
480
+ - You then apply the winning approach under the checkpoint protocol above
481
+
414
482
  ## Skills (load on demand)
415
483
 
416
484
  | Skill | When to load |
@@ -462,6 +462,16 @@ When invoked for a decision analysis, you receive a specific question. You MUST:
462
462
  - **\`digest\`** when synthesizing from 3+ sources
463
463
  - **\`stratum_card\`** for files you'll reference repeatedly
464
464
  - **\`read_file\` is ONLY acceptable** when you need exact lines for a pending edit operation
465
+
466
+ ## Parallel Exploration via \`lane\`
467
+
468
+ For questions that require trying approach A vs approach B in isolation:
469
+ 1. \`lane({ action:'create', name:'approach-a' })\` — isolated file copies
470
+ 2. Apply approach A mentally; record observations
471
+ 3. \`lane({ action:'create', name:'approach-b' })\` — second isolate
472
+ 4. Apply approach B mentally; record observations
473
+ 5. \`lane({ action:'diff', names:['approach-a','approach-b'] })\` — compare
474
+ 6. Include the diff summary in your output; do NOT merge lanes back (read-only role)
465
475
  `,
466
476
 
467
477
  'code-reviewer-base': `# Code-Reviewer — Shared Base Instructions
@@ -527,6 +537,29 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
527
537
  - **NEEDS_REVISION** for any HIGH finding
528
538
  - **FAILED** for any CRITICAL finding
529
539
  - Always check for **test coverage** on new/changed code
540
+
541
+ ## Evidence Citation Protocol (tier-aware)
542
+
543
+ The Orchestrator runs \`forge_classify\` before dispatching you, and runs the final
544
+ \`evidence_map({action:'gate', task_id})\` after you respond. **Do not create your own
545
+ task_id or run the gate** — feed into the Orchestrator's existing evidence map.
546
+
547
+ | Tier | Your responsibility |
548
+ |------|---------------------|
549
+ | Floor | Free-form findings with \`file.ts#Lxx\` citations. No \`evidence_map\` calls required. |
550
+ | Standard | For every CRITICAL or HIGH finding: \`evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})\`. Max 2-4 adds to keep signal high. |
551
+ | Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with \`safety_gate:'commitment'\` or \`safety_gate:'provenance'\`. |
552
+
553
+ **Every response MUST include:**
554
+ - \`**FORGE Task ID:** <task_id>\` (passed in by Orchestrator, or state "not provided")
555
+ - \`**Tier applied:** Floor | Standard | Critical\`
556
+ - \`**Findings:** <list>\` with \`file:line\` receipts
557
+ - Verdict: \`APPROVED\` | \`CHANGES_REQUESTED\` | \`BLOCKED\`
558
+
559
+ Do NOT:
560
+ - Create a new \`evidence_map\` (the Orchestrator already did)
561
+ - Run \`evidence_map({action:'gate'})\` yourself — the Orchestrator owns the gate
562
+ - Duplicate findings into the map that weren't CRITICAL/HIGH
530
563
  `,
531
564
 
532
565
  'architect-reviewer-base': `# Architect-Reviewer — Shared Base Instructions
@@ -588,6 +621,50 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
588
621
  - **NEEDS_CHANGES** — Fixable structural issues
589
622
  - **BLOCKED** — Fundamental design flaw requiring rethink
590
623
  - Always validate **dependency direction** — inner layers must not depend on outer
624
+
625
+ ## Evidence Citation Protocol (tier-aware)
626
+
627
+ The Orchestrator runs \`forge_classify\` before dispatching you, and runs the final
628
+ \`evidence_map({action:'gate', task_id})\` after you respond. **Do not create your own
629
+ task_id or run the gate** — feed into the Orchestrator's existing evidence map.
630
+
631
+ | Tier | Your responsibility |
632
+ |------|---------------------|
633
+ | Floor | Free-form findings with \`file.ts#Lxx\` citations. No \`evidence_map\` calls required. |
634
+ | Standard | For every CRITICAL or HIGH finding: \`evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})\`. Max 2-4 adds to keep signal high. |
635
+ | Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with \`safety_gate:'commitment'\` or \`safety_gate:'provenance'\`. |
636
+
637
+ **Every response MUST include:**
638
+ - \`**FORGE Task ID:** <task_id>\` (passed in by Orchestrator, or state "not provided")
639
+ - \`**Tier applied:** Floor | Standard | Critical\`
640
+ - \`**Findings:** <list>\` with \`file:line\` receipts
641
+ - Verdict: \`APPROVED\` | \`CHANGES_REQUESTED\` | \`BLOCKED\`
642
+
643
+ Do NOT:
644
+ - Create a new \`evidence_map\` (the Orchestrator already did)
645
+ - Run \`evidence_map({action:'gate'})\` yourself — the Orchestrator owns the gate
646
+ - Duplicate findings into the map that weren't CRITICAL/HIGH
647
+
648
+ ## Graph-Assisted Layer Verification
649
+
650
+ For each significantly changed module (from \`blast_radius\` or changed_files input):
651
+
652
+ 1. **Discover node**: \`graph({action:'find_nodes', name_pattern:'<module-path>'})\` → get node_id
653
+ 2. **Incoming dependencies** (who depends on this?):
654
+ \`graph({action:'neighbors', node_id, direction:'incoming'})\`
655
+ — flag any caller that violates layering rules (e.g. a \`core/\` module that gets imported by \`infra/\`)
656
+ 3. **Outgoing dependencies** (what does it depend on?):
657
+ \`graph({action:'neighbors', node_id, direction:'outgoing'})\`
658
+ — flag any target that violates direction (e.g. domain importing from infra)
659
+ 4. **Isolation check** (modules that should NOT be connected):
660
+ \`graph({action:'depth_traverse', node_id, max_depth:3})\`
661
+ — verify no path reaches modules in forbidden directories
662
+
663
+ Cite each layer violation as a CRITICAL finding with \`file:line\` receipt, and add it
664
+ to the Evidence Map per the tier protocol above.
665
+
666
+ **Do NOT use \`shortest_path\`** — that action does not exist. Use \`depth_traverse\`
667
+ or repeated \`neighbors\` calls.
591
668
  `,
592
669
 
593
670
  'decision-protocol': `# Multi-Model Decision Protocol
@@ -72,6 +72,50 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
72
72
  - **BLOCKED** — Fundamental design flaw requiring rethink
73
73
  - Always validate **dependency direction** — inner layers must not depend on outer
74
74
 
75
+ ## Evidence Citation Protocol (tier-aware)
76
+
77
+ The Orchestrator runs `forge_classify` before dispatching you, and runs the final
78
+ `evidence_map({action:'gate', task_id})` after you respond. **Do not create your own
79
+ task_id or run the gate** — feed into the Orchestrator's existing evidence map.
80
+
81
+ | Tier | Your responsibility |
82
+ |------|---------------------|
83
+ | Floor | Free-form findings with `file.ts#Lxx` citations. No `evidence_map` calls required. |
84
+ | Standard | For every CRITICAL or HIGH finding: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})`. Max 2-4 adds to keep signal high. |
85
+ | Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with `safety_gate:'commitment'` or `safety_gate:'provenance'`. |
86
+
87
+ **Every response MUST include:**
88
+ - `**FORGE Task ID:** <task_id>` (passed in by Orchestrator, or state "not provided")
89
+ - `**Tier applied:** Floor | Standard | Critical`
90
+ - `**Findings:** <list>` with `file:line` receipts
91
+ - Verdict: `APPROVED` | `CHANGES_REQUESTED` | `BLOCKED`
92
+
93
+ Do NOT:
94
+ - Create a new `evidence_map` (the Orchestrator already did)
95
+ - Run `evidence_map({action:'gate'})` yourself — the Orchestrator owns the gate
96
+ - Duplicate findings into the map that weren't CRITICAL/HIGH
97
+
98
+ ## Graph-Assisted Layer Verification
99
+
100
+ For each significantly changed module (from `blast_radius` or changed_files input):
101
+
102
+ 1. **Discover node**: `graph({action:'find_nodes', name_pattern:'<module-path>'})` → get node_id
103
+ 2. **Incoming dependencies** (who depends on this?):
104
+ `graph({action:'neighbors', node_id, direction:'incoming'})`
105
+ — flag any caller that violates layering rules (e.g. a `core/` module that gets imported by `infra/`)
106
+ 3. **Outgoing dependencies** (what does it depend on?):
107
+ `graph({action:'neighbors', node_id, direction:'outgoing'})`
108
+ — flag any target that violates direction (e.g. domain importing from infra)
109
+ 4. **Isolation check** (modules that should NOT be connected):
110
+ `graph({action:'depth_traverse', node_id, max_depth:3})`
111
+ — verify no path reaches modules in forbidden directories
112
+
113
+ Cite each layer violation as a CRITICAL finding with `file:line` receipt, and add it
114
+ to the Evidence Map per the tier protocol above.
115
+
116
+ **Do NOT use `shortest_path`** — that action does not exist. Use `depth_traverse`
117
+ or repeated `neighbors` calls.
118
+
75
119
 
76
120
  ## Skills (load on demand)
77
121
 
@@ -72,6 +72,50 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
72
72
  - **BLOCKED** — Fundamental design flaw requiring rethink
73
73
  - Always validate **dependency direction** — inner layers must not depend on outer
74
74
 
75
+ ## Evidence Citation Protocol (tier-aware)
76
+
77
+ The Orchestrator runs `forge_classify` before dispatching you, and runs the final
78
+ `evidence_map({action:'gate', task_id})` after you respond. **Do not create your own
79
+ task_id or run the gate** — feed into the Orchestrator's existing evidence map.
80
+
81
+ | Tier | Your responsibility |
82
+ |------|---------------------|
83
+ | Floor | Free-form findings with `file.ts#Lxx` citations. No `evidence_map` calls required. |
84
+ | Standard | For every CRITICAL or HIGH finding: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})`. Max 2-4 adds to keep signal high. |
85
+ | Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with `safety_gate:'commitment'` or `safety_gate:'provenance'`. |
86
+
87
+ **Every response MUST include:**
88
+ - `**FORGE Task ID:** <task_id>` (passed in by Orchestrator, or state "not provided")
89
+ - `**Tier applied:** Floor | Standard | Critical`
90
+ - `**Findings:** <list>` with `file:line` receipts
91
+ - Verdict: `APPROVED` | `CHANGES_REQUESTED` | `BLOCKED`
92
+
93
+ Do NOT:
94
+ - Create a new `evidence_map` (the Orchestrator already did)
95
+ - Run `evidence_map({action:'gate'})` yourself — the Orchestrator owns the gate
96
+ - Duplicate findings into the map that weren't CRITICAL/HIGH
97
+
98
+ ## Graph-Assisted Layer Verification
99
+
100
+ For each significantly changed module (from `blast_radius` or changed_files input):
101
+
102
+ 1. **Discover node**: `graph({action:'find_nodes', name_pattern:'<module-path>'})` → get node_id
103
+ 2. **Incoming dependencies** (who depends on this?):
104
+ `graph({action:'neighbors', node_id, direction:'incoming'})`
105
+ — flag any caller that violates layering rules (e.g. a `core/` module that gets imported by `infra/`)
106
+ 3. **Outgoing dependencies** (what does it depend on?):
107
+ `graph({action:'neighbors', node_id, direction:'outgoing'})`
108
+ — flag any target that violates direction (e.g. domain importing from infra)
109
+ 4. **Isolation check** (modules that should NOT be connected):
110
+ `graph({action:'depth_traverse', node_id, max_depth:3})`
111
+ — verify no path reaches modules in forbidden directories
112
+
113
+ Cite each layer violation as a CRITICAL finding with `file:line` receipt, and add it
114
+ to the Evidence Map per the tier protocol above.
115
+
116
+ **Do NOT use `shortest_path`** — that action does not exist. Use `depth_traverse`
117
+ or repeated `neighbors` calls.
118
+
75
119
 
76
120
  ## Skills (load on demand)
77
121
 
@@ -74,6 +74,29 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
74
74
  - **FAILED** for any CRITICAL finding
75
75
  - Always check for **test coverage** on new/changed code
76
76
 
77
+ ## Evidence Citation Protocol (tier-aware)
78
+
79
+ The Orchestrator runs `forge_classify` before dispatching you, and runs the final
80
+ `evidence_map({action:'gate', task_id})` after you respond. **Do not create your own
81
+ task_id or run the gate** — feed into the Orchestrator's existing evidence map.
82
+
83
+ | Tier | Your responsibility |
84
+ |------|---------------------|
85
+ | Floor | Free-form findings with `file.ts#Lxx` citations. No `evidence_map` calls required. |
86
+ | Standard | For every CRITICAL or HIGH finding: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})`. Max 2-4 adds to keep signal high. |
87
+ | Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with `safety_gate:'commitment'` or `safety_gate:'provenance'`. |
88
+
89
+ **Every response MUST include:**
90
+ - `**FORGE Task ID:** <task_id>` (passed in by Orchestrator, or state "not provided")
91
+ - `**Tier applied:** Floor | Standard | Critical`
92
+ - `**Findings:** <list>` with `file:line` receipts
93
+ - Verdict: `APPROVED` | `CHANGES_REQUESTED` | `BLOCKED`
94
+
95
+ Do NOT:
96
+ - Create a new `evidence_map` (the Orchestrator already did)
97
+ - Run `evidence_map({action:'gate'})` yourself — the Orchestrator owns the gate
98
+ - Duplicate findings into the map that weren't CRITICAL/HIGH
99
+
77
100
 
78
101
  ## Skills (load on demand)
79
102
 
@@ -74,6 +74,29 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
74
74
  - **FAILED** for any CRITICAL finding
75
75
  - Always check for **test coverage** on new/changed code
76
76
 
77
+ ## Evidence Citation Protocol (tier-aware)
78
+
79
+ The Orchestrator runs `forge_classify` before dispatching you, and runs the final
80
+ `evidence_map({action:'gate', task_id})` after you respond. **Do not create your own
81
+ task_id or run the gate** — feed into the Orchestrator's existing evidence map.
82
+
83
+ | Tier | Your responsibility |
84
+ |------|---------------------|
85
+ | Floor | Free-form findings with `file.ts#Lxx` citations. No `evidence_map` calls required. |
86
+ | Standard | For every CRITICAL or HIGH finding: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})`. Max 2-4 adds to keep signal high. |
87
+ | Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with `safety_gate:'commitment'` or `safety_gate:'provenance'`. |
88
+
89
+ **Every response MUST include:**
90
+ - `**FORGE Task ID:** <task_id>` (passed in by Orchestrator, or state "not provided")
91
+ - `**Tier applied:** Floor | Standard | Critical`
92
+ - `**Findings:** <list>` with `file:line` receipts
93
+ - Verdict: `APPROVED` | `CHANGES_REQUESTED` | `BLOCKED`
94
+
95
+ Do NOT:
96
+ - Create a new `evidence_map` (the Orchestrator already did)
97
+ - Run `evidence_map({action:'gate'})` yourself — the Orchestrator owns the gate
98
+ - Duplicate findings into the map that weren't CRITICAL/HIGH
99
+
77
100
 
78
101
  ## Skills (load on demand)
79
102
 
@@ -26,6 +26,37 @@ You are the **Frontend**, ui/ux specialist for react, styling, responsive design
26
26
  - **Responsive by default** — Mobile-first, test all breakpoints
27
27
  - **Test-first** — Component tests before implementation
28
28
 
29
+ ## Frontend Exploration Mode
30
+
31
+ | Need | Tool |
32
+ |------|------|
33
+ | Component dependency graph | `graph({action:'neighbors', node_id:'src/components/X.tsx', direction:'incoming'})` |
34
+ | Stale / unused components | `dead_symbols({ path:'src/components' })` |
35
+ | React / a11y / library API research | `web_search({ query })`, `web_fetch({ urls })` |
36
+ | Component complexity hotspots | `measure({ path:'src/components' })` |
37
+ | Verify a component's callers | `graph({action:'find_nodes', name_pattern})` → `neighbors` |
38
+
39
+ ## Visual Validation Protocol (post `test_run`)
40
+
41
+ **Pre-flight (MANDATORY before any browser step):**
42
+ 1. Read `package.json` scripts — identify dev command (e.g. `dev`, `start`, `vite`)
43
+ 2. Determine default port (check script args, `vite.config.*`, or env)
44
+ 3. Check if dev server already running on port (attempt `http({ url:'http://localhost:<port>' })`)
45
+ 4. If NOT running, delegate to a helper or use `createAndRunTask` to start `npm run dev`
46
+ in the background; wait for ready signal
47
+ 5. Capture the base URL
48
+
49
+ **Validation:**
50
+ 6. `open_browser_page({ url })` — render target component page
51
+ 7. `screenshot_page` + `read_page` — capture visual + DOM
52
+ 8. Keyboard-only navigation check: simulate Tab/Enter/Escape via `type_in_page` —
53
+ verify focus ring, activation, dismiss
54
+ 9. Compare against design tokens / Figma URL if supplied
55
+ 10. Fail fast if color contrast < 4.5:1 (WCAG AA) or focus indicator missing
56
+
57
+ If the pre-flight dev server cannot be started (e.g. sandbox), fall back to
58
+ `compact` inspection of the component source + describe expected visual behavior.
59
+
29
60
  # Code Agent — Shared Base Instructions
30
61
 
31
62
  > This file contains shared protocols for all code-modifying agents (Implementer, Frontend, Refactor, Debugger). Each agent's definition file contains only its unique identity, constraints, and workflow. **Do not duplicate this content in agent files.**
@@ -30,6 +30,20 @@ You are the **Implementer**, persistent implementation agent that writes code fo
30
30
  - **Loop-break** — If the same test fails 3 times with the same error after your fixes, STOP. Re-read the error from scratch, check your assumptions with `trace` or `symbol`, and try a fundamentally different approach. Do not attempt a 4th fix in the same direction
31
31
  - **Think-first for complex tasks** — If a task involves 3+ files or non-obvious logic, outline your approach before writing code. Check existing patterns with `search` first. Design, then implement
32
32
 
33
+ ## Pre-Edit Checklist (before modifying any file)
34
+
35
+ 1. **Understand consumers** — `graph({action:'find_nodes', name_pattern:'<target>'})` → `graph({action:'neighbors', node_id, direction:'incoming'})`. See who calls/imports before changing a contract.
36
+ 2. **Compress, don't raw-read** — `file_summary` then `compact({path, query})` for the specific area. Only `read_file` when you need exact lines for `replace_string_in_file`.
37
+ 3. **Snapshot risky edits** — `checkpoint({action:'save', label:'pre-<scope>'})` before cross-cutting changes. `checkpoint({action:'restore', ...})` if `check`/`test_run` fails.
38
+ 4. **Estimate blast radius** — `blast_radius({changed_files:[...]})` BEFORE editing when changing a public/shared symbol; re-run AFTER to confirm actual impact matches.
39
+ 5. **TDD when tests exist** — write/extend the failing test first, then minimum code to pass.
40
+
41
+ ## Post-Edit Checklist
42
+
43
+ 1. `check({})` — typecheck + lint must pass clean
44
+ 2. `test_run({})` — full suite or targeted pattern
45
+ 3. If Orchestrator passed a `task_id`: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})` for each verified contract/acceptance claim. Do NOT run the gate — Orchestrator owns it.
46
+
33
47
  # Code Agent — Shared Base Instructions
34
48
 
35
49
  > This file contains shared protocols for all code-modifying agents (Implementer, Frontend, Refactor, Debugger). Each agent's definition file contains only its unique identity, constraints, and workflow. **Do not duplicate this content in agent files.**
@@ -44,7 +44,7 @@ You orchestrate the full development lifecycle: **planning → implementation
44
44
  ## FORGE Protocol
45
45
 
46
46
  1. `forge_classify({ task, files })` → determine tier (Floor/Standard/Critical)
47
- 2. Pass tier to subagents: `FORGE Context: Tier = {tier}. Evidence: {requirements}.`
47
+ 2. Pass tier + task_id to subagents: `FORGE Context: Tier = {tier}. Task ID = {task_id}. Evidence: {requirements}. Reviewers add CRITICAL/HIGH claims into your task_id; never create their own.`
48
48
  3. After review: `evidence_map({ action: "gate", task_id })` → YIELD/HOLD/HARD_BLOCK
49
49
  4. Auto-upgrade tier if unknowns reveal contract/security issues
50
50
 
@@ -133,7 +133,7 @@ Batch 2 (after batch 1):
133
133
  2. **Goal** — acceptance criteria, testable
134
134
  3. **Arch Context** — code snippets from `compact()`/`digest()`
135
135
  4. **Constraints** — patterns, conventions
136
- 5. **FORGE** — tier + evidence requirements
136
+ 5. **FORGE** — tier + task_id + evidence requirements (reviewers add CRITICAL/HIGH claims into your task_id; never create their own)
137
137
  6. **Self-Review** — checklist before declaring status
138
138
 
139
139
  **Subagent status protocol:** `DONE` | `DONE_WITH_CONCERNS` | `NEEDS_CONTEXT` | `BLOCKED`
@@ -145,6 +145,7 @@ Batch 2 (after batch 1):
145
145
  - Use the subagent prompt template for every dispatch so step-specific flow instructions are grounded in actual code context
146
146
 
147
147
  **Per-step review cycle:** Dispatch → Code Review (Alpha+Beta) → Arch Review (if boundary changes) → Security (if applicable) → `evidence_map` gate → **🛑 STOP — present results**
148
+ Reviewers add findings to the Orchestrator's existing `evidence_map` `task_id` and do NOT run the gate themselves.
148
149
 
149
150
  ### Flow MCP Tools
150
151
 
@@ -205,6 +206,7 @@ When subagents complete, their visual outputs (from `present`) are NOT visible t
205
206
  6. **Always use flows** — every task goes through a flow; design decisions happen in the flow's design step
206
207
  7. **Never proceed without user approval** at 🛑 stops
207
208
  8. **Max 2 retries** then escalate to user
209
+ - **Graph discovery** — when exploring relationships use `graph({action:'find_nodes', name_pattern})` then `graph({action:'neighbors', node_id})`. Never use `shortest_path` (doesn't exist).
208
210
 
209
211
  ## Delegation Enforcement
210
212