npm - @vpxa/aikit - Versions diffs - 0.1.53 → 0.1.55 - Mend

@vpxa/aikit 0.1.53 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/packages/server/dist/tools/graph.tool.js CHANGED Viewed

@@ -1,4 +1,23 @@
-import{getToolMeta as e}from"../tool-metadata.js";import{GraphOutputSchema as t}from"../output-schemas.js";import{z as n}from"zod";import{createLogger as r,serializeError as i}from"../../../core/dist/index.js";import{graphQuery as a}from"../../../tools/dist/index.js";const o=r(`tools`),s=n.object({id:n.string().optional().describe(`Node ID (auto-generated if omitted)`),type:n.string().describe(`Node type (entity, service, api, concept, decision)`),name:n.string().describe(`Display name`),properties:n.record(n.string(),n.unknown()).optional().describe(`Arbitrary properties`),sourceRecordId:n.string().optional().describe(`Back-link to knowledge record ID`),sourcePath:n.string().optional().describe(`Source file path`)}),c=n.object({id:n.string().optional().describe(`Edge ID (auto-generated if omitted)`),fromId:n.string().describe(`Source node ID`),toId:n.string().describe(`Target node ID`),type:n.string().describe(`Relationship type (depends-on, implements, calls, affects)`),weight:n.number().min(0).max(1).optional().describe(`Relationship weight`),properties:n.record(n.string(),n.unknown()).optional().describe(`Arbitrary properties`)});function l(r,l){let u=e(`graph`);r.registerTool(`graph`,{title:u.title,description:`Query and manage the knowledge graph. Find nodes/edges, traverse connections, add entities and relationships, detect communities, trace processes, and inspect full symbol context. 
The graph captures structural relationships between concepts discovered in the codebase.`,outputSchema:t,inputSchema:{action:n.enum([`find_nodes`,`find_edges`,`neighbors`,`traverse`,`stats`,`validate`,`add`,`delete`,`clear`,`detect_communities`,`set_community`,`trace_process`,`list_processes`,`delete_process`,`depth_traverse`,`cohesion`,`symbol360`]).describe(`Action: find_nodes (search nodes), find_edges (search edges), neighbors (direct connections), traverse (multi-hop), stats (graph overview), validate (check graph integrity), add (insert nodes/edges), delete (remove nodes), clear (remove all), detect_communities (find clusters), set_community (label a node), trace_process (persist execution flow), list_processes (list traced flows), delete_process (remove a flow), depth_traverse (bucket traversal by depth), cohesion (score a community), symbol360 (full node context)`),node_type:n.string().optional().describe(`Node type filter (for find_nodes)`),name_pattern:n.string().optional().describe(`Name substring match (for find_nodes)`),source_path:n.string().optional().describe(`Source path filter`),node_id:n.string().optional().describe(`Node ID (for neighbors, traverse, delete)`),edge_type:n.string().optional().describe(`Edge type filter`),from_id:n.string().optional().describe(`Source node ID (for find_edges)`),to_id:n.string().optional().describe(`Target node ID (for find_edges)`),direction:n.enum([`outgoing`,`incoming`,`both`]).default(`both`).describe(`Traversal direction`),max_depth:n.number().min(1).max(5).default(2).describe(`Max traversal depth`),limit:n.number().min(1).max(100).default(50).describe(`Max results`),nodes:n.array(s).max(500).optional().describe(`Nodes to add (for action=add)`),edges:n.array(c).max(500).optional().describe(`Edges to add (for action=add)`),community:n.string().optional().describe(`Community label (for set_community, cohesion)`),process_id:n.string().optional().describe(`Process ID (for 
delete_process)`),label:n.string().optional().describe(`Label for process tracing (for trace_process)`)},annotations:u.annotations},async e=>{try{let t=await a(l,{action:e.action,nodeType:e.node_type,namePattern:e.name_pattern,sourcePath:e.source_path,nodeId:e.node_id,edgeType:e.edge_type,fromId:e.from_id,toId:e.to_id,direction:e.direction,maxDepth:e.max_depth,limit:e.limit,nodes:e.nodes,edges:e.edges,community:e.community,processId:e.process_id,label:e.label}),n=[t.summary];if(t.nodes&&t.nodes.length>0){n.push(`
+import{getToolMeta as e}from"../tool-metadata.js";import{GraphOutputSchema as t}from"../output-schemas.js";import{z as n}from"zod";import{createLogger as r,serializeError as i}from"../../../core/dist/index.js";import{graphQuery as a}from"../../../tools/dist/index.js";const o=r(`tools`),s=n.object({id:n.string().optional().describe(`Node ID (auto-generated if omitted)`),type:n.string().describe(`Node type (entity, service, api, concept, decision)`),name:n.string().describe(`Display name`),properties:n.record(n.string(),n.unknown()).optional().describe(`Arbitrary properties`),sourceRecordId:n.string().optional().describe(`Back-link to knowledge record ID`),sourcePath:n.string().optional().describe(`Source file path`)}),c=n.object({id:n.string().optional().describe(`Edge ID (auto-generated if omitted)`),fromId:n.string().describe(`Source node ID`),toId:n.string().describe(`Target node ID`),type:n.string().describe(`Relationship type (depends-on, implements, calls, affects)`),weight:n.number().min(0).max(1).optional().describe(`Relationship weight`),properties:n.record(n.string(),n.unknown()).optional().describe(`Arbitrary properties`)});function l(r,l){let u=e(`graph`);r.registerTool(`graph`,{title:u.title,description:`Query the auto-populated code knowledge graph (modules, symbols, imports) to answer
+structural questions vector search cannot.
+Common flow (when you don't have a node_id yet — TWO STEPS):
+  1) Discover:  graph({action:'find_nodes', name_pattern:'<module-or-symbol>'})
+  2) Explore:   graph({action:'neighbors', node_id, direction:'incoming'|'outgoing'})
+Single-step read actions:
+  • symbol360       — full context (definition + references + imports) for a named symbol
+  • traverse        — walk N hops from a node
+  • depth_traverse  — bounded-depth traversal
+  • find_edges      — list edges matching criteria
+  • cohesion        — module cohesion score
+  • stats           — graph size/health
+  • validate        — integrity check
+Mutating actions (use deliberately): add, delete, clear, set_community,
+detect_communities, trace_process, delete_process.
+Complements: \`symbol\` (single lookup), \`trace\` (call-chain AST), \`blast_radius\` (change impact).`,outputSchema:t,inputSchema:{action:n.enum([`find_nodes`,`find_edges`,`neighbors`,`traverse`,`stats`,`validate`,`add`,`delete`,`clear`,`detect_communities`,`set_community`,`trace_process`,`list_processes`,`delete_process`,`depth_traverse`,`cohesion`,`symbol360`]).describe(`Action: find_nodes (search nodes), find_edges (search edges), neighbors (direct connections), traverse (multi-hop), stats (graph overview), validate (check graph integrity), add (insert nodes/edges), delete (remove nodes), clear (remove all), detect_communities (find clusters), set_community (label a node), trace_process (persist execution flow), list_processes (list traced flows), delete_process (remove a flow), depth_traverse (bucket traversal by depth), cohesion (score a community), symbol360 (full node context)`),node_type:n.string().optional().describe(`Node type filter (for find_nodes)`),name_pattern:n.string().optional().describe(`Name substring match (for find_nodes)`),source_path:n.string().optional().describe(`Source path filter`),node_id:n.string().optional().describe(`Node ID (for neighbors, traverse, delete)`),edge_type:n.string().optional().describe(`Edge type filter`),from_id:n.string().optional().describe(`Source node ID (for find_edges)`),to_id:n.string().optional().describe(`Target node ID (for find_edges)`),direction:n.enum([`outgoing`,`incoming`,`both`]).default(`both`).describe(`Traversal direction`),max_depth:n.number().min(1).max(5).default(2).describe(`Max traversal depth`),limit:n.number().min(1).max(100).default(50).describe(`Max results`),nodes:n.array(s).max(500).optional().describe(`Nodes to add (for action=add)`),edges:n.array(c).max(500).optional().describe(`Edges to add (for action=add)`),community:n.string().optional().describe(`Community label (for set_community, cohesion)`),process_id:n.string().optional().describe(`Process ID (for 
delete_process)`),label:n.string().optional().describe(`Label for process tracing (for trace_process)`)},annotations:u.annotations},async e=>{try{let t=await a(l,{action:e.action,nodeType:e.node_type,namePattern:e.name_pattern,sourcePath:e.source_path,nodeId:e.node_id,edgeType:e.edge_type,fromId:e.from_id,toId:e.to_id,direction:e.direction,maxDepth:e.max_depth,limit:e.limit,nodes:e.nodes,edges:e.edges,community:e.community,processId:e.process_id,label:e.label}),n=[t.summary];if(t.nodes&&t.nodes.length>0){n.push(`
 ### Nodes`);for(let e of t.nodes){let t=Object.keys(e.properties).length>0?` — ${JSON.stringify(e.properties)}`:``;n.push(`- **${e.name}** (${e.type}, id: \`${e.id}\`)${t}`)}}if(t.edges&&t.edges.length>0){n.push(`
 ### Edges`);for(let e of t.edges)n.push(`- \`${e.fromId}\` —[${e.type}]→ \`${e.toId}\`${e.weight===1?``:` (weight: ${e.weight})`}`)}if(t.stats&&(n.push(`\nNode types: ${JSON.stringify(t.stats.nodeTypes)}`),n.push(`Edge types: ${JSON.stringify(t.stats.edgeTypes)}`)),t.validation){if(n.push(`
 ### Validation`),n.push(`- **Valid**: ${t.validation.valid?`yes`:`no`}`),t.validation.danglingEdges.length>0){n.push(`- **Dangling edges**:`);for(let e of t.validation.danglingEdges)n.push(`  - \`${e.edgeId}\` references missing node \`${e.missingNodeId}\``)}t.validation.orphanNodes.length>0&&n.push(`- **Orphan nodes**: ${t.validation.orphanNodes.map(e=>`\`${e}\``).join(`, `)}`)}if(t.communities){n.push(`

package/packages/server/dist/tools/status.tool.js CHANGED Viewed

@@ -1,3 +1,3 @@
 import{getGcStatus as e}from"../auto-gc.js";import{getToolTelemetry as t}from"../replay-interceptor.js";import{getToolMeta as n}from"../tool-metadata.js";import{StatusOutputSchema as r}from"../output-schemas.js";import{autoUpgradeScaffold as i,getCurrentVersion as a,getUpgradeState as o}from"../version-check.js";import{existsSync as s,readFileSync as c,statSync as l}from"node:fs";import{resolve as u}from"node:path";import{homedir as d}from"node:os";import{AIKIT_PATHS as f,createLogger as p,serializeError as m}from"../../../core/dist/index.js";import{WasmRuntime as h}from"../../../chunker/dist/index.js";const g=p(`tools`);function _(e,t,n,r=15e3){let i,a=new Promise(e=>{i=setTimeout(()=>{g.warn(`Status sub-operation "${n}" timed out after ${r}ms`),e({value:t,timedOut:!0})},r)});return Promise.race([e.then(e=>(clearTimeout(i),{value:e,timedOut:!1}),e=>(clearTimeout(i),g.warn(`Status sub-operation "${n}" failed: ${e instanceof Error?e.message:String(e)}`),{value:t,timedOut:!1})),a])}const v=5*6e4;let y=null,b=null;function x(e,t,n,r){let i=Math.min(e/2e4,1),a=Math.min((t+n)/5e4,1),o=Math.min(r/200,1);return Math.round(i*40+a*35+o*25)}function S(){let e=Date.now();if(y&&e-y.ts<v)return y.value;try{let t=u(d(),`.copilot`,`.aikit-scaffold.json`);if(!s(t))return y={value:null,ts:e},null;let n=JSON.parse(c(t,`utf-8`)).version??null;return y={value:n,ts:e},n}catch{return y={value:null,ts:e},null}}function C(){let e=Date.now();if(b&&e-b.ts<v)return b.value;try{let t=u(process.cwd(),`.github`,`.aikit-scaffold.json`);if(!s(t))return b={value:null,ts:e},null;let n=JSON.parse(c(t,`utf-8`)).version??null;return b={value:n,ts:e},n}catch{return b={value:null,ts:e},null}}function w(e){let t=n(`status`);e.registerTool(`status`,{title:t.title,description:`Get the current status and statistics of the AI Kit index.`,outputSchema:r,annotations:t.annotations},async()=>{let e=a(),t=S(),n=C(),r=t!=null&&t!==e,s=n!=null&&n!==e,c=[`## AI Kit Status`,``,`⏳ **AI Kit is initializing** — index 
stats will be available shortly.`,``,`### Runtime`,`- **Tree-sitter (WASM)**: ${h.get()?`✅ Available (AST analysis)`:`⚠ Unavailable (regex fallback)`}`,``,`### Version`,`- **Server**: ${e}`,`- **Scaffold (user)**: ${t??`not installed`}`,`- **Scaffold (workspace)**: ${n??`not installed`}`];if(r||s){let a=o(),l=[];r&&l.push(`user scaffold v${t}`),s&&l.push(`workspace scaffold v${n}`);let u=l.join(`, `);a.state===`success`?c.push(``,`### ✅ Upgrade Applied`,`- Server v${e} — ${u} auto-upgraded successfully.`,`- _Restart the MCP server to use the updated version._`):a.state===`pending`?c.push(``,`### ⏳ Upgrade In Progress`,`- Server v${e} ≠ ${u}`,`- Auto-upgrade is running in the background…`):a.state===`failed`?(i(),c.push(``,`### ⬆ Upgrade Available (auto-upgrade failed, retrying)`,`- Server v${e} ≠ ${u}`,`- Error: ${a.error??`unknown`}`)):(i(),c.push(``,`### ⬆ Upgrade Available`,`- Server v${e} ≠ ${u}`,`- Auto-upgrade triggered — check again shortly.`))}let l={totalRecords:0,totalFiles:0,lastIndexedAt:null,onboarded:!1,onboardDir:``,contentTypes:{},wasmAvailable:!!h.get(),graphStats:null,curatedCount:0,serverVersion:e,scaffoldVersion:t??null,workspaceScaffoldVersion:n??null,upgradeAvailable:r||s,contextPressure:0};return{content:[{type:`text`,text:c.join(`
 `)}],structuredContent:l}})}function T(c,d,p,v,y,b,w,T){let E=n(`status`);c.registerTool(`status`,{title:E.title,description:`Get the current status and statistics of the AI Kit index.`,outputSchema:r,annotations:E.annotations},async()=>{let n=[];try{let[r,c]=await Promise.all([_(d.getStats(),{totalRecords:0,totalFiles:0,lastIndexedAt:null,contentTypeBreakdown:{}},`store.getStats`),_(d.listSourcePaths(),[],`store.listSourcePaths`)]),m=r.value;r.timedOut&&n.push(`⚠ Index stats timed out — values may be incomplete`);let g=c.value;c.timedOut&&n.push(`⚠ File listing timed out`);let E=null,D=0,O=[`## AI Kit Status`,``,`- **Total Records**: ${m.totalRecords}`,`- **Total Files**: ${m.totalFiles}`,`- **Last Indexed**: ${m.lastIndexedAt??`Never`}`,``,`### Content Types`,...Object.entries(m.contentTypeBreakdown).map(([e,t])=>`- ${e}: ${t}`),``,`### Indexed Files`,...g.slice(0,50).map(e=>`- ${e}`),g.length>50?`\n... and ${g.length-50} more files`:``];if(p)try{let e=await _(p.getStats(),{nodeCount:0,edgeCount:0,nodeTypes:{},edgeTypes:{}},`graphStore.getStats`);if(e.timedOut)n.push(`⚠ Graph stats timed out`),O.push(``,`### Knowledge Graph`,`- Graph stats timed out`);else{let t=e.value;E={nodes:t.nodeCount,edges:t.edgeCount},O.push(``,`### Knowledge Graph`,`- **Nodes**: ${t.nodeCount}`,`- **Edges**: ${t.edgeCount}`,...Object.entries(t.nodeTypes).map(([e,t])=>`  - ${e}: ${t}`));try{let e=await _(p.validate(),{valid:!0,danglingEdges:[],orphanNodes:[],stats:{nodeCount:0,edgeCount:0,nodeTypes:{},edgeTypes:{}}},`graphStore.validate`);if(!e.timedOut){let t=e.value;t.valid||O.push(`- **⚠ Integrity Issues**: ${t.danglingEdges.length} dangling edges`),t.orphanNodes.length>0&&O.push(`- **Orphan nodes**: ${t.orphanNodes.length}`)}}catch{}}}catch{O.push(``,`### Knowledge Graph`,`- Graph store unavailable`)}let k=b?.onboardDir??u(process.cwd(),f.aiContext),A=s(k),j=y?.onboardComplete||A;if(O.push(``,`### Onboard Status`,j?`- ✅ Complete${y?.onboardTimestamp?` (last: 
${y.onboardTimestamp})`:``}`:'- ❌ Not run — call `onboard({ path: "." })` to analyze the codebase',`- **Onboard Directory**: \`${k}\``),v)try{let e=await _(v.list(),[],`curated.list`);if(e.timedOut)n.push(`⚠ Curated knowledge listing timed out`),O.push(``,`### Curated Knowledge`,`- Listing timed out`);else{let t=e.value;D=t.length,O.push(``,`### Curated Knowledge`,t.length>0?`- ${t.length} entries`:"- Empty — use `remember()` to persist decisions")}}catch{O.push(``,`### Curated Knowledge`,`- Unable to read curated entries`)}let M=x(m.totalRecords,E?.nodes??0,E?.edges??0,D);O.push(``),O.push(`## 📊 Context Pressure: ${M}/100`),M>=80?O.push(`⚠️ High pressure — consider pruning stale entries or compacting context.`):M>=50?O.push(`ℹ️ Moderate pressure — knowledge base is well-populated.`):O.push(`✅ Low pressure — plenty of headroom for more content.`);let N=0;if(m.lastIndexedAt){N=new Date(m.lastIndexedAt).getTime();let e=(Date.now()-N)/(1e3*60*60);O.push(``,`### Index Freshness`,e>24?w===`smart`?`- ⚠ Last indexed ${Math.floor(e)}h ago — smart indexing will refresh automatically`:`- ⚠ Last indexed ${Math.floor(e)}h ago — may be stale. 
Run \`reindex({})\``:`- ✅ Last indexed ${e<1?`less than 1h`:`${Math.floor(e)}h`} ago`)}if(w===`smart`)if(O.push(``,`### Smart Indexing`),T){let e=T();e?O.push(`- **Mode**: Smart (trickle)`,`- **Status**: ${e.running?`✅ Running`:`⏸ Stopped`}`,`- **Queue**: ${e.queueSize} files pending`,`- **Changed files**: ${e.changedFilesSize} detected`,`- **Interval**: ${Math.round(e.intervalMs/1e3)}s per batch of ${e.batchSize}`):O.push(`- **Mode**: Smart (trickle)`,`- **Status**: scheduler state unavailable (init may have failed)`)}else O.push(`- **Mode**: Smart (trickle) — scheduler state unavailable`);{try{let e=u(process.cwd(),f.data,`stash`);if(s(e)){let t=l(e).mtimeMs;t>N&&(N=t)}}catch{}let e=[];if(v)try{let t=D>0?await v.list():[];for(let e of t){let t=new Date(e.updated||e.created).getTime();t>N&&(N=t)}e.push(...t.sort((e,t)=>new Date(t.updated).getTime()-new Date(e.updated).getTime()).slice(0,5))}catch{}let t=N>0?Date.now()-N:0;if(t>=144e5){let n=Math.floor(t/36e5);if(O.push(``,`### 🌅 Session Briefing`,`_${n}+ hours since last activity — here's what to pick up:_`,``),e.length>0){O.push(`**Recent decisions/notes:**`);for(let t of e)O.push(`- **${t.title}** (${t.category??`note`}) — ${(t.contentPreview??``).slice(0,80)}…`)}O.push(``,`**Suggested next steps:**`,'- `search({ query: "SESSION CHECKPOINT", origin: "curated" })` — find your last checkpoint',"- `restore({})` — resume from a saved checkpoint","- `list()` — browse all stored knowledge")}}O.push(``,`### Runtime`,`- **Tree-sitter (WASM)**: ${h.get()?`✅ Available (AST analysis)`:`⚠ Unavailable (regex fallback)`}`);let P=S(),F=C(),I=a(),L=P!=null&&P!==I,R=F!=null&&F!==I;if(L||R){let e=o(),t=[];L&&t.push(`user scaffold v${P}`),R&&t.push(`workspace scaffold v${F}`);let n=t.join(`, `);e.state===`success`?O.push(``,`### ✅ Upgrade Applied`,`- Server v${I} — ${n} auto-upgraded successfully.`,`- _Restart the MCP server to use the updated version._`):e.state===`pending`?O.push(``,`### ⏳ Upgrade In Progress`,`- Server v${I} ≠ 
${n}`,`- Auto-upgrade is running in the background…`):e.state===`failed`?(i(),O.push(``,`### ⬆ Upgrade Available (auto-upgrade failed, retrying)`,`- Server v${I} ≠ ${n}`,`- Error: ${e.error??`unknown`}`)):(i(),O.push(``,`### ⬆ Upgrade Available`,`- Server v${I} ≠ ${n}`,`- Auto-upgrade triggered — check again shortly.`))}n.length>0&&O.push(``,`### ⚠ Warnings`,...n.map(e=>`- ${e}`));let z=t();if(z.length>0){let e=z.sort((e,t)=>t.callCount-e.callCount);O.push(``,`### Tool Usage This Session`,``),O.push(`| Tool | Calls | Tokens In | Tokens Out | Errors | Avg Latency |`),O.push(`|------|-------|-----------|------------|--------|-------------|`);for(let t of e.slice(0,15)){let e=Math.round(t.totalInputChars/4),n=Math.round(t.totalOutputChars/4),r=Math.round(t.totalDurationMs/t.callCount);O.push(`| ${t.tool} | ${t.callCount} | ${e.toLocaleString()} | ${n.toLocaleString()} | ${t.errorCount} | ${r}ms |`)}}let B=e();if(B.bufferSize>=10){let e=B.state===`healthy`?`🟢`:B.state===`degraded`?`🔴`:`🟡`;O.push(``,`### Auto-GC: ${e} ${B.state}`),O.push(`- p95 latency: ${B.p95}ms | buffer: ${B.bufferSize} samples`),B.gcCount>0&&O.push(`- GC cycles triggered: ${B.gcCount}`)}let V=O.join(`
-`),H={totalRecords:m.totalRecords,totalFiles:m.totalFiles,lastIndexedAt:m.lastIndexedAt??null,onboarded:j,onboardDir:k,contentTypes:m.contentTypeBreakdown,wasmAvailable:!!h.get(),graphStats:E,curatedCount:D,serverVersion:I,scaffoldVersion:P??null,workspaceScaffoldVersion:F??null,upgradeAvailable:L||R,contextPressure:M};return{content:[{type:`text`,text:V+(w===`smart`?"\n\n---\n_Next: Use `search` to query indexed content or `graph(stats)` to explore the knowledge graph. Smart indexing handles updates automatically._":"\n\n---\n_Next: Use `search` to query indexed content, `graph(stats)` to explore the knowledge graph, or `reindex` to refresh the index._")}],structuredContent:H}}catch(e){return g.error(`Status failed`,m(e)),{content:[{type:`text`,text:`Status check failed: ${e instanceof Error?e.message:String(e)}`}],isError:!0}}})}export{S as getScaffoldVersion,C as getWorkspaceScaffoldVersion,w as registerEarlyStatusTool,T as registerStatusTool};
+`),H={totalRecords:m.totalRecords,totalFiles:m.totalFiles,lastIndexedAt:m.lastIndexedAt??null,onboarded:j,onboardDir:k,contentTypes:m.contentTypeBreakdown,wasmAvailable:!!h.get(),graphStats:E,curatedCount:D,serverVersion:I,scaffoldVersion:P??null,workspaceScaffoldVersion:F??null,upgradeAvailable:L||R,contextPressure:M};return{content:[{type:`text`,text:V+(w===`smart`?"\n\n---\n_Next: Use `search` to query indexed content or `graph({action:'find_nodes', name_pattern:'<top-level-module>'})` → then `graph({action:'neighbors', node_id})` for relationships. Smart indexing handles updates automatically._":"\n\n---\n_Next: Use `search` to query indexed content, `graph({action:'find_nodes', name_pattern:'<top-level-module>'})` → then `graph({action:'neighbors', node_id})` for relationships, or `reindex` to refresh the index._")}],structuredContent:H}}catch(e){return g.error(`Status failed`,m(e)),{content:[{type:`text`,text:`Status check failed: ${e instanceof Error?e.message:String(e)}`}],isError:!0}}})}export{S as getScaffoldVersion,C as getWorkspaceScaffoldVersion,w as registerEarlyStatusTool,T as registerStatusTool};

package/scaffold/adapters/copilot.mjs CHANGED Viewed

@@ -121,6 +121,7 @@ function generateVariantAgent(roleName, suffix, def) {
     def.sharedBase && PROTOCOLS[def.sharedBase] ? `\n\n${PROTOCOLS[def.sharedBase]}` : '';
   const extra = def.extraBody ? `\n\n${def.extraBody}` : '';
+  const variantAddendum = varDef.bodyAddendum ? `\n\n${varDef.bodyAddendum}` : '';
   const skillsSection = def.skills?.length
     ? `\n\n## Skills (load on demand)\n\n| Skill | When to load |\n|-------|--------------|\n${def.skills.map(([s, w]) => `| ${s} | ${w} |`).join('\n')}`
@@ -137,7 +138,7 @@ model: ${model}
 # ${fullName} - ${title}
 You are **${fullName}**${identity}${extra}
-${sharedContent}${skillsSection}
+${sharedContent}${variantAddendum}${skillsSection}
 ${FLOWS_SECTION}
 `;

package/scaffold/definitions/agents.mjs CHANGED Viewed

@@ -155,22 +155,62 @@ export const AGENTS = {
         description: 'Primary deep research agent — also serves as default Researcher',
         identity:
           ', the primary deep research agent. During multi-model decision sessions, you provide deep reasoning and nuanced system design.',
+        bodyAddendum: `## Required Output Section — \`## Depth Analysis\`
+Your final report MUST contain a \`## Depth Analysis\` section with:
+- Deep-dive into ONE chosen subsystem (most structurally central to the question)
+- Full evidence chain: file:line citations for every structural claim
+- At least 2 \`compact\`/\`file_summary\` extracts woven into the narrative
+You are the DEFAULT researcher. When the Orchestrator needs breadth + depth, they
+dispatch you alone. Your lens: thorough, evidence-first, exhaustive.`,
       },
       Beta: {
         description:
           'Research variant — pragmatic analysis with focus on trade-offs and edge cases',
         identity:
           ', a variant of the Researcher agent optimized for **pragmatic analysis**. Focus on trade-offs, edge cases, and practical constraints. Challenge assumptions and highlight risks the primary researcher may overlook.',
+        bodyAddendum: `## Required Output Section — \`## Failure Modes & Counter-Evidence\`
+Your final report MUST contain a \`## Failure Modes & Counter-Evidence\` section with:
+- At least 3 adversarial claims challenging your own primary finding
+- For each counter-claim: the condition under which it would be TRUE, and the
+  evidence (file:line or search receipt) that currently falsifies it
+- Any unresolved counter-evidence flagged as \`⚠ UNRESOLVED\`
+Your lens: pragmatic skepticism. Mark competing claims as \`A\` (Assumed) by default;
+challenge before promoting to \`V\`.`,
       },
       Gamma: {
         description: 'Research variant — broad pattern matching across domains and technologies',
         identity:
           ', a variant of the Researcher agent optimized for **cross-domain pattern matching**. Draw connections from other domains, frameworks, and industries. Bring breadth where Alpha brings depth.',
+        bodyAddendum: `## Required Output Section — \`## Cross-Domain Analogies\`
+Your final report MUST contain a \`## Cross-Domain Analogies\` section with:
+- At least 2 patterns from other tools/frameworks/domains that apply to the question
+- For each: the external source (cite via \`web_search\` or \`web_fetch\` receipt) and
+  how it maps to our codebase
+- One "missing pattern we should adopt" recommendation
+Your lens: cross-domain pattern matching. Weight \`web_search\` + \`web_fetch\` higher
+than peers. Assume the LLM's training data is stale — verify with fresh searches.`,
       },
       Delta: {
         description: 'Research variant — implementation feasibility and performance implications',
         identity:
           ', a variant of the Researcher agent optimized for **implementation feasibility**. Focus on performance implications, scaling concerns, and concrete implementation paths. Ground theoretical proposals in practical reality.',
+        bodyAddendum: `## Required Output Section — \`## Implementation Cost & Feasibility\`
+Your final report MUST contain a \`## Implementation Cost & Feasibility\` section with:
+- Complexity snapshot: you MUST call \`measure({ path })\` on any file ≥ 50 LOC in the
+  target subsystem at least once and quote the \`cognitiveComplexity\` result
+- Blast radius estimate: \`blast_radius({ changed_files })\` on the proposed edits
+- Time/risk table: | Change | Lines | Risk | Effort |
+- Feasibility verdict: SAFE / RISKY / INFEASIBLE with one-line justification
+Your lens: implementation feasibility. Prefer \`measure\` + \`blast_radius\` + \`analyze_patterns\`
+over abstract reasoning.`,
       },
     },
   },

package/scaffold/definitions/bodies.mjs CHANGED Viewed

@@ -27,7 +27,7 @@ ${agentTable}
 ## FORGE Protocol
 1. \`forge_classify({ task, files })\` → determine tier (Floor/Standard/Critical)
-2. Pass tier to subagents: \`FORGE Context: Tier = {tier}. Evidence: {requirements}.\`
+2. Pass tier + task_id to subagents: \`FORGE Context: Tier = {tier}. Task ID = {task_id}. Evidence: {requirements}. Reviewers add CRITICAL/HIGH claims into your task_id; never create their own.\`
 3. After review: \`evidence_map({ action: "gate", task_id })\` → YIELD/HOLD/HARD_BLOCK
 4. Auto-upgrade tier if unknowns reveal contract/security issues
@@ -116,7 +116,7 @@ Batch 2 (after batch 1):
 2. **Goal** — acceptance criteria, testable
 3. **Arch Context** — code snippets from \`compact()\`/\`digest()\`
 4. **Constraints** — patterns, conventions
-5. **FORGE** — tier + evidence requirements
+5. **FORGE** — tier + task_id + evidence requirements (reviewers add CRITICAL/HIGH claims into your task_id; never create their own)
 6. **Self-Review** — checklist before declaring status
 **Subagent status protocol:** \`DONE\` | \`DONE_WITH_CONCERNS\` | \`NEEDS_CONTEXT\` | \`BLOCKED\`
@@ -128,6 +128,7 @@ Batch 2 (after batch 1):
 - Use the subagent prompt template for every dispatch so step-specific flow instructions are grounded in actual code context
 **Per-step review cycle:** Dispatch → Code Review (Alpha+Beta) → Arch Review (if boundary changes) → Security (if applicable) → \`evidence_map\` gate → **🛑 STOP — present results**
+Reviewers add findings to the Orchestrator's existing \`evidence_map\` \`task_id\` and do NOT run the gate themselves.
 ### Flow MCP Tools
@@ -188,6 +189,7 @@ When subagents complete, their visual outputs (from \`present\`) are NOT visible
 6. **Always use flows** — every task goes through a flow; design decisions happen in the flow's design step
 7. **Never proceed without user approval** at 🛑 stops
 8. **Max 2 retries** then escalate to user
+9. **Graph discovery** — when exploring relationships use \`graph({action:'find_nodes', name_pattern})\` then \`graph({action:'neighbors', node_id})\`. Never use \`shortest_path\` (doesn't exist).
 ## Delegation Enforcement
@@ -352,7 +354,21 @@ When subagents complete, their visual outputs (from \`present\`) are NOT visible
 - **Never modify tests to make them pass** — Fix the implementation instead
 - **Run \`check\` after every change** — Catch errors early
 - **Loop-break** — If the same test fails 3 times with the same error after your fixes, STOP. Re-read the error from scratch, check your assumptions with \`trace\` or \`symbol\`, and try a fundamentally different approach. Do not attempt a 4th fix in the same direction
-- **Think-first for complex tasks** — If a task involves 3+ files or non-obvious logic, outline your approach before writing code. Check existing patterns with \`search\` first. Design, then implement`,
+- **Think-first for complex tasks** — If a task involves 3+ files or non-obvious logic, outline your approach before writing code. Check existing patterns with \`search\` first. Design, then implement
+## Pre-Edit Checklist (before modifying any file)
+1. **Understand consumers** — \`graph({action:'find_nodes', name_pattern:'<target>'})\` → \`graph({action:'neighbors', node_id, direction:'incoming'})\`. See who calls/imports before changing a contract.
+2. **Compress, don't raw-read** — \`file_summary\` then \`compact({path, query})\` for the specific area. Only \`read_file\` when you need exact lines for \`replace_string_in_file\`.
+3. **Snapshot risky edits** — \`checkpoint({action:'save', label:'pre-<scope>'})\` before cross-cutting changes. \`checkpoint({action:'restore', ...})\` if \`check\`/\`test_run\` fails.
+4. **Estimate blast radius** — \`blast_radius({changed_files:[...]})\` BEFORE editing when changing a public/shared symbol; re-run AFTER to confirm actual impact matches.
+5. **TDD when tests exist** — write/extend the failing test first, then minimum code to pass.
+## Post-Edit Checklist
+1. \`check({})\` — typecheck + lint must pass clean
+2. \`test_run({})\` — full suite or targeted pattern
+3. If Orchestrator passed a \`task_id\`: \`evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})\` for each verified contract/acceptance claim. Do NOT run the gate — Orchestrator owns it.`,
   Frontend: `**Read \`AGENTS.md\`** in the workspace root for project conventions and AI Kit protocol.
@@ -369,7 +385,38 @@ When subagents complete, their visual outputs (from \`present\`) are NOT visible
 - **Accessibility first** — ARIA attributes, keyboard navigation, screen reader support
 - **Follow design system** — Use existing tokens, don't create one-off values
 - **Responsive by default** — Mobile-first, test all breakpoints
-- **Test-first** — Component tests before implementation`,
+- **Test-first** — Component tests before implementation
+## Frontend Exploration Mode
+| Need | Tool |
+|------|------|
+| Component dependency graph | \`graph({action:'neighbors', node_id:'src/components/X.tsx', direction:'incoming'})\` |
+| Stale / unused components | \`dead_symbols({ path:'src/components' })\` |
+| React / a11y / library API research | \`web_search({ query })\`, \`web_fetch({ urls })\` |
+| Component complexity hotspots | \`measure({ path:'src/components' })\` |
+| Verify a component's callers | \`graph({action:'find_nodes', name_pattern})\` → \`neighbors\` |
+## Visual Validation Protocol (post \`test_run\`)
+**Pre-flight (MANDATORY before any browser step):**
+1. Read \`package.json\` scripts — identify dev command (e.g. \`dev\`, \`start\`, \`vite\`)
+2. Determine default port (check script args, \`vite.config.*\`, or env)
+3. Check if dev server already running on port (attempt \`http({ url:'http://localhost:<port>' })\`)
+4. If NOT running, delegate to a helper or use \`createAndRunTask\` to start \`npm run dev\`
+   in the background; wait for ready signal
+5. Capture the base URL
+**Validation:**
+6. \`open_browser_page({ url })\` — render target component page
+7. \`screenshot_page\` + \`read_page\` — capture visual + DOM
+8. Keyboard-only navigation check: simulate Tab/Enter/Escape via \`type_in_page\` —
+   verify focus ring, activation, dismiss
+9. Compare against design tokens / Figma URL if supplied
+10. Fail fast if color contrast < 4.5:1 (WCAG AA) or focus indicator missing
+If the pre-flight dev server cannot be started (e.g. sandbox), fall back to
+\`compact\` inspection of the component source + describe expected visual behavior.`,
   Debugger: `**Read \`AGENTS.md\`** in the workspace root for project conventions and AI Kit protocol.
@@ -411,6 +458,27 @@ When subagents complete, their visual outputs (from \`present\`) are NOT visible
 - **Follow existing patterns** — Consolidate toward established conventions
 - **Don't refactor what isn't asked** — Scope discipline
+## Reversible Refactor Protocol
+Refactors modify the canonical source, so use \`checkpoint\` (NOT \`lane\`) for safety:
+1. **Before starting:** \`checkpoint({ action:'save', label:'pre-refactor-<scope>' })\`
+   — captures a snapshot of the relevant files
+2. **Baseline metrics:** \`measure({ path })\` on target files — record
+   \`cognitiveComplexity\` values BEFORE refactor
+3. **Apply changes** — use \`rename({ old, new })\` for symbol rename (dry_run first),
+   or \`codemod({ pattern, replacement })\` for structural transforms (dry_run first).
+   Never hand-edit what \`rename\`/\`codemod\` can do safely.
+4. **Verify:** \`check({})\` + \`test_run({})\` must both pass with zero new failures
+5. **Post-metrics:** \`measure({ path })\` again — confirm cognitive complexity
+   delta is negative (or justify if zero)
+6. **If validation fails:** \`checkpoint({ action:'restore', label:'pre-refactor-<scope>' })\`
+For multi-approach uncertainty (A vs B), do NOT create lanes. Instead:
+- Delegate to \`Researcher-Delta\` with a feasibility question — they can use \`lane\`
+  for read-only exploration and return a recommendation
+- You then apply the winning approach under the checkpoint protocol above
 ## Skills (load on demand)
 | Skill | When to load |

package/scaffold/definitions/protocols.mjs CHANGED Viewed

@@ -462,6 +462,16 @@ When invoked for a decision analysis, you receive a specific question. You MUST:
 - **\`digest\`** when synthesizing from 3+ sources
 - **\`stratum_card\`** for files you'll reference repeatedly
 - **\`read_file\` is ONLY acceptable** when you need exact lines for a pending edit operation
+## Parallel Exploration via \`lane\`
+For questions that require trying approach A vs approach B in isolation:
+1. \`lane({ action:'create', name:'approach-a' })\` — isolated file copies
+2. Apply approach A mentally; record observations
+3. \`lane({ action:'create', name:'approach-b' })\` — second isolate
+4. Apply approach B mentally; record observations
+5. \`lane({ action:'diff', names:['approach-a','approach-b'] })\` — compare
+6. Include the diff summary in your output; do NOT merge lanes back (read-only role)
 `,
   'code-reviewer-base': `# Code-Reviewer — Shared Base Instructions
@@ -527,6 +537,29 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
 - **NEEDS_REVISION** for any HIGH finding
 - **FAILED** for any CRITICAL finding
 - Always check for **test coverage** on new/changed code
+## Evidence Citation Protocol (tier-aware)
+The Orchestrator runs \`forge_classify\` before dispatching you, and runs the final
+\`evidence_map({action:'gate', task_id})\` after you respond. **Do not create your own
+task_id or run the gate** — feed into the Orchestrator's existing evidence map.
+| Tier | Your responsibility |
+|------|---------------------|
+| Floor    | Free-form findings with \`file.ts#Lxx\` citations. No \`evidence_map\` calls required. |
+| Standard | For every CRITICAL or HIGH finding: \`evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})\`. Max 2-4 adds to keep signal high. |
+| Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with \`safety_gate:'commitment'\` or \`safety_gate:'provenance'\`. |
+**Every response MUST include:**
+- \`**FORGE Task ID:** <task_id>\` (passed in by Orchestrator, or state "not provided")
+- \`**Tier applied:** Floor | Standard | Critical\`
+- \`**Findings:** <list>\` with \`file:line\` receipts
+- Verdict: \`APPROVED\` | \`NEEDS_REVISION\` | \`FAILED\`
+Do NOT:
+- Create a new \`evidence_map\` (the Orchestrator already did)
+- Run \`evidence_map({action:'gate'})\` yourself — the Orchestrator owns the gate
+- Duplicate findings into the map that weren't CRITICAL/HIGH
 `,
   'architect-reviewer-base': `# Architect-Reviewer — Shared Base Instructions
@@ -588,6 +621,50 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
 - **NEEDS_CHANGES** — Fixable structural issues
 - **BLOCKED** — Fundamental design flaw requiring rethink
 - Always validate **dependency direction** — inner layers must not depend on outer
+## Evidence Citation Protocol (tier-aware)
+The Orchestrator runs \`forge_classify\` before dispatching you, and runs the final
+\`evidence_map({action:'gate', task_id})\` after you respond. **Do not create your own
+task_id or run the gate** — feed into the Orchestrator's existing evidence map.
+| Tier | Your responsibility |
+|------|---------------------|
+| Floor    | Free-form findings with \`file.ts#Lxx\` citations. No \`evidence_map\` calls required. |
+| Standard | For every CRITICAL or HIGH finding: \`evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})\`. Max 2-4 adds to keep signal high. |
+| Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with \`safety_gate:'commitment'\` or \`safety_gate:'provenance'\`. |
+**Every response MUST include:**
+- \`**FORGE Task ID:** <task_id>\` (passed in by Orchestrator, or state "not provided")
+- \`**Tier applied:** Floor | Standard | Critical\`
+- \`**Findings:** <list>\` with \`file:line\` receipts
+- Verdict: \`APPROVED\` | \`NEEDS_CHANGES\` | \`BLOCKED\`
+Do NOT:
+- Create a new \`evidence_map\` (the Orchestrator already did)
+- Run \`evidence_map({action:'gate'})\` yourself — the Orchestrator owns the gate
+- Duplicate findings into the map that weren't CRITICAL/HIGH
+## Graph-Assisted Layer Verification
+For each significantly changed module (from \`blast_radius\` or changed_files input):
+1. **Discover node**: \`graph({action:'find_nodes', name_pattern:'<module-path>'})\` → get node_id
+2. **Incoming dependencies** (who depends on this?):
+  \`graph({action:'neighbors', node_id, direction:'incoming'})\`
+  — flag any caller that violates layering rules (e.g. an \`infra/\` module that gets imported by \`core/\`)
+3. **Outgoing dependencies** (what does it depend on?):
+  \`graph({action:'neighbors', node_id, direction:'outgoing'})\`
+  — flag any target that violates direction (e.g. domain importing from infra)
+4. **Isolation check** (modules that should NOT be connected):
+  \`graph({action:'depth_traverse', node_id, max_depth:3})\`
+  — verify no path reaches modules in forbidden directories
+Cite each layer violation as a CRITICAL finding with \`file:line\` receipt, and add it
+to the Evidence Map per the tier protocol above.
+**Do NOT use \`shortest_path\`** — that action does not exist. Use \`depth_traverse\`
+or repeated \`neighbors\` calls.
 `,
   'decision-protocol': `# Multi-Model Decision Protocol

package/scaffold/general/agents/Architect-Reviewer-Alpha.agent.md CHANGED Viewed

@@ -72,6 +72,50 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
 - **BLOCKED** — Fundamental design flaw requiring rethink
 - Always validate **dependency direction** — inner layers must not depend on outer
+## Evidence Citation Protocol (tier-aware)
+The Orchestrator runs `forge_classify` before dispatching you, and runs the final
+`evidence_map({action:'gate', task_id})` after you respond. **Do not create your own
+task_id or run the gate** — feed into the Orchestrator's existing evidence map.
+| Tier | Your responsibility |
+|------|---------------------|
+| Floor    | Free-form findings with `file.ts#Lxx` citations. No `evidence_map` calls required. |
+| Standard | For every CRITICAL or HIGH finding: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})`. Max 2-4 adds to keep signal high. |
+| Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with `safety_gate:'commitment'` or `safety_gate:'provenance'`. |
+**Every response MUST include:**
+- `**FORGE Task ID:** <task_id>` (passed in by Orchestrator, or state "not provided")
+- `**Tier applied:** Floor | Standard | Critical`
+- `**Findings:** <list>` with `file:line` receipts
+- Verdict: `APPROVED` | `NEEDS_CHANGES` | `BLOCKED`
+Do NOT:
+- Create a new `evidence_map` (the Orchestrator already did)
+- Run `evidence_map({action:'gate'})` yourself — the Orchestrator owns the gate
+- Duplicate findings into the map that weren't CRITICAL/HIGH
+## Graph-Assisted Layer Verification
+For each significantly changed module (from `blast_radius` or changed_files input):
+1. **Discover node**: `graph({action:'find_nodes', name_pattern:'<module-path>'})` → get node_id
+2. **Incoming dependencies** (who depends on this?):
+  `graph({action:'neighbors', node_id, direction:'incoming'})`
+  — flag any caller that violates layering rules (e.g. an `infra/` module that gets imported by `core/`)
+3. **Outgoing dependencies** (what does it depend on?):
+  `graph({action:'neighbors', node_id, direction:'outgoing'})`
+  — flag any target that violates direction (e.g. domain importing from infra)
+4. **Isolation check** (modules that should NOT be connected):
+  `graph({action:'depth_traverse', node_id, max_depth:3})`
+  — verify no path reaches modules in forbidden directories
+Cite each layer violation as a CRITICAL finding with `file:line` receipt, and add it
+to the Evidence Map per the tier protocol above.
+**Do NOT use `shortest_path`** — that action does not exist. Use `depth_traverse`
+or repeated `neighbors` calls.
 ## Skills (load on demand)

package/scaffold/general/agents/Architect-Reviewer-Beta.agent.md CHANGED Viewed

@@ -72,6 +72,50 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
 - **BLOCKED** — Fundamental design flaw requiring rethink
 - Always validate **dependency direction** — inner layers must not depend on outer
+## Evidence Citation Protocol (tier-aware)
+The Orchestrator runs `forge_classify` before dispatching you, and runs the final
+`evidence_map({action:'gate', task_id})` after you respond. **Do not create your own
+task_id or run the gate** — feed into the Orchestrator's existing evidence map.
+| Tier | Your responsibility |
+|------|---------------------|
+| Floor    | Free-form findings with `file.ts#Lxx` citations. No `evidence_map` calls required. |
+| Standard | For every CRITICAL or HIGH finding: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})`. Max 2-4 adds to keep signal high. |
+| Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with `safety_gate:'commitment'` or `safety_gate:'provenance'`. |
+**Every response MUST include:**
+- `**FORGE Task ID:** <task_id>` (passed in by Orchestrator, or state "not provided")
+- `**Tier applied:** Floor | Standard | Critical`
+- `**Findings:** <list>` with `file:line` receipts
+- Verdict: `APPROVED` | `NEEDS_CHANGES` | `BLOCKED`
+Do NOT:
+- Create a new `evidence_map` (the Orchestrator already did)
+- Run `evidence_map({action:'gate'})` yourself — the Orchestrator owns the gate
+- Duplicate findings into the map that weren't CRITICAL/HIGH
+## Graph-Assisted Layer Verification
+For each significantly changed module (from `blast_radius` or changed_files input):
+1. **Discover node**: `graph({action:'find_nodes', name_pattern:'<module-path>'})` → get node_id
+2. **Incoming dependencies** (who depends on this?):
+  `graph({action:'neighbors', node_id, direction:'incoming'})`
+  — flag any caller that violates layering rules (e.g. an `infra/` module that gets imported by `core/`)
+3. **Outgoing dependencies** (what does it depend on?):
+  `graph({action:'neighbors', node_id, direction:'outgoing'})`
+  — flag any target that violates direction (e.g. domain importing from infra)
+4. **Isolation check** (modules that should NOT be connected):
+  `graph({action:'depth_traverse', node_id, max_depth:3})`
+  — verify no path reaches modules in forbidden directories
+Cite each layer violation as a CRITICAL finding with `file:line` receipt, and add it
+to the Evidence Map per the tier protocol above.
+**Do NOT use `shortest_path`** — that action does not exist. Use `depth_traverse`
+or repeated `neighbors` calls.
 ## Skills (load on demand)

package/scaffold/general/agents/Code-Reviewer-Alpha.agent.md CHANGED Viewed

@@ -74,6 +74,29 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
 - **FAILED** for any CRITICAL finding
 - Always check for **test coverage** on new/changed code
+## Evidence Citation Protocol (tier-aware)
+The Orchestrator runs `forge_classify` before dispatching you, and runs the final
+`evidence_map({action:'gate', task_id})` after you respond. **Do not create your own
+task_id or run the gate** — feed into the Orchestrator's existing evidence map.
+| Tier | Your responsibility |
+|------|---------------------|
+| Floor    | Free-form findings with `file.ts#Lxx` citations. No `evidence_map` calls required. |
+| Standard | For every CRITICAL or HIGH finding: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})`. Max 2-4 adds to keep signal high. |
+| Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with `safety_gate:'commitment'` or `safety_gate:'provenance'`. |
+**Every response MUST include:**
+- `**FORGE Task ID:** <task_id>` (passed in by Orchestrator, or state "not provided")
+- `**Tier applied:** Floor | Standard | Critical`
+- `**Findings:** <list>` with `file:line` receipts
+- Verdict: `APPROVED` | `NEEDS_REVISION` | `FAILED`
+Do NOT:
+- Create a new `evidence_map` (the Orchestrator already did)
+- Run `evidence_map({action:'gate'})` yourself — the Orchestrator owns the gate
+- Duplicate findings into the map that weren't CRITICAL/HIGH
 ## Skills (load on demand)

package/scaffold/general/agents/Code-Reviewer-Beta.agent.md CHANGED Viewed

@@ -74,6 +74,29 @@ Follow the **MANDATORY FIRST ACTION** and **Information Lookup Order** from code
 - **FAILED** for any CRITICAL finding
 - Always check for **test coverage** on new/changed code
+## Evidence Citation Protocol (tier-aware)
+The Orchestrator runs `forge_classify` before dispatching you, and runs the final
+`evidence_map({action:'gate', task_id})` after you respond. **Do not create your own
+task_id or run the gate** — feed into the Orchestrator's existing evidence map.
+| Tier | Your responsibility |
+|------|---------------------|
+| Floor    | Free-form findings with `file.ts#Lxx` citations. No `evidence_map` calls required. |
+| Standard | For every CRITICAL or HIGH finding: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})`. Max 2-4 adds to keep signal high. |
+| Critical | Structured claims for all CRITICAL/HIGH findings (2-4 Verified + receipts) AND tag contract/security claims with `safety_gate:'commitment'` or `safety_gate:'provenance'`. |
+**Every response MUST include:**
+- `**FORGE Task ID:** <task_id>` (passed in by Orchestrator, or state "not provided")
+- `**Tier applied:** Floor | Standard | Critical`
+- `**Findings:** <list>` with `file:line` receipts
+- Verdict: `APPROVED` | `NEEDS_REVISION` | `FAILED`
+Do NOT:
+- Create a new `evidence_map` (the Orchestrator already did)
+- Run `evidence_map({action:'gate'})` yourself — the Orchestrator owns the gate
+- Duplicate findings into the map that weren't CRITICAL/HIGH
 ## Skills (load on demand)

package/scaffold/general/agents/Frontend.agent.md CHANGED Viewed

@@ -26,6 +26,37 @@ You are the **Frontend**, ui/ux specialist for react, styling, responsive design
 - **Responsive by default** — Mobile-first, test all breakpoints
 - **Test-first** — Component tests before implementation
+## Frontend Exploration Mode
+| Need | Tool |
+|------|------|
+| Component dependency graph | `graph({action:'neighbors', node_id:'src/components/X.tsx', direction:'incoming'})` |
+| Stale / unused components | `dead_symbols({ path:'src/components' })` |
+| React / a11y / library API research | `web_search({ query })`, `web_fetch({ urls })` |
+| Component complexity hotspots | `measure({ path:'src/components' })` |
+| Verify a component's callers | `graph({action:'find_nodes', name_pattern})` → `neighbors` |
+## Visual Validation Protocol (post `test_run`)
+**Pre-flight (MANDATORY before any browser step):**
+1. Read `package.json` scripts — identify dev command (e.g. `dev`, `start`, `vite`)
+2. Determine default port (check script args, `vite.config.*`, or env)
+3. Check whether the dev server is already running on that port (attempt `http({ url:'http://localhost:<port>' })`)
+4. If NOT running, delegate to a helper or use `createAndRunTask` to start `npm run dev`
+   in the background; wait for ready signal
+5. Capture the base URL
+**Validation:**
+6. `open_browser_page({ url })` — render target component page
+7. `screenshot_page` + `read_page` — capture visual + DOM
+8. Keyboard-only navigation check: simulate Tab/Enter/Escape via `type_in_page` —
+   verify focus ring, activation, dismiss
+9. Compare against design tokens / Figma URL if supplied
+10. Fail fast if color contrast < 4.5:1 (WCAG AA) or focus indicator missing
+If the pre-flight dev server cannot be started (e.g. sandbox), fall back to
+`compact` inspection of the component source + describe expected visual behavior.
 # Code Agent — Shared Base Instructions
 > This file contains shared protocols for all code-modifying agents (Implementer, Frontend, Refactor, Debugger). Each agent's definition file contains only its unique identity, constraints, and workflow. **Do not duplicate this content in agent files.**

package/scaffold/general/agents/Implementer.agent.md CHANGED Viewed

@@ -30,6 +30,20 @@ You are the **Implementer**, persistent implementation agent that writes code fo
 - **Loop-break** — If the same test fails 3 times with the same error after your fixes, STOP. Re-read the error from scratch, check your assumptions with `trace` or `symbol`, and try a fundamentally different approach. Do not attempt a 4th fix in the same direction
 - **Think-first for complex tasks** — If a task involves 3+ files or non-obvious logic, outline your approach before writing code. Check existing patterns with `search` first. Design, then implement
+## Pre-Edit Checklist (before modifying any file)
+1. **Understand consumers** — `graph({action:'find_nodes', name_pattern:'<target>'})` → `graph({action:'neighbors', node_id, direction:'incoming'})`. See who calls/imports before changing a contract.
+2. **Compress, don't raw-read** — `file_summary` then `compact({path, query})` for the specific area. Only `read_file` when you need exact lines for `replace_string_in_file`.
+3. **Snapshot risky edits** — `checkpoint({action:'save', label:'pre-<scope>'})` before cross-cutting changes. `checkpoint({action:'restore', ...})` if `check`/`test_run` fails.
+4. **Estimate blast radius** — `blast_radius({changed_files:[...]})` BEFORE editing when changing a public/shared symbol; re-run AFTER to confirm actual impact matches.
+5. **TDD when tests exist** — write/extend the failing test first, then minimum code to pass.
+## Post-Edit Checklist
+1. `check({})` — typecheck + lint must pass clean
+2. `test_run({})` — full suite or targeted pattern
+3. If Orchestrator passed a `task_id`: `evidence_map({action:'add', task_id, claim, status:'V', receipt:'file.ts#Lxx'})` for each verified contract/acceptance claim. Do NOT run the gate — Orchestrator owns it.
 # Code Agent — Shared Base Instructions
 > This file contains shared protocols for all code-modifying agents (Implementer, Frontend, Refactor, Debugger). Each agent's definition file contains only its unique identity, constraints, and workflow. **Do not duplicate this content in agent files.**

package/scaffold/general/agents/Orchestrator.agent.md CHANGED Viewed

@@ -44,7 +44,7 @@ You orchestrate the full development lifecycle: **planning → implementation
 ## FORGE Protocol
 1. `forge_classify({ task, files })` → determine tier (Floor/Standard/Critical)
-2. Pass tier to subagents: `FORGE Context: Tier = {tier}. Evidence: {requirements}.`
+2. Pass tier + task_id to subagents: `FORGE Context: Tier = {tier}. Task ID = {task_id}. Evidence: {requirements}. Reviewers add CRITICAL/HIGH claims into your task_id; never create their own.`
 3. After review: `evidence_map({ action: "gate", task_id })` → YIELD/HOLD/HARD_BLOCK
 4. Auto-upgrade tier if unknowns reveal contract/security issues
@@ -133,7 +133,7 @@ Batch 2 (after batch 1):
 2. **Goal** — acceptance criteria, testable
 3. **Arch Context** — code snippets from `compact()`/`digest()`
 4. **Constraints** — patterns, conventions
-5. **FORGE** — tier + evidence requirements
+5. **FORGE** — tier + task_id + evidence requirements (reviewers add CRITICAL/HIGH claims into your task_id; never create their own)
 6. **Self-Review** — checklist before declaring status
 **Subagent status protocol:** `DONE` | `DONE_WITH_CONCERNS` | `NEEDS_CONTEXT` | `BLOCKED`
@@ -145,6 +145,7 @@ Batch 2 (after batch 1):
 - Use the subagent prompt template for every dispatch so step-specific flow instructions are grounded in actual code context
 **Per-step review cycle:** Dispatch → Code Review (Alpha+Beta) → Arch Review (if boundary changes) → Security (if applicable) → `evidence_map` gate → **🛑 STOP — present results**
+Reviewers add findings to the Orchestrator's existing `evidence_map` `task_id` and do NOT run the gate themselves.
 ### Flow MCP Tools
@@ -205,6 +206,7 @@ When subagents complete, their visual outputs (from `present`) are NOT visible t
 6. **Always use flows** — every task goes through a flow; design decisions happen in the flow's design step
 7. **Never proceed without user approval** at 🛑 stops
 8. **Max 2 retries** then escalate to user
+9. **Graph discovery** — when exploring relationships use `graph({action:'find_nodes', name_pattern})` then `graph({action:'neighbors', node_id})`. Never use `shortest_path` (doesn't exist).
 ## Delegation Enforcement