npm - mstro-app - Versions diffs - 0.4.39 → 0.4.43 - Mend

mstro-app 0.4.39 → 0.4.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197)

package/server/cli/headless/tool-watchdog.ts CHANGED Viewed

@@ -98,10 +98,10 @@ export const DEFAULT_TOOL_TIMEOUT_PROFILES: Record<string, ToolTimeoutProfile> =
     useHaikuTiebreaker: true,
   },
   Write: {
-    coldStartMs: 60_000,       // 1 min
-    floorMs: 30_000,           // 30s minimum
-    ceilingMs: 180_000,        // 3 min ceiling
-    useAdaptive: true,
+    coldStartMs: 300_000,      // 5 min — large docs stream slowly through stdio; model generates content inline
+    floorMs: 120_000,          // 2 min minimum — prevents premature kills on big writes
+    ceilingMs: 600_000,        // 10 min hard cap
+    useAdaptive: false,        // bimodal: 1-line config vs 50KB research doc defeats EMA
     useHaikuTiebreaker: true,
   },
 };
@@ -244,6 +244,22 @@ export class ToolWatchdog {
     const elapsedMs = Date.now() - watch.startTime;
+    // Activity-gated auto-extension: if data is actively streaming, extend without
+    // consuming the one-shot Haiku tiebreaker. Respects ceiling to prevent runaway.
+    const tokenSilenceMs = this.getTokenSilenceMs?.();
+    if (tokenSilenceMs !== undefined && tokenSilenceMs < 60_000) {
+      const remainingToCeiling = profile.ceilingMs - elapsedMs;
+      if (remainingToCeiling > 0) {
+        const extensionMs = Math.min(5 * 60_000, remainingToCeiling);
+        if (this.verbose) {
+          hlog(`[WATCHDOG] ${toolName} (${toolId}) hit timeout after ${Math.round(elapsedMs / 1000)}s, but stream active ${Math.round(tokenSilenceMs / 1000)}s ago — auto-extending ${Math.round(extensionMs / 1000)}s`);
+        }
+        this.scheduleActivityGatedTimeout(watch, toolId, toolName, toolInput, profile, extensionMs, onTimeout);
+        watch.timeoutMs = elapsedMs + extensionMs;
+        return true;
+      }
+    }
     if (!profile.useHaikuTiebreaker || !this.onTiebreaker || watch.tiebreakerAttempted) {
       if (this.verbose) {
         hlog(`[WATCHDOG] ${toolName} (${toolId}) timed out after ${Math.round(elapsedMs / 1000)}s, killing`);
@@ -294,6 +310,26 @@ export class ToolWatchdog {
     return false;
   }
+  /** Schedule an activity-gated timeout that re-enters the full timeout handler */
+  private scheduleActivityGatedTimeout(
+    watch: ActiveWatch,
+    toolId: string,
+    toolName: string,
+    toolInput: Record<string, unknown>,
+    profile: ToolTimeoutProfile,
+    extensionMs: number,
+    onTimeout: () => void,
+  ): void {
+    watch.timer = setTimeout(async () => {
+      const w = this.activeWatches.get(toolId);
+      if (!w) return;
+      const extended = await this.handleTimeoutWithTiebreaker(toolId, toolName, toolInput, profile, onTimeout);
+      if (!extended) {
+        onTimeout();
+      }
+    }, extensionMs);
+  }
   /** Schedule a post-extension timeout that kills without another tiebreaker */
   private scheduleExtensionTimeout(
     watch: ActiveWatch,

package/server/cli/improvisation-retry.ts CHANGED Viewed

@@ -9,6 +9,7 @@
 import { AnalyticsEvents, trackEvent } from '../services/analytics.js';
 import { hlog } from './headless/headless-logger.js';
 import { HeadlessRunner } from './headless/index.js';
+import { extractFinalTextBlock, isResponseAbandoned } from './headless/retry-strategies.js';
 import { assessBestResult, assessContextLoss, assessPrematureCompletion, type ContextLossContext } from './headless/stall-assessor.js';
 import type { ExecutionCheckpoint } from './headless/types.js';
 import type { FileAttachment, HeadlessRunResult, ImprovisationOptions, MovementRecord, RetryLoopState, SessionHistory } from './improvisation-types.js';
@@ -455,38 +456,6 @@ function isPrematureCompletionCandidate(
   return result.stopReason === 'max_tokens' || result.stopReason === 'end_turn';
 }
-/**
- * Fast heuristic: detect response abandonment without a Haiku call.
- * When thinking is significantly longer than the response and the response
- * contains no tool calls, Claude likely planned work it never executed.
- * This pattern occurs after context compaction or heavy parallel tool results.
- */
-function isResponseAbandoned(result: HeadlessRunResult): boolean {
-  const thinkingLen = result.thinkingOutput?.length ?? 0;
-  const responseLen = result.assistantResponse?.length ?? 0;
-  const toolCallsInResponse = result.toolUseHistory?.filter(t => t.result !== undefined).length ?? 0;
-  if (thinkingLen < 500 || responseLen > 1000) return false;
-  if (toolCallsInResponse > 0 && responseLen > 200) return false;
-  return thinkingLen >= responseLen * 3;
-}
-/**
- * Extract the final text block from a concatenated response.
- * The assistantResponse concatenates all text deltas including interleaved
- * progress messages between tool calls. The final paragraph (after the last
- * double-newline break) is the actual conclusion — earlier fragments are
- * progress updates that were already acted on via tool calls.
- */
-function extractFinalTextBlock(response: string, maxLen: number): string {
-  const lastBreak = response.lastIndexOf('\n\n');
-  if (lastBreak !== -1 && response.length - lastBreak > 20) {
-    return response.slice(lastBreak + 2).slice(-maxLen);
-  }
-  return response.slice(-maxLen);
-}
 /** Use Haiku to assess whether an end_turn response is genuinely complete */
 async function assessEndTurnCompletion(result: HeadlessRunResult, verbose: boolean): Promise<boolean> {
   if (!result.assistantResponse) return false;

package/server/cli/improvisation-session-manager.ts CHANGED Viewed

@@ -201,7 +201,7 @@ export class ImprovisationSessionManager extends EventEmitter {
       let result = await this.runRetryLoop(state, sequenceNumber, promptWithAttachments, imageAttachments, options?.workingDir);
-      if (this._cancelled) {
+      if (this._cancelled || this._cancelCompleteEmitted) {
         return this.handleCancelledExecution(result, displayPrompt, sequenceNumber, _execStart);
       }
@@ -218,7 +218,9 @@ export class ImprovisationSessionManager extends EventEmitter {
       this._executionStartTimestamp = undefined;
       this.executionEventLog = [];
-      this.emitMovementComplete(movement, result, _execStart, sequenceNumber);
+      if (!this._cancelCompleteEmitted) {
+        this.emitMovementComplete(movement, result, _execStart, sequenceNumber);
+      }
       this.maybeAutoContinue(result, userPrompt);
       return movement;
@@ -271,9 +273,15 @@ export class ImprovisationSessionManager extends EventEmitter {
     let result: HeadlessRunResult | undefined;
     const callbacks = this.buildRetryCallbacks();
+    const RETRY_BACKOFF_MS = [1000, 5000, 30000];
     // eslint-disable-next-line no-constant-condition
     while (true) {
       if (this._cancelled) break;
+      if (state.retryNumber > 0) {
+        const delay = RETRY_BACKOFF_MS[Math.min(state.retryNumber - 1, RETRY_BACKOFF_MS.length - 1)];
+        await new Promise(r => setTimeout(r, delay));
+        if (this._cancelled) break;
+      }
       const iteration = await this.executeRetryIteration(state, callbacks, sequenceNumber, imageAttachments, workingDirOverride);
       result = iteration.result;
       if (this._cancelled) break;
@@ -580,6 +588,8 @@ export class ImprovisationSessionManager extends EventEmitter {
       this.currentRunner = null;
     }
+    this.destroyQueueTimer();
     if (this._isExecuting && !this._cancelCompleteEmitted) {
       this._cancelCompleteEmitted = true;
       const execStart = this._executionStartTimestamp || Date.now();
@@ -609,11 +619,15 @@ export class ImprovisationSessionManager extends EventEmitter {
     this.flushOutputQueue();
   }
-  destroy(): void {
+  private destroyQueueTimer(): void {
     if (this.queueTimer) {
       clearInterval(this.queueTimer);
       this.queueTimer = null;
     }
+  }
+  destroy(): void {
+    this.destroyQueueTimer();
     this.flushOutputQueue();
   }

package/server/cli/prompt-builders.ts CHANGED Viewed

@@ -22,13 +22,27 @@ export function summarizeToolInput(input: Record<string, unknown>): string {
   return JSON.stringify(input).slice(0, 100);
 }
+const NETWORK_TOOLS = new Set(['WebFetch', 'WebSearch']);
 /** Format a list of timed-out tools for retry prompts */
 export function formatTimedOutTools(tools: Array<{ toolName: string; input: Record<string, unknown>; timeoutMs: number }>): string[] {
   const lines: string[] = [];
-  lines.push('### Tools/resources that have timed out (DO NOT retry these):');
-  for (const t of tools) {
-    const inputSummary = summarizeToolInput(t.input);
-    lines.push(`- **${t.toolName}**(${inputSummary}) — timed out after ${Math.round(t.timeoutMs / 1000)}s`);
+  const networkTools = tools.filter(t => NETWORK_TOOLS.has(t.toolName));
+  const localTools = tools.filter(t => !NETWORK_TOOLS.has(t.toolName));
+  if (networkTools.length > 0) {
+    lines.push('### Network resources that timed out (DO NOT retry these URLs):');
+    for (const t of networkTools) {
+      const inputSummary = summarizeToolInput(t.input);
+      lines.push(`- **${t.toolName}**(${inputSummary}) — timed out after ${Math.round(t.timeoutMs / 1000)}s`);
+    }
+  }
+  if (localTools.length > 0) {
+    lines.push('### Tools that previously timed out (OK to retry with same or smaller content):');
+    for (const t of localTools) {
+      const inputSummary = summarizeToolInput(t.input);
+      lines.push(`- **${t.toolName}**(${inputSummary}) — timed out after ${Math.round(t.timeoutMs / 1000)}s`);
+    }
   }
   return lines;
 }
@@ -211,17 +225,24 @@ export function buildResumeRetryPrompt(
     `Your previous ${checkpoint.hungTool.toolName} call timed out after ${Math.round(checkpoint.hungTool.timeoutMs / 1000)}s${checkpoint.hungTool.url ? ` fetching: ${checkpoint.hungTool.url}` : ''}.`
   );
-  if (allTimedOut && allTimedOut.length > 1) {
+  if (allTimedOut && allTimedOut.length > 0) {
+    const networkTools = allTimedOut.filter(t => NETWORK_TOOLS.has(t.toolName));
+    const localTools = allTimedOut.filter(t => !NETWORK_TOOLS.has(t.toolName));
     parts.push('');
-    parts.push('All timed-out tools/resources (DO NOT retry any of these):');
-    for (const t of allTimedOut) {
-      const inputSummary = summarizeToolInput(t.input);
-      parts.push(`- ${t.toolName}(${inputSummary})`);
+    if (networkTools.length > 0) {
+      parts.push('Network resources that timed out (DO NOT retry these URLs):');
+      for (const t of networkTools) {
+        parts.push(`- ${t.toolName}(${summarizeToolInput(t.input)})`);
+      }
+    }
+    if (localTools.length > 0) {
+      parts.push('Tools that previously timed out (OK to retry):');
+      for (const t of localTools) {
+        parts.push(`- ${t.toolName}(${summarizeToolInput(t.input)})`);
+      }
     }
-  } else {
-    parts.push('This URL/resource is unreachable. DO NOT retry the same URL or query.');
   }
-  parts.push('Continue your task — find an alternative source or proceed with the results you already have.');
+  parts.push('Continue your task — find alternative sources for network failures, or proceed with the results you already have.');
   return parts.join('\n');
 }

package/server/index.ts CHANGED Viewed

@@ -27,7 +27,6 @@ import {
 import { createPlatformRelayContext, ensureClaudeSettings, setTerminalTitle, wrapWebSocket } from './server-setup.js'
 import { AnalyticsEvents, initAnalytics, shutdownAnalytics, trackEvent } from './services/analytics.js'
 import { AuthService } from './services/auth.js'
-import { createAiBrokerRoutes, setDeployHealthUpdateListener, setDeployUsageReportListener } from './services/deploy/ai-broker.js'
 import { FileService } from './services/files.js'
 import { InstanceRegistry, type MstroInstance } from './services/instances.js'
 import { PlatformConnection } from './services/platform.js'
@@ -82,7 +81,7 @@ app.use('*', cors({
 app.use('*', logger())
 const authMiddleware = async (c: Context, next: Next) => {
-  const publicPaths = ['/health', '/api/config', '/api/deploy/ai']
+  const publicPaths = ['/health', '/api/config']
   if (publicPaths.some(path => c.req.path.startsWith(path))) {
     return next()
   }
@@ -105,7 +104,6 @@ app.route('/api/shutdown', createShutdownRoute(instanceRegistry))
 app.route('/api/improvise', createImproviseRoutes(WORKING_DIR))
 app.route('/api/files', createFileRoutes(fileService))
 app.route('/api/notifications', createNotificationRoutes(WORKING_DIR))
-app.route('/api/deploy/ai', createAiBrokerRoutes())
 app.post('/api/reload-pty', async (c) => {
   const success = await reloadPty()
@@ -195,12 +193,6 @@ async function startServer() {
       wsHandler.setUsageReporter((report) => {
         platformConnection.send({ type: 'reportUsage', data: report })
       })
-      setDeployUsageReportListener((report) => {
-        platformConnection.send({ type: 'deployUsageReport', data: report })
-      })
-      setDeployHealthUpdateListener((update) => {
-        platformConnection.send({ type: 'deployAiHealthUpdate', data: update })
-      })
     },
     onDisconnected: () => {
       if (platformRelayContext) {

package/server/mcp/bouncer-cli.ts CHANGED Viewed

@@ -29,14 +29,14 @@ function buildOperation(toolName: string, toolInput: Record<string, unknown>): s
 async function evaluate(rawInput: string): Promise<{ decision: string; reason: string }> {
   if (!rawInput.trim()) {
-    return { decision: 'allow', reason: 'Empty input' };
+    return { decision: 'deny', reason: 'Empty input — cannot evaluate safety' };
   }
   let parsed: { tool_name?: string; toolName?: string; input?: Record<string, unknown>; toolInput?: Record<string, unknown> };
   try {
     parsed = JSON.parse(rawInput);
   } catch {
-    return { decision: 'allow', reason: 'Invalid JSON input' };
+    return { decision: 'deny', reason: 'Invalid JSON input — cannot evaluate safety' };
   }
   const toolName = parsed.tool_name || parsed.toolName || 'unknown';
@@ -68,6 +68,7 @@ async function main(): Promise<void> {
   console.log(JSON.stringify(result));
 }
-main().catch(() => {
-  console.log(JSON.stringify({ decision: 'allow', reason: 'Bouncer crash' }));
+main().catch((err) => {
+  console.error('[Bouncer] Fatal error:', err);
+  console.log(JSON.stringify({ decision: 'deny', reason: 'Bouncer crash — denying for safety' }));
 });

package/server/mcp/bouncer-haiku.ts CHANGED Viewed

@@ -95,7 +95,7 @@ export async function analyzeWithHaiku(
   return new Promise((resolve, reject) => {
     const userRequest = request.context?.userRequest;
     const userContextBlock = userRequest
-      ? `\nUSER'S ORIGINAL REQUEST (what the user actually asked Claude to do):\n"${userRequest}"\n`
+      ? `\nUSER'S ORIGINAL REQUEST (what the user actually asked Claude to do):\n<user_request>\n${userRequest}\n</user_request>\n`
       : '';
     const prompt = loadSkillPrompt('check-injection', {

package/server/mcp/bouncer-integration.ts CHANGED Viewed

@@ -80,11 +80,17 @@ interface CachedDecision {
 const decisionCache = new Map<string, CachedDecision>();
-function getCachedDecision(operation: string): BouncerDecision | null {
-  const entry = decisionCache.get(operation);
+function buildCacheKey(operation: string, context?: BouncerReviewRequest['context']): string {
+  const sessionId = context?.sessionId ?? '_';
+  return `${sessionId}:${operation}`;
+}
+function getCachedDecision(operation: string, context?: BouncerReviewRequest['context']): BouncerDecision | null {
+  const key = buildCacheKey(operation, context);
+  const entry = decisionCache.get(key);
   if (!entry) return null;
   if (Date.now() > entry.expiresAt) {
-    decisionCache.delete(operation);
+    decisionCache.delete(key);
     return null;
   }
   return entry.decision;
@@ -95,13 +101,14 @@ export function clearDecisionCache(): void {
   decisionCache.clear();
 }
-function cacheDecision(operation: string, decision: BouncerDecision): void {
+function cacheDecision(operation: string, context: BouncerReviewRequest['context'] | undefined, decision: BouncerDecision): void {
   if (decision.confidence < 50) return;
   if (decisionCache.size >= CACHE_MAX_SIZE) {
     const firstKey = decisionCache.keys().next().value;
     if (firstKey !== undefined) decisionCache.delete(firstKey);
   }
-  decisionCache.set(operation, { decision, expiresAt: Date.now() + CACHE_TTL_MS });
+  const key = buildCacheKey(operation, context);
+  decisionCache.set(key, { decision, expiresAt: Date.now() + CACHE_TTL_MS });
 }
 // ── Decision Finalization ─────────────────────────────────────
@@ -134,7 +141,7 @@ function finalizeDecision(
     });
   }
-  if (!opts?.skipCache) cacheDecision(operation, decision);
+  if (!opts?.skipCache) cacheDecision(operation, context, decision);
   return decision;
 }
@@ -216,8 +223,8 @@ export async function reviewOperation(request: BouncerReviewRequest): Promise<Bo
   const fin = (d: BouncerDecision, layer: string, opts?: Parameters<typeof finalizeDecision>[6]) =>
     finalizeDecision(operation, d, layer, startTime, request.context, logBouncerDecision, opts);
-  // Check cache first
-  const cached = getCachedDecision(operation);
+  // Check cache first (keyed on operation + session to prevent cross-context bypass)
+  const cached = getCachedDecision(operation, request.context);
   if (cached) {
     console.error(`[Bouncer] ⚡ Cache hit: ${cached.decision} (${cached.confidence}%)`);
     return cached;

package/server/mcp/security-patterns.ts CHANGED Viewed

@@ -153,7 +153,7 @@ export const SAFE_OPERATIONS: SecurityPattern[] = [
   { pattern: /^Bash:\s*git\s+(commit|push|tag|remote|rebase|merge|cherry-pick|reset|revert)($|\s)/i },
   { pattern: /^Bash:\s*git\s+(worktree|submodule|config|clean|gc)($|\s)/i },
   { pattern: /^Bash:\s*(uname|hostname|whoami|id|groups|uptime|df|du|free|top|ps|lsof|stat|file|readlink)($|\s)/i },
-  { pattern: /^Bash:\s*(mv|cp|touch|ln|basename|dirname|realpath|mktemp|xargs|tee|tr|cut|paste|comm|diff|patch)($|\s)/i },
+  { pattern: /^Bash:\s*(touch|basename|dirname|realpath|mktemp|xargs|tee|tr|cut|paste|comm|diff|patch)($|\s)/i },
   { pattern: /^Bash:\s*(tar|gzip|gunzip|zip|unzip|bzip2)\s/i },
   { pattern: /^Bash:\s*(ruby|python3?|php|java|javac|scala|kotlinc|swift|rustc|gcc|g\+\+|clang)\s/i },
   { pattern: /^Bash:\s*(pip|pip3|gem|bundle|composer|maven|gradle|sbt|cargo|rustup)\s/i },

package/server/services/plan/agents/code-review.md ADDED Viewed

@@ -0,0 +1,109 @@
+---
+name: code-review
+description: "Senior staff engineer code review — surfaces architectural violations, SOLID issues, security vulnerabilities, bugs, and performance problems with structured evidence. Use when performing a comprehensive AI code review of a directory."
+user-invocable: true
+type: review
+allowed-tools: Read, Grep, Glob, Bash
+context: fork
+---
+You are a senior staff engineer performing a rigorous, honest code review. Your job is to surface the most impactful quality bottlenecks — the issues a principal engineer would flag in a code review. Be critical and objective. Do NOT inflate scores.
+IMPORTANT: Your current working directory is "{{dirPath}}". Only review files within this directory.
+{{cliFindingsSection}}
+## Review Process
+1. **Discover**: Use Glob to find source files (e.g. "**/*.{ts,tsx,js,py,rs,go,java,rb,php}"). Understand the project structure.
+2. **Read**: Read the most important files — entry points, core modules, handlers, services. Prioritize files with recent git changes (`git diff --name-only HEAD~5` via Bash if available).
+3. **Analyze**: Look for real, actionable issues across ALL of these categories:
+   ### Architecture
+   - What is the current architecture (monolith, microservices, layered, etc.)?
+   - Are there architectural violations? (e.g., presentation layer directly accessing data layer, circular dependencies between modules)
+   - Is there proper separation of concerns?
+   - Are there god objects or god modules that do too much?
+   ### SOLID / OOP Principles
+   - **SRP**: Classes/modules with multiple unrelated responsibilities
+   - **OCP**: Code that requires modification instead of extension for new features
+   - **LSP**: Subtypes that don't properly substitute for their base types
+   - **ISP**: Interfaces/contracts that force implementations to depend on methods they don't use
+   - **DIP**: High-level modules directly depending on low-level modules instead of abstractions
+   ### Security
+   - Injection vulnerabilities (SQL, XSS, command), hardcoded secrets/credentials, auth bypasses, insecure crypto, path traversal, SSRF, unsafe deserialization
+   ### Bugs & Logic
+   - Null/undefined errors, race conditions, logic errors, unhandled edge cases, off-by-one errors, resource leaks, incorrect error handling, incorrect algorithms
+   ### Performance
+   - N+1 queries, unnecessary re-renders, missing memoization, blocking I/O in hot paths, unbounded data structures, missing pagination
+## CRITICAL — Structured Evidence Requirement
+For EACH finding, you MUST provide structured evidence that grounds the finding in actual code. This is required to prevent false positives.
+For each finding, use this reasoning process:
+1. **PREMISE**: State the observable fact from the code. Quote the exact code you see.
+2. **CONTEXT**: What is the surrounding code doing? Are there guards, fixes, or patterns elsewhere that might handle this?
+3. **COUNTER-CHECK**: Actively look for evidence that CONTRADICTS your finding. Check for:
+   - Guards or validation earlier in the call chain
+   - Error handling wrapping the code
+   - Configuration that changes behavior (e.g., NODE_ENV checks)
+   - Comments explaining intentional design choices
+4. **CONCLUSION**: Only report the finding if you could not find contradicting evidence.
+### Common False Positive Patterns to AVOID
+- Claiming a function uses API X when it actually uses API Y (e.g., claiming Math.random() when code uses crypto.randomInt()) — ALWAYS quote the actual function call
+- Claiming a header/value is leaked when code already deletes/filters it — READ the full function
+- Claiming there's no guard when a condition check exists nearby — READ surrounding lines
+- Claiming N fields/methods when the actual count differs — COUNT explicitly
+- Claiming a resource leaks when cleanup exists in a different handler — SEARCH for the cleanup code
+## Rules
+- Only report findings you are >90% confident about after completing the counter-check step.
+- Focus on architecture, SOLID violations, bugs, and security over style nits.
+- Each finding MUST reference a specific file and line number. Do not report vague or file-level issues.
+- Each finding MUST include an "evidence" field with the exact code snippet (1-5 lines) proving the issue exists.
+- Limit to the 25 most important findings, ranked by severity.
+- Do NOT modify any files. This is a read-only review.
+- Be HONEST about the overall quality. A codebase with serious issues should score low.
+## Scoring Guidelines
+After your analysis, provide an honest overall quality score (0-100) and letter grade:
+- **A (90-100)**: Excellent — clean architecture, minimal issues, well-tested, follows best practices
+- **B (80-89)**: Good — solid code with minor issues, mostly well-structured
+- **C (70-79)**: Adequate — functional but has notable quality issues that should be addressed
+- **D (60-69)**: Below average — significant issues in architecture, testing, or code quality
+- **F (0-59)**: Poor — serious problems: security vulnerabilities, broken architecture, major bugs, or unmaintainable code
+Consider ALL findings (both CLI tool findings and your own) when determining the score. The score should reflect the overall state of the codebase honestly. A project with 50+ linting errors, formatting issues, complex functions, AND architectural problems should NOT score above 70.
+## Output
+After your analysis, output EXACTLY one JSON code block with your findings. No other text after the JSON block.
+```json
+{
+  "score": 72,
+  "grade": "C",
+  "scoreRationale": "Brief explanation of why this score was given, referencing key issues",
+  "findings": [
+    {
+      "severity": "critical|high|medium|low",
+      "category": "architecture|oop|security|bugs|performance|logic",
+      "file": "relative/path/to/file.ts",
+      "line": 42,
+      "title": "Short title describing the issue",
+      "description": "What the problem is and why it matters.",
+      "suggestion": "How to fix it.",
+      "evidence": "const token = Math.random().toString(36) // exact code from file proving the issue"
+    }
+  ],
+  "summary": "Brief 1-2 sentence summary of overall code quality."
+}
+```

package/server/services/plan/agents/commit-message.md ADDED Viewed

@@ -0,0 +1,26 @@
+---
+name: commit-message
+description: "Generate a conventional git commit message from staged changes. Use when committing code and wanting an AI-generated commit message."
+user-invocable: false
+allowed-tools: Bash
+---
+You are generating a git commit message for the following staged changes.
+RECENT COMMIT MESSAGES (for style reference):
+{{recentCommits}}
+STAGED FILES:
+{{stagedFiles}}
+DIFF OF STAGED CHANGES:
+{{diff}}
+Generate a commit message following these rules:
+1. First line: imperative mood, max 72 characters (e.g., "Add user authentication", "Fix memory leak in parser")
+2. If the changes are complex, add a blank line then bullet points explaining the key changes
+3. Focus on the "why" not just the "what"
+4. Match the style of recent commits if possible
+5. No emojis unless the repo already uses them
+Respond with ONLY the commit message, nothing else.

package/server/services/plan/agents/fix-quality.md ADDED Viewed

@@ -0,0 +1,24 @@
+---
+name: fix-quality
+description: "Fix code quality issues found by linters, complexity analyzers, or AI code review. Systematically works through findings from most to least severe. Use when fixing quality scan results."
+user-invocable: true
+type: review
+allowed-tools: Read, Edit, Write, Grep, Glob, Bash
+---
+You are a code quality fix agent. Fix the following quality issues in the codebase.
+## Issues to Fix ({{issueCount}} total, showing top {{showCount}})
+{{issueList}}
+## Rules
+- Fix each issue by editing the relevant file at the specified location.
+- For complexity issues: refactor into smaller functions. For long files: split or extract modules. For long functions: break into smaller functions.
+- For security issues: apply the suggested fix or use secure coding best practices.
+- For bugs: fix the root cause, not just the symptom.
+- For linting/formatting: apply the standard for the project.
+- Do NOT introduce new issues. Make minimal, focused changes.
+- After fixing, verify the changes compile/pass linting if tools are available.
+- Work through the issues systematically from most to least severe.

package/server/services/plan/agents/pr-description.md ADDED Viewed

@@ -0,0 +1,28 @@
+---
+name: pr-description
+description: "Generate a pull request title and description from branch commits and diff. Use when creating a PR and wanting an AI-generated title and body."
+user-invocable: false
+allowed-tools: Bash
+---
+You are generating a pull request title and description for the following changes.
+COMMITS ({{baseBranch}}..HEAD):
+{{commits}}
+FILES CHANGED:
+{{filesChanged}}
+DIFF:
+{{diff}}
+Generate a pull request title and description following these rules:
+1. TITLE: First line must be the PR title — imperative mood, under 70 characters
+2. Leave a blank line after the title
+3. BODY: Write a concise description in markdown with:
+   - A "## Summary" section with 1-3 bullet points explaining what changed and why
+   - Optionally a "## Details" section if the changes are complex
+4. Focus on the "why" not just the "what"
+5. No emojis
+Respond with ONLY the title and description, nothing else.

package/server/services/plan/composer.ts CHANGED Viewed

@@ -11,8 +11,8 @@
 import { existsSync, readFileSync } from 'node:fs';
 import { join } from 'node:path';
-import { runWithFileLogger } from '../../cli/headless/headless-logger.js';
-import { HeadlessRunner, type ToolUseEvent } from '../../cli/headless/index.js';
+import type { ToolUseEvent } from '../../cli/headless/index.js';
+import { ResilientRunner } from '../../cli/headless/resilient-runner.js';
 import type { HandlerContext } from '../websocket/handler-context.js';
 import type { WSContext } from '../websocket/types.js';
 import { defaultPmDir, getNextId, parseBoardDirectory, parsePlanDirectory, resolvePmDir } from './parser.js';
@@ -196,6 +196,7 @@ created: "YYYY-MM-DD"
 blocked_by: []             # Use backlog-relative paths: backlog/IS-NNN.md
 blocks: []                 # Use backlog-relative paths: backlog/IS-NNN.md
 review_gate: auto
+output_type: auto          # code = modify source files, document = produce written artifact, auto = infer
 output_file: null
 ---
@@ -228,6 +229,14 @@ Implementation guidance.
 - If an issue requires work across multiple subsystems, split it into one issue per subsystem with blocked_by edges between them
 - Research/investigation issues should be separate from implementation issues
+## output_type rules (critical — determines how the AI executes and reviews each issue)
+- Set \`output_type: document\` for research, design, analysis, writing, planning, learning, or educational issues — anything that produces a written artifact rather than code changes
+- Set \`output_type: code\` for issues that MUST modify source code files (implementation, bug fixes, refactoring)
+- Set \`output_type: auto\` when unsure — the system will infer from "Files to Modify" (if the section lists real source paths it's treated as code, otherwise as document)
+- When output_type is \`document\`, "Files to Modify" entries are treated as references, not files to edit. The AI produces a document artifact and is reviewed on document quality.
+- When output_type is \`code\`, "Files to Modify" lists actual source files the AI must edit. The review gate verifies source files were changed.
 ## Epic creation rules
 - Create an EP-*.md file in ${cc.backlogPath} with type: epic and a children: [] field in front matter
@@ -246,12 +255,13 @@ User request: ${userPrompt}`;
       data: { message: 'Starting project planning...' },
     });
-    const runner = new HeadlessRunner({
+    const runner = new ResilientRunner({
       workingDir: executionDir || workingDir,
-      directPrompt: enrichedPrompt,
-      stallWarningMs: 300_000,   // 5 min — compose usually finishes quickly
-      stallKillMs: 900_000,      // 15 min
-      stallHardCapMs: 1_800_000, // 30 min hard cap
+      prompt: enrichedPrompt,
+      policy: 'STANDARD',
+      stallWarningMs: 300_000,
+      stallKillMs: 900_000,
+      stallHardCapMs: 1_800_000,
       verbose: true,
       outputCallback: (text: string) => {
         ctx.send(ws, {
@@ -271,6 +281,8 @@ User request: ${userPrompt}`;
           }
         };
       })(),
+      logLabel: 'pm-compose',
+      logDir: cc.effectiveBoardId ? join(pmDir, 'boards', cc.effectiveBoardId, 'logs') : undefined,
     });
     ctx.broadcastToAll({
@@ -278,8 +290,7 @@ User request: ${userPrompt}`;
       data: { message: 'Claude is planning your project...' },
     });
-    const boardLogDir = cc.effectiveBoardId ? join(pmDir, 'boards', cc.effectiveBoardId, 'logs') : undefined;
-    const result = await runWithFileLogger('pm-compose', () => runner.run(), boardLogDir);
+    const result = await runner.run();
     ctx.broadcastToAll({
       type: 'planPromptProgress',