mstro-app 0.1.58 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161):
  1. package/PRIVACY.md +126 -0
  2. package/README.md +24 -23
  3. package/bin/commands/login.js +85 -42
  4. package/bin/commands/logout.js +35 -1
  5. package/bin/commands/status.js +1 -1
  6. package/bin/mstro.js +231 -131
  7. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  8. package/dist/server/cli/headless/claude-invoker.js +550 -115
  9. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  10. package/dist/server/cli/headless/index.d.ts +2 -1
  11. package/dist/server/cli/headless/index.d.ts.map +1 -1
  12. package/dist/server/cli/headless/index.js +2 -0
  13. package/dist/server/cli/headless/index.js.map +1 -1
  14. package/dist/server/cli/headless/prompt-utils.d.ts +5 -8
  15. package/dist/server/cli/headless/prompt-utils.d.ts.map +1 -1
  16. package/dist/server/cli/headless/prompt-utils.js +40 -5
  17. package/dist/server/cli/headless/prompt-utils.js.map +1 -1
  18. package/dist/server/cli/headless/runner.d.ts +1 -1
  19. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  20. package/dist/server/cli/headless/runner.js +52 -7
  21. package/dist/server/cli/headless/runner.js.map +1 -1
  22. package/dist/server/cli/headless/stall-assessor.d.ts +79 -1
  23. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  24. package/dist/server/cli/headless/stall-assessor.js +355 -20
  25. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  26. package/dist/server/cli/headless/tool-watchdog.d.ts +70 -0
  27. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -0
  28. package/dist/server/cli/headless/tool-watchdog.js +302 -0
  29. package/dist/server/cli/headless/tool-watchdog.js.map +1 -0
  30. package/dist/server/cli/headless/types.d.ts +98 -1
  31. package/dist/server/cli/headless/types.d.ts.map +1 -1
  32. package/dist/server/cli/improvisation-session-manager.d.ts +136 -2
  33. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  34. package/dist/server/cli/improvisation-session-manager.js +929 -132
  35. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  36. package/dist/server/index.js +5 -13
  37. package/dist/server/index.js.map +1 -1
  38. package/dist/server/mcp/bouncer-integration.d.ts.map +1 -1
  39. package/dist/server/mcp/bouncer-integration.js +18 -0
  40. package/dist/server/mcp/bouncer-integration.js.map +1 -1
  41. package/dist/server/mcp/security-audit.d.ts +2 -2
  42. package/dist/server/mcp/security-audit.d.ts.map +1 -1
  43. package/dist/server/mcp/security-audit.js +12 -8
  44. package/dist/server/mcp/security-audit.js.map +1 -1
  45. package/dist/server/mcp/security-patterns.d.ts.map +1 -1
  46. package/dist/server/mcp/security-patterns.js +9 -4
  47. package/dist/server/mcp/security-patterns.js.map +1 -1
  48. package/dist/server/routes/improvise.js +6 -6
  49. package/dist/server/routes/improvise.js.map +1 -1
  50. package/dist/server/services/analytics.d.ts +2 -0
  51. package/dist/server/services/analytics.d.ts.map +1 -1
  52. package/dist/server/services/analytics.js +26 -4
  53. package/dist/server/services/analytics.js.map +1 -1
  54. package/dist/server/services/platform.d.ts.map +1 -1
  55. package/dist/server/services/platform.js +17 -10
  56. package/dist/server/services/platform.js.map +1 -1
  57. package/dist/server/services/sandbox-utils.d.ts +6 -0
  58. package/dist/server/services/sandbox-utils.d.ts.map +1 -0
  59. package/dist/server/services/sandbox-utils.js +72 -0
  60. package/dist/server/services/sandbox-utils.js.map +1 -0
  61. package/dist/server/services/settings.d.ts +6 -0
  62. package/dist/server/services/settings.d.ts.map +1 -1
  63. package/dist/server/services/settings.js +21 -0
  64. package/dist/server/services/settings.js.map +1 -1
  65. package/dist/server/services/terminal/pty-manager.d.ts +5 -51
  66. package/dist/server/services/terminal/pty-manager.d.ts.map +1 -1
  67. package/dist/server/services/terminal/pty-manager.js +63 -102
  68. package/dist/server/services/terminal/pty-manager.js.map +1 -1
  69. package/dist/server/services/websocket/file-explorer-handlers.d.ts +5 -0
  70. package/dist/server/services/websocket/file-explorer-handlers.d.ts.map +1 -0
  71. package/dist/server/services/websocket/file-explorer-handlers.js +518 -0
  72. package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -0
  73. package/dist/server/services/websocket/git-handlers.d.ts +36 -0
  74. package/dist/server/services/websocket/git-handlers.d.ts.map +1 -0
  75. package/dist/server/services/websocket/git-handlers.js +797 -0
  76. package/dist/server/services/websocket/git-handlers.js.map +1 -0
  77. package/dist/server/services/websocket/git-pr-handlers.d.ts +4 -0
  78. package/dist/server/services/websocket/git-pr-handlers.d.ts.map +1 -0
  79. package/dist/server/services/websocket/git-pr-handlers.js +299 -0
  80. package/dist/server/services/websocket/git-pr-handlers.js.map +1 -0
  81. package/dist/server/services/websocket/git-worktree-handlers.d.ts +4 -0
  82. package/dist/server/services/websocket/git-worktree-handlers.d.ts.map +1 -0
  83. package/dist/server/services/websocket/git-worktree-handlers.js +353 -0
  84. package/dist/server/services/websocket/git-worktree-handlers.js.map +1 -0
  85. package/dist/server/services/websocket/handler-context.d.ts +32 -0
  86. package/dist/server/services/websocket/handler-context.d.ts.map +1 -0
  87. package/dist/server/services/websocket/handler-context.js +4 -0
  88. package/dist/server/services/websocket/handler-context.js.map +1 -0
  89. package/dist/server/services/websocket/handler.d.ts +27 -338
  90. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  91. package/dist/server/services/websocket/handler.js +74 -2106
  92. package/dist/server/services/websocket/handler.js.map +1 -1
  93. package/dist/server/services/websocket/index.d.ts +1 -1
  94. package/dist/server/services/websocket/index.d.ts.map +1 -1
  95. package/dist/server/services/websocket/index.js.map +1 -1
  96. package/dist/server/services/websocket/session-handlers.d.ts +10 -0
  97. package/dist/server/services/websocket/session-handlers.d.ts.map +1 -0
  98. package/dist/server/services/websocket/session-handlers.js +507 -0
  99. package/dist/server/services/websocket/session-handlers.js.map +1 -0
  100. package/dist/server/services/websocket/settings-handlers.d.ts +6 -0
  101. package/dist/server/services/websocket/settings-handlers.d.ts.map +1 -0
  102. package/dist/server/services/websocket/settings-handlers.js +125 -0
  103. package/dist/server/services/websocket/settings-handlers.js.map +1 -0
  104. package/dist/server/services/websocket/tab-handlers.d.ts +10 -0
  105. package/dist/server/services/websocket/tab-handlers.d.ts.map +1 -0
  106. package/dist/server/services/websocket/tab-handlers.js +131 -0
  107. package/dist/server/services/websocket/tab-handlers.js.map +1 -0
  108. package/dist/server/services/websocket/terminal-handlers.d.ts +9 -0
  109. package/dist/server/services/websocket/terminal-handlers.d.ts.map +1 -0
  110. package/dist/server/services/websocket/terminal-handlers.js +220 -0
  111. package/dist/server/services/websocket/terminal-handlers.js.map +1 -0
  112. package/dist/server/services/websocket/types.d.ts +67 -2
  113. package/dist/server/services/websocket/types.d.ts.map +1 -1
  114. package/hooks/bouncer.sh +11 -4
  115. package/package.json +7 -2
  116. package/server/README.md +176 -159
  117. package/server/cli/headless/claude-invoker.ts +740 -133
  118. package/server/cli/headless/index.ts +7 -1
  119. package/server/cli/headless/output-utils.test.ts +225 -0
  120. package/server/cli/headless/prompt-utils.ts +37 -5
  121. package/server/cli/headless/runner.ts +55 -8
  122. package/server/cli/headless/stall-assessor.test.ts +165 -0
  123. package/server/cli/headless/stall-assessor.ts +478 -22
  124. package/server/cli/headless/tool-watchdog.test.ts +429 -0
  125. package/server/cli/headless/tool-watchdog.ts +398 -0
  126. package/server/cli/headless/types.ts +93 -1
  127. package/server/cli/improvisation-session-manager.ts +1133 -145
  128. package/server/index.ts +5 -14
  129. package/server/mcp/README.md +59 -67
  130. package/server/mcp/bouncer-integration.test.ts +161 -0
  131. package/server/mcp/bouncer-integration.ts +28 -0
  132. package/server/mcp/security-audit.ts +12 -8
  133. package/server/mcp/security-patterns.test.ts +258 -0
  134. package/server/mcp/security-patterns.ts +8 -2
  135. package/server/routes/improvise.ts +6 -6
  136. package/server/services/analytics.ts +26 -4
  137. package/server/services/platform.test.ts +0 -10
  138. package/server/services/platform.ts +16 -11
  139. package/server/services/sandbox-utils.ts +78 -0
  140. package/server/services/settings.ts +25 -0
  141. package/server/services/terminal/pty-manager.ts +68 -129
  142. package/server/services/websocket/autocomplete.test.ts +194 -0
  143. package/server/services/websocket/file-explorer-handlers.ts +587 -0
  144. package/server/services/websocket/git-handlers.ts +924 -0
  145. package/server/services/websocket/git-pr-handlers.ts +363 -0
  146. package/server/services/websocket/git-worktree-handlers.ts +403 -0
  147. package/server/services/websocket/handler-context.ts +44 -0
  148. package/server/services/websocket/handler.test.ts +1 -1
  149. package/server/services/websocket/handler.ts +90 -2421
  150. package/server/services/websocket/index.ts +1 -1
  151. package/server/services/websocket/session-handlers.ts +574 -0
  152. package/server/services/websocket/settings-handlers.ts +150 -0
  153. package/server/services/websocket/tab-handlers.ts +150 -0
  154. package/server/services/websocket/terminal-handlers.ts +277 -0
  155. package/server/services/websocket/types.ts +145 -4
  156. package/bin/release.sh +0 -110
  157. package/dist/server/services/terminal/tmux-manager.d.ts +0 -82
  158. package/dist/server/services/terminal/tmux-manager.d.ts.map +0 -1
  159. package/dist/server/services/terminal/tmux-manager.js +0 -352
  160. package/dist/server/services/terminal/tmux-manager.js.map +0 -1
  161. package/server/services/terminal/tmux-manager.ts +0 -426
@@ -2,16 +2,18 @@
2
2
  // Licensed under the MIT License. See LICENSE file for details.
3
3
 
4
4
  /**
5
- * Stall Assessor
5
+ * Stall Assessor & Haiku Assessment Hub
6
6
  *
7
- * Intelligently determines whether a silent Claude Code process is
8
- * legitimately working or genuinely stalled. Uses a two-layer approach:
7
+ * Provides Haiku-based intelligent assessment for:
8
+ * - Stall detection (is a silent process working or hung?)
9
+ * - Context loss detection (did Claude lose context after timeouts?)
10
+ * - Approval prompt classification (is a user message an approval or new task?)
11
+ * - Best result comparison (which retry attempt produced better work?)
12
+ * - Error classification (what kind of error is in stderr?)
9
13
  *
10
- * 1. Fast heuristic: known long-running patterns (Task subagents, parallel
11
- * tool calls) get an automatic extension without any API call.
12
- *
13
- * 2. Haiku assessment: for ambiguous cases, spawns a quick Claude Haiku
14
- * call to evaluate the situation and recommend an extension (or kill).
14
+ * Stall detection uses a two-layer approach:
15
+ * 1. Fast heuristic: known long-running patterns get automatic extensions.
16
+ * 2. Haiku assessment: ambiguous cases get a quick AI evaluation.
15
17
  */
16
18
 
17
19
  import { type ChildProcess, spawn } from 'node:child_process';
@@ -27,10 +29,14 @@ export interface StallContext {
27
29
  lastToolInputSummary?: string;
28
30
  /** Number of tool calls started but not yet returned */
29
31
  pendingToolCount: number;
32
+ /** Names of all currently pending tools (toolId -> toolName) */
33
+ pendingToolNames?: Set<string>;
30
34
  /** Total tool calls made so far this session */
31
35
  totalToolCalls: number;
32
36
  /** Total wall-clock time since process started (ms) */
33
37
  elapsedTotalMs: number;
38
+ /** Time since the last token usage event (ms). Undefined if no token events yet. */
39
+ tokenSilenceMs?: number;
34
40
  }
35
41
 
36
42
  export interface StallVerdict {
@@ -45,11 +51,44 @@ export interface StallVerdict {
45
51
  /**
46
52
  * Fast heuristic for known long-running patterns.
47
53
  * Returns a verdict immediately if the pattern is recognized, null otherwise.
54
+ * When toolWatchdogActive is true, defers entirely to the watchdog for any
55
+ * pending tool calls — the watchdog has per-tool adaptive timeouts that are
56
+ * more precise than the stall detector's silence-based approach.
48
57
  */
49
- function quickHeuristic(ctx: StallContext): StallVerdict | null {
58
+ function quickHeuristic(ctx: StallContext, toolWatchdogActive = false): StallVerdict | null {
59
+ const pendingNames = ctx.pendingToolNames ?? new Set<string>();
60
+ const hasPendingTools = ctx.pendingToolCount > 0;
61
+
62
+ // Tokens still flowing = process is alive and actively processing.
63
+ // Extend generously when token activity is recent (< 60s), regardless
64
+ // of stdout silence. This covers silent thinking and tool result processing.
65
+ if (ctx.tokenSilenceMs !== undefined && ctx.tokenSilenceMs < 60_000) {
66
+ return {
67
+ action: 'extend',
68
+ extensionMs: 10 * 60_000,
69
+ reason: `Tokens still flowing (last activity ${Math.round(ctx.tokenSilenceMs / 1000)}s ago) — process is alive`,
70
+ };
71
+ }
72
+
73
+ // When the watchdog is active and tools are pending, always defer.
74
+ // The watchdog manages per-tool timeouts; the stall detector should only
75
+ // fire when no tools are running and there's genuine silence.
76
+ if (toolWatchdogActive && hasPendingTools) {
77
+ const toolList = pendingNames.size > 0
78
+ ? Array.from(pendingNames).join(', ')
79
+ : `${ctx.pendingToolCount} tool(s)`;
80
+ return {
81
+ action: 'extend',
82
+ extensionMs: 15 * 60_000,
83
+ reason: `Watchdog active, deferring — pending: ${toolList}`,
84
+ };
85
+ }
86
+
50
87
  // Task/subagent launches are known to produce long silence periods.
51
88
  // The parent Claude process emits nothing while waiting for subagent results.
52
- if (ctx.lastToolName === 'Task' && ctx.pendingToolCount > 0) {
89
+ // Check pendingToolNames (reliable) first, fall back to lastToolName (legacy).
90
+ const hasTaskPending = pendingNames.has('Task') || (ctx.lastToolName === 'Task' && hasPendingTools);
91
+ if (hasTaskPending) {
53
92
  const extensionMin = Math.min(30, 10 + ctx.pendingToolCount * 5);
54
93
  return {
55
94
  action: 'extend',
@@ -67,10 +106,10 @@ function quickHeuristic(ctx: StallContext): StallVerdict | null {
67
106
  };
68
107
  }
69
108
 
70
- // WebSearch/WebFetch can be slow depending on the site
109
+ // WebSearch/WebFetch: skip when watchdog handles them more precisely
71
110
  if (
72
- ctx.lastToolName === 'WebSearch' ||
73
- ctx.lastToolName === 'WebFetch'
111
+ !toolWatchdogActive &&
112
+ (ctx.lastToolName === 'WebSearch' || ctx.lastToolName === 'WebFetch')
74
113
  ) {
75
114
  return {
76
115
  action: 'extend',
@@ -90,9 +129,10 @@ export async function assessStall(
90
129
  ctx: StallContext,
91
130
  claudeCommand: string,
92
131
  verbose: boolean,
132
+ toolWatchdogActive = false,
93
133
  ): Promise<StallVerdict> {
94
134
  // Layer 1: fast heuristic
95
- const quick = quickHeuristic(ctx);
135
+ const quick = quickHeuristic(ctx, toolWatchdogActive);
96
136
  if (quick) {
97
137
  if (verbose) {
98
138
  console.log(`[STALL-ASSESS] Heuristic verdict: ${quick.reason}`);
@@ -119,6 +159,163 @@ export async function assessStall(
119
159
  }
120
160
  }
121
161
 
162
+ /**
163
+ * Assess a specific tool timeout using Haiku.
164
+ * Used by ToolWatchdog as a tiebreaker before killing a tool.
165
+ */
166
+ export async function assessToolTimeout(
167
+ toolName: string,
168
+ toolInput: Record<string, unknown>,
169
+ elapsedMs: number,
170
+ claudeCommand: string,
171
+ verbose: boolean,
172
+ tokenSilenceMs?: number,
173
+ ): Promise<StallVerdict> {
174
+ const elapsedSec = Math.round(elapsedMs / 1000);
175
+
176
+ // Summarize what the tool is doing
177
+ let inputSummary = '';
178
+ if (toolInput.url) {
179
+ inputSummary = `URL: ${String(toolInput.url).slice(0, 200)}`;
180
+ } else if (toolInput.query) {
181
+ inputSummary = `Query: ${String(toolInput.query).slice(0, 200)}`;
182
+ } else if (toolInput.command) {
183
+ inputSummary = `Command: ${String(toolInput.command).slice(0, 200)}`;
184
+ } else if (toolInput.prompt) {
185
+ inputSummary = `Prompt: ${String(toolInput.prompt).slice(0, 200)}`;
186
+ } else {
187
+ inputSummary = JSON.stringify(toolInput).slice(0, 200);
188
+ }
189
+
190
+ const toolDescriptions: Record<string, string> = {
191
+ WebFetch: 'fetches a URL, converts HTML to markdown, and runs a Haiku summarization pass',
192
+ WebSearch: 'performs a web search and returns results',
193
+ Task: 'spawns a subagent that runs autonomously with its own tools',
194
+ Bash: 'executes a shell command',
195
+ };
196
+ const toolDesc = toolDescriptions[toolName] || `executes the ${toolName} tool`;
197
+
198
+ const tokenLine = tokenSilenceMs !== undefined
199
+ ? `Token activity: last token event ${Math.round(tokenSilenceMs / 1000)}s ago (recent tokens = process is alive and processing)`
200
+ : 'Token activity: no token events observed';
201
+
202
+ const prompt = [
203
+ `You are a process health monitor. A ${toolName} tool call has been running for ${elapsedSec}s.`,
204
+ `${toolName} ${toolDesc}.`,
205
+ `Tool input: ${inputSummary}`,
206
+ tokenLine,
207
+ '',
208
+ `Is this tool call likely still working, or is it hung/frozen?`,
209
+ 'Consider: network latency, server response times, anti-bot protections, large page sizes, complex operations.',
210
+ 'IMPORTANT: If tokens were active recently (< 60s ago), the process is likely still alive and processing — strongly favor WORKING.',
211
+ '',
212
+ 'Respond in EXACTLY this format (3 lines, no extra text):',
213
+ 'VERDICT: WORKING or STALLED',
214
+ 'MINUTES: <number 1-10, only if WORKING, how many more minutes to allow>',
215
+ 'REASON: <brief one-line explanation>',
216
+ ].join('\n');
217
+
218
+ try {
219
+ if (verbose) {
220
+ console.log(`[TOOL-ASSESS] Running Haiku assessment for ${toolName} (${elapsedSec}s elapsed)...`);
221
+ }
222
+
223
+ return await spawnHaikuVerdict(prompt, claudeCommand, verbose, 'TOOL-ASSESS');
224
+ } catch (err) {
225
+ if (verbose) {
226
+ console.log(`[TOOL-ASSESS] Haiku assessment failed: ${err}`);
227
+ }
228
+ // On failure, default to kill (the tool has already exceeded its timeout)
229
+ return {
230
+ action: 'kill',
231
+ extensionMs: 0,
232
+ reason: `Tool timeout assessment failed: ${err}`,
233
+ };
234
+ }
235
+ }
236
+
237
+ // ========== Context Loss Assessment ==========
238
+
239
+ export interface ContextLossVerdict {
240
+ /** Whether the agent lost context and needs recovery */
241
+ contextLost: boolean;
242
+ /** Human-readable reason for the verdict */
243
+ reason: string;
244
+ }
245
+
246
+ /** Enriched context for Haiku-based context loss assessment */
247
+ export interface ContextLossContext {
248
+ assistantResponse: string;
249
+ effectiveTimeouts: number;
250
+ nativeTimeoutCount: number;
251
+ successfulToolCalls: number;
252
+ thinkingOutputLength: number;
253
+ hasSuccessfulWrite: boolean;
254
+ }
255
+
256
+ /**
257
+ * Assess whether a Claude Code session lost context after tool timeouts.
258
+ * Uses Haiku with enriched context signals — replaces brittle hardcoded
259
+ * thresholds (200 chars thinking, 2x ratio, 500 chars response) with
260
+ * a single LLM call that sees the full picture.
261
+ *
262
+ * Only call this when effectiveTimeouts > 0.
263
+ */
264
+ export async function assessContextLoss(
265
+ ctx: ContextLossContext,
266
+ claudeCommand: string,
267
+ verbose: boolean,
268
+ ): Promise<ContextLossVerdict> {
269
+ const tail = ctx.assistantResponse.slice(-800);
270
+
271
+ const prompt = [
272
+ 'You are analyzing a Claude Code agent session that experienced tool timeouts.',
273
+ 'Determine whether the agent lost context (needs recovery) or is still productively working.',
274
+ '',
275
+ 'Session signals:',
276
+ `- ${ctx.effectiveTimeouts} tools timed out (${ctx.nativeTimeoutCount} detected in text stream, ${ctx.effectiveTimeouts - ctx.nativeTimeoutCount} detected structurally)`,
277
+ `- ${ctx.successfulToolCalls} tools completed successfully`,
278
+ `- Thinking output: ${ctx.thinkingOutputLength} characters`,
279
+ `- Response length: ${ctx.assistantResponse.length} characters`,
280
+ `- Successful file writes (Edit/Write/MultiEdit): ${ctx.hasSuccessfulWrite ? 'YES' : 'NO'}`,
281
+ '',
282
+ `Final response (last ${tail.length} chars):`,
283
+ tail,
284
+ '',
285
+ 'WORKING signals: continued tool calls after timeouts, substantial thinking about the task, producing code/analysis, writing files, referencing the original task.',
286
+ 'STALLED signals: asking "how can I help?", starting fresh, offering generic help, not referencing the original task, very short response with no substance, task abandoned mid-research.',
287
+ '',
288
+ 'Respond in EXACTLY this format (2 lines, no extra text):',
289
+ 'VERDICT: WORKING or STALLED',
290
+ 'REASON: <brief one-line explanation>',
291
+ ].join('\n');
292
+
293
+ try {
294
+ if (verbose) {
295
+ console.log(`[CONTEXT-ASSESS] Running Haiku assessment (${ctx.effectiveTimeouts} timeouts, ${ctx.successfulToolCalls} successes, ${ctx.thinkingOutputLength} thinking chars)...`);
296
+ }
297
+
298
+ const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'CONTEXT-ASSESS');
299
+ const parsed = parseVerdictResponse(raw);
300
+ const contextLost = parsed.verdict === 'STALLED';
301
+
302
+ if (verbose) {
303
+ console.log(`[CONTEXT-ASSESS] Verdict: ${contextLost ? 'LOST' : 'CONTINUED'} — ${parsed.reason}`);
304
+ }
305
+
306
+ return { contextLost, reason: parsed.reason };
307
+ } catch (err) {
308
+ if (verbose) {
309
+ console.log(`[CONTEXT-ASSESS] Haiku assessment failed: ${err}`);
310
+ }
311
+ // On failure, assume context was lost (safer to retry than to show a confused response)
312
+ return {
313
+ contextLost: true,
314
+ reason: `Context loss assessment failed: ${err}`,
315
+ };
316
+ }
317
+ }
318
+
122
319
  function buildAssessmentPrompt(ctx: StallContext): string {
123
320
  const silenceMin = Math.round(ctx.silenceMs / 60_000);
124
321
  const totalMin = Math.round(ctx.elapsedTotalMs / 60_000);
@@ -128,6 +325,10 @@ function buildAssessmentPrompt(ctx: StallContext): string {
128
325
  ? `${ctx.originalPrompt.slice(0, 500)}...`
129
326
  : ctx.originalPrompt;
130
327
 
328
+ const tokenLine = ctx.tokenSilenceMs !== undefined
329
+ ? `Token activity: last token event ${Math.round(ctx.tokenSilenceMs / 1000)}s ago (tokens flowing = process alive)`
330
+ : 'Token activity: no token events observed';
331
+
131
332
  return [
132
333
  'You are a process health monitor. A Claude Code subprocess has been silent (no stdout) and you must determine if it is working or stalled.',
133
334
  '',
@@ -137,6 +338,7 @@ function buildAssessmentPrompt(ctx: StallContext): string {
137
338
  ctx.lastToolInputSummary ? `Last tool input: ${ctx.lastToolInputSummary}` : '',
138
339
  `Pending tool calls: ${ctx.pendingToolCount}`,
139
340
  `Total tool calls this session: ${ctx.totalToolCalls}`,
341
+ tokenLine,
140
342
  `Task being executed: ${promptPreview}`,
141
343
  '',
142
344
  'Respond in EXACTLY this format (3 lines, no extra text):',
@@ -183,13 +385,13 @@ function parseAssessmentResponse(output: string): StallVerdict {
183
385
 
184
386
  const HAIKU_TIMEOUT_MS = 30_000;
185
387
 
186
- function runHaikuAssessment(
187
- ctx: StallContext,
388
+ /** Low-level Haiku spawner: runs a prompt through `claude --print --model haiku` and returns raw text */
389
+ function spawnHaikuRaw(
390
+ prompt: string,
188
391
  claudeCommand: string,
189
392
  verbose: boolean,
190
- ): Promise<StallVerdict> {
191
- const prompt = buildAssessmentPrompt(ctx);
192
-
393
+ label: string,
394
+ ): Promise<string> {
193
395
  return new Promise((resolve, reject) => {
194
396
  let stdout = '';
195
397
  let settled = false;
@@ -214,7 +416,7 @@ function runHaikuAssessment(
214
416
 
215
417
  proc.stderr!.on('data', (data) => {
216
418
  if (verbose) {
217
- console.log(`[STALL-ASSESS] haiku stderr: ${data.toString().trim()}`);
419
+ console.log(`[${label}] haiku stderr: ${data.toString().trim()}`);
218
420
  }
219
421
  });
220
422
 
@@ -229,10 +431,10 @@ function runHaikuAssessment(
229
431
  }
230
432
 
231
433
  if (verbose) {
232
- console.log(`[STALL-ASSESS] Haiku response: ${stdout.trim()}`);
434
+ console.log(`[${label}] Haiku response: ${stdout.trim()}`);
233
435
  }
234
436
 
235
- resolve(parseAssessmentResponse(stdout));
437
+ resolve(stdout.trim());
236
438
  });
237
439
 
238
440
  proc.on('error', (err) => {
@@ -243,3 +445,257 @@ function runHaikuAssessment(
243
445
  });
244
446
  });
245
447
  }
448
+
449
+ /** Parse VERDICT/REASON format from Haiku response */
450
+ function parseVerdictResponse(raw: string): { verdict: string; reason: string } {
451
+ const lines = raw.split('\n');
452
+ let verdict = 'STALLED';
453
+ let reason = 'Assessment inconclusive';
454
+
455
+ for (const line of lines) {
456
+ const trimmed = line.trim();
457
+ if (trimmed.startsWith('VERDICT:')) {
458
+ verdict = trimmed.slice('VERDICT:'.length).trim().toUpperCase();
459
+ } else if (trimmed.startsWith('REASON:')) {
460
+ reason = trimmed.slice('REASON:'.length).trim();
461
+ }
462
+ }
463
+
464
+ return { verdict, reason };
465
+ }
466
+
467
+ /** Haiku spawner that returns a parsed StallVerdict (for stall assessment) */
468
+ async function spawnHaikuVerdict(
469
+ prompt: string,
470
+ claudeCommand: string,
471
+ verbose: boolean,
472
+ label = 'STALL-ASSESS',
473
+ ): Promise<StallVerdict> {
474
+ const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, label);
475
+ return parseAssessmentResponse(raw);
476
+ }
477
+
478
+ function runHaikuAssessment(
479
+ ctx: StallContext,
480
+ claudeCommand: string,
481
+ verbose: boolean,
482
+ ): Promise<StallVerdict> {
483
+ return spawnHaikuVerdict(buildAssessmentPrompt(ctx), claudeCommand, verbose);
484
+ }
485
+
486
+ // ========== Approval Prompt Assessment ==========
487
+
488
+ export interface ApprovalVerdict {
489
+ isApproval: boolean;
490
+ reason: string;
491
+ }
492
+
493
+ /**
494
+ * Assess whether a user message is an approval/continuation or a new task.
495
+ * Uses Haiku to classify intent — handles natural language variations that
496
+ * regex patterns miss ("sounds good", "yep do it", "option 2", etc.).
497
+ */
498
+ export async function assessApproval(
499
+ userMessage: string,
500
+ claudeCommand: string,
501
+ verbose: boolean,
502
+ ): Promise<ApprovalVerdict> {
503
+ const prompt = [
504
+ 'You are classifying a user message in a multi-turn conversation with a coding assistant.',
505
+ 'The assistant previously proposed a plan or asked a question, and the user is now responding.',
506
+ '',
507
+ `User's message: "${userMessage}"`,
508
+ '',
509
+ 'Is this an approval/continuation (user agrees, says yes, wants to proceed) or a new task/question?',
510
+ '',
511
+ 'APPROVAL signs: "yes", "sure", "go ahead", "sounds good", "do it", "yep", "option 2", "the first one", "proceed", references to previous proposal, short affirmative with modifications ("yes but use TypeScript").',
512
+ 'NEW_TASK signs: asks a different question, gives new detailed instructions, changes topic, provides new requirements unrelated to any proposal.',
513
+ '',
514
+ 'Respond in EXACTLY this format (2 lines, no extra text):',
515
+ 'VERDICT: APPROVAL or NEW_TASK',
516
+ 'REASON: <brief one-line explanation>',
517
+ ].join('\n');
518
+
519
+ try {
520
+ if (verbose) {
521
+ console.log('[APPROVAL-ASSESS] Running Haiku assessment...');
522
+ }
523
+
524
+ const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'APPROVAL-ASSESS');
525
+ const parsed = parseVerdictResponse(raw);
526
+ const isApproval = parsed.verdict.includes('APPROVAL');
527
+
528
+ if (verbose) {
529
+ console.log(`[APPROVAL-ASSESS] Verdict: ${isApproval ? 'APPROVAL' : 'NEW_TASK'} — ${parsed.reason}`);
530
+ }
531
+
532
+ return { isApproval, reason: parsed.reason };
533
+ } catch (err) {
534
+ if (verbose) {
535
+ console.log(`[APPROVAL-ASSESS] Haiku assessment failed: ${err}`);
536
+ }
537
+ // On failure, assume not an approval (safer to treat as new task)
538
+ return { isApproval: false, reason: `Assessment failed: ${err}` };
539
+ }
540
+ }
541
+
542
+ // ========== Best Result Comparison ==========
543
+
544
+ export interface BestResultContext {
545
+ originalPrompt: string;
546
+ resultA: {
547
+ successfulToolCalls: number;
548
+ responseLength: number;
549
+ hasThinking: boolean;
550
+ responseTail: string;
551
+ };
552
+ resultB: {
553
+ successfulToolCalls: number;
554
+ responseLength: number;
555
+ hasThinking: boolean;
556
+ responseTail: string;
557
+ };
558
+ }
559
+
560
+ export interface BestResultVerdict {
561
+ winner: 'A' | 'B';
562
+ reason: string;
563
+ }
564
+
565
+ /**
566
+ * Compare two retry results and determine which made more meaningful progress.
567
+ * Uses Haiku to evaluate quality — replaces arbitrary numeric scoring
568
+ * (tool count * 10 + response length / 50 + thinking bonus).
569
+ */
570
+ export async function assessBestResult(
571
+ ctx: BestResultContext,
572
+ claudeCommand: string,
573
+ verbose: boolean,
574
+ ): Promise<BestResultVerdict> {
575
+ const promptPreview = ctx.originalPrompt.length > 300
576
+ ? `${ctx.originalPrompt.slice(0, 300)}...`
577
+ : ctx.originalPrompt;
578
+
579
+ const prompt = [
580
+ 'You are comparing two AI assistant responses from retry attempts to determine which made more meaningful progress on the user\'s task.',
581
+ '',
582
+ `Original task: ${promptPreview}`,
583
+ '',
584
+ `Response A: ${ctx.resultA.successfulToolCalls} successful tool calls, ${ctx.resultA.responseLength} chars, ${ctx.resultA.hasThinking ? 'has' : 'no'} thinking output`,
585
+ `Last 500 chars of A: ${ctx.resultA.responseTail}`,
586
+ '',
587
+ `Response B: ${ctx.resultB.successfulToolCalls} successful tool calls, ${ctx.resultB.responseLength} chars, ${ctx.resultB.hasThinking ? 'has' : 'no'} thinking output`,
588
+ `Last 500 chars of B: ${ctx.resultB.responseTail}`,
589
+ '',
590
+ 'Which response made more meaningful progress? Consider:',
591
+ '- Did it actually work on the task (tool calls, code changes) vs just talking about it?',
592
+ '- Is it confused/lost context ("How can I help?") vs engaged with the original task?',
593
+ '- Quality of analysis and output, not just quantity.',
594
+ '',
595
+ 'Respond in EXACTLY this format (2 lines, no extra text):',
596
+ 'VERDICT: A or B',
597
+ 'REASON: <brief one-line explanation>',
598
+ ].join('\n');
599
+
600
+ try {
601
+ if (verbose) {
602
+ console.log('[BEST-RESULT] Running Haiku assessment...');
603
+ }
604
+
605
+ const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'BEST-RESULT');
606
+ const parsed = parseVerdictResponse(raw);
607
+ const winner: 'A' | 'B' = parsed.verdict.includes('B') ? 'B' : 'A';
608
+
609
+ if (verbose) {
610
+ console.log(`[BEST-RESULT] Verdict: ${winner} — ${parsed.reason}`);
611
+ }
612
+
613
+ return { winner, reason: parsed.reason };
614
+ } catch (err) {
615
+ if (verbose) {
616
+ console.log(`[BEST-RESULT] Haiku assessment failed: ${err}`);
617
+ }
618
+ // On failure, prefer A (the previously-tracked best result)
619
+ return { winner: 'A', reason: `Assessment failed: ${err}` };
620
+ }
621
+ }
622
+
623
+ // ========== Error Classification ==========
624
+
625
+ export interface ErrorClassification {
626
+ errorCode: string;
627
+ message: string;
628
+ }
629
+
630
+ /**
631
+ * Classify an unrecognized error from stderr using Haiku.
632
+ * Called as a fallback when regex patterns in output-utils.ts don't match.
633
+ * Returns null if the stderr content isn't a real error (just warnings/debug info).
634
+ */
635
+ export async function classifyError(
636
+ stderrContent: string,
637
+ claudeCommand: string,
638
+ verbose: boolean,
639
+ ): Promise<ErrorClassification | null> {
640
+ const tail = stderrContent.slice(-500);
641
+ if (!tail.trim()) return null;
642
+
643
+ const prompt = [
644
+ 'You are classifying an error message from the Claude Code CLI that did not match known patterns.',
645
+ '',
646
+ `stderr (last ${tail.length} chars):`,
647
+ tail,
648
+ '',
649
+ 'Classify into one of these categories:',
650
+ '- AUTH_REQUIRED: Authentication/login issues',
651
+ '- API_KEY_INVALID: API key problems',
652
+ '- QUOTA_EXCEEDED: Usage limits, billing, subscription',
653
+ '- RATE_LIMITED: Too many requests, throttling',
654
+ '- NETWORK_ERROR: Connection, DNS, timeout issues',
655
+ '- SSL_ERROR: Certificate/TLS problems',
656
+ '- SERVICE_UNAVAILABLE: Backend down (502/503/504)',
657
+ '- INTERNAL_ERROR: Server errors (500)',
658
+ '- CONTEXT_TOO_LONG: Token/context limit exceeded',
659
+ '- SESSION_NOT_FOUND: Invalid/expired session',
660
+ '- UNKNOWN: Cannot determine, not a real error, or just warnings/debug output',
661
+ '',
662
+ 'If the stderr content is just warnings, debug info, or not an actual error, use UNKNOWN.',
663
+ '',
664
+ 'Respond in EXACTLY this format (2 lines, no extra text):',
665
+ 'CATEGORY: <one of the above>',
666
+ 'MESSAGE: <brief user-friendly description of the error>',
667
+ ].join('\n');
668
+
669
+ try {
670
+ if (verbose) {
671
+ console.log('[ERROR-CLASSIFY] Running Haiku assessment...');
672
+ }
673
+
674
+ const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'ERROR-CLASSIFY');
675
+ const lines = raw.split('\n');
676
+ let category = 'UNKNOWN';
677
+ let message = '';
678
+
679
+ for (const line of lines) {
680
+ const trimmed = line.trim();
681
+ if (trimmed.startsWith('CATEGORY:')) {
682
+ category = trimmed.slice('CATEGORY:'.length).trim().toUpperCase();
683
+ } else if (trimmed.startsWith('MESSAGE:')) {
684
+ message = trimmed.slice('MESSAGE:'.length).trim();
685
+ }
686
+ }
687
+
688
+ if (category === 'UNKNOWN' || !message) return null;
689
+
690
+ if (verbose) {
691
+ console.log(`[ERROR-CLASSIFY] Verdict: ${category} — ${message}`);
692
+ }
693
+
694
+ return { errorCode: category, message };
695
+ } catch (err) {
696
+ if (verbose) {
697
+ console.log(`[ERROR-CLASSIFY] Haiku assessment failed: ${err}`);
698
+ }
699
+ return null;
700
+ }
701
+ }