mstro-app 0.1.58 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. package/PRIVACY.md +126 -0
  2. package/README.md +24 -23
  3. package/bin/commands/login.js +85 -42
  4. package/bin/commands/logout.js +35 -1
  5. package/bin/commands/status.js +1 -1
  6. package/bin/mstro.js +231 -131
  7. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  8. package/dist/server/cli/headless/claude-invoker.js +550 -115
  9. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  10. package/dist/server/cli/headless/index.d.ts +2 -1
  11. package/dist/server/cli/headless/index.d.ts.map +1 -1
  12. package/dist/server/cli/headless/index.js +2 -0
  13. package/dist/server/cli/headless/index.js.map +1 -1
  14. package/dist/server/cli/headless/prompt-utils.d.ts +5 -8
  15. package/dist/server/cli/headless/prompt-utils.d.ts.map +1 -1
  16. package/dist/server/cli/headless/prompt-utils.js +40 -5
  17. package/dist/server/cli/headless/prompt-utils.js.map +1 -1
  18. package/dist/server/cli/headless/runner.d.ts +1 -1
  19. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  20. package/dist/server/cli/headless/runner.js +52 -7
  21. package/dist/server/cli/headless/runner.js.map +1 -1
  22. package/dist/server/cli/headless/stall-assessor.d.ts +79 -1
  23. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  24. package/dist/server/cli/headless/stall-assessor.js +355 -20
  25. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  26. package/dist/server/cli/headless/tool-watchdog.d.ts +70 -0
  27. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -0
  28. package/dist/server/cli/headless/tool-watchdog.js +302 -0
  29. package/dist/server/cli/headless/tool-watchdog.js.map +1 -0
  30. package/dist/server/cli/headless/types.d.ts +98 -1
  31. package/dist/server/cli/headless/types.d.ts.map +1 -1
  32. package/dist/server/cli/improvisation-session-manager.d.ts +136 -2
  33. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  34. package/dist/server/cli/improvisation-session-manager.js +929 -132
  35. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  36. package/dist/server/index.js +5 -13
  37. package/dist/server/index.js.map +1 -1
  38. package/dist/server/mcp/bouncer-integration.d.ts.map +1 -1
  39. package/dist/server/mcp/bouncer-integration.js +18 -0
  40. package/dist/server/mcp/bouncer-integration.js.map +1 -1
  41. package/dist/server/mcp/security-audit.d.ts +2 -2
  42. package/dist/server/mcp/security-audit.d.ts.map +1 -1
  43. package/dist/server/mcp/security-audit.js +12 -8
  44. package/dist/server/mcp/security-audit.js.map +1 -1
  45. package/dist/server/mcp/security-patterns.d.ts.map +1 -1
  46. package/dist/server/mcp/security-patterns.js +9 -4
  47. package/dist/server/mcp/security-patterns.js.map +1 -1
  48. package/dist/server/routes/improvise.js +6 -6
  49. package/dist/server/routes/improvise.js.map +1 -1
  50. package/dist/server/services/analytics.d.ts +2 -0
  51. package/dist/server/services/analytics.d.ts.map +1 -1
  52. package/dist/server/services/analytics.js +26 -4
  53. package/dist/server/services/analytics.js.map +1 -1
  54. package/dist/server/services/platform.d.ts.map +1 -1
  55. package/dist/server/services/platform.js +17 -10
  56. package/dist/server/services/platform.js.map +1 -1
  57. package/dist/server/services/sandbox-utils.d.ts +6 -0
  58. package/dist/server/services/sandbox-utils.d.ts.map +1 -0
  59. package/dist/server/services/sandbox-utils.js +72 -0
  60. package/dist/server/services/sandbox-utils.js.map +1 -0
  61. package/dist/server/services/settings.d.ts +6 -0
  62. package/dist/server/services/settings.d.ts.map +1 -1
  63. package/dist/server/services/settings.js +21 -0
  64. package/dist/server/services/settings.js.map +1 -1
  65. package/dist/server/services/terminal/pty-manager.d.ts +5 -51
  66. package/dist/server/services/terminal/pty-manager.d.ts.map +1 -1
  67. package/dist/server/services/terminal/pty-manager.js +63 -102
  68. package/dist/server/services/terminal/pty-manager.js.map +1 -1
  69. package/dist/server/services/websocket/file-explorer-handlers.d.ts +5 -0
  70. package/dist/server/services/websocket/file-explorer-handlers.d.ts.map +1 -0
  71. package/dist/server/services/websocket/file-explorer-handlers.js +518 -0
  72. package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -0
  73. package/dist/server/services/websocket/git-handlers.d.ts +36 -0
  74. package/dist/server/services/websocket/git-handlers.d.ts.map +1 -0
  75. package/dist/server/services/websocket/git-handlers.js +797 -0
  76. package/dist/server/services/websocket/git-handlers.js.map +1 -0
  77. package/dist/server/services/websocket/git-pr-handlers.d.ts +4 -0
  78. package/dist/server/services/websocket/git-pr-handlers.d.ts.map +1 -0
  79. package/dist/server/services/websocket/git-pr-handlers.js +299 -0
  80. package/dist/server/services/websocket/git-pr-handlers.js.map +1 -0
  81. package/dist/server/services/websocket/git-worktree-handlers.d.ts +4 -0
  82. package/dist/server/services/websocket/git-worktree-handlers.d.ts.map +1 -0
  83. package/dist/server/services/websocket/git-worktree-handlers.js +353 -0
  84. package/dist/server/services/websocket/git-worktree-handlers.js.map +1 -0
  85. package/dist/server/services/websocket/handler-context.d.ts +32 -0
  86. package/dist/server/services/websocket/handler-context.d.ts.map +1 -0
  87. package/dist/server/services/websocket/handler-context.js +4 -0
  88. package/dist/server/services/websocket/handler-context.js.map +1 -0
  89. package/dist/server/services/websocket/handler.d.ts +27 -338
  90. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  91. package/dist/server/services/websocket/handler.js +74 -2106
  92. package/dist/server/services/websocket/handler.js.map +1 -1
  93. package/dist/server/services/websocket/index.d.ts +1 -1
  94. package/dist/server/services/websocket/index.d.ts.map +1 -1
  95. package/dist/server/services/websocket/index.js.map +1 -1
  96. package/dist/server/services/websocket/session-handlers.d.ts +10 -0
  97. package/dist/server/services/websocket/session-handlers.d.ts.map +1 -0
  98. package/dist/server/services/websocket/session-handlers.js +507 -0
  99. package/dist/server/services/websocket/session-handlers.js.map +1 -0
  100. package/dist/server/services/websocket/settings-handlers.d.ts +6 -0
  101. package/dist/server/services/websocket/settings-handlers.d.ts.map +1 -0
  102. package/dist/server/services/websocket/settings-handlers.js +125 -0
  103. package/dist/server/services/websocket/settings-handlers.js.map +1 -0
  104. package/dist/server/services/websocket/tab-handlers.d.ts +10 -0
  105. package/dist/server/services/websocket/tab-handlers.d.ts.map +1 -0
  106. package/dist/server/services/websocket/tab-handlers.js +131 -0
  107. package/dist/server/services/websocket/tab-handlers.js.map +1 -0
  108. package/dist/server/services/websocket/terminal-handlers.d.ts +9 -0
  109. package/dist/server/services/websocket/terminal-handlers.d.ts.map +1 -0
  110. package/dist/server/services/websocket/terminal-handlers.js +220 -0
  111. package/dist/server/services/websocket/terminal-handlers.js.map +1 -0
  112. package/dist/server/services/websocket/types.d.ts +67 -2
  113. package/dist/server/services/websocket/types.d.ts.map +1 -1
  114. package/hooks/bouncer.sh +11 -4
  115. package/package.json +7 -2
  116. package/server/README.md +176 -159
  117. package/server/cli/headless/claude-invoker.ts +740 -133
  118. package/server/cli/headless/index.ts +7 -1
  119. package/server/cli/headless/output-utils.test.ts +225 -0
  120. package/server/cli/headless/prompt-utils.ts +37 -5
  121. package/server/cli/headless/runner.ts +55 -8
  122. package/server/cli/headless/stall-assessor.test.ts +165 -0
  123. package/server/cli/headless/stall-assessor.ts +478 -22
  124. package/server/cli/headless/tool-watchdog.test.ts +429 -0
  125. package/server/cli/headless/tool-watchdog.ts +398 -0
  126. package/server/cli/headless/types.ts +93 -1
  127. package/server/cli/improvisation-session-manager.ts +1133 -145
  128. package/server/index.ts +5 -14
  129. package/server/mcp/README.md +59 -67
  130. package/server/mcp/bouncer-integration.test.ts +161 -0
  131. package/server/mcp/bouncer-integration.ts +28 -0
  132. package/server/mcp/security-audit.ts +12 -8
  133. package/server/mcp/security-patterns.test.ts +258 -0
  134. package/server/mcp/security-patterns.ts +8 -2
  135. package/server/routes/improvise.ts +6 -6
  136. package/server/services/analytics.ts +26 -4
  137. package/server/services/platform.test.ts +0 -10
  138. package/server/services/platform.ts +16 -11
  139. package/server/services/sandbox-utils.ts +78 -0
  140. package/server/services/settings.ts +25 -0
  141. package/server/services/terminal/pty-manager.ts +68 -129
  142. package/server/services/websocket/autocomplete.test.ts +194 -0
  143. package/server/services/websocket/file-explorer-handlers.ts +587 -0
  144. package/server/services/websocket/git-handlers.ts +924 -0
  145. package/server/services/websocket/git-pr-handlers.ts +363 -0
  146. package/server/services/websocket/git-worktree-handlers.ts +403 -0
  147. package/server/services/websocket/handler-context.ts +44 -0
  148. package/server/services/websocket/handler.test.ts +1 -1
  149. package/server/services/websocket/handler.ts +90 -2421
  150. package/server/services/websocket/index.ts +1 -1
  151. package/server/services/websocket/session-handlers.ts +574 -0
  152. package/server/services/websocket/settings-handlers.ts +150 -0
  153. package/server/services/websocket/tab-handlers.ts +150 -0
  154. package/server/services/websocket/terminal-handlers.ts +277 -0
  155. package/server/services/websocket/types.ts +145 -4
  156. package/bin/release.sh +0 -110
  157. package/dist/server/services/terminal/tmux-manager.d.ts +0 -82
  158. package/dist/server/services/terminal/tmux-manager.d.ts.map +0 -1
  159. package/dist/server/services/terminal/tmux-manager.js +0 -352
  160. package/dist/server/services/terminal/tmux-manager.js.map +0 -1
  161. package/server/services/terminal/tmux-manager.ts +0 -426
@@ -8,14 +8,17 @@
8
8
  */
9
9
 
10
10
  import { type ChildProcess, spawn } from 'node:child_process';
11
+ import { sanitizeEnvForSandbox } from '../../services/sandbox-utils.js';
11
12
  import { generateMcpConfig } from './mcp-config.js';
12
13
  import { detectErrorInStderr, } from './output-utils.js';
13
14
  import { buildMultimodalMessage } from './prompt-utils.js';
14
- import { assessStall, type StallContext } from './stall-assessor.js';
15
+ import { assessStall, assessToolTimeout, classifyError, type StallContext } from './stall-assessor.js';
16
+ import { ToolWatchdog } from './tool-watchdog.js';
15
17
  import type {
16
18
  ExecutionResult,
17
19
  ResolvedHeadlessConfig,
18
20
  ToolUseAccumulator,
21
+ ToolUseEvent,
19
22
  } from './types.js';
20
23
 
21
24
  export interface ClaudeInvokerOptions {
@@ -23,6 +26,17 @@ export interface ClaudeInvokerOptions {
23
26
  runningProcesses: Map<number, ChildProcess>;
24
27
  }
25
28
 
29
+ // ========== Signal Helpers ==========
30
+
31
+ /** Map a Node.js signal name to its numeric value for exit code computation */
32
+ function signalToNumber(signal: string): number | undefined {
33
+ const map: Record<string, number> = {
34
+ SIGHUP: 1, SIGINT: 2, SIGQUIT: 3, SIGABRT: 6,
35
+ SIGKILL: 9, SIGTERM: 15, SIGUSR1: 10, SIGUSR2: 12,
36
+ };
37
+ return map[signal];
38
+ }
39
+
26
40
  // ========== Stall Detection Helpers ==========
27
41
 
28
42
  /** Summarize a tool's input for stall assessment context */
@@ -69,20 +83,32 @@ interface StallAssessmentParams {
69
83
  now: number;
70
84
  extensionsGranted: number;
71
85
  maxExtensions: number;
86
+ toolWatchdogActive?: boolean;
72
87
  }
73
88
 
74
89
  /** Run stall assessment and return updated state if extended, null otherwise */
75
90
  async function runStallAssessment(
76
91
  params: StallAssessmentParams,
77
92
  ): Promise<{ extensionsGranted: number; currentKillDeadline: number } | null> {
78
- const { stallCtx, config, now, extensionsGranted, maxExtensions } = params;
93
+ const { stallCtx, config, now, extensionsGranted, maxExtensions, toolWatchdogActive } = params;
79
94
  try {
80
- const verdict = await assessStall(stallCtx, config.claudeCommand, config.verbose);
95
+ const verdict = await assessStall(stallCtx, config.claudeCommand, config.verbose, toolWatchdogActive);
81
96
  if (verdict.action === 'extend') {
82
97
  const newExtensions = extensionsGranted + 1;
83
- config.outputCallback?.(
84
- `\n[[MSTRO_STALL_EXTENDED]] Assessment: process likely working. ${verdict.reason}. Extension ${newExtensions}/${maxExtensions}.\n`
85
- );
98
+ const elapsedMin = Math.round(stallCtx.elapsedTotalMs / 60_000);
99
+ const pendingNames = stallCtx.pendingToolNames ?? new Set<string>();
100
+
101
+ // Emit a progress message instead of a scary stall warning.
102
+ // Task subagents get a friendlier message since long silence is expected.
103
+ if (pendingNames.has('Task')) {
104
+ config.outputCallback?.(
105
+ `\n[[MSTRO_STALL_EXTENDED]] Task subagent still running (${elapsedMin} min elapsed). ${verdict.reason}.\n`
106
+ );
107
+ } else {
108
+ config.outputCallback?.(
109
+ `\n[[MSTRO_STALL_EXTENDED]] Process still working (${elapsedMin} min elapsed). ${verdict.reason}. Extension ${newExtensions}/${maxExtensions}.\n`
110
+ );
111
+ }
86
112
  if (config.verbose) {
87
113
  console.log(`[STALL] Extended by ${Math.round(verdict.extensionMs / 60_000)} min: ${verdict.reason}`);
88
114
  }
@@ -102,6 +128,136 @@ async function runStallAssessment(
102
128
  return null;
103
129
  }
104
130
 
131
+ // ========== Native Timeout Detection ==========
132
+
133
+ /** Regex matching Claude Code's internal tool timeout messages */
134
+ const NATIVE_TIMEOUT_PATTERN = /^(\w+) timed out — (continuing|retrying) with (\d+) results? preserved$/;
135
+
136
+ /** Quick prefix check: does incomplete text look like it might be a timeout? */
137
+ const TIMEOUT_PREFIX_PATTERN = /^(\w+) timed/;
138
+
139
+ /** Known tool names that Claude Code may report timeouts for */
140
+ const NATIVE_TIMEOUT_TOOL_NAMES = new Set([
141
+ 'Read', 'Grep', 'Glob', 'Edit', 'Write', 'Bash',
142
+ 'WebFetch', 'WebSearch', 'Task', 'TodoRead', 'TodoWrite',
143
+ 'NotebookEdit', 'MultiEdit',
144
+ ]);
145
+
146
+ interface NativeTimeoutEvent {
147
+ toolName: string;
148
+ action: 'continuing' | 'retrying';
149
+ preservedCount: number;
150
+ }
151
+
152
+ /**
153
+ * Detects Claude Code's internal tool timeout messages in the text stream.
154
+ *
155
+ * Buffers text at newline boundaries to detect complete timeout lines.
156
+ * Non-matching text is forwarded immediately to minimize streaming latency.
157
+ */
158
+ class NativeTimeoutDetector {
159
+ private lineBuffer = '';
160
+ private detectedTimeouts: NativeTimeoutEvent[] = [];
161
+ /** Text buffered after native timeouts — held back from streaming until context is assessed */
162
+ private postTimeoutBuffer = '';
163
+
164
+ /**
165
+ * Process a text_delta chunk.
166
+ * Returns passthrough text (for outputCallback) and any detected timeouts.
167
+ *
168
+ * After the first native timeout is detected, subsequent passthrough text
169
+ * is held in postTimeoutBuffer instead of returned as passthrough. This
170
+ * prevents confused "What were you working on?" responses from streaming
171
+ * to the user before context loss can be assessed.
172
+ */
173
+ processChunk(text: string): { passthrough: string; timeouts: NativeTimeoutEvent[] } {
174
+ const timeouts: NativeTimeoutEvent[] = [];
175
+ let passthrough = '';
176
+
177
+ this.lineBuffer += text;
178
+
179
+ const lines = this.lineBuffer.split('\n');
180
+ const incomplete = lines.pop() ?? '';
181
+
182
+ for (const line of lines) {
183
+ const trimmed = line.trim();
184
+ const match = trimmed.match(NATIVE_TIMEOUT_PATTERN);
185
+
186
+ if (match) {
187
+ const event: NativeTimeoutEvent = {
188
+ toolName: match[1],
189
+ action: match[2] as 'continuing' | 'retrying',
190
+ preservedCount: parseInt(match[3], 10),
191
+ };
192
+ timeouts.push(event);
193
+ this.detectedTimeouts.push(event);
194
+ // Suppress this line from passthrough — replaced by structured marker
195
+ } else {
196
+ passthrough += `${line}\n`;
197
+ }
198
+ }
199
+
200
+ // Handle incomplete trailing text
201
+ if (incomplete) {
202
+ const prefixMatch = incomplete.match(TIMEOUT_PREFIX_PATTERN);
203
+ if (prefixMatch && NATIVE_TIMEOUT_TOOL_NAMES.has(prefixMatch[1])) {
204
+ // Looks like the start of a timeout message — hold it
205
+ this.lineBuffer = incomplete;
206
+ } else {
207
+ passthrough += incomplete;
208
+ this.lineBuffer = '';
209
+ }
210
+ } else {
211
+ this.lineBuffer = '';
212
+ }
213
+
214
+ // After native timeouts, buffer passthrough text instead of returning it.
215
+ // The session manager will assess context loss and either flush or discard.
216
+ if (this.detectedTimeouts.length > 0 && passthrough) {
217
+ this.postTimeoutBuffer += passthrough;
218
+ passthrough = '';
219
+ }
220
+
221
+ return { passthrough, timeouts };
222
+ }
223
+
224
+ /** Flush any held buffer (call on stream end).
225
+ * Also checks remaining buffer for timeout patterns so the last
226
+ * timeout message (without trailing newline) is always counted.
227
+ */
228
+ flush(): string {
229
+ const remaining = this.lineBuffer;
230
+ this.lineBuffer = '';
231
+
232
+ // Check if the unflushed buffer IS a timeout message
233
+ if (remaining) {
234
+ const trimmed = remaining.trim();
235
+ const match = trimmed.match(NATIVE_TIMEOUT_PATTERN);
236
+ if (match) {
237
+ this.detectedTimeouts.push({
238
+ toolName: match[1],
239
+ action: match[2] as 'continuing' | 'retrying',
240
+ preservedCount: parseInt(match[3], 10),
241
+ });
242
+ // Return empty — this was a timeout message, not user-visible text
243
+ return '';
244
+ }
245
+ }
246
+
247
+ return remaining;
248
+ }
249
+
250
+ /** Get count of detected timeouts */
251
+ get timeoutCount(): number {
252
+ return this.detectedTimeouts.length;
253
+ }
254
+
255
+ /** Get buffered post-timeout text (for session manager to flush or discard) */
256
+ get bufferedPostTimeoutOutput(): string {
257
+ return this.postTimeoutBuffer;
258
+ }
259
+ }
260
+
105
261
  // ========== Stream Event Handlers ==========
106
262
 
107
263
  interface StreamHandlerContext {
@@ -110,6 +266,18 @@ interface StreamHandlerContext {
110
266
  accumulatedThinking: string;
111
267
  accumulatedToolUse: ToolUseAccumulator[];
112
268
  toolInputBuffers: Map<number, { name: string; id: string; inputJson: string; startTime: number }>;
269
+ nativeTimeoutDetector: NativeTimeoutDetector;
270
+ /** When true, assistant text is buffered instead of forwarded to outputCallback.
271
+ * Active during resume mode until thinking/tool activity confirms Claude has context. */
272
+ resumeAssessmentActive: boolean;
273
+ /** Buffered assistant text during resume assessment */
274
+ resumeAssessmentBuffer: string;
275
+ /** Cumulative API token usage from message_start/message_delta events */
276
+ apiTokenUsage: { inputTokens: number; outputTokens: number };
277
+ /** Tracks cumulative output_tokens within the current step (message_delta is cumulative per-step) */
278
+ currentStepOutputTokens: number;
279
+ /** Timestamp of the last token usage change (tokens still flowing = process alive) */
280
+ lastTokenActivityTime: number;
113
281
  }
114
282
 
115
283
  function handleSessionCapture(
@@ -133,6 +301,15 @@ function handleThinkingDelta(event: any, ctx: StreamHandlerContext): string {
133
301
  return ctx.accumulatedThinking;
134
302
  }
135
303
 
304
+ // Thinking activity confirms Claude has context — flush resume buffer
305
+ if (ctx.resumeAssessmentActive) {
306
+ ctx.resumeAssessmentActive = false;
307
+ if (ctx.resumeAssessmentBuffer) {
308
+ ctx.config.outputCallback?.(ctx.resumeAssessmentBuffer);
309
+ ctx.resumeAssessmentBuffer = '';
310
+ }
311
+ }
312
+
136
313
  const thinking = event.delta.thinking;
137
314
  const updated = ctx.accumulatedThinking + thinking;
138
315
 
@@ -157,10 +334,33 @@ function handleTextDelta(event: any, ctx: StreamHandlerContext): string {
157
334
  }
158
335
 
159
336
  const text = event.delta.text;
337
+
338
+ // Always accumulate raw text for checkpoint context
160
339
  const updated = ctx.accumulatedAssistantResponse + text;
161
340
 
162
- if (ctx.config.outputCallback) {
163
- ctx.config.outputCallback(text);
341
+ // Route through native timeout detector to intercept Claude Code's internal timeout messages
342
+ const { passthrough, timeouts } = ctx.nativeTimeoutDetector.processChunk(text);
343
+
344
+ // Emit structured markers for detected native timeouts
345
+ for (const timeout of timeouts) {
346
+ ctx.config.outputCallback?.(
347
+ `\n[[MSTRO_NATIVE_TIMEOUT]] ${timeout.toolName} timed out \u2014 ${timeout.action} with ${timeout.preservedCount} results preserved\n`
348
+ );
349
+ }
350
+
351
+ // When resume assessment is active, buffer text instead of forwarding.
352
+ // This prevents confused "What were you working on?" responses from streaming
353
+ // to the user before we can assess whether Claude retained context.
354
+ if (ctx.resumeAssessmentActive) {
355
+ if (passthrough) {
356
+ ctx.resumeAssessmentBuffer += passthrough;
357
+ }
358
+ return updated;
359
+ }
360
+
361
+ // Forward non-timeout text to output
362
+ if (passthrough && ctx.config.outputCallback) {
363
+ ctx.config.outputCallback(passthrough);
164
364
  }
165
365
 
166
366
  return updated;
@@ -174,6 +374,15 @@ function handleToolStart(event: any, ctx: StreamHandlerContext): void {
174
374
  return;
175
375
  }
176
376
 
377
+ // Tool activity confirms Claude has context — flush resume buffer
378
+ if (ctx.resumeAssessmentActive) {
379
+ ctx.resumeAssessmentActive = false;
380
+ if (ctx.resumeAssessmentBuffer) {
381
+ ctx.config.outputCallback?.(ctx.resumeAssessmentBuffer);
382
+ ctx.resumeAssessmentBuffer = '';
383
+ }
384
+ }
385
+
177
386
  const toolName = event.content_block.name;
178
387
  const toolId = event.content_block.id;
179
388
  const index = event.index;
@@ -236,6 +445,9 @@ function handleToolComplete(event: any, ctx: StreamHandlerContext): void {
236
445
  startTime: toolBuffer.startTime
237
446
  });
238
447
 
448
+ // Clean up the input buffer — it's no longer needed after accumulation
449
+ ctx.toolInputBuffers.delete(index);
450
+
239
451
  if (ctx.config.toolUseCallback) {
240
452
  ctx.config.toolUseCallback({
241
453
  type: 'tool_complete',
@@ -247,6 +459,80 @@ function handleToolComplete(event: any, ctx: StreamHandlerContext): void {
247
459
  }
248
460
  }
249
461
 
462
+ /** Accumulate input tokens from a message_start event. Returns true if any tokens were added. */
463
+ function handleMessageStartTokens(event: any, ctx: StreamHandlerContext): boolean {
464
+ if (event.type !== 'message_start' || !event.message?.usage) return false;
465
+ const usage = event.message.usage;
466
+ ctx.currentStepOutputTokens = 0;
467
+ let changed = false;
468
+ if (typeof usage.input_tokens === 'number') {
469
+ ctx.apiTokenUsage.inputTokens += usage.input_tokens;
470
+ changed = true;
471
+ }
472
+ if (typeof usage.cache_creation_input_tokens === 'number') {
473
+ ctx.apiTokenUsage.inputTokens += usage.cache_creation_input_tokens;
474
+ changed = true;
475
+ }
476
+ if (typeof usage.cache_read_input_tokens === 'number') {
477
+ ctx.apiTokenUsage.inputTokens += usage.cache_read_input_tokens;
478
+ changed = true;
479
+ }
480
+ verboseLog(ctx.config.verbose,
481
+ `[TOKENS] message_start: input=${usage.input_tokens ?? 0} cache_create=${usage.cache_creation_input_tokens ?? 0} cache_read=${usage.cache_read_input_tokens ?? 0} → total_input=${ctx.apiTokenUsage.inputTokens}`);
482
+ return changed;
483
+ }
484
+
485
+ /** Accumulate output tokens from a message_delta event. Returns true if any tokens were added.
486
+ * message_delta carries CUMULATIVE output token count for the current step.
487
+ * Per Anthropic docs: "The token counts shown in the usage field of the
488
+ * message_delta event are cumulative" and there can be "one or more message_delta
489
+ * events" per message. We track the delta from the previous value to avoid
490
+ * double-counting when multiple message_delta events fire per step. */
491
+ function handleMessageDeltaTokens(event: any, ctx: StreamHandlerContext): boolean {
492
+ if (event.type !== 'message_delta' || !event.usage) return false;
493
+ if (typeof event.usage.output_tokens !== 'number') return false;
494
+ const increment = event.usage.output_tokens - ctx.currentStepOutputTokens;
495
+ verboseLog(ctx.config.verbose,
496
+ `[TOKENS] message_delta: output=${event.usage.output_tokens} (step_prev=${ctx.currentStepOutputTokens} increment=${increment}) → total_output=${ctx.apiTokenUsage.outputTokens + Math.max(increment, 0)}`);
497
+ if (increment <= 0) return false;
498
+ ctx.apiTokenUsage.outputTokens += increment;
499
+ ctx.currentStepOutputTokens = event.usage.output_tokens;
500
+ return true;
501
+ }
502
+
503
+ function handleTokenUsage(event: any, ctx: StreamHandlerContext): void {
504
+ const changed = handleMessageStartTokens(event, ctx) || handleMessageDeltaTokens(event, ctx);
505
+ if (changed) {
506
+ ctx.lastTokenActivityTime = Date.now();
507
+ ctx.config.tokenUsageCallback?.({ ...ctx.apiTokenUsage });
508
+ }
509
+ }
510
+
511
+ /**
512
+ * Extract definitive token usage from the result event emitted at the end of a Claude session.
513
+ * The result event's `usage` field contains the authoritative total — it overrides any
514
+ * accumulated stream-based counts which may be incomplete (e.g., when extended thinking
515
+ * suppresses stream_event emissions).
516
+ */
517
+ function handleResultTokenUsage(parsed: any, ctx: StreamHandlerContext): void {
518
+ if (!parsed.usage) return;
519
+ const u = parsed.usage;
520
+ const input = (typeof u.input_tokens === 'number' ? u.input_tokens : 0)
521
+ + (typeof u.cache_creation_input_tokens === 'number' ? u.cache_creation_input_tokens : 0)
522
+ + (typeof u.cache_read_input_tokens === 'number' ? u.cache_read_input_tokens : 0);
523
+ const output = typeof u.output_tokens === 'number' ? u.output_tokens : 0;
524
+
525
+ if (input > 0 || output > 0) {
526
+ verboseLog(ctx.config.verbose,
527
+ `[TOKENS] Result event usage: input=${input} output=${output} ` +
528
+ `(stream accumulated: input=${ctx.apiTokenUsage.inputTokens} output=${ctx.apiTokenUsage.outputTokens})`);
529
+ // Replace with authoritative counts from the result event
530
+ ctx.apiTokenUsage = { inputTokens: input, outputTokens: output };
531
+ ctx.lastTokenActivityTime = Date.now();
532
+ ctx.config.tokenUsageCallback?.({ ...ctx.apiTokenUsage });
533
+ }
534
+ }
535
+
250
536
  function handleToolResult(parsed: any, ctx: StreamHandlerContext): void {
251
537
  if (parsed.type !== 'user' || !parsed.message?.content) {
252
538
  return;
@@ -298,6 +584,23 @@ function processStreamLines(
298
584
  }
299
585
 
300
586
  function processStreamEvent(parsed: any, ctx: StreamHandlerContext): void {
587
+ // Handle error events from Claude CLI (API errors, model errors, etc.)
588
+ if (parsed.type === 'error') {
589
+ const errorMessage = parsed.error?.message || parsed.message || JSON.stringify(parsed);
590
+ ctx.config.outputCallback?.(`\n[[MSTRO_ERROR:CLAUDE_ERROR]] ${errorMessage}\n`);
591
+ return;
592
+ }
593
+
594
+ // Handle result events — extract definitive token usage and surface errors
595
+ if (parsed.type === 'result') {
596
+ handleResultTokenUsage(parsed, ctx);
597
+ if (parsed.is_error) {
598
+ const errorMessage = parsed.error || parsed.result || 'Unknown error in result';
599
+ ctx.config.outputCallback?.(`\n[[MSTRO_ERROR:CLAUDE_RESULT_ERROR]] ${errorMessage}\n`);
600
+ return;
601
+ }
602
+ }
603
+
301
604
  if (parsed.type === 'stream_event' && parsed.event) {
302
605
  const event = parsed.event;
303
606
  ctx.accumulatedThinking = handleThinkingDelta(event, ctx);
@@ -305,10 +608,47 @@ function processStreamEvent(parsed: any, ctx: StreamHandlerContext): void {
305
608
  handleToolStart(event, ctx);
306
609
  handleToolInputDelta(event, ctx);
307
610
  handleToolComplete(event, ctx);
611
+ handleTokenUsage(event, ctx);
308
612
  }
309
613
  handleToolResult(parsed, ctx);
310
614
  }
311
615
 
616
+ // ========== Close Handler Helpers ==========
617
+
618
+ /** Flush native timeout detector buffers and return post-timeout output if any */
619
+ function flushNativeTimeoutBuffers(ctx: StreamHandlerContext): string | undefined {
620
+ const remaining = ctx.nativeTimeoutDetector.flush();
621
+ const buffered = ctx.nativeTimeoutDetector.bufferedPostTimeoutOutput;
622
+ const postTimeout = (buffered + remaining) || undefined;
623
+
624
+ // Only flush remaining text if there were no native timeouts
625
+ // (when there are timeouts, the session manager decides what to show)
626
+ if (!postTimeout && remaining) {
627
+ ctx.config.outputCallback?.(remaining);
628
+ }
629
+
630
+ return postTimeout;
631
+ }
632
+
633
+ /** Classify unmatched stderr via Haiku when process exits with error */
634
+ async function classifyUnmatchedStderr(
635
+ stderr: string,
636
+ errorAlreadySurfaced: boolean,
637
+ code: number | null,
638
+ config: ResolvedHeadlessConfig,
639
+ ): Promise<void> {
640
+ if (!stderr || errorAlreadySurfaced || code === 0) return;
641
+
642
+ try {
643
+ const classified = await classifyError(stderr, config.claudeCommand, config.verbose);
644
+ if (classified) {
645
+ config.outputCallback?.(`\n[[MSTRO_ERROR:${classified.errorCode}]] ${classified.message}\n`);
646
+ }
647
+ } catch {
648
+ // Haiku classification failed — proceed without it
649
+ }
650
+ }
651
+
312
652
  // ========== Error Handling ==========
313
653
 
314
654
  const SPAWN_ERROR_MAP: Record<string, { code: string; message: string }> = {
@@ -386,24 +726,295 @@ function buildClaudeArgs(
386
726
  return args;
387
727
  }
388
728
 
389
- /**
390
- * Execute a Claude CLI command for a single movement
391
- * Supports multimodal prompts via --input-format stream-json when image attachments are present
392
- */
393
- export async function executeClaudeCommand(
729
/**
 * Send the prompt plus its image attachments to the Claude process over stdin.
 *
 * An `error` listener is attached before anything is written so that a failed
 * write is reported through `config.outputCallback` (and logged when verbose).
 * The multimodal message is then written and stdin is closed.
 *
 * The caller is expected to have spawned the process with a piped stdin and to
 * have image attachments present on `config`.
 *
 * @param claudeProcess Spawned Claude CLI process.
 * @param prompt Text prompt to combine with the attachments.
 * @param config Resolved headless configuration carrying `imageAttachments`.
 */
function writeImageAttachmentsToStdin(
  claudeProcess: ChildProcess,
  prompt: string,
  config: ResolvedHeadlessConfig,
): void {
  const stdin = claudeProcess.stdin!;

  stdin.on('error', (err) => {
    if (config.verbose) {
      console.error('[STDIN] Write error:', err.message);
    }
    config.outputCallback?.(`\n[[MSTRO_ERROR:STDIN_WRITE_FAILED]] Failed to send image data to Claude: ${err.message}\n`);
  });

  const payload = buildMultimodalMessage(prompt, config.imageAttachments!);
  stdin.write(payload);
  stdin.end();
}
745
+
746
/**
 * Mutable stall-detection bookkeeping for one Claude process.
 *
 * A single object is shared by the stdout handler, the tool-result handler and
 * the periodic stall check so that all of them observe each other's updates.
 * All time values are epoch milliseconds as returned by `Date.now()`.
 */
interface StallState {
  /** Time of the most recent stdout chunk or tool result. */
  lastActivityTime: number;
  /** True once the "will terminate" warning was emitted; cleared on new activity. */
  stallWarningEmitted: boolean;
  /** True while a stall assessment is awaited, so ticks do not start another. */
  assessmentInProgress: boolean;
  /** Number of deadline extensions granted by stall assessments so far. */
  extensionsGranted: number;
  /** Point in time at which the stall check terminates the process. */
  currentKillDeadline: number;
  /** Earliest time the next warning/assessment may run; 0 means no throttle. */
  nextWarningAfter: number;
}
755
+
756
/**
 * Run one stall-check tick for a running Claude process.
 *
 * Order of checks:
 *  1. Hard wall-clock cap (`stallHardCapMs`) — terminate unconditionally.
 *  2. Recent token activity (under 60s) pushes the kill deadline forward.
 *  3. Kill deadline reached — terminate.
 *  4. If output has been silent for `stallWarningMs` and no warning/assessment
 *     is pending or throttled, either run a stall assessment (which may extend
 *     the deadline) or emit the "will terminate" warning.
 *
 * A failing stall assessment is treated like "no extension": the in-progress
 * flag is always cleared and the plain warning is emitted instead. Previously a
 * rejection left `assessmentInProgress` stuck at `true`, silencing every later
 * tick, and escaped as an unhandled rejection from the interval callback.
 *
 * @param state Shared mutable stall state; updated in place.
 * @param opts Snapshot of configuration and counters for this tick.
 */
async function runStallCheckTick(
  state: StallState,
  opts: {
    perfStart: number;
    stallWarningMs: number;
    stallHardCapMs: number;
    maxExtensions: number;
    stallAssessEnabled: boolean;
    toolWatchdogActive: boolean;
    prompt: string;
    pendingTools: Map<string, string>;
    lastToolInputSummary: string | undefined;
    totalToolCalls: number;
    claudeProcess: ChildProcess;
    stallCheckInterval: ReturnType<typeof setInterval>;
    config: ResolvedHeadlessConfig;
    lastTokenActivityTime: number;
  },
): Promise<void> {
  const now = Date.now();
  const silenceMs = now - state.lastActivityTime;
  const totalElapsed = now - opts.perfStart;
  const tokenSilenceMs = now - opts.lastTokenActivityTime;

  // Hard cap: absolute wall-clock limit regardless of extensions.
  if (totalElapsed >= opts.stallHardCapMs) {
    terminateStallProcess(opts.claudeProcess, opts.stallCheckInterval, opts.config,
      `\n[[MSTRO_ERROR:EXECUTION_STALLED]] Hard time limit reached (${Math.round(opts.stallHardCapMs / 60000)} min total). Terminating process.\n`
    );
    return;
  }

  // Token activity pushes the kill deadline forward — tokens flowing means
  // the process is alive even if stdout is silent (e.g. silent thinking).
  if (tokenSilenceMs < 60_000 && now < state.currentKillDeadline) {
    const killMs = opts.config.stallKillMs ?? 1_800_000;
    state.currentKillDeadline = Math.max(state.currentKillDeadline, now + killMs);
  }

  if (now >= state.currentKillDeadline) {
    terminateStallProcess(opts.claudeProcess, opts.stallCheckInterval, opts.config,
      `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Terminating process.\n`
    );
    return;
  }

  if (silenceMs < opts.stallWarningMs || state.stallWarningEmitted || now < state.nextWarningAfter || state.assessmentInProgress) return;

  const stallCtx: StallContext = {
    originalPrompt: opts.prompt,
    silenceMs,
    lastToolName: opts.pendingTools.size > 0 ? Array.from(opts.pendingTools.values()).pop() : undefined,
    lastToolInputSummary: opts.lastToolInputSummary,
    pendingToolCount: opts.pendingTools.size,
    pendingToolNames: new Set(opts.pendingTools.values()),
    totalToolCalls: opts.totalToolCalls,
    elapsedTotalMs: totalElapsed,
    tokenSilenceMs,
  };

  if (opts.stallAssessEnabled && state.extensionsGranted < opts.maxExtensions) {
    state.assessmentInProgress = true;
    let result;
    try {
      result = await runStallAssessment({ stallCtx, config: opts.config, now, extensionsGranted: state.extensionsGranted, maxExtensions: opts.maxExtensions, toolWatchdogActive: opts.toolWatchdogActive });
    } catch (err: unknown) {
      // Assessment is advisory; on failure fall through to the plain warning.
      if (opts.config.verbose) {
        console.error('[STALL] Assessment failed:', err instanceof Error ? err.message : err);
      }
    } finally {
      // Always release the guard so later ticks can warn or assess again.
      state.assessmentInProgress = false;
    }

    if (result) {
      state.extensionsGranted = result.extensionsGranted;
      state.currentKillDeadline = result.currentKillDeadline;
      // Throttle the next warning/assessment by one warning interval.
      state.nextWarningAfter = now + opts.stallWarningMs;
      return;
    }
  }

  state.stallWarningEmitted = true;
  const killIn = Math.round((state.currentKillDeadline - now) / 60_000);
  opts.config.outputCallback?.(
    `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Will terminate in ${killIn} minutes if no activity.\n`
  );
}
835
+
836
+ // ========== Tool Tracking Setup ==========
837
+
838
/**
 * Mutable state shared by the tool event handlers (`onToolStart`,
 * `onToolComplete`, `onToolResult`) and the tool-timeout path.
 */
interface ToolTrackingState {
  /** Tools that have started but not yet produced a result: toolId → toolName. */
  pendingTools: Map<string, string>;
  /** Summary of the latest completed tool input and a running count of tool starts. */
  counters: { lastToolInputSummary: string | undefined; totalToolCalls: number };
  /** toolId → toolName for every tool seen, recorded at tool start. */
  toolIdToName: Map<string, string>;
  /** toolId → complete tool input, recorded when the input finishes streaming. */
  toolIdToInput: Map<string, Record<string, unknown>>;
  /** Per-tool timeout watchdog, or `null` when the watchdog is disabled. */
  watchdog: ToolWatchdog | null;
  /** Stall-detection state; tool results count as activity and reset it. */
  stallState: StallState;
  /** Stream handler context holding accumulated response, thinking and tool use. */
  ctx: StreamHandlerContext;
  /** Invoked with the id of a tool whose watch expired. */
  onTimeout: (hungToolId: string) => void;
}
849
+
850
/** Handles returned by `setupToolTracking` for use by the process runner. */
interface ToolTrackingResult {
  /** Live map of tools awaiting a result: toolId → toolName. */
  pendingTools: Map<string, string>;
  /** The watchdog instance, or `null` when it is disabled by configuration. */
  watchdog: ToolWatchdog | null;
  /** Whether the tool watchdog is enabled for this run. */
  toolWatchdogActive: boolean;
  /** Live counters updated by the tool event handlers. */
  counters: { lastToolInputSummary: string | undefined; totalToolCalls: number };
  /**
   * Supplies the process and interval that a tool timeout must tear down.
   * Must be called after stallCheckInterval is created; until then tool
   * timeouts are ignored.
   */
  setKillContext: (claudeProcess: ChildProcess, stallCheckInterval: ReturnType<typeof setInterval>) => void;
}
858
+
859
/**
 * React to a `tool_start` event: register the tool as pending, bump the call
 * counter, remember its name and, if a watchdog exists, begin watching it.
 *
 * The watch is started with an empty input because the full input has not
 * streamed in yet at this point.
 *
 * @param event Tool event; the caller guarantees `toolId` and `toolName` are set.
 * @param s Shared tool tracking state, mutated in place.
 */
function onToolStart(event: ToolUseEvent, s: ToolTrackingState): void {
  const toolId = event.toolId!;
  const toolName = event.toolName!;

  s.pendingTools.set(toolId, toolName);
  s.counters.totalToolCalls += 1;
  s.toolIdToName.set(toolId, toolName);

  s.watchdog?.startWatch(toolId, toolName, {}, () => {
    s.onTimeout(toolId);
  });
}
869
+
870
/**
 * React to a `tool_complete` event (the tool's input has fully streamed in).
 *
 * Stores a summary and the full input, then — when a watchdog exists and the
 * tool's name is known from its start event — starts the watch again, this
 * time passing the complete input.
 *
 * @param event Tool event; the caller guarantees `toolId` and `completeInput` are set.
 * @param s Shared tool tracking state, mutated in place.
 */
function onToolComplete(event: ToolUseEvent, s: ToolTrackingState): void {
  const toolId = event.toolId!;

  s.counters.lastToolInputSummary = summarizeToolInput(event.completeInput);
  s.toolIdToInput.set(toolId, event.completeInput);

  const { watchdog } = s;
  if (watchdog) {
    const knownName = s.toolIdToName.get(toolId);
    if (knownName) {
      watchdog.startWatch(toolId, knownName, event.completeInput, () => {
        s.onTimeout(toolId);
      });
    }
  }
}
881
+
882
/**
 * React to a `tool_result` event.
 *
 * The tool is no longer pending, and a result counts as activity, so the stall
 * warning flag and last-activity time are reset. When a watchdog exists, the
 * tool's duration is recorded (if it is known) and its watch is cleared.
 *
 * The watch is now cleared even when the tool has no entry in
 * `ctx.accumulatedToolUse`. Previously that case returned early and left the
 * watch armed, so a tool that had already finished could still trigger a
 * timeout kill later.
 *
 * @param event Tool event; the caller guarantees `toolId` is set.
 * @param s Shared tool tracking state, mutated in place.
 */
function onToolResult(event: ToolUseEvent, s: ToolTrackingState): void {
  const id = event.toolId!;
  s.pendingTools.delete(id);
  s.stallState.stallWarningEmitted = false;
  s.stallState.lastActivityTime = Date.now();

  if (!s.watchdog) return;

  const toolEntry = s.ctx.accumulatedToolUse.find(t => t.toolId === id);
  const toolName = s.toolIdToName.get(id);
  if (toolName && toolEntry?.duration) {
    s.watchdog.recordCompletion(toolName, toolEntry.duration);
  }

  // A result arrived, so this tool can no longer be hung.
  s.watchdog.clearWatch(id);
}
896
+
897
/**
 * Pick a human-readable target for a tool-timeout message.
 *
 * Prefers `toolInput.url`, falls back to `toolInput.query`, and stringifies
 * whichever is truthy.
 *
 * @param toolInput Complete tool input as recorded for the tool.
 * @returns The target as a string, or `undefined` when neither field is set.
 */
function resolveToolUrl(toolInput: Record<string, unknown>): string | undefined {
  const target = toolInput.url || toolInput.query;
  return target ? String(target) : undefined;
}
903
+
904
/**
 * Handle a hung tool: capture a checkpoint, tell the user, and kill the process.
 *
 * Steps:
 *  1. Build a checkpoint from the accumulated response, thinking and tool use.
 *  2. Emit a `[[MSTRO_TOOL_TIMEOUT]]` line naming the tool, its timeout, the
 *     URL/query it was working on (if any) and how many tool results completed.
 *  3. Pass the checkpoint to `config.onToolTimeout` when one was built.
 *  4. Clear all watches and the stall interval, send SIGTERM, and escalate to
 *     SIGKILL after 5 seconds if the process still has not exited.
 *
 * The escalation checks `exitCode`/`signalCode` rather than `killed`.
 * `ChildProcess.killed` becomes true as soon as a signal was sent successfully,
 * so the former `!proc.killed` guard was always false after SIGTERM and the
 * SIGKILL fallback never ran.
 *
 * @param hungToolId Id of the tool whose watch expired.
 * @param watchdog Active tool watchdog.
 * @param killCtx Process to kill and stall interval to stop.
 * @param s Shared tool tracking state.
 * @param config Resolved headless configuration (callbacks, verbosity).
 * @param prompt Original prompt, stored in the checkpoint.
 * @param sessionCapture Holder of the Claude session id, if one was captured.
 * @param perfStart Start time of the execution (epoch ms).
 */
function executeToolTimeout(
  hungToolId: string,
  watchdog: ToolWatchdog,
  killCtx: { claudeProcess: ChildProcess; stallCheckInterval: ReturnType<typeof setInterval> },
  s: ToolTrackingState,
  config: ResolvedHeadlessConfig,
  prompt: string,
  sessionCapture: { claudeSessionId?: string },
  perfStart: number,
): void {
  const checkpoint = watchdog.buildCheckpoint(
    prompt, s.ctx.accumulatedAssistantResponse, s.ctx.accumulatedThinking,
    s.ctx.accumulatedToolUse, hungToolId, sessionCapture.claudeSessionId, perfStart,
  );

  const toolName = s.toolIdToName.get(hungToolId) || 'unknown';
  const toolInput = s.toolIdToInput.get(hungToolId) || {};
  const timeoutMs = watchdog.getTimeout(toolName);
  const url = resolveToolUrl(toolInput);

  config.outputCallback?.(
    `\n[[MSTRO_TOOL_TIMEOUT]] ${toolName} timed out after ${Math.round(timeoutMs / 1000)}s${url ? ` fetching: ${url.slice(0, 100)}` : ''}. ${s.ctx.accumulatedToolUse.filter(t => t.result !== undefined).length} completed results preserved.\n`
  );

  if (checkpoint) {
    config.onToolTimeout?.(checkpoint);
  }

  verboseLog(config.verbose, `[WATCHDOG] Killing process due to ${toolName} timeout`);
  watchdog.clearAll();
  clearInterval(killCtx.stallCheckInterval);

  const proc = killCtx.claudeProcess;
  proc.kill('SIGTERM');
  setTimeout(() => {
    // Still running: neither an exit code nor a terminating signal was recorded.
    if (proc.exitCode === null && proc.signalCode === null) {
      proc.kill('SIGKILL');
    }
  }, 5000);
}
940
+
941
/**
 * Wire up tool activity tracking and, unless disabled, the tool watchdog.
 *
 * Replaces `config.toolUseCallback` with a wrapper that updates the tracking
 * state for `tool_start`, `tool_complete` and `tool_result` events and then
 * forwards every event to the callback that was previously configured.
 *
 * Tool timeouts are ignored until `setKillContext` has been called on the
 * returned object, because the process/interval to tear down are not known
 * before then.
 *
 * NOTE(review): `config` is mutated; if the same config object is passed
 * through this function more than once the wrappers will nest — confirm that
 * callers use a fresh config per execution.
 *
 * @param config Resolved headless configuration; its `toolUseCallback` is wrapped.
 * @param stallState Stall state that tool results reset.
 * @param ctx Stream handler context (accumulated output, token activity time).
 * @param sessionCapture Holder of the Claude session id, if captured.
 * @param prompt Original prompt, used when building a timeout checkpoint.
 * @param perfStart Start time of the execution (epoch ms).
 * @returns Live tracking handles plus `setKillContext`.
 */
function setupToolTracking(
  config: ResolvedHeadlessConfig,
  stallState: StallState,
  ctx: StreamHandlerContext,
  sessionCapture: { claudeSessionId?: string },
  prompt: string,
  perfStart: number,
): ToolTrackingResult {
  const pendingTools = new Map<string, string>();
  const counters = { lastToolInputSummary: undefined as string | undefined, totalToolCalls: 0 };

  const toolWatchdogActive = config.enableToolWatchdog !== false;

  const createWatchdog = (): ToolWatchdog =>
    new ToolWatchdog({
      profiles: config.toolTimeoutProfiles,
      verbose: config.verbose,
      onTiebreaker: async (toolName, toolInput, elapsedMs, tokenSilenceMs) => {
        return assessToolTimeout(toolName, toolInput, elapsedMs, config.claudeCommand, config.verbose, tokenSilenceMs);
      },
      getTokenSilenceMs: () => {
        const lastTokenAt = ctx.lastTokenActivityTime;
        if (lastTokenAt > 0) {
          return Date.now() - lastTokenAt;
        }
        return undefined;
      },
    });

  const watchdog = toolWatchdogActive ? createWatchdog() : null;

  // Filled in later through setKillContext, once stallCheckInterval exists.
  let killCtx: { claudeProcess: ChildProcess; stallCheckInterval: ReturnType<typeof setInterval> } | null = null;

  const trackingState: ToolTrackingState = {
    pendingTools,
    counters,
    toolIdToName: new Map(),
    toolIdToInput: new Map(),
    watchdog,
    stallState,
    ctx,
    onTimeout: (hungToolId) => {
      if (!watchdog || !killCtx) return;
      executeToolTimeout(hungToolId, watchdog, killCtx, trackingState, config, prompt, sessionCapture, perfStart);
    },
  };

  const previousCallback = config.toolUseCallback;

  config.toolUseCallback = (event) => {
    switch (event.type) {
      case 'tool_start':
        if (event.toolName && event.toolId) onToolStart(event, trackingState);
        break;
      case 'tool_complete':
        if (event.completeInput && event.toolId) onToolComplete(event, trackingState);
        break;
      case 'tool_result':
        if (event.toolId) onToolResult(event, trackingState);
        break;
      default:
        break;
    }
    // Always forward to the caller's original handler.
    previousCallback?.(event);
  };

  const setKillContext = (claudeProcess: ChildProcess, stallCheckInterval: ReturnType<typeof setInterval>): void => {
    killCtx = { claudeProcess, stallCheckInterval };
  };

  return { pendingTools, watchdog, toolWatchdogActive, counters, setKillContext };
}
1001
+
1002
/**
 * Print each message with `console.log`, one call per message, but only when
 * verbose mode is on.
 *
 * @param verbose Verbose flag; falsy values suppress all output.
 * @param msgs Messages to print in order.
 */
function verboseLog(verbose: boolean | undefined, ...msgs: string[]): void {
  if (!verbose) {
    return;
  }
  msgs.forEach((line) => {
    console.log(line);
  });
}
1008
+
1009
+ /** Spawn the Claude CLI process and register it. Extracted to reduce cognitive complexity. */
1010
+ function spawnAndRegister(
1011
+ config: ResolvedHeadlessConfig,
1012
+ prompt: string,
1013
+ hasImageAttachments: boolean,
1014
+ useStreamJson: boolean,
1015
+ runningProcesses: Map<number, ChildProcess>,
1016
+ perfStart: number,
1017
+ ): ChildProcess {
407
1018
  const mcpConfigPath = generateMcpConfig(config.workingDir, config.verbose);
408
1019
 
409
1020
  if (!mcpConfigPath && config.outputCallback) {
@@ -412,32 +1023,52 @@ export async function executeClaudeCommand(
412
1023
  );
413
1024
  }
414
1025
 
415
- const args = buildClaudeArgs(config, prompt, !!hasImageAttachments, !!useStreamJson, mcpConfigPath);
1026
+ const args = buildClaudeArgs(config, prompt, hasImageAttachments, useStreamJson, mcpConfigPath);
416
1027
 
417
- if (config.verbose) {
418
- console.log(`[PERF] About to spawn: ${Date.now() - perfStart}ms`);
419
- console.log(`[PERF] Command: ${config.claudeCommand} ${args.join(' ')}`);
420
- }
1028
+ verboseLog(config.verbose,
1029
+ `[PERF] About to spawn: ${Date.now() - perfStart}ms`,
1030
+ `[PERF] Command: ${config.claudeCommand} ${args.join(' ')}`,
1031
+ );
421
1032
 
422
1033
  const claudeProcess = spawn(config.claudeCommand, args, {
423
1034
  cwd: config.workingDir,
424
- env: { ...process.env },
1035
+ env: config.sandboxed
1036
+ ? sanitizeEnvForSandbox(process.env, config.workingDir)
1037
+ : { ...process.env },
425
1038
  stdio: [hasImageAttachments ? 'pipe' : 'ignore', 'pipe', 'pipe']
426
1039
  });
427
1040
 
428
1041
  if (hasImageAttachments && claudeProcess.stdin) {
429
- const multimodalMessage = buildMultimodalMessage(prompt, config.imageAttachments!);
430
- claudeProcess.stdin.write(multimodalMessage);
431
- claudeProcess.stdin.end();
1042
+ writeImageAttachmentsToStdin(claudeProcess, prompt, config);
432
1043
  }
433
1044
 
434
1045
  if (claudeProcess.pid) {
435
1046
  runningProcesses.set(claudeProcess.pid, claudeProcess);
436
1047
  }
437
1048
 
438
- if (config.verbose) {
439
- console.log(`[PERF] Spawned: ${Date.now() - perfStart}ms`);
440
- }
1049
+ verboseLog(config.verbose, `[PERF] Spawned: ${Date.now() - perfStart}ms`);
1050
+
1051
+ return claudeProcess;
1052
+ }
1053
+
1054
+ /**
1055
+ * Execute a Claude CLI command for a single movement
1056
+ * Supports multimodal prompts via --input-format stream-json when image attachments are present
1057
+ */
1058
+ export async function executeClaudeCommand(
1059
+ prompt: string,
1060
+ _movementId: string,
1061
+ _sessionNumber: number,
1062
+ options: ClaudeInvokerOptions
1063
+ ): Promise<ExecutionResult> {
1064
+ const { config, runningProcesses } = options;
1065
+ const perfStart = Date.now();
1066
+ verboseLog(config.verbose, `[PERF] executeMovement started`);
1067
+
1068
+ const hasImageAttachments = config.imageAttachments && config.imageAttachments.length > 0;
1069
+ const useStreamJson = hasImageAttachments || config.thinkingCallback || config.outputCallback || config.toolUseCallback;
1070
+
1071
+ const claudeProcess = spawnAndRegister(config, prompt, !!hasImageAttachments, !!useStreamJson, runningProcesses, perfStart);
441
1072
 
442
1073
  let stdout = '';
443
1074
  let stderr = '';
@@ -446,54 +1077,51 @@ export async function executeClaudeCommand(
446
1077
  let errorAlreadySurfaced = false;
447
1078
 
448
1079
  const sessionCapture: { claudeSessionId?: string } = {};
1080
+ // Activate resume assessment buffering when resuming a session.
1081
+ // Text is held until thinking/tool activity confirms Claude has context.
1082
+ const isResumeMode = !!(config.continueSession && config.claudeSessionId);
1083
+
449
1084
  const ctx: StreamHandlerContext = {
450
1085
  config,
451
1086
  accumulatedAssistantResponse: '',
452
1087
  accumulatedThinking: '',
453
1088
  accumulatedToolUse: [],
454
1089
  toolInputBuffers: new Map(),
1090
+ nativeTimeoutDetector: new NativeTimeoutDetector(),
1091
+ resumeAssessmentActive: isResumeMode,
1092
+ resumeAssessmentBuffer: '',
1093
+ apiTokenUsage: { inputTokens: 0, outputTokens: 0 },
1094
+ currentStepOutputTokens: 0,
1095
+ lastTokenActivityTime: Date.now(),
455
1096
  };
456
1097
 
457
- // Stall detection state
458
- let lastActivityTime = Date.now();
459
- let stallWarningEmitted = false;
460
- let assessmentInProgress = false;
461
- let extensionsGranted = 0;
462
- let currentKillDeadline = Date.now() + (config.stallKillMs ?? 1_800_000);
1098
+ // Stall detection state (mutable object shared with runStallCheckTick)
1099
+ const stallState: StallState = {
1100
+ lastActivityTime: Date.now(),
1101
+ stallWarningEmitted: false,
1102
+ assessmentInProgress: false,
1103
+ extensionsGranted: 0,
1104
+ currentKillDeadline: Date.now() + (config.stallKillMs ?? 1_800_000),
1105
+ nextWarningAfter: 0,
1106
+ };
463
1107
 
464
1108
  // Tool activity tracking for stall assessment context
465
- let lastToolName: string | undefined;
466
- let lastToolInputSummary: string | undefined;
467
- let pendingToolCount = 0;
468
- let totalToolCalls = 0;
469
-
470
- // Wrap the existing tool handlers to track activity
471
- const origToolUseCallback = config.toolUseCallback;
472
- config.toolUseCallback = (event) => {
473
- if (event.type === 'tool_start' && event.toolName) {
474
- lastToolName = event.toolName;
475
- pendingToolCount++;
476
- totalToolCalls++;
477
- } else if (event.type === 'tool_complete' && event.completeInput) {
478
- lastToolInputSummary = summarizeToolInput(event.completeInput);
479
- } else if (event.type === 'tool_result') {
480
- pendingToolCount = Math.max(0, pendingToolCount - 1);
481
- }
482
- origToolUseCallback?.(event);
483
- };
1109
+ const toolTracking = setupToolTracking(config, stallState, ctx, sessionCapture, prompt, perfStart);
1110
+ const { pendingTools, watchdog, toolWatchdogActive } = toolTracking;
1111
+ // Mutable counters accessed by stall check tick
1112
+ const toolCounters = toolTracking.counters;
484
1113
 
485
1114
  claudeProcess.stdout!.on('data', (data) => {
486
- lastActivityTime = Date.now();
487
- stallWarningEmitted = false;
1115
+ stallState.lastActivityTime = Date.now();
1116
+ stallState.stallWarningEmitted = false;
1117
+ stallState.nextWarningAfter = 0; // Real activity resets throttle
488
1118
  // Push kill deadline forward on any activity
489
1119
  const killMs = config.stallKillMs ?? 1_800_000;
490
- currentKillDeadline = Date.now() + killMs;
1120
+ stallState.currentKillDeadline = Date.now() + killMs;
491
1121
 
492
1122
  if (!firstStdoutReceived) {
493
1123
  firstStdoutReceived = true;
494
- if (config.verbose) {
495
- console.log(`[PERF] First stdout data: ${Date.now() - perfStart}ms`);
496
- }
1124
+ verboseLog(config.verbose, `[PERF] First stdout data: ${Date.now() - perfStart}ms`);
497
1125
  }
498
1126
 
499
1127
  const chunk = data.toString();
@@ -525,82 +1153,61 @@ export async function executeClaudeCommand(
525
1153
  const maxExtensions = config.stallMaxExtensions ?? 3;
526
1154
  const stallAssessEnabled = config.stallAssessEnabled !== false;
527
1155
 
528
- const stallCheckInterval = setInterval(async () => {
529
- const now = Date.now();
530
- const silenceMs = now - lastActivityTime;
531
- const totalElapsed = now - perfStart;
532
-
533
- // Hard cap: absolute wall-clock limit regardless of extensions
534
- if (totalElapsed >= stallHardCapMs) {
535
- terminateStallProcess(claudeProcess, stallCheckInterval, config,
536
- `\n[[MSTRO_ERROR:EXECUTION_STALLED]] Hard time limit reached (${Math.round(stallHardCapMs / 60000)} min total). Terminating process.\n`
537
- );
538
- return;
539
- }
540
-
541
- // Kill deadline reached
542
- if (now >= currentKillDeadline) {
543
- terminateStallProcess(claudeProcess, stallCheckInterval, config,
544
- `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Terminating process.\n`
545
- );
546
- return;
547
- }
548
-
549
- // Warning + assessment trigger
550
- if (silenceMs < stallWarningMs || stallWarningEmitted) return;
551
-
552
- stallWarningEmitted = true;
553
- const killIn = Math.round((currentKillDeadline - now) / 60_000);
554
- config.outputCallback?.(
555
- `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Will terminate in ${killIn} minutes if no activity.\n`
556
- );
557
-
558
- // Run stall assessment if enabled and we haven't exhausted extensions
559
- if (!stallAssessEnabled || assessmentInProgress || extensionsGranted >= maxExtensions) return;
560
-
561
- assessmentInProgress = true;
562
- const stallCtx: StallContext = {
563
- originalPrompt: prompt,
564
- silenceMs,
565
- lastToolName,
566
- lastToolInputSummary,
567
- pendingToolCount,
568
- totalToolCalls,
569
- elapsedTotalMs: totalElapsed,
570
- };
571
-
572
- const result = await runStallAssessment({ stallCtx, config, now, extensionsGranted, maxExtensions });
573
- if (result) {
574
- extensionsGranted = result.extensionsGranted;
575
- currentKillDeadline = result.currentKillDeadline;
576
- stallWarningEmitted = false; // Allow re-warning after extension
577
- }
578
- assessmentInProgress = false;
1156
+ // eslint-disable-next-line prefer-const
1157
+ let stallCheckInterval: ReturnType<typeof setInterval>;
1158
+ stallCheckInterval = setInterval(() => {
1159
+ runStallCheckTick(stallState, {
1160
+ perfStart, stallWarningMs, stallHardCapMs, maxExtensions, stallAssessEnabled,
1161
+ toolWatchdogActive, prompt, pendingTools, lastToolInputSummary: toolCounters.lastToolInputSummary, totalToolCalls: toolCounters.totalToolCalls,
1162
+ claudeProcess, stallCheckInterval, config, lastTokenActivityTime: ctx.lastTokenActivityTime,
1163
+ });
579
1164
  }, 10_000);
580
1165
 
1166
+ // Wire up the kill context now that stallCheckInterval exists
1167
+ toolTracking.setKillContext(claudeProcess, stallCheckInterval);
1168
+
581
1169
  return new Promise((resolve, reject) => {
582
- claudeProcess.on('close', (code) => {
1170
+ claudeProcess.on('close', async (code, signal) => {
583
1171
  clearInterval(stallCheckInterval);
584
- if (claudeProcess.pid) {
585
- runningProcesses.delete(claudeProcess.pid);
586
- }
587
- resolve({
588
- output: stdout,
589
- error: stderr || undefined,
590
- exitCode: code || 0,
591
- assistantResponse: ctx.accumulatedAssistantResponse || undefined,
592
- thinkingOutput: ctx.accumulatedThinking || undefined,
593
- toolUseHistory: ctx.accumulatedToolUse.length > 0 ? ctx.accumulatedToolUse : undefined,
594
- claudeSessionId: sessionCapture.claudeSessionId
595
- });
1172
+ watchdog?.clearAll();
1173
+ await classifyUnmatchedStderr(stderr, errorAlreadySurfaced, code, config);
1174
+ if (claudeProcess.pid) runningProcesses.delete(claudeProcess.pid);
1175
+ resolve(buildCloseResult(ctx, stdout, stderr, code, signal, sessionCapture));
596
1176
  });
597
1177
 
598
1178
  claudeProcess.on('error', (error: NodeJS.ErrnoException) => {
599
1179
  clearInterval(stallCheckInterval);
600
- if (claudeProcess.pid) {
601
- runningProcesses.delete(claudeProcess.pid);
602
- }
1180
+ watchdog?.clearAll();
1181
+ if (claudeProcess.pid) runningProcesses.delete(claudeProcess.pid);
603
1182
  handleSpawnError(error, config, reject);
604
1183
  });
605
1184
  });
606
1185
  }
1186
+
1187
/**
 * Assemble the `ExecutionResult` returned when the Claude process closes.
 *
 * The native timeout detector is flushed first (which may emit trailing text
 * through the output callback). The exit code is the process's own code when
 * it has one; for a signal-terminated process it is `128 + signal number`
 * (128 if the number is unknown); otherwise 0. Empty strings, empty tool
 * history and all-zero token usage are reported as `undefined`.
 *
 * @param ctx Stream handler context with everything accumulated during the run.
 * @param stdout Raw stdout collected from the process.
 * @param stderr Raw stderr collected from the process.
 * @param code Exit code, or `null` when the process was killed by a signal.
 * @param signal Terminating signal, or `null`.
 * @param sessionCapture Holder of the Claude session id, if captured.
 */
function buildCloseResult(
  ctx: StreamHandlerContext,
  stdout: string,
  stderr: string,
  code: number | null,
  signal: NodeJS.Signals | null,
  sessionCapture: { claudeSessionId?: string },
): ExecutionResult {
  // Flush before anything else so detector state is final when read below.
  const postTimeoutOutput = flushNativeTimeoutBuffers(ctx);

  // Text still held back by resume assessment is handed to the caller as-is.
  let resumeBufferedOutput: string | undefined;
  if (ctx.resumeAssessmentActive) {
    resumeBufferedOutput = ctx.resumeAssessmentBuffer || undefined;
  }

  let exitCode: number;
  if (code != null) {
    exitCode = code;
  } else if (signal) {
    exitCode = 128 + (signalToNumber(signal) ?? 0);
  } else {
    exitCode = 0;
  }

  const { inputTokens, outputTokens } = ctx.apiTokenUsage;
  const sawTokenUsage = inputTokens > 0 || outputTokens > 0;
  const toolHistory = ctx.accumulatedToolUse;

  return {
    output: stdout,
    error: stderr || undefined,
    exitCode,
    signalName: signal || undefined,
    assistantResponse: ctx.accumulatedAssistantResponse || undefined,
    thinkingOutput: ctx.accumulatedThinking || undefined,
    toolUseHistory: toolHistory.length > 0 ? toolHistory : undefined,
    claudeSessionId: sessionCapture.claudeSessionId,
    nativeTimeoutCount: ctx.nativeTimeoutDetector.timeoutCount || undefined,
    postTimeoutOutput,
    resumeBufferedOutput,
    apiTokenUsage: sawTokenUsage ? { ...ctx.apiTokenUsage } : undefined,
  };
}