mstro-app 0.1.57 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/bin/commands/login.js +27 -14
  2. package/bin/commands/logout.js +35 -1
  3. package/bin/commands/status.js +1 -1
  4. package/bin/mstro.js +5 -108
  5. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  6. package/dist/server/cli/headless/claude-invoker.js +432 -103
  7. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  8. package/dist/server/cli/headless/index.d.ts +2 -1
  9. package/dist/server/cli/headless/index.d.ts.map +1 -1
  10. package/dist/server/cli/headless/index.js +2 -0
  11. package/dist/server/cli/headless/index.js.map +1 -1
  12. package/dist/server/cli/headless/prompt-utils.d.ts +5 -8
  13. package/dist/server/cli/headless/prompt-utils.d.ts.map +1 -1
  14. package/dist/server/cli/headless/prompt-utils.js +40 -5
  15. package/dist/server/cli/headless/prompt-utils.js.map +1 -1
  16. package/dist/server/cli/headless/runner.d.ts +1 -1
  17. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  18. package/dist/server/cli/headless/runner.js +29 -7
  19. package/dist/server/cli/headless/runner.js.map +1 -1
  20. package/dist/server/cli/headless/stall-assessor.d.ts +77 -1
  21. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  22. package/dist/server/cli/headless/stall-assessor.js +336 -20
  23. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  24. package/dist/server/cli/headless/tool-watchdog.d.ts +67 -0
  25. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -0
  26. package/dist/server/cli/headless/tool-watchdog.js +296 -0
  27. package/dist/server/cli/headless/tool-watchdog.js.map +1 -0
  28. package/dist/server/cli/headless/types.d.ts +80 -1
  29. package/dist/server/cli/headless/types.d.ts.map +1 -1
  30. package/dist/server/cli/improvisation-session-manager.d.ts +109 -2
  31. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  32. package/dist/server/cli/improvisation-session-manager.js +737 -132
  33. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  34. package/dist/server/index.js +5 -10
  35. package/dist/server/index.js.map +1 -1
  36. package/dist/server/mcp/bouncer-integration.d.ts.map +1 -1
  37. package/dist/server/mcp/bouncer-integration.js +18 -0
  38. package/dist/server/mcp/bouncer-integration.js.map +1 -1
  39. package/dist/server/mcp/security-audit.d.ts +2 -2
  40. package/dist/server/mcp/security-audit.d.ts.map +1 -1
  41. package/dist/server/mcp/security-audit.js +12 -8
  42. package/dist/server/mcp/security-audit.js.map +1 -1
  43. package/dist/server/mcp/security-patterns.d.ts.map +1 -1
  44. package/dist/server/mcp/security-patterns.js +9 -4
  45. package/dist/server/mcp/security-patterns.js.map +1 -1
  46. package/dist/server/routes/improvise.js +6 -6
  47. package/dist/server/routes/improvise.js.map +1 -1
  48. package/dist/server/services/analytics.d.ts +2 -0
  49. package/dist/server/services/analytics.d.ts.map +1 -1
  50. package/dist/server/services/analytics.js +13 -3
  51. package/dist/server/services/analytics.js.map +1 -1
  52. package/dist/server/services/platform.d.ts.map +1 -1
  53. package/dist/server/services/platform.js +4 -9
  54. package/dist/server/services/platform.js.map +1 -1
  55. package/dist/server/services/sandbox-utils.d.ts +6 -0
  56. package/dist/server/services/sandbox-utils.d.ts.map +1 -0
  57. package/dist/server/services/sandbox-utils.js +72 -0
  58. package/dist/server/services/sandbox-utils.js.map +1 -0
  59. package/dist/server/services/settings.d.ts +6 -0
  60. package/dist/server/services/settings.d.ts.map +1 -1
  61. package/dist/server/services/settings.js +21 -0
  62. package/dist/server/services/settings.js.map +1 -1
  63. package/dist/server/services/terminal/pty-manager.d.ts +3 -51
  64. package/dist/server/services/terminal/pty-manager.d.ts.map +1 -1
  65. package/dist/server/services/terminal/pty-manager.js +14 -100
  66. package/dist/server/services/terminal/pty-manager.js.map +1 -1
  67. package/dist/server/services/websocket/handler.d.ts +36 -15
  68. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  69. package/dist/server/services/websocket/handler.js +452 -223
  70. package/dist/server/services/websocket/handler.js.map +1 -1
  71. package/dist/server/services/websocket/types.d.ts +6 -2
  72. package/dist/server/services/websocket/types.d.ts.map +1 -1
  73. package/hooks/bouncer.sh +11 -4
  74. package/package.json +4 -1
  75. package/server/cli/headless/claude-invoker.ts +602 -119
  76. package/server/cli/headless/index.ts +7 -1
  77. package/server/cli/headless/prompt-utils.ts +37 -5
  78. package/server/cli/headless/runner.ts +30 -8
  79. package/server/cli/headless/stall-assessor.ts +453 -22
  80. package/server/cli/headless/tool-watchdog.ts +390 -0
  81. package/server/cli/headless/types.ts +84 -1
  82. package/server/cli/improvisation-session-manager.ts +884 -143
  83. package/server/index.ts +5 -10
  84. package/server/mcp/bouncer-integration.ts +28 -0
  85. package/server/mcp/security-audit.ts +12 -8
  86. package/server/mcp/security-patterns.ts +8 -2
  87. package/server/routes/improvise.ts +6 -6
  88. package/server/services/analytics.ts +13 -3
  89. package/server/services/platform.test.ts +0 -10
  90. package/server/services/platform.ts +4 -10
  91. package/server/services/sandbox-utils.ts +78 -0
  92. package/server/services/settings.ts +25 -0
  93. package/server/services/terminal/pty-manager.ts +16 -127
  94. package/server/services/websocket/handler.ts +515 -251
  95. package/server/services/websocket/types.ts +10 -4
  96. package/dist/server/services/terminal/tmux-manager.d.ts +0 -82
  97. package/dist/server/services/terminal/tmux-manager.d.ts.map +0 -1
  98. package/dist/server/services/terminal/tmux-manager.js +0 -352
  99. package/dist/server/services/terminal/tmux-manager.js.map +0 -1
  100. package/server/services/terminal/tmux-manager.ts +0 -426
@@ -8,14 +8,17 @@
8
8
  */
9
9
 
10
10
  import { type ChildProcess, spawn } from 'node:child_process';
11
+ import { sanitizeEnvForSandbox } from '../../services/sandbox-utils.js';
11
12
  import { generateMcpConfig } from './mcp-config.js';
12
13
  import { detectErrorInStderr, } from './output-utils.js';
13
14
  import { buildMultimodalMessage } from './prompt-utils.js';
14
- import { assessStall, type StallContext } from './stall-assessor.js';
15
+ import { assessStall, assessToolTimeout, classifyError, type StallContext } from './stall-assessor.js';
16
+ import { ToolWatchdog } from './tool-watchdog.js';
15
17
  import type {
16
18
  ExecutionResult,
17
19
  ResolvedHeadlessConfig,
18
20
  ToolUseAccumulator,
21
+ ToolUseEvent,
19
22
  } from './types.js';
20
23
 
21
24
  export interface ClaudeInvokerOptions {
@@ -69,20 +72,32 @@ interface StallAssessmentParams {
69
72
  now: number;
70
73
  extensionsGranted: number;
71
74
  maxExtensions: number;
75
+ toolWatchdogActive?: boolean;
72
76
  }
73
77
 
74
78
  /** Run stall assessment and return updated state if extended, null otherwise */
75
79
  async function runStallAssessment(
76
80
  params: StallAssessmentParams,
77
81
  ): Promise<{ extensionsGranted: number; currentKillDeadline: number } | null> {
78
- const { stallCtx, config, now, extensionsGranted, maxExtensions } = params;
82
+ const { stallCtx, config, now, extensionsGranted, maxExtensions, toolWatchdogActive } = params;
79
83
  try {
80
- const verdict = await assessStall(stallCtx, config.claudeCommand, config.verbose);
84
+ const verdict = await assessStall(stallCtx, config.claudeCommand, config.verbose, toolWatchdogActive);
81
85
  if (verdict.action === 'extend') {
82
86
  const newExtensions = extensionsGranted + 1;
83
- config.outputCallback?.(
84
- `\n[[MSTRO_STALL_EXTENDED]] Assessment: process likely working. ${verdict.reason}. Extension ${newExtensions}/${maxExtensions}.\n`
85
- );
87
+ const elapsedMin = Math.round(stallCtx.elapsedTotalMs / 60_000);
88
+ const pendingNames = stallCtx.pendingToolNames ?? new Set<string>();
89
+
90
+ // Emit a progress message instead of a scary stall warning.
91
+ // Task subagents get a friendlier message since long silence is expected.
92
+ if (pendingNames.has('Task')) {
93
+ config.outputCallback?.(
94
+ `\n[[MSTRO_STALL_EXTENDED]] Task subagent still running (${elapsedMin} min elapsed). ${verdict.reason}.\n`
95
+ );
96
+ } else {
97
+ config.outputCallback?.(
98
+ `\n[[MSTRO_STALL_EXTENDED]] Process still working (${elapsedMin} min elapsed). ${verdict.reason}. Extension ${newExtensions}/${maxExtensions}.\n`
99
+ );
100
+ }
86
101
  if (config.verbose) {
87
102
  console.log(`[STALL] Extended by ${Math.round(verdict.extensionMs / 60_000)} min: ${verdict.reason}`);
88
103
  }
@@ -102,6 +117,136 @@ async function runStallAssessment(
102
117
  return null;
103
118
  }
104
119
 
120
// ========== Native Timeout Detection ==========

/**
 * Regex matching Claude Code's internal tool timeout messages.
 * Applied to one complete, trimmed line. Capture groups: tool name,
 * action (`continuing` | `retrying`), preserved result count.
 */
const NATIVE_TIMEOUT_PATTERN = /^(\w+) timed out — (continuing|retrying) with (\d+) results? preserved$/;

/** Quick prefix check: does incomplete text already read "<word> timed…"? */
const TIMEOUT_PREFIX_PATTERN = /^(\w+) timed/;

/** Known tool names that Claude Code may report timeouts for */
const NATIVE_TIMEOUT_TOOL_NAMES = new Set([
  'Read', 'Grep', 'Glob', 'Edit', 'Write', 'Bash',
  'WebFetch', 'WebSearch', 'Task', 'TodoRead', 'TodoWrite',
  'NotebookEdit', 'MultiEdit',
]);

/** Structured form of one native timeout line. */
interface NativeTimeoutEvent {
  /** Tool name captured from the start of the line. */
  toolName: string;
  /** Whether the message said the tool is continuing or retrying. */
  action: 'continuing' | 'retrying';
  /** Number of results the message reports as preserved. */
  preservedCount: number;
}

/**
 * Detects Claude Code's internal tool timeout messages in the text stream.
 *
 * Text is examined at newline boundaries so only complete lines are matched.
 * Non-matching text is forwarded immediately to minimize streaming latency;
 * a trailing fragment is held back only while it could still grow into a
 * timeout line for one of the known tool names.
 */
class NativeTimeoutDetector {
  /** Trailing fragment held back because it may be the start of a timeout line. */
  private lineBuffer = '';
  /**
   * True when the beginning of the current (unterminated) line has already been
   * forwarded. The rest of that line must not be matched as if it started a line.
   */
  private midLine = false;
  private detectedTimeouts: NativeTimeoutEvent[] = [];
  /** Text buffered after native timeouts — held back from streaming until context is assessed */
  private postTimeoutBuffer = '';

  /** Parse one complete line; returns null when it is not a native timeout message. */
  private static parseTimeoutLine(line: string): NativeTimeoutEvent | null {
    const match = line.trim().match(NATIVE_TIMEOUT_PATTERN);
    if (!match) return null;
    return {
      toolName: match[1],
      action: match[2] as 'continuing' | 'retrying',
      preservedCount: parseInt(match[3], 10),
    };
  }

  /**
   * Decide whether an unterminated fragment at the start of a line could still
   * become a timeout message. Covers both "Read timed ou" (prefix pattern already
   * satisfied) and shorter fragments such as "Read" or "Rea" that arrive when the
   * stream splits the line before " timed".
   */
  private static couldBecomeTimeoutLine(partial: string): boolean {
    const candidate = partial.trimStart();
    if (candidate === '') return false;

    const started = candidate.match(TIMEOUT_PREFIX_PATTERN);
    if (started) return NATIVE_TIMEOUT_TOOL_NAMES.has(started[1]);

    for (const toolName of NATIVE_TIMEOUT_TOOL_NAMES) {
      if (`${toolName} timed`.startsWith(candidate)) return true;
    }
    return false;
  }

  /**
   * Process a text_delta chunk.
   * Returns passthrough text (for outputCallback) and any detected timeouts.
   *
   * After the first native timeout is detected, subsequent passthrough text
   * is held in postTimeoutBuffer instead of returned as passthrough. This
   * prevents confused "What were you working on?" responses from streaming
   * to the user before context loss can be assessed.
   */
  processChunk(text: string): { passthrough: string; timeouts: NativeTimeoutEvent[] } {
    const timeouts: NativeTimeoutEvent[] = [];
    let passthrough = '';

    this.lineBuffer += text;

    const lines = this.lineBuffer.split('\n');
    const incomplete = lines.pop() ?? '';

    for (const line of lines) {
      // A line whose beginning was already forwarded is only a continuation;
      // matching it against the anchored pattern would produce false positives.
      const event = this.midLine ? null : NativeTimeoutDetector.parseTimeoutLine(line);
      this.midLine = false; // the newline terminates whatever line we were in

      if (event) {
        timeouts.push(event);
        this.detectedTimeouts.push(event);
        // Suppress this line from passthrough — replaced by structured marker
      } else {
        passthrough += `${line}\n`;
      }
    }

    // Handle incomplete trailing text
    if (incomplete && !this.midLine && NativeTimeoutDetector.couldBecomeTimeoutLine(incomplete)) {
      // Looks like the start of a timeout message — hold it
      this.lineBuffer = incomplete;
    } else {
      if (incomplete) {
        passthrough += incomplete;
        // Whitespace-only fragments do not count: complete lines are trimmed
        // before matching, so leading whitespace never prevents a match.
        if (incomplete.trim() !== '') this.midLine = true;
      }
      this.lineBuffer = '';
    }

    // After native timeouts, buffer passthrough text instead of returning it.
    // The session manager will assess context loss and either flush or discard.
    if (this.detectedTimeouts.length > 0 && passthrough) {
      this.postTimeoutBuffer += passthrough;
      passthrough = '';
    }

    return { passthrough, timeouts };
  }

  /**
   * Flush any held buffer (call on stream end).
   * Also checks the remaining buffer for a timeout pattern so the last
   * timeout message (without trailing newline) is always counted.
   *
   * @returns the held text, or '' when the held text was itself a timeout message
   */
  flush(): string {
    const remaining = this.lineBuffer;
    this.lineBuffer = '';
    this.midLine = false;

    // Check if the unflushed buffer IS a timeout message
    if (remaining) {
      const event = NativeTimeoutDetector.parseTimeoutLine(remaining);
      if (event) {
        this.detectedTimeouts.push(event);
        // Return empty — this was a timeout message, not user-visible text
        return '';
      }
    }

    return remaining;
  }

  /** Get count of detected timeouts */
  get timeoutCount(): number {
    return this.detectedTimeouts.length;
  }

  /** Get buffered post-timeout text (for session manager to flush or discard) */
  get bufferedPostTimeoutOutput(): string {
    return this.postTimeoutBuffer;
  }
}
249
+
105
250
  // ========== Stream Event Handlers ==========
106
251
 
107
252
  interface StreamHandlerContext {
@@ -110,6 +255,12 @@ interface StreamHandlerContext {
110
255
  accumulatedThinking: string;
111
256
  accumulatedToolUse: ToolUseAccumulator[];
112
257
  toolInputBuffers: Map<number, { name: string; id: string; inputJson: string; startTime: number }>;
258
+ nativeTimeoutDetector: NativeTimeoutDetector;
259
+ /** When true, assistant text is buffered instead of forwarded to outputCallback.
260
+ * Active during resume mode until thinking/tool activity confirms Claude has context. */
261
+ resumeAssessmentActive: boolean;
262
+ /** Buffered assistant text during resume assessment */
263
+ resumeAssessmentBuffer: string;
113
264
  }
114
265
 
115
266
  function handleSessionCapture(
@@ -133,6 +284,15 @@ function handleThinkingDelta(event: any, ctx: StreamHandlerContext): string {
133
284
  return ctx.accumulatedThinking;
134
285
  }
135
286
 
287
+ // Thinking activity confirms Claude has context — flush resume buffer
288
+ if (ctx.resumeAssessmentActive) {
289
+ ctx.resumeAssessmentActive = false;
290
+ if (ctx.resumeAssessmentBuffer) {
291
+ ctx.config.outputCallback?.(ctx.resumeAssessmentBuffer);
292
+ ctx.resumeAssessmentBuffer = '';
293
+ }
294
+ }
295
+
136
296
  const thinking = event.delta.thinking;
137
297
  const updated = ctx.accumulatedThinking + thinking;
138
298
 
@@ -157,10 +317,33 @@ function handleTextDelta(event: any, ctx: StreamHandlerContext): string {
157
317
  }
158
318
 
159
319
  const text = event.delta.text;
320
+
321
+ // Always accumulate raw text for checkpoint context
160
322
  const updated = ctx.accumulatedAssistantResponse + text;
161
323
 
162
- if (ctx.config.outputCallback) {
163
- ctx.config.outputCallback(text);
324
+ // Route through native timeout detector to intercept Claude Code's internal timeout messages
325
+ const { passthrough, timeouts } = ctx.nativeTimeoutDetector.processChunk(text);
326
+
327
+ // Emit structured markers for detected native timeouts
328
+ for (const timeout of timeouts) {
329
+ ctx.config.outputCallback?.(
330
+ `\n[[MSTRO_NATIVE_TIMEOUT]] ${timeout.toolName} timed out \u2014 ${timeout.action} with ${timeout.preservedCount} results preserved\n`
331
+ );
332
+ }
333
+
334
+ // When resume assessment is active, buffer text instead of forwarding.
335
+ // This prevents confused "What were you working on?" responses from streaming
336
+ // to the user before we can assess whether Claude retained context.
337
+ if (ctx.resumeAssessmentActive) {
338
+ if (passthrough) {
339
+ ctx.resumeAssessmentBuffer += passthrough;
340
+ }
341
+ return updated;
342
+ }
343
+
344
+ // Forward non-timeout text to output
345
+ if (passthrough && ctx.config.outputCallback) {
346
+ ctx.config.outputCallback(passthrough);
164
347
  }
165
348
 
166
349
  return updated;
@@ -174,6 +357,15 @@ function handleToolStart(event: any, ctx: StreamHandlerContext): void {
174
357
  return;
175
358
  }
176
359
 
360
+ // Tool activity confirms Claude has context — flush resume buffer
361
+ if (ctx.resumeAssessmentActive) {
362
+ ctx.resumeAssessmentActive = false;
363
+ if (ctx.resumeAssessmentBuffer) {
364
+ ctx.config.outputCallback?.(ctx.resumeAssessmentBuffer);
365
+ ctx.resumeAssessmentBuffer = '';
366
+ }
367
+ }
368
+
177
369
  const toolName = event.content_block.name;
178
370
  const toolId = event.content_block.id;
179
371
  const index = event.index;
@@ -298,6 +490,20 @@ function processStreamLines(
298
490
  }
299
491
 
300
492
  function processStreamEvent(parsed: any, ctx: StreamHandlerContext): void {
493
+ // Handle error events from Claude CLI (API errors, model errors, etc.)
494
+ if (parsed.type === 'error') {
495
+ const errorMessage = parsed.error?.message || parsed.message || JSON.stringify(parsed);
496
+ ctx.config.outputCallback?.(`\n[[MSTRO_ERROR:CLAUDE_ERROR]] ${errorMessage}\n`);
497
+ return;
498
+ }
499
+
500
+ // Handle result events that contain error info
501
+ if (parsed.type === 'result' && parsed.is_error) {
502
+ const errorMessage = parsed.error || parsed.result || 'Unknown error in result';
503
+ ctx.config.outputCallback?.(`\n[[MSTRO_ERROR:CLAUDE_RESULT_ERROR]] ${errorMessage}\n`);
504
+ return;
505
+ }
506
+
301
507
  if (parsed.type === 'stream_event' && parsed.event) {
302
508
  const event = parsed.event;
303
509
  ctx.accumulatedThinking = handleThinkingDelta(event, ctx);
@@ -309,6 +515,42 @@ function processStreamEvent(parsed: any, ctx: StreamHandlerContext): void {
309
515
  handleToolResult(parsed, ctx);
310
516
  }
311
517
 
518
+ // ========== Close Handler Helpers ==========
519
+
520
/**
 * Flush native timeout detector buffers at stream end.
 *
 * - No native timeout detected: any text the detector was still holding is
 *   ordinary assistant output, so it is forwarded to `outputCallback` and
 *   `undefined` is returned.
 * - At least one native timeout detected: nothing is forwarded here; the
 *   buffered post-timeout text plus any held remainder is returned so the
 *   caller can decide what to show.
 *
 * @returns post-timeout output, or `undefined` when there is none
 */
function flushNativeTimeoutBuffers(ctx: StreamHandlerContext): string | undefined {
  const detector = ctx.nativeTimeoutDetector;
  // flush() may itself recognise a final timeout line, so read the count afterwards.
  const remaining = detector.flush();

  if (detector.timeoutCount === 0) {
    // Previously this branch was unreachable: a non-empty `remaining` made the
    // combined string truthy, so held text was never emitted.
    if (remaining) {
      ctx.config.outputCallback?.(remaining);
    }
    return undefined;
  }

  const buffered = detector.bufferedPostTimeoutOutput;
  return (buffered + remaining) || undefined;
}
534
+
535
/**
 * Ask `classifyError` to label stderr that no earlier check surfaced.
 *
 * Does nothing when stderr is empty, when an error was already surfaced, or
 * when the process exit code is 0. A classification result is reported through
 * `config.outputCallback` as an `[[MSTRO_ERROR:<code>]]` marker. Any failure
 * during classification or reporting is deliberately ignored (best effort).
 */
async function classifyUnmatchedStderr(
  stderr: string,
  errorAlreadySurfaced: boolean,
  code: number | null,
  config: ResolvedHeadlessConfig,
): Promise<void> {
  const nothingToClassify = !stderr || errorAlreadySurfaced || code === 0;
  if (nothingToClassify) {
    return;
  }

  try {
    const verdict = await classifyError(stderr, config.claudeCommand, config.verbose);
    if (!verdict) {
      return;
    }
    config.outputCallback?.(`\n[[MSTRO_ERROR:${verdict.errorCode}]] ${verdict.message}\n`);
  } catch {
    // Haiku classification failed — proceed without it
  }
}
553
+
312
554
  // ========== Error Handling ==========
313
555
 
314
556
  const SPAWN_ERROR_MAP: Record<string, { code: string; message: string }> = {
@@ -386,24 +628,281 @@ function buildClaudeArgs(
386
628
  return args;
387
629
  }
388
630
 
389
- /**
390
- * Execute a Claude CLI command for a single movement
391
- * Supports multimodal prompts via --input-format stream-json when image attachments are present
392
- */
393
- export async function executeClaudeCommand(
631
/**
 * Send the prompt plus image attachments to the Claude process over stdin.
 *
 * Registers an `error` listener on stdin first, so a failed write is reported
 * through `config.outputCallback` (and logged when verbose). It then writes the
 * message built by `buildMultimodalMessage` and closes stdin.
 *
 * NOTE(review): relies on the caller having verified that `claudeProcess.stdin`
 * and `config.imageAttachments` are present (both are non-null asserted).
 */
function writeImageAttachmentsToStdin(
  claudeProcess: ChildProcess,
  prompt: string,
  config: ResolvedHeadlessConfig,
): void {
  const stdin = claudeProcess.stdin!;

  stdin.on('error', (err) => {
    if (config.verbose) {
      console.error('[STDIN] Write error:', err.message);
    }
    config.outputCallback?.(`\n[[MSTRO_ERROR:STDIN_WRITE_FAILED]] Failed to send image data to Claude: ${err.message}\n`);
  });

  const payload = buildMultimodalMessage(prompt, config.imageAttachments!);
  stdin.write(payload);
  stdin.end();
}
647
+
648
/** Mutable state for stall detection, shared between the interval callback and the outer function */
interface StallState {
  /** Timestamp (ms) of the most recent observed activity. */
  lastActivityTime: number;
  /** True once the "will terminate" warning has been emitted for the current silence. */
  stallWarningEmitted: boolean;
  /** True while an assessment is awaited, so overlapping ticks do not start another. */
  assessmentInProgress: boolean;
  /** Number of deadline extensions granted so far. */
  extensionsGranted: number;
  /** Timestamp (ms) at which a silent process is terminated. */
  currentKillDeadline: number;
  /** Earliest timestamp (ms) at which another warning/assessment may run. */
  nextWarningAfter: number;
}

/**
 * Run a single stall-check tick. Extracted to reduce cognitive complexity of executeClaudeCommand.
 *
 * Order of checks:
 * 1. Total runtime reached `stallHardCapMs` → terminate.
 * 2. `currentKillDeadline` passed → terminate.
 * 3. Not silent long enough, warning already emitted, inside the post-extension
 *    quiet period, or an assessment already running → do nothing.
 * 4. If assessment is enabled and extensions remain, run it; on an extension,
 *    update the state and return.
 * 5. Otherwise emit the stall warning.
 */
async function runStallCheckTick(
  state: StallState,
  opts: {
    perfStart: number;
    stallWarningMs: number;
    stallHardCapMs: number;
    maxExtensions: number;
    stallAssessEnabled: boolean;
    toolWatchdogActive: boolean;
    prompt: string;
    pendingTools: Map<string, string>;
    lastToolInputSummary: string | undefined;
    totalToolCalls: number;
    claudeProcess: ChildProcess;
    stallCheckInterval: ReturnType<typeof setInterval>;
    config: ResolvedHeadlessConfig;
  },
): Promise<void> {
  const now = Date.now();
  const silenceMs = now - state.lastActivityTime;
  const totalElapsed = now - opts.perfStart;

  if (totalElapsed >= opts.stallHardCapMs) {
    terminateStallProcess(opts.claudeProcess, opts.stallCheckInterval, opts.config,
      `\n[[MSTRO_ERROR:EXECUTION_STALLED]] Hard time limit reached (${Math.round(opts.stallHardCapMs / 60000)} min total). Terminating process.\n`
    );
    return;
  }

  if (now >= state.currentKillDeadline) {
    terminateStallProcess(opts.claudeProcess, opts.stallCheckInterval, opts.config,
      `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Terminating process.\n`
    );
    return;
  }

  if (silenceMs < opts.stallWarningMs || state.stallWarningEmitted || now < state.nextWarningAfter || state.assessmentInProgress) return;

  const stallCtx: StallContext = {
    originalPrompt: opts.prompt,
    silenceMs,
    // Most recently started tool that has not produced a result yet (Map keeps insertion order).
    lastToolName: opts.pendingTools.size > 0 ? Array.from(opts.pendingTools.values()).pop() : undefined,
    lastToolInputSummary: opts.lastToolInputSummary,
    pendingToolCount: opts.pendingTools.size,
    pendingToolNames: new Set(opts.pendingTools.values()),
    totalToolCalls: opts.totalToolCalls,
    elapsedTotalMs: totalElapsed,
  };

  if (opts.stallAssessEnabled && state.extensionsGranted < opts.maxExtensions) {
    const activityBeforeAssessment = state.lastActivityTime;
    let result: { extensionsGranted: number; currentKillDeadline: number } | null;

    state.assessmentInProgress = true;
    try {
      result = await runStallAssessment({ stallCtx, config: opts.config, now, extensionsGranted: state.extensionsGranted, maxExtensions: opts.maxExtensions, toolWatchdogActive: opts.toolWatchdogActive });
    } finally {
      // Always release the guard; a stuck flag would disable every later tick.
      state.assessmentInProgress = false;
    }

    if (result) {
      state.extensionsGranted = result.extensionsGranted;
      state.currentKillDeadline = result.currentKillDeadline;
      state.nextWarningAfter = now + opts.stallWarningMs;
      return;
    }

    // Activity arrived while the assessment was awaited: the silence measured
    // above is stale, so do not emit (and latch) a warning for it.
    if (state.lastActivityTime !== activityBeforeAssessment) return;
  }

  state.stallWarningEmitted = true;
  const killIn = Math.round((state.currentKillDeadline - now) / 60_000);
  opts.config.outputCallback?.(
    `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Will terminate in ${killIn} minutes if no activity.\n`
  );
}
727
+
728
+ // ========== Tool Tracking Setup ==========
729
+
730
/** Shared mutable state for tool event handlers */
interface ToolTrackingState {
  /** Tools that have started but not yet produced a result, keyed by tool id → tool name. */
  pendingTools: Map<string, string>;
  /** Running totals: summary of the last completed tool input and the number of tool starts. */
  counters: { lastToolInputSummary: string | undefined; totalToolCalls: number };
  /** Tool id → tool name, recorded at tool start. */
  toolIdToName: Map<string, string>;
  /** Tool id → complete input, recorded when the tool input is complete. */
  toolIdToInput: Map<string, Record<string, unknown>>;
  /** Watchdog instance, or null when the tool watchdog is disabled. */
  watchdog: ToolWatchdog | null;
  /** Stall-detection state updated when a tool result arrives. */
  stallState: StallState;
  /** Stream handler context holding the accumulated tool-use entries. */
  ctx: StreamHandlerContext;
  /** Called with the id of the tool whose watch expired. */
  onTimeout: (hungToolId: string) => void;
}
741
+
742
/** Handles returned by setupToolTracking. */
interface ToolTrackingResult {
  /** Tools that have started but not yet produced a result, keyed by tool id → tool name. */
  pendingTools: Map<string, string>;
  /** Watchdog instance, or null when the tool watchdog is disabled. */
  watchdog: ToolWatchdog | null;
  /** Whether the tool watchdog is enabled for this run. */
  toolWatchdogActive: boolean;
  /** Running totals: summary of the last completed tool input and the number of tool starts. */
  counters: { lastToolInputSummary: string | undefined; totalToolCalls: number };
  /** Must be called after stallCheckInterval is created, to wire up the kill handler */
  setKillContext: (claudeProcess: ChildProcess, stallCheckInterval: ReturnType<typeof setInterval>) => void;
}
750
+
751
/**
 * React to a tool_start event: mark the tool as pending, count it, remember its
 * name, and (when a watchdog exists) begin watching it with an empty input.
 * The caller guarantees `toolId` and `toolName` are present.
 */
function onToolStart(event: ToolUseEvent, s: ToolTrackingState): void {
  const toolId = event.toolId!;
  const startedTool = event.toolName!;

  s.pendingTools.set(toolId, startedTool);
  s.counters.totalToolCalls += 1;
  s.toolIdToName.set(toolId, startedTool);

  if (!s.watchdog) {
    return;
  }
  s.watchdog.startWatch(toolId, startedTool, {}, () => { s.onTimeout(toolId); });
}
761
+
762
/**
 * React to a tool_complete event: store a summary and the full input for the
 * tool, then (when a watchdog exists and the tool name is known) call
 * `startWatch` again, this time with the complete input.
 */
function onToolComplete(event: ToolUseEvent, s: ToolTrackingState): void {
  const toolId = event.toolId!;
  const input = event.completeInput;

  s.counters.lastToolInputSummary = summarizeToolInput(input);
  s.toolIdToInput.set(toolId, input);

  const knownName = s.watchdog ? s.toolIdToName.get(toolId) : undefined;
  if (s.watchdog && knownName) {
    s.watchdog.startWatch(toolId, knownName, input, () => { s.onTimeout(toolId); });
  }
}
773
+
774
/**
 * Handle tool_result events.
 *
 * Removes the tool from the pending set and treats the result as activity
 * (resets the stall warning flag and the last-activity timestamp). When a
 * watchdog exists, the tool's duration is recorded if both the tool name and a
 * non-zero duration are known, and the watch for this tool id is always cleared.
 */
function onToolResult(event: ToolUseEvent, s: ToolTrackingState): void {
  const id = event.toolId!;
  s.pendingTools.delete(id);
  s.stallState.stallWarningEmitted = false;
  s.stallState.lastActivityTime = Date.now();

  if (!s.watchdog) return;

  const toolEntry = s.ctx.accumulatedToolUse.find(t => t.toolId === id);
  const toolName = s.toolIdToName.get(id);
  if (toolName && toolEntry?.duration) {
    s.watchdog.recordCompletion(toolName, toolEntry.duration);
  }

  // Clear the watch even when no accumulated entry was found: the tool has
  // finished, and an armed watch for it could otherwise fire and kill the process.
  s.watchdog.clearWatch(id);
}
788
+
789
/**
 * Pick the value shown in tool-timeout messages: the input's `url` when it is
 * truthy, otherwise its `query` when that is truthy, otherwise nothing.
 * The chosen value is converted with `String(...)`.
 */
function resolveToolUrl(toolInput: Record<string, unknown>): string | undefined {
  const target = toolInput.url || toolInput.query;
  return target ? String(target) : undefined;
}
795
+
796
/**
 * Handle a tool timeout by building a checkpoint and killing the process.
 *
 * Steps: build a checkpoint from the accumulated output, emit a
 * `[[MSTRO_TOOL_TIMEOUT]]` marker, hand the checkpoint (if any) to
 * `config.onToolTimeout`, stop all watches and the stall-check interval, then
 * send SIGTERM and escalate to SIGKILL after 5 s if the process is still running.
 *
 * @param hungToolId id of the tool whose watch expired
 * @param killCtx process and interval to shut down
 */
function executeToolTimeout(
  hungToolId: string,
  watchdog: ToolWatchdog,
  killCtx: { claudeProcess: ChildProcess; stallCheckInterval: ReturnType<typeof setInterval> },
  s: ToolTrackingState,
  config: ResolvedHeadlessConfig,
  prompt: string,
  sessionCapture: { claudeSessionId?: string },
  perfStart: number,
): void {
  const checkpoint = watchdog.buildCheckpoint(
    prompt, s.ctx.accumulatedAssistantResponse, s.ctx.accumulatedThinking,
    s.ctx.accumulatedToolUse, hungToolId, sessionCapture.claudeSessionId, perfStart,
  );

  const toolName = s.toolIdToName.get(hungToolId) || 'unknown';
  const toolInput = s.toolIdToInput.get(hungToolId) || {};
  const timeoutMs = watchdog.getTimeout(toolName);
  const url = resolveToolUrl(toolInput);

  config.outputCallback?.(
    `\n[[MSTRO_TOOL_TIMEOUT]] ${toolName} timed out after ${Math.round(timeoutMs / 1000)}s${url ? ` fetching: ${url.slice(0, 100)}` : ''}. ${s.ctx.accumulatedToolUse.filter(t => t.result !== undefined).length} completed results preserved.\n`
  );

  if (checkpoint) {
    config.onToolTimeout?.(checkpoint);
  }

  verboseLog(config.verbose, `[WATCHDOG] Killing process due to ${toolName} timeout`);
  watchdog.clearAll();
  clearInterval(killCtx.stallCheckInterval);

  const proc = killCtx.claudeProcess;
  proc.kill('SIGTERM');
  setTimeout(() => {
    // `proc.killed` only reports that a signal was sent (true right after the
    // SIGTERM above), not that the process exited. A process that has not
    // exited still has null exitCode and signalCode.
    const stillRunning = proc.exitCode === null && proc.signalCode === null;
    if (stillRunning) proc.kill('SIGKILL');
  }, 5000);
}
832
+
833
/**
 * Wire up tool activity tracking and, unless disabled, the tool watchdog.
 *
 * - Creates the pending-tool map and counters shared with the stall checker.
 * - Constructs a ToolWatchdog unless `config.enableToolWatchdog` is exactly `false`;
 *   its tiebreaker delegates to `assessToolTimeout`.
 * - Replaces `config.toolUseCallback` with a wrapper that runs the tracking
 *   handlers first and then the original callback (if any).
 * - The timeout handler is inert until `setKillContext` has been called,
 *   because the process and interval it needs do not exist yet.
 */
function setupToolTracking(
  config: ResolvedHeadlessConfig,
  stallState: StallState,
  ctx: StreamHandlerContext,
  sessionCapture: { claudeSessionId?: string },
  prompt: string,
  perfStart: number,
): ToolTrackingResult {
  const pendingTools = new Map<string, string>();
  const counters = { lastToolInputSummary: undefined as string | undefined, totalToolCalls: 0 };

  const toolWatchdogActive = config.enableToolWatchdog !== false;
  const watchdog = toolWatchdogActive
    ? new ToolWatchdog({
        profiles: config.toolTimeoutProfiles,
        verbose: config.verbose,
        onTiebreaker: async (toolName, toolInput, elapsedMs) =>
          assessToolTimeout(toolName, toolInput, elapsedMs, config.claudeCommand, config.verbose),
      })
    : null;

  // Deferred kill context — set after stallCheckInterval is created
  let killCtx: { claudeProcess: ChildProcess; stallCheckInterval: ReturnType<typeof setInterval> } | null = null;

  const trackingState: ToolTrackingState = {
    pendingTools,
    counters,
    toolIdToName: new Map(),
    toolIdToInput: new Map(),
    watchdog,
    stallState,
    ctx,
    onTimeout: (hungToolId) => {
      if (!watchdog || !killCtx) return;
      executeToolTimeout(hungToolId, watchdog, killCtx, trackingState, config, prompt, sessionCapture, perfStart);
    },
  };

  const downstreamCallback = config.toolUseCallback;

  config.toolUseCallback = (event) => {
    switch (event.type) {
      case 'tool_start':
        if (event.toolName && event.toolId) onToolStart(event, trackingState);
        break;
      case 'tool_complete':
        if (event.completeInput && event.toolId) onToolComplete(event, trackingState);
        break;
      case 'tool_result':
        if (event.toolId) onToolResult(event, trackingState);
        break;
      default:
        break;
    }
    downstreamCallback?.(event);
  };

  return {
    pendingTools,
    watchdog,
    toolWatchdogActive,
    counters,
    setKillContext: (claudeProcess, stallCheckInterval) => {
      killCtx = { claudeProcess, stallCheckInterval };
    },
  };
}
889
+
890
/**
 * Print each message with `console.log`, one call per message, but only when
 * `verbose` is truthy. Does nothing otherwise.
 */
function verboseLog(verbose: boolean | undefined, ...msgs: string[]): void {
  if (!verbose) {
    return;
  }
  msgs.forEach((msg) => {
    console.log(msg);
  });
}
896
+
897
+ /** Spawn the Claude CLI process and register it. Extracted to reduce cognitive complexity. */
898
+ function spawnAndRegister(
899
+ config: ResolvedHeadlessConfig,
900
+ prompt: string,
901
+ hasImageAttachments: boolean,
902
+ useStreamJson: boolean,
903
+ runningProcesses: Map<number, ChildProcess>,
904
+ perfStart: number,
905
+ ): ChildProcess {
407
906
  const mcpConfigPath = generateMcpConfig(config.workingDir, config.verbose);
408
907
 
409
908
  if (!mcpConfigPath && config.outputCallback) {
@@ -412,32 +911,52 @@ export async function executeClaudeCommand(
412
911
  );
413
912
  }
414
913
 
415
- const args = buildClaudeArgs(config, prompt, !!hasImageAttachments, !!useStreamJson, mcpConfigPath);
914
+ const args = buildClaudeArgs(config, prompt, hasImageAttachments, useStreamJson, mcpConfigPath);
416
915
 
417
- if (config.verbose) {
418
- console.log(`[PERF] About to spawn: ${Date.now() - perfStart}ms`);
419
- console.log(`[PERF] Command: ${config.claudeCommand} ${args.join(' ')}`);
420
- }
916
+ verboseLog(config.verbose,
917
+ `[PERF] About to spawn: ${Date.now() - perfStart}ms`,
918
+ `[PERF] Command: ${config.claudeCommand} ${args.join(' ')}`,
919
+ );
421
920
 
422
921
  const claudeProcess = spawn(config.claudeCommand, args, {
423
922
  cwd: config.workingDir,
424
- env: { ...process.env },
923
+ env: config.sandboxed
924
+ ? sanitizeEnvForSandbox(process.env, config.workingDir)
925
+ : { ...process.env },
425
926
  stdio: [hasImageAttachments ? 'pipe' : 'ignore', 'pipe', 'pipe']
426
927
  });
427
928
 
428
929
  if (hasImageAttachments && claudeProcess.stdin) {
429
- const multimodalMessage = buildMultimodalMessage(prompt, config.imageAttachments!);
430
- claudeProcess.stdin.write(multimodalMessage);
431
- claudeProcess.stdin.end();
930
+ writeImageAttachmentsToStdin(claudeProcess, prompt, config);
432
931
  }
433
932
 
434
933
  if (claudeProcess.pid) {
435
934
  runningProcesses.set(claudeProcess.pid, claudeProcess);
436
935
  }
437
936
 
438
- if (config.verbose) {
439
- console.log(`[PERF] Spawned: ${Date.now() - perfStart}ms`);
440
- }
937
+ verboseLog(config.verbose, `[PERF] Spawned: ${Date.now() - perfStart}ms`);
938
+
939
+ return claudeProcess;
940
+ }
941
+
942
+ /**
943
+ * Execute a Claude CLI command for a single movement
944
+ * Supports multimodal prompts via --input-format stream-json when image attachments are present
945
+ */
946
+ export async function executeClaudeCommand(
947
+ prompt: string,
948
+ _movementId: string,
949
+ _sessionNumber: number,
950
+ options: ClaudeInvokerOptions
951
+ ): Promise<ExecutionResult> {
952
+ const { config, runningProcesses } = options;
953
+ const perfStart = Date.now();
954
+ verboseLog(config.verbose, `[PERF] executeMovement started`);
955
+
956
+ const hasImageAttachments = config.imageAttachments && config.imageAttachments.length > 0;
957
+ const useStreamJson = hasImageAttachments || config.thinkingCallback || config.outputCallback || config.toolUseCallback;
958
+
959
+ const claudeProcess = spawnAndRegister(config, prompt, !!hasImageAttachments, !!useStreamJson, runningProcesses, perfStart);
441
960
 
442
961
  let stdout = '';
443
962
  let stderr = '';
@@ -446,54 +965,48 @@ export async function executeClaudeCommand(
446
965
  let errorAlreadySurfaced = false;
447
966
 
448
967
  const sessionCapture: { claudeSessionId?: string } = {};
968
+ // Activate resume assessment buffering when resuming a session.
969
+ // Text is held until thinking/tool activity confirms Claude has context.
970
+ const isResumeMode = !!(config.continueSession && config.claudeSessionId);
971
+
449
972
  const ctx: StreamHandlerContext = {
450
973
  config,
451
974
  accumulatedAssistantResponse: '',
452
975
  accumulatedThinking: '',
453
976
  accumulatedToolUse: [],
454
977
  toolInputBuffers: new Map(),
978
+ nativeTimeoutDetector: new NativeTimeoutDetector(),
979
+ resumeAssessmentActive: isResumeMode,
980
+ resumeAssessmentBuffer: '',
455
981
  };
456
982
 
457
- // Stall detection state
458
- let lastActivityTime = Date.now();
459
- let stallWarningEmitted = false;
460
- let assessmentInProgress = false;
461
- let extensionsGranted = 0;
462
- let currentKillDeadline = Date.now() + (config.stallKillMs ?? 1_800_000);
983
+ // Stall detection state (mutable object shared with runStallCheckTick)
984
+ const stallState: StallState = {
985
+ lastActivityTime: Date.now(),
986
+ stallWarningEmitted: false,
987
+ assessmentInProgress: false,
988
+ extensionsGranted: 0,
989
+ currentKillDeadline: Date.now() + (config.stallKillMs ?? 1_800_000),
990
+ nextWarningAfter: 0,
991
+ };
463
992
 
464
993
  // Tool activity tracking for stall assessment context
465
- let lastToolName: string | undefined;
466
- let lastToolInputSummary: string | undefined;
467
- let pendingToolCount = 0;
468
- let totalToolCalls = 0;
469
-
470
- // Wrap the existing tool handlers to track activity
471
- const origToolUseCallback = config.toolUseCallback;
472
- config.toolUseCallback = (event) => {
473
- if (event.type === 'tool_start' && event.toolName) {
474
- lastToolName = event.toolName;
475
- pendingToolCount++;
476
- totalToolCalls++;
477
- } else if (event.type === 'tool_complete' && event.completeInput) {
478
- lastToolInputSummary = summarizeToolInput(event.completeInput);
479
- } else if (event.type === 'tool_result') {
480
- pendingToolCount = Math.max(0, pendingToolCount - 1);
481
- }
482
- origToolUseCallback?.(event);
483
- };
994
+ const toolTracking = setupToolTracking(config, stallState, ctx, sessionCapture, prompt, perfStart);
995
+ const { pendingTools, watchdog, toolWatchdogActive } = toolTracking;
996
+ // Mutable counters accessed by stall check tick
997
+ const toolCounters = toolTracking.counters;
484
998
 
485
999
  claudeProcess.stdout!.on('data', (data) => {
486
- lastActivityTime = Date.now();
487
- stallWarningEmitted = false;
1000
+ stallState.lastActivityTime = Date.now();
1001
+ stallState.stallWarningEmitted = false;
1002
+ stallState.nextWarningAfter = 0; // Real activity resets throttle
488
1003
  // Push kill deadline forward on any activity
489
1004
  const killMs = config.stallKillMs ?? 1_800_000;
490
- currentKillDeadline = Date.now() + killMs;
1005
+ stallState.currentKillDeadline = Date.now() + killMs;
491
1006
 
492
1007
  if (!firstStdoutReceived) {
493
1008
  firstStdoutReceived = true;
494
- if (config.verbose) {
495
- console.log(`[PERF] First stdout data: ${Date.now() - perfStart}ms`);
496
- }
1009
+ verboseLog(config.verbose, `[PERF] First stdout data: ${Date.now() - perfStart}ms`);
497
1010
  }
498
1011
 
499
1012
  const chunk = data.toString();
@@ -525,62 +1038,28 @@ export async function executeClaudeCommand(
525
1038
  const maxExtensions = config.stallMaxExtensions ?? 3;
526
1039
  const stallAssessEnabled = config.stallAssessEnabled !== false;
527
1040
 
528
- const stallCheckInterval = setInterval(async () => {
529
- const now = Date.now();
530
- const silenceMs = now - lastActivityTime;
531
- const totalElapsed = now - perfStart;
532
-
533
- // Hard cap: absolute wall-clock limit regardless of extensions
534
- if (totalElapsed >= stallHardCapMs) {
535
- terminateStallProcess(claudeProcess, stallCheckInterval, config,
536
- `\n[[MSTRO_ERROR:EXECUTION_STALLED]] Hard time limit reached (${Math.round(stallHardCapMs / 60000)} min total). Terminating process.\n`
537
- );
538
- return;
539
- }
540
-
541
- // Kill deadline reached
542
- if (now >= currentKillDeadline) {
543
- terminateStallProcess(claudeProcess, stallCheckInterval, config,
544
- `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Terminating process.\n`
545
- );
546
- return;
547
- }
548
-
549
- // Warning + assessment trigger
550
- if (silenceMs < stallWarningMs || stallWarningEmitted) return;
551
-
552
- stallWarningEmitted = true;
553
- const killIn = Math.round((currentKillDeadline - now) / 60_000);
554
- config.outputCallback?.(
555
- `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Will terminate in ${killIn} minutes if no activity.\n`
556
- );
557
-
558
- // Run stall assessment if enabled and we haven't exhausted extensions
559
- if (!stallAssessEnabled || assessmentInProgress || extensionsGranted >= maxExtensions) return;
560
-
561
- assessmentInProgress = true;
562
- const stallCtx: StallContext = {
563
- originalPrompt: prompt,
564
- silenceMs,
565
- lastToolName,
566
- lastToolInputSummary,
567
- pendingToolCount,
568
- totalToolCalls,
569
- elapsedTotalMs: totalElapsed,
570
- };
571
-
572
- const result = await runStallAssessment({ stallCtx, config, now, extensionsGranted, maxExtensions });
573
- if (result) {
574
- extensionsGranted = result.extensionsGranted;
575
- currentKillDeadline = result.currentKillDeadline;
576
- stallWarningEmitted = false; // Allow re-warning after extension
577
- }
578
- assessmentInProgress = false;
1041
+ // eslint-disable-next-line prefer-const
1042
+ let stallCheckInterval: ReturnType<typeof setInterval>;
1043
+ stallCheckInterval = setInterval(() => {
1044
+ runStallCheckTick(stallState, {
1045
+ perfStart, stallWarningMs, stallHardCapMs, maxExtensions, stallAssessEnabled,
1046
+ toolWatchdogActive, prompt, pendingTools, lastToolInputSummary: toolCounters.lastToolInputSummary, totalToolCalls: toolCounters.totalToolCalls,
1047
+ claudeProcess, stallCheckInterval, config,
1048
+ });
579
1049
  }, 10_000);
580
1050
 
1051
+ // Wire up the kill context now that stallCheckInterval exists
1052
+ toolTracking.setKillContext(claudeProcess, stallCheckInterval);
1053
+
581
1054
  return new Promise((resolve, reject) => {
582
- claudeProcess.on('close', (code) => {
1055
+ claudeProcess.on('close', async (code) => {
583
1056
  clearInterval(stallCheckInterval);
1057
+ watchdog?.clearAll();
1058
+
1059
+ const postTimeout = flushNativeTimeoutBuffers(ctx);
1060
+ await classifyUnmatchedStderr(stderr, errorAlreadySurfaced, code, config);
1061
+ const resumeBuffered = ctx.resumeAssessmentActive ? (ctx.resumeAssessmentBuffer || undefined) : undefined;
1062
+
584
1063
  if (claudeProcess.pid) {
585
1064
  runningProcesses.delete(claudeProcess.pid);
586
1065
  }
@@ -591,12 +1070,16 @@ export async function executeClaudeCommand(
591
1070
  assistantResponse: ctx.accumulatedAssistantResponse || undefined,
592
1071
  thinkingOutput: ctx.accumulatedThinking || undefined,
593
1072
  toolUseHistory: ctx.accumulatedToolUse.length > 0 ? ctx.accumulatedToolUse : undefined,
594
- claudeSessionId: sessionCapture.claudeSessionId
1073
+ claudeSessionId: sessionCapture.claudeSessionId,
1074
+ nativeTimeoutCount: ctx.nativeTimeoutDetector.timeoutCount || undefined,
1075
+ postTimeoutOutput: postTimeout,
1076
+ resumeBufferedOutput: resumeBuffered,
595
1077
  });
596
1078
  });
597
1079
 
598
1080
  claudeProcess.on('error', (error: NodeJS.ErrnoException) => {
599
1081
  clearInterval(stallCheckInterval);
1082
+ watchdog?.clearAll();
600
1083
  if (claudeProcess.pid) {
601
1084
  runningProcesses.delete(claudeProcess.pid);
602
1085
  }