mstro-app 0.1.57 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/bin/commands/login.js +27 -14
  2. package/bin/commands/logout.js +35 -1
  3. package/bin/commands/status.js +1 -1
  4. package/bin/mstro.js +5 -108
  5. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  6. package/dist/server/cli/headless/claude-invoker.js +432 -103
  7. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  8. package/dist/server/cli/headless/index.d.ts +2 -1
  9. package/dist/server/cli/headless/index.d.ts.map +1 -1
  10. package/dist/server/cli/headless/index.js +2 -0
  11. package/dist/server/cli/headless/index.js.map +1 -1
  12. package/dist/server/cli/headless/prompt-utils.d.ts +5 -8
  13. package/dist/server/cli/headless/prompt-utils.d.ts.map +1 -1
  14. package/dist/server/cli/headless/prompt-utils.js +40 -5
  15. package/dist/server/cli/headless/prompt-utils.js.map +1 -1
  16. package/dist/server/cli/headless/runner.d.ts +1 -1
  17. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  18. package/dist/server/cli/headless/runner.js +29 -7
  19. package/dist/server/cli/headless/runner.js.map +1 -1
  20. package/dist/server/cli/headless/stall-assessor.d.ts +77 -1
  21. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  22. package/dist/server/cli/headless/stall-assessor.js +336 -20
  23. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  24. package/dist/server/cli/headless/tool-watchdog.d.ts +67 -0
  25. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -0
  26. package/dist/server/cli/headless/tool-watchdog.js +296 -0
  27. package/dist/server/cli/headless/tool-watchdog.js.map +1 -0
  28. package/dist/server/cli/headless/types.d.ts +80 -1
  29. package/dist/server/cli/headless/types.d.ts.map +1 -1
  30. package/dist/server/cli/improvisation-session-manager.d.ts +109 -2
  31. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  32. package/dist/server/cli/improvisation-session-manager.js +737 -132
  33. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  34. package/dist/server/index.js +5 -10
  35. package/dist/server/index.js.map +1 -1
  36. package/dist/server/mcp/bouncer-integration.d.ts.map +1 -1
  37. package/dist/server/mcp/bouncer-integration.js +18 -0
  38. package/dist/server/mcp/bouncer-integration.js.map +1 -1
  39. package/dist/server/mcp/security-audit.d.ts +2 -2
  40. package/dist/server/mcp/security-audit.d.ts.map +1 -1
  41. package/dist/server/mcp/security-audit.js +12 -8
  42. package/dist/server/mcp/security-audit.js.map +1 -1
  43. package/dist/server/mcp/security-patterns.d.ts.map +1 -1
  44. package/dist/server/mcp/security-patterns.js +9 -4
  45. package/dist/server/mcp/security-patterns.js.map +1 -1
  46. package/dist/server/routes/improvise.js +6 -6
  47. package/dist/server/routes/improvise.js.map +1 -1
  48. package/dist/server/services/analytics.d.ts +2 -0
  49. package/dist/server/services/analytics.d.ts.map +1 -1
  50. package/dist/server/services/analytics.js +13 -3
  51. package/dist/server/services/analytics.js.map +1 -1
  52. package/dist/server/services/platform.d.ts.map +1 -1
  53. package/dist/server/services/platform.js +4 -9
  54. package/dist/server/services/platform.js.map +1 -1
  55. package/dist/server/services/sandbox-utils.d.ts +6 -0
  56. package/dist/server/services/sandbox-utils.d.ts.map +1 -0
  57. package/dist/server/services/sandbox-utils.js +72 -0
  58. package/dist/server/services/sandbox-utils.js.map +1 -0
  59. package/dist/server/services/settings.d.ts +6 -0
  60. package/dist/server/services/settings.d.ts.map +1 -1
  61. package/dist/server/services/settings.js +21 -0
  62. package/dist/server/services/settings.js.map +1 -1
  63. package/dist/server/services/terminal/pty-manager.d.ts +3 -51
  64. package/dist/server/services/terminal/pty-manager.d.ts.map +1 -1
  65. package/dist/server/services/terminal/pty-manager.js +14 -100
  66. package/dist/server/services/terminal/pty-manager.js.map +1 -1
  67. package/dist/server/services/websocket/handler.d.ts +36 -15
  68. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  69. package/dist/server/services/websocket/handler.js +452 -223
  70. package/dist/server/services/websocket/handler.js.map +1 -1
  71. package/dist/server/services/websocket/types.d.ts +6 -2
  72. package/dist/server/services/websocket/types.d.ts.map +1 -1
  73. package/hooks/bouncer.sh +11 -4
  74. package/package.json +4 -1
  75. package/server/cli/headless/claude-invoker.ts +602 -119
  76. package/server/cli/headless/index.ts +7 -1
  77. package/server/cli/headless/prompt-utils.ts +37 -5
  78. package/server/cli/headless/runner.ts +30 -8
  79. package/server/cli/headless/stall-assessor.ts +453 -22
  80. package/server/cli/headless/tool-watchdog.ts +390 -0
  81. package/server/cli/headless/types.ts +84 -1
  82. package/server/cli/improvisation-session-manager.ts +884 -143
  83. package/server/index.ts +5 -10
  84. package/server/mcp/bouncer-integration.ts +28 -0
  85. package/server/mcp/security-audit.ts +12 -8
  86. package/server/mcp/security-patterns.ts +8 -2
  87. package/server/routes/improvise.ts +6 -6
  88. package/server/services/analytics.ts +13 -3
  89. package/server/services/platform.test.ts +0 -10
  90. package/server/services/platform.ts +4 -10
  91. package/server/services/sandbox-utils.ts +78 -0
  92. package/server/services/settings.ts +25 -0
  93. package/server/services/terminal/pty-manager.ts +16 -127
  94. package/server/services/websocket/handler.ts +515 -251
  95. package/server/services/websocket/types.ts +10 -4
  96. package/dist/server/services/terminal/tmux-manager.d.ts +0 -82
  97. package/dist/server/services/terminal/tmux-manager.d.ts.map +0 -1
  98. package/dist/server/services/terminal/tmux-manager.js +0 -352
  99. package/dist/server/services/terminal/tmux-manager.js.map +0 -1
  100. package/server/services/terminal/tmux-manager.ts +0 -426
@@ -6,10 +6,12 @@
6
6
  * Handles spawning and managing Claude CLI processes.
7
7
  */
8
8
  import { spawn } from 'node:child_process';
9
+ import { sanitizeEnvForSandbox } from '../../services/sandbox-utils.js';
9
10
  import { generateMcpConfig } from './mcp-config.js';
10
11
  import { detectErrorInStderr, } from './output-utils.js';
11
12
  import { buildMultimodalMessage } from './prompt-utils.js';
12
- import { assessStall } from './stall-assessor.js';
13
+ import { assessStall, assessToolTimeout, classifyError } from './stall-assessor.js';
14
+ import { ToolWatchdog } from './tool-watchdog.js';
13
15
  // ========== Stall Detection Helpers ==========
14
16
  /** Summarize a tool's input for stall assessment context */
15
17
  function summarizeToolInput(input) {
@@ -45,12 +47,21 @@ function terminateStallProcess(claudeProcess, interval, config, message) {
45
47
  }
46
48
  /** Run stall assessment and return updated state if extended, null otherwise */
47
49
  async function runStallAssessment(params) {
48
- const { stallCtx, config, now, extensionsGranted, maxExtensions } = params;
50
+ const { stallCtx, config, now, extensionsGranted, maxExtensions, toolWatchdogActive } = params;
49
51
  try {
50
- const verdict = await assessStall(stallCtx, config.claudeCommand, config.verbose);
52
+ const verdict = await assessStall(stallCtx, config.claudeCommand, config.verbose, toolWatchdogActive);
51
53
  if (verdict.action === 'extend') {
52
54
  const newExtensions = extensionsGranted + 1;
53
- config.outputCallback?.(`\n[[MSTRO_STALL_EXTENDED]] Assessment: process likely working. ${verdict.reason}. Extension ${newExtensions}/${maxExtensions}.\n`);
55
+ const elapsedMin = Math.round(stallCtx.elapsedTotalMs / 60_000);
56
+ const pendingNames = stallCtx.pendingToolNames ?? new Set();
57
+ // Emit a progress message instead of a scary stall warning.
58
+ // Task subagents get a friendlier message since long silence is expected.
59
+ if (pendingNames.has('Task')) {
60
+ config.outputCallback?.(`\n[[MSTRO_STALL_EXTENDED]] Task subagent still running (${elapsedMin} min elapsed). ${verdict.reason}.\n`);
61
+ }
62
+ else {
63
+ config.outputCallback?.(`\n[[MSTRO_STALL_EXTENDED]] Process still working (${elapsedMin} min elapsed). ${verdict.reason}. Extension ${newExtensions}/${maxExtensions}.\n`);
64
+ }
54
65
  if (config.verbose) {
55
66
  console.log(`[STALL] Extended by ${Math.round(verdict.extensionMs / 60_000)} min: ${verdict.reason}`);
56
67
  }
@@ -68,6 +79,115 @@ async function runStallAssessment(params) {
68
79
  }
69
80
  return null;
70
81
  }
// ========== Native Timeout Detection ==========
/** Regex matching Claude Code's internal tool timeout messages */
const NATIVE_TIMEOUT_PATTERN = /^(\w+) timed out — (continuing|retrying) with (\d+) results? preserved$/;
/** Quick prefix check: does incomplete text look like it might be a timeout? */
const TIMEOUT_PREFIX_PATTERN = /^(\w+) timed/;
/** Known tool names that Claude Code may report timeouts for */
const NATIVE_TIMEOUT_TOOL_NAMES = new Set([
    'Read', 'Grep', 'Glob', 'Edit', 'Write', 'Bash',
    'WebFetch', 'WebSearch', 'Task', 'TodoRead', 'TodoWrite',
    'NotebookEdit', 'MultiEdit',
]);
/**
 * Detects Claude Code's internal tool timeout messages in the text stream.
 *
 * Text is split at newline boundaries so complete timeout lines can be
 * recognised and removed. Everything else is forwarded as soon as it can no
 * longer turn into a timeout line, to keep streaming latency low.
 */
class NativeTimeoutDetector {
    /** Trailing text of the current, not yet newline-terminated line that is being held back */
    lineBuffer = '';
    /** Every timeout event seen so far, in arrival order */
    detectedTimeouts = [];
    /** Text buffered after native timeouts — held back from streaming until context is assessed */
    postTimeoutBuffer = '';
    /**
     * Parse one line as a native timeout message.
     *
     * @param {string} line - Raw line; surrounding whitespace is ignored.
     * @returns {{toolName: string, action: string, preservedCount: number} | null}
     *   The structured event, or null when the line is not a timeout message.
     */
    static #parseTimeoutLine(line) {
        const match = line.trim().match(NATIVE_TIMEOUT_PATTERN);
        if (!match) {
            return null;
        }
        return {
            toolName: match[1],
            action: match[2],
            preservedCount: Number.parseInt(match[3], 10),
        };
    }
    /**
     * Decide whether an incomplete trailing fragment must be held back because
     * later chunks could still complete it into a timeout line.
     *
     * Two cases are held:
     *  - the fragment already starts with "<KnownTool> timed", or
     *  - the fragment is itself a prefix of "<KnownTool> timed", i.e. the chunk
     *    boundary fell inside the tool name or before " timed".
     * Leading whitespace is ignored because complete lines are trimmed before
     * they are matched.
     *
     * @param {string} fragment - Text after the last newline of the buffer.
     * @returns {boolean} True when the fragment should stay in lineBuffer.
     */
    static #couldBecomeTimeoutLine(fragment) {
        const candidate = fragment.trimStart();
        if (!candidate) {
            return false;
        }
        const prefixMatch = candidate.match(TIMEOUT_PREFIX_PATTERN);
        if (prefixMatch) {
            return NATIVE_TIMEOUT_TOOL_NAMES.has(prefixMatch[1]);
        }
        for (const toolName of NATIVE_TIMEOUT_TOOL_NAMES) {
            if (`${toolName} timed`.startsWith(candidate)) {
                return true;
            }
        }
        return false;
    }
    /**
     * Process a text_delta chunk.
     * Returns passthrough text (for outputCallback) and any detected timeouts.
     *
     * After the first native timeout is detected, subsequent passthrough text
     * is held in postTimeoutBuffer instead of returned as passthrough. This
     * prevents confused "What were you working on?" responses from streaming
     * to the user before context loss can be assessed.
     *
     * @param {string} text - The newly received chunk.
     * @returns {{passthrough: string, timeouts: Array<object>}} Text that may be
     *   shown now, and the timeout events completed by this chunk.
     */
    processChunk(text) {
        const timeouts = [];
        let passthrough = '';
        this.lineBuffer += text;
        const lines = this.lineBuffer.split('\n');
        const incomplete = lines.pop() ?? '';
        for (const line of lines) {
            const event = NativeTimeoutDetector.#parseTimeoutLine(line);
            if (event) {
                // Suppress this line from passthrough — replaced by structured marker
                timeouts.push(event);
                this.detectedTimeouts.push(event);
            }
            else {
                passthrough += `${line}\n`;
            }
        }
        // Hold the trailing fragment only while it can still become a timeout line
        if (NativeTimeoutDetector.#couldBecomeTimeoutLine(incomplete)) {
            this.lineBuffer = incomplete;
        }
        else {
            passthrough += incomplete;
            this.lineBuffer = '';
        }
        // After native timeouts, buffer passthrough text instead of returning it.
        // The session manager will assess context loss and either flush or discard.
        if (this.detectedTimeouts.length > 0 && passthrough) {
            this.postTimeoutBuffer += passthrough;
            passthrough = '';
        }
        return { passthrough, timeouts };
    }
    /**
     * Flush any held buffer (call on stream end).
     * Also checks the remaining buffer for a timeout pattern so the last
     * timeout message (without trailing newline) is always counted.
     *
     * @returns {string} The held text, or '' when nothing was held or the held
     *   text was itself a timeout message (which is recorded in
     *   detectedTimeouts instead of being returned).
     */
    flush() {
        const remaining = this.lineBuffer;
        this.lineBuffer = '';
        if (!remaining) {
            return remaining;
        }
        const event = NativeTimeoutDetector.#parseTimeoutLine(remaining);
        if (event) {
            this.detectedTimeouts.push(event);
            // Return empty — this was a timeout message, not user-visible text
            return '';
        }
        return remaining;
    }
    /** Get count of detected timeouts */
    get timeoutCount() {
        return this.detectedTimeouts.length;
    }
    /** Get buffered post-timeout text (for session manager to flush or discard) */
    get bufferedPostTimeoutOutput() {
        return this.postTimeoutBuffer;
    }
}
71
191
  function handleSessionCapture(parsed, captured) {
72
192
  if (parsed.type === 'system' && parsed.subtype === 'init' && parsed.session_id) {
73
193
  captured.claudeSessionId = parsed.session_id;
@@ -82,6 +202,14 @@ function handleThinkingDelta(event, ctx) {
82
202
  !event.delta?.thinking) {
83
203
  return ctx.accumulatedThinking;
84
204
  }
205
+ // Thinking activity confirms Claude has context — flush resume buffer
206
+ if (ctx.resumeAssessmentActive) {
207
+ ctx.resumeAssessmentActive = false;
208
+ if (ctx.resumeAssessmentBuffer) {
209
+ ctx.config.outputCallback?.(ctx.resumeAssessmentBuffer);
210
+ ctx.resumeAssessmentBuffer = '';
211
+ }
212
+ }
85
213
  const thinking = event.delta.thinking;
86
214
  const updated = ctx.accumulatedThinking + thinking;
87
215
  if (ctx.config.thinkingCallback) {
@@ -102,9 +230,26 @@ function handleTextDelta(event, ctx) {
102
230
  return ctx.accumulatedAssistantResponse;
103
231
  }
104
232
  const text = event.delta.text;
233
+ // Always accumulate raw text for checkpoint context
105
234
  const updated = ctx.accumulatedAssistantResponse + text;
106
- if (ctx.config.outputCallback) {
107
- ctx.config.outputCallback(text);
235
+ // Route through native timeout detector to intercept Claude Code's internal timeout messages
236
+ const { passthrough, timeouts } = ctx.nativeTimeoutDetector.processChunk(text);
237
+ // Emit structured markers for detected native timeouts
238
+ for (const timeout of timeouts) {
239
+ ctx.config.outputCallback?.(`\n[[MSTRO_NATIVE_TIMEOUT]] ${timeout.toolName} timed out \u2014 ${timeout.action} with ${timeout.preservedCount} results preserved\n`);
240
+ }
241
+ // When resume assessment is active, buffer text instead of forwarding.
242
+ // This prevents confused "What were you working on?" responses from streaming
243
+ // to the user before we can assess whether Claude retained context.
244
+ if (ctx.resumeAssessmentActive) {
245
+ if (passthrough) {
246
+ ctx.resumeAssessmentBuffer += passthrough;
247
+ }
248
+ return updated;
249
+ }
250
+ // Forward non-timeout text to output
251
+ if (passthrough && ctx.config.outputCallback) {
252
+ ctx.config.outputCallback(passthrough);
108
253
  }
109
254
  return updated;
110
255
  }
@@ -113,6 +258,14 @@ function handleToolStart(event, ctx) {
113
258
  event.content_block?.type !== 'tool_use') {
114
259
  return;
115
260
  }
261
+ // Tool activity confirms Claude has context — flush resume buffer
262
+ if (ctx.resumeAssessmentActive) {
263
+ ctx.resumeAssessmentActive = false;
264
+ if (ctx.resumeAssessmentBuffer) {
265
+ ctx.config.outputCallback?.(ctx.resumeAssessmentBuffer);
266
+ ctx.resumeAssessmentBuffer = '';
267
+ }
268
+ }
116
269
  const toolName = event.content_block.name;
117
270
  const toolId = event.content_block.id;
118
271
  const index = event.index;
@@ -214,6 +367,18 @@ function processStreamLines(buffer, sessionCapture, ctx) {
214
367
  return remainder;
215
368
  }
216
369
  function processStreamEvent(parsed, ctx) {
370
+ // Handle error events from Claude CLI (API errors, model errors, etc.)
371
+ if (parsed.type === 'error') {
372
+ const errorMessage = parsed.error?.message || parsed.message || JSON.stringify(parsed);
373
+ ctx.config.outputCallback?.(`\n[[MSTRO_ERROR:CLAUDE_ERROR]] ${errorMessage}\n`);
374
+ return;
375
+ }
376
+ // Handle result events that contain error info
377
+ if (parsed.type === 'result' && parsed.is_error) {
378
+ const errorMessage = parsed.error || parsed.result || 'Unknown error in result';
379
+ ctx.config.outputCallback?.(`\n[[MSTRO_ERROR:CLAUDE_RESULT_ERROR]] ${errorMessage}\n`);
380
+ return;
381
+ }
217
382
  if (parsed.type === 'stream_event' && parsed.event) {
218
383
  const event = parsed.event;
219
384
  ctx.accumulatedThinking = handleThinkingDelta(event, ctx);
@@ -224,6 +389,33 @@ function processStreamEvent(parsed, ctx) {
224
389
  }
225
390
  handleToolResult(parsed, ctx);
226
391
  }
392
+ // ========== Close Handler Helpers ==========
/**
 * Flush the native timeout detector at stream end.
 *
 * When no native timeout was detected, any text the detector was still
 * holding is ordinary output and is forwarded to `config.outputCallback`.
 * When at least one timeout was detected, nothing is emitted here: the
 * buffered post-timeout text plus the held remainder is returned so the
 * caller can decide what to show.
 *
 * @param {object} ctx - Stream context; reads `ctx.nativeTimeoutDetector` and `ctx.config.outputCallback`.
 * @returns {string | undefined} Post-timeout output, or undefined when there is none.
 */
function flushNativeTimeoutBuffers(ctx) {
    const detector = ctx.nativeTimeoutDetector;
    const remaining = detector.flush();
    // Read the count only after flush(): flush() may itself recognise a final,
    // unterminated timeout line and record it.
    if (detector.timeoutCount === 0) {
        if (remaining) {
            ctx.config.outputCallback?.(remaining);
        }
        return undefined;
    }
    return (detector.bufferedPostTimeoutOutput + remaining) || undefined;
}
/**
 * Ask `classifyError` to label stderr output that no earlier handler surfaced.
 *
 * Runs only when stderr is non-empty, no error has been surfaced yet, and the
 * exit code is not 0. A truthy classification is emitted through
 * `config.outputCallback` as an `[[MSTRO_ERROR:<code>]]` marker. Any failure
 * inside the attempt is deliberately ignored (best effort).
 *
 * @param {string} stderr - Collected stderr text.
 * @param {boolean} errorAlreadySurfaced - True when an error marker was already emitted.
 * @param {number} code - Process exit code.
 * @param {object} config - Reads `claudeCommand`, `verbose` and `outputCallback`.
 * @returns {Promise<void>}
 */
async function classifyUnmatchedStderr(stderr, errorAlreadySurfaced, code, config) {
    const shouldClassify = Boolean(stderr) && !errorAlreadySurfaced && code !== 0;
    if (!shouldClassify) {
        return;
    }
    try {
        const verdict = await classifyError(stderr, config.claudeCommand, config.verbose);
        if (!verdict) {
            return;
        }
        config.outputCallback?.(`\n[[MSTRO_ERROR:${verdict.errorCode}]] ${verdict.message}\n`);
    }
    catch {
        // Haiku classification failed — proceed without it
    }
}
227
419
  // ========== Error Handling ==========
228
420
  const SPAWN_ERROR_MAP = {
229
421
  ENOENT: {
@@ -280,94 +472,253 @@ function buildClaudeArgs(config, prompt, hasImageAttachments, useStreamJson, mcp
280
472
  }
281
473
  return args;
282
474
  }
283
- /**
284
- * Execute a Claude CLI command for a single movement
285
- * Supports multimodal prompts via --input-format stream-json when image attachments are present
286
- */
287
- export async function executeClaudeCommand(prompt, _movementId, _sessionNumber, options) {
288
- const { config, runningProcesses } = options;
289
- const perfStart = Date.now();
290
- if (config.verbose) {
291
- console.log(`[PERF] executeMovement started`);
/**
 * Send the prompt and its image attachments to the child process over stdin.
 *
 * An 'error' listener is registered on stdin before anything is written; it
 * reports the failure through `config.outputCallback` (and to the console in
 * verbose mode). The message built by `buildMultimodalMessage` is then
 * written and stdin is closed.
 *
 * @param {object} claudeProcess - Child process whose `stdin` is a writable stream.
 * @param {string} prompt - Prompt text passed to `buildMultimodalMessage`.
 * @param {object} config - Reads `imageAttachments`, `verbose` and `outputCallback`.
 */
function writeImageAttachmentsToStdin(claudeProcess, prompt, config) {
    const reportWriteFailure = (err) => {
        if (config.verbose) {
            console.error('[STDIN] Write error:', err.message);
        }
        config.outputCallback?.(`\n[[MSTRO_ERROR:STDIN_WRITE_FAILED]] Failed to send image data to Claude: ${err.message}\n`);
    };
    claudeProcess.stdin.on('error', reportWriteFailure);
    const payload = buildMultimodalMessage(prompt, config.imageAttachments);
    claudeProcess.stdin.write(payload);
    claudeProcess.stdin.end();
}
487
+ /** Run a single stall-check tick. Extracted to reduce cognitive complexity of executeClaudeCommand. */
488
+ async function runStallCheckTick(state, opts) {
489
+ const now = Date.now();
490
+ const silenceMs = now - state.lastActivityTime;
491
+ const totalElapsed = now - opts.perfStart;
492
+ if (totalElapsed >= opts.stallHardCapMs) {
493
+ terminateStallProcess(opts.claudeProcess, opts.stallCheckInterval, opts.config, `\n[[MSTRO_ERROR:EXECUTION_STALLED]] Hard time limit reached (${Math.round(opts.stallHardCapMs / 60000)} min total). Terminating process.\n`);
494
+ return;
292
495
  }
293
- const hasImageAttachments = config.imageAttachments && config.imageAttachments.length > 0;
294
- const useStreamJson = hasImageAttachments || config.thinkingCallback || config.outputCallback || config.toolUseCallback;
496
+ if (now >= state.currentKillDeadline) {
497
+ terminateStallProcess(opts.claudeProcess, opts.stallCheckInterval, opts.config, `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Terminating process.\n`);
498
+ return;
499
+ }
500
+ if (silenceMs < opts.stallWarningMs || state.stallWarningEmitted || now < state.nextWarningAfter || state.assessmentInProgress)
501
+ return;
502
+ const stallCtx = {
503
+ originalPrompt: opts.prompt,
504
+ silenceMs,
505
+ lastToolName: opts.pendingTools.size > 0 ? Array.from(opts.pendingTools.values()).pop() : undefined,
506
+ lastToolInputSummary: opts.lastToolInputSummary,
507
+ pendingToolCount: opts.pendingTools.size,
508
+ pendingToolNames: new Set(opts.pendingTools.values()),
509
+ totalToolCalls: opts.totalToolCalls,
510
+ elapsedTotalMs: totalElapsed,
511
+ };
512
+ if (opts.stallAssessEnabled && state.extensionsGranted < opts.maxExtensions) {
513
+ state.assessmentInProgress = true;
514
+ const result = await runStallAssessment({ stallCtx, config: opts.config, now, extensionsGranted: state.extensionsGranted, maxExtensions: opts.maxExtensions, toolWatchdogActive: opts.toolWatchdogActive });
515
+ state.assessmentInProgress = false;
516
+ if (result) {
517
+ state.extensionsGranted = result.extensionsGranted;
518
+ state.currentKillDeadline = result.currentKillDeadline;
519
+ state.nextWarningAfter = now + opts.stallWarningMs;
520
+ return;
521
+ }
522
+ }
523
+ state.stallWarningEmitted = true;
524
+ const killIn = Math.round((state.currentKillDeadline - now) / 60_000);
525
+ opts.config.outputCallback?.(`\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Will terminate in ${killIn} minutes if no activity.\n`);
526
+ }
/**
 * Record the start of a tool call.
 *
 * Marks the tool as pending, bumps the total call counter, remembers the
 * id → name mapping and, when a watchdog is present, starts a watch for the
 * tool with an empty input object.
 *
 * @param {object} event - Tool event; reads `toolId` and `toolName`.
 * @param {object} s - Tracking state (`pendingTools`, `counters`, `toolIdToName`, `watchdog`, `onTimeout`).
 */
function onToolStart(event, s) {
    const toolId = event.toolId;
    const toolName = event.toolName;
    s.pendingTools.set(toolId, toolName);
    s.counters.totalToolCalls += 1;
    s.toolIdToName.set(toolId, toolName);
    if (!s.watchdog) {
        return;
    }
    const fireTimeout = () => { s.onTimeout(toolId); };
    s.watchdog.startWatch(toolId, toolName, {}, fireTimeout);
}
/**
 * Record the complete input of a tool call.
 *
 * Stores a summary of the input for stall assessment, remembers the raw input
 * by tool id and, when a watchdog is present and the tool's name is known,
 * starts a watch again — this time with the real input.
 *
 * @param {object} event - Tool event; reads `toolId` and `completeInput`.
 * @param {object} s - Tracking state (`counters`, `toolIdToInput`, `toolIdToName`, `watchdog`, `onTimeout`).
 */
function onToolComplete(event, s) {
    const { toolId, completeInput } = event;
    s.counters.lastToolInputSummary = summarizeToolInput(completeInput);
    s.toolIdToInput.set(toolId, completeInput);
    // The name lookup only matters when there is a watchdog to arm.
    const knownName = s.watchdog ? s.toolIdToName.get(toolId) : undefined;
    if (knownName) {
        s.watchdog.startWatch(toolId, knownName, completeInput, () => { s.onTimeout(toolId); });
    }
}
/**
 * Record that a tool call has produced its result.
 *
 * Removes the tool from the pending set and resets the stall state (a result
 * counts as activity). When a watchdog is present, the tool's watch is always
 * cleared — the tool has finished, so its timeout must not fire later — and
 * the duration is reported to the watchdog when both the tool name and a
 * non-zero duration are known.
 *
 * @param {object} event - Tool event; reads `toolId`.
 * @param {object} s - Tracking state (`pendingTools`, `stallState`, `ctx.accumulatedToolUse`,
 *   `toolIdToName`, `watchdog`).
 */
function onToolResult(event, s) {
    const id = event.toolId;
    s.pendingTools.delete(id);
    s.stallState.stallWarningEmitted = false;
    s.stallState.lastActivityTime = Date.now();
    if (!s.watchdog) {
        return;
    }
    const toolEntry = s.ctx.accumulatedToolUse.find((t) => t.toolId === id);
    const toolName = s.toolIdToName.get(id);
    if (toolEntry && toolName && toolEntry.duration) {
        s.watchdog.recordCompletion(toolName, toolEntry.duration);
    }
    // Clear even when no accumulated entry was found; otherwise the watch for a
    // tool that has already returned stays armed.
    s.watchdog.clearWatch(id);
}
/**
 * Pick the text shown as the target in a tool timeout message.
 *
 * Prefers a truthy `url`, falls back to a truthy `query`, and stringifies
 * whichever is chosen.
 *
 * @param {object} toolInput - Tool input object.
 * @returns {string | undefined} The target as a string, or undefined when neither field is set.
 */
function resolveToolUrl(toolInput) {
    const target = toolInput.url || toolInput.query;
    return target ? String(target) : undefined;
}
/**
 * Handle a tool timeout by building a checkpoint and killing the process.
 *
 * Steps, in order: build a checkpoint from the accumulated stream state, emit
 * a `[[MSTRO_TOOL_TIMEOUT]]` marker, hand the checkpoint (if any) to
 * `config.onToolTimeout`, clear all watches and the stall interval, send
 * SIGTERM, and escalate to SIGKILL after 5 seconds if the process has not
 * exited by then.
 *
 * @param {string} hungToolId - Id of the tool call that timed out.
 * @param {object} watchdog - Provides `buildCheckpoint`, `getTimeout` and `clearAll`.
 * @param {object} killCtx - Holds `claudeProcess` and `stallCheckInterval`.
 * @param {object} s - Tracking state (`ctx`, `toolIdToName`, `toolIdToInput`).
 * @param {object} config - Reads `outputCallback`, `onToolTimeout` and `verbose`.
 * @param {string} prompt - Original prompt, passed to the checkpoint builder.
 * @param {object} sessionCapture - Reads `claudeSessionId`.
 * @param {number} perfStart - Start timestamp, passed to the checkpoint builder.
 */
function executeToolTimeout(hungToolId, watchdog, killCtx, s, config, prompt, sessionCapture, perfStart) {
    const checkpoint = watchdog.buildCheckpoint(prompt, s.ctx.accumulatedAssistantResponse, s.ctx.accumulatedThinking, s.ctx.accumulatedToolUse, hungToolId, sessionCapture.claudeSessionId, perfStart);
    const toolName = s.toolIdToName.get(hungToolId) || 'unknown';
    const toolInput = s.toolIdToInput.get(hungToolId) || {};
    const timeoutMs = watchdog.getTimeout(toolName);
    const url = resolveToolUrl(toolInput);
    const completedCount = s.ctx.accumulatedToolUse.filter((t) => t.result !== undefined).length;
    config.outputCallback?.(`\n[[MSTRO_TOOL_TIMEOUT]] ${toolName} timed out after ${Math.round(timeoutMs / 1000)}s${url ? ` fetching: ${url.slice(0, 100)}` : ''}. ${completedCount} completed results preserved.\n`);
    if (checkpoint) {
        config.onToolTimeout?.(checkpoint);
    }
    verboseLog(config.verbose, `[WATCHDOG] Killing process due to ${toolName} timeout`);
    watchdog.clearAll();
    clearInterval(killCtx.stallCheckInterval);
    const proc = killCtx.claudeProcess;
    proc.kill('SIGTERM');
    setTimeout(() => {
        // `proc.killed` only says a signal was delivered, and it is already true
        // after the SIGTERM above. Whether the process is still alive is told by
        // exitCode/signalCode, which stay null until it has exited.
        if (proc.exitCode === null && proc.signalCode === null) {
            proc.kill('SIGKILL');
        }
    }, 5000);
}
/**
 * Wire up tool activity tracking and, unless disabled, the tool watchdog.
 *
 * Replaces `config.toolUseCallback` with a wrapper that routes tool events to
 * `onToolStart` / `onToolComplete` / `onToolResult` and then calls the
 * previous callback. A watchdog is created unless `config.enableToolWatchdog`
 * is exactly `false`. Timeouts are ignored until `setKillContext` has been
 * called, because the process and stall interval are not known before then.
 *
 * @param {object} config - Runner config; `toolUseCallback` is overwritten.
 * @param {object} stallState - Shared stall-detection state.
 * @param {object} ctx - Stream context with the accumulated output.
 * @param {object} sessionCapture - Object carrying the captured session id.
 * @param {string} prompt - Original prompt.
 * @param {number} perfStart - Start timestamp.
 * @returns {{pendingTools: Map, watchdog: object|null, toolWatchdogActive: boolean,
 *   counters: object, setKillContext: Function}} Handles used by the caller.
 */
function setupToolTracking(config, stallState, ctx, sessionCapture, prompt, perfStart) {
    const pendingTools = new Map();
    const counters = { lastToolInputSummary: undefined, totalToolCalls: 0 };
    const toolWatchdogActive = config.enableToolWatchdog !== false;
    let watchdog = null;
    if (toolWatchdogActive) {
        watchdog = new ToolWatchdog({
            profiles: config.toolTimeoutProfiles,
            verbose: config.verbose,
            onTiebreaker: async (toolName, toolInput, elapsedMs) => assessToolTimeout(toolName, toolInput, elapsedMs, config.claudeCommand, config.verbose),
        });
    }
    // Deferred kill context — set after stallCheckInterval is created
    let killCtx = null;
    const trackingState = {
        pendingTools,
        counters,
        toolIdToName: new Map(),
        toolIdToInput: new Map(),
        watchdog,
        stallState,
        ctx,
        onTimeout: (hungToolId) => {
            if (watchdog && killCtx) {
                executeToolTimeout(hungToolId, watchdog, killCtx, trackingState, config, prompt, sessionCapture, perfStart);
            }
        },
    };
    const previousCallback = config.toolUseCallback;
    config.toolUseCallback = (event) => {
        switch (event.type) {
            case 'tool_start':
                if (event.toolName && event.toolId) {
                    onToolStart(event, trackingState);
                }
                break;
            case 'tool_complete':
                if (event.completeInput && event.toolId) {
                    onToolComplete(event, trackingState);
                }
                break;
            case 'tool_result':
                if (event.toolId) {
                    onToolResult(event, trackingState);
                }
                break;
            default:
                break;
        }
        previousCallback?.(event);
    };
    const setKillContext = (claudeProcess, stallCheckInterval) => {
        killCtx = { claudeProcess, stallCheckInterval };
    };
    return { pendingTools, watchdog, toolWatchdogActive, counters, setKillContext };
}
/**
 * Print each message with `console.log`, one call per message, but only when
 * verbose mode is on.
 *
 * @param {boolean} verbose - Whether verbose output is enabled.
 * @param {...*} msgs - Messages to print, in order.
 */
function verboseLog(verbose, ...msgs) {
    if (!verbose) {
        return;
    }
    // One argument per call: passing console.log directly would also hand it
    // the index and the array.
    msgs.forEach((msg) => {
        console.log(msg);
    });
}
644
+ /** Spawn the Claude CLI process and register it. Extracted to reduce cognitive complexity. */
645
+ function spawnAndRegister(config, prompt, hasImageAttachments, useStreamJson, runningProcesses, perfStart) {
295
646
  const mcpConfigPath = generateMcpConfig(config.workingDir, config.verbose);
296
647
  if (!mcpConfigPath && config.outputCallback) {
297
648
  config.outputCallback('\n[[MSTRO_ERROR:BOUNCER_UNAVAILABLE]] Security bouncer not available. Running with limited permissions — file edits allowed, but shell commands may be restricted.\n');
298
649
  }
299
- const args = buildClaudeArgs(config, prompt, !!hasImageAttachments, !!useStreamJson, mcpConfigPath);
300
- if (config.verbose) {
301
- console.log(`[PERF] About to spawn: ${Date.now() - perfStart}ms`);
302
- console.log(`[PERF] Command: ${config.claudeCommand} ${args.join(' ')}`);
303
- }
650
+ const args = buildClaudeArgs(config, prompt, hasImageAttachments, useStreamJson, mcpConfigPath);
651
+ verboseLog(config.verbose, `[PERF] About to spawn: ${Date.now() - perfStart}ms`, `[PERF] Command: ${config.claudeCommand} ${args.join(' ')}`);
304
652
  const claudeProcess = spawn(config.claudeCommand, args, {
305
653
  cwd: config.workingDir,
306
- env: { ...process.env },
654
+ env: config.sandboxed
655
+ ? sanitizeEnvForSandbox(process.env, config.workingDir)
656
+ : { ...process.env },
307
657
  stdio: [hasImageAttachments ? 'pipe' : 'ignore', 'pipe', 'pipe']
308
658
  });
309
659
  if (hasImageAttachments && claudeProcess.stdin) {
310
- const multimodalMessage = buildMultimodalMessage(prompt, config.imageAttachments);
311
- claudeProcess.stdin.write(multimodalMessage);
312
- claudeProcess.stdin.end();
660
+ writeImageAttachmentsToStdin(claudeProcess, prompt, config);
313
661
  }
314
662
  if (claudeProcess.pid) {
315
663
  runningProcesses.set(claudeProcess.pid, claudeProcess);
316
664
  }
317
- if (config.verbose) {
318
- console.log(`[PERF] Spawned: ${Date.now() - perfStart}ms`);
319
- }
665
+ verboseLog(config.verbose, `[PERF] Spawned: ${Date.now() - perfStart}ms`);
666
+ return claudeProcess;
667
+ }
668
+ /**
669
+ * Execute a Claude CLI command for a single movement
670
+ * Supports multimodal prompts via --input-format stream-json when image attachments are present
671
+ */
672
+ export async function executeClaudeCommand(prompt, _movementId, _sessionNumber, options) {
673
+ const { config, runningProcesses } = options;
674
+ const perfStart = Date.now();
675
+ verboseLog(config.verbose, `[PERF] executeMovement started`);
676
+ const hasImageAttachments = config.imageAttachments && config.imageAttachments.length > 0;
677
+ const useStreamJson = hasImageAttachments || config.thinkingCallback || config.outputCallback || config.toolUseCallback;
678
+ const claudeProcess = spawnAndRegister(config, prompt, !!hasImageAttachments, !!useStreamJson, runningProcesses, perfStart);
320
679
  let stdout = '';
321
680
  let stderr = '';
322
681
  let thinkingBuffer = '';
323
682
  let firstStdoutReceived = false;
324
683
  let errorAlreadySurfaced = false;
325
684
  const sessionCapture = {};
685
+ // Activate resume assessment buffering when resuming a session.
686
+ // Text is held until thinking/tool activity confirms Claude has context.
687
+ const isResumeMode = !!(config.continueSession && config.claudeSessionId);
326
688
  const ctx = {
327
689
  config,
328
690
  accumulatedAssistantResponse: '',
329
691
  accumulatedThinking: '',
330
692
  accumulatedToolUse: [],
331
693
  toolInputBuffers: new Map(),
694
+ nativeTimeoutDetector: new NativeTimeoutDetector(),
695
+ resumeAssessmentActive: isResumeMode,
696
+ resumeAssessmentBuffer: '',
332
697
  };
333
- // Stall detection state
334
- let lastActivityTime = Date.now();
335
- let stallWarningEmitted = false;
336
- let assessmentInProgress = false;
337
- let extensionsGranted = 0;
338
- let currentKillDeadline = Date.now() + (config.stallKillMs ?? 1_800_000);
339
- // Tool activity tracking for stall assessment context
340
- let lastToolName;
341
- let lastToolInputSummary;
342
- let pendingToolCount = 0;
343
- let totalToolCalls = 0;
344
- // Wrap the existing tool handlers to track activity
345
- const origToolUseCallback = config.toolUseCallback;
346
- config.toolUseCallback = (event) => {
347
- if (event.type === 'tool_start' && event.toolName) {
348
- lastToolName = event.toolName;
349
- pendingToolCount++;
350
- totalToolCalls++;
351
- }
352
- else if (event.type === 'tool_complete' && event.completeInput) {
353
- lastToolInputSummary = summarizeToolInput(event.completeInput);
354
- }
355
- else if (event.type === 'tool_result') {
356
- pendingToolCount = Math.max(0, pendingToolCount - 1);
357
- }
358
- origToolUseCallback?.(event);
698
+ // Stall detection state (mutable object shared with runStallCheckTick)
699
+ const stallState = {
700
+ lastActivityTime: Date.now(),
701
+ stallWarningEmitted: false,
702
+ assessmentInProgress: false,
703
+ extensionsGranted: 0,
704
+ currentKillDeadline: Date.now() + (config.stallKillMs ?? 1_800_000),
705
+ nextWarningAfter: 0,
359
706
  };
707
+ // Tool activity tracking for stall assessment context
708
+ const toolTracking = setupToolTracking(config, stallState, ctx, sessionCapture, prompt, perfStart);
709
+ const { pendingTools, watchdog, toolWatchdogActive } = toolTracking;
710
+ // Mutable counters accessed by stall check tick
711
+ const toolCounters = toolTracking.counters;
360
712
  claudeProcess.stdout.on('data', (data) => {
361
- lastActivityTime = Date.now();
362
- stallWarningEmitted = false;
713
+ stallState.lastActivityTime = Date.now();
714
+ stallState.stallWarningEmitted = false;
715
+ stallState.nextWarningAfter = 0; // Real activity resets throttle
363
716
  // Push kill deadline forward on any activity
364
717
  const killMs = config.stallKillMs ?? 1_800_000;
365
- currentKillDeadline = Date.now() + killMs;
718
+ stallState.currentKillDeadline = Date.now() + killMs;
366
719
  if (!firstStdoutReceived) {
367
720
  firstStdoutReceived = true;
368
- if (config.verbose) {
369
- console.log(`[PERF] First stdout data: ${Date.now() - perfStart}ms`);
370
- }
721
+ verboseLog(config.verbose, `[PERF] First stdout data: ${Date.now() - perfStart}ms`);
371
722
  }
372
723
  const chunk = data.toString();
373
724
  stdout += chunk;
@@ -393,50 +744,24 @@ export async function executeClaudeCommand(prompt, _movementId, _sessionNumber,
393
744
  const stallHardCapMs = config.stallHardCapMs ?? 3_600_000;
394
745
  const maxExtensions = config.stallMaxExtensions ?? 3;
395
746
  const stallAssessEnabled = config.stallAssessEnabled !== false;
396
- const stallCheckInterval = setInterval(async () => {
397
- const now = Date.now();
398
- const silenceMs = now - lastActivityTime;
399
- const totalElapsed = now - perfStart;
400
- // Hard cap: absolute wall-clock limit regardless of extensions
401
- if (totalElapsed >= stallHardCapMs) {
402
- terminateStallProcess(claudeProcess, stallCheckInterval, config, `\n[[MSTRO_ERROR:EXECUTION_STALLED]] Hard time limit reached (${Math.round(stallHardCapMs / 60000)} min total). Terminating process.\n`);
403
- return;
404
- }
405
- // Kill deadline reached
406
- if (now >= currentKillDeadline) {
407
- terminateStallProcess(claudeProcess, stallCheckInterval, config, `\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Terminating process.\n`);
408
- return;
409
- }
410
- // Warning + assessment trigger
411
- if (silenceMs < stallWarningMs || stallWarningEmitted)
412
- return;
413
- stallWarningEmitted = true;
414
- const killIn = Math.round((currentKillDeadline - now) / 60_000);
415
- config.outputCallback?.(`\n[[MSTRO_ERROR:EXECUTION_STALLED]] No output for ${Math.round(silenceMs / 60_000)} minutes. Will terminate in ${killIn} minutes if no activity.\n`);
416
- // Run stall assessment if enabled and we haven't exhausted extensions
417
- if (!stallAssessEnabled || assessmentInProgress || extensionsGranted >= maxExtensions)
418
- return;
419
- assessmentInProgress = true;
420
- const stallCtx = {
421
- originalPrompt: prompt,
422
- silenceMs,
423
- lastToolName,
424
- lastToolInputSummary,
425
- pendingToolCount,
426
- totalToolCalls,
427
- elapsedTotalMs: totalElapsed,
428
- };
429
- const result = await runStallAssessment({ stallCtx, config, now, extensionsGranted, maxExtensions });
430
- if (result) {
431
- extensionsGranted = result.extensionsGranted;
432
- currentKillDeadline = result.currentKillDeadline;
433
- stallWarningEmitted = false; // Allow re-warning after extension
434
- }
435
- assessmentInProgress = false;
747
+ // eslint-disable-next-line prefer-const
748
+ let stallCheckInterval;
749
+ stallCheckInterval = setInterval(() => {
750
+ runStallCheckTick(stallState, {
751
+ perfStart, stallWarningMs, stallHardCapMs, maxExtensions, stallAssessEnabled,
752
+ toolWatchdogActive, prompt, pendingTools, lastToolInputSummary: toolCounters.lastToolInputSummary, totalToolCalls: toolCounters.totalToolCalls,
753
+ claudeProcess, stallCheckInterval, config,
754
+ });
436
755
  }, 10_000);
756
+ // Wire up the kill context now that stallCheckInterval exists
757
+ toolTracking.setKillContext(claudeProcess, stallCheckInterval);
437
758
  return new Promise((resolve, reject) => {
438
- claudeProcess.on('close', (code) => {
759
+ claudeProcess.on('close', async (code) => {
439
760
  clearInterval(stallCheckInterval);
761
+ watchdog?.clearAll();
762
+ const postTimeout = flushNativeTimeoutBuffers(ctx);
763
+ await classifyUnmatchedStderr(stderr, errorAlreadySurfaced, code, config);
764
+ const resumeBuffered = ctx.resumeAssessmentActive ? (ctx.resumeAssessmentBuffer || undefined) : undefined;
440
765
  if (claudeProcess.pid) {
441
766
  runningProcesses.delete(claudeProcess.pid);
442
767
  }
@@ -447,11 +772,15 @@ export async function executeClaudeCommand(prompt, _movementId, _sessionNumber,
447
772
  assistantResponse: ctx.accumulatedAssistantResponse || undefined,
448
773
  thinkingOutput: ctx.accumulatedThinking || undefined,
449
774
  toolUseHistory: ctx.accumulatedToolUse.length > 0 ? ctx.accumulatedToolUse : undefined,
450
- claudeSessionId: sessionCapture.claudeSessionId
775
+ claudeSessionId: sessionCapture.claudeSessionId,
776
+ nativeTimeoutCount: ctx.nativeTimeoutDetector.timeoutCount || undefined,
777
+ postTimeoutOutput: postTimeout,
778
+ resumeBufferedOutput: resumeBuffered,
451
779
  });
452
780
  });
453
781
  claudeProcess.on('error', (error) => {
454
782
  clearInterval(stallCheckInterval);
783
+ watchdog?.clearAll();
455
784
  if (claudeProcess.pid) {
456
785
  runningProcesses.delete(claudeProcess.pid);
457
786
  }