mstro-app 0.3.8 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/LICENSE +191 -21
  2. package/PRIVACY.md +286 -62
  3. package/README.md +81 -58
  4. package/bin/commands/status.js +1 -1
  5. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  6. package/dist/server/cli/headless/claude-invoker.js +22 -12
  7. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  8. package/dist/server/cli/headless/headless-logger.d.ts +10 -0
  9. package/dist/server/cli/headless/headless-logger.d.ts.map +1 -0
  10. package/dist/server/cli/headless/headless-logger.js +66 -0
  11. package/dist/server/cli/headless/headless-logger.js.map +1 -0
  12. package/dist/server/cli/headless/mcp-config.d.ts.map +1 -1
  13. package/dist/server/cli/headless/mcp-config.js +6 -5
  14. package/dist/server/cli/headless/mcp-config.js.map +1 -1
  15. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  16. package/dist/server/cli/headless/runner.js +4 -0
  17. package/dist/server/cli/headless/runner.js.map +1 -1
  18. package/dist/server/cli/headless/stall-assessor.d.ts +21 -0
  19. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  20. package/dist/server/cli/headless/stall-assessor.js +100 -24
  21. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  22. package/dist/server/cli/headless/tool-watchdog.d.ts +0 -12
  23. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -1
  24. package/dist/server/cli/headless/tool-watchdog.js +22 -9
  25. package/dist/server/cli/headless/tool-watchdog.js.map +1 -1
  26. package/dist/server/cli/headless/types.d.ts +8 -1
  27. package/dist/server/cli/headless/types.d.ts.map +1 -1
  28. package/dist/server/cli/improvisation-session-manager.d.ts +16 -0
  29. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  30. package/dist/server/cli/improvisation-session-manager.js +94 -11
  31. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  32. package/dist/server/mcp/bouncer-cli.d.ts +3 -0
  33. package/dist/server/mcp/bouncer-cli.d.ts.map +1 -0
  34. package/dist/server/mcp/bouncer-cli.js +54 -0
  35. package/dist/server/mcp/bouncer-cli.js.map +1 -0
  36. package/dist/server/services/plan/composer.d.ts +4 -0
  37. package/dist/server/services/plan/composer.d.ts.map +1 -0
  38. package/dist/server/services/plan/composer.js +181 -0
  39. package/dist/server/services/plan/composer.js.map +1 -0
  40. package/dist/server/services/plan/dependency-resolver.d.ts +28 -0
  41. package/dist/server/services/plan/dependency-resolver.d.ts.map +1 -0
  42. package/dist/server/services/plan/dependency-resolver.js +154 -0
  43. package/dist/server/services/plan/dependency-resolver.js.map +1 -0
  44. package/dist/server/services/plan/executor.d.ts +110 -0
  45. package/dist/server/services/plan/executor.d.ts.map +1 -0
  46. package/dist/server/services/plan/executor.js +641 -0
  47. package/dist/server/services/plan/executor.js.map +1 -0
  48. package/dist/server/services/plan/parser.d.ts +11 -0
  49. package/dist/server/services/plan/parser.d.ts.map +1 -0
  50. package/dist/server/services/plan/parser.js +445 -0
  51. package/dist/server/services/plan/parser.js.map +1 -0
  52. package/dist/server/services/plan/state-reconciler.d.ts +2 -0
  53. package/dist/server/services/plan/state-reconciler.d.ts.map +1 -0
  54. package/dist/server/services/plan/state-reconciler.js +145 -0
  55. package/dist/server/services/plan/state-reconciler.js.map +1 -0
  56. package/dist/server/services/plan/types.d.ts +121 -0
  57. package/dist/server/services/plan/types.d.ts.map +1 -0
  58. package/dist/server/services/plan/types.js +4 -0
  59. package/dist/server/services/plan/types.js.map +1 -0
  60. package/dist/server/services/plan/watcher.d.ts +14 -0
  61. package/dist/server/services/plan/watcher.d.ts.map +1 -0
  62. package/dist/server/services/plan/watcher.js +69 -0
  63. package/dist/server/services/plan/watcher.js.map +1 -0
  64. package/dist/server/services/websocket/file-explorer-handlers.js +20 -0
  65. package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -1
  66. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  67. package/dist/server/services/websocket/handler.js +21 -0
  68. package/dist/server/services/websocket/handler.js.map +1 -1
  69. package/dist/server/services/websocket/plan-handlers.d.ts +6 -0
  70. package/dist/server/services/websocket/plan-handlers.d.ts.map +1 -0
  71. package/dist/server/services/websocket/plan-handlers.js +494 -0
  72. package/dist/server/services/websocket/plan-handlers.js.map +1 -0
  73. package/dist/server/services/websocket/quality-handlers.d.ts.map +1 -1
  74. package/dist/server/services/websocket/quality-handlers.js +384 -12
  75. package/dist/server/services/websocket/quality-handlers.js.map +1 -1
  76. package/dist/server/services/websocket/quality-persistence.d.ts +45 -0
  77. package/dist/server/services/websocket/quality-persistence.d.ts.map +1 -0
  78. package/dist/server/services/websocket/quality-persistence.js +187 -0
  79. package/dist/server/services/websocket/quality-persistence.js.map +1 -0
  80. package/dist/server/services/websocket/quality-service.d.ts +12 -2
  81. package/dist/server/services/websocket/quality-service.d.ts.map +1 -1
  82. package/dist/server/services/websocket/quality-service.js +162 -18
  83. package/dist/server/services/websocket/quality-service.js.map +1 -1
  84. package/dist/server/services/websocket/types.d.ts +2 -2
  85. package/dist/server/services/websocket/types.d.ts.map +1 -1
  86. package/package.json +3 -3
  87. package/server/cli/headless/claude-invoker.ts +25 -12
  88. package/server/cli/headless/headless-logger.ts +78 -0
  89. package/server/cli/headless/mcp-config.ts +6 -5
  90. package/server/cli/headless/runner.ts +4 -0
  91. package/server/cli/headless/stall-assessor.ts +131 -24
  92. package/server/cli/headless/tool-watchdog.ts +10 -9
  93. package/server/cli/headless/types.ts +10 -1
  94. package/server/cli/improvisation-session-manager.ts +118 -11
  95. package/server/mcp/bouncer-cli.ts +73 -0
  96. package/server/services/plan/composer.ts +199 -0
  97. package/server/services/plan/dependency-resolver.ts +182 -0
  98. package/server/services/plan/executor.ts +700 -0
  99. package/server/services/plan/parser.ts +491 -0
  100. package/server/services/plan/state-reconciler.ts +174 -0
  101. package/server/services/plan/types.ts +166 -0
  102. package/server/services/plan/watcher.ts +73 -0
  103. package/server/services/websocket/file-explorer-handlers.ts +20 -0
  104. package/server/services/websocket/handler.ts +21 -0
  105. package/server/services/websocket/plan-handlers.ts +592 -0
  106. package/server/services/websocket/quality-handlers.ts +450 -12
  107. package/server/services/websocket/quality-persistence.ts +250 -0
  108. package/server/services/websocket/quality-service.ts +183 -18
  109. package/server/services/websocket/types.ts +48 -2
@@ -17,6 +17,7 @@
17
17
  */
18
18
 
19
19
  import { type ChildProcess, spawn } from 'node:child_process';
20
+ import { hlog } from './headless-logger.js';
20
21
 
21
22
  export interface StallContext {
22
23
  /** The original user prompt being executed */
@@ -48,6 +49,35 @@ export interface StallVerdict {
48
49
  reason: string;
49
50
  }
50
51
 
52
+ /** Check if Task/Agent subagents are currently pending (producing expected silence) */
53
+ function hasSubagentPending(pendingNames: Set<string>, lastToolName: string | undefined, hasPendingTools: boolean): boolean {
54
+ return pendingNames.has('Task') || pendingNames.has('Agent')
55
+ || ((lastToolName === 'Task' || lastToolName === 'Agent') && hasPendingTools);
56
+ }
57
+
58
+ /**
59
+ * Check if an Agent Teams lead is idle-waiting for teammate notifications.
60
+ * After spawning teammates (Agent tool calls complete), the lead has no pending
61
+ * tools but is legitimately waiting for teammate idle events.
62
+ */
63
+ function checkAgentTeamsWaiting(ctx: StallContext, hasPendingTools: boolean): StallVerdict | null {
64
+ // The lead may use any tool while waiting (Glob to verify outputs, Bash to
65
+ // check disk, ToolSearch, etc.), so don't gate on lastToolName. The key
66
+ // signal is: prompt contains team_name, tools were called, nothing pending.
67
+ if (
68
+ !hasPendingTools &&
69
+ ctx.totalToolCalls > 0 &&
70
+ ctx.originalPrompt.includes('team_name')
71
+ ) {
72
+ return {
73
+ action: 'extend',
74
+ extensionMs: 30 * 60_000,
75
+ reason: 'Agent Teams lead waiting for teammate idle notifications — extending 30 min',
76
+ };
77
+ }
78
+ return null;
79
+ }
80
+
51
81
  /**
52
82
  * Fast heuristic for known long-running patterns.
53
83
  * Returns a verdict immediately if the pattern is recognized, null otherwise.
@@ -86,11 +116,7 @@ function quickHeuristic(ctx: StallContext, toolWatchdogActive = false): StallVer
86
116
 
87
117
  // Task/subagent launches are known to produce long silence periods.
88
118
  // The parent Claude process emits nothing while waiting for subagent results.
89
- // Check pendingToolNames (reliable) first, fall back to lastToolName (legacy).
90
- // Claude Code renamed Task → Agent; check both for backward compatibility
91
- const hasTaskPending = pendingNames.has('Task') || pendingNames.has('Agent')
92
- || ((ctx.lastToolName === 'Task' || ctx.lastToolName === 'Agent') && hasPendingTools);
93
- if (hasTaskPending) {
119
+ if (hasSubagentPending(pendingNames, ctx.lastToolName, hasPendingTools)) {
94
120
  const extensionMin = Math.min(30, 10 + ctx.pendingToolCount * 5);
95
121
  return {
96
122
  action: 'extend',
@@ -99,6 +125,10 @@ function quickHeuristic(ctx: StallContext, toolWatchdogActive = false): StallVer
99
125
  };
100
126
  }
101
127
 
128
+ // Agent Teams lead waiting for teammate idle notifications (extracted for complexity)
129
+ const agentTeamsVerdict = checkAgentTeamsWaiting(ctx, hasPendingTools);
130
+ if (agentTeamsVerdict) return agentTeamsVerdict;
131
+
102
132
  // Multiple parallel tool calls (e.g., parallel Bash, parallel Read/Grep)
103
133
  if (ctx.pendingToolCount >= 3) {
104
134
  return {
@@ -137,7 +167,7 @@ export async function assessStall(
137
167
  const quick = quickHeuristic(ctx, toolWatchdogActive);
138
168
  if (quick) {
139
169
  if (verbose) {
140
- console.log(`[STALL-ASSESS] Heuristic verdict: ${quick.reason}`);
170
+ hlog(`[STALL-ASSESS] Heuristic verdict: ${quick.reason}`);
141
171
  }
142
172
  return quick;
143
173
  }
@@ -145,12 +175,12 @@ export async function assessStall(
145
175
  // Layer 2: Haiku assessment
146
176
  try {
147
177
  if (verbose) {
148
- console.log('[STALL-ASSESS] Running Haiku assessment...');
178
+ hlog('[STALL-ASSESS] Running Haiku assessment...');
149
179
  }
150
180
  return await runHaikuAssessment(ctx, claudeCommand, verbose);
151
181
  } catch (err) {
152
182
  if (verbose) {
153
- console.log(`[STALL-ASSESS] Haiku assessment failed: ${err}`);
183
+ hlog(`[STALL-ASSESS] Haiku assessment failed: ${err}`);
154
184
  }
155
185
  // If Haiku fails (timeout, auth issue, etc.), extend cautiously
156
186
  return {
@@ -220,13 +250,13 @@ export async function assessToolTimeout(
220
250
 
221
251
  try {
222
252
  if (verbose) {
223
- console.log(`[TOOL-ASSESS] Running Haiku assessment for ${toolName} (${elapsedSec}s elapsed)...`);
253
+ hlog(`[TOOL-ASSESS] Running Haiku assessment for ${toolName} (${elapsedSec}s elapsed)...`);
224
254
  }
225
255
 
226
256
  return await spawnHaikuVerdict(prompt, claudeCommand, verbose, 'TOOL-ASSESS');
227
257
  } catch (err) {
228
258
  if (verbose) {
229
- console.log(`[TOOL-ASSESS] Haiku assessment failed: ${err}`);
259
+ hlog(`[TOOL-ASSESS] Haiku assessment failed: ${err}`);
230
260
  }
231
261
  // On failure, default to kill (the tool has already exceeded its timeout)
232
262
  return {
@@ -295,7 +325,7 @@ export async function assessContextLoss(
295
325
 
296
326
  try {
297
327
  if (verbose) {
298
- console.log(`[CONTEXT-ASSESS] Running Haiku assessment (${ctx.effectiveTimeouts} timeouts, ${ctx.successfulToolCalls} successes, ${ctx.thinkingOutputLength} thinking chars)...`);
328
+ hlog(`[CONTEXT-ASSESS] Running Haiku assessment (${ctx.effectiveTimeouts} timeouts, ${ctx.successfulToolCalls} successes, ${ctx.thinkingOutputLength} thinking chars)...`);
299
329
  }
300
330
 
301
331
  const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'CONTEXT-ASSESS');
@@ -303,13 +333,13 @@ export async function assessContextLoss(
303
333
  const contextLost = parsed.verdict === 'STALLED';
304
334
 
305
335
  if (verbose) {
306
- console.log(`[CONTEXT-ASSESS] Verdict: ${contextLost ? 'LOST' : 'CONTINUED'} — ${parsed.reason}`);
336
+ hlog(`[CONTEXT-ASSESS] Verdict: ${contextLost ? 'LOST' : 'CONTINUED'} — ${parsed.reason}`);
307
337
  }
308
338
 
309
339
  return { contextLost, reason: parsed.reason };
310
340
  } catch (err) {
311
341
  if (verbose) {
312
- console.log(`[CONTEXT-ASSESS] Haiku assessment failed: ${err}`);
342
+ hlog(`[CONTEXT-ASSESS] Haiku assessment failed: ${err}`);
313
343
  }
314
344
  // On failure, assume context was lost (safer to retry than to show a confused response)
315
345
  return {
@@ -419,7 +449,7 @@ function spawnHaikuRaw(
419
449
 
420
450
  proc.stderr!.on('data', (data) => {
421
451
  if (verbose) {
422
- console.log(`[${label}] haiku stderr: ${data.toString().trim()}`);
452
+ hlog(`[${label}] haiku stderr: ${data.toString().trim()}`);
423
453
  }
424
454
  });
425
455
 
@@ -434,7 +464,7 @@ function spawnHaikuRaw(
434
464
  }
435
465
 
436
466
  if (verbose) {
437
- console.log(`[${label}] Haiku response: ${stdout.trim()}`);
467
+ hlog(`[${label}] Haiku response: ${stdout.trim()}`);
438
468
  }
439
469
 
440
470
  resolve(stdout.trim());
@@ -521,7 +551,7 @@ export async function assessApproval(
521
551
 
522
552
  try {
523
553
  if (verbose) {
524
- console.log('[APPROVAL-ASSESS] Running Haiku assessment...');
554
+ hlog('[APPROVAL-ASSESS] Running Haiku assessment...');
525
555
  }
526
556
 
527
557
  const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'APPROVAL-ASSESS');
@@ -529,19 +559,96 @@ export async function assessApproval(
529
559
  const isApproval = parsed.verdict.includes('APPROVAL');
530
560
 
531
561
  if (verbose) {
532
- console.log(`[APPROVAL-ASSESS] Verdict: ${isApproval ? 'APPROVAL' : 'NEW_TASK'} — ${parsed.reason}`);
562
+ hlog(`[APPROVAL-ASSESS] Verdict: ${isApproval ? 'APPROVAL' : 'NEW_TASK'} — ${parsed.reason}`);
533
563
  }
534
564
 
535
565
  return { isApproval, reason: parsed.reason };
536
566
  } catch (err) {
537
567
  if (verbose) {
538
- console.log(`[APPROVAL-ASSESS] Haiku assessment failed: ${err}`);
568
+ hlog(`[APPROVAL-ASSESS] Haiku assessment failed: ${err}`);
539
569
  }
540
570
  // On failure, assume not an approval (safer to treat as new task)
541
571
  return { isApproval: false, reason: `Assessment failed: ${err}` };
542
572
  }
543
573
  }
544
574
 
575
+ // ========== Premature Completion Assessment ==========
576
+
577
+ export interface PrematureCompletionContext {
578
+ /** The trailing portion of the assistant response (last ~800 chars) */
579
+ responseTail: string;
580
+ /** Total number of successful tool calls in this execution */
581
+ successfulToolCalls: number;
582
+ /** Whether extended thinking output was produced */
583
+ hasThinking: boolean;
584
+ /** Total response length */
585
+ responseLength: number;
586
+ }
587
+
588
+ export interface PrematureCompletionVerdict {
589
+ /** True if the task appears incomplete and should be auto-continued */
590
+ isIncomplete: boolean;
591
+ reason: string;
592
+ }
593
+
594
+ /**
595
+ * Assess whether a completed Claude execution ended prematurely.
596
+ * Called when stop_reason is 'end_turn' but the task may not be finished.
597
+ * Haiku determines if the trailing response text indicates planned-but-unexecuted work.
598
+ */
599
+ export async function assessPrematureCompletion(
600
+ ctx: PrematureCompletionContext,
601
+ claudeCommand: string,
602
+ verbose: boolean,
603
+ ): Promise<PrematureCompletionVerdict> {
604
+ const prompt = [
605
+ 'You are analyzing the FINAL output of a Claude Code agent that just exited normally.',
606
+ 'Determine whether the agent finished its task or stopped prematurely mid-work.',
607
+ '',
608
+ 'Session signals:',
609
+ `- ${ctx.successfulToolCalls} tool calls completed successfully`,
610
+ `- Response length: ${ctx.responseLength} characters`,
611
+ `- Extended thinking: ${ctx.hasThinking ? 'YES' : 'NO'}`,
612
+ '',
613
+ `Final response text (last ${ctx.responseTail.length} chars):`,
614
+ ctx.responseTail,
615
+ '',
616
+ 'INCOMPLETE signals: "Now I\'ll...", "Let me fix...", "Next I\'ll...", "Moving on to...",',
617
+ '"I\'ll continue with...", announcing next steps that were never executed,',
618
+ 'describing work that will happen next but no tool call followed.',
619
+ '',
620
+ 'COMPLETE signals: summarizing what was done, confirming changes, reporting results,',
621
+ 'asking the user a question, past-tense descriptions of completed work,',
622
+ '"all done", "changes applied", referencing finished state.',
623
+ '',
624
+ 'Respond in EXACTLY this format (2 lines, no extra text):',
625
+ 'VERDICT: COMPLETE or INCOMPLETE',
626
+ 'REASON: <brief one-line explanation>',
627
+ ].join('\n');
628
+
629
+ try {
630
+ if (verbose) {
631
+ hlog(`[PREMATURE-ASSESS] Running Haiku assessment (${ctx.successfulToolCalls} tools, ${ctx.responseLength} chars)...`);
632
+ }
633
+
634
+ const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'PREMATURE-ASSESS');
635
+ const parsed = parseVerdictResponse(raw);
636
+ const isIncomplete = parsed.verdict.includes('INCOMPLETE');
637
+
638
+ if (verbose) {
639
+ hlog(`[PREMATURE-ASSESS] Verdict: ${isIncomplete ? 'INCOMPLETE' : 'COMPLETE'} — ${parsed.reason}`);
640
+ }
641
+
642
+ return { isIncomplete, reason: parsed.reason };
643
+ } catch (err) {
644
+ if (verbose) {
645
+ hlog(`[PREMATURE-ASSESS] Haiku assessment failed: ${err}`);
646
+ }
647
+ // On failure, don't retry — safer to let the user decide than to auto-continue incorrectly
648
+ return { isIncomplete: false, reason: `Assessment failed: ${err}` };
649
+ }
650
+ }
651
+
545
652
  // ========== Best Result Comparison ==========
546
653
 
547
654
  export interface BestResultContext {
@@ -602,7 +709,7 @@ export async function assessBestResult(
602
709
 
603
710
  try {
604
711
  if (verbose) {
605
- console.log('[BEST-RESULT] Running Haiku assessment...');
712
+ hlog('[BEST-RESULT] Running Haiku assessment...');
606
713
  }
607
714
 
608
715
  const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'BEST-RESULT');
@@ -610,13 +717,13 @@ export async function assessBestResult(
610
717
  const winner: 'A' | 'B' = parsed.verdict.includes('B') ? 'B' : 'A';
611
718
 
612
719
  if (verbose) {
613
- console.log(`[BEST-RESULT] Verdict: ${winner} — ${parsed.reason}`);
720
+ hlog(`[BEST-RESULT] Verdict: ${winner} — ${parsed.reason}`);
614
721
  }
615
722
 
616
723
  return { winner, reason: parsed.reason };
617
724
  } catch (err) {
618
725
  if (verbose) {
619
- console.log(`[BEST-RESULT] Haiku assessment failed: ${err}`);
726
+ hlog(`[BEST-RESULT] Haiku assessment failed: ${err}`);
620
727
  }
621
728
  // On failure, prefer A (the previously-tracked best result)
622
729
  return { winner: 'A', reason: `Assessment failed: ${err}` };
@@ -671,7 +778,7 @@ export async function classifyError(
671
778
 
672
779
  try {
673
780
  if (verbose) {
674
- console.log('[ERROR-CLASSIFY] Running Haiku assessment...');
781
+ hlog('[ERROR-CLASSIFY] Running Haiku assessment...');
675
782
  }
676
783
 
677
784
  const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'ERROR-CLASSIFY');
@@ -691,13 +798,13 @@ export async function classifyError(
691
798
  if (category === 'UNKNOWN' || !message) return null;
692
799
 
693
800
  if (verbose) {
694
- console.log(`[ERROR-CLASSIFY] Verdict: ${category} — ${message}`);
801
+ hlog(`[ERROR-CLASSIFY] Verdict: ${category} — ${message}`);
695
802
  }
696
803
 
697
804
  return { errorCode: category, message };
698
805
  } catch (err) {
699
806
  if (verbose) {
700
- console.log(`[ERROR-CLASSIFY] Haiku assessment failed: ${err}`);
807
+ hlog(`[ERROR-CLASSIFY] Haiku assessment failed: ${err}`);
701
808
  }
702
809
  return null;
703
810
  }
@@ -14,6 +14,7 @@
14
14
  * 3. Haiku tiebreaker: optional AI assessment before killing ambiguous cases
15
15
  */
16
16
 
17
+ import { hlog } from './headless-logger.js';
17
18
  import type {
18
19
  ExecutionCheckpoint,
19
20
  ToolDurationTracker,
@@ -167,7 +168,7 @@ export class ToolWatchdog {
167
168
  sampleCount: 1,
168
169
  });
169
170
  if (this.verbose) {
170
- console.log(`[WATCHDOG] ${toolName}: first sample ${durationMs}ms, initial timeout ${this.getTimeout(toolName)}ms`);
171
+ hlog(`[WATCHDOG] ${toolName}: first sample ${durationMs}ms, initial timeout ${this.getTimeout(toolName)}ms`);
171
172
  }
172
173
  return;
173
174
  }
@@ -178,7 +179,7 @@ export class ToolWatchdog {
178
179
  tracker.sampleCount++;
179
180
 
180
181
  if (this.verbose) {
181
- console.log(`[WATCHDOG] ${toolName}: sample #${tracker.sampleCount} ${durationMs}ms, est=${Math.round(tracker.estimatedDuration)}ms, dev=${Math.round(tracker.deviation)}ms, timeout=${this.getTimeout(toolName)}ms`);
182
+ hlog(`[WATCHDOG] ${toolName}: sample #${tracker.sampleCount} ${durationMs}ms, est=${Math.round(tracker.estimatedDuration)}ms, dev=${Math.round(tracker.deviation)}ms, timeout=${this.getTimeout(toolName)}ms`);
182
183
  }
183
184
  }
184
185
 
@@ -208,7 +209,7 @@ export class ToolWatchdog {
208
209
  const profile = this.getProfile(toolName);
209
210
 
210
211
  if (this.verbose) {
211
- console.log(`[WATCHDOG] Starting watch: ${toolName} (${toolId}), timeout=${Math.round(timeoutMs / 1000)}s`);
212
+ hlog(`[WATCHDOG] Starting watch: ${toolName} (${toolId}), timeout=${Math.round(timeoutMs / 1000)}s`);
212
213
  }
213
214
 
214
215
  const timer = setTimeout(async () => {
@@ -245,7 +246,7 @@ export class ToolWatchdog {
245
246
 
246
247
  if (!profile.useHaikuTiebreaker || !this.onTiebreaker || watch.tiebreakerAttempted) {
247
248
  if (this.verbose) {
248
- console.log(`[WATCHDOG] ${toolName} (${toolId}) timed out after ${Math.round(elapsedMs / 1000)}s, killing`);
249
+ hlog(`[WATCHDOG] ${toolName} (${toolId}) timed out after ${Math.round(elapsedMs / 1000)}s, killing`);
249
250
  }
250
251
  return false;
251
252
  }
@@ -265,7 +266,7 @@ export class ToolWatchdog {
265
266
  watch.tiebreakerAttempted = true;
266
267
 
267
268
  if (this.verbose) {
268
- console.log(`[WATCHDOG] ${toolName} (${toolId}) hit timeout after ${Math.round(elapsedMs / 1000)}s, running tiebreaker...`);
269
+ hlog(`[WATCHDOG] ${toolName} (${toolId}) hit timeout after ${Math.round(elapsedMs / 1000)}s, running tiebreaker...`);
269
270
  }
270
271
 
271
272
  try {
@@ -274,7 +275,7 @@ export class ToolWatchdog {
274
275
 
275
276
  if (verdict.action === 'extend') {
276
277
  if (this.verbose) {
277
- console.log(`[WATCHDOG] Tiebreaker: extend ${toolName} by ${Math.round(verdict.extensionMs / 1000)}s — ${verdict.reason}`);
278
+ hlog(`[WATCHDOG] Tiebreaker: extend ${toolName} by ${Math.round(verdict.extensionMs / 1000)}s — ${verdict.reason}`);
278
279
  }
279
280
  this.scheduleExtensionTimeout(watch, toolId, toolName, verdict.extensionMs, onTimeout);
280
281
  watch.timeoutMs = elapsedMs + verdict.extensionMs;
@@ -282,11 +283,11 @@ export class ToolWatchdog {
282
283
  }
283
284
 
284
285
  if (this.verbose) {
285
- console.log(`[WATCHDOG] Tiebreaker: kill ${toolName} — ${verdict.reason}`);
286
+ hlog(`[WATCHDOG] Tiebreaker: kill ${toolName} — ${verdict.reason}`);
286
287
  }
287
288
  } catch (err) {
288
289
  if (this.verbose) {
289
- console.log(`[WATCHDOG] Tiebreaker failed: ${err}, proceeding with kill`);
290
+ hlog(`[WATCHDOG] Tiebreaker failed: ${err}, proceeding with kill`);
290
291
  }
291
292
  }
292
293
 
@@ -305,7 +306,7 @@ export class ToolWatchdog {
305
306
  const w = this.activeWatches.get(toolId);
306
307
  if (!w) return;
307
308
  if (this.verbose) {
308
- console.log(`[WATCHDOG] ${toolName} (${toolId}) still running after extension, killing`);
309
+ hlog(`[WATCHDOG] ${toolName} (${toolId}) still running after extension, killing`);
309
310
  }
310
311
  // Don't delete the watch — buildCheckpoint() needs it.
311
312
  // handleToolTimeout() calls clearAll() after building the checkpoint.
@@ -121,6 +121,8 @@ export interface HeadlessConfig {
121
121
  onToolTimeout?: (checkpoint: ExecutionCheckpoint) => void;
122
122
  /** When true, spawn Claude with sanitized env (strips secrets, HOME=workingDir) */
123
123
  sandboxed?: boolean;
124
+ /** Extra environment variables to merge into the spawned Claude process env */
125
+ extraEnv?: Record<string, string>;
124
126
  }
125
127
 
126
128
  export interface SessionState {
@@ -165,6 +167,8 @@ export interface SessionResult {
165
167
  /** Assistant text buffered during resume assessment — held back until thinking/tool activity
166
168
  * confirms Claude has context. Undefined when not in resume mode or buffer was flushed. */
167
169
  resumeBufferedOutput?: string;
170
+ /** Claude Code result event stop_reason: 'end_turn', 'max_tokens', or undefined if not captured */
171
+ stopReason?: string;
168
172
  }
169
173
 
170
174
  export interface ToolUseAccumulator {
@@ -200,10 +204,12 @@ export interface ExecutionResult {
200
204
  resumeBufferedOutput?: string;
201
205
  /** Actual API token usage from Claude Code stream events (summed across all turns) */
202
206
  apiTokenUsage?: { inputTokens: number; outputTokens: number };
207
+ /** Claude Code result event stop_reason: 'end_turn', 'max_tokens', or undefined if not captured */
208
+ stopReason?: string;
203
209
  }
204
210
 
205
211
  /** Resolved config with all defaults applied */
206
- export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallback' | 'thinkingCallback' | 'toolUseCallback' | 'tokenUsageCallback' | 'continueSession' | 'claudeSessionId' | 'imageAttachments' | 'model' | 'toolTimeoutProfiles' | 'onToolTimeout' | 'sandboxed'> & {
212
+ export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallback' | 'thinkingCallback' | 'toolUseCallback' | 'tokenUsageCallback' | 'continueSession' | 'claudeSessionId' | 'imageAttachments' | 'model' | 'toolTimeoutProfiles' | 'onToolTimeout' | 'sandboxed' | 'extraEnv'> & {
207
213
  outputCallback?: (text: string) => void;
208
214
  thinkingCallback?: (text: string) => void;
209
215
  toolUseCallback?: (event: ToolUseEvent) => void;
@@ -215,4 +221,7 @@ export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallb
215
221
  toolTimeoutProfiles?: Record<string, Partial<ToolTimeoutProfile>>;
216
222
  onToolTimeout?: (checkpoint: ExecutionCheckpoint) => void;
217
223
  sandboxed?: boolean;
224
+ extraEnv?: Record<string, string>;
218
225
  };
226
+
227
+
@@ -12,8 +12,9 @@ import { EventEmitter } from 'node:events';
12
12
  import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
13
13
  import { join } from 'node:path';
14
14
  import { AnalyticsEvents, trackEvent } from '../services/analytics.js';
15
+ import { herror, hlog } from './headless/headless-logger.js';
15
16
  import { HeadlessRunner } from './headless/index.js';
16
- import { assessBestResult, assessContextLoss, type ContextLossContext } from './headless/stall-assessor.js';
17
+ import { assessBestResult, assessContextLoss, assessPrematureCompletion, type ContextLossContext } from './headless/stall-assessor.js';
17
18
  import type { ExecutionCheckpoint } from './headless/types.js';
18
19
 
19
20
  export interface ImprovisationOptions {
@@ -302,7 +303,7 @@ export class ImprovisationSessionManager extends EventEmitter {
302
303
  writeFileSync(filePath, Buffer.from(attachment.content, 'base64'));
303
304
  paths.push(filePath);
304
305
  } catch (err) {
305
- console.error(`Failed to persist attachment ${attachment.fileName}:`, err);
306
+ herror(`Failed to persist attachment ${attachment.fileName}:`, err);
306
307
  }
307
308
  }
308
309
 
@@ -503,6 +504,8 @@ export class ImprovisationSessionManager extends EventEmitter {
503
504
  if (this.shouldRetrySignalCrash(result, state, maxRetries, promptWithAttachments)) continue;
504
505
  if (this.shouldRetryContextLoss(result, state, useResume, nativeTimeouts, maxRetries, promptWithAttachments)) continue;
505
506
  if (this.applyToolTimeoutRetry(state, maxRetries, promptWithAttachments)) continue;
507
+ // Premature completion: model exited normally but task appears incomplete
508
+ if (await this.shouldRetryPrematureCompletion(result, state, maxRetries)) continue;
506
509
  break;
507
510
  }
508
511
  return result;
@@ -522,7 +525,7 @@ export class ImprovisationSessionManager extends EventEmitter {
522
525
  try {
523
526
  attachment.content = readFileSync(attachment.filePath).toString('base64');
524
527
  } catch (err) {
525
- console.error(`Failed to read pre-uploaded image ${attachment.filePath}:`, err);
528
+ herror(`Failed to read pre-uploaded image ${attachment.filePath}:`, err);
526
529
  attachment.isImage = false;
527
530
  }
528
531
  }
@@ -662,17 +665,17 @@ export class ImprovisationSessionManager extends EventEmitter {
662
665
  }
663
666
  if (!result.assistantResponse || result.assistantResponse.trim().length === 0) {
664
667
  state.contextLost = true;
665
- if (this.options.verbose) console.log('[CONTEXT-RECOVERY] Resume context loss: null/empty response');
668
+ if (this.options.verbose) hlog('[CONTEXT-RECOVERY] Resume context loss: null/empty response');
666
669
  } else if (result.resumeBufferedOutput !== undefined) {
667
670
  state.contextLost = true;
668
- if (this.options.verbose) console.log('[CONTEXT-RECOVERY] Resume context loss: buffer never flushed (no thinking/tools)');
671
+ if (this.options.verbose) hlog('[CONTEXT-RECOVERY] Resume context loss: buffer never flushed (no thinking/tools)');
669
672
  } else if (
670
673
  (!result.toolUseHistory || result.toolUseHistory.length === 0) &&
671
674
  !result.thinkingOutput &&
672
675
  result.assistantResponse.length < 500
673
676
  ) {
674
677
  state.contextLost = true;
675
- if (this.options.verbose) console.log('[CONTEXT-RECOVERY] Resume context loss: no tools, no thinking, short response');
678
+ if (this.options.verbose) hlog('[CONTEXT-RECOVERY] Resume context loss: no tools, no thinking, short response');
676
679
  }
677
680
  }
678
681
 
@@ -716,7 +719,7 @@ export class ImprovisationSessionManager extends EventEmitter {
716
719
  const verdict = await assessContextLoss(contextLossCtx, claudeCmd, this.options.verbose);
717
720
  state.contextLost = verdict.contextLost;
718
721
  if (this.options.verbose) {
719
- console.log(`[CONTEXT-RECOVERY] Haiku verdict: ${state.contextLost ? 'LOST' : 'OK'} — ${verdict.reason}`);
722
+ hlog(`[CONTEXT-RECOVERY] Haiku verdict: ${state.contextLost ? 'LOST' : 'OK'} — ${verdict.reason}`);
720
723
  }
721
724
  }
722
725
 
@@ -1015,6 +1018,110 @@ export class ImprovisationSessionManager extends EventEmitter {
1015
1018
  return parts.join('\n');
1016
1019
  }
1017
1020
 
1021
+ /**
1022
+ * Detect premature completion: Claude exited normally (exit code 0, end_turn) but the
1023
+ * response indicates more work was planned. This happens when the model "context-fatigues"
1024
+ * during long multi-step tasks and produces end_turn after completing a subset of the work.
1025
+ *
1026
+ * Two paths:
1027
+ * - max_tokens: always retry (model was forcibly stopped mid-generation)
1028
+ * - end_turn: Haiku assessment determines if the response looks incomplete
1029
+ */
1030
+ private async shouldRetryPrematureCompletion(
1031
+ result: HeadlessRunResult,
1032
+ state: RetryLoopState,
1033
+ maxRetries: number,
1034
+ ): Promise<boolean> {
1035
+ if (!this.isPrematureCompletionCandidate(result, state, maxRetries)) {
1036
+ return false;
1037
+ }
1038
+
1039
+ const stopReason = result.stopReason!;
1040
+ const isMaxTokens = stopReason === 'max_tokens';
1041
+ const isIncomplete = isMaxTokens || await this.assessEndTurnCompletion(result);
1042
+
1043
+ if (!isIncomplete) return false;
1044
+
1045
+ this.applyPrematureCompletionRetry(result, state, maxRetries, stopReason, isMaxTokens);
1046
+ return true;
1047
+ }
1048
+
1049
+ /** Guard checks for premature completion — must pass all to proceed with assessment */
1050
+ private isPrematureCompletionCandidate(
1051
+ result: HeadlessRunResult,
1052
+ state: RetryLoopState,
1053
+ maxRetries: number,
1054
+ ): boolean {
1055
+ // Only trigger for clean exits (completed, no signal) while retry budget remains
1056
+ if (!result.completed || result.signalName || state.retryNumber >= maxRetries) return false;
1057
+ // Don't re-trigger if other recovery paths already handled this iteration
1058
+ if (state.checkpointRef.value || state.contextLost) return false;
1059
+ // Must have a session ID to resume, and a stop reason to classify
1060
+ if (!result.claudeSessionId || !result.stopReason) return false;
1061
+ // Only act on max_tokens or end_turn
1062
+ return result.stopReason === 'max_tokens' || result.stopReason === 'end_turn';
1063
+ }
1064
+
1065
+ /** Use Haiku to assess whether an end_turn response is incomplete; resolves true when the task appears unfinished, false when complete or when there is no response text */
1066
+ private async assessEndTurnCompletion(result: HeadlessRunResult): Promise<boolean> {
1067
+ if (!result.assistantResponse) return false;
1068
+
1069
+ const claudeCmd = process.env.CLAUDE_COMMAND || 'claude';
1070
+ const verdict = await assessPrematureCompletion({
1071
+ responseTail: result.assistantResponse.slice(-800),
1072
+ successfulToolCalls: result.toolUseHistory?.filter(t => t.result !== undefined && !t.isError).length ?? 0,
1073
+ hasThinking: !!result.thinkingOutput,
1074
+ responseLength: result.assistantResponse.length,
1075
+ }, claudeCmd, this.options.verbose);
1076
+
1077
+ if (this.options.verbose) {
1078
+ hlog(`[PREMATURE-COMPLETION] Haiku verdict: ${verdict.isIncomplete ? 'INCOMPLETE' : 'COMPLETE'} — ${verdict.reason}`);
1079
+ }
1080
+ return verdict.isIncomplete;
1081
+ }
1082
+
1083
+ /** Apply the retry: emit events, update state, set continuation prompt */
1084
+ private applyPrematureCompletionRetry(
1085
+ result: HeadlessRunResult,
1086
+ state: RetryLoopState,
1087
+ maxRetries: number,
1088
+ stopReason: string,
1089
+ isMaxTokens: boolean,
1090
+ ): void {
1091
+ state.retryNumber++;
1092
+ const reason = isMaxTokens ? 'Output limit reached' : 'Task appears unfinished (AI assessment)';
1093
+
1094
+ state.retryLog.push({
1095
+ retryNumber: state.retryNumber,
1096
+ path: 'PrematureCompletion',
1097
+ reason,
1098
+ timestamp: Date.now(),
1099
+ });
1100
+
1101
+ this.emit('onAutoRetry', {
1102
+ retryNumber: state.retryNumber,
1103
+ maxRetries,
1104
+ toolName: `PrematureCompletion(${stopReason})`,
1105
+ completedCount: result.toolUseHistory?.length ?? 0,
1106
+ });
1107
+
1108
+ trackEvent(AnalyticsEvents.IMPROVISE_AUTO_RETRY, {
1109
+ retry_number: state.retryNumber,
1110
+ hung_tool: `premature_completion:${stopReason}`,
1111
+ completed_tools: result.toolUseHistory?.length ?? 0,
1112
+ resume_attempted: true,
1113
+ });
1114
+
1115
+ this.queueOutput(
1116
+ `\n[[MSTRO_AUTO_CONTINUE]] ${reason} — resuming session (retry ${state.retryNumber}/${maxRetries}).\n`
1117
+ );
1118
+ this.flushOutputQueue();
1119
+
1120
+ state.contextRecoverySessionId = result.claudeSessionId;
1121
+ this.claudeSessionId = result.claudeSessionId;
1122
+ state.currentPrompt = 'continue';
1123
+ }
1124
+
1018
1125
  /** Select the best result across retries using Haiku assessment */
1019
1126
  private async selectBestResult(
1020
1127
  state: RetryLoopState,
@@ -1047,10 +1154,10 @@ export class ImprovisationSessionManager extends EventEmitter {
1047
1154
  }, claudeCmd, this.options.verbose);
1048
1155
 
1049
1156
  if (verdict.winner === 'A') {
1050
- if (this.options.verbose) console.log(`[BEST-RESULT] Haiku picked earlier attempt: ${verdict.reason}`);
1157
+ if (this.options.verbose) hlog(`[BEST-RESULT] Haiku picked earlier attempt: ${verdict.reason}`);
1051
1158
  return this.mergeResultSessionId(state.bestResult, result.claudeSessionId);
1052
1159
  }
1053
- if (this.options.verbose) console.log(`[BEST-RESULT] Haiku picked final attempt: ${verdict.reason}`);
1160
+ if (this.options.verbose) hlog(`[BEST-RESULT] Haiku picked final attempt: ${verdict.reason}`);
1054
1161
  return result;
1055
1162
  } catch {
1056
1163
  return this.fallbackBestResult(state.bestResult, result);
@@ -1061,7 +1168,7 @@ export class ImprovisationSessionManager extends EventEmitter {
1061
1168
  private fallbackBestResult(bestResult: HeadlessRunResult, result: HeadlessRunResult): HeadlessRunResult {
1062
1169
  if (scoreRunResult(bestResult) > scoreRunResult(result)) {
1063
1170
  if (this.options.verbose) {
1064
- console.log(`[BEST-RESULT] Haiku unavailable, numeric fallback: earlier attempt (score ${scoreRunResult(bestResult)} vs ${scoreRunResult(result)})`);
1171
+ hlog(`[BEST-RESULT] Haiku unavailable, numeric fallback: earlier attempt (score ${scoreRunResult(bestResult)} vs ${scoreRunResult(result)})`);
1065
1172
  }
1066
1173
  return this.mergeResultSessionId(bestResult, result.claudeSessionId);
1067
1174
  }
@@ -1497,7 +1604,7 @@ export class ImprovisationSessionManager extends EventEmitter {
1497
1604
  const data = readFileSync(this.historyPath, 'utf-8');
1498
1605
  return JSON.parse(data);
1499
1606
  } catch (error) {
1500
- console.error('Failed to load history:', error);
1607
+ herror('Failed to load history:', error);
1501
1608
  }
1502
1609
  }
1503
1610