mstro-app 0.3.8 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105):
  1. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  2. package/dist/server/cli/headless/claude-invoker.js +18 -9
  3. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  4. package/dist/server/cli/headless/headless-logger.d.ts +10 -0
  5. package/dist/server/cli/headless/headless-logger.d.ts.map +1 -0
  6. package/dist/server/cli/headless/headless-logger.js +66 -0
  7. package/dist/server/cli/headless/headless-logger.js.map +1 -0
  8. package/dist/server/cli/headless/mcp-config.d.ts.map +1 -1
  9. package/dist/server/cli/headless/mcp-config.js +6 -5
  10. package/dist/server/cli/headless/mcp-config.js.map +1 -1
  11. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  12. package/dist/server/cli/headless/runner.js +4 -0
  13. package/dist/server/cli/headless/runner.js.map +1 -1
  14. package/dist/server/cli/headless/stall-assessor.d.ts +21 -0
  15. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  16. package/dist/server/cli/headless/stall-assessor.js +70 -19
  17. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  18. package/dist/server/cli/headless/tool-watchdog.d.ts +0 -12
  19. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -1
  20. package/dist/server/cli/headless/tool-watchdog.js +22 -9
  21. package/dist/server/cli/headless/tool-watchdog.js.map +1 -1
  22. package/dist/server/cli/headless/types.d.ts +8 -1
  23. package/dist/server/cli/headless/types.d.ts.map +1 -1
  24. package/dist/server/cli/improvisation-session-manager.d.ts +16 -0
  25. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  26. package/dist/server/cli/improvisation-session-manager.js +94 -11
  27. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  28. package/dist/server/mcp/bouncer-cli.d.ts +3 -0
  29. package/dist/server/mcp/bouncer-cli.d.ts.map +1 -0
  30. package/dist/server/mcp/bouncer-cli.js +54 -0
  31. package/dist/server/mcp/bouncer-cli.js.map +1 -0
  32. package/dist/server/services/plan/composer.d.ts +4 -0
  33. package/dist/server/services/plan/composer.d.ts.map +1 -0
  34. package/dist/server/services/plan/composer.js +181 -0
  35. package/dist/server/services/plan/composer.js.map +1 -0
  36. package/dist/server/services/plan/dependency-resolver.d.ts +28 -0
  37. package/dist/server/services/plan/dependency-resolver.d.ts.map +1 -0
  38. package/dist/server/services/plan/dependency-resolver.js +152 -0
  39. package/dist/server/services/plan/dependency-resolver.js.map +1 -0
  40. package/dist/server/services/plan/executor.d.ts +91 -0
  41. package/dist/server/services/plan/executor.d.ts.map +1 -0
  42. package/dist/server/services/plan/executor.js +545 -0
  43. package/dist/server/services/plan/executor.js.map +1 -0
  44. package/dist/server/services/plan/parser.d.ts +11 -0
  45. package/dist/server/services/plan/parser.d.ts.map +1 -0
  46. package/dist/server/services/plan/parser.js +415 -0
  47. package/dist/server/services/plan/parser.js.map +1 -0
  48. package/dist/server/services/plan/state-reconciler.d.ts +2 -0
  49. package/dist/server/services/plan/state-reconciler.d.ts.map +1 -0
  50. package/dist/server/services/plan/state-reconciler.js +105 -0
  51. package/dist/server/services/plan/state-reconciler.js.map +1 -0
  52. package/dist/server/services/plan/types.d.ts +120 -0
  53. package/dist/server/services/plan/types.d.ts.map +1 -0
  54. package/dist/server/services/plan/types.js +4 -0
  55. package/dist/server/services/plan/types.js.map +1 -0
  56. package/dist/server/services/plan/watcher.d.ts +14 -0
  57. package/dist/server/services/plan/watcher.d.ts.map +1 -0
  58. package/dist/server/services/plan/watcher.js +69 -0
  59. package/dist/server/services/plan/watcher.js.map +1 -0
  60. package/dist/server/services/websocket/file-explorer-handlers.js +20 -0
  61. package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -1
  62. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  63. package/dist/server/services/websocket/handler.js +21 -0
  64. package/dist/server/services/websocket/handler.js.map +1 -1
  65. package/dist/server/services/websocket/plan-handlers.d.ts +6 -0
  66. package/dist/server/services/websocket/plan-handlers.d.ts.map +1 -0
  67. package/dist/server/services/websocket/plan-handlers.js +494 -0
  68. package/dist/server/services/websocket/plan-handlers.js.map +1 -0
  69. package/dist/server/services/websocket/quality-handlers.d.ts.map +1 -1
  70. package/dist/server/services/websocket/quality-handlers.js +375 -11
  71. package/dist/server/services/websocket/quality-handlers.js.map +1 -1
  72. package/dist/server/services/websocket/quality-persistence.d.ts +45 -0
  73. package/dist/server/services/websocket/quality-persistence.d.ts.map +1 -0
  74. package/dist/server/services/websocket/quality-persistence.js +187 -0
  75. package/dist/server/services/websocket/quality-persistence.js.map +1 -0
  76. package/dist/server/services/websocket/quality-service.d.ts +2 -2
  77. package/dist/server/services/websocket/quality-service.d.ts.map +1 -1
  78. package/dist/server/services/websocket/quality-service.js +62 -12
  79. package/dist/server/services/websocket/quality-service.js.map +1 -1
  80. package/dist/server/services/websocket/types.d.ts +2 -2
  81. package/dist/server/services/websocket/types.d.ts.map +1 -1
  82. package/package.json +2 -2
  83. package/server/cli/headless/claude-invoker.ts +21 -9
  84. package/server/cli/headless/headless-logger.ts +78 -0
  85. package/server/cli/headless/mcp-config.ts +6 -5
  86. package/server/cli/headless/runner.ts +4 -0
  87. package/server/cli/headless/stall-assessor.ts +97 -19
  88. package/server/cli/headless/tool-watchdog.ts +10 -9
  89. package/server/cli/headless/types.ts +10 -1
  90. package/server/cli/improvisation-session-manager.ts +118 -11
  91. package/server/mcp/bouncer-cli.ts +73 -0
  92. package/server/services/plan/composer.ts +199 -0
  93. package/server/services/plan/dependency-resolver.ts +179 -0
  94. package/server/services/plan/executor.ts +604 -0
  95. package/server/services/plan/parser.ts +459 -0
  96. package/server/services/plan/state-reconciler.ts +132 -0
  97. package/server/services/plan/types.ts +164 -0
  98. package/server/services/plan/watcher.ts +73 -0
  99. package/server/services/websocket/file-explorer-handlers.ts +20 -0
  100. package/server/services/websocket/handler.ts +21 -0
  101. package/server/services/websocket/plan-handlers.ts +592 -0
  102. package/server/services/websocket/quality-handlers.ts +441 -11
  103. package/server/services/websocket/quality-persistence.ts +250 -0
  104. package/server/services/websocket/quality-service.ts +65 -12
  105. package/server/services/websocket/types.ts +48 -2
@@ -9,6 +9,7 @@
9
9
 
10
10
  import { type ChildProcess, spawn } from 'node:child_process';
11
11
  import { sanitizeEnvForSandbox } from '../../services/sandbox-utils.js';
12
+ import { herror, hlog } from './headless-logger.js';
12
13
  import { generateMcpConfig } from './mcp-config.js';
13
14
  import { detectErrorInStderr, } from './output-utils.js';
14
15
  import { buildMultimodalMessage } from './prompt-utils.js';
@@ -115,7 +116,7 @@ async function runStallAssessment(
115
116
  );
116
117
  }
117
118
  if (config.verbose) {
118
- console.log(`[STALL] Extended by ${Math.round(verdict.extensionMs / 60_000)} min: ${verdict.reason}`);
119
+ hlog(`[STALL] Extended by ${Math.round(verdict.extensionMs / 60_000)} min: ${verdict.reason}`);
119
120
  }
120
121
  return { extensionsGranted: newExtensions, currentKillDeadline: now + verdict.extensionMs };
121
122
  }
@@ -123,11 +124,11 @@ async function runStallAssessment(
123
124
  `\n[[MSTRO_STALL_CONFIRMED]] Assessment: process likely stalled. ${verdict.reason}.\n`
124
125
  );
125
126
  if (config.verbose) {
126
- console.log(`[STALL] Assessment says stalled: ${verdict.reason}`);
127
+ hlog(`[STALL] Assessment says stalled: ${verdict.reason}`);
127
128
  }
128
129
  } catch (err) {
129
130
  if (config.verbose) {
130
- console.log(`[STALL] Assessment error: ${err}`);
131
+ hlog(`[STALL] Assessment error: ${err}`);
131
132
  }
132
133
  }
133
134
  return null;
@@ -277,6 +278,8 @@ interface StreamHandlerContext {
277
278
  currentStepOutputTokens: number;
278
279
  /** Timestamp of the last token usage change (tokens still flowing = process alive) */
279
280
  lastTokenActivityTime: number;
281
+ /** Claude Code result event stop_reason (e.g., 'end_turn', 'max_tokens') */
282
+ stopReason?: string;
280
283
  }
281
284
 
282
285
  function handleSessionCapture(
@@ -590,9 +593,12 @@ function processStreamEvent(parsed: StreamJson, ctx: StreamHandlerContext): void
590
593
  return;
591
594
  }
592
595
 
593
- // Handle result events — extract definitive token usage and surface errors
596
+ // Handle result events — extract definitive token usage, stop_reason, and surface errors
594
597
  if (parsed.type === 'result') {
595
598
  handleResultTokenUsage(parsed, ctx);
599
+ if (parsed.stop_reason) {
600
+ ctx.stopReason = parsed.stop_reason;
601
+ }
596
602
  if (parsed.is_error) {
597
603
  const errorMessage = parsed.error || parsed.result || 'Unknown error in result';
598
604
  ctx.config.outputCallback?.(`\n[[MSTRO_ERROR:CLAUDE_RESULT_ERROR]] ${errorMessage}\n`);
@@ -733,7 +739,7 @@ function writeImageAttachmentsToStdin(
733
739
  ): void {
734
740
  claudeProcess.stdin!.on('error', (err) => {
735
741
  if (config.verbose) {
736
- console.error('[STDIN] Write error:', err.message);
742
+ herror('[STDIN] Write error:', err.message);
737
743
  }
738
744
  config.outputCallback?.(`\n[[MSTRO_ERROR:STDIN_WRITE_FAILED]] Failed to send image data to Claude: ${err.message}\n`);
739
745
  });
@@ -1002,7 +1008,7 @@ function setupToolTracking(
1002
1008
  /** Log messages when verbose mode is enabled. Extracted to reduce cognitive complexity. */
1003
1009
  function verboseLog(verbose: boolean | undefined, ...msgs: string[]): void {
1004
1010
  if (verbose) {
1005
- for (const msg of msgs) console.log(msg);
1011
+ for (const msg of msgs) hlog(msg);
1006
1012
  }
1007
1013
  }
1008
1014
 
@@ -1030,12 +1036,17 @@ function spawnAndRegister(
1030
1036
  `[PERF] Command: ${config.claudeCommand} ${args.join(' ')}`,
1031
1037
  );
1032
1038
 
1039
+ const baseEnv = config.sandboxed
1040
+ ? sanitizeEnvForSandbox(process.env, config.workingDir)
1041
+ : { ...process.env };
1042
+ const spawnEnv = config.extraEnv
1043
+ ? { ...baseEnv, ...config.extraEnv }
1044
+ : baseEnv;
1045
+
1033
1046
  const claudeProcess = spawn(config.claudeCommand, args, {
1034
1047
  cwd: config.workingDir,
1035
1048
  detached: true,
1036
- env: config.sandboxed
1037
- ? sanitizeEnvForSandbox(process.env, config.workingDir)
1038
- : { ...process.env },
1049
+ env: spawnEnv,
1039
1050
  stdio: [hasImageAttachments ? 'pipe' : 'ignore', 'pipe', 'pipe']
1040
1051
  });
1041
1052
 
@@ -1210,5 +1221,6 @@ function buildCloseResult(
1210
1221
  postTimeoutOutput: postTimeout,
1211
1222
  resumeBufferedOutput: resumeBuffered,
1212
1223
  apiTokenUsage: hasTokenUsage ? { ...ctx.apiTokenUsage } : undefined,
1224
+ stopReason: ctx.stopReason,
1213
1225
  };
1214
1226
  }
@@ -0,0 +1,78 @@
1
+ // Copyright (c) 2025-present Mstro, Inc. All rights reserved.
2
+ // Licensed under the MIT License. See LICENSE file for details.
3
+
4
+ /**
5
+ * Headless Logger
6
+ *
7
+ * Provides AsyncLocalStorage-based logging redirection for headless execution.
8
+ * When background operations (code review, PM compose/execute) run, their
9
+ * console output is redirected to log files under ~/.mstro/logs/ instead of
10
+ * polluting the terminal where the mstro CLI was started.
11
+ */
12
+
13
+ import { AsyncLocalStorage } from 'node:async_hooks';
14
+ import type { WriteStream } from 'node:fs';
15
+ import { createWriteStream, mkdirSync } from 'node:fs';
16
+ import { homedir } from 'node:os';
17
+ import { join } from 'node:path';
18
+
19
+ interface LogTarget {
20
+ log: (...args: unknown[]) => void;
21
+ error: (...args: unknown[]) => void;
22
+ }
23
+
24
+ const logContext = new AsyncLocalStorage<LogTarget>();
25
+
26
+ function formatArgs(args: unknown[]): string {
27
+ return args.map(a => (typeof a === 'string' ? a : String(a))).join(' ');
28
+ }
29
+
30
+ /** Log a message. Writes to file when inside runWithFileLogger, otherwise to console. */
31
+ export function hlog(...args: unknown[]): void {
32
+ const target = logContext.getStore();
33
+ if (target) {
34
+ target.log(...args);
35
+ } else {
36
+ console.log(...args);
37
+ }
38
+ }
39
+
40
+ /** Log an error. Writes to file when inside runWithFileLogger, otherwise to console. */
41
+ export function herror(...args: unknown[]): void {
42
+ const target = logContext.getStore();
43
+ if (target) {
44
+ target.error(...args);
45
+ } else {
46
+ console.error(...args);
47
+ }
48
+ }
49
+
50
+ const LOG_DIR = join(homedir(), '.mstro', 'logs', 'headless');
51
+
52
+ /**
53
+ * Run an async function with all hlog/herror output redirected to a log file.
54
+ * The log file is created at ~/.mstro/logs/headless/{label}-{timestamp}.log.
55
+ */
56
+ export async function runWithFileLogger<T>(label: string, fn: () => Promise<T>): Promise<T> {
57
+ mkdirSync(LOG_DIR, { recursive: true });
58
+ const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
59
+ const logPath = join(LOG_DIR, `${label}-${timestamp}.log`);
60
+ const stream: WriteStream = createWriteStream(logPath, { flags: 'a' });
61
+
62
+ const target: LogTarget = {
63
+ log: (...args: unknown[]) => {
64
+ stream.write(`[${new Date().toISOString()}] ${formatArgs(args)}\n`);
65
+ },
66
+ error: (...args: unknown[]) => {
67
+ stream.write(`[${new Date().toISOString()}] ERROR: ${formatArgs(args)}\n`);
68
+ },
69
+ };
70
+
71
+ return logContext.run(target, async () => {
72
+ try {
73
+ return await fn();
74
+ } finally {
75
+ stream.end();
76
+ }
77
+ });
78
+ }
@@ -8,6 +8,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
8
8
  import { homedir } from 'node:os';
9
9
  import { join } from 'node:path';
10
10
  import { MCP_SERVER_PATH, MSTRO_ROOT } from '../../utils/paths.js';
11
+ import { herror, hlog } from './headless-logger.js';
11
12
 
12
13
  /**
13
14
  * Load user's MCP servers from ~/.claude.json (global + project-level)
@@ -37,10 +38,10 @@ function loadUserMcpServers(workingDir: string, verbose: boolean): Record<string
37
38
  }
38
39
 
39
40
  if (verbose) {
40
- console.log(`[${new Date().toISOString()}] Loaded ${Object.keys(servers).length} user MCP servers from ~/.claude.json`);
41
+ hlog(`[${new Date().toISOString()}] Loaded ${Object.keys(servers).length} user MCP servers from ~/.claude.json`);
41
42
  }
42
43
  } catch (parseError: unknown) {
43
- console.error(`[${new Date().toISOString()}] Failed to parse ~/.claude.json: ${parseError instanceof Error ? parseError.message : String(parseError)}`);
44
+ herror(`[${new Date().toISOString()}] Failed to parse ~/.claude.json: ${parseError instanceof Error ? parseError.message : String(parseError)}`);
44
45
  }
45
46
 
46
47
  return servers;
@@ -53,7 +54,7 @@ function loadUserMcpServers(workingDir: string, verbose: boolean): Record<string
53
54
  export function generateMcpConfig(workingDir: string, verbose: boolean = false): string | null {
54
55
  try {
55
56
  if (!existsSync(MCP_SERVER_PATH)) {
56
- console.error(`[${new Date().toISOString()}] MCP server not found at ${MCP_SERVER_PATH}`);
57
+ herror(`[${new Date().toISOString()}] MCP server not found at ${MCP_SERVER_PATH}`);
57
58
  return null;
58
59
  }
59
60
 
@@ -76,12 +77,12 @@ export function generateMcpConfig(workingDir: string, verbose: boolean = false):
76
77
  writeFileSync(configPath, JSON.stringify({ mcpServers }, null, 2));
77
78
 
78
79
  if (verbose) {
79
- console.log(`[${new Date().toISOString()}] Generated MCP config at ${configPath} (${Object.keys(mcpServers).length} servers)`);
80
+ hlog(`[${new Date().toISOString()}] Generated MCP config at ${configPath} (${Object.keys(mcpServers).length} servers)`);
80
81
  }
81
82
 
82
83
  return configPath;
83
84
  } catch (error: unknown) {
84
- console.error(`[${new Date().toISOString()}] Failed to generate MCP config: ${error instanceof Error ? error.message : String(error)}`);
85
+ herror(`[${new Date().toISOString()}] Failed to generate MCP config: ${error instanceof Error ? error.message : String(error)}`);
85
86
  return null;
86
87
  }
87
88
  }
@@ -69,6 +69,7 @@ export class HeadlessRunner {
69
69
  maxAutoRetries: config.maxAutoRetries ?? 2,
70
70
  onToolTimeout: config.onToolTimeout,
71
71
  sandboxed: config.sandboxed,
72
+ extraEnv: config.extraEnv,
72
73
  };
73
74
  }
74
75
 
@@ -122,6 +123,7 @@ export class HeadlessRunner {
122
123
  nativeTimeoutCount: result.nativeTimeoutCount,
123
124
  postTimeoutOutput: result.postTimeoutOutput,
124
125
  resumeBufferedOutput: result.resumeBufferedOutput,
126
+ stopReason: result.stopReason,
125
127
  };
126
128
  }
127
129
 
@@ -150,6 +152,7 @@ export class HeadlessRunner {
150
152
  nativeTimeoutCount: result.nativeTimeoutCount,
151
153
  postTimeoutOutput: result.postTimeoutOutput,
152
154
  resumeBufferedOutput: result.resumeBufferedOutput,
155
+ stopReason: result.stopReason,
153
156
  };
154
157
  }
155
158
 
@@ -168,6 +171,7 @@ export class HeadlessRunner {
168
171
  nativeTimeoutCount: result.nativeTimeoutCount,
169
172
  postTimeoutOutput: result.postTimeoutOutput,
170
173
  resumeBufferedOutput: result.resumeBufferedOutput,
174
+ stopReason: result.stopReason,
171
175
  };
172
176
  }
173
177
 
@@ -17,6 +17,7 @@
17
17
  */
18
18
 
19
19
  import { type ChildProcess, spawn } from 'node:child_process';
20
+ import { hlog } from './headless-logger.js';
20
21
 
21
22
  export interface StallContext {
22
23
  /** The original user prompt being executed */
@@ -137,7 +138,7 @@ export async function assessStall(
137
138
  const quick = quickHeuristic(ctx, toolWatchdogActive);
138
139
  if (quick) {
139
140
  if (verbose) {
140
- console.log(`[STALL-ASSESS] Heuristic verdict: ${quick.reason}`);
141
+ hlog(`[STALL-ASSESS] Heuristic verdict: ${quick.reason}`);
141
142
  }
142
143
  return quick;
143
144
  }
@@ -145,12 +146,12 @@ export async function assessStall(
145
146
  // Layer 2: Haiku assessment
146
147
  try {
147
148
  if (verbose) {
148
- console.log('[STALL-ASSESS] Running Haiku assessment...');
149
+ hlog('[STALL-ASSESS] Running Haiku assessment...');
149
150
  }
150
151
  return await runHaikuAssessment(ctx, claudeCommand, verbose);
151
152
  } catch (err) {
152
153
  if (verbose) {
153
- console.log(`[STALL-ASSESS] Haiku assessment failed: ${err}`);
154
+ hlog(`[STALL-ASSESS] Haiku assessment failed: ${err}`);
154
155
  }
155
156
  // If Haiku fails (timeout, auth issue, etc.), extend cautiously
156
157
  return {
@@ -220,13 +221,13 @@ export async function assessToolTimeout(
220
221
 
221
222
  try {
222
223
  if (verbose) {
223
- console.log(`[TOOL-ASSESS] Running Haiku assessment for ${toolName} (${elapsedSec}s elapsed)...`);
224
+ hlog(`[TOOL-ASSESS] Running Haiku assessment for ${toolName} (${elapsedSec}s elapsed)...`);
224
225
  }
225
226
 
226
227
  return await spawnHaikuVerdict(prompt, claudeCommand, verbose, 'TOOL-ASSESS');
227
228
  } catch (err) {
228
229
  if (verbose) {
229
- console.log(`[TOOL-ASSESS] Haiku assessment failed: ${err}`);
230
+ hlog(`[TOOL-ASSESS] Haiku assessment failed: ${err}`);
230
231
  }
231
232
  // On failure, default to kill (the tool has already exceeded its timeout)
232
233
  return {
@@ -295,7 +296,7 @@ export async function assessContextLoss(
295
296
 
296
297
  try {
297
298
  if (verbose) {
298
- console.log(`[CONTEXT-ASSESS] Running Haiku assessment (${ctx.effectiveTimeouts} timeouts, ${ctx.successfulToolCalls} successes, ${ctx.thinkingOutputLength} thinking chars)...`);
299
+ hlog(`[CONTEXT-ASSESS] Running Haiku assessment (${ctx.effectiveTimeouts} timeouts, ${ctx.successfulToolCalls} successes, ${ctx.thinkingOutputLength} thinking chars)...`);
299
300
  }
300
301
 
301
302
  const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'CONTEXT-ASSESS');
@@ -303,13 +304,13 @@ export async function assessContextLoss(
303
304
  const contextLost = parsed.verdict === 'STALLED';
304
305
 
305
306
  if (verbose) {
306
- console.log(`[CONTEXT-ASSESS] Verdict: ${contextLost ? 'LOST' : 'CONTINUED'} — ${parsed.reason}`);
307
+ hlog(`[CONTEXT-ASSESS] Verdict: ${contextLost ? 'LOST' : 'CONTINUED'} — ${parsed.reason}`);
307
308
  }
308
309
 
309
310
  return { contextLost, reason: parsed.reason };
310
311
  } catch (err) {
311
312
  if (verbose) {
312
- console.log(`[CONTEXT-ASSESS] Haiku assessment failed: ${err}`);
313
+ hlog(`[CONTEXT-ASSESS] Haiku assessment failed: ${err}`);
313
314
  }
314
315
  // On failure, assume context was lost (safer to retry than to show a confused response)
315
316
  return {
@@ -419,7 +420,7 @@ function spawnHaikuRaw(
419
420
 
420
421
  proc.stderr!.on('data', (data) => {
421
422
  if (verbose) {
422
- console.log(`[${label}] haiku stderr: ${data.toString().trim()}`);
423
+ hlog(`[${label}] haiku stderr: ${data.toString().trim()}`);
423
424
  }
424
425
  });
425
426
 
@@ -434,7 +435,7 @@ function spawnHaikuRaw(
434
435
  }
435
436
 
436
437
  if (verbose) {
437
- console.log(`[${label}] Haiku response: ${stdout.trim()}`);
438
+ hlog(`[${label}] Haiku response: ${stdout.trim()}`);
438
439
  }
439
440
 
440
441
  resolve(stdout.trim());
@@ -521,7 +522,7 @@ export async function assessApproval(
521
522
 
522
523
  try {
523
524
  if (verbose) {
524
- console.log('[APPROVAL-ASSESS] Running Haiku assessment...');
525
+ hlog('[APPROVAL-ASSESS] Running Haiku assessment...');
525
526
  }
526
527
 
527
528
  const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'APPROVAL-ASSESS');
@@ -529,19 +530,96 @@ export async function assessApproval(
529
530
  const isApproval = parsed.verdict.includes('APPROVAL');
530
531
 
531
532
  if (verbose) {
532
- console.log(`[APPROVAL-ASSESS] Verdict: ${isApproval ? 'APPROVAL' : 'NEW_TASK'} — ${parsed.reason}`);
533
+ hlog(`[APPROVAL-ASSESS] Verdict: ${isApproval ? 'APPROVAL' : 'NEW_TASK'} — ${parsed.reason}`);
533
534
  }
534
535
 
535
536
  return { isApproval, reason: parsed.reason };
536
537
  } catch (err) {
537
538
  if (verbose) {
538
- console.log(`[APPROVAL-ASSESS] Haiku assessment failed: ${err}`);
539
+ hlog(`[APPROVAL-ASSESS] Haiku assessment failed: ${err}`);
539
540
  }
540
541
  // On failure, assume not an approval (safer to treat as new task)
541
542
  return { isApproval: false, reason: `Assessment failed: ${err}` };
542
543
  }
543
544
  }
544
545
 
546
+ // ========== Premature Completion Assessment ==========
547
+
548
+ export interface PrematureCompletionContext {
549
+ /** The trailing portion of the assistant response (last ~800 chars) */
550
+ responseTail: string;
551
+ /** Total number of successful tool calls in this execution */
552
+ successfulToolCalls: number;
553
+ /** Whether extended thinking output was produced */
554
+ hasThinking: boolean;
555
+ /** Total response length */
556
+ responseLength: number;
557
+ }
558
+
559
+ export interface PrematureCompletionVerdict {
560
+ /** True if the task appears incomplete and should be auto-continued */
561
+ isIncomplete: boolean;
562
+ reason: string;
563
+ }
564
+
565
+ /**
566
+ * Assess whether a completed Claude execution ended prematurely.
567
+ * Called when stop_reason is 'end_turn' but the task may not be finished.
568
+ * Haiku determines if the trailing response text indicates planned-but-unexecuted work.
569
+ */
570
+ export async function assessPrematureCompletion(
571
+ ctx: PrematureCompletionContext,
572
+ claudeCommand: string,
573
+ verbose: boolean,
574
+ ): Promise<PrematureCompletionVerdict> {
575
+ const prompt = [
576
+ 'You are analyzing the FINAL output of a Claude Code agent that just exited normally.',
577
+ 'Determine whether the agent finished its task or stopped prematurely mid-work.',
578
+ '',
579
+ 'Session signals:',
580
+ `- ${ctx.successfulToolCalls} tool calls completed successfully`,
581
+ `- Response length: ${ctx.responseLength} characters`,
582
+ `- Extended thinking: ${ctx.hasThinking ? 'YES' : 'NO'}`,
583
+ '',
584
+ `Final response text (last ${ctx.responseTail.length} chars):`,
585
+ ctx.responseTail,
586
+ '',
587
+ 'INCOMPLETE signals: "Now I\'ll...", "Let me fix...", "Next I\'ll...", "Moving on to...",',
588
+ '"I\'ll continue with...", announcing next steps that were never executed,',
589
+ 'describing work that will happen next but no tool call followed.',
590
+ '',
591
+ 'COMPLETE signals: summarizing what was done, confirming changes, reporting results,',
592
+ 'asking the user a question, past-tense descriptions of completed work,',
593
+ '"all done", "changes applied", referencing finished state.',
594
+ '',
595
+ 'Respond in EXACTLY this format (2 lines, no extra text):',
596
+ 'VERDICT: COMPLETE or INCOMPLETE',
597
+ 'REASON: <brief one-line explanation>',
598
+ ].join('\n');
599
+
600
+ try {
601
+ if (verbose) {
602
+ hlog(`[PREMATURE-ASSESS] Running Haiku assessment (${ctx.successfulToolCalls} tools, ${ctx.responseLength} chars)...`);
603
+ }
604
+
605
+ const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'PREMATURE-ASSESS');
606
+ const parsed = parseVerdictResponse(raw);
607
+ const isIncomplete = parsed.verdict.includes('INCOMPLETE');
608
+
609
+ if (verbose) {
610
+ hlog(`[PREMATURE-ASSESS] Verdict: ${isIncomplete ? 'INCOMPLETE' : 'COMPLETE'} — ${parsed.reason}`);
611
+ }
612
+
613
+ return { isIncomplete, reason: parsed.reason };
614
+ } catch (err) {
615
+ if (verbose) {
616
+ hlog(`[PREMATURE-ASSESS] Haiku assessment failed: ${err}`);
617
+ }
618
+ // On failure, don't retry — safer to let the user decide than to auto-continue incorrectly
619
+ return { isIncomplete: false, reason: `Assessment failed: ${err}` };
620
+ }
621
+ }
622
+
545
623
  // ========== Best Result Comparison ==========
546
624
 
547
625
  export interface BestResultContext {
@@ -602,7 +680,7 @@ export async function assessBestResult(
602
680
 
603
681
  try {
604
682
  if (verbose) {
605
- console.log('[BEST-RESULT] Running Haiku assessment...');
683
+ hlog('[BEST-RESULT] Running Haiku assessment...');
606
684
  }
607
685
 
608
686
  const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'BEST-RESULT');
@@ -610,13 +688,13 @@ export async function assessBestResult(
610
688
  const winner: 'A' | 'B' = parsed.verdict.includes('B') ? 'B' : 'A';
611
689
 
612
690
  if (verbose) {
613
- console.log(`[BEST-RESULT] Verdict: ${winner} — ${parsed.reason}`);
691
+ hlog(`[BEST-RESULT] Verdict: ${winner} — ${parsed.reason}`);
614
692
  }
615
693
 
616
694
  return { winner, reason: parsed.reason };
617
695
  } catch (err) {
618
696
  if (verbose) {
619
- console.log(`[BEST-RESULT] Haiku assessment failed: ${err}`);
697
+ hlog(`[BEST-RESULT] Haiku assessment failed: ${err}`);
620
698
  }
621
699
  // On failure, prefer A (the previously-tracked best result)
622
700
  return { winner: 'A', reason: `Assessment failed: ${err}` };
@@ -671,7 +749,7 @@ export async function classifyError(
671
749
 
672
750
  try {
673
751
  if (verbose) {
674
- console.log('[ERROR-CLASSIFY] Running Haiku assessment...');
752
+ hlog('[ERROR-CLASSIFY] Running Haiku assessment...');
675
753
  }
676
754
 
677
755
  const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'ERROR-CLASSIFY');
@@ -691,13 +769,13 @@ export async function classifyError(
691
769
  if (category === 'UNKNOWN' || !message) return null;
692
770
 
693
771
  if (verbose) {
694
- console.log(`[ERROR-CLASSIFY] Verdict: ${category} — ${message}`);
772
+ hlog(`[ERROR-CLASSIFY] Verdict: ${category} — ${message}`);
695
773
  }
696
774
 
697
775
  return { errorCode: category, message };
698
776
  } catch (err) {
699
777
  if (verbose) {
700
- console.log(`[ERROR-CLASSIFY] Haiku assessment failed: ${err}`);
778
+ hlog(`[ERROR-CLASSIFY] Haiku assessment failed: ${err}`);
701
779
  }
702
780
  return null;
703
781
  }
@@ -14,6 +14,7 @@
14
14
  * 3. Haiku tiebreaker: optional AI assessment before killing ambiguous cases
15
15
  */
16
16
 
17
+ import { hlog } from './headless-logger.js';
17
18
  import type {
18
19
  ExecutionCheckpoint,
19
20
  ToolDurationTracker,
@@ -167,7 +168,7 @@ export class ToolWatchdog {
167
168
  sampleCount: 1,
168
169
  });
169
170
  if (this.verbose) {
170
- console.log(`[WATCHDOG] ${toolName}: first sample ${durationMs}ms, initial timeout ${this.getTimeout(toolName)}ms`);
171
+ hlog(`[WATCHDOG] ${toolName}: first sample ${durationMs}ms, initial timeout ${this.getTimeout(toolName)}ms`);
171
172
  }
172
173
  return;
173
174
  }
@@ -178,7 +179,7 @@ export class ToolWatchdog {
178
179
  tracker.sampleCount++;
179
180
 
180
181
  if (this.verbose) {
181
- console.log(`[WATCHDOG] ${toolName}: sample #${tracker.sampleCount} ${durationMs}ms, est=${Math.round(tracker.estimatedDuration)}ms, dev=${Math.round(tracker.deviation)}ms, timeout=${this.getTimeout(toolName)}ms`);
182
+ hlog(`[WATCHDOG] ${toolName}: sample #${tracker.sampleCount} ${durationMs}ms, est=${Math.round(tracker.estimatedDuration)}ms, dev=${Math.round(tracker.deviation)}ms, timeout=${this.getTimeout(toolName)}ms`);
182
183
  }
183
184
  }
184
185
 
@@ -208,7 +209,7 @@ export class ToolWatchdog {
208
209
  const profile = this.getProfile(toolName);
209
210
 
210
211
  if (this.verbose) {
211
- console.log(`[WATCHDOG] Starting watch: ${toolName} (${toolId}), timeout=${Math.round(timeoutMs / 1000)}s`);
212
+ hlog(`[WATCHDOG] Starting watch: ${toolName} (${toolId}), timeout=${Math.round(timeoutMs / 1000)}s`);
212
213
  }
213
214
 
214
215
  const timer = setTimeout(async () => {
@@ -245,7 +246,7 @@ export class ToolWatchdog {
245
246
 
246
247
  if (!profile.useHaikuTiebreaker || !this.onTiebreaker || watch.tiebreakerAttempted) {
247
248
  if (this.verbose) {
248
- console.log(`[WATCHDOG] ${toolName} (${toolId}) timed out after ${Math.round(elapsedMs / 1000)}s, killing`);
249
+ hlog(`[WATCHDOG] ${toolName} (${toolId}) timed out after ${Math.round(elapsedMs / 1000)}s, killing`);
249
250
  }
250
251
  return false;
251
252
  }
@@ -265,7 +266,7 @@ export class ToolWatchdog {
265
266
  watch.tiebreakerAttempted = true;
266
267
 
267
268
  if (this.verbose) {
268
- console.log(`[WATCHDOG] ${toolName} (${toolId}) hit timeout after ${Math.round(elapsedMs / 1000)}s, running tiebreaker...`);
269
+ hlog(`[WATCHDOG] ${toolName} (${toolId}) hit timeout after ${Math.round(elapsedMs / 1000)}s, running tiebreaker...`);
269
270
  }
270
271
 
271
272
  try {
@@ -274,7 +275,7 @@ export class ToolWatchdog {
274
275
 
275
276
  if (verdict.action === 'extend') {
276
277
  if (this.verbose) {
277
- console.log(`[WATCHDOG] Tiebreaker: extend ${toolName} by ${Math.round(verdict.extensionMs / 1000)}s — ${verdict.reason}`);
278
+ hlog(`[WATCHDOG] Tiebreaker: extend ${toolName} by ${Math.round(verdict.extensionMs / 1000)}s — ${verdict.reason}`);
278
279
  }
279
280
  this.scheduleExtensionTimeout(watch, toolId, toolName, verdict.extensionMs, onTimeout);
280
281
  watch.timeoutMs = elapsedMs + verdict.extensionMs;
@@ -282,11 +283,11 @@ export class ToolWatchdog {
282
283
  }
283
284
 
284
285
  if (this.verbose) {
285
- console.log(`[WATCHDOG] Tiebreaker: kill ${toolName} — ${verdict.reason}`);
286
+ hlog(`[WATCHDOG] Tiebreaker: kill ${toolName} — ${verdict.reason}`);
286
287
  }
287
288
  } catch (err) {
288
289
  if (this.verbose) {
289
- console.log(`[WATCHDOG] Tiebreaker failed: ${err}, proceeding with kill`);
290
+ hlog(`[WATCHDOG] Tiebreaker failed: ${err}, proceeding with kill`);
290
291
  }
291
292
  }
292
293
 
@@ -305,7 +306,7 @@ export class ToolWatchdog {
305
306
  const w = this.activeWatches.get(toolId);
306
307
  if (!w) return;
307
308
  if (this.verbose) {
308
- console.log(`[WATCHDOG] ${toolName} (${toolId}) still running after extension, killing`);
309
+ hlog(`[WATCHDOG] ${toolName} (${toolId}) still running after extension, killing`);
309
310
  }
310
311
  // Don't delete the watch — buildCheckpoint() needs it.
311
312
  // handleToolTimeout() calls clearAll() after building the checkpoint.
@@ -121,6 +121,8 @@ export interface HeadlessConfig {
121
121
  onToolTimeout?: (checkpoint: ExecutionCheckpoint) => void;
122
122
  /** When true, spawn Claude with sanitized env (strips secrets, HOME=workingDir) */
123
123
  sandboxed?: boolean;
124
+ /** Extra environment variables to merge into the spawned Claude process env */
125
+ extraEnv?: Record<string, string>;
124
126
  }
125
127
 
126
128
  export interface SessionState {
@@ -165,6 +167,8 @@ export interface SessionResult {
165
167
  /** Assistant text buffered during resume assessment — held back until thinking/tool activity
166
168
  * confirms Claude has context. Undefined when not in resume mode or buffer was flushed. */
167
169
  resumeBufferedOutput?: string;
170
+ /** Claude Code result event stop_reason: 'end_turn', 'max_tokens', or undefined if not captured */
171
+ stopReason?: string;
168
172
  }
169
173
 
170
174
  export interface ToolUseAccumulator {
@@ -200,10 +204,12 @@ export interface ExecutionResult {
200
204
  resumeBufferedOutput?: string;
201
205
  /** Actual API token usage from Claude Code stream events (summed across all turns) */
202
206
  apiTokenUsage?: { inputTokens: number; outputTokens: number };
207
+ /** Claude Code result event stop_reason: 'end_turn', 'max_tokens', or undefined if not captured */
208
+ stopReason?: string;
203
209
  }
204
210
 
205
211
  /** Resolved config with all defaults applied */
206
- export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallback' | 'thinkingCallback' | 'toolUseCallback' | 'tokenUsageCallback' | 'continueSession' | 'claudeSessionId' | 'imageAttachments' | 'model' | 'toolTimeoutProfiles' | 'onToolTimeout' | 'sandboxed'> & {
212
+ export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallback' | 'thinkingCallback' | 'toolUseCallback' | 'tokenUsageCallback' | 'continueSession' | 'claudeSessionId' | 'imageAttachments' | 'model' | 'toolTimeoutProfiles' | 'onToolTimeout' | 'sandboxed' | 'extraEnv'> & {
207
213
  outputCallback?: (text: string) => void;
208
214
  thinkingCallback?: (text: string) => void;
209
215
  toolUseCallback?: (event: ToolUseEvent) => void;
@@ -215,4 +221,7 @@ export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallb
215
221
  toolTimeoutProfiles?: Record<string, Partial<ToolTimeoutProfile>>;
216
222
  onToolTimeout?: (checkpoint: ExecutionCheckpoint) => void;
217
223
  sandboxed?: boolean;
224
+ extraEnv?: Record<string, string>;
218
225
  };
226
+
227
+