mstro-app 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
- package/dist/server/cli/headless/claude-invoker.js +18 -9
- package/dist/server/cli/headless/claude-invoker.js.map +1 -1
- package/dist/server/cli/headless/headless-logger.d.ts +10 -0
- package/dist/server/cli/headless/headless-logger.d.ts.map +1 -0
- package/dist/server/cli/headless/headless-logger.js +66 -0
- package/dist/server/cli/headless/headless-logger.js.map +1 -0
- package/dist/server/cli/headless/mcp-config.d.ts.map +1 -1
- package/dist/server/cli/headless/mcp-config.js +6 -5
- package/dist/server/cli/headless/mcp-config.js.map +1 -1
- package/dist/server/cli/headless/runner.d.ts.map +1 -1
- package/dist/server/cli/headless/runner.js +4 -0
- package/dist/server/cli/headless/runner.js.map +1 -1
- package/dist/server/cli/headless/stall-assessor.d.ts +21 -0
- package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
- package/dist/server/cli/headless/stall-assessor.js +70 -19
- package/dist/server/cli/headless/stall-assessor.js.map +1 -1
- package/dist/server/cli/headless/tool-watchdog.d.ts +0 -12
- package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -1
- package/dist/server/cli/headless/tool-watchdog.js +22 -9
- package/dist/server/cli/headless/tool-watchdog.js.map +1 -1
- package/dist/server/cli/headless/types.d.ts +8 -1
- package/dist/server/cli/headless/types.d.ts.map +1 -1
- package/dist/server/cli/improvisation-session-manager.d.ts +16 -0
- package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
- package/dist/server/cli/improvisation-session-manager.js +94 -11
- package/dist/server/cli/improvisation-session-manager.js.map +1 -1
- package/dist/server/mcp/bouncer-cli.d.ts +3 -0
- package/dist/server/mcp/bouncer-cli.d.ts.map +1 -0
- package/dist/server/mcp/bouncer-cli.js +54 -0
- package/dist/server/mcp/bouncer-cli.js.map +1 -0
- package/dist/server/services/plan/composer.d.ts +4 -0
- package/dist/server/services/plan/composer.d.ts.map +1 -0
- package/dist/server/services/plan/composer.js +181 -0
- package/dist/server/services/plan/composer.js.map +1 -0
- package/dist/server/services/plan/dependency-resolver.d.ts +28 -0
- package/dist/server/services/plan/dependency-resolver.d.ts.map +1 -0
- package/dist/server/services/plan/dependency-resolver.js +152 -0
- package/dist/server/services/plan/dependency-resolver.js.map +1 -0
- package/dist/server/services/plan/executor.d.ts +91 -0
- package/dist/server/services/plan/executor.d.ts.map +1 -0
- package/dist/server/services/plan/executor.js +545 -0
- package/dist/server/services/plan/executor.js.map +1 -0
- package/dist/server/services/plan/parser.d.ts +11 -0
- package/dist/server/services/plan/parser.d.ts.map +1 -0
- package/dist/server/services/plan/parser.js +415 -0
- package/dist/server/services/plan/parser.js.map +1 -0
- package/dist/server/services/plan/state-reconciler.d.ts +2 -0
- package/dist/server/services/plan/state-reconciler.d.ts.map +1 -0
- package/dist/server/services/plan/state-reconciler.js +105 -0
- package/dist/server/services/plan/state-reconciler.js.map +1 -0
- package/dist/server/services/plan/types.d.ts +120 -0
- package/dist/server/services/plan/types.d.ts.map +1 -0
- package/dist/server/services/plan/types.js +4 -0
- package/dist/server/services/plan/types.js.map +1 -0
- package/dist/server/services/plan/watcher.d.ts +14 -0
- package/dist/server/services/plan/watcher.d.ts.map +1 -0
- package/dist/server/services/plan/watcher.js +69 -0
- package/dist/server/services/plan/watcher.js.map +1 -0
- package/dist/server/services/websocket/file-explorer-handlers.js +20 -0
- package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -1
- package/dist/server/services/websocket/handler.d.ts.map +1 -1
- package/dist/server/services/websocket/handler.js +21 -0
- package/dist/server/services/websocket/handler.js.map +1 -1
- package/dist/server/services/websocket/plan-handlers.d.ts +6 -0
- package/dist/server/services/websocket/plan-handlers.d.ts.map +1 -0
- package/dist/server/services/websocket/plan-handlers.js +494 -0
- package/dist/server/services/websocket/plan-handlers.js.map +1 -0
- package/dist/server/services/websocket/quality-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/quality-handlers.js +375 -11
- package/dist/server/services/websocket/quality-handlers.js.map +1 -1
- package/dist/server/services/websocket/quality-persistence.d.ts +45 -0
- package/dist/server/services/websocket/quality-persistence.d.ts.map +1 -0
- package/dist/server/services/websocket/quality-persistence.js +187 -0
- package/dist/server/services/websocket/quality-persistence.js.map +1 -0
- package/dist/server/services/websocket/quality-service.d.ts +2 -2
- package/dist/server/services/websocket/quality-service.d.ts.map +1 -1
- package/dist/server/services/websocket/quality-service.js +62 -12
- package/dist/server/services/websocket/quality-service.js.map +1 -1
- package/dist/server/services/websocket/types.d.ts +2 -2
- package/dist/server/services/websocket/types.d.ts.map +1 -1
- package/package.json +2 -2
- package/server/cli/headless/claude-invoker.ts +21 -9
- package/server/cli/headless/headless-logger.ts +78 -0
- package/server/cli/headless/mcp-config.ts +6 -5
- package/server/cli/headless/runner.ts +4 -0
- package/server/cli/headless/stall-assessor.ts +97 -19
- package/server/cli/headless/tool-watchdog.ts +10 -9
- package/server/cli/headless/types.ts +10 -1
- package/server/cli/improvisation-session-manager.ts +118 -11
- package/server/mcp/bouncer-cli.ts +73 -0
- package/server/services/plan/composer.ts +199 -0
- package/server/services/plan/dependency-resolver.ts +179 -0
- package/server/services/plan/executor.ts +604 -0
- package/server/services/plan/parser.ts +459 -0
- package/server/services/plan/state-reconciler.ts +132 -0
- package/server/services/plan/types.ts +164 -0
- package/server/services/plan/watcher.ts +73 -0
- package/server/services/websocket/file-explorer-handlers.ts +20 -0
- package/server/services/websocket/handler.ts +21 -0
- package/server/services/websocket/plan-handlers.ts +592 -0
- package/server/services/websocket/quality-handlers.ts +441 -11
- package/server/services/websocket/quality-persistence.ts +250 -0
- package/server/services/websocket/quality-service.ts +65 -12
- package/server/services/websocket/types.ts +48 -2
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
import { type ChildProcess, spawn } from 'node:child_process';
|
|
11
11
|
import { sanitizeEnvForSandbox } from '../../services/sandbox-utils.js';
|
|
12
|
+
import { herror, hlog } from './headless-logger.js';
|
|
12
13
|
import { generateMcpConfig } from './mcp-config.js';
|
|
13
14
|
import { detectErrorInStderr, } from './output-utils.js';
|
|
14
15
|
import { buildMultimodalMessage } from './prompt-utils.js';
|
|
@@ -115,7 +116,7 @@ async function runStallAssessment(
|
|
|
115
116
|
);
|
|
116
117
|
}
|
|
117
118
|
if (config.verbose) {
|
|
118
|
-
|
|
119
|
+
hlog(`[STALL] Extended by ${Math.round(verdict.extensionMs / 60_000)} min: ${verdict.reason}`);
|
|
119
120
|
}
|
|
120
121
|
return { extensionsGranted: newExtensions, currentKillDeadline: now + verdict.extensionMs };
|
|
121
122
|
}
|
|
@@ -123,11 +124,11 @@ async function runStallAssessment(
|
|
|
123
124
|
`\n[[MSTRO_STALL_CONFIRMED]] Assessment: process likely stalled. ${verdict.reason}.\n`
|
|
124
125
|
);
|
|
125
126
|
if (config.verbose) {
|
|
126
|
-
|
|
127
|
+
hlog(`[STALL] Assessment says stalled: ${verdict.reason}`);
|
|
127
128
|
}
|
|
128
129
|
} catch (err) {
|
|
129
130
|
if (config.verbose) {
|
|
130
|
-
|
|
131
|
+
hlog(`[STALL] Assessment error: ${err}`);
|
|
131
132
|
}
|
|
132
133
|
}
|
|
133
134
|
return null;
|
|
@@ -277,6 +278,8 @@ interface StreamHandlerContext {
|
|
|
277
278
|
currentStepOutputTokens: number;
|
|
278
279
|
/** Timestamp of the last token usage change (tokens still flowing = process alive) */
|
|
279
280
|
lastTokenActivityTime: number;
|
|
281
|
+
/** Claude Code result event stop_reason (e.g., 'end_turn', 'max_tokens') */
|
|
282
|
+
stopReason?: string;
|
|
280
283
|
}
|
|
281
284
|
|
|
282
285
|
function handleSessionCapture(
|
|
@@ -590,9 +593,12 @@ function processStreamEvent(parsed: StreamJson, ctx: StreamHandlerContext): void
|
|
|
590
593
|
return;
|
|
591
594
|
}
|
|
592
595
|
|
|
593
|
-
// Handle result events — extract definitive token usage and surface errors
|
|
596
|
+
// Handle result events — extract definitive token usage, stop_reason, and surface errors
|
|
594
597
|
if (parsed.type === 'result') {
|
|
595
598
|
handleResultTokenUsage(parsed, ctx);
|
|
599
|
+
if (parsed.stop_reason) {
|
|
600
|
+
ctx.stopReason = parsed.stop_reason;
|
|
601
|
+
}
|
|
596
602
|
if (parsed.is_error) {
|
|
597
603
|
const errorMessage = parsed.error || parsed.result || 'Unknown error in result';
|
|
598
604
|
ctx.config.outputCallback?.(`\n[[MSTRO_ERROR:CLAUDE_RESULT_ERROR]] ${errorMessage}\n`);
|
|
@@ -733,7 +739,7 @@ function writeImageAttachmentsToStdin(
|
|
|
733
739
|
): void {
|
|
734
740
|
claudeProcess.stdin!.on('error', (err) => {
|
|
735
741
|
if (config.verbose) {
|
|
736
|
-
|
|
742
|
+
herror('[STDIN] Write error:', err.message);
|
|
737
743
|
}
|
|
738
744
|
config.outputCallback?.(`\n[[MSTRO_ERROR:STDIN_WRITE_FAILED]] Failed to send image data to Claude: ${err.message}\n`);
|
|
739
745
|
});
|
|
@@ -1002,7 +1008,7 @@ function setupToolTracking(
|
|
|
1002
1008
|
/**
 * Emit each message through hlog, but only when verbose mode is on.
 * Kept as a separate helper so callers do not need their own verbose checks.
 */
function verboseLog(verbose: boolean | undefined, ...msgs: string[]): void {
  if (!verbose) {
    return;
  }
  // One hlog call per message, in the order given.
  msgs.forEach((line) => {
    hlog(line);
  });
}
|
|
1008
1014
|
|
|
@@ -1030,12 +1036,17 @@ function spawnAndRegister(
|
|
|
1030
1036
|
`[PERF] Command: ${config.claudeCommand} ${args.join(' ')}`,
|
|
1031
1037
|
);
|
|
1032
1038
|
|
|
1039
|
+
const baseEnv = config.sandboxed
|
|
1040
|
+
? sanitizeEnvForSandbox(process.env, config.workingDir)
|
|
1041
|
+
: { ...process.env };
|
|
1042
|
+
const spawnEnv = config.extraEnv
|
|
1043
|
+
? { ...baseEnv, ...config.extraEnv }
|
|
1044
|
+
: baseEnv;
|
|
1045
|
+
|
|
1033
1046
|
const claudeProcess = spawn(config.claudeCommand, args, {
|
|
1034
1047
|
cwd: config.workingDir,
|
|
1035
1048
|
detached: true,
|
|
1036
|
-
env:
|
|
1037
|
-
? sanitizeEnvForSandbox(process.env, config.workingDir)
|
|
1038
|
-
: { ...process.env },
|
|
1049
|
+
env: spawnEnv,
|
|
1039
1050
|
stdio: [hasImageAttachments ? 'pipe' : 'ignore', 'pipe', 'pipe']
|
|
1040
1051
|
});
|
|
1041
1052
|
|
|
@@ -1210,5 +1221,6 @@ function buildCloseResult(
|
|
|
1210
1221
|
postTimeoutOutput: postTimeout,
|
|
1211
1222
|
resumeBufferedOutput: resumeBuffered,
|
|
1212
1223
|
apiTokenUsage: hasTokenUsage ? { ...ctx.apiTokenUsage } : undefined,
|
|
1224
|
+
stopReason: ctx.stopReason,
|
|
1213
1225
|
};
|
|
1214
1226
|
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
|
|
2
|
+
// Licensed under the MIT License. See LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Headless Logger
|
|
6
|
+
*
|
|
7
|
+
* Provides AsyncLocalStorage-based logging redirection for headless execution.
|
|
8
|
+
* When background operations (code review, PM compose/execute) run, their
|
|
9
|
+
* console output is redirected to log files under ~/.mstro/logs/ instead of
|
|
10
|
+
* polluting the terminal where the mstro CLI was started.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
14
|
+
import type { WriteStream } from 'node:fs';
|
|
15
|
+
import { createWriteStream, mkdirSync } from 'node:fs';
|
|
16
|
+
import { homedir } from 'node:os';
|
|
17
|
+
import { join } from 'node:path';
|
|
18
|
+
|
|
19
|
+
/**
 * Destination for headless log output.
 * The active target is stored in the module's AsyncLocalStorage context and
 * consulted by hlog/herror on every call.
 */
interface LogTarget {
  /** Receives the arguments passed to hlog. */
  log: (...args: unknown[]) => void;
  /** Receives the arguments passed to herror. */
  error: (...args: unknown[]) => void;
}
|
|
23
|
+
|
|
24
|
+
const logContext = new AsyncLocalStorage<LogTarget>();
|
|
25
|
+
|
|
26
|
+
/**
 * Render log arguments as a single space-separated line.
 *
 * @param args - Values passed to hlog/herror.
 * @returns The formatted values joined by single spaces ('' for no args).
 */
function formatArgs(args: unknown[]): string {
  return args.map(formatArg).join(' ');
}

/**
 * Convert one log argument to text without losing diagnostic detail.
 *
 * - strings pass through unchanged
 * - Error instances use their stack (falling back to "name: message")
 * - other objects/arrays are JSON-encoded instead of becoming "[object Object]"
 * - everything else (and anything JSON cannot encode) falls back to String()
 */
function formatArg(value: unknown): string {
  if (typeof value === 'string') {
    return value;
  }
  if (value instanceof Error) {
    return value.stack ?? `${value.name}: ${value.message}`;
  }
  if (typeof value === 'object' && value !== null) {
    try {
      const json = JSON.stringify(value);
      // JSON.stringify returns undefined when toJSON() yields undefined.
      if (json !== undefined) {
        return json;
      }
    } catch {
      // Circular structures and BigInt members make JSON.stringify throw;
      // fall through to the plain String() rendering below.
    }
  }
  return String(value);
}
|
|
29
|
+
|
|
30
|
+
/**
 * Informational log entry point for headless code.
 * Forwards to the LogTarget stored in the current async context when one
 * exists; with no active target the arguments go straight to console.log.
 */
export function hlog(...args: unknown[]): void {
  const sink = logContext.getStore();
  if (!sink) {
    console.log(...args);
    return;
  }
  sink.log(...args);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Error log entry point for headless code.
 * Uses the error channel of the LogTarget bound to the current async context,
 * or console.error when no target is bound.
 */
export function herror(...args: unknown[]): void {
  const sink = logContext.getStore();
  if (sink === undefined) {
    console.error(...args);
    return;
  }
  sink.error(...args);
}
|
|
49
|
+
|
|
50
|
+
const LOG_DIR = join(homedir(), '.mstro', 'logs', 'headless');
|
|
51
|
+
|
|
52
|
+
/**
 * Run an async function with all hlog/herror output redirected to a log file.
 * The log file is created at ~/.mstro/logs/headless/{label}-{timestamp}.log.
 *
 * File logging is best-effort: if the file cannot be opened or written, the
 * failure is reported once on console.error and later log lines are dropped,
 * rather than letting an unhandled stream 'error' event crash the process.
 * Lines logged after `fn` has settled (e.g. from a timer that still carries
 * this async context) are dropped as well, because the stream is already ended.
 *
 * @param label - Prefix used for the log file name.
 * @param fn - Async work to execute inside the file-logging context.
 * @returns The value `fn` resolves to; a rejection from `fn` propagates unchanged.
 * @throws Rejects if the log directory cannot be created.
 */
export async function runWithFileLogger<T>(label: string, fn: () => Promise<T>): Promise<T> {
  mkdirSync(LOG_DIR, { recursive: true });
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const logPath = join(LOG_DIR, `${label}-${timestamp}.log`);
  const stream: WriteStream = createWriteStream(logPath, { flags: 'a' });

  // True while the stream may still be written to.
  let accepting = true;

  // Without a listener, any open/write failure surfaces as an uncaught exception.
  stream.on('error', (err: Error) => {
    if (!accepting) {
      return;
    }
    accepting = false;
    console.error(`[headless-logger] Disabling log file ${logPath}: ${err.message}`);
  });

  const writeLine = (prefix: string, args: unknown[]): void => {
    if (!accepting) {
      return;
    }
    stream.write(`[${new Date().toISOString()}] ${prefix}${formatArgs(args)}\n`);
  };

  const target: LogTarget = {
    log: (...args: unknown[]) => {
      writeLine('', args);
    },
    error: (...args: unknown[]) => {
      writeLine('ERROR: ', args);
    },
  };

  return logContext.run(target, async () => {
    try {
      return await fn();
    } finally {
      // Stop accepting lines before ending so late callbacks never write after end().
      accepting = false;
      stream.end();
    }
  });
}
|
|
@@ -8,6 +8,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
|
8
8
|
import { homedir } from 'node:os';
|
|
9
9
|
import { join } from 'node:path';
|
|
10
10
|
import { MCP_SERVER_PATH, MSTRO_ROOT } from '../../utils/paths.js';
|
|
11
|
+
import { herror, hlog } from './headless-logger.js';
|
|
11
12
|
|
|
12
13
|
/**
|
|
13
14
|
* Load user's MCP servers from ~/.claude.json (global + project-level)
|
|
@@ -37,10 +38,10 @@ function loadUserMcpServers(workingDir: string, verbose: boolean): Record<string
|
|
|
37
38
|
}
|
|
38
39
|
|
|
39
40
|
if (verbose) {
|
|
40
|
-
|
|
41
|
+
hlog(`[${new Date().toISOString()}] Loaded ${Object.keys(servers).length} user MCP servers from ~/.claude.json`);
|
|
41
42
|
}
|
|
42
43
|
} catch (parseError: unknown) {
|
|
43
|
-
|
|
44
|
+
herror(`[${new Date().toISOString()}] Failed to parse ~/.claude.json: ${parseError instanceof Error ? parseError.message : String(parseError)}`);
|
|
44
45
|
}
|
|
45
46
|
|
|
46
47
|
return servers;
|
|
@@ -53,7 +54,7 @@ function loadUserMcpServers(workingDir: string, verbose: boolean): Record<string
|
|
|
53
54
|
export function generateMcpConfig(workingDir: string, verbose: boolean = false): string | null {
|
|
54
55
|
try {
|
|
55
56
|
if (!existsSync(MCP_SERVER_PATH)) {
|
|
56
|
-
|
|
57
|
+
herror(`[${new Date().toISOString()}] MCP server not found at ${MCP_SERVER_PATH}`);
|
|
57
58
|
return null;
|
|
58
59
|
}
|
|
59
60
|
|
|
@@ -76,12 +77,12 @@ export function generateMcpConfig(workingDir: string, verbose: boolean = false):
|
|
|
76
77
|
writeFileSync(configPath, JSON.stringify({ mcpServers }, null, 2));
|
|
77
78
|
|
|
78
79
|
if (verbose) {
|
|
79
|
-
|
|
80
|
+
hlog(`[${new Date().toISOString()}] Generated MCP config at ${configPath} (${Object.keys(mcpServers).length} servers)`);
|
|
80
81
|
}
|
|
81
82
|
|
|
82
83
|
return configPath;
|
|
83
84
|
} catch (error: unknown) {
|
|
84
|
-
|
|
85
|
+
herror(`[${new Date().toISOString()}] Failed to generate MCP config: ${error instanceof Error ? error.message : String(error)}`);
|
|
85
86
|
return null;
|
|
86
87
|
}
|
|
87
88
|
}
|
|
@@ -69,6 +69,7 @@ export class HeadlessRunner {
|
|
|
69
69
|
maxAutoRetries: config.maxAutoRetries ?? 2,
|
|
70
70
|
onToolTimeout: config.onToolTimeout,
|
|
71
71
|
sandboxed: config.sandboxed,
|
|
72
|
+
extraEnv: config.extraEnv,
|
|
72
73
|
};
|
|
73
74
|
}
|
|
74
75
|
|
|
@@ -122,6 +123,7 @@ export class HeadlessRunner {
|
|
|
122
123
|
nativeTimeoutCount: result.nativeTimeoutCount,
|
|
123
124
|
postTimeoutOutput: result.postTimeoutOutput,
|
|
124
125
|
resumeBufferedOutput: result.resumeBufferedOutput,
|
|
126
|
+
stopReason: result.stopReason,
|
|
125
127
|
};
|
|
126
128
|
}
|
|
127
129
|
|
|
@@ -150,6 +152,7 @@ export class HeadlessRunner {
|
|
|
150
152
|
nativeTimeoutCount: result.nativeTimeoutCount,
|
|
151
153
|
postTimeoutOutput: result.postTimeoutOutput,
|
|
152
154
|
resumeBufferedOutput: result.resumeBufferedOutput,
|
|
155
|
+
stopReason: result.stopReason,
|
|
153
156
|
};
|
|
154
157
|
}
|
|
155
158
|
|
|
@@ -168,6 +171,7 @@ export class HeadlessRunner {
|
|
|
168
171
|
nativeTimeoutCount: result.nativeTimeoutCount,
|
|
169
172
|
postTimeoutOutput: result.postTimeoutOutput,
|
|
170
173
|
resumeBufferedOutput: result.resumeBufferedOutput,
|
|
174
|
+
stopReason: result.stopReason,
|
|
171
175
|
};
|
|
172
176
|
}
|
|
173
177
|
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
19
|
import { type ChildProcess, spawn } from 'node:child_process';
|
|
20
|
+
import { hlog } from './headless-logger.js';
|
|
20
21
|
|
|
21
22
|
export interface StallContext {
|
|
22
23
|
/** The original user prompt being executed */
|
|
@@ -137,7 +138,7 @@ export async function assessStall(
|
|
|
137
138
|
const quick = quickHeuristic(ctx, toolWatchdogActive);
|
|
138
139
|
if (quick) {
|
|
139
140
|
if (verbose) {
|
|
140
|
-
|
|
141
|
+
hlog(`[STALL-ASSESS] Heuristic verdict: ${quick.reason}`);
|
|
141
142
|
}
|
|
142
143
|
return quick;
|
|
143
144
|
}
|
|
@@ -145,12 +146,12 @@ export async function assessStall(
|
|
|
145
146
|
// Layer 2: Haiku assessment
|
|
146
147
|
try {
|
|
147
148
|
if (verbose) {
|
|
148
|
-
|
|
149
|
+
hlog('[STALL-ASSESS] Running Haiku assessment...');
|
|
149
150
|
}
|
|
150
151
|
return await runHaikuAssessment(ctx, claudeCommand, verbose);
|
|
151
152
|
} catch (err) {
|
|
152
153
|
if (verbose) {
|
|
153
|
-
|
|
154
|
+
hlog(`[STALL-ASSESS] Haiku assessment failed: ${err}`);
|
|
154
155
|
}
|
|
155
156
|
// If Haiku fails (timeout, auth issue, etc.), extend cautiously
|
|
156
157
|
return {
|
|
@@ -220,13 +221,13 @@ export async function assessToolTimeout(
|
|
|
220
221
|
|
|
221
222
|
try {
|
|
222
223
|
if (verbose) {
|
|
223
|
-
|
|
224
|
+
hlog(`[TOOL-ASSESS] Running Haiku assessment for ${toolName} (${elapsedSec}s elapsed)...`);
|
|
224
225
|
}
|
|
225
226
|
|
|
226
227
|
return await spawnHaikuVerdict(prompt, claudeCommand, verbose, 'TOOL-ASSESS');
|
|
227
228
|
} catch (err) {
|
|
228
229
|
if (verbose) {
|
|
229
|
-
|
|
230
|
+
hlog(`[TOOL-ASSESS] Haiku assessment failed: ${err}`);
|
|
230
231
|
}
|
|
231
232
|
// On failure, default to kill (the tool has already exceeded its timeout)
|
|
232
233
|
return {
|
|
@@ -295,7 +296,7 @@ export async function assessContextLoss(
|
|
|
295
296
|
|
|
296
297
|
try {
|
|
297
298
|
if (verbose) {
|
|
298
|
-
|
|
299
|
+
hlog(`[CONTEXT-ASSESS] Running Haiku assessment (${ctx.effectiveTimeouts} timeouts, ${ctx.successfulToolCalls} successes, ${ctx.thinkingOutputLength} thinking chars)...`);
|
|
299
300
|
}
|
|
300
301
|
|
|
301
302
|
const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'CONTEXT-ASSESS');
|
|
@@ -303,13 +304,13 @@ export async function assessContextLoss(
|
|
|
303
304
|
const contextLost = parsed.verdict === 'STALLED';
|
|
304
305
|
|
|
305
306
|
if (verbose) {
|
|
306
|
-
|
|
307
|
+
hlog(`[CONTEXT-ASSESS] Verdict: ${contextLost ? 'LOST' : 'CONTINUED'} — ${parsed.reason}`);
|
|
307
308
|
}
|
|
308
309
|
|
|
309
310
|
return { contextLost, reason: parsed.reason };
|
|
310
311
|
} catch (err) {
|
|
311
312
|
if (verbose) {
|
|
312
|
-
|
|
313
|
+
hlog(`[CONTEXT-ASSESS] Haiku assessment failed: ${err}`);
|
|
313
314
|
}
|
|
314
315
|
// On failure, assume context was lost (safer to retry than to show a confused response)
|
|
315
316
|
return {
|
|
@@ -419,7 +420,7 @@ function spawnHaikuRaw(
|
|
|
419
420
|
|
|
420
421
|
proc.stderr!.on('data', (data) => {
|
|
421
422
|
if (verbose) {
|
|
422
|
-
|
|
423
|
+
hlog(`[${label}] haiku stderr: ${data.toString().trim()}`);
|
|
423
424
|
}
|
|
424
425
|
});
|
|
425
426
|
|
|
@@ -434,7 +435,7 @@ function spawnHaikuRaw(
|
|
|
434
435
|
}
|
|
435
436
|
|
|
436
437
|
if (verbose) {
|
|
437
|
-
|
|
438
|
+
hlog(`[${label}] Haiku response: ${stdout.trim()}`);
|
|
438
439
|
}
|
|
439
440
|
|
|
440
441
|
resolve(stdout.trim());
|
|
@@ -521,7 +522,7 @@ export async function assessApproval(
|
|
|
521
522
|
|
|
522
523
|
try {
|
|
523
524
|
if (verbose) {
|
|
524
|
-
|
|
525
|
+
hlog('[APPROVAL-ASSESS] Running Haiku assessment...');
|
|
525
526
|
}
|
|
526
527
|
|
|
527
528
|
const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'APPROVAL-ASSESS');
|
|
@@ -529,19 +530,96 @@ export async function assessApproval(
|
|
|
529
530
|
const isApproval = parsed.verdict.includes('APPROVAL');
|
|
530
531
|
|
|
531
532
|
if (verbose) {
|
|
532
|
-
|
|
533
|
+
hlog(`[APPROVAL-ASSESS] Verdict: ${isApproval ? 'APPROVAL' : 'NEW_TASK'} — ${parsed.reason}`);
|
|
533
534
|
}
|
|
534
535
|
|
|
535
536
|
return { isApproval, reason: parsed.reason };
|
|
536
537
|
} catch (err) {
|
|
537
538
|
if (verbose) {
|
|
538
|
-
|
|
539
|
+
hlog(`[APPROVAL-ASSESS] Haiku assessment failed: ${err}`);
|
|
539
540
|
}
|
|
540
541
|
// On failure, assume not an approval (safer to treat as new task)
|
|
541
542
|
return { isApproval: false, reason: `Assessment failed: ${err}` };
|
|
542
543
|
}
|
|
543
544
|
}
|
|
544
545
|
|
|
546
|
+
// ========== Premature Completion Assessment ==========
|
|
547
|
+
|
|
548
|
+
/** Signals describing a finished run, used to build the premature-completion prompt. */
export interface PrematureCompletionContext {
  /** Tail end of the assistant's response text (roughly the last 800 characters). */
  responseTail: string;
  /** How many tool calls succeeded during this execution. */
  successfulToolCalls: number;
  /** True when the run produced extended thinking output. */
  hasThinking: boolean;
  /** Length of the full response. */
  responseLength: number;
}
|
|
558
|
+
|
|
559
|
+
/** Outcome of the premature-completion assessment. */
export interface PrematureCompletionVerdict {
  /** Set when the task looks unfinished and should be auto-continued. */
  isIncomplete: boolean;
  /** Short explanation accompanying the verdict. */
  reason: string;
}
|
|
564
|
+
|
|
565
|
+
/**
 * Decide whether a Claude execution that exited normally actually finished its task.
 *
 * Intended for runs whose stop_reason is 'end_turn' but which may have stopped
 * mid-work. The session signals and the response tail are sent to Haiku, whose
 * verdict says whether the text announces work that was never carried out.
 *
 * @param ctx - Signals from the finished run (response tail, tool-call count, ...).
 * @param claudeCommand - Command passed through to spawnHaikuRaw.
 * @param verbose - When true, progress and verdict lines are written via hlog.
 * @returns The verdict; if the assessment itself fails, `isIncomplete` is false
 *   so the run is not auto-continued on a guess.
 */
export async function assessPrematureCompletion(
  ctx: PrematureCompletionContext,
  claudeCommand: string,
  verbose: boolean,
): Promise<PrematureCompletionVerdict> {
  const trace = (line: string): void => {
    if (verbose) {
      hlog(line);
    }
  };

  // Prompt sections; each section's lines are joined by '\n' and sections by a blank line.
  const sections: string[][] = [
    [
      'You are analyzing the FINAL output of a Claude Code agent that just exited normally.',
      'Determine whether the agent finished its task or stopped prematurely mid-work.',
    ],
    [
      'Session signals:',
      `- ${ctx.successfulToolCalls} tool calls completed successfully`,
      `- Response length: ${ctx.responseLength} characters`,
      `- Extended thinking: ${ctx.hasThinking ? 'YES' : 'NO'}`,
    ],
    [
      `Final response text (last ${ctx.responseTail.length} chars):`,
      ctx.responseTail,
    ],
    [
      `INCOMPLETE signals: "Now I'll...", "Let me fix...", "Next I'll...", "Moving on to...",`,
      `"I'll continue with...", announcing next steps that were never executed,`,
      'describing work that will happen next but no tool call followed.',
    ],
    [
      'COMPLETE signals: summarizing what was done, confirming changes, reporting results,',
      'asking the user a question, past-tense descriptions of completed work,',
      '"all done", "changes applied", referencing finished state.',
    ],
    [
      'Respond in EXACTLY this format (2 lines, no extra text):',
      'VERDICT: COMPLETE or INCOMPLETE',
      'REASON: <brief one-line explanation>',
    ],
  ];
  const prompt = sections.map((lines) => lines.join('\n')).join('\n\n');

  try {
    trace(`[PREMATURE-ASSESS] Running Haiku assessment (${ctx.successfulToolCalls} tools, ${ctx.responseLength} chars)...`);

    const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'PREMATURE-ASSESS');
    const { verdict, reason } = parseVerdictResponse(raw);
    // 'INCOMPLETE' contains 'COMPLETE', so test for the longer token.
    const isIncomplete = verdict.includes('INCOMPLETE');

    trace(`[PREMATURE-ASSESS] Verdict: ${isIncomplete ? 'INCOMPLETE' : 'COMPLETE'} — ${reason}`);

    return { isIncomplete, reason };
  } catch (err) {
    trace(`[PREMATURE-ASSESS] Haiku assessment failed: ${err}`);
    // On failure, don't retry — safer to let the user decide than to auto-continue incorrectly
    return { isIncomplete: false, reason: `Assessment failed: ${err}` };
  }
}
|
|
622
|
+
|
|
545
623
|
// ========== Best Result Comparison ==========
|
|
546
624
|
|
|
547
625
|
export interface BestResultContext {
|
|
@@ -602,7 +680,7 @@ export async function assessBestResult(
|
|
|
602
680
|
|
|
603
681
|
try {
|
|
604
682
|
if (verbose) {
|
|
605
|
-
|
|
683
|
+
hlog('[BEST-RESULT] Running Haiku assessment...');
|
|
606
684
|
}
|
|
607
685
|
|
|
608
686
|
const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'BEST-RESULT');
|
|
@@ -610,13 +688,13 @@ export async function assessBestResult(
|
|
|
610
688
|
const winner: 'A' | 'B' = parsed.verdict.includes('B') ? 'B' : 'A';
|
|
611
689
|
|
|
612
690
|
if (verbose) {
|
|
613
|
-
|
|
691
|
+
hlog(`[BEST-RESULT] Verdict: ${winner} — ${parsed.reason}`);
|
|
614
692
|
}
|
|
615
693
|
|
|
616
694
|
return { winner, reason: parsed.reason };
|
|
617
695
|
} catch (err) {
|
|
618
696
|
if (verbose) {
|
|
619
|
-
|
|
697
|
+
hlog(`[BEST-RESULT] Haiku assessment failed: ${err}`);
|
|
620
698
|
}
|
|
621
699
|
// On failure, prefer A (the previously-tracked best result)
|
|
622
700
|
return { winner: 'A', reason: `Assessment failed: ${err}` };
|
|
@@ -671,7 +749,7 @@ export async function classifyError(
|
|
|
671
749
|
|
|
672
750
|
try {
|
|
673
751
|
if (verbose) {
|
|
674
|
-
|
|
752
|
+
hlog('[ERROR-CLASSIFY] Running Haiku assessment...');
|
|
675
753
|
}
|
|
676
754
|
|
|
677
755
|
const raw = await spawnHaikuRaw(prompt, claudeCommand, verbose, 'ERROR-CLASSIFY');
|
|
@@ -691,13 +769,13 @@ export async function classifyError(
|
|
|
691
769
|
if (category === 'UNKNOWN' || !message) return null;
|
|
692
770
|
|
|
693
771
|
if (verbose) {
|
|
694
|
-
|
|
772
|
+
hlog(`[ERROR-CLASSIFY] Verdict: ${category} — ${message}`);
|
|
695
773
|
}
|
|
696
774
|
|
|
697
775
|
return { errorCode: category, message };
|
|
698
776
|
} catch (err) {
|
|
699
777
|
if (verbose) {
|
|
700
|
-
|
|
778
|
+
hlog(`[ERROR-CLASSIFY] Haiku assessment failed: ${err}`);
|
|
701
779
|
}
|
|
702
780
|
return null;
|
|
703
781
|
}
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
* 3. Haiku tiebreaker: optional AI assessment before killing ambiguous cases
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
+
import { hlog } from './headless-logger.js';
|
|
17
18
|
import type {
|
|
18
19
|
ExecutionCheckpoint,
|
|
19
20
|
ToolDurationTracker,
|
|
@@ -167,7 +168,7 @@ export class ToolWatchdog {
|
|
|
167
168
|
sampleCount: 1,
|
|
168
169
|
});
|
|
169
170
|
if (this.verbose) {
|
|
170
|
-
|
|
171
|
+
hlog(`[WATCHDOG] ${toolName}: first sample ${durationMs}ms, initial timeout ${this.getTimeout(toolName)}ms`);
|
|
171
172
|
}
|
|
172
173
|
return;
|
|
173
174
|
}
|
|
@@ -178,7 +179,7 @@ export class ToolWatchdog {
|
|
|
178
179
|
tracker.sampleCount++;
|
|
179
180
|
|
|
180
181
|
if (this.verbose) {
|
|
181
|
-
|
|
182
|
+
hlog(`[WATCHDOG] ${toolName}: sample #${tracker.sampleCount} ${durationMs}ms, est=${Math.round(tracker.estimatedDuration)}ms, dev=${Math.round(tracker.deviation)}ms, timeout=${this.getTimeout(toolName)}ms`);
|
|
182
183
|
}
|
|
183
184
|
}
|
|
184
185
|
|
|
@@ -208,7 +209,7 @@ export class ToolWatchdog {
|
|
|
208
209
|
const profile = this.getProfile(toolName);
|
|
209
210
|
|
|
210
211
|
if (this.verbose) {
|
|
211
|
-
|
|
212
|
+
hlog(`[WATCHDOG] Starting watch: ${toolName} (${toolId}), timeout=${Math.round(timeoutMs / 1000)}s`);
|
|
212
213
|
}
|
|
213
214
|
|
|
214
215
|
const timer = setTimeout(async () => {
|
|
@@ -245,7 +246,7 @@ export class ToolWatchdog {
|
|
|
245
246
|
|
|
246
247
|
if (!profile.useHaikuTiebreaker || !this.onTiebreaker || watch.tiebreakerAttempted) {
|
|
247
248
|
if (this.verbose) {
|
|
248
|
-
|
|
249
|
+
hlog(`[WATCHDOG] ${toolName} (${toolId}) timed out after ${Math.round(elapsedMs / 1000)}s, killing`);
|
|
249
250
|
}
|
|
250
251
|
return false;
|
|
251
252
|
}
|
|
@@ -265,7 +266,7 @@ export class ToolWatchdog {
|
|
|
265
266
|
watch.tiebreakerAttempted = true;
|
|
266
267
|
|
|
267
268
|
if (this.verbose) {
|
|
268
|
-
|
|
269
|
+
hlog(`[WATCHDOG] ${toolName} (${toolId}) hit timeout after ${Math.round(elapsedMs / 1000)}s, running tiebreaker...`);
|
|
269
270
|
}
|
|
270
271
|
|
|
271
272
|
try {
|
|
@@ -274,7 +275,7 @@ export class ToolWatchdog {
|
|
|
274
275
|
|
|
275
276
|
if (verdict.action === 'extend') {
|
|
276
277
|
if (this.verbose) {
|
|
277
|
-
|
|
278
|
+
hlog(`[WATCHDOG] Tiebreaker: extend ${toolName} by ${Math.round(verdict.extensionMs / 1000)}s — ${verdict.reason}`);
|
|
278
279
|
}
|
|
279
280
|
this.scheduleExtensionTimeout(watch, toolId, toolName, verdict.extensionMs, onTimeout);
|
|
280
281
|
watch.timeoutMs = elapsedMs + verdict.extensionMs;
|
|
@@ -282,11 +283,11 @@ export class ToolWatchdog {
|
|
|
282
283
|
}
|
|
283
284
|
|
|
284
285
|
if (this.verbose) {
|
|
285
|
-
|
|
286
|
+
hlog(`[WATCHDOG] Tiebreaker: kill ${toolName} — ${verdict.reason}`);
|
|
286
287
|
}
|
|
287
288
|
} catch (err) {
|
|
288
289
|
if (this.verbose) {
|
|
289
|
-
|
|
290
|
+
hlog(`[WATCHDOG] Tiebreaker failed: ${err}, proceeding with kill`);
|
|
290
291
|
}
|
|
291
292
|
}
|
|
292
293
|
|
|
@@ -305,7 +306,7 @@ export class ToolWatchdog {
|
|
|
305
306
|
const w = this.activeWatches.get(toolId);
|
|
306
307
|
if (!w) return;
|
|
307
308
|
if (this.verbose) {
|
|
308
|
-
|
|
309
|
+
hlog(`[WATCHDOG] ${toolName} (${toolId}) still running after extension, killing`);
|
|
309
310
|
}
|
|
310
311
|
// Don't delete the watch — buildCheckpoint() needs it.
|
|
311
312
|
// handleToolTimeout() calls clearAll() after building the checkpoint.
|
|
@@ -121,6 +121,8 @@ export interface HeadlessConfig {
|
|
|
121
121
|
onToolTimeout?: (checkpoint: ExecutionCheckpoint) => void;
|
|
122
122
|
/** When true, spawn Claude with sanitized env (strips secrets, HOME=workingDir) */
|
|
123
123
|
sandboxed?: boolean;
|
|
124
|
+
/** Extra environment variables to merge into the spawned Claude process env */
|
|
125
|
+
extraEnv?: Record<string, string>;
|
|
124
126
|
}
|
|
125
127
|
|
|
126
128
|
export interface SessionState {
|
|
@@ -165,6 +167,8 @@ export interface SessionResult {
|
|
|
165
167
|
/** Assistant text buffered during resume assessment — held back until thinking/tool activity
|
|
166
168
|
* confirms Claude has context. Undefined when not in resume mode or buffer was flushed. */
|
|
167
169
|
resumeBufferedOutput?: string;
|
|
170
|
+
/** Claude Code result event stop_reason: 'end_turn', 'max_tokens', or undefined if not captured */
|
|
171
|
+
stopReason?: string;
|
|
168
172
|
}
|
|
169
173
|
|
|
170
174
|
export interface ToolUseAccumulator {
|
|
@@ -200,10 +204,12 @@ export interface ExecutionResult {
|
|
|
200
204
|
resumeBufferedOutput?: string;
|
|
201
205
|
/** Actual API token usage from Claude Code stream events (summed across all turns) */
|
|
202
206
|
apiTokenUsage?: { inputTokens: number; outputTokens: number };
|
|
207
|
+
/** Claude Code result event stop_reason: 'end_turn', 'max_tokens', or undefined if not captured */
|
|
208
|
+
stopReason?: string;
|
|
203
209
|
}
|
|
204
210
|
|
|
205
211
|
/** Resolved config with all defaults applied */
|
|
206
|
-
export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallback' | 'thinkingCallback' | 'toolUseCallback' | 'tokenUsageCallback' | 'continueSession' | 'claudeSessionId' | 'imageAttachments' | 'model' | 'toolTimeoutProfiles' | 'onToolTimeout' | 'sandboxed'> & {
|
|
212
|
+
export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallback' | 'thinkingCallback' | 'toolUseCallback' | 'tokenUsageCallback' | 'continueSession' | 'claudeSessionId' | 'imageAttachments' | 'model' | 'toolTimeoutProfiles' | 'onToolTimeout' | 'sandboxed' | 'extraEnv'> & {
|
|
207
213
|
outputCallback?: (text: string) => void;
|
|
208
214
|
thinkingCallback?: (text: string) => void;
|
|
209
215
|
toolUseCallback?: (event: ToolUseEvent) => void;
|
|
@@ -215,4 +221,7 @@ export type ResolvedHeadlessConfig = Omit<Required<HeadlessConfig>, 'outputCallb
|
|
|
215
221
|
toolTimeoutProfiles?: Record<string, Partial<ToolTimeoutProfile>>;
|
|
216
222
|
onToolTimeout?: (checkpoint: ExecutionCheckpoint) => void;
|
|
217
223
|
sandboxed?: boolean;
|
|
224
|
+
extraEnv?: Record<string, string>;
|
|
218
225
|
};
|
|
226
|
+
|
|
227
|
+
|