mstro-app 0.1.58 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. package/PRIVACY.md +126 -0
  2. package/README.md +24 -23
  3. package/bin/commands/login.js +85 -42
  4. package/bin/commands/logout.js +35 -1
  5. package/bin/commands/status.js +1 -1
  6. package/bin/mstro.js +231 -131
  7. package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
  8. package/dist/server/cli/headless/claude-invoker.js +550 -115
  9. package/dist/server/cli/headless/claude-invoker.js.map +1 -1
  10. package/dist/server/cli/headless/index.d.ts +2 -1
  11. package/dist/server/cli/headless/index.d.ts.map +1 -1
  12. package/dist/server/cli/headless/index.js +2 -0
  13. package/dist/server/cli/headless/index.js.map +1 -1
  14. package/dist/server/cli/headless/prompt-utils.d.ts +5 -8
  15. package/dist/server/cli/headless/prompt-utils.d.ts.map +1 -1
  16. package/dist/server/cli/headless/prompt-utils.js +40 -5
  17. package/dist/server/cli/headless/prompt-utils.js.map +1 -1
  18. package/dist/server/cli/headless/runner.d.ts +1 -1
  19. package/dist/server/cli/headless/runner.d.ts.map +1 -1
  20. package/dist/server/cli/headless/runner.js +52 -7
  21. package/dist/server/cli/headless/runner.js.map +1 -1
  22. package/dist/server/cli/headless/stall-assessor.d.ts +79 -1
  23. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  24. package/dist/server/cli/headless/stall-assessor.js +355 -20
  25. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  26. package/dist/server/cli/headless/tool-watchdog.d.ts +70 -0
  27. package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -0
  28. package/dist/server/cli/headless/tool-watchdog.js +302 -0
  29. package/dist/server/cli/headless/tool-watchdog.js.map +1 -0
  30. package/dist/server/cli/headless/types.d.ts +98 -1
  31. package/dist/server/cli/headless/types.d.ts.map +1 -1
  32. package/dist/server/cli/improvisation-session-manager.d.ts +136 -2
  33. package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
  34. package/dist/server/cli/improvisation-session-manager.js +929 -132
  35. package/dist/server/cli/improvisation-session-manager.js.map +1 -1
  36. package/dist/server/index.js +5 -13
  37. package/dist/server/index.js.map +1 -1
  38. package/dist/server/mcp/bouncer-integration.d.ts.map +1 -1
  39. package/dist/server/mcp/bouncer-integration.js +18 -0
  40. package/dist/server/mcp/bouncer-integration.js.map +1 -1
  41. package/dist/server/mcp/security-audit.d.ts +2 -2
  42. package/dist/server/mcp/security-audit.d.ts.map +1 -1
  43. package/dist/server/mcp/security-audit.js +12 -8
  44. package/dist/server/mcp/security-audit.js.map +1 -1
  45. package/dist/server/mcp/security-patterns.d.ts.map +1 -1
  46. package/dist/server/mcp/security-patterns.js +9 -4
  47. package/dist/server/mcp/security-patterns.js.map +1 -1
  48. package/dist/server/routes/improvise.js +6 -6
  49. package/dist/server/routes/improvise.js.map +1 -1
  50. package/dist/server/services/analytics.d.ts +2 -0
  51. package/dist/server/services/analytics.d.ts.map +1 -1
  52. package/dist/server/services/analytics.js +26 -4
  53. package/dist/server/services/analytics.js.map +1 -1
  54. package/dist/server/services/platform.d.ts.map +1 -1
  55. package/dist/server/services/platform.js +17 -10
  56. package/dist/server/services/platform.js.map +1 -1
  57. package/dist/server/services/sandbox-utils.d.ts +6 -0
  58. package/dist/server/services/sandbox-utils.d.ts.map +1 -0
  59. package/dist/server/services/sandbox-utils.js +72 -0
  60. package/dist/server/services/sandbox-utils.js.map +1 -0
  61. package/dist/server/services/settings.d.ts +6 -0
  62. package/dist/server/services/settings.d.ts.map +1 -1
  63. package/dist/server/services/settings.js +21 -0
  64. package/dist/server/services/settings.js.map +1 -1
  65. package/dist/server/services/terminal/pty-manager.d.ts +5 -51
  66. package/dist/server/services/terminal/pty-manager.d.ts.map +1 -1
  67. package/dist/server/services/terminal/pty-manager.js +63 -102
  68. package/dist/server/services/terminal/pty-manager.js.map +1 -1
  69. package/dist/server/services/websocket/file-explorer-handlers.d.ts +5 -0
  70. package/dist/server/services/websocket/file-explorer-handlers.d.ts.map +1 -0
  71. package/dist/server/services/websocket/file-explorer-handlers.js +518 -0
  72. package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -0
  73. package/dist/server/services/websocket/git-handlers.d.ts +36 -0
  74. package/dist/server/services/websocket/git-handlers.d.ts.map +1 -0
  75. package/dist/server/services/websocket/git-handlers.js +797 -0
  76. package/dist/server/services/websocket/git-handlers.js.map +1 -0
  77. package/dist/server/services/websocket/git-pr-handlers.d.ts +4 -0
  78. package/dist/server/services/websocket/git-pr-handlers.d.ts.map +1 -0
  79. package/dist/server/services/websocket/git-pr-handlers.js +299 -0
  80. package/dist/server/services/websocket/git-pr-handlers.js.map +1 -0
  81. package/dist/server/services/websocket/git-worktree-handlers.d.ts +4 -0
  82. package/dist/server/services/websocket/git-worktree-handlers.d.ts.map +1 -0
  83. package/dist/server/services/websocket/git-worktree-handlers.js +353 -0
  84. package/dist/server/services/websocket/git-worktree-handlers.js.map +1 -0
  85. package/dist/server/services/websocket/handler-context.d.ts +32 -0
  86. package/dist/server/services/websocket/handler-context.d.ts.map +1 -0
  87. package/dist/server/services/websocket/handler-context.js +4 -0
  88. package/dist/server/services/websocket/handler-context.js.map +1 -0
  89. package/dist/server/services/websocket/handler.d.ts +27 -338
  90. package/dist/server/services/websocket/handler.d.ts.map +1 -1
  91. package/dist/server/services/websocket/handler.js +74 -2106
  92. package/dist/server/services/websocket/handler.js.map +1 -1
  93. package/dist/server/services/websocket/index.d.ts +1 -1
  94. package/dist/server/services/websocket/index.d.ts.map +1 -1
  95. package/dist/server/services/websocket/index.js.map +1 -1
  96. package/dist/server/services/websocket/session-handlers.d.ts +10 -0
  97. package/dist/server/services/websocket/session-handlers.d.ts.map +1 -0
  98. package/dist/server/services/websocket/session-handlers.js +507 -0
  99. package/dist/server/services/websocket/session-handlers.js.map +1 -0
  100. package/dist/server/services/websocket/settings-handlers.d.ts +6 -0
  101. package/dist/server/services/websocket/settings-handlers.d.ts.map +1 -0
  102. package/dist/server/services/websocket/settings-handlers.js +125 -0
  103. package/dist/server/services/websocket/settings-handlers.js.map +1 -0
  104. package/dist/server/services/websocket/tab-handlers.d.ts +10 -0
  105. package/dist/server/services/websocket/tab-handlers.d.ts.map +1 -0
  106. package/dist/server/services/websocket/tab-handlers.js +131 -0
  107. package/dist/server/services/websocket/tab-handlers.js.map +1 -0
  108. package/dist/server/services/websocket/terminal-handlers.d.ts +9 -0
  109. package/dist/server/services/websocket/terminal-handlers.d.ts.map +1 -0
  110. package/dist/server/services/websocket/terminal-handlers.js +220 -0
  111. package/dist/server/services/websocket/terminal-handlers.js.map +1 -0
  112. package/dist/server/services/websocket/types.d.ts +67 -2
  113. package/dist/server/services/websocket/types.d.ts.map +1 -1
  114. package/hooks/bouncer.sh +11 -4
  115. package/package.json +7 -2
  116. package/server/README.md +176 -159
  117. package/server/cli/headless/claude-invoker.ts +740 -133
  118. package/server/cli/headless/index.ts +7 -1
  119. package/server/cli/headless/output-utils.test.ts +225 -0
  120. package/server/cli/headless/prompt-utils.ts +37 -5
  121. package/server/cli/headless/runner.ts +55 -8
  122. package/server/cli/headless/stall-assessor.test.ts +165 -0
  123. package/server/cli/headless/stall-assessor.ts +478 -22
  124. package/server/cli/headless/tool-watchdog.test.ts +429 -0
  125. package/server/cli/headless/tool-watchdog.ts +398 -0
  126. package/server/cli/headless/types.ts +93 -1
  127. package/server/cli/improvisation-session-manager.ts +1133 -145
  128. package/server/index.ts +5 -14
  129. package/server/mcp/README.md +59 -67
  130. package/server/mcp/bouncer-integration.test.ts +161 -0
  131. package/server/mcp/bouncer-integration.ts +28 -0
  132. package/server/mcp/security-audit.ts +12 -8
  133. package/server/mcp/security-patterns.test.ts +258 -0
  134. package/server/mcp/security-patterns.ts +8 -2
  135. package/server/routes/improvise.ts +6 -6
  136. package/server/services/analytics.ts +26 -4
  137. package/server/services/platform.test.ts +0 -10
  138. package/server/services/platform.ts +16 -11
  139. package/server/services/sandbox-utils.ts +78 -0
  140. package/server/services/settings.ts +25 -0
  141. package/server/services/terminal/pty-manager.ts +68 -129
  142. package/server/services/websocket/autocomplete.test.ts +194 -0
  143. package/server/services/websocket/file-explorer-handlers.ts +587 -0
  144. package/server/services/websocket/git-handlers.ts +924 -0
  145. package/server/services/websocket/git-pr-handlers.ts +363 -0
  146. package/server/services/websocket/git-worktree-handlers.ts +403 -0
  147. package/server/services/websocket/handler-context.ts +44 -0
  148. package/server/services/websocket/handler.test.ts +1 -1
  149. package/server/services/websocket/handler.ts +90 -2421
  150. package/server/services/websocket/index.ts +1 -1
  151. package/server/services/websocket/session-handlers.ts +574 -0
  152. package/server/services/websocket/settings-handlers.ts +150 -0
  153. package/server/services/websocket/tab-handlers.ts +150 -0
  154. package/server/services/websocket/terminal-handlers.ts +277 -0
  155. package/server/services/websocket/types.ts +145 -4
  156. package/bin/release.sh +0 -110
  157. package/dist/server/services/terminal/tmux-manager.d.ts +0 -82
  158. package/dist/server/services/terminal/tmux-manager.d.ts.map +0 -1
  159. package/dist/server/services/terminal/tmux-manager.js +0 -352
  160. package/dist/server/services/terminal/tmux-manager.js.map +0 -1
  161. package/server/services/terminal/tmux-manager.ts +0 -426
@@ -26,14 +26,20 @@ export {
26
26
  } from './prompt-utils.js';
27
27
  // Main runner class
28
28
  export { HeadlessRunner } from './runner.js';
29
+ // Tool watchdog
30
+ export { ToolWatchdog } from './tool-watchdog.js';
29
31
  // Types
30
32
  export type {
33
+ ExecutionCheckpoint,
31
34
  ExecutionResult,
32
35
  HeadlessConfig,
33
36
  ImageAttachment,
34
- ResolvedHeadlessConfig,
37
+ PendingToolMap,
38
+ ResolvedHeadlessConfig,
35
39
  SessionResult,
36
40
  SessionState,
41
+ ToolDurationTracker,
42
+ ToolTimeoutProfile,
37
43
  ToolUseAccumulator,
38
44
  ToolUseEvent
39
45
  } from './types.js';
@@ -0,0 +1,225 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import {
3
+ detectErrorInStderr,
4
+ estimateTokensFromOutput,
5
+ extractCleanOutput,
6
+ extractModifiedFiles,
7
+ } from './output-utils.js';
8
+
9
+ // ========== extractCleanOutput ==========
10
+
11
+ describe('extractCleanOutput', () => {
12
+ it('filters out JSON lines with "type" field', () => {
13
+ const input = [
14
+ '{"type": "system", "data": "init"}',
15
+ 'Hello world',
16
+ '{"type": "assistant", "text": "hi"}',
17
+ 'Some output',
18
+ ].join('\n');
19
+ expect(extractCleanOutput(input)).toBe('Hello world\nSome output');
20
+ });
21
+
22
+ it('strips ANSI color codes', () => {
23
+ const input = '\x1b[32mgreen text\x1b[0m and \x1b[1;31mred bold\x1b[0m';
24
+ expect(extractCleanOutput(input)).toBe('green text and red bold');
25
+ });
26
+
27
+ it('normalizes CRLF to LF', () => {
28
+ const input = 'line1\r\nline2\r\nline3';
29
+ expect(extractCleanOutput(input)).toBe('line1\nline2\nline3');
30
+ });
31
+
32
+ it('trims whitespace', () => {
33
+ const input = ' \n Hello \n ';
34
+ expect(extractCleanOutput(input)).toBe('Hello');
35
+ });
36
+
37
+ it('filters empty lines', () => {
38
+ const input = 'line1\n\n\nline2';
39
+ expect(extractCleanOutput(input)).toBe('line1\nline2');
40
+ });
41
+
42
+ it('returns empty string for all-JSON input', () => {
43
+ const input = '{"type": "system"}\n{"type": "result"}';
44
+ expect(extractCleanOutput(input)).toBe('');
45
+ });
46
+
47
+ it('handles combined ANSI + JSON + CRLF', () => {
48
+ const input = '{"type": "system"}\r\n\x1b[33mwarning\x1b[0m\r\n{"type": "result"}';
49
+ expect(extractCleanOutput(input)).toBe('warning');
50
+ });
51
+ });
52
+
53
+ // ========== estimateTokensFromOutput ==========
54
+
55
+ describe('estimateTokensFromOutput', () => {
56
+ it('estimates tokens as length / 4', () => {
57
+ expect(estimateTokensFromOutput('12345678')).toBe(2);
58
+ expect(estimateTokensFromOutput('1234')).toBe(1);
59
+ });
60
+
61
+ it('floors the result', () => {
62
+ expect(estimateTokensFromOutput('12345')).toBe(1); // 5/4 = 1.25 → 1
63
+ expect(estimateTokensFromOutput('123')).toBe(0); // 3/4 = 0.75 → 0
64
+ });
65
+
66
+ it('returns 0 for empty string', () => {
67
+ expect(estimateTokensFromOutput('')).toBe(0);
68
+ });
69
+ });
70
+
71
+ // ========== extractModifiedFiles ==========
72
+
73
+ describe('extractModifiedFiles', () => {
74
+ it('extracts files from "wrote" pattern', () => {
75
+ const output = 'wrote file "src/index.ts" successfully';
76
+ expect(extractModifiedFiles(output)).toContain('src/index.ts');
77
+ });
78
+
79
+ it('extracts files from "modified" pattern', () => {
80
+ const output = 'modified utils.js in place';
81
+ expect(extractModifiedFiles(output)).toContain('utils.js');
82
+ });
83
+
84
+ it('extracts files from "created" pattern', () => {
85
+ const output = "created file 'new-file.tsx'";
86
+ expect(extractModifiedFiles(output)).toContain('new-file.tsx');
87
+ });
88
+
89
+ it('extracts files from "edited" pattern', () => {
90
+ const output = 'edited config.json';
91
+ expect(extractModifiedFiles(output)).toContain('config.json');
92
+ });
93
+
94
+ it('deduplicates files', () => {
95
+ const output = 'wrote src/index.ts\nmodified src/index.ts';
96
+ const files = extractModifiedFiles(output);
97
+ expect(files.filter(f => f === 'src/index.ts')).toHaveLength(1);
98
+ });
99
+
100
+ it('returns empty array when no files found', () => {
101
+ expect(extractModifiedFiles('no files mentioned here')).toEqual([]);
102
+ });
103
+
104
+ it('extracts multiple different files', () => {
105
+ const output = 'wrote src/a.ts\ncreated src/b.ts\nedited src/c.ts';
106
+ const files = extractModifiedFiles(output);
107
+ expect(files).toContain('src/a.ts');
108
+ expect(files).toContain('src/b.ts');
109
+ expect(files).toContain('src/c.ts');
110
+ });
111
+ });
112
+
113
+ // ========== detectErrorInStderr ==========
114
+
115
+ describe('detectErrorInStderr', () => {
116
+ it('detects auth errors', () => {
117
+ const result = detectErrorInStderr('Error: not logged in to Claude');
118
+ expect(result).not.toBeNull();
119
+ expect(result!.errorCode).toBe('AUTH_REQUIRED');
120
+ });
121
+
122
+ it('detects session expired', () => {
123
+ const result = detectErrorInStderr('Your session has expired, please re-authenticate');
124
+ expect(result).not.toBeNull();
125
+ expect(result!.errorCode).toBe('AUTH_REQUIRED');
126
+ });
127
+
128
+ it('detects account not found', () => {
129
+ const result = detectErrorInStderr('account not found for this user');
130
+ expect(result).not.toBeNull();
131
+ expect(result!.errorCode).toBe('ACCOUNT_NOT_FOUND');
132
+ });
133
+
134
+ it('detects API key errors', () => {
135
+ const result = detectErrorInStderr('invalid api key provided');
136
+ expect(result).not.toBeNull();
137
+ expect(result!.errorCode).toBe('API_KEY_INVALID');
138
+ });
139
+
140
+ it('detects quota exceeded', () => {
141
+ const result = detectErrorInStderr('quota exceeded for your subscription');
142
+ expect(result).not.toBeNull();
143
+ expect(result!.errorCode).toBe('QUOTA_EXCEEDED');
144
+ });
145
+
146
+ it('detects billing issues', () => {
147
+ const result = detectErrorInStderr('payment required to continue');
148
+ expect(result).not.toBeNull();
149
+ expect(result!.errorCode).toBe('QUOTA_EXCEEDED');
150
+ });
151
+
152
+ it('detects rate limiting', () => {
153
+ const result = detectErrorInStderr('rate limit exceeded, retry after 30s');
154
+ expect(result).not.toBeNull();
155
+ expect(result!.errorCode).toBe('RATE_LIMITED');
156
+ });
157
+
158
+ it('detects 429 status', () => {
159
+ const result = detectErrorInStderr('HTTP 429 too many requests');
160
+ expect(result).not.toBeNull();
161
+ expect(result!.errorCode).toBe('RATE_LIMITED');
162
+ });
163
+
164
+ it('detects network errors', () => {
165
+ const result = detectErrorInStderr('ECONNREFUSED 127.0.0.1:443');
166
+ expect(result).not.toBeNull();
167
+ expect(result!.errorCode).toBe('NETWORK_ERROR');
168
+ });
169
+
170
+ it('detects DNS failures', () => {
171
+ const result = detectErrorInStderr('ENOTFOUND api.anthropic.com');
172
+ expect(result).not.toBeNull();
173
+ expect(result!.errorCode).toBe('NETWORK_ERROR');
174
+ });
175
+
176
+ it('detects SSL errors', () => {
177
+ const result = detectErrorInStderr('CERT_HAS_EXPIRED for api.example.com');
178
+ expect(result).not.toBeNull();
179
+ expect(result!.errorCode).toBe('SSL_ERROR');
180
+ });
181
+
182
+ it('detects service unavailable', () => {
183
+ const result = detectErrorInStderr('service unavailable, try again later');
184
+ expect(result).not.toBeNull();
185
+ expect(result!.errorCode).toBe('SERVICE_UNAVAILABLE');
186
+ });
187
+
188
+ it('detects 503 status', () => {
189
+ const result = detectErrorInStderr('HTTP 503 from upstream');
190
+ expect(result).not.toBeNull();
191
+ expect(result!.errorCode).toBe('SERVICE_UNAVAILABLE');
192
+ });
193
+
194
+ it('detects internal errors', () => {
195
+ const result = detectErrorInStderr('internal server error occurred');
196
+ expect(result).not.toBeNull();
197
+ expect(result!.errorCode).toBe('INTERNAL_ERROR');
198
+ });
199
+
200
+ it('detects context too long', () => {
201
+ const result = detectErrorInStderr('context too long, exceeds 200k tokens');
202
+ expect(result).not.toBeNull();
203
+ expect(result!.errorCode).toBe('CONTEXT_TOO_LONG');
204
+ });
205
+
206
+ it('detects session not found', () => {
207
+ const result = detectErrorInStderr('session not found, please create a new one');
208
+ expect(result).not.toBeNull();
209
+ expect(result!.errorCode).toBe('SESSION_NOT_FOUND');
210
+ });
211
+
212
+ it('returns null for non-matching stderr', () => {
213
+ expect(detectErrorInStderr('Processing file...')).toBeNull();
214
+ expect(detectErrorInStderr('Warning: deprecated API usage')).toBeNull();
215
+ expect(detectErrorInStderr('')).toBeNull();
216
+ });
217
+
218
+ it('returns user-friendly messages', () => {
219
+ const result = detectErrorInStderr('not logged in');
220
+ expect(result).not.toBeNull();
221
+ expect(result!.message).toContain('authentication');
222
+ // Should not expose raw error
223
+ expect(result!.message).not.toContain('not logged in');
224
+ });
225
+ });
@@ -7,16 +7,19 @@
7
7
  * Utilities for enriching prompts with context from previous conversation.
8
8
  */
9
9
 
10
+ import { assessApproval } from './stall-assessor.js';
10
11
  import type { ImageAttachment, PromptContext } from './types.js';
11
12
 
12
13
  /**
13
- * Enrich prompt with context from previous conversation
14
+ * Enrich prompt with context from previous conversation.
15
+ * Async because ambiguous short prompts are classified by Haiku.
14
16
  */
15
- export function enrichPromptWithContext(prompt: string, context: PromptContext): string {
17
+ export async function enrichPromptWithContext(prompt: string, context: PromptContext): Promise<string> {
16
18
  let enriched = prompt;
17
19
 
18
- // Detect if this is a continuation/approval prompt
19
- const isApprovalOrContinuation = isApprovalPrompt(prompt);
20
+ // Detect if this is a continuation/approval prompt.
21
+ // Fast regex path for obvious approvals, Haiku for ambiguous short prompts.
22
+ const isApprovalOrContinuation = await detectApproval(prompt);
20
23
 
21
24
  // Add accumulated knowledge from previous prompts
22
25
  if (context.accumulatedKnowledge) {
@@ -36,9 +39,38 @@ export function enrichPromptWithContext(prompt: string, context: PromptContext):
36
39
  }
37
40
 
38
41
  /**
39
- * Detect if a prompt is an approval or continuation
42
+ * Detect if a prompt is an approval or continuation.
43
+ * Layer 1: Regex fast path for obvious approvals (free, sync).
44
+ * Layer 2: Haiku assessment for ambiguous short prompts (<= 100 chars after trimming).
45
+ */
46
+ async function detectApproval(prompt: string): Promise<boolean> {
47
+ // Layer 1: fast regex path
48
+ if (isApprovalPromptFast(prompt)) return true;
49
+
50
+ // Layer 2: Haiku for short ambiguous prompts.
51
+ // Long prompts (>100 chars) are almost certainly new tasks, not approvals.
52
+ if (prompt.trim().length <= 100) {
53
+ try {
54
+ const claudeCmd = process.env.CLAUDE_COMMAND || 'claude';
55
+ const verdict = await assessApproval(prompt, claudeCmd, false);
56
+ return verdict.isApproval;
57
+ } catch {
58
+ // Haiku failed — fall through to false
59
+ }
60
+ }
61
+
62
+ return false;
63
+ }
64
+
65
+ /**
66
+ * Fast regex-based approval detection (sync, no API call).
67
+ * Catches obvious affirmatives only. Ambiguous cases are not resolved here; the Haiku layer for them lives in detectApproval.
40
68
  */
41
69
  export function isApprovalPrompt(prompt: string): boolean {
70
+ return isApprovalPromptFast(prompt);
71
+ }
72
+
73
+ function isApprovalPromptFast(prompt: string): boolean {
42
74
  const lower = prompt.toLowerCase().trim();
43
75
 
44
76
  // Short affirmative responses
@@ -20,7 +20,7 @@ import type {
20
20
  } from './types.js';
21
21
 
22
22
  // Re-export types for backward compatibility
23
- export type { HeadlessConfig, ImageAttachment, SessionResult, SessionState, ToolUseEvent } from './types.js';
23
+ export type { ExecutionCheckpoint, HeadlessConfig, ImageAttachment, SessionResult, SessionState, ToolTimeoutProfile, ToolUseEvent } from './types.js';
24
24
 
25
25
  export class HeadlessRunner {
26
26
  private config: ResolvedHeadlessConfig;
@@ -33,9 +33,9 @@ export class HeadlessRunner {
33
33
  maxSessions: config.maxSessions || 50,
34
34
  maxRetries: config.maxRetries || 3,
35
35
  claudeCommand: config.claudeCommand || process.env.CLAUDE_COMMAND || 'claude',
36
- verbose: config.verbose || false,
37
- noColor: config.noColor || false,
38
- improvisationMode: config.improvisationMode || false,
36
+ verbose: !!config.verbose,
37
+ noColor: !!config.noColor,
38
+ improvisationMode: !!config.improvisationMode,
39
39
  movementNumber: config.movementNumber ?? 0,
40
40
  outputCallback: config.outputCallback,
41
41
  thinkingCallback: config.thinkingCallback,
@@ -51,6 +51,11 @@ export class HeadlessRunner {
51
51
  stallMaxExtensions: config.stallMaxExtensions ?? 3,
52
52
  stallHardCapMs: config.stallHardCapMs ?? 3_600_000,
53
53
  model: config.model,
54
+ toolTimeoutProfiles: config.toolTimeoutProfiles,
55
+ enableToolWatchdog: config.enableToolWatchdog !== false,
56
+ maxAutoRetries: config.maxAutoRetries ?? 2,
57
+ onToolTimeout: config.onToolTimeout,
58
+ sandboxed: config.sandboxed,
54
59
  };
55
60
  }
56
61
 
@@ -78,22 +83,60 @@ export class HeadlessRunner {
78
83
  const sessionId = `direct-${Date.now()}`;
79
84
 
80
85
  const enrichedPrompt = context
81
- ? enrichPromptWithContext(userPrompt, context)
86
+ ? await enrichPromptWithContext(userPrompt, context)
82
87
  : userPrompt;
83
88
 
84
89
  const result = await this.executePromptCommand(enrichedPrompt, 'main', 1);
85
90
 
86
91
  if (result.exitCode !== 0) {
92
+ // Signal exits (128+) with meaningful output are successful completions —
93
+ // Claude finished its work but the process was killed by signal (e.g., stall watchdog SIGTERM)
94
+ const isSignalExit = result.exitCode >= 128;
95
+ const hasOutput = !!(result.assistantResponse || (result.toolUseHistory && result.toolUseHistory.length > 0));
96
+
97
+ if (isSignalExit && hasOutput) {
98
+ const tokens = estimateTokensFromOutput(result.output);
99
+ return {
100
+ completed: true,
101
+ needsHandoff: false,
102
+ totalTokens: tokens,
103
+ sessionId,
104
+ signalName: result.signalName,
105
+ assistantResponse: result.assistantResponse,
106
+ thinkingOutput: result.thinkingOutput,
107
+ toolUseHistory: result.toolUseHistory,
108
+ claudeSessionId: result.claudeSessionId,
109
+ nativeTimeoutCount: result.nativeTimeoutCount,
110
+ postTimeoutOutput: result.postTimeoutOutput,
111
+ resumeBufferedOutput: result.resumeBufferedOutput,
112
+ };
113
+ }
114
+
115
+ // Build meaningful error: prefer stderr, fall back to non-JSON stdout lines
116
+ let errorMessage = result.error;
117
+ if (!errorMessage && result.output) {
118
+ const plainLines = result.output.split('\n')
119
+ .filter(l => l.trim() && !l.trim().startsWith('{'))
120
+ .join('\n')
121
+ .trim();
122
+ if (plainLines) {
123
+ errorMessage = plainLines.slice(0, 500);
124
+ }
125
+ }
87
126
  return {
88
127
  completed: false,
89
128
  needsHandoff: false,
90
129
  totalTokens: 0,
91
130
  sessionId,
92
- error: result.error || 'Execution failed',
131
+ error: errorMessage || `Claude exited with code ${result.exitCode}`,
132
+ signalName: result.signalName,
93
133
  assistantResponse: result.assistantResponse,
94
134
  thinkingOutput: result.thinkingOutput,
95
135
  toolUseHistory: result.toolUseHistory,
96
- claudeSessionId: result.claudeSessionId
136
+ claudeSessionId: result.claudeSessionId,
137
+ nativeTimeoutCount: result.nativeTimeoutCount,
138
+ postTimeoutOutput: result.postTimeoutOutput,
139
+ resumeBufferedOutput: result.resumeBufferedOutput,
97
140
  };
98
141
  }
99
142
 
@@ -104,10 +147,14 @@ export class HeadlessRunner {
104
147
  needsHandoff: false,
105
148
  totalTokens: tokens,
106
149
  sessionId,
150
+ signalName: result.signalName,
107
151
  assistantResponse: result.assistantResponse,
108
152
  thinkingOutput: result.thinkingOutput,
109
153
  toolUseHistory: result.toolUseHistory,
110
- claudeSessionId: result.claudeSessionId
154
+ claudeSessionId: result.claudeSessionId,
155
+ nativeTimeoutCount: result.nativeTimeoutCount,
156
+ postTimeoutOutput: result.postTimeoutOutput,
157
+ resumeBufferedOutput: result.resumeBufferedOutput,
111
158
  };
112
159
  }
113
160
 
@@ -0,0 +1,165 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import type { StallContext } from './stall-assessor.js';
3
+
4
+ // quickHeuristic, parseAssessmentResponse, and parseVerdictResponse are not exported.
5
+ // We test them via assessStall (which calls quickHeuristic first) and by testing
6
+ // the parsing functions indirectly. Since quickHeuristic is the critical logic
7
+ // and assessStall calls it before Haiku, we can test the heuristic paths by
8
+ // providing contexts that match known patterns.
9
+ //
10
+ // To avoid spawning Haiku (which requires `claude` CLI), we only test contexts
11
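+ // that trigger the heuristic fast-path (return non-null from quickHeuristic). The one exception is the final test, which passes a nonexistent binary to exercise the Haiku-failure fallback.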
12
+
13
+ import { assessStall } from './stall-assessor.js';
14
+
15
+ function makeContext(overrides: Partial<StallContext> = {}): StallContext {
16
+ return {
17
+ originalPrompt: 'Fix the bug in auth.ts',
18
+ silenceMs: 120_000,
19
+ pendingToolCount: 0,
20
+ totalToolCalls: 5,
21
+ elapsedTotalMs: 300_000,
22
+ ...overrides,
23
+ };
24
+ }
25
+
26
+ describe('assessStall - quickHeuristic paths', () => {
27
+ it('extends when tokens are still flowing (tokenSilenceMs < 60s)', async () => {
28
+ const ctx = makeContext({ tokenSilenceMs: 30_000 });
29
+ const verdict = await assessStall(ctx, 'claude', false, false);
30
+ expect(verdict.action).toBe('extend');
31
+ expect(verdict.extensionMs).toBe(10 * 60_000);
32
+ expect(verdict.reason).toContain('Tokens still flowing');
33
+ });
34
+
35
+ it('extends when tokenSilenceMs is 0', async () => {
36
+ const ctx = makeContext({ tokenSilenceMs: 0 });
37
+ const verdict = await assessStall(ctx, 'claude', false, false);
38
+ expect(verdict.action).toBe('extend');
39
+ expect(verdict.reason).toContain('Tokens still flowing');
40
+ });
41
+
42
+ it('does not use token heuristic when tokenSilenceMs >= 60s', async () => {
43
+ const ctx = makeContext({
44
+ tokenSilenceMs: 60_000,
45
+ pendingToolCount: 3, // will trigger parallel tools heuristic
46
+ });
47
+ const verdict = await assessStall(ctx, 'claude', false, false);
48
+ // Should NOT hit the token heuristic, should hit the 3+ parallel tools one
49
+ expect(verdict.action).toBe('extend');
50
+ expect(verdict.reason).toContain('parallel tool calls');
51
+ });
52
+
53
+ it('defers to watchdog when active and tools are pending', async () => {
54
+ const ctx = makeContext({ pendingToolCount: 1, lastToolName: 'Bash' });
55
+ const verdict = await assessStall(ctx, 'claude', false, true);
56
+ expect(verdict.action).toBe('extend');
57
+ expect(verdict.extensionMs).toBe(15 * 60_000);
58
+ expect(verdict.reason).toContain('Watchdog active');
59
+ });
60
+
61
+ it('defers to watchdog and lists pending tool names', async () => {
62
+ const ctx = makeContext({
63
+ pendingToolCount: 2,
64
+ pendingToolNames: new Set(['WebFetch', 'Bash']),
65
+ });
66
+ const verdict = await assessStall(ctx, 'claude', false, true);
67
+ expect(verdict.action).toBe('extend');
68
+ expect(verdict.reason).toContain('WebFetch');
69
+ expect(verdict.reason).toContain('Bash');
70
+ });
71
+
72
+ it('extends for Task subagent via pendingToolNames', async () => {
73
+ const ctx = makeContext({
74
+ pendingToolCount: 1,
75
+ pendingToolNames: new Set(['Task']),
76
+ });
77
+ const verdict = await assessStall(ctx, 'claude', false, false);
78
+ expect(verdict.action).toBe('extend');
79
+ expect(verdict.reason).toContain('Task subagent');
80
+ });
81
+
82
+ it('extends for Task subagent via lastToolName fallback', async () => {
83
+ const ctx = makeContext({
84
+ pendingToolCount: 1,
85
+ lastToolName: 'Task',
86
+ });
87
+ const verdict = await assessStall(ctx, 'claude', false, false);
88
+ expect(verdict.action).toBe('extend');
89
+ expect(verdict.reason).toContain('Task subagent');
90
+ });
91
+
92
+ it('scales Task extension with pending count', async () => {
93
+ const ctx1 = makeContext({
94
+ pendingToolCount: 1,
95
+ pendingToolNames: new Set(['Task']),
96
+ });
97
+ const ctx3 = makeContext({
98
+ pendingToolCount: 3,
99
+ pendingToolNames: new Set(['Task']),
100
+ });
101
+ const v1 = await assessStall(ctx1, 'claude', false, false);
102
+ const v3 = await assessStall(ctx3, 'claude', false, false);
103
+ // More pending = more extension, capped at 30 min
104
+ expect(v3.extensionMs).toBeGreaterThanOrEqual(v1.extensionMs);
105
+ expect(v3.extensionMs).toBeLessThanOrEqual(30 * 60_000);
106
+ });
107
+
108
+ it('extends for 3+ parallel tool calls', async () => {
109
+ const ctx = makeContext({ pendingToolCount: 3 });
110
+ const verdict = await assessStall(ctx, 'claude', false, false);
111
+ expect(verdict.action).toBe('extend');
112
+ expect(verdict.extensionMs).toBe(15 * 60_000);
113
+ expect(verdict.reason).toContain('parallel tool calls');
114
+ });
115
+
116
+ it('extends for 5 parallel tool calls', async () => {
117
+ const ctx = makeContext({ pendingToolCount: 5 });
118
+ const verdict = await assessStall(ctx, 'claude', false, false);
119
+ expect(verdict.action).toBe('extend');
120
+ expect(verdict.reason).toContain('5 parallel tool calls');
121
+ });
122
+
123
+ it('extends for WebSearch without watchdog', async () => {
124
+ const ctx = makeContext({ lastToolName: 'WebSearch', pendingToolCount: 1 });
125
+ // pendingToolCount < 3, not Task, not watchdog active, but WebSearch
126
+ const verdict = await assessStall(ctx, 'claude', false, false);
127
+ expect(verdict.action).toBe('extend');
128
+ expect(verdict.extensionMs).toBe(5 * 60_000);
129
+ expect(verdict.reason).toContain('WebSearch');
130
+ });
131
+
132
+ it('extends for WebFetch without watchdog', async () => {
133
+ const ctx = makeContext({ lastToolName: 'WebFetch', pendingToolCount: 1 });
134
+ const verdict = await assessStall(ctx, 'claude', false, false);
135
+ expect(verdict.action).toBe('extend');
136
+ expect(verdict.extensionMs).toBe(5 * 60_000);
137
+ expect(verdict.reason).toContain('WebFetch');
138
+ });
139
+
140
+ it('does NOT extend for WebSearch when watchdog is active', async () => {
141
+ // When watchdog is active and tools are pending, the watchdog deferral
142
+ // takes priority over the WebSearch heuristic
143
+ const ctx = makeContext({
144
+ lastToolName: 'WebSearch',
145
+ pendingToolCount: 1,
146
+ });
147
+ const verdict = await assessStall(ctx, 'claude', false, true);
148
+ // Should defer to watchdog, not WebSearch heuristic
149
+ expect(verdict.action).toBe('extend');
150
+ expect(verdict.reason).toContain('Watchdog active');
151
+ });
152
+
153
+ it('falls back to extend when Haiku assessment fails', async () => {
154
+ // Context that doesn't match any heuristic → triggers Haiku →
155
+ // Haiku fails (no `claude` binary) → cautious extend
156
+ const ctx = makeContext({
157
+ pendingToolCount: 1,
158
+ lastToolName: 'Edit',
159
+ });
160
+ const verdict = await assessStall(ctx, 'nonexistent-claude-binary', false, false);
161
+ expect(verdict.action).toBe('extend');
162
+ expect(verdict.extensionMs).toBe(10 * 60_000);
163
+ expect(verdict.reason).toContain('unavailable');
164
+ });
165
+ });