compact-agent 1.8.2 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/query.js CHANGED
@@ -1,29 +1,42 @@
1
1
  import { ALL_TOOLS, getToolByName } from './tools/index.js';
2
- import { streamChat } from './api.js';
2
+ import { streamChat, resetClient } from './api.js';
3
3
  import { checkPermission } from './permissions.js';
4
4
  import { buildSystemPrompt } from './system-prompt.js';
5
5
  import { runHooks } from './hooks.js';
6
6
  import { scanToolCall, printSecurityWarning } from './security.js';
7
7
  import { trackUsage } from './cost-tracker.js';
8
8
  import { shouldCompact, compactMessages, quickCompact, DEFAULT_COMPACTION } from './compaction.js';
9
- import { theme, sym, printToolRun, printToolResult, printThinkingOpen, printThinkingText, printThinkingClose, printCost, printApiError, formatDuration } from './theme.js';
9
+ import { theme, sym, printToolRun, printToolResult, printThinkingOpen, printThinkingText, printThinkingClose, printCost, printApiError, formatDuration, categorizeApiError } from './theme.js';
10
10
  import { isVoiceEnabled, getTtsConfig, getAccessibilityConfig, speakAssistantResponse, speak, speakUserEcho, } from './voice.js';
11
11
  import { isLikelyDestructive, describeDestructive, countWords, summarize } from './accessibility.js';
12
12
  import { audioCue } from './audio.js';
13
13
  import { setStatus } from './status.js';
14
- function suppressInputDuringStream() {
14
+ function startInputSuppression() {
15
15
  const stdin = process.stdin;
16
16
  if (!stdin.isTTY) {
17
- return { restore: () => { } };
17
+ return { pause: () => { }, resume: () => { }, restore: () => { } };
18
18
  }
19
19
  const wasRaw = stdin.isRaw;
20
- // Snapshot the keypress listeners that aren't ours. Those are what we
21
- // need to detach to stop readline from echoing + buffering. Slice to
22
- // protect against the array mutating mid-iteration.
20
+ // Snapshot non-tagged keypress listeners. These are the ones we toggle
21
+ // on suppress/unsuppress; the tagged hotkey listener (F1–F10) stays
22
+ // attached unconditionally so status keys work during streaming and
23
+ // tool execution alike.
23
24
  const allKeypressListeners = stdin.listeners('keypress').slice();
24
- const detachedListeners = allKeypressListeners.filter((l) => !l.__crowcoderHotkey__);
25
- for (const l of detachedListeners) {
26
- stdin.removeListener('keypress', l);
25
+ const togglableListeners = allKeypressListeners.filter((l) => !l.__crowcoderHotkey__);
26
+ let detached = false;
27
+ function suppress() {
28
+ if (detached)
29
+ return;
30
+ for (const l of togglableListeners)
31
+ stdin.removeListener('keypress', l);
32
+ detached = true;
33
+ }
34
+ function unsuppress() {
35
+ if (!detached)
36
+ return;
37
+ for (const l of togglableListeners)
38
+ stdin.on('keypress', l);
39
+ detached = false;
27
40
  }
28
41
  // Swallow data — Ctrl+C still exits, everything else is discarded so
29
42
  // it can't bubble up to anything we missed.
@@ -42,13 +55,14 @@ function suppressInputDuringStream() {
42
55
  catch { /* noop */ }
43
56
  stdin.on('data', dataHandler);
44
57
  stdin.resume();
58
+ // Start suppressed — typing during model streaming is the default-block case
59
+ suppress();
45
60
  return {
61
+ pause: unsuppress, // pause suppression = allow typing (for permission prompts)
62
+ resume: suppress, // resume suppression = block typing again
46
63
  restore: () => {
64
+ unsuppress(); // ensure listeners are back before we leave
47
65
  stdin.removeListener('data', dataHandler);
48
- // Re-attach readline's keypress listeners in the original order.
49
- for (const l of detachedListeners) {
50
- stdin.on('keypress', l);
51
- }
52
66
  try {
53
67
  stdin.setRawMode(wasRaw);
54
68
  }
@@ -113,215 +127,261 @@ export async function runQuery(ctx) {
113
127
  // assistant turn, but the final TTS pass only fires after the no-tool-call
114
128
  // exit so tool descriptions aren't read out.
115
129
  let accumulatedAssistantText = '';
116
- // Auto-compact if context is getting large
117
- if (shouldCompact(ctx.messages, DEFAULT_COMPACTION)) {
118
- console.log(theme.dim(` ${sym.running} auto-compacting conversation context...`));
119
- setStatus({ state: 'compacting' });
120
- ctx.messages = await compactMessages(ctx.messages, ctx.config);
121
- }
122
- else {
123
- // Quick compact: truncate oversized tool results
124
- ctx.messages = quickCompact(ctx.messages);
125
- }
126
- // Tell the status singleton who we are. This is what F2 ("where am I?")
127
- // speaks back to the user. Updated once per chain — model/provider/mode
128
- // can't change mid-chain.
129
- setStatus({
130
- model: ctx.config.model,
131
- provider: ctx.config.provider,
132
- mode: ctx.mode,
133
- permissionMode: ctx.config.permissionMode,
134
- });
135
- while (turns < maxTurns) {
136
- turns++;
137
- // Get the last user message for context-aware system prompt
138
- const lastUserMsg = ctx.messages.filter((m) => m.role === 'user').pop();
139
- const userQuery = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : undefined;
140
- // Build full messages array with system prompt
141
- const systemPrompt = buildSystemPrompt(ctx.config, ctx.cwd, ctx.mode, userQuery);
142
- const apiMessages = [
143
- { role: 'system', content: systemPrompt },
144
- ...ctx.messages,
145
- ];
146
- let fullText = '';
147
- let toolCalls;
148
- let hasOutput = false;
149
- let thinkingActive = false;
150
- let leadingTrimmed = false; // strip leading whitespace from the model's first text chunk
151
- let lastCharWasNewline = false; // collapse 3+ consecutive newlines down to 2
152
- let consecutiveNewlines = 0;
153
- const turnStart = Date.now();
154
- function writeStreamText(chunk) {
155
- // Trim leading whitespace until the first non-whitespace character so
156
- // the model can't produce big vertical gaps before its real reply.
157
- let text = chunk;
158
- if (!leadingTrimmed) {
159
- text = text.replace(/^[\s\n]+/, '');
160
- if (text.length === 0)
161
- return; // entire chunk was leading whitespace
162
- leadingTrimmed = true;
163
- }
164
- // Collapse runs of 3+ newlines into 2 so the body of the response is
165
- // dense but still has paragraph breaks where the model intended them.
166
- let out = '';
167
- for (const ch of text) {
168
- if (ch === '\n') {
169
- consecutiveNewlines++;
170
- if (consecutiveNewlines <= 2)
171
- out += ch;
130
+ // Auto-fallback: when the primary model returns a cryptic / unknown
131
+ // provider error (common for free experimental models like
132
+ // openrouter/owl-alpha which returns literally "ERROR" or "Provider
133
+ // returned error"), we transparently retry the SAME turn once with the
134
+ // user's configured fallbackModel. After we use it, this latches so we
135
+ // don't bounce back and forth between failing models in a single chain.
136
+ let usedFallbackModel = false;
137
+ // Input suppression spans the entire chain: model streaming AND tool
138
+ // execution. executeToolCalls calls inputGuard.pause()/resume() around
139
+ // permission prompts so rl.question() can still read user input. Final
140
+ // teardown happens in the finally block at the bottom of runQuery so
141
+ // the guard is always cleaned up even if something throws unexpectedly.
142
+ const inputGuard = startInputSuppression();
143
+ try {
144
+ // Auto-compact if context is getting large
145
+ if (shouldCompact(ctx.messages, DEFAULT_COMPACTION)) {
146
+ console.log(theme.dim(` ${sym.running} auto-compacting conversation context...`));
147
+ setStatus({ state: 'compacting' });
148
+ ctx.messages = await compactMessages(ctx.messages, ctx.config);
149
+ }
150
+ else {
151
+ // Quick compact: truncate oversized tool results
152
+ ctx.messages = quickCompact(ctx.messages);
153
+ }
154
+ // Tell the status singleton who we are. This is what F2 ("where am I?")
155
+ // speaks back to the user. Updated once per chain — model/provider/mode
156
+ // can't change mid-chain.
157
+ setStatus({
158
+ model: ctx.config.model,
159
+ provider: ctx.config.provider,
160
+ mode: ctx.mode,
161
+ permissionMode: ctx.config.permissionMode,
162
+ });
163
+ while (turns < maxTurns) {
164
+ turns++;
165
+ // Get the last user message for context-aware system prompt
166
+ const lastUserMsg = ctx.messages.filter((m) => m.role === 'user').pop();
167
+ const userQuery = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : undefined;
168
+ // Build full messages array with system prompt
169
+ const systemPrompt = buildSystemPrompt(ctx.config, ctx.cwd, ctx.mode, userQuery);
170
+ const apiMessages = [
171
+ { role: 'system', content: systemPrompt },
172
+ ...ctx.messages,
173
+ ];
174
+ let fullText = '';
175
+ let toolCalls;
176
+ let hasOutput = false;
177
+ let thinkingActive = false;
178
+ let leadingTrimmed = false; // strip leading whitespace from the model's first text chunk
179
+ let lastCharWasNewline = false; // collapse 3+ consecutive newlines down to 2
180
+ let consecutiveNewlines = 0;
181
+ const turnStart = Date.now();
182
+ function writeStreamText(chunk) {
183
+ // Trim leading whitespace until the first non-whitespace character so
184
+ // the model can't produce big vertical gaps before its real reply.
185
+ let text = chunk;
186
+ if (!leadingTrimmed) {
187
+ text = text.replace(/^[\s\n]+/, '');
188
+ if (text.length === 0)
189
+ return; // entire chunk was leading whitespace
190
+ leadingTrimmed = true;
172
191
  }
173
- else {
174
- consecutiveNewlines = 0;
175
- out += ch;
192
+ // Collapse runs of 3+ newlines into 2 so the body of the response is
193
+ // dense but still has paragraph breaks where the model intended them.
194
+ let out = '';
195
+ for (const ch of text) {
196
+ if (ch === '\n') {
197
+ consecutiveNewlines++;
198
+ if (consecutiveNewlines <= 2)
199
+ out += ch;
200
+ }
201
+ else {
202
+ consecutiveNewlines = 0;
203
+ out += ch;
204
+ }
176
205
  }
206
+ if (out.length === 0)
207
+ return;
208
+ lastCharWasNewline = out.endsWith('\n');
209
+ process.stdout.write(theme.primary(out));
210
+ fullText += out;
177
211
  }
178
- if (out.length === 0)
179
- return;
180
- lastCharWasNewline = out.endsWith('\n');
181
- process.stdout.write(theme.primary(out));
182
- fullText += out;
183
- }
184
- // Suppress terminal echo while we stream so mid-stream keystrokes
185
- // don't interleave with the model's output. Restored in `finally`.
186
- const inputGuard = suppressInputDuringStream();
187
- // We're about to wait on the API; tell the status singleton so a blind
188
- // user pressing F1 hears "calling claude-sonnet-4, 6 seconds elapsed"
189
- // instead of a stale "idle".
190
- setStatus({ state: 'streaming' });
191
- try {
192
- for await (const event of streamChat(ctx.config, apiMessages, ALL_TOOLS)) {
193
- if (event.type === 'thinking' && event.content) {
194
- // showThinking defaults to true; only off when explicitly disabled.
195
- if (ctx.config.showThinking !== false) {
196
- if (!thinkingActive) {
197
- printThinkingOpen();
198
- thinkingActive = true;
212
+ // (inputGuard is now lifted to runQuery scope — see above. It spans
213
+ // both streaming and tool execution, with pause/resume around the
214
+ // permission prompts inside executeToolCalls.)
215
+ // We're about to wait on the API; tell the status singleton so a blind
216
+ // user pressing F1 hears "calling claude-sonnet-4, 6 seconds elapsed"
217
+ // instead of a stale "idle".
218
+ setStatus({ state: 'streaming' });
219
+ try {
220
+ for await (const event of streamChat(ctx.config, apiMessages, ALL_TOOLS)) {
221
+ if (event.type === 'thinking' && event.content) {
222
+ // showThinking defaults to true; only off when explicitly disabled.
223
+ if (ctx.config.showThinking !== false) {
224
+ if (!thinkingActive) {
225
+ printThinkingOpen();
226
+ thinkingActive = true;
227
+ }
228
+ printThinkingText(event.content);
199
229
  }
200
- printThinkingText(event.content);
201
230
  }
202
- }
203
- else if (event.type === 'text' && event.content) {
204
- if (thinkingActive) {
205
- printThinkingClose();
206
- thinkingActive = false;
231
+ else if (event.type === 'text' && event.content) {
232
+ if (thinkingActive) {
233
+ printThinkingClose();
234
+ thinkingActive = false;
235
+ }
236
+ if (!hasOutput) {
237
+ hasOutput = true;
238
+ // First token arrived; promote status so F1 reports "receiving"
239
+ // rather than the still-waiting "streaming" message.
240
+ setStatus({ state: 'responding' });
241
+ }
242
+ writeStreamText(event.content);
207
243
  }
208
- if (!hasOutput) {
209
- hasOutput = true;
210
- // First token arrived; promote status so F1 reports "receiving"
211
- // rather than the still-waiting "streaming" message.
212
- setStatus({ state: 'responding' });
244
+ else if (event.type === 'tool_call') {
245
+ toolCalls = event.toolCalls;
246
+ }
247
+ else if (event.type === 'done') {
248
+ if (event.usage) {
249
+ const u = event.usage;
250
+ const { cost, warning } = trackUsage(ctx.sessionId, ctx.config.model, u.prompt, u.completion);
251
+ // Single newline separator if we just streamed text, then the
252
+ // compact telemetry line.
253
+ if (hasOutput && !lastCharWasNewline)
254
+ process.stdout.write('\n');
255
+ printCost(u.prompt, u.completion, cost, warning, Date.now() - turnStart);
256
+ }
213
257
  }
214
- writeStreamText(event.content);
215
258
  }
216
- else if (event.type === 'tool_call') {
217
- toolCalls = event.toolCalls;
259
+ }
260
+ catch (err) {
261
+ const msg = err instanceof Error ? err.message : String(err);
262
+ // Always close the streaming line first so the error doesn't glue to text.
263
+ if (hasOutput && !lastCharWasNewline)
264
+ process.stdout.write('\n');
265
+ // ── Auto-fallback path ─────────────────────────────────
266
+ // Categorize the error. If it's "unknown" (the provider returned a
267
+ // cryptic empty error like "ERROR" or "Provider returned error" that
268
+ // matches no specific pattern) AND we have a fallbackModel configured
269
+ // AND we haven't already used it, swap models and silently retry the
270
+ // same turn. This rescues users from broken free models without them
271
+ // having to manually /clear and /model switch.
272
+ const cat = categorizeApiError(msg, {
273
+ baseURL: ctx.config.baseURL,
274
+ provider: ctx.config.provider,
275
+ model: ctx.config.model,
276
+ });
277
+ const canFallback = cat.category === 'unknown'
278
+ && ctx.config.fallbackModel
279
+ && ctx.config.fallbackModel !== ctx.config.model
280
+ && !usedFallbackModel;
281
+ if (canFallback) {
282
+ usedFallbackModel = true;
283
+ const failedModel = ctx.config.model;
284
+ const fallback = ctx.config.fallbackModel;
285
+ ctx.config.model = fallback;
286
+ resetClient();
287
+ console.log(theme.warning(` ${sym.warn} ${failedModel} returned a cryptic provider error — retrying once with fallback model ${fallback}.`));
288
+ console.log(theme.dim(' (configure a different fallback with: /fallback <model-id>)'));
289
+ turns--; // this retry doesn't burn a turn slot from the max-turns budget
290
+ continue;
218
291
  }
219
- else if (event.type === 'done') {
220
- if (event.usage) {
221
- const u = event.usage;
222
- const { cost, warning } = trackUsage(ctx.sessionId, ctx.config.model, u.prompt, u.completion);
223
- // Single newline separator if we just streamed text, then the
224
- // compact telemetry line.
225
- if (hasOutput && !lastCharWasNewline)
226
- process.stdout.write('\n');
227
- printCost(u.prompt, u.completion, cost, warning, Date.now() - turnStart);
292
+ printApiError(msg, {
293
+ baseURL: ctx.config.baseURL,
294
+ provider: ctx.config.provider,
295
+ model: ctx.config.model,
296
+ });
297
+ // Voice: announce errors aloud for screen-reader users
298
+ if (isVoiceEnabled(ctx.config) && getAccessibilityConfig(ctx.config).announceErrors) {
299
+ const tts = getTtsConfig(ctx.config);
300
+ if (tts.apiKey) {
301
+ // Keep it terse — one short sentence — to avoid burning quota on
302
+ // long stack traces. The error pretty-printer already showed the
303
+ // categorized version to the screen-reader.
304
+ speak(`API error: ${msg.slice(0, 120)}`, ctx.config, { voiceId: tts.assistantVoiceId }).catch(() => { });
305
+ }
306
+ if (getAccessibilityConfig(ctx.config).audioCues) {
307
+ audioCue('error').catch(() => { });
228
308
  }
229
309
  }
310
+ ctx.messages.push({ role: 'assistant', content: `[API error: ${msg}]` });
311
+ break;
230
312
  }
231
- }
232
- catch (err) {
233
- const msg = err instanceof Error ? err.message : String(err);
234
- // Always close the streaming line first so the error doesn't glue to text.
235
- if (hasOutput && !lastCharWasNewline)
313
+ if (hasOutput && !lastCharWasNewline) {
236
314
  process.stdout.write('\n');
237
- printApiError(msg, {
238
- baseURL: ctx.config.baseURL,
239
- provider: ctx.config.provider,
240
- model: ctx.config.model,
241
- });
242
- // Voice: announce errors aloud for screen-reader users
243
- if (isVoiceEnabled(ctx.config) && getAccessibilityConfig(ctx.config).announceErrors) {
244
- const tts = getTtsConfig(ctx.config);
245
- if (tts.apiKey) {
246
- // Keep it terse — one short sentence — to avoid burning quota on
247
- // long stack traces. The error pretty-printer already showed the
248
- // categorized version to the screen-reader.
249
- speak(`API error: ${msg.slice(0, 120)}`, ctx.config, { voiceId: tts.assistantVoiceId }).catch(() => { });
250
- }
251
- if (getAccessibilityConfig(ctx.config).audioCues) {
252
- audioCue('error').catch(() => { });
253
- }
254
315
  }
255
- ctx.messages.push({ role: 'assistant', content: `[API error: ${msg}]` });
256
- inputGuard.restore();
257
- break;
316
+ // Save assistant message
317
+ const assistantMsg = { role: 'assistant', content: fullText || null };
318
+ if (toolCalls && toolCalls.length > 0) {
319
+ assistantMsg.tool_calls = toolCalls;
320
+ }
321
+ ctx.messages.push(assistantMsg);
322
+ // Accumulate visible assistant text for chain-end TTS. We don't TTS
323
+ // mid-chain because the model often emits short bridging sentences
324
+ // between tool calls — speaking each one is noisy and slow.
325
+ if (fullText)
326
+ accumulatedAssistantText += (accumulatedAssistantText ? '\n\n' : '') + fullText;
327
+ // If no tool calls, we're done
328
+ if (!toolCalls || toolCalls.length === 0)
329
+ break;
330
+ // Execute tool calls — executeToolCalls itself flips per-tool state
331
+ // and uses inputGuard.pause()/resume() around each permission prompt
332
+ // so rl.question() can read user input even though suppression is on
333
+ // for the rest of the chain.
334
+ const toolResults = await executeToolCalls(toolCalls, ctx, inputGuard);
335
+ ctx.messages.push(...toolResults);
258
336
  }
259
- inputGuard.restore();
260
- if (hasOutput && !lastCharWasNewline) {
261
- process.stdout.write('\n');
337
+ // Chain ended; back to idle so F1 reports the correct state.
338
+ setStatus({ state: 'idle' });
339
+ // ── Voice: read the assistant's final response ────────────
340
+ // Off the hot path — fire-and-forget so the next prompt appears
341
+ // immediately. The playback runs in background; F2 pauses, F4 skips.
342
+ if (isVoiceEnabled(ctx.config) && accumulatedAssistantText.trim()) {
343
+ const tts = getTtsConfig(ctx.config);
344
+ if (tts.apiKey) {
345
+ const a = getAccessibilityConfig(ctx.config);
346
+ let toRead = accumulatedAssistantText;
347
+ // If the response is long, abbreviate via cheap heuristic summary so
348
+ // blind users aren't forced to listen to 800 words. They can press
349
+ // F3 (replay) on chunks or ask "give me the full version" verbally.
350
+ const words = countWords(toRead);
351
+ if (words >= a.longResponseThreshold) {
352
+ toRead = summarize(toRead, a.longResponseThreshold);
353
+ }
354
+ // Register an abort controller + last-chunk + last-full-response
355
+ // globally so the hotkey handler in index.ts can cancel / replay.
356
+ // - __voiceLastChunk drives PGUP "replay last chunk"
357
+ // - __voiceLastFullResponse drives F3 "read full" + F4 "read summary"
358
+ const g = globalThis;
359
+ const ctl = new AbortController();
360
+ g.__voicePlaybackCtl = ctl;
361
+ g.__voiceLastChunk = toRead;
362
+ g.__voiceLastFullResponse = accumulatedAssistantText;
363
+ speakAssistantResponse(toRead, ctx.config, ctl.signal).catch(() => { });
364
+ }
262
365
  }
263
- // Save assistant message
264
- const assistantMsg = { role: 'assistant', content: fullText || null };
265
- if (toolCalls && toolCalls.length > 0) {
266
- assistantMsg.tool_calls = toolCalls;
366
+ if (turns >= maxTurns) {
367
+ console.log(theme.warning(`\n ${sym.warn} reached max turns limit`));
267
368
  }
268
- ctx.messages.push(assistantMsg);
269
- // Accumulate visible assistant text for chain-end TTS. We don't TTS
270
- // mid-chain because the model often emits short bridging sentences
271
- // between tool calls — speaking each one is noisy and slow.
272
- if (fullText)
273
- accumulatedAssistantText += (accumulatedAssistantText ? '\n\n' : '') + fullText;
274
- // If no tool calls, we're done
275
- if (!toolCalls || toolCalls.length === 0)
276
- break;
277
- // Execute tool calls — executeToolCalls itself flips per-tool state
278
- const toolResults = await executeToolCalls(toolCalls, ctx);
279
- ctx.messages.push(...toolResults);
280
- }
281
- // Chain ended; back to idle so F1 reports the correct state.
282
- setStatus({ state: 'idle' });
283
- // ── Voice: read the assistant's final response ────────────
284
- // Off the hot path — fire-and-forget so the next prompt appears
285
- // immediately. The playback runs in background; F2 pauses, F4 skips.
286
- if (isVoiceEnabled(ctx.config) && accumulatedAssistantText.trim()) {
287
- const tts = getTtsConfig(ctx.config);
288
- if (tts.apiKey) {
289
- const a = getAccessibilityConfig(ctx.config);
290
- let toRead = accumulatedAssistantText;
291
- // If the response is long, abbreviate via cheap heuristic summary so
292
- // blind users aren't forced to listen to 800 words. They can press
293
- // F3 (replay) on chunks or ask "give me the full version" verbally.
294
- const words = countWords(toRead);
295
- if (words >= a.longResponseThreshold) {
296
- toRead = summarize(toRead, a.longResponseThreshold);
297
- }
298
- // Register an abort controller + last-chunk + last-full-response
299
- // globally so the hotkey handler in index.ts can cancel / replay.
300
- // - __voiceLastChunk drives PGUP "replay last chunk"
301
- // - __voiceLastFullResponse drives F3 "read full" + F4 "read summary"
302
- const g = globalThis;
303
- const ctl = new AbortController();
304
- g.__voicePlaybackCtl = ctl;
305
- g.__voiceLastChunk = toRead;
306
- g.__voiceLastFullResponse = accumulatedAssistantText;
307
- speakAssistantResponse(toRead, ctx.config, ctl.signal).catch(() => { });
369
+ // Chain-elapsed summary. One line per response chain (user msg → assistant
370
+ // ending without a tool call), printed regardless of how many tool-call
371
+ // iterations the chain took. Lets the user see how long that whole
372
+ // exchange took, separate from per-turn cost timings.
373
+ const chainMs = Date.now() - chainStart;
374
+ // Only show if there was meaningful work — multi-second chains. Sub-second
375
+ // chains (slash command rejects, instant returns) don't need a chain line.
376
+ if (chainMs > 1500) {
377
+ console.log(theme.dim(` chain ${formatDuration(chainMs)} · ${turns} ${turns === 1 ? 'turn' : 'turns'}`));
308
378
  }
309
379
  }
310
- if (turns >= maxTurns) {
311
- console.log(theme.warning(`\n ${sym.warn} reached max turns limit`));
312
- }
313
- // Chain-elapsed summary. One line per response chain (user msg → assistant
314
- // ending without a tool call), printed regardless of how many tool-call
315
- // iterations the chain took. Lets the user see how long that whole
316
- // exchange took, separate from per-turn cost timings.
317
- const chainMs = Date.now() - chainStart;
318
- // Only show if there was meaningful work — multi-second chains. Sub-second
319
- // chains (slash command rejects, instant returns) don't need a chain line.
320
- if (chainMs > 1500) {
321
- console.log(theme.dim(` chain ${formatDuration(chainMs)} · ${turns} ${turns === 1 ? 'turn' : 'turns'}`));
380
+ finally {
381
+ inputGuard.restore();
322
382
  }
323
383
  }
324
- async function executeToolCalls(toolCalls, ctx) {
384
+ async function executeToolCalls(toolCalls, ctx, inputGuard) {
325
385
  const results = [];
326
386
  for (const tc of toolCalls) {
327
387
  const toolName = tc.function.name;
@@ -409,7 +469,18 @@ async function executeToolCalls(toolCalls, ctx) {
409
469
  }
410
470
  }
411
471
  // ── Permission check ──────────────────────────────────
412
- const allowed = await checkPermission(tool, input, ctx.config, ctx.rl);
472
+ // Pause input suppression so rl.question() can read the user's
473
+ // Y/n/always response — without this, readline's keypress listener is
474
+ // detached and the prompt would hang forever. Re-suppress immediately
475
+ // after so any typing during the next tool's execution is blocked.
476
+ inputGuard.pause();
477
+ let allowed;
478
+ try {
479
+ allowed = await checkPermission(tool, input, ctx.config, ctx.rl);
480
+ }
481
+ finally {
482
+ inputGuard.resume();
483
+ }
413
484
  if (!allowed) {
414
485
  console.log(theme.warning(` ${sym.warn} Denied: ${toolName}`));
415
486
  results.push({