compact-agent 1.8.3 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/query.js CHANGED
@@ -1,29 +1,43 @@
1
1
  import { ALL_TOOLS, getToolByName } from './tools/index.js';
2
- import { streamChat } from './api.js';
2
+ import { streamChat, resetClient } from './api.js';
3
3
  import { checkPermission } from './permissions.js';
4
4
  import { buildSystemPrompt } from './system-prompt.js';
5
5
  import { runHooks } from './hooks.js';
6
6
  import { scanToolCall, printSecurityWarning } from './security.js';
7
7
  import { trackUsage } from './cost-tracker.js';
8
8
  import { shouldCompact, compactMessages, quickCompact, DEFAULT_COMPACTION } from './compaction.js';
9
- import { theme, sym, printToolRun, printToolResult, printThinkingOpen, printThinkingText, printThinkingClose, printCost, printApiError, formatDuration } from './theme.js';
9
+ import { theme, sym, printToolRun, printToolResult, printThinkingOpen, printThinkingText, printThinkingClose, printCost, printApiError, formatDuration, categorizeApiError } from './theme.js';
10
10
  import { isVoiceEnabled, getTtsConfig, getAccessibilityConfig, speakAssistantResponse, speak, speakUserEcho, } from './voice.js';
11
11
  import { isLikelyDestructive, describeDestructive, countWords, summarize } from './accessibility.js';
12
12
  import { audioCue } from './audio.js';
13
13
  import { setStatus } from './status.js';
14
- function suppressInputDuringStream() {
14
+ import { collapseCompletedTurns } from './turn-context.js';
15
+ function startInputSuppression() {
15
16
  const stdin = process.stdin;
16
17
  if (!stdin.isTTY) {
17
- return { restore: () => { } };
18
+ return { pause: () => { }, resume: () => { }, restore: () => { } };
18
19
  }
19
20
  const wasRaw = stdin.isRaw;
20
- // Snapshot the keypress listeners that aren't ours. Those are what we
21
- // need to detach to stop readline from echoing + buffering. Slice to
22
- // protect against the array mutating mid-iteration.
21
+ // Snapshot non-tagged keypress listeners. These are the ones we toggle
22
+ // on suppress/unsuppress; the tagged hotkey listener (F1–F10) stays
23
+ // attached unconditionally so status keys work during streaming and
24
+ // tool execution alike.
23
25
  const allKeypressListeners = stdin.listeners('keypress').slice();
24
- const detachedListeners = allKeypressListeners.filter((l) => !l.__crowcoderHotkey__);
25
- for (const l of detachedListeners) {
26
- stdin.removeListener('keypress', l);
26
+ const togglableListeners = allKeypressListeners.filter((l) => !l.__crowcoderHotkey__);
27
+ let detached = false;
28
+ function suppress() {
29
+ if (detached)
30
+ return;
31
+ for (const l of togglableListeners)
32
+ stdin.removeListener('keypress', l);
33
+ detached = true;
34
+ }
35
+ function unsuppress() {
36
+ if (!detached)
37
+ return;
38
+ for (const l of togglableListeners)
39
+ stdin.on('keypress', l);
40
+ detached = false;
27
41
  }
28
42
  // Swallow data — Ctrl+C still exits, everything else is discarded so
29
43
  // it can't bubble up to anything we missed.
@@ -42,13 +56,14 @@ function suppressInputDuringStream() {
42
56
  catch { /* noop */ }
43
57
  stdin.on('data', dataHandler);
44
58
  stdin.resume();
59
+ // Start suppressed — typing during model streaming is the default-block case
60
+ suppress();
45
61
  return {
62
+ pause: unsuppress, // pause suppression = allow typing (for permission prompts)
63
+ resume: suppress, // resume suppression = block typing again
46
64
  restore: () => {
65
+ unsuppress(); // ensure listeners are back before we leave
47
66
  stdin.removeListener('data', dataHandler);
48
- // Re-attach readline's keypress listeners in the original order.
49
- for (const l of detachedListeners) {
50
- stdin.on('keypress', l);
51
- }
52
67
  try {
53
68
  stdin.setRawMode(wasRaw);
54
69
  }
@@ -113,215 +128,268 @@ export async function runQuery(ctx) {
113
128
  // assistant turn, but the final TTS pass only fires after the no-tool-call
114
129
  // exit so tool descriptions aren't read out.
115
130
  let accumulatedAssistantText = '';
116
- // Auto-compact if context is getting large
117
- if (shouldCompact(ctx.messages, DEFAULT_COMPACTION)) {
118
- console.log(theme.dim(` ${sym.running} auto-compacting conversation context...`));
119
- setStatus({ state: 'compacting' });
120
- ctx.messages = await compactMessages(ctx.messages, ctx.config);
121
- }
122
- else {
123
- // Quick compact: truncate oversized tool results
124
- ctx.messages = quickCompact(ctx.messages);
125
- }
126
- // Tell the status singleton who we are. This is what F2 ("where am I?")
127
- // speaks back to the user. Updated once per chain model/provider/mode
128
- // can't change mid-chain.
129
- setStatus({
130
- model: ctx.config.model,
131
- provider: ctx.config.provider,
132
- mode: ctx.mode,
133
- permissionMode: ctx.config.permissionMode,
134
- });
135
- while (turns < maxTurns) {
136
- turns++;
137
- // Get the last user message for context-aware system prompt
138
- const lastUserMsg = ctx.messages.filter((m) => m.role === 'user').pop();
139
- const userQuery = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : undefined;
140
- // Build full messages array with system prompt
141
- const systemPrompt = buildSystemPrompt(ctx.config, ctx.cwd, ctx.mode, userQuery);
142
- const apiMessages = [
143
- { role: 'system', content: systemPrompt },
144
- ...ctx.messages,
145
- ];
146
- let fullText = '';
147
- let toolCalls;
148
- let hasOutput = false;
149
- let thinkingActive = false;
150
- let leadingTrimmed = false; // strip leading whitespace from the model's first text chunk
151
- let lastCharWasNewline = false; // collapse 3+ consecutive newlines down to 2
152
- let consecutiveNewlines = 0;
153
- const turnStart = Date.now();
154
- function writeStreamText(chunk) {
155
- // Trim leading whitespace until the first non-whitespace character so
156
- // the model can't produce big vertical gaps before its real reply.
157
- let text = chunk;
158
- if (!leadingTrimmed) {
159
- text = text.replace(/^[\s\n]+/, '');
160
- if (text.length === 0)
161
- return; // entire chunk was leading whitespace
162
- leadingTrimmed = true;
163
- }
164
- // Collapse runs of 3+ newlines into 2 so the body of the response is
165
- // dense but still has paragraph breaks where the model intended them.
166
- let out = '';
167
- for (const ch of text) {
168
- if (ch === '\n') {
169
- consecutiveNewlines++;
170
- if (consecutiveNewlines <= 2)
171
- out += ch;
131
+ // Auto-fallback: when the primary model returns a cryptic / unknown
132
+ // provider error (common for free experimental models like
133
+ // openrouter/owl-alpha which returns literally "ERROR" or "Provider
134
+ // returned error"), we transparently retry the SAME turn once with the
135
+ // user's configured fallbackModel. After we use it, this latches so we
136
+ // don't bounce back and forth between failing models in a single chain.
137
+ let usedFallbackModel = false;
138
+ // Input suppression spans the entire chain: model streaming AND tool
139
+ // execution. executeToolCalls calls inputGuard.pause()/resume() around
140
+ // permission prompts so rl.question() can still read user input. Final
141
+ // teardown happens in the finally block at the bottom of runQuery so
142
+ // the guard is always cleaned up even if something throws unexpectedly.
143
+ const inputGuard = startInputSuppression();
144
+ try {
145
+ // Turn-boundary collapse runs BEFORE compaction. Every completed prior
146
+ // turn becomes [user, "<final text>\n[Completed: used X, Y]"] — the
147
+ // model no longer sees stale tool_calls that it might mistake for
148
+ // pending work (the "I'll handle BOTH requests" / "all THREE requests"
149
+ // bug). The current turn (latest user message forward) is left intact
150
+ // because its tool_calls and tool messages are still in flight.
151
+ ctx.messages = collapseCompletedTurns(ctx.messages);
152
+ // Auto-compact if context is getting large
153
+ if (shouldCompact(ctx.messages, DEFAULT_COMPACTION)) {
154
+ console.log(theme.dim(` ${sym.running} auto-compacting conversation context...`));
155
+ setStatus({ state: 'compacting' });
156
+ ctx.messages = await compactMessages(ctx.messages, ctx.config);
157
+ }
158
+ else {
159
+ // Quick compact: truncate oversized tool results
160
+ ctx.messages = quickCompact(ctx.messages);
161
+ }
162
+ // Tell the status singleton who we are. This is what F2 ("where am I?")
163
+ // speaks back to the user. Updated once per chain — model/provider/mode
164
+ // can't change mid-chain.
165
+ setStatus({
166
+ model: ctx.config.model,
167
+ provider: ctx.config.provider,
168
+ mode: ctx.mode,
169
+ permissionMode: ctx.config.permissionMode,
170
+ });
171
+ while (turns < maxTurns) {
172
+ turns++;
173
+ // Get the last user message for context-aware system prompt
174
+ const lastUserMsg = ctx.messages.filter((m) => m.role === 'user').pop();
175
+ const userQuery = typeof lastUserMsg?.content === 'string' ? lastUserMsg.content : undefined;
176
+ // Build full messages array with system prompt
177
+ const systemPrompt = buildSystemPrompt(ctx.config, ctx.cwd, ctx.mode, userQuery);
178
+ const apiMessages = [
179
+ { role: 'system', content: systemPrompt },
180
+ ...ctx.messages,
181
+ ];
182
+ let fullText = '';
183
+ let toolCalls;
184
+ let hasOutput = false;
185
+ let thinkingActive = false;
186
+ let leadingTrimmed = false; // strip leading whitespace from the model's first text chunk
187
+ let lastCharWasNewline = false; // collapse 3+ consecutive newlines down to 2
188
+ let consecutiveNewlines = 0;
189
+ const turnStart = Date.now();
190
+ function writeStreamText(chunk) {
191
+ // Trim leading whitespace until the first non-whitespace character so
192
+ // the model can't produce big vertical gaps before its real reply.
193
+ let text = chunk;
194
+ if (!leadingTrimmed) {
195
+ text = text.replace(/^[\s\n]+/, '');
196
+ if (text.length === 0)
197
+ return; // entire chunk was leading whitespace
198
+ leadingTrimmed = true;
172
199
  }
173
- else {
174
- consecutiveNewlines = 0;
175
- out += ch;
200
+ // Collapse runs of 3+ newlines into 2 so the body of the response is
201
+ // dense but still has paragraph breaks where the model intended them.
202
+ let out = '';
203
+ for (const ch of text) {
204
+ if (ch === '\n') {
205
+ consecutiveNewlines++;
206
+ if (consecutiveNewlines <= 2)
207
+ out += ch;
208
+ }
209
+ else {
210
+ consecutiveNewlines = 0;
211
+ out += ch;
212
+ }
176
213
  }
214
+ if (out.length === 0)
215
+ return;
216
+ lastCharWasNewline = out.endsWith('\n');
217
+ process.stdout.write(theme.primary(out));
218
+ fullText += out;
177
219
  }
178
- if (out.length === 0)
179
- return;
180
- lastCharWasNewline = out.endsWith('\n');
181
- process.stdout.write(theme.primary(out));
182
- fullText += out;
183
- }
184
- // Suppress terminal echo while we stream so mid-stream keystrokes
185
- // don't interleave with the model's output. Restored in `finally`.
186
- const inputGuard = suppressInputDuringStream();
187
- // We're about to wait on the API; tell the status singleton so a blind
188
- // user pressing F1 hears "calling claude-sonnet-4, 6 seconds elapsed"
189
- // instead of a stale "idle".
190
- setStatus({ state: 'streaming' });
191
- try {
192
- for await (const event of streamChat(ctx.config, apiMessages, ALL_TOOLS)) {
193
- if (event.type === 'thinking' && event.content) {
194
- // showThinking defaults to true; only off when explicitly disabled.
195
- if (ctx.config.showThinking !== false) {
196
- if (!thinkingActive) {
197
- printThinkingOpen();
198
- thinkingActive = true;
220
+ // (inputGuard is now lifted to runQuery scope — see above. It spans
221
+ // both streaming and tool execution, with pause/resume around the
222
+ // permission prompts inside executeToolCalls.)
223
+ // We're about to wait on the API; tell the status singleton so a blind
224
+ // user pressing F1 hears "calling claude-sonnet-4, 6 seconds elapsed"
225
+ // instead of a stale "idle".
226
+ setStatus({ state: 'streaming' });
227
+ try {
228
+ for await (const event of streamChat(ctx.config, apiMessages, ALL_TOOLS)) {
229
+ if (event.type === 'thinking' && event.content) {
230
+ // showThinking defaults to true; only off when explicitly disabled.
231
+ if (ctx.config.showThinking !== false) {
232
+ if (!thinkingActive) {
233
+ printThinkingOpen();
234
+ thinkingActive = true;
235
+ }
236
+ printThinkingText(event.content);
199
237
  }
200
- printThinkingText(event.content);
201
238
  }
202
- }
203
- else if (event.type === 'text' && event.content) {
204
- if (thinkingActive) {
205
- printThinkingClose();
206
- thinkingActive = false;
239
+ else if (event.type === 'text' && event.content) {
240
+ if (thinkingActive) {
241
+ printThinkingClose();
242
+ thinkingActive = false;
243
+ }
244
+ if (!hasOutput) {
245
+ hasOutput = true;
246
+ // First token arrived; promote status so F1 reports "receiving"
247
+ // rather than the still-waiting "streaming" message.
248
+ setStatus({ state: 'responding' });
249
+ }
250
+ writeStreamText(event.content);
207
251
  }
208
- if (!hasOutput) {
209
- hasOutput = true;
210
- // First token arrived; promote status so F1 reports "receiving"
211
- // rather than the still-waiting "streaming" message.
212
- setStatus({ state: 'responding' });
252
+ else if (event.type === 'tool_call') {
253
+ toolCalls = event.toolCalls;
254
+ }
255
+ else if (event.type === 'done') {
256
+ if (event.usage) {
257
+ const u = event.usage;
258
+ const { cost, warning } = trackUsage(ctx.sessionId, ctx.config.model, u.prompt, u.completion);
259
+ // Single newline separator if we just streamed text, then the
260
+ // compact telemetry line.
261
+ if (hasOutput && !lastCharWasNewline)
262
+ process.stdout.write('\n');
263
+ printCost(u.prompt, u.completion, cost, warning, Date.now() - turnStart);
264
+ }
213
265
  }
214
- writeStreamText(event.content);
215
266
  }
216
- else if (event.type === 'tool_call') {
217
- toolCalls = event.toolCalls;
267
+ }
268
+ catch (err) {
269
+ const msg = err instanceof Error ? err.message : String(err);
270
+ // Always close the streaming line first so the error doesn't glue to text.
271
+ if (hasOutput && !lastCharWasNewline)
272
+ process.stdout.write('\n');
273
+ // ── Auto-fallback path ─────────────────────────────────
274
+ // Categorize the error. If it's "unknown" (the provider returned a
275
+ // cryptic empty error like "ERROR" or "Provider returned error" that
276
+ // matches no specific pattern) AND we have a fallbackModel configured
277
+ // AND we haven't already used it, swap models and silently retry the
278
+ // same turn. This rescues users from broken free models without them
279
+ // having to manually /clear and /model switch.
280
+ const cat = categorizeApiError(msg, {
281
+ baseURL: ctx.config.baseURL,
282
+ provider: ctx.config.provider,
283
+ model: ctx.config.model,
284
+ });
285
+ const canFallback = cat.category === 'unknown'
286
+ && ctx.config.fallbackModel
287
+ && ctx.config.fallbackModel !== ctx.config.model
288
+ && !usedFallbackModel;
289
+ if (canFallback) {
290
+ usedFallbackModel = true;
291
+ const failedModel = ctx.config.model;
292
+ const fallback = ctx.config.fallbackModel;
293
+ ctx.config.model = fallback;
294
+ resetClient();
295
+ console.log(theme.warning(` ${sym.warn} ${failedModel} returned a cryptic provider error — retrying once with fallback model ${fallback}.`));
296
+ console.log(theme.dim(' (configure a different fallback with: /fallback <model-id>)'));
297
+ turns--; // this retry doesn't burn a turn slot from the max-turns budget
298
+ continue;
218
299
  }
219
- else if (event.type === 'done') {
220
- if (event.usage) {
221
- const u = event.usage;
222
- const { cost, warning } = trackUsage(ctx.sessionId, ctx.config.model, u.prompt, u.completion);
223
- // Single newline separator if we just streamed text, then the
224
- // compact telemetry line.
225
- if (hasOutput && !lastCharWasNewline)
226
- process.stdout.write('\n');
227
- printCost(u.prompt, u.completion, cost, warning, Date.now() - turnStart);
300
+ printApiError(msg, {
301
+ baseURL: ctx.config.baseURL,
302
+ provider: ctx.config.provider,
303
+ model: ctx.config.model,
304
+ });
305
+ // Voice: announce errors aloud for screen-reader users
306
+ if (isVoiceEnabled(ctx.config) && getAccessibilityConfig(ctx.config).announceErrors) {
307
+ const tts = getTtsConfig(ctx.config);
308
+ if (tts.apiKey) {
309
+ // Keep it terse — one short sentence — to avoid burning quota on
310
+ // long stack traces. The error pretty-printer already showed the
311
+ // categorized version to the screen-reader.
312
+ speak(`API error: ${msg.slice(0, 120)}`, ctx.config, { voiceId: tts.assistantVoiceId }).catch(() => { });
313
+ }
314
+ if (getAccessibilityConfig(ctx.config).audioCues) {
315
+ audioCue('error').catch(() => { });
228
316
  }
229
317
  }
318
+ ctx.messages.push({ role: 'assistant', content: `[API error: ${msg}]` });
319
+ break;
230
320
  }
231
- }
232
- catch (err) {
233
- const msg = err instanceof Error ? err.message : String(err);
234
- // Always close the streaming line first so the error doesn't glue to text.
235
- if (hasOutput && !lastCharWasNewline)
321
+ if (hasOutput && !lastCharWasNewline) {
236
322
  process.stdout.write('\n');
237
- printApiError(msg, {
238
- baseURL: ctx.config.baseURL,
239
- provider: ctx.config.provider,
240
- model: ctx.config.model,
241
- });
242
- // Voice: announce errors aloud for screen-reader users
243
- if (isVoiceEnabled(ctx.config) && getAccessibilityConfig(ctx.config).announceErrors) {
244
- const tts = getTtsConfig(ctx.config);
245
- if (tts.apiKey) {
246
- // Keep it terse — one short sentence — to avoid burning quota on
247
- // long stack traces. The error pretty-printer already showed the
248
- // categorized version to the screen-reader.
249
- speak(`API error: ${msg.slice(0, 120)}`, ctx.config, { voiceId: tts.assistantVoiceId }).catch(() => { });
250
- }
251
- if (getAccessibilityConfig(ctx.config).audioCues) {
252
- audioCue('error').catch(() => { });
253
- }
254
323
  }
255
- ctx.messages.push({ role: 'assistant', content: `[API error: ${msg}]` });
256
- inputGuard.restore();
257
- break;
324
+ // Save assistant message
325
+ const assistantMsg = { role: 'assistant', content: fullText || null };
326
+ if (toolCalls && toolCalls.length > 0) {
327
+ assistantMsg.tool_calls = toolCalls;
328
+ }
329
+ ctx.messages.push(assistantMsg);
330
+ // Accumulate visible assistant text for chain-end TTS. We don't TTS
331
+ // mid-chain because the model often emits short bridging sentences
332
+ // between tool calls — speaking each one is noisy and slow.
333
+ if (fullText)
334
+ accumulatedAssistantText += (accumulatedAssistantText ? '\n\n' : '') + fullText;
335
+ // If no tool calls, we're done
336
+ if (!toolCalls || toolCalls.length === 0)
337
+ break;
338
+ // Execute tool calls — executeToolCalls itself flips per-tool state
339
+ // and uses inputGuard.pause()/resume() around each permission prompt
340
+ // so rl.question() can read user input even though suppression is on
341
+ // for the rest of the chain.
342
+ const toolResults = await executeToolCalls(toolCalls, ctx, inputGuard);
343
+ ctx.messages.push(...toolResults);
258
344
  }
259
- inputGuard.restore();
260
- if (hasOutput && !lastCharWasNewline) {
261
- process.stdout.write('\n');
345
+ // Chain ended; back to idle so F1 reports the correct state.
346
+ setStatus({ state: 'idle' });
347
+ // ── Voice: read the assistant's final response ────────────
348
+ // Off the hot path — fire-and-forget so the next prompt appears
349
+ // immediately. The playback runs in background; F2 pauses, F4 skips.
350
+ if (isVoiceEnabled(ctx.config) && accumulatedAssistantText.trim()) {
351
+ const tts = getTtsConfig(ctx.config);
352
+ if (tts.apiKey) {
353
+ const a = getAccessibilityConfig(ctx.config);
354
+ let toRead = accumulatedAssistantText;
355
+ // If the response is long, abbreviate via cheap heuristic summary so
356
+ // blind users aren't forced to listen to 800 words. They can press
357
+ // F3 (replay) on chunks or ask "give me the full version" verbally.
358
+ const words = countWords(toRead);
359
+ if (words >= a.longResponseThreshold) {
360
+ toRead = summarize(toRead, a.longResponseThreshold);
361
+ }
362
+ // Register an abort controller + last-chunk + last-full-response
363
+ // globally so the hotkey handler in index.ts can cancel / replay.
364
+ // - __voiceLastChunk drives PGUP "replay last chunk"
365
+ // - __voiceLastFullResponse drives F3 "read full" + F4 "read summary"
366
+ const g = globalThis;
367
+ const ctl = new AbortController();
368
+ g.__voicePlaybackCtl = ctl;
369
+ g.__voiceLastChunk = toRead;
370
+ g.__voiceLastFullResponse = accumulatedAssistantText;
371
+ speakAssistantResponse(toRead, ctx.config, ctl.signal).catch(() => { });
372
+ }
262
373
  }
263
- // Save assistant message
264
- const assistantMsg = { role: 'assistant', content: fullText || null };
265
- if (toolCalls && toolCalls.length > 0) {
266
- assistantMsg.tool_calls = toolCalls;
374
+ if (turns >= maxTurns) {
375
+ console.log(theme.warning(`\n ${sym.warn} reached max turns limit`));
267
376
  }
268
- ctx.messages.push(assistantMsg);
269
- // Accumulate visible assistant text for chain-end TTS. We don't TTS
270
- // mid-chain because the model often emits short bridging sentences
271
- // between tool calls speaking each one is noisy and slow.
272
- if (fullText)
273
- accumulatedAssistantText += (accumulatedAssistantText ? '\n\n' : '') + fullText;
274
- // If no tool calls, we're done
275
- if (!toolCalls || toolCalls.length === 0)
276
- break;
277
- // Execute tool calls — executeToolCalls itself flips per-tool state
278
- const toolResults = await executeToolCalls(toolCalls, ctx);
279
- ctx.messages.push(...toolResults);
280
- }
281
- // Chain ended; back to idle so F1 reports the correct state.
282
- setStatus({ state: 'idle' });
283
- // ── Voice: read the assistant's final response ────────────
284
- // Off the hot path — fire-and-forget so the next prompt appears
285
- // immediately. The playback runs in background; F2 pauses, F4 skips.
286
- if (isVoiceEnabled(ctx.config) && accumulatedAssistantText.trim()) {
287
- const tts = getTtsConfig(ctx.config);
288
- if (tts.apiKey) {
289
- const a = getAccessibilityConfig(ctx.config);
290
- let toRead = accumulatedAssistantText;
291
- // If the response is long, abbreviate via cheap heuristic summary so
292
- // blind users aren't forced to listen to 800 words. They can press
293
- // F3 (replay) on chunks or ask "give me the full version" verbally.
294
- const words = countWords(toRead);
295
- if (words >= a.longResponseThreshold) {
296
- toRead = summarize(toRead, a.longResponseThreshold);
297
- }
298
- // Register an abort controller + last-chunk + last-full-response
299
- // globally so the hotkey handler in index.ts can cancel / replay.
300
- // - __voiceLastChunk drives PGUP "replay last chunk"
301
- // - __voiceLastFullResponse drives F3 "read full" + F4 "read summary"
302
- const g = globalThis;
303
- const ctl = new AbortController();
304
- g.__voicePlaybackCtl = ctl;
305
- g.__voiceLastChunk = toRead;
306
- g.__voiceLastFullResponse = accumulatedAssistantText;
307
- speakAssistantResponse(toRead, ctx.config, ctl.signal).catch(() => { });
377
+ // Chain-elapsed summary. One line per response chain (user msg → assistant
378
+ // ending without a tool call), printed regardless of how many tool-call
379
+ // iterations the chain took. Lets the user see how long that whole
380
+ // exchange took, separate from per-turn cost timings.
381
+ const chainMs = Date.now() - chainStart;
382
+ // Only show if there was meaningful work — multi-second chains. Sub-second
383
+ // chains (slash command rejects, instant returns) don't need a chain line.
384
+ if (chainMs > 1500) {
385
+ console.log(theme.dim(` chain ${formatDuration(chainMs)} · ${turns} ${turns === 1 ? 'turn' : 'turns'}`));
308
386
  }
309
387
  }
310
- if (turns >= maxTurns) {
311
- console.log(theme.warning(`\n ${sym.warn} reached max turns limit`));
312
- }
313
- // Chain-elapsed summary. One line per response chain (user msg → assistant
314
- // ending without a tool call), printed regardless of how many tool-call
315
- // iterations the chain took. Lets the user see how long that whole
316
- // exchange took, separate from per-turn cost timings.
317
- const chainMs = Date.now() - chainStart;
318
- // Only show if there was meaningful work — multi-second chains. Sub-second
319
- // chains (slash command rejects, instant returns) don't need a chain line.
320
- if (chainMs > 1500) {
321
- console.log(theme.dim(` chain ${formatDuration(chainMs)} · ${turns} ${turns === 1 ? 'turn' : 'turns'}`));
388
+ finally {
389
+ inputGuard.restore();
322
390
  }
323
391
  }
324
- async function executeToolCalls(toolCalls, ctx) {
392
+ async function executeToolCalls(toolCalls, ctx, inputGuard) {
325
393
  const results = [];
326
394
  for (const tc of toolCalls) {
327
395
  const toolName = tc.function.name;
@@ -409,7 +477,18 @@ async function executeToolCalls(toolCalls, ctx) {
409
477
  }
410
478
  }
411
479
  // ── Permission check ──────────────────────────────────
412
- const allowed = await checkPermission(tool, input, ctx.config, ctx.rl);
480
+ // Pause input suppression so rl.question() can read the user's
481
+ // Y/n/always response — without this, readline's keypress listener is
482
+ // detached and the prompt would hang forever. Re-suppress immediately
483
+ // after so any typing during the next tool's execution is blocked.
484
+ inputGuard.pause();
485
+ let allowed;
486
+ try {
487
+ allowed = await checkPermission(tool, input, ctx.config, ctx.rl);
488
+ }
489
+ finally {
490
+ inputGuard.resume();
491
+ }
413
492
  if (!allowed) {
414
493
  console.log(theme.warning(` ${sym.warn} Denied: ${toolName}`));
415
494
  results.push({