@semalt-ai/code 1.8.3 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/agent.js CHANGED
@@ -3,11 +3,16 @@
3
3
  const { logToolCall } = require('./audit');
4
4
  const { Metrics } = require('./metrics');
5
5
  const { getSystemPrompt } = require('./prompts');
6
+ const { isNativeToolsActive } = require('./config');
6
7
  const { TAG_REGISTRY } = require('./constants');
7
8
  const { mapInvokeToCall } = require('./tools');
9
+ const { TOOL_SPECS } = require('./tool_specs');
8
10
  const { UI_THEME } = require('./ui/theme');
9
11
  const { RST } = require('./ui/ansi');
10
12
  const { getCols: _getCols, repeatToWidth } = require('./ui/utils');
13
+ const writer = require('./ui/writer');
14
+ const messages = require('./ui/messages');
15
+ const dbg = require('./debug');
11
16
 
12
17
  class StreamParser {
13
18
  constructor(onToken, onTagOpen, onTagContent, onTagClose) {
@@ -153,7 +158,33 @@ function estimateTokens(text) {
153
158
  return Math.floor((text || '').length / 4);
154
159
  }
155
160
 
156
- function detectFormat(reply, toolCalls) {
161
// Fallback classifier for user-initiated aborts. Depending on where in the
// Node http stack the signal fires, an abort can surface as our own
// `new Error('Aborted')` (raised by the abort paths in api.js) or as one of
// Node's built-in AbortError / ABORT_ERR shapes. Callers should consult the
// AbortSignal first — it is the authoritative source — and only use this
// helper to classify an error object after the fact.
//
// Returns true when `err` looks like an abort, false otherwise (including
// when `err` is null/undefined or any other falsy value).
function isAbortError(err) {
  if (!err) return false;

  // Error codes used for abort-style failures.
  const abortCodes = ['ABORT_ERR', 'ERR_ABORTED'];

  return (
    err.name === 'AbortError' ||
    abortCodes.includes(err.code) ||
    // Exact (case-insensitive) match only, so messages that merely mention
    // the word "aborted" are not treated as user aborts.
    (typeof err.message === 'string' && /^Aborted$/i.test(err.message))
  );
}
172
+
173
// Sleep for `ms` milliseconds, waking early if `signal` aborts.
//
// The returned promise always resolves with `undefined` and never rejects,
// whether the delay elapsed or the signal fired, so callers must inspect the
// signal afterwards to learn which happened. `signal` is optional; without
// it this is a plain timer-based sleep.
//
// The abort listener is detached again when the timer fires normally.
// Otherwise every completed sleep would leave a dead listener attached to a
// signal that may be reused for many sleeps before it is ever aborted.
function abortableSleep(ms, signal) {
  return new Promise((resolve) => {
    if (!signal) {
      setTimeout(() => resolve(), ms);
      return;
    }
    // Already aborted: do not schedule anything.
    if (signal.aborted) {
      resolve();
      return;
    }

    let timer = null;
    const onAbort = () => {
      clearTimeout(timer);
      resolve();
    };
    timer = setTimeout(() => {
      // Normal wake-up: drop the listener so it does not accumulate.
      signal.removeEventListener?.('abort', onAbort);
      resolve();
    }, ms);
    signal.addEventListener('abort', onAbort, { once: true });
  });
}
185
+
186
+ function detectFormat(reply, toolCalls, nativeToolCalls) {
187
+ if (nativeToolCalls && nativeToolCalls.length > 0) return 'native_tool_calls';
157
188
  if (!reply || !reply.trim()) return 'empty';
158
189
  if (/<(minimax:tool_call|qwen:tool_call|tool_call|function_call)\b/i.test(reply)) return 'tool_call';
159
190
  if (toolCalls && toolCalls.length > 0) return 'command';
@@ -195,6 +226,26 @@ function previewCommand(call) {
195
226
  return trimmed ? `<${tag}> ${trimmed}` : `<${tag}>`;
196
227
  }
197
228
 
229
// Classify why mapInvokeToCall returned null for a native tool_call so the
// debug block (and the corrective retry hint) can surface the specific cause
// instead of a generic "unknown name or invalid args". Source of truth is
// TOOL_SPECS — its `parameters.required` array lists the args the native API
// advertised as mandatory, and `wrapper: true` flags parser envelopes that
// must never appear as a model-emitted tool name.
//
// toolName — name the model emitted; matched case-insensitively. Anything
//            that is not a string is treated as an unknown name.
// params   — parsed tool_call arguments. JSON.parse can legally yield null
//            or a primitive; those are treated as "no arguments supplied"
//            rather than crashing on property access.
//
// Returns a short human-readable reason string. Never throws.
function describeNativeRejection(toolName, params) {
  const lowerName = typeof toolName === 'string' ? toolName.toLowerCase() : '';

  // Own-property lookup only: names such as "constructor" or "toString"
  // would otherwise resolve to Object.prototype members and be mistaken for
  // a real spec.
  const spec = Object.prototype.hasOwnProperty.call(TOOL_SPECS, lowerName)
    ? TOOL_SPECS[lowerName]
    : undefined;
  if (!spec || spec.wrapper) {
    return 'unknown name (not in TOOL_SPECS / not supported by mapInvokeToCall)';
  }

  const declared = spec.parameters && spec.parameters.required;
  const required = Array.isArray(declared) ? declared : [];
  const args = params !== null && typeof params === 'object' ? params : {};

  const missing = required.filter((key) => args[key] === undefined || args[key] === null);
  if (missing.length > 0) {
    return `missing required arg: ${missing.join(', ')}`;
  }
  return 'mapInvokeToCall returned null without specific reason';
}
248
+
198
249
  function formatDebugBlock(sections) {
199
250
  // The debug block is rendered as a tool-output message in the TUI. Chat
200
251
  // history indents output by 5 cols; account for that so the frame still
@@ -281,7 +332,117 @@ function truncateForDebug(text, maxLines = 40, maxChars = 2000) {
281
332
  return s;
282
333
  }
283
334
 
284
- function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, ui, getConfig }) {
335
// Per-tag meta extractor. Converts a tool-executor return value into the
// compact meta object consumed by the tool-line formatter — exit codes for
// shell, byte counts for file ops, status_code + bytes for HTTP, etc. A
// pure function by design: no UI state, no config reads. The callback
// layer (commands.js) feeds the meta into formatToolLine together with
// the tag, so the formatter can produce the 4-segment line in either the
// pending (live region) or final (scrollback) context.
//
// tag    — tool tag the result belongs to.
// result — value returned by the executor for that tool.
//
// Returns the meta object for known tags, or null when `result` is missing,
// carries an `error`, or the tag has no meta mapping.
function _metaForTool(tag, result) {
  if (!result || result.error) return null;

  // Prefer the executor-reported byte count; otherwise measure the UTF-8
  // size of the fallback payload (0 when there is none).
  const byteCount = (fallbackPayload) => {
    if (typeof result.bytes === 'number') return result.bytes;
    return fallbackPayload ? Buffer.byteLength(String(fallbackPayload), 'utf8') : 0;
  };
  const lengthOf = (list) => (Array.isArray(list) ? list.length : 0);

  switch (tag) {
    case 'shell':
    case 'exec':
      return { exit_code: result.exit_code };

    case 'read':
    case 'read_file':
      return { bytes: byteCount(result.content) };

    case 'write':
    case 'write_file':
    case 'create_file':
    case 'append':
    case 'append_file':
    case 'upload':
      // No payload to measure on the write side: reported count or 0.
      return { bytes: byteCount(null) };

    case 'list_dir':
      return { count: lengthOf(result.items) };
    case 'search_files':
      return { count: lengthOf(result.files) };
    case 'search_in_file':
      return { count: lengthOf(result.matches) };
    case 'replace_in_file':
      return { count: typeof result.count === 'number' ? result.count : 0 };

    case 'http_get':
    case 'download':
      return { status_code: result.status_code, bytes: byteCount(result.body) };

    case 'file_stat': {
      // size_kb may be a number or a numeric string; anything that does not
      // parse to a finite number yields 0 instead of leaking NaN into the
      // formatted tool line.
      const sizeKb = Number.parseFloat(result.size_kb);
      return {
        bytes: Number.isFinite(sizeKb) ? Math.round(sizeKb * 1024) : 0,
        kind: result.type || null,
      };
    }

    default:
      return null;
  }
}
387
+
388
// Translate a [action, arg1, arg2, …] call tuple into the `attrs` bag that
// formatToolLine looks up when building the operation string. This is the
// single place where the per-tag positional-argument contract is spelled
// out, so any tool added to the agent-loop tuple schema needs a mapping
// here as well.
//
// Returns {} for anything that is not a non-empty array and for tags that
// have no mapping. Missing/falsy positional args become '' (with two
// exceptions: edit_file's `line` is passed through untouched, and
// search_files' `dir` falls back to '.').
function _attrsFromCall(call) {
  if (!Array.isArray(call) || call.length === 0) return {};

  const tag = call[0];
  // n-th positional argument (0-based, after the tag), '' when falsy.
  const text = (n) => call[n + 1] || '';
  const tagIs = (...names) => names.includes(tag);

  if (tagIs('shell', 'exec')) {
    return { command: text(0) };
  }
  if (tagIs('read', 'read_file', 'list_dir', 'delete_file', 'make_dir', 'remove_dir', 'file_stat', 'upload')) {
    return { path: text(0) };
  }
  if (tagIs('write', 'write_file', 'create_file', 'append', 'append_file')) {
    return { path: text(0), content: text(1) };
  }
  if (tagIs('move_file', 'copy_file')) {
    return { src: text(0), dst: text(1) };
  }
  if (tag === 'edit_file') {
    // `line` is forwarded as-is (no '' fallback).
    return { path: text(0), line: call[2], content: text(2) };
  }
  if (tag === 'search_files') {
    return { pattern: text(0), dir: call[2] || '.' };
  }
  if (tag === 'search_in_file') {
    return { path: text(0), pattern: text(1) };
  }
  if (tag === 'replace_in_file') {
    return { path: text(0), search: text(1), replace: text(2), flags: text(3) };
  }
  if (tagIs('get_env', 'recall_memory')) {
    return tag === 'get_env' ? { name: text(0) } : { key: text(0) };
  }
  if (tag === 'set_env') {
    return { name: text(0), value: text(1) };
  }
  if (tagIs('download', 'http_get')) {
    return { url: text(0) };
  }
  if (tag === 'ask_user') {
    return { question: text(0) };
  }
  if (tag === 'store_memory') {
    return { key: text(0), value: text(1) };
  }
  return {};
}
444
+
445
+ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agentExecFile, describePermission, permissionManager, ui, getConfig }) {
285
446
  const { BOLD, FG_DARK, FG_GRAY, FG_TEAL, FG_YELLOW, RST, THEME, getCols } = ui;
286
447
 
287
448
  function formatFileResult(call, result) {
@@ -405,8 +566,7 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
405
566
  }
406
567
  case 'http_get': {
407
568
  const url = attrs.url || content;
408
- const raw = attrs.raw || '';
409
- return formatFileResult(['http_get', url, raw], await agentExecFile('http_get', url, raw));
569
+ return formatFileResult(['http_get', url], await agentExecFile('http_get', url));
410
570
  }
411
571
  case 'ask_user': {
412
572
  const q = attrs.question || content;
@@ -461,30 +621,26 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
461
621
  const metrics = new Metrics(tokenLimit);
462
622
  const mode = overrideMode || 'system_role';
463
623
 
464
- // Route debug blocks to the UI callback when present (interactive TUI mode
465
- // overwrites stderr with redraws, losing the output). Fall back to stderr
466
- // for one-shot/non-TTY flows where there's no UI to host the block.
624
+ // Route debug blocks based on debug mode.
625
+ // file mode — write to the debug file. Never touch the TUI.
626
+ // simple mode — UI callback when present (chat-bubble in interactive
627
+ // TUI), fall back to stderr for one-shot/non-TTY flows.
628
+ // off mode — discard. (debug=true can also come from in-chat /debug
629
+ // toggle with no global mode active.)
467
630
  const emitDebug = (block) => {
631
+ if (dbg.isFile()) {
632
+ dbg.log(block);
633
+ return;
634
+ }
468
635
  if (typeof cb.onDebug === 'function') cb.onDebug(block);
636
+ // audit: allowed — stderr debug under --debug flag (no UI hosting available).
469
637
  else process.stderr.write('\n' + block + '\n');
470
638
  };
471
639
 
472
- // Resolve native_tools from the active profile (matched by api_base+model).
473
- // Fallback to true if no matching profile — mirrors config-normalization default.
474
- const _cfg = typeof getConfig === 'function' ? getConfig() : {};
475
- const _profile = Array.isArray(_cfg.models)
476
- ? _cfg.models.find((p) => p && p.api_base === _cfg.api_base && p.model === model)
477
- : null;
478
- const nativeTools = _profile && _profile.native_tools === false ? false : true;
640
+ const nativeTools = isNativeToolsActive(model);
479
641
 
480
642
  const activeSystemPrompt = overrideSystemPrompt !== null ? overrideSystemPrompt : getSystemPrompt(nativeTools);
481
643
 
482
- // Response contract: every model response must end with a tool call or
483
- // <final_answer>...</final_answer>. Anything else is degraded — push a
484
- // synthetic nudge and retry, capped to prevent runaway loops.
485
- const MAX_DEGRADED_RETRIES = 2;
486
- let degradedRetries = 0;
487
-
488
644
  for (let iteration = 0; iteration < maxIterations; iteration++) {
489
645
  if (isAborted()) break;
490
646
  const linePrefix = `${FG_TEAL}${BOLD}◆ ${RST}`;
@@ -582,6 +738,14 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
582
738
  lastApiErr = null;
583
739
  break;
584
740
  } catch (err) {
741
+ // User-initiated abort: not a transient failure. Skip the retry
742
+ // counter, the "Retrying (N/M)..." status update, the debug dump,
743
+ // and the post-loop error surface. The "Interrupted." feedback is
744
+ // already shown by the input-field abort listener.
745
+ if (controller.signal.aborted || isAborted() || isAbortError(err)) {
746
+ lastApiErr = null;
747
+ break;
748
+ }
585
749
  lastApiErr = err;
586
750
  if (debug) {
587
751
  const status = err.statusCode ? `HTTP ${err.statusCode}` : 'network error';
@@ -621,13 +785,25 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
621
785
  }
622
786
  }
623
787
  cb.onRetry?.(attempt + 1, MAX_RETRIES);
624
- await new Promise((r) => setTimeout(r, delayMs));
788
+ await abortableSleep(delayMs, controller.signal);
789
+ // Ctrl+C pressed during backoff: bail without the next attempt.
790
+ if (controller.signal.aborted || isAborted()) {
791
+ lastApiErr = null;
792
+ break;
793
+ }
625
794
  }
626
795
  }
627
796
  } finally {
628
797
  clearInterval(abortWatcher);
629
798
  }
630
799
 
800
+ // User-initiated abort: exit the turn quietly. Skip the empty-reply
801
+ // "connection dropped" warning below — the abort listener already
802
+ // surfaced "Interrupted." and the outer prompt will return.
803
+ if (controller.signal.aborted || isAborted()) {
804
+ break;
805
+ }
806
+
631
807
  if (lastApiErr) {
632
808
  if (cb.onError) cb.onError(lastApiErr);
633
809
  break;
@@ -652,13 +828,7 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
652
828
  if (cb.onError) {
653
829
  cb.onError({ message: warnMsg, isWarning: true });
654
830
  } else {
655
- // Non-TUI fallback (cb.onError is unset only for one-shot CLI
656
- // commands like `cmdCode`, which don't run the shared live-region
657
- // writer). Direct stdout write is safe here: no status-bar timer
658
- // or bubble renderer is competing for stdout.
659
- process.stdout.write(
660
- `\n ${THEME.warn}⚠ ${warnMsg}${THEME.reset}\n`
661
- );
831
+ messages.sysWarn(warnMsg);
662
832
  }
663
833
  }
664
834
 
@@ -708,20 +878,35 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
708
878
  const nativeToolCalls = Array.isArray(result?.toolCalls) ? result.toolCalls : [];
709
879
  let toolCalls;
710
880
  let nativeToolCallIds = [];
881
+ // Per-call rejection records for native tool_calls that could not be
882
+ // converted to executable form (parse error or unknown name / missing
883
+ // required arg). Used downstream to (a) keep the assistant's tool_calls
884
+ // ↔ tool-result map consistent, and (b) feed a corrective hint back to
885
+ // the model so it retries instead of stalling.
886
+ const nativeRejections = [];
711
887
  if (nativeToolCalls.length > 0) {
712
888
  toolCalls = [];
713
889
  for (const tc of nativeToolCalls) {
890
+ const fnName = tc.function?.name || '(unknown)';
891
+ const argsRaw = tc.function?.arguments || '';
892
+ const argsPreview = argsRaw.length > 200 ? argsRaw.slice(0, 200) + '…' : argsRaw;
714
893
  let args;
715
894
  try {
716
- args = tc.function?.arguments ? JSON.parse(tc.function.arguments) : {};
895
+ args = argsRaw ? JSON.parse(argsRaw) : {};
717
896
  } catch (err) {
718
- if (cb.onError) cb.onError({ message: `Failed to parse tool_call arguments for ${tc.function?.name || '(unknown)'}: ${err.message}`, isWarning: true });
897
+ const reason = `JSON parse failed: ${err.message}`;
898
+ if (cb.onError) cb.onError({ message: `${fnName}: ${reason} Args: ${argsPreview}`, isWarning: true });
899
+ nativeRejections.push({ id: tc.id, name: fnName, argsPreview, reason });
719
900
  continue;
720
901
  }
721
- const call = mapInvokeToCall(tc.function?.name, args);
902
+ const call = mapInvokeToCall(fnName, args);
722
903
  if (call) {
723
904
  toolCalls.push(call);
724
905
  nativeToolCallIds.push(tc.id);
906
+ } else {
907
+ const reason = describeNativeRejection(fnName, args);
908
+ if (cb.onError) cb.onError({ message: `${fnName}: ${reason} Args: ${argsPreview}`, isWarning: true });
909
+ nativeRejections.push({ id: tc.id, name: fnName, argsPreview, reason });
725
910
  }
726
911
  }
727
912
  } else {
@@ -729,9 +914,6 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
729
914
  }
730
915
  const isNativeCall = nativeToolCalls.length > 0;
731
916
  const cleanedReply = cleanAssistantContent(reply);
732
- // Protocol contract: a valid response ends with a tool call OR a
733
- // <final_answer>...</final_answer> block. Anything else is degraded.
734
- const hasFinal = /<final_answer\b[\s\S]*?<\/final_answer>/i.test(reply);
735
917
 
736
918
  if (debug && result) {
737
919
  const lastUserMsg = [...messagesWithSystem].reverse().find((m) => m.role === 'user');
@@ -752,17 +934,27 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
752
934
  const visibleTokens = Math.max(completionTokens - thinkingTokens, 0);
753
935
  const contextLimit = tokenLimit || null;
754
936
  const ctxPct = contextLimit ? Math.round((promptTokens / contextLimit) * 100) : null;
755
- const detected = detectFormat(reply, toolCalls);
937
+ const detected = detectFormat(reply, toolCalls, nativeToolCalls);
756
938
  const firstCmd = toolCalls.length > 0 ? previewCommand(toolCalls[0]) : previewCommand(null);
757
939
  const toolTags = Object.entries(TAG_REGISTRY)
758
940
  .filter(([, e]) => e.type === 'tool')
759
941
  .map(([t]) => t);
942
+ const callableSpecCount = Object.values(TOOL_SPECS).filter((s) => !s.wrapper).length;
760
943
 
761
944
  const warnings = [];
762
945
  if (result.finish_reason === 'length') warnings.push('finish_reason=length → response truncated, increase max_tokens');
763
946
  if (detected === 'tool_call' && toolCalls.length === 0) {
764
947
  warnings.push('commands_found=0 → agent emitted no command, client will stall');
765
948
  }
949
+ if (detected === 'native_tool_calls' && toolCalls.length === 0) {
950
+ const lines = [`commands_found=0 → all ${nativeToolCalls.length} native tool_call(s) rejected:`];
951
+ for (const r of nativeRejections) {
952
+ lines.push(` • name="${r.name}"`);
953
+ lines.push(` args=${r.argsPreview || '(empty)'}`);
954
+ lines.push(` reason=${r.reason}`);
955
+ }
956
+ warnings.push(lines.join('\n'));
957
+ }
766
958
  if (ctxPct !== null && ctxPct > 80) warnings.push(`context_used=${ctxPct}% → approaching context limit`);
767
959
 
768
960
  const block = formatDebugBlock({
@@ -788,7 +980,9 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
788
980
  ['temperature:', result.request?.temperature ?? '(default)'],
789
981
  ['stop_sequences:', JSON.stringify(result.request?.stop || [])],
790
982
  ['reasoning_effort:', '(n/a)'],
791
- ['tools_enabled:', `${toolTags.length} XML tags (via system prompt)`],
983
+ ['tools_enabled:', nativeTools
984
+ ? `${callableSpecCount} functions (via tools API)`
985
+ : `${toolTags.length} XML tags (via system prompt)`],
792
986
  ]],
793
987
  ['RESPONSE', [
794
988
  ['finish_reason:', result.finish_reason || '(unknown)'],
@@ -838,7 +1032,13 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
838
1032
  }
839
1033
 
840
1034
  const assistantMsg = { role: 'assistant', content: cleanedReply };
841
- if (isNativeCall) assistantMsg.tool_calls = nativeToolCalls;
1035
+ // Only attach tool_calls for the calls we actually accepted. Attaching
1036
+ // rejected calls here would leave them without matching `tool` results
1037
+ // on the next turn — strict providers reject the resulting history.
1038
+ if (isNativeCall && nativeToolCallIds.length > 0) {
1039
+ const acceptedSet = new Set(nativeToolCallIds);
1040
+ assistantMsg.tool_calls = nativeToolCalls.filter((tc) => acceptedSet.has(tc.id));
1041
+ }
842
1042
  messages.push(assistantMsg);
843
1043
  // When showThink is off and the turn has tool calls, suppress the text bubble —
844
1044
  // pre-tool reasoning is noise, tool result bubbles already convey what happened.
@@ -846,6 +1046,29 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
846
1046
  if (cb.onAssistantMessage) cb.onAssistantMessage(displayReply);
847
1047
 
848
1048
  if (toolCalls.length === 0) {
1049
+ // Native mode: tool_calls came in but none could be converted (parse
1050
+ // error or unknown name / missing required arg). Push a corrective
1051
+ // user hint so the model retries instead of stalling. Without this
1052
+ // the loop would break silently — that's the bug the migration set
1053
+ // out to fix.
1054
+ if (isNativeCall && nativeRejections.length > 0) {
1055
+ const summary = nativeRejections
1056
+ .map((r) => `- ${r.name}: ${r.reason}`)
1057
+ .join('\n');
1058
+ if (cb.onError) {
1059
+ const names = nativeRejections.map((r) => r.name).join(', ');
1060
+ cb.onError({
1061
+ message: `Native tool_call(s) rejected: ${names}. Asking the model to retry with a valid call.`,
1062
+ isWarning: true,
1063
+ });
1064
+ }
1065
+ messages.push({
1066
+ role: 'user',
1067
+ content: `Your last response contained tool_calls that could not be executed:\n\n${summary}\n\nRetry with a valid tool name and complete required arguments per the tools schema.`,
1068
+ });
1069
+ continue;
1070
+ }
1071
+
849
1072
  // Detect malformed known-tag syntax (e.g. <create_file> with no path
850
1073
  // attribute, usually paired with nonsense like <attrs: path=...> inside
851
1074
  // the body). Push a corrective feedback message and keep looping so
@@ -864,121 +1087,172 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
864
1087
  continue;
865
1088
  }
866
1089
 
867
- if (hasFinal) {
868
- // Model declared it is done — honor the protocol and terminate.
869
- // An empty <final_answer></final_answer> is the model's choice;
870
- // we don't police content.
871
- degradedRetries = 0;
872
- break;
873
- }
874
-
875
- // Protocol violation: neither a tool call nor a <final_answer>. Nudge
876
- // the model to restate in-protocol, capped to prevent runaway loops.
877
- if (degradedRetries >= MAX_DEGRADED_RETRIES) {
878
- if (cb.onError) {
879
- cb.onError({ message: `Agent violated the response contract after ${MAX_DEGRADED_RETRIES} retries — no tool call or <final_answer> block emitted. Stopping.`, isWarning: false });
880
- }
881
- break;
882
- }
883
- degradedRetries++;
884
- if (cb.onError) {
885
- cb.onError({ message: 'Response missing tool call or <final_answer> — nudging model to retry in-protocol.', isWarning: true });
886
- }
887
- messages.push({
888
- role: 'user',
889
- content: 'Your previous response contained neither a tool call nor a <final_answer> block, which violates the response contract. If you need to perform an action, emit the appropriate tool tag now. If you are done, wrap your reply in <final_answer>...</final_answer>. Do not describe intended actions in prose.',
890
- });
891
- continue;
1090
+ // No tool calls and non-empty content (the empty case was already
1091
+ // handled by the `!reply` guard above). This is the model's final
1092
+ // answer for this turn — end the loop and return control to the user.
1093
+ break;
892
1094
  }
893
- // Non-degraded response (has tool calls) — reset the retry counter.
894
- degradedRetries = 0;
895
1095
  if (isAborted()) break;
896
1096
 
897
1097
  if (!cb.onToolStart) {
898
- // Non-TUI fallback: only one-shot CLI commands leave cb.onToolStart
899
- // unset. The shared live-region writer isn't running, so a direct
900
- // write here can't interleave with a bubble/status redraw.
901
- process.stdout.write(`\n ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}\n`);
1098
+ writer.scrollback(`\n ${FG_TEAL}◆${RST} ${FG_GRAY}Found ${toolCalls.length} action(s) to execute${RST}`);
902
1099
  }
903
1100
 
904
1101
  const results = [];
905
1102
  const debugEntries = debug ? [] : null;
906
1103
  let aborted = false;
1104
+ // Per-invocation id. Paired across onToolStart/onToolEnd so the UI
1105
+ // layer can track each concurrent tool's activity-region slot and
1106
+ // commit its final line atomically via endActivity. Monotonic —
1107
+ // never reused even if the agent runs the same tag twice.
1108
+ let invocationCounter = 0;
1109
+
1110
+ // Re-arm the abort watcher for the tool-execution phase. The API-call
1111
+ // finally cleared the previous one, so without this a Ctrl+C while a
1112
+ // long shell command is running would never reach the AbortSignal we
1113
+ // now thread into agentExecShell — the child would keep running and
1114
+ // the UI would show "Interrupted" without actually killing anything.
1115
+ const toolAbortWatcher = setInterval(() => {
1116
+ if (isAborted() && !controller.signal.aborted) controller.abort();
1117
+ }, 50);
907
1118
 
908
- for (const call of toolCalls) {
909
- if (isAborted()) { aborted = true; break; }
1119
+ try {
1120
+ for (const call of toolCalls) {
1121
+ if (isAborted()) { aborted = true; break; }
1122
+
1123
+ const tag = call[0] || 'unknown';
1124
+ const arg = call[1] || '';
1125
+ const attrs = _attrsFromCall(call);
1126
+
1127
+ // Permission gate, lifted out of the executors. Asking before
1128
+ // onToolStart fires means the activity bubble (and its 1Hz
1129
+ // ticker) doesn't pre-date grant — and on denial no bubble
1130
+ // appears at all. The picker's own onCloseModal scrollback
1131
+ // line ("✗ <description>") is the visual record of the denial.
1132
+ let permDesc = null;
1133
+ try {
1134
+ permDesc = describePermission ? await describePermission(call) : null;
1135
+ } catch (err) {
1136
+ if (cb.onError) cb.onError({ message: `describePermission(${tag}): ${err.message}`, isWarning: true });
1137
+ }
1138
+ if (permDesc) {
1139
+ if (cb.onPermissionAsk) cb.onPermissionAsk(tag, arg);
1140
+ let approved = true;
1141
+ try {
1142
+ approved = await permissionManager.askPermission(permDesc.actionType, permDesc.description, permDesc.tag);
1143
+ } catch (err) {
1144
+ if (cb.onError) cb.onError({ message: `askPermission(${tag}): ${err.message}`, isWarning: true });
1145
+ approved = false;
1146
+ }
1147
+ if (!approved) {
1148
+ const resultStr = (tag === 'shell' || tag === 'exec')
1149
+ ? `Command \`${arg}\`: Permission denied by user.`
1150
+ : `${tag} ${arg}: Permission denied by user.`;
1151
+ logToolCall(permDesc.tag, { args: call.slice(1) }, false, 'denied');
1152
+ results.push(resultStr);
1153
+ if (debugEntries) debugEntries.push({ tag, call, ms: 0, status: 'denied', exitCode: null, result: resultStr });
1154
+ aborted = true;
1155
+ break;
1156
+ }
1157
+ }
910
1158
 
911
- const tag = call[0] || 'unknown';
912
- const arg = call[1] || '';
913
- const toolStart = Date.now();
1159
+ const toolStart = Date.now();
1160
+ const invocationId = `tool-${iteration}-${invocationCounter++}-${tag}`;
1161
+ const startCtx = { id: invocationId, call, attrs, startedAt: toolStart };
914
1162
 
915
- if (cb.onToolStart) cb.onToolStart(tag, arg);
1163
+ if (cb.onToolStart) cb.onToolStart(tag, arg, startCtx);
916
1164
 
917
- try {
918
- if (tag === 'shell') {
919
- const shellResult = await agentExecShell(arg);
1165
+ try {
1166
+ if (tag === 'shell') {
1167
+ const shellResult = await agentExecShell(arg, { signal: controller.signal });
1168
+ const ms = Date.now() - toolStart;
1169
+ if (shellResult.aborted) {
1170
+ // User pressed Ctrl+C mid-command. The child process tree
1171
+ // has already been terminated by killTreeEscalating in
1172
+ // tools.js. Surface a clear message to the model so it can
1173
+ // plan around the interruption instead of blindly retrying
1174
+ // the same long-running command on the next turn.
1175
+ const elapsedS = shellResult.elapsed_s || 0;
1176
+ const oneLine = String(arg).replace(/\s+/g, ' ').trim();
1177
+ const truncatedCmd = oneLine.length > 80 ? oneLine.slice(0, 77) + '...' : oneLine;
1178
+ const resultStr = `User interrupted execution after ${elapsedS}s. Tool was running: ${truncatedCmd}. Plan around this — do not retry the same long-running command.`;
1179
+ if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta: null, error: { message: 'aborted' } });
1180
+ results.push(resultStr);
1181
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'aborted', exitCode: null, result: resultStr });
1182
+ aborted = true;
1183
+ break;
1184
+ } else {
1185
+ let out = shellResult.stdout;
1186
+ if (shellResult.stderr) out += `\nSTDERR: ${shellResult.stderr}`;
1187
+ const resultStr = `Command \`${arg}\`:\nExit code: ${shellResult.exit_code}\n${out}`;
1188
+ const meta = _metaForTool(tag, shellResult);
1189
+ const error = shellResult.exit_code !== 0
1190
+ ? { message: `exit ${shellResult.exit_code}`, code: shellResult.exit_code }
1191
+ : null;
1192
+ if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta, error });
1193
+ results.push(resultStr);
1194
+ if (debugEntries) debugEntries.push({
1195
+ tag,
1196
+ call,
1197
+ ms,
1198
+ status: shellResult.exit_code === 0 ? 'ok' : 'nonzero_exit',
1199
+ exitCode: shellResult.exit_code,
1200
+ result: resultStr,
1201
+ });
1202
+ }
1203
+ continue;
1204
+ }
1205
+
1206
+ const fileResult = await agentExecFile(...call, { signal: controller.signal });
920
1207
  const ms = Date.now() - toolStart;
921
- if (shellResult.stderr === 'Permission denied by user') {
922
- const resultStr = `Command \`${arg}\`: Permission denied by user.`;
923
- if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
1208
+
1209
+ if (fileResult.aborted) {
1210
+ // User pressed Ctrl+C while a file/network tool was running.
1211
+ // The per-tool abort listener has already torn down the in-flight
1212
+ // op (closed the FS read, destroyed the HTTP request, stopped the
1213
+ // recursive walk). Surface a clear note to the model so the next
1214
+ // turn doesn't replay the same long-running operation.
1215
+ const elapsedS = fileResult.elapsed_s || 0;
1216
+ const oneLine = String(arg).replace(/\s+/g, ' ').trim();
1217
+ const truncatedArg = oneLine.length > 80 ? oneLine.slice(0, 77) + '...' : oneLine;
1218
+ const resultStr = `User interrupted execution after ${elapsedS}s. Tool was running: ${tag} ${truncatedArg}. Plan around this — do not retry the same long-running operation.`;
1219
+ if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta: null, error: { message: 'aborted' } });
924
1220
  results.push(resultStr);
925
- if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
1221
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'aborted', exitCode: null, result: resultStr });
926
1222
  aborted = true;
927
1223
  break;
928
1224
  } else {
929
- let out = shellResult.stdout;
930
- if (shellResult.stderr) out += `\nSTDERR: ${shellResult.stderr}`;
931
- const resultStr = `Command \`${arg}\`:\nExit code: ${shellResult.exit_code}\n${out}`;
932
- if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
1225
+ const resultStr = formatFileResult(call, fileResult);
1226
+ const meta = _metaForTool(tag, fileResult);
1227
+ const error = fileResult.error
1228
+ ? { message: fileResult.error, code: fileResult.error_code || null }
1229
+ : null;
1230
+ if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms, { id: invocationId, call, attrs, meta, error });
933
1231
  results.push(resultStr);
934
1232
  if (debugEntries) debugEntries.push({
935
1233
  tag,
936
1234
  call,
937
1235
  ms,
938
- status: shellResult.exit_code === 0 ? 'ok' : 'nonzero_exit',
939
- exitCode: shellResult.exit_code,
1236
+ status: fileResult.error ? 'error' : 'ok',
1237
+ exitCode: null,
940
1238
  result: resultStr,
941
1239
  });
942
1240
  }
943
- continue;
944
- }
945
-
946
- const fileResult = await agentExecFile(...call);
947
- const ms = Date.now() - toolStart;
948
-
949
- if (fileResult.error === 'Permission denied') {
950
- const resultStr = `${tag} ${call[1] || ''}: Permission denied by user.`;
951
- if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
952
- results.push(resultStr);
953
- if (debugEntries) debugEntries.push({ tag, call, ms, status: 'denied', exitCode: null, result: resultStr });
954
- aborted = true;
955
- break;
956
- } else {
957
- const resultStr = formatFileResult(call, fileResult);
958
- if (cb.onToolEnd) cb.onToolEnd(tag, resultStr, ms);
959
- results.push(resultStr);
960
- if (debugEntries) debugEntries.push({
961
- tag,
962
- call,
963
- ms,
964
- status: fileResult.error ? 'error' : 'ok',
965
- exitCode: null,
966
- result: resultStr,
967
- });
968
- }
969
- } catch (err) {
970
- const ms = Date.now() - toolStart;
971
- if (cb.onToolEnd) cb.onToolEnd(tag, `Error: ${err.message}`, ms);
972
- if (cb.onError) {
973
- cb.onError({ message: `Tool error (${tag}): ${err.message}`, isWarning: true });
974
- } else {
975
- // Non-TUI fallback — see comment on the onToolStart branch above.
976
- process.stdout.write(`\n ${THEME.warn}⚠ Tool error (${tag}): ${err.message}${THEME.reset}\n`);
1241
+ } catch (err) {
1242
+ const ms = Date.now() - toolStart;
1243
+ if (cb.onToolEnd) cb.onToolEnd(tag, `Error: ${err.message}`, ms, { id: invocationId, call, attrs, meta: null, error: err });
1244
+ if (cb.onError) {
1245
+ cb.onError({ message: `Tool error (${tag}): ${err.message}`, isWarning: true });
1246
+ } else {
1247
+ messages.toolError(tag, err.message);
1248
+ }
1249
+ logToolCall(tag, { args: call.slice(1) }, false, 'error');
1250
+ results.push(`${tag}: Error — ${err.message}`);
1251
+ if (debugEntries) debugEntries.push({ tag, call, ms, status: 'exception', exitCode: null, result: `Error — ${err.message}` });
977
1252
  }
978
- logToolCall(tag, { args: call.slice(1) }, false, 'error');
979
- results.push(`${tag}: Error — ${err.message}`);
980
- if (debugEntries) debugEntries.push({ tag, call, ms, status: 'exception', exitCode: null, result: `Error — ${err.message}` });
981
1253
  }
1254
+ } finally {
1255
+ clearInterval(toolAbortWatcher);
982
1256
  }
983
1257
 
984
1258
  if (debug && debugEntries && debugEntries.length > 0) {
@@ -1033,12 +1307,16 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
1033
1307
  if (cb.onError) {
1034
1308
  cb.onError({ message: warnMsg, isWarning: true });
1035
1309
  } else {
1036
- // Non-TUI fallback — see comment above on the Found-actions path.
1037
- process.stdout.write(`\n ${FG_YELLOW}⚠${RST} ${FG_GRAY}${warnMsg}${RST}`);
1310
+ messages.sysWarn(warnMsg);
1038
1311
  }
1039
- // Push whatever results accumulated before the denial so the LLM has
1040
- // context if the user asks to continue.
1312
+ // Push whatever results accumulated before the stop so the LLM has
1313
+ // context if the user asks to continue. The reason matters: an abort
1314
+ // (Ctrl+C) and a denial are both surfaced through the same `aborted`
1315
+ // flag, but the model should know which happened so it doesn't
1316
+ // immediately retry a runaway command after the user explicitly
1317
+ // killed it.
1041
1318
  if (results.length > 0) {
1319
+ const reason = isAborted() ? 'user interrupted' : 'after user denied an action';
1042
1320
  if (isNativeCall) {
1043
1321
  for (let i = 0; i < results.length; i++) {
1044
1322
  messages.push({ role: 'tool', tool_call_id: nativeToolCallIds[i], content: results[i] });
@@ -1046,7 +1324,7 @@ function createAgentRunner({ chatStream, extractToolCalls, agentExecShell, agent
1046
1324
  } else {
1047
1325
  messages.push({
1048
1326
  role: 'user',
1049
- content: `Tool execution results (partial — stopped after user denied an action):\n\n${results.join('\n\n')}`,
1327
+ content: `Tool execution results (partial — stopped: ${reason}):\n\n${results.join('\n\n')}`,
1050
1328
  });
1051
1329
  }
1052
1330
  }