@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
@@ -0,0 +1,669 @@
1
+ 'use strict';
2
+
3
+ // The chat input/turn handler (cmdChat's inputField.onSubmit), extracted in
4
+ // Task 1.5. Handles picker text-fallback navigation, slash-command dispatch, and
5
+ // running a normal message through the agent loop with the full TUI callback
6
+ // wiring. Mutable session fields go through ctx; session/picker/sync helpers and
7
+ // stable collaborators are read from ctx (where cmdChat assigned them). Bodies
8
+ // are unchanged. NOTE: the onToolStart/onToolEnd callbacks take a local `ctx`
9
+ // parameter (the per-tool invocation context) that intentionally shadows the
10
+ // session ctx — through that parameter the callbacks see only per-tool fields; the one session read they need (the live --debug flag) goes via the captured `sessionCtx`.
11
+
12
+ const { resolveMaxIterations } = require('../config');
13
+ const { createWebActivityTracker, serializeWebOp } = require('../ui/web-activity');
14
+ const { createFileActivityTracker } = require('../ui/file-activity');
15
+ const { buildToolOperation, serializeOperation } = require('../ui/tool-operation');
16
+ const { renderOperation } = require('../ui/render-operation');
17
+ const { normalizeCmdForDisplay } = require('../ui/format');
18
+
19
+ function createTurnHandler(ctx, slashHandlers) {
20
+ // The session ctx — the per-tool callbacks below intentionally shadow `ctx`
21
+ // with the per-invocation context, so capture the session here for the few
22
+ // session-level reads they need (the live --debug flag).
23
+ const sessionCtx = ctx;
24
+ const {
25
+ inputField, statusBar, chatHistory, getConfig, approxTokens, resolveCommand,
26
+ runAgentLoop, opts, TAG_REGISTRY, writerModule,
27
+ collapseListMsg, handlePendingSelection, showPendingStep, activateNavCapture, finalizeListMsg,
28
+ createChatIfNeeded, saveTurnToDashboard, saveSession,
29
+ } = ctx;
30
+
31
+ // Running session token totals for the cost indicator (Task 2.6). Each turn's
32
+ // Metrics is per-turn, so we accumulate here for a session cost in the bar.
33
+ const sessionUsage = { prompt_tokens: 0, completion_tokens: 0 };
34
+
35
+ return async (text) => {
36
+ // Handle pending selection (text fallback for non-TTY; TTY uses captureNavigation)
37
+ if (ctx.pendingAction) {
38
+ inputField.releaseNavigation();
39
+ const t = text.trim().toLowerCase();
40
+ const { items, displayItems: di, stepIdx, type } = ctx.pendingAction;
41
+ const activeItems = di || items;
42
+ if (t === 's' || t === 'select' || t === 'y' || t === 'yes') {
43
+ collapseListMsg(type, activeItems[stepIdx]);
44
+ statusBar.update('idle');
45
+ await handlePendingSelection(stepIdx);
46
+ inputField.setDisabled(false);
47
+ return;
48
+ } else if (t === 'n' || t === 'next') {
49
+ ctx.pendingAction = { ...ctx.pendingAction, stepIdx: (stepIdx + 1) % items.length };
50
+ showPendingStep();
51
+ activateNavCapture();
52
+ return;
53
+ } else if (t === 'p' || t === 'prev') {
54
+ ctx.pendingAction = { ...ctx.pendingAction, stepIdx: (stepIdx - 1 + items.length) % items.length };
55
+ showPendingStep();
56
+ activateNavCapture();
57
+ return;
58
+ } else if (t === 'c' || t === 'cancel') {
59
+ finalizeListMsg();
60
+ chatHistory.addMessage({ role: 'system', content: 'Cancelled.' });
61
+ ctx.pendingAction = null;
62
+ statusBar.update('idle');
63
+ inputField.setDisabled(false);
64
+ return;
65
+ } else {
66
+ // Not a nav key: close nav silently and let the message go to AI
67
+ finalizeListMsg();
68
+ ctx.pendingAction = null;
69
+ statusBar.update('idle');
70
+ // fall through to AI processing below
71
+ }
72
+ }
73
+
74
+ // Slash-command dispatch via the registry (replaces the former if-chain).
75
+ // resolveCommand maps the raw text to a canonical command + its argument;
76
+ // null means "not a command" → fall through to the agent below.
77
+ const resolved = resolveCommand(text);
78
+ if (resolved) {
79
+ if (resolved.spec && resolved.spec.custom) {
80
+ // Custom (Markdown) command: render its template and let it fall through
81
+ // to the agent path below as the user prompt. It is submitted as text,
82
+ // never executed as code.
83
+ const { renderTemplate } = require('./custom');
84
+ text = renderTemplate(resolved.spec.template, resolved.arg);
85
+ if (!text.trim()) {
86
+ chatHistory.addMessage({ role: 'system', content: `✗ Custom command ${resolved.name} produced an empty prompt.`, isError: true });
87
+ return;
88
+ }
89
+ } else if (resolved.spec && resolved.spec.skill) {
90
+ // Skill invocation (Task 3.5): the system prompt carried only the skill's
91
+ // metadata. Loading the body HERE — on invocation — is the progressive
92
+ // disclosure: the instructions enter context only now. The body is read
93
+ // from SKILL.md, rendered (so $ARGUMENTS/$1 work if the author used them),
94
+ // and submitted to the agent as a user prompt, never executed as code.
95
+ const { loadSkillBody } = require('../skills');
96
+ const { renderTemplate } = require('./custom');
97
+ let body;
98
+ try { body = loadSkillBody(resolved.spec); } catch { body = ''; }
99
+ if (!body || !body.trim()) {
100
+ chatHistory.addMessage({ role: 'system', content: `✗ Skill ${resolved.name} has no loadable body.`, isError: true });
101
+ return;
102
+ }
103
+ const rendered = renderTemplate(body, resolved.arg);
104
+ // Skills may carry assets/scripts alongside SKILL.md — tell the agent where.
105
+ text = rendered + (resolved.spec.dir ? `\n\n(Skill assets directory: ${resolved.spec.dir})` : '');
106
+ } else {
107
+ await slashHandlers[resolved.name](resolved.arg, text);
108
+ return;
109
+ }
110
+ }
111
+
112
+
113
+ // Block unauthenticated users from running the agent
114
+ if (!getConfig().auth_token) {
115
+ chatHistory.addMessage({ role: 'system', content: '✗ Not logged in. Run /login first.', isError: true });
116
+ return;
117
+ }
118
+
119
+ // Normal message → run agent
120
+ inputField.setDisabled(true);
121
+ chatHistory.addMessage({ role: 'user', content: text });
122
+ statusBar.update('thinking', 'Thinking...');
123
+ // Bump the context-size indicator with this user message's approximate
124
+ // token count. It'll be overwritten with the exact prompt_tokens from
125
+ // the API response when the first turn completes — this just keeps the
126
+ // indicator reactive in the gap before that.
127
+ statusBar.addPendingTokens(approxTokens(text));
128
+ await createChatIfNeeded(text);
129
+ // Multimodal image input (Task 5.4): consume any images staged by /image and
130
+ // attach them to this user turn, then clear the staging buffer.
131
+ const stagedImages = (ctx.pendingImages && ctx.pendingImages.length) ? ctx.pendingImages : null;
132
+ ctx.pendingImages = [];
133
+ const userMessage = { role: 'user', content: text };
134
+ if (stagedImages) userMessage.images = stagedImages;
135
+ ctx.messages.push(userMessage);
136
+
137
+ // Per-turn state: buffer tokens until we know if the model is in an implicit
138
+ // think block (Qwen3-style: plain text followed by </think>, no opening tag).
139
+ let implicitThinkPhase = !opts.showThink;
140
+ let implicitThinkBuffer = '';
141
+ // Live-narration safety signals for the NATIVE rail only (XML rail ignores
142
+ // all three and keeps the buffered-until-boundary behavior unchanged):
143
+ // • nativeRail — set from onStreamStart's first arg; true only
144
+ // when this stream is on the native tool-call rail.
145
+ // • reasoningSeen — signal (a): a delta.reasoning_content arrived this
146
+ // iteration, proving the model uses the structured
147
+ // reasoning channel → subsequent content is narration.
148
+ // • inlineReasoningFalse — signal (b): the active profile asserts
149
+ // inline_reasoning:false → never inlines reasoning.
150
+ // When nativeRail AND (reasoningSeen OR inlineReasoningFalse), onToken may
151
+ // eager-open the implicit-think gate and stream narration live (see below).
152
+ let nativeRail = false;
153
+ let reasoningSeen = false;
154
+ let inlineReasoningFalse = false;
155
+ // Orphan closing reasoning tag, emitted VERBATIM by the StreamParser: a lone
156
+ // </think> (or </reasoning>/</reflection>/</plan>) has no matching open tag, so
157
+ // the parser's closing form `/think` is not a TAG_REGISTRY key (the registry
158
+ // keys are the bare names) and it streams the literal `</tag>` through onToken
159
+ // (agent.js StreamParser: `if (!entry) this.onToken('<' + tagRaw + '>')`).
160
+ // MiniMax-style models emit reasoning via BOTH reasoning_content AND an inline
161
+ // </think> terminator in content; that stray tag must never reach the terminal.
162
+ // Persisted history is already clean (cleanAssistantContent strips it), so this
163
+ // guard is live-stream-only. Match exactly the raw-emitted closing shapes.
164
+ const ORPHAN_CLOSE_TAG_RE = /^<\/(think|reasoning|reflection|plan)>$/i;
165
+
166
+ // Web-activity collapse (Task W.3): in the default (non-debug) view, a run of
167
+ // consecutive web ops (web_search → http_get) renders as ONE process-summary
168
+ // line instead of a per-op line each. Fresh per turn. In --debug the tracker
169
+ // is bypassed and web ops render the normal per-op way (full detail).
170
+ const webTracker = createWebActivityTracker({ writerModule });
171
+
172
+ // File-activity collapse (parallel instance of the web tracker): in the
173
+ // default (non-debug) view, a run of consecutive same-type pure file reads
174
+ // (read_file / list_dir) collapses into ONE process-summary line instead of a
175
+ // per-op line each — unless the run is only 1–2 ops, which still commit as
176
+ // individual lines (decided at flush). read and list group SEPARATELY. Fresh
177
+ // per turn. In --debug the tracker is bypassed (full per-op detail).
178
+ const fileTracker = createFileActivityTracker({ writerModule });
179
+
180
+ const callbacks = {
181
+ onThinking: () => statusBar.update('thinking', 'Thinking...'),
182
+ onRequestSent: () => {
183
+ statusBar.update('thinking', 'Thinking...');
184
+ // Reset think-phase detection for each new agent iteration.
185
+ implicitThinkPhase = !opts.showThink;
186
+ implicitThinkBuffer = '';
187
+ // Reset the live-narration safety signals alongside the gate — each API
188
+ // call re-establishes the rail and re-observes the reasoning channel.
189
+ nativeRail = false;
190
+ reasoningSeen = false;
191
+ inlineReasoningFalse = false;
192
+ },
193
+ onStreamStart: (isNativeRail, inlineReasoning) => {
194
+ // Capture the rail + inline-reasoning assertion threaded from agent.js
195
+ // (signal b). Recorded BEFORE the first content token so onToken's
196
+ // eager-open check below sees them on the very first token.
197
+ nativeRail = !!isNativeRail;
198
+ inlineReasoningFalse = inlineReasoning === false;
199
+ // If showThink is on, switch to streaming immediately.
200
+ // Otherwise keep "Thinking…" until </think> is resolved.
201
+ if (opts.showThink) statusBar.update('streaming', 'Streaming response');
202
+ },
203
+ onReasoningStart: () => {
204
+ // Signal (a): the model emitted reasoning_content this iteration, so the
205
+ // structured reasoning channel is in use. Fires before any delta.content
206
+ // token, so the eager-open in onToken sees it for the first token.
207
+ reasoningSeen = true;
208
+ },
209
+ onTagOpen: (tag, attrs) => {
210
+ const entry = TAG_REGISTRY[tag];
211
+ // Positive-evidence early exit from the implicit-think gate (live narration).
212
+ // The gate buffers leading bare text because a Qwen3-style model emits implicit
213
+ // reasoning as bare text terminated by an orphan </think> — and that reasoning
214
+ // is indistinguishable from ordinary narration until the boundary arrives. So
215
+ // for a plain bare-text preamble we STAY buffered until </think> (handled in
216
+ // onToken); flipping early there could stream hidden reasoning = a leak.
217
+ // But two tag openings are positive PROOF the bare text is NOT implicit
218
+ // reasoning, so we can open the gate and start streaming live immediately:
219
+ // • a <think>/<reasoning>/<reflection>/<plan> tag (display:'think_bubble')
220
+ // means THIS model delimits reasoning with explicit tags; that inner
221
+ // content is consumed by the StreamParser and never reaches onToken (and is
222
+ // suppressed by handleTag when !showThink), so any bare text outside the
223
+ // tag is narration — safe to stream.
224
+ // • a <final_answer> (type:'final') streams its inner content THROUGH onToken
225
+ // (streamInner in the parser), so the gate must open or the answer is
226
+ // swallowed by the buffer.
227
+ // We deliberately do NOT exit on a tool tag: bare-reasoning-then-tool with no
228
+ // </think> is possible (malformed implicit-think), so opening there could leak.
229
+ // Any buffered leading text is DISCARDED here (treated as reasoning), never
230
+ // flushed — preserving implicit-think suppression.
231
+ if (!opts.showThink && implicitThinkPhase &&
232
+ (entry?.display === 'think_bubble' || entry?.type === 'final')) {
233
+ implicitThinkPhase = false;
234
+ implicitThinkBuffer = '';
235
+ statusBar.update('streaming', 'Streaming response');
236
+ }
237
+ if (entry?.type === 'tool') {
238
+ const actionLabel = entry.label || tag;
239
+ const detail = attrs.path || attrs.url || attrs.key || attrs.src || '';
240
+ const isDownload = tag === 'download' || tag === 'http_get';
241
+ const barState = isDownload ? 'waiting_download' : 'tool';
242
+ const label = isDownload
243
+ ? `Waiting for download${detail ? ': ' + detail : ''}`
244
+ : `${actionLabel}${detail ? ': ' + detail : ''}`;
245
+ statusBar.update(barState, label);
246
+ if (!opts.showThink) chatHistory.clearStreamingContent();
247
+ }
248
+ if (entry?.display === 'think_bubble') {
249
+ statusBar.update('thinking', 'Reasoning...');
250
+ }
251
+ },
252
+ onThinkEnd: (content) => {
253
+ chatHistory.addMessage({ role: 'think', content });
254
+ statusBar.update('streaming', 'Streaming response');
255
+ },
256
+ onPermissionAsk: (tag, input) => {
257
+ // Status-bar update fires while the permission picker is open so
258
+ // the user can see what's pending in the side label, not just
259
+ // inside the modal. Mirrors the labels onToolStart uses post-grant
260
+ // — the next streaming/idle state will overwrite this when the
261
+ // picker closes (whether granted or denied).
262
+ const actionLabel = TAG_REGISTRY[tag]?.label || tag;
263
+ // Flatten embedded newlines/tabs (e.g. heredoc commands) BEFORE the
264
+ // slice so the status label is a single physical row. A raw slice of
265
+ // multi-line input rides a \n into the status string → the live region
266
+ // mis-counts rows (1 logical line spanning 2+ physical rows) and leaks
267
+ // stale rules/spinners into scrollback. See Phase 4 fix-A.
268
+ const flat = normalizeCmdForDisplay(input);
269
+ const short = flat.length > 40 ? flat.slice(0, 40) + '…' : flat;
270
+ const isDownload = tag === 'download' || tag === 'http_get';
271
+ if (isDownload) {
272
+ statusBar.update('waiting_download', `Waiting for download: ${short}`);
273
+ } else {
274
+ statusBar.update('tool', `${actionLabel}: ${short}`);
275
+ }
276
+ },
277
+ onToolStart: (tag, input, ctx) => {
278
+ // Phase 7b boundary — commit the PREVIOUS op's held detail band to
279
+ // scrollback BEFORE this op's running line (or web group) is installed,
280
+ // so the committed preview lands above the new activity row. Mirrors the
281
+ // web tracker's "flush previous, then start new" sequencing. No-op when
282
+ // nothing is deferred. Runs before the web branch too, so a non-web
283
+ // preview followed by a web op still commits in chronological order.
284
+ chatHistory.commitDeferredDetail();
285
+ const actionLabel = TAG_REGISTRY[tag]?.label || tag;
286
+ // Flatten before slicing — see onPermissionAsk above (Phase 4 fix-A).
287
+ const flat = normalizeCmdForDisplay(input);
288
+ const short = flat.length > 40 ? flat.slice(0, 40) + '…' : flat;
289
+ const isDownload = tag === 'download' || tag === 'http_get';
290
+ if (isDownload) {
291
+ statusBar.update('waiting_download', `Waiting for download: ${short}`);
292
+ } else {
293
+ statusBar.update('tool', `${actionLabel}: ${short}`);
294
+ }
295
+ // Web- and file-activity collapse: in the default view, fold this op into
296
+ // its running process-summary line instead of its own activity row.
297
+ // --debug bypasses both trackers (full per-op detail). Switching group
298
+ // type — or starting a non-grouped tool — closes the OTHER open group
299
+ // first, so its committed summary lands ABOVE this op in scrollback. (A
300
+ // read↔list key change within the file group is handled inside
301
+ // fileTracker.start.)
302
+ const webOp = !sessionCtx.debugMode && webTracker.isWeb(tag);
303
+ const fileOp = !sessionCtx.debugMode && fileTracker.isGroupable(tag);
304
+ if (!webOp && webTracker.isOpen()) webTracker.flush();
305
+ if (!fileOp && fileTracker.isOpen()) fileTracker.flush();
306
+ if (webOp) {
307
+ webTracker.start(tag, input);
308
+ return;
309
+ }
310
+ if (fileOp) {
311
+ fileTracker.start(tag, input);
312
+ return;
313
+ }
314
+ // Register the invocation with the writer's activity region.
315
+ // The render function is re-invoked by the writer on every
316
+ // redraw so the pending line's elapsed time stays current with
317
+ // the ticker cadence without an explicit refresh timer.
318
+ //
319
+ // ask_user is the only currently-blocking tool — it pauses the
320
+ // agent until the user responds via the modal. A ticking
321
+ // elapsed-time meter on a paused tool is misleading ("13s"
322
+ // suggests work is happening), and the per-tick redraw
323
+ // interacts badly with the open modal (see TECHNICAL_DEBT.md).
324
+ // Render once with no duration meta and freeze. Replace this
325
+ // name check with a category flag (e.g. blocking: true on the
326
+ // tool spec) if more blocking tools appear.
327
+ if (ctx && ctx.id) {
328
+ // Output Refactor (Phase 1): the interactive core tool line is now
329
+ // produced via a ToolOperation descriptor → the pure renderOperation,
330
+ // instead of an inline formatToolLine call. Byte-for-byte identical —
331
+ // this is a re-routing, not a re-styling.
332
+ if (tag === 'ask_user') {
333
+ const staticLine = renderOperation(
334
+ buildToolOperation({ id: ctx.id, tag, arg: input, attrs: ctx.attrs, status: 'pending', noDuration: true }),
335
+ { mode: 'ansi', phase: 'pending' },
336
+ );
337
+ writerModule.startActivity(ctx.id, () => staticLine);
338
+ } else {
339
+ writerModule.startActivity(ctx.id, (elapsedMs) => renderOperation(
340
+ buildToolOperation({ id: ctx.id, tag, arg: input, attrs: ctx.attrs, status: 'pending', durationMs: elapsedMs }),
341
+ { mode: 'ansi', phase: 'pending' },
342
+ ));
343
+ }
344
+ }
345
+ },
346
+ onToolEnd: (tag, result, durationMs, ctx) => {
347
+ const hasError = !!(ctx && ctx.error);
348
+ // Web-activity collapse (Task W.3): record this web op into the running
349
+ // summary instead of committing a per-op line. The summary reflects the
350
+ // failure (a 403/406 or timeout shows as "blocked"); the detailed error
351
+ // body stays hidden in the collapsed view (visible under --debug).
352
+ if (!sessionCtx.debugMode && webTracker.isWeb(tag)) {
353
+ // Live display unchanged — the tracker still owns the collapsed web
354
+ // summary region. Phase 6c-i: instead of returning `undefined` (which
355
+ // persisted a `null` slot → web vanished into the legacy whole-blob
356
+ // summary on replay), hand back a dedicated web-op core so the agent
357
+ // loop's `displayCore || null` push stores it on BOTH rails (native
358
+ // {role:'tool'} `_display`; XML `_display[]` slot). Nothing in the live
359
+ // render path reads this return value, so the live region is untouched;
360
+ // every replay reader treats the web-core as fallback (chat-history /
361
+ // chat-session) so the screen stays byte-identical until 6c-ii.
362
+ webTracker.end(tag, result, durationMs, ctx);
363
+ if (hasError) statusBar.update('streaming', 'Streaming response');
364
+ return serializeWebOp(ctx, tag, durationMs);
365
+ }
366
+ const isBlocking = tag === 'ask_user';
367
+ // Output Refactor (Phase 1): build ONE descriptor for this finished call
368
+ // and render both the committed result line and (below) its diff detail
369
+ // from it — the single source of truth, replacing the inline
370
+ // formatToolLine + buildExecutionDiff pair. Byte-for-byte identical.
371
+ const operation = buildToolOperation({
372
+ id: ctx ? ctx.id : null,
373
+ tag,
374
+ arg: ctx && ctx.attrs ? (ctx.attrs.command || ctx.attrs.path || ctx.attrs.url || ctx.attrs.src || ctx.attrs.key || ctx.attrs.name || ctx.attrs.pattern) : '',
375
+ attrs: ctx ? ctx.attrs : null,
376
+ status: hasError ? 'error' : 'ok',
377
+ durationMs,
378
+ meta: ctx ? ctx.meta : null,
379
+ error: ctx ? ctx.error : null,
380
+ diff: ctx ? ctx.diff : null,
381
+ // Phase 5: hand the model-facing result to the descriptor so it can
382
+ // derive an output-preview detail (shell/MCP/subagent). Chrome only —
383
+ // the model already received the full result via boundToolOutput.
384
+ output: typeof result === 'string' ? result : null,
385
+ noDuration: isBlocking,
386
+ });
387
+ // File-activity collapse: a SUCCESSFUL read_file/list_dir folds into the
388
+ // running file-group aggregate instead of committing its own line — the
389
+ // group's single summary (or, for a 1–2 op run, the individual lines)
390
+ // commits at flush. The op core is STILL persisted here (serializeOperation
391
+ // below) so replay re-groups it. An ERRORED file op does NOT join the
392
+ // group: it falls through to flush the success-group first (so its summary
393
+ // lands ABOVE), then renders the error standalone + error body.
394
+ if (!sessionCtx.debugMode && !hasError && fileTracker.isGroupable(tag)) {
395
+ fileTracker.end(operation);
396
+ return serializeOperation(operation);
397
+ }
398
+ // A non-grouped tool end (or an errored file op) closes any open file
399
+ // group first, so its committed summary lands ABOVE this line.
400
+ if (fileTracker.isOpen()) fileTracker.flush();
401
+ const finalLine = renderOperation(operation, { mode: 'ansi', phase: 'result' });
402
+ if (ctx && ctx.id) {
403
+ writerModule.endActivity(ctx.id, finalLine);
404
+ } else {
405
+ // No invocation id means the agent-loop wasn't upgraded to pass
406
+ // structured context (shouldn't happen in practice). Fall back
407
+ // to a direct scrollback line so the tool still leaves a trace.
408
+ writerModule.scrollback(finalLine);
409
+ }
410
+ // Execution-time file-edit diff. This is the SINGLE site the full diff of
411
+ // a successful mutating edit renders — decoupled from the permission modal,
412
+ // so an auto-approved edit shows its diff exactly like a manual one, and
413
+ // every entry mode (fresh / --resume / /history / /chats) renders it the
414
+ // same way. Loaded history replays through displayLoadedMessages (summaries
415
+ // only), never onToolEnd, so past turns carry no diff payload and are not
416
+ // replayed. Capped at config.diff_max_lines (head+tail for a large edit).
417
+ if (!hasError && operation.detail && operation.detail.kind === 'diff') {
418
+ const diffStr = renderOperation(operation, {
419
+ mode: 'ansi',
420
+ phase: 'detail',
421
+ maxLines: (getConfig() || {}).diff_max_lines,
422
+ });
423
+ if (diffStr) writerModule.scrollback(diffStr);
424
+ }
425
+ // Phase 5/7b: collapsed output preview for shell/MCP/subagent successes.
426
+ // DEFERRED into the writer's redrawable detail band (not committed to
427
+ // scrollback yet) — the held slot commits once at the next boundary
428
+ // (next-op start / assistant answer / turn end). The preview is static
429
+ // (first N lines + `… M more lines`, no expand affordance). Model-facing
430
+ // context is untouched (the full output already reached the model). The
431
+ // result line above and any diff still commit immediately.
432
+ if (!hasError && operation.detail && operation.detail.kind === 'output') {
433
+ chatHistory.deferToolOutput({
434
+ role: 'tool',
435
+ tag,
436
+ content: '',
437
+ output: operation.detail.payload.body,
438
+ previewLines: (getConfig() || {}).shell_preview_lines || 5,
439
+ });
440
+ }
441
+ if (hasError) {
442
+ // Preserve the expandable error body as a follow-up tool
443
+ // bubble. Empty content suppresses its header so the scrollback
444
+ // line above (written by endActivity) isn't duplicated.
445
+ const body = typeof result === 'string' && result.trim() ? result : null;
446
+ if (body) {
447
+ chatHistory.addMessage({ role: 'tool', tag, content: '', output: body, isError: true });
448
+ }
449
+ statusBar.update('streaming', 'Streaming response');
450
+ }
451
+ // Phase 6a — hand the SAME descriptor back to the agent loop (serialized)
452
+ // so the native rail can persist it as a `_display` sibling on the tool
453
+ // result message; replay then rebuilds it for full-fidelity rendering.
454
+ // Display chrome only — never touches the model-facing `content`. The
455
+ // web-activity path above returns its own web-op core (Phase 6c-i) which
456
+ // every replay reader routes to the legacy fallback, so web ops still
457
+ // render via the summary on replay (aggregation lands in 6c-ii).
458
+ return serializeOperation(operation);
459
+ },
460
+ onToken: (token) => {
461
+ if (!opts.showThink && implicitThinkPhase) {
462
+ // NATIVE-RAIL eager-open (live token-by-token narration). Gated on a
463
+ // safety signal so reasoning is NEVER leaked: open the gate eagerly
464
+ // ONLY when this stream is on the native rail AND the model has proven
465
+ // (a) it uses the structured reasoning channel this iteration
466
+ // (reasoningSeen) OR (b) it asserts inline_reasoning:false. In either
467
+ // case the leading content is narration, so we open the gate, drop the
468
+ // (empty) buffer, and fall through to stream THIS token live. The XML
469
+ // rail and the no-signal native case skip this branch entirely and keep
470
+ // the buffered-until-boundary fallback below (no behavior change, no
471
+ // leak). Mirror the think_bubble/orphan-</think> exits' status update.
472
+ if (nativeRail && (reasoningSeen || inlineReasoningFalse)) {
473
+ implicitThinkPhase = false;
474
+ implicitThinkBuffer = '';
475
+ statusBar.update('streaming', 'Streaming response');
476
+ // fall through — stream this and all subsequent tokens live. The
477
+ // orphan-close-tag filter below still runs so a stray </think> that
478
+ // MiniMax inlines alongside reasoning_content never reaches the
479
+ // terminal (regression from 938f583's eager-open, which skipped the
480
+ // else-branch's drop guard for this and every subsequent token).
481
+ } else {
482
+ // Check if this token is the closing think tag (Qwen3-style implicit think).
483
+ if (ORPHAN_CLOSE_TAG_RE.test(token.trim())) {
484
+ // Thinking phase is over — discard buffered reasoning, start streaming.
485
+ implicitThinkPhase = false;
486
+ implicitThinkBuffer = '';
487
+ statusBar.update('streaming', 'Streaming response');
488
+ return;
489
+ }
490
+ // Buffer the token; keep the thinking animation visible.
491
+ implicitThinkBuffer += token;
492
+ return;
493
+ }
494
+ }
495
+ // Drop any orphan closing reasoning tag on every token, regardless of which
496
+ // branch opened the gate (eager-open or showThink). The StreamParser emits
497
+ // these verbatim, so once the gate is open they would otherwise stream live.
498
+ if (ORPHAN_CLOSE_TAG_RE.test(token.trim())) return;
499
+ chatHistory.streamToken(token);
500
+ statusBar.onToken();
501
+ },
502
+ onAssistantMessage: (cleanContent, meta) => {
503
+ // If </think> was never seen, the model had no implicit think block — its
504
+ // leading text was ordinary narration. Drop the raw buffered tokens: the
505
+ // cleaned, canonical narration arrives as `cleanContent` and is rendered by
506
+ // finalizeLastMessage below (as a pre-tool bubble when nothing streamed live),
507
+ // so re-emitting the raw buffer would double it.
508
+ if (implicitThinkPhase && implicitThinkBuffer) {
509
+ implicitThinkPhase = false;
510
+ implicitThinkBuffer = '';
511
+ }
512
+ // Terminal-iteration signal. agent.js now passes `{ terminal }` explicitly
513
+ // (true only on the final, no-tool-call answer). Fall back to the legacy
514
+ // "content is non-empty" proxy when the flag is absent (older callers / the
515
+ // web-ordering unit tests drive these callbacks directly with one arg).
516
+ const terminal = meta && typeof meta.terminal === 'boolean'
517
+ ? meta.terminal
518
+ : !!(cleanContent && cleanContent.trim());
519
+ // Web-activity ordering (W.3 regression fix): commit any still-open web
520
+ // group BEFORE the answer is finalized, so the collapsed "✓ web · …"
521
+ // summary lands ABOVE the answer in scrollback (pre-W.3 ordering).
522
+ //
523
+ // Guard on the TERMINAL signal (no tool calls this iteration). Intermediate
524
+ // web-tool iterations are non-terminal — they keep the group open so a
525
+ // multi-step search→fetch still collapses into a single line (the W.3
526
+ // guarantee). Pre-live-narration this used "cleanContent is empty" as the
527
+ // proxy for intermediate; now intermediate iterations carry narration too,
528
+ // so we rely on the explicit `terminal` flag instead — otherwise an
529
+ // intermediate narration would flush the group early and split the line.
530
+ // Empty/interrupted turns (no terminal message ever arrives) fall back to
531
+ // the turn-end `finally` flush, which is the safety net.
532
+ if (terminal && webTracker.isOpen()) {
533
+ webTracker.flush();
534
+ }
535
+ // Same terminal-gating for the file group: only the explicit terminal
536
+ // signal flushes, so intermediate-iteration narration does NOT split a
537
+ // multi-iteration read run — it still collapses to one summary.
538
+ if (terminal && fileTracker.isOpen()) {
539
+ fileTracker.flush();
540
+ }
541
+ chatHistory.finalizeLastMessage(cleanContent);
542
+ },
543
+ onMetricsUpdate: (data) => statusBar.updateMetrics(data),
544
+ onRetry: (attempt, max) => {
545
+ statusBar.update('thinking', `Retrying (${attempt}/${max})...`);
546
+ },
547
+ onDebug: (block) => {
548
+ // Render in-history as a tool-style bubble so the RAW RESPONSE text
549
+ // survives TUI redraws (stderr would be clobbered).
550
+ chatHistory.addMessage({ role: 'tool', tag: 'debug', content: 'DEBUG', output: block });
551
+ },
552
+ onError: (err) => {
553
+ if (err && err.isWarning) {
554
+ chatHistory.addMessage({ role: 'system', content: err.message || String(err) });
555
+ } else {
556
+ const msg = (err && err.message) || String(err);
557
+ statusBar.update('error', msg);
558
+ chatHistory.addMessage({ role: 'system', content: `✗ ${msg}`, isError: true });
559
+ }
560
+ },
561
+ onPlanWithhold: (tag, arg) => {
562
+ chatHistory.addMessage({ role: 'system', content: `⏸ Planned (withheld): ${tag}${arg ? ' ' + arg : ''}` });
563
+ },
564
+ };
565
+
566
// Abort wiring for this turn. `_agentAborted` is the flag the agent loop
// polls through getAbortFlag; `_onAbort` latches it on the first 'abort'
// event from the input field and announces the interruption exactly once —
// repeated abort events are ignored.
let _agentAborted = false;
const _onAbort = () => {
  if (_agentAborted) return;
  _agentAborted = true;
  chatHistory.addMessage({ role: 'system', content: '⏹ Interrupted.' });
};
inputField.on('abort', _onAbort);
574
+
575
// Refresh in case a prior turn's 400 overflow persisted a learned
// context_length to config after this chat started. Only fills the limit
// when none has been resolved yet, and only with a positive integer.
//
// The config read is null-guarded, matching the other `(getConfig() || {})`
// reads in this file. This statement runs after the 'abort' listener has
// been attached and outside any try/finally, so a throw here would skip
// the turn's only removeListener call and leave the listener registered.
if (ctx.resolvedTokenLimit == null) {
  const cfg = getConfig() || {};
  const learnedLimit = cfg.context_length;
  if (Number.isInteger(learnedLimit) && learnedLimit > 0) {
    ctx.resolvedTokenLimit = learnedLimit;
  }
}
583
+
584
// Auto-compaction near the context limit (Task 2.7). Before the request is
// sent, older turns are summarized so they survive as a summary instead of
// being dropped by api.js trimToTokenBudget. Entirely best-effort: any
// failure in here is swallowed and the turn proceeds with the messages
// untouched.
try {
  const {
    shouldAutoCompact,
    selectForCompaction,
    summarizationRequest,
    buildCompactedMessages,
    approxTokens: approxTok,
  } = require('../compact');

  const tokenLimit = ctx.resolvedTokenLimit;
  const tokensBefore = approxTok(ctx.messages, approxTokens);

  // Only pick a head/tail split when compaction is actually warranted; the
  // six most recent messages are requested to be kept verbatim.
  const selection = shouldAutoCompact(tokensBefore, tokenLimit, ctx.messages.length)
    ? selectForCompaction(ctx.messages, { keepRecent: 6 })
    : null;

  if (selection && selection.head.length) {
    // PreCompact hook (Task 3.4): fires before summarizing. Best-effort —
    // hook failures never block compaction.
    try {
      const hookRunner = require('../hooks').createHookRunner({ getConfig });
      await hookRunner.run('PreCompact', { reason: 'auto', messageCount: ctx.messages.length });
    } catch { /* hook failures never block compaction */ }

    const summary = await ctx.chatSync(
      summarizationRequest(selection.head),
      { model: ctx.currentModel }
    );

    // An empty or whitespace-only summary leaves the conversation as-is.
    if (summary && summary.trim()) {
      ctx.messages = buildCompactedMessages(selection, summary);
      const tokensAfter = approxTok(ctx.messages, approxTokens);
      chatHistory.addMessage({ role: 'system', content: `✓ Auto-compacted near context limit: ~${tokensBefore} → ~${tokensAfter} tokens.` });
    }
  }
} catch { /* auto-compaction is best-effort */ }
608
+
609
// Run the agent loop for this turn, then fold its results back into the
// session context. Any failure is reported in the status bar and the chat
// history; the `finally` clause always commits pending display state and
// detaches the abort listener.
try {
  // Config reads use the null-safe `(getConfig() || {})` idiom used by the
  // other config reads in this file, so a missing config yields defaults
  // instead of a TypeError.
  const agentResult = await runAgentLoop(
    ctx.messages,
    ctx.currentModel,
    resolveMaxIterations((getConfig() || {}).max_iterations),
    ctx.resolvedTokenLimit,
    {
      showThink: opts.showThink || false,
      debug: ctx.debugMode,
      callbacks,
      systemPrompt: ctx.resolvedSystemPrompt,
      systemPromptMode: (getConfig() || {}).system_prompt_mode || 'system_role',
      // Polled by the loop; flipped by the 'abort' listener.
      getAbortFlag: () => _agentAborted,
      getPlanMode: () => ctx.planMode,
      noVerify: !!opts.noVerify,
    }
  );
  ctx.messages = agentResult.messages;
  ctx.sessionMetrics = agentResult.metrics;

  // Cost indicator (Task 2.6): accumulate this turn's usage and render the
  // session cost. Unknown model price → "unknown", never a fake $0.
  // Best-effort: a failure here must not fail the turn.
  try {
    const cfg = getConfig();
    if (cfg.show_cost && agentResult.metrics && Array.isArray(agentResult.metrics.turns)) {
      for (const t of agentResult.metrics.turns) {
        sessionUsage.prompt_tokens += t.promptTokens || 0;
        sessionUsage.completion_tokens += t.completionTokens || 0;
      }
      const { priceForModel, computeCost, formatCost } = require('../pricing');
      const cost = computeCost(sessionUsage, priceForModel(ctx.currentModel, cfg.pricing));
      if (typeof statusBar.setCost === 'function') statusBar.setCost(formatCost(cost));
    }
  } catch { /* cost display is best-effort */ }

  // In plan mode, tell the user how many actions were held back and how to
  // approve or discard them.
  if (ctx.planMode && agentResult.withheldActions && agentResult.withheldActions.length) {
    chatHistory.addMessage({
      role: 'system',
      content: `Plan ready — ${agentResult.withheldActions.length} action(s) withheld. Run /plan to approve and execute, or /clear to discard.`,
    });
  }
} catch (err) {
  // Null-safe, mirroring the onError callback: a rejection value of
  // null/undefined must not throw from inside this handler, because that
  // would skip the teardown that follows the try statement.
  const msg = (err && err.message) || 'Agent error';
  statusBar.update('error', msg);
  chatHistory.addMessage({ role: 'system', content: msg, isError: true });
} finally {
  // Phase 7b boundary — commit any trailing op's held detail band before the
  // turn unwinds (the turn may have ended right after a tool with no
  // following message). No-op when nothing is deferred; ordered before the
  // web flush (the two are mutually exclusive in practice).
  try { chatHistory.commitDeferredDetail(); } catch { /* never block turn teardown */ }
  // Commit any still-open web-activity summary (the turn may have ended right
  // after a web op, or been interrupted mid-group) before the turn unwinds.
  try { webTracker.flush(); } catch { /* never block turn teardown */ }
  // Commit any still-open file-activity group (turn ended right after a read
  // run, or was interrupted mid-group) before the turn unwinds.
  try { fileTracker.flush(); } catch { /* never block turn teardown */ }
  // Always detach the per-turn abort listener so listeners do not pile up
  // across turns.
  inputField.removeListener('abort', _onAbort);
}
661
+
662
+ statusBar.update('idle');
663
+ inputField.setDisabled(false);
664
+ await saveTurnToDashboard();
665
+ saveSession();
666
+ };
667
+ }
668
+
669
+ module.exports = { createTurnHandler };