@semalt-ai/code 1.19.0 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/.claude/settings.local.json +2 -1
  2. package/ARCHITECTURE.md +6 -95
  3. package/CLAUDE.md +196 -1874
  4. package/README.md +1 -1
  5. package/docs/ARCHITECTURE.md +1321 -0
  6. package/docs/CONFIG.md +340 -0
  7. package/docs/HISTORY.md +245 -0
  8. package/index.js +1 -1
  9. package/lib/agent.js +145 -16
  10. package/lib/api.js +28 -3
  11. package/lib/commands/chat-session.js +187 -4
  12. package/lib/commands/chat-slash.js +16 -0
  13. package/lib/commands/chat-turn.js +272 -49
  14. package/lib/commands/chat.js +12 -8
  15. package/lib/config.js +27 -0
  16. package/lib/constants.js +30 -1
  17. package/lib/headless.js +36 -1
  18. package/lib/images.js +8 -2
  19. package/lib/permissions.js +23 -16
  20. package/lib/prompts.js +15 -3
  21. package/lib/tool_registry.js +357 -53
  22. package/lib/tool_specs.js +42 -8
  23. package/lib/tools.js +80 -19
  24. package/lib/ui/anim.js +86 -0
  25. package/lib/ui/ansi.js +17 -27
  26. package/lib/ui/chat-history.js +253 -71
  27. package/lib/ui/create-ui.js +67 -24
  28. package/lib/ui/diff.js +90 -25
  29. package/lib/ui/file-activity.js +236 -0
  30. package/lib/ui/format.js +173 -28
  31. package/lib/ui/input-field.js +5 -4
  32. package/lib/ui/md-stream.js +234 -0
  33. package/lib/ui/render-operation.js +113 -0
  34. package/lib/ui/select.js +1 -4
  35. package/lib/ui/status-bar.js +99 -57
  36. package/lib/ui/stream.js +20 -13
  37. package/lib/ui/theme.js +190 -45
  38. package/lib/ui/tool-operation.js +190 -0
  39. package/lib/ui/utils.js +9 -5
  40. package/lib/ui/web-activity.js +58 -6
  41. package/lib/ui/writer.js +159 -45
  42. package/lib/ui.js +1 -1
  43. package/package.json +1 -1
  44. package/test/anim-driver.test.js +153 -0
  45. package/test/ask-user-display.test.js +226 -0
  46. package/test/ask-user-gate.test.js +231 -0
  47. package/test/chat-history-nocolor.test.js +155 -0
  48. package/test/chat-relogin.test.js +207 -0
  49. package/test/defer-detail-band.test.js +403 -0
  50. package/test/detail-band-tab-flatten.test.js +242 -0
  51. package/test/exec-diff.test.js +268 -0
  52. package/test/executors.test.js +250 -13
  53. package/test/extract-tool-calls.test.js +37 -3
  54. package/test/file-activity.test.js +522 -0
  55. package/test/grep-path-target.test.js +227 -0
  56. package/test/harness/chat-harness.js +2 -1
  57. package/test/headless.test.js +146 -1
  58. package/test/input-field-ctrl-o.test.js +37 -0
  59. package/test/live-height-physical.test.js +281 -0
  60. package/test/max-iterations.test.js +9 -7
  61. package/test/md-stream.test.js +183 -0
  62. package/test/native-dispatch.test.js +53 -0
  63. package/test/native-live-narration.test.js +254 -0
  64. package/test/output-heredoc-leak.test.js +195 -0
  65. package/test/output-preview.test.js +245 -0
  66. package/test/permissions.test.js +199 -0
  67. package/test/read-paginate.test.js +1 -1
  68. package/test/render-operation.test.js +317 -0
  69. package/test/replay-descriptor-xml.test.js +216 -0
  70. package/test/replay-descriptor.test.js +189 -0
  71. package/test/replay-web-aggregate.test.js +291 -0
  72. package/test/replay-web-persist.test.js +241 -0
  73. package/test/running-glyph-anim.test.js +111 -0
  74. package/test/status-bar-driver.test.js +93 -0
  75. package/test/status-bar-resync.test.js +188 -0
  76. package/test/stream-parser.test.js +24 -0
  77. package/test/theme-palette.test.js +166 -0
  78. package/test/truncate-visible.test.js +78 -0
  79. package/test/view-image.test.js +199 -0
  80. package/test/web-activity-ordering.test.js +12 -3
  81. package/path +0 -1
@@ -10,7 +10,11 @@
10
10
  // session ctx — those callbacks use only the per-tool fields, never session state.
11
11
 
12
12
  const { resolveMaxIterations } = require('../config');
13
- const { createWebActivityTracker } = require('../ui/web-activity');
13
+ const { createWebActivityTracker, serializeWebOp } = require('../ui/web-activity');
14
+ const { createFileActivityTracker } = require('../ui/file-activity');
15
+ const { buildToolOperation, serializeOperation } = require('../ui/tool-operation');
16
+ const { renderOperation } = require('../ui/render-operation');
17
+ const { normalizeCmdForDisplay } = require('../ui/format');
14
18
 
15
19
  function createTurnHandler(ctx, slashHandlers) {
16
20
  // The session ctx — the per-tool callbacks below intentionally shadow `ctx`
@@ -19,7 +23,7 @@ function createTurnHandler(ctx, slashHandlers) {
19
23
  const sessionCtx = ctx;
20
24
  const {
21
25
  inputField, statusBar, chatHistory, getConfig, approxTokens, resolveCommand,
22
- runAgentLoop, opts, TAG_REGISTRY, formatToolLine, writerModule,
26
+ runAgentLoop, opts, TAG_REGISTRY, writerModule,
23
27
  collapseListMsg, handlePendingSelection, showPendingStep, activateNavCapture, finalizeListMsg,
24
28
  createChatIfNeeded, saveTurnToDashboard, saveSession,
25
29
  } = ctx;
@@ -134,6 +138,30 @@ function createTurnHandler(ctx, slashHandlers) {
134
138
  // think block (Qwen3-style: plain text followed by </think>, no opening tag).
135
139
  let implicitThinkPhase = !opts.showThink;
136
140
  let implicitThinkBuffer = '';
141
+ // Live-narration safety signals for the NATIVE rail only (XML rail ignores
142
+ // all three and keeps the buffered-until-boundary behavior unchanged):
143
+ // • nativeRail — set from onStreamStart's first arg; true only
144
+ // when this stream is on the native tool-call rail.
145
+ // • reasoningSeen — signal (a): a delta.reasoning_content arrived this
146
+ // iteration, proving the model uses the structured
147
+ // reasoning channel → subsequent content is narration.
148
+ // • inlineReasoningFalse — signal (b): the active profile asserts
149
+ // inline_reasoning:false → never inlines reasoning.
150
+ // When nativeRail AND (reasoningSeen OR inlineReasoningFalse), onToken may
151
+ // eager-open the implicit-think gate and stream narration live (see below).
152
+ let nativeRail = false;
153
+ let reasoningSeen = false;
154
+ let inlineReasoningFalse = false;
155
+ // Orphan closing reasoning tag, emitted VERBATIM by the StreamParser: a lone
156
+ // </think> (or </reasoning>/</reflection>/</plan>) has no matching open tag, so
157
+ // the parser's closing form `/think` is not a TAG_REGISTRY key (the registry
158
+ // keys are the bare names) and it streams the literal `</tag>` through onToken
159
+ // (agent.js StreamParser: `if (!entry) this.onToken('<' + tagRaw + '>')`).
160
+ // MiniMax-style models emit reasoning via BOTH reasoning_content AND an inline
161
+ // </think> terminator in content; that stray tag must never reach the terminal.
162
+ // Persisted history is already clean (cleanAssistantContent strips it), so this
163
+ // guard is live-stream-only. Match exactly the raw-emitted closing shapes.
164
+ const ORPHAN_CLOSE_TAG_RE = /^<\/(think|reasoning|reflection|plan)>$/i;
137
165
 
138
166
  // Web-activity collapse (Task W.3): in the default (non-debug) view, a run of
139
167
  // consecutive web ops (web_search → http_get) renders as ONE process-summary
@@ -141,6 +169,14 @@ function createTurnHandler(ctx, slashHandlers) {
141
169
  // is bypassed and web ops render the normal per-op way (full detail).
142
170
  const webTracker = createWebActivityTracker({ writerModule });
143
171
 
172
+ // File-activity collapse (parallel instance of the web tracker): in the
173
+ // default (non-debug) view, a run of consecutive same-type pure file reads
174
+ // (read_file / list_dir) collapses into ONE process-summary line instead of a
175
+ // per-op line each — unless the run is only 1–2 ops, which still commit as
176
+ // individual lines (decided at flush). read and list group SEPARATELY. Fresh
177
+ // per turn. In --debug the tracker is bypassed (full per-op detail).
178
+ const fileTracker = createFileActivityTracker({ writerModule });
179
+
144
180
  const callbacks = {
145
181
  onThinking: () => statusBar.update('thinking', 'Thinking...'),
146
182
  onRequestSent: () => {
@@ -148,14 +184,56 @@ function createTurnHandler(ctx, slashHandlers) {
148
184
  // Reset think-phase detection for each new agent iteration.
149
185
  implicitThinkPhase = !opts.showThink;
150
186
  implicitThinkBuffer = '';
187
+ // Reset the live-narration safety signals alongside the gate — each API
188
+ // call re-establishes the rail and re-observes the reasoning channel.
189
+ nativeRail = false;
190
+ reasoningSeen = false;
191
+ inlineReasoningFalse = false;
151
192
  },
152
- onStreamStart: () => {
193
+ onStreamStart: (isNativeRail, inlineReasoning) => {
194
+ // Capture the rail + inline-reasoning assertion threaded from agent.js
195
+ // (signal b). Recorded BEFORE the first content token so onToken's
196
+ // eager-open check below sees them on the very first token.
197
+ nativeRail = !!isNativeRail;
198
+ inlineReasoningFalse = inlineReasoning === false;
153
199
  // If showThink is on, switch to streaming immediately.
154
200
  // Otherwise keep "Thinking…" until </think> is resolved.
155
201
  if (opts.showThink) statusBar.update('streaming', 'Streaming response');
156
202
  },
203
+ onReasoningStart: () => {
204
+ // Signal (a): the model emitted reasoning_content this iteration, so the
205
+ // structured reasoning channel is in use. Fires before any delta.content
206
+ // token, so the eager-open in onToken sees it for the first token.
207
+ reasoningSeen = true;
208
+ },
157
209
  onTagOpen: (tag, attrs) => {
158
210
  const entry = TAG_REGISTRY[tag];
211
+ // Positive-evidence early exit from the implicit-think gate (live narration).
212
+ // The gate buffers leading bare text because a Qwen3-style model emits implicit
213
+ // reasoning as bare text terminated by an orphan </think> — and that reasoning
214
+ // is indistinguishable from ordinary narration until the boundary arrives. So
215
+ // for a plain bare-text preamble we STAY buffered until </think> (handled in
216
+ // onToken); flipping early there could stream hidden reasoning = a leak.
217
+ // But two tag openings are positive PROOF the bare text is NOT implicit
218
+ // reasoning, so we can open the gate and start streaming live immediately:
219
+ // • a <think>/<reasoning>/<reflection>/<plan> tag (display:'think_bubble')
220
+ // means THIS model delimits reasoning with explicit tags; that inner
221
+ // content is consumed by the StreamParser and never reaches onToken (and is
222
+ // suppressed by handleTag when !showThink), so any bare text outside the
223
+ // tag is narration — safe to stream.
224
+ // • a <final_answer> (type:'final') streams its inner content THROUGH onToken
225
+ // (streamInner in the parser), so the gate must open or the answer is
226
+ // swallowed by the buffer.
227
+ // We deliberately do NOT exit on a tool tag: bare-reasoning-then-tool with no
228
+ // </think> is possible (malformed implicit-think), so opening there could leak.
229
+ // Any buffered leading text is DISCARDED here (treated as reasoning), never
230
+ // flushed — preserving implicit-think suppression.
231
+ if (!opts.showThink && implicitThinkPhase &&
232
+ (entry?.display === 'think_bubble' || entry?.type === 'final')) {
233
+ implicitThinkPhase = false;
234
+ implicitThinkBuffer = '';
235
+ statusBar.update('streaming', 'Streaming response');
236
+ }
159
237
  if (entry?.type === 'tool') {
160
238
  const actionLabel = entry.label || tag;
161
239
  const detail = attrs.path || attrs.url || attrs.key || attrs.src || '';
@@ -182,7 +260,13 @@ function createTurnHandler(ctx, slashHandlers) {
182
260
  // — the next streaming/idle state will overwrite this when the
183
261
  // picker closes (whether granted or denied).
184
262
  const actionLabel = TAG_REGISTRY[tag]?.label || tag;
185
- const short = input && input.length > 40 ? input.slice(0, 40) + '…' : (input || '');
263
+ // Flatten embedded newlines/tabs (e.g. heredoc commands) BEFORE the
264
+ // slice so the status label is a single physical row. A raw slice of
265
+ // multi-line input rides a \n into the status string → the live region
266
+ // mis-counts rows (1 logical line spanning 2+ physical rows) and leaks
267
+ // stale rules/spinners into scrollback. See Phase 4 fix-A.
268
+ const flat = normalizeCmdForDisplay(input);
269
+ const short = flat.length > 40 ? flat.slice(0, 40) + '…' : flat;
186
270
  const isDownload = tag === 'download' || tag === 'http_get';
187
271
  if (isDownload) {
188
272
  statusBar.update('waiting_download', `Waiting for download: ${short}`);
@@ -191,24 +275,42 @@ function createTurnHandler(ctx, slashHandlers) {
191
275
  }
192
276
  },
193
277
  onToolStart: (tag, input, ctx) => {
278
+ // Phase 7b boundary — commit the PREVIOUS op's held detail band to
279
+ // scrollback BEFORE this op's running line (or web group) is installed,
280
+ // so the committed preview lands above the new activity row. Mirrors the
281
+ // web tracker's "flush previous, then start new" sequencing. No-op when
282
+ // nothing is deferred. Runs before the web branch too, so a non-web
283
+ // preview followed by a web op still commits in chronological order.
284
+ chatHistory.commitDeferredDetail();
194
285
  const actionLabel = TAG_REGISTRY[tag]?.label || tag;
195
- const short = input && input.length > 40 ? input.slice(0, 40) + '…' : (input || '');
286
+ // Flatten before slicing — see onPermissionAsk above (Phase 4 fix-A).
287
+ const flat = normalizeCmdForDisplay(input);
288
+ const short = flat.length > 40 ? flat.slice(0, 40) + '…' : flat;
196
289
  const isDownload = tag === 'download' || tag === 'http_get';
197
290
  if (isDownload) {
198
291
  statusBar.update('waiting_download', `Waiting for download: ${short}`);
199
292
  } else {
200
293
  statusBar.update('tool', `${actionLabel}: ${short}`);
201
294
  }
202
- // Web-activity collapse (Task W.3): in the default view, fold this web op
203
- // into the running process-summary line instead of its own activity row.
204
- // --debug keeps the per-op line (fall through to the normal path below).
205
- if (!sessionCtx.debugMode && webTracker.isWeb(tag)) {
295
+ // Web- and file-activity collapse: in the default view, fold this op into
296
+ // its running process-summary line instead of its own activity row.
297
+ // --debug bypasses both trackers (full per-op detail). Switching group
298
+ // type — or starting a non-grouped tool — closes the OTHER open group
299
+ // first, so its committed summary lands ABOVE this op in scrollback. (A
300
+ // read↔list key change within the file group is handled inside
301
+ // fileTracker.start.)
302
+ const webOp = !sessionCtx.debugMode && webTracker.isWeb(tag);
303
+ const fileOp = !sessionCtx.debugMode && fileTracker.isGroupable(tag);
304
+ if (!webOp && webTracker.isOpen()) webTracker.flush();
305
+ if (!fileOp && fileTracker.isOpen()) fileTracker.flush();
306
+ if (webOp) {
206
307
  webTracker.start(tag, input);
207
308
  return;
208
309
  }
209
- // A non-web tool (or debug mode) closes any open web group first, so its
210
- // committed summary lands ABOVE this tool's line in scrollback.
211
- if (webTracker.isOpen()) webTracker.flush();
310
+ if (fileOp) {
311
+ fileTracker.start(tag, input);
312
+ return;
313
+ }
212
314
  // Register the invocation with the writer's activity region.
213
315
  // The render function is re-invoked by the writer on every
214
316
  // redraw so the pending line's elapsed time stays current with
@@ -223,23 +325,21 @@ function createTurnHandler(ctx, slashHandlers) {
223
325
  // name check with a category flag (e.g. blocking: true on the
224
326
  // tool spec) if more blocking tools appear.
225
327
  if (ctx && ctx.id) {
328
+ // Output Refactor (Phase 1): the interactive core tool line is now
329
+ // produced via a ToolOperation descriptor → the pure renderOperation,
330
+ // instead of an inline formatToolLine call. Byte-for-byte identical —
331
+ // this is a re-routing, not a re-styling.
226
332
  if (tag === 'ask_user') {
227
- const staticLine = formatToolLine({
228
- status: 'pending',
229
- tag,
230
- arg: input,
231
- attrs: ctx.attrs,
232
- noDuration: true,
233
- });
333
+ const staticLine = renderOperation(
334
+ buildToolOperation({ id: ctx.id, tag, arg: input, attrs: ctx.attrs, status: 'pending', noDuration: true }),
335
+ { mode: 'ansi', phase: 'pending' },
336
+ );
234
337
  writerModule.startActivity(ctx.id, () => staticLine);
235
338
  } else {
236
- writerModule.startActivity(ctx.id, (elapsedMs) => formatToolLine({
237
- status: 'pending',
238
- tag,
239
- arg: input,
240
- attrs: ctx.attrs,
241
- durationMs: elapsedMs,
242
- }));
339
+ writerModule.startActivity(ctx.id, (elapsedMs) => renderOperation(
340
+ buildToolOperation({ id: ctx.id, tag, arg: input, attrs: ctx.attrs, status: 'pending', durationMs: elapsedMs }),
341
+ { mode: 'ansi', phase: 'pending' },
342
+ ));
243
343
  }
244
344
  }
245
345
  },
@@ -250,21 +350,55 @@ function createTurnHandler(ctx, slashHandlers) {
250
350
  // failure (a 403/406 or timeout shows as "blocked"); the detailed error
251
351
  // body stays hidden in the collapsed view (visible under --debug).
252
352
  if (!sessionCtx.debugMode && webTracker.isWeb(tag)) {
353
+ // Live display unchanged — the tracker still owns the collapsed web
354
+ // summary region. Phase 6c-i: instead of returning `undefined` (which
355
+ // persisted a `null` slot → web vanished into the legacy whole-blob
356
+ // summary on replay), hand back a dedicated web-op core so the agent
357
+ // loop's `displayCore || null` push stores it on BOTH rails (native
358
+ // {role:'tool'} `_display`; XML `_display[]` slot). Nothing in the live
359
+ // render path reads this return value, so the live region is untouched;
360
+ // every replay reader treats the web-core as fallback (chat-history /
361
+ // chat-session) so the screen stays byte-identical until 6c-ii.
253
362
  webTracker.end(tag, result, durationMs, ctx);
254
363
  if (hasError) statusBar.update('streaming', 'Streaming response');
255
- return;
364
+ return serializeWebOp(ctx, tag, durationMs);
256
365
  }
257
366
  const isBlocking = tag === 'ask_user';
258
- const finalLine = formatToolLine({
259
- status: hasError ? 'failure' : 'success',
367
+ // Output Refactor (Phase 1): build ONE descriptor for this finished call
368
+ // and render both the committed result line and (below) its diff detail
369
+ // from it — the single source of truth, replacing the inline
370
+ // formatToolLine + buildExecutionDiff pair. Byte-for-byte identical.
371
+ const operation = buildToolOperation({
372
+ id: ctx ? ctx.id : null,
260
373
  tag,
261
374
  arg: ctx && ctx.attrs ? (ctx.attrs.command || ctx.attrs.path || ctx.attrs.url || ctx.attrs.src || ctx.attrs.key || ctx.attrs.name || ctx.attrs.pattern) : '',
262
375
  attrs: ctx ? ctx.attrs : null,
376
+ status: hasError ? 'error' : 'ok',
263
377
  durationMs,
264
378
  meta: ctx ? ctx.meta : null,
265
379
  error: ctx ? ctx.error : null,
380
+ diff: ctx ? ctx.diff : null,
381
+ // Phase 5: hand the model-facing result to the descriptor so it can
382
+ // derive an output-preview detail (shell/MCP/subagent). Chrome only —
383
+ // the model already received the full result via boundToolOutput.
384
+ output: typeof result === 'string' ? result : null,
266
385
  noDuration: isBlocking,
267
386
  });
387
+ // File-activity collapse: a SUCCESSFUL read_file/list_dir folds into the
388
+ // running file-group aggregate instead of committing its own line — the
389
+ // group's single summary (or, for a 1–2 op run, the individual lines)
390
+ // commits at flush. The op core is STILL persisted here (serializeOperation
391
+ // below) so replay re-groups it. An ERRORED file op does NOT join the
392
+ // group: it falls through to flush the success-group first (so its summary
393
+ // lands ABOVE), then renders the error standalone + error body.
394
+ if (!sessionCtx.debugMode && !hasError && fileTracker.isGroupable(tag)) {
395
+ fileTracker.end(operation);
396
+ return serializeOperation(operation);
397
+ }
398
+ // A non-grouped tool end (or an errored file op) closes any open file
399
+ // group first, so its committed summary lands ABOVE this line.
400
+ if (fileTracker.isOpen()) fileTracker.flush();
401
+ const finalLine = renderOperation(operation, { mode: 'ansi', phase: 'result' });
268
402
  if (ctx && ctx.id) {
269
403
  writerModule.endActivity(ctx.id, finalLine);
270
404
  } else {
@@ -273,6 +407,37 @@ function createTurnHandler(ctx, slashHandlers) {
273
407
  // to a direct scrollback line so the tool still leaves a trace.
274
408
  writerModule.scrollback(finalLine);
275
409
  }
410
+ // Execution-time file-edit diff. This is the SINGLE site the full diff of
411
+ // a successful mutating edit renders — decoupled from the permission modal,
412
+ // so an auto-approved edit shows its diff exactly like a manual one, and
413
+ // every entry mode (fresh / --resume / /history / /chats) renders it the
414
+ // same way. Loaded history replays through displayLoadedMessages (summaries
415
+ // only), never onToolEnd, so past turns carry no diff payload and are not
416
+ // replayed. Capped at config.diff_max_lines (head+tail for a large edit).
417
+ if (!hasError && operation.detail && operation.detail.kind === 'diff') {
418
+ const diffStr = renderOperation(operation, {
419
+ mode: 'ansi',
420
+ phase: 'detail',
421
+ maxLines: (getConfig() || {}).diff_max_lines,
422
+ });
423
+ if (diffStr) writerModule.scrollback(diffStr);
424
+ }
425
+ // Phase 5/7b: collapsed output preview for shell/MCP/subagent successes.
426
+ // DEFERRED into the writer's redrawable detail band (not committed to
427
+ // scrollback yet) — the held slot commits once at the next boundary
428
+ // (next-op start / assistant answer / turn end). The preview is static
429
+ // (first N lines + `… M more lines`, no expand affordance). Model-facing
430
+ // context is untouched (the full output already reached the model). The
431
+ // result line above and any diff still commit immediately.
432
+ if (!hasError && operation.detail && operation.detail.kind === 'output') {
433
+ chatHistory.deferToolOutput({
434
+ role: 'tool',
435
+ tag,
436
+ content: '',
437
+ output: operation.detail.payload.body,
438
+ previewLines: (getConfig() || {}).shell_preview_lines || 5,
439
+ });
440
+ }
276
441
  if (hasError) {
277
442
  // Preserve the expandable error body as a follow-up tool
278
443
  // bubble. Empty content suppresses its header so the scrollback
@@ -283,46 +448,96 @@ function createTurnHandler(ctx, slashHandlers) {
283
448
  }
284
449
  statusBar.update('streaming', 'Streaming response');
285
450
  }
451
+ // Phase 6a — hand the SAME descriptor back to the agent loop (serialized)
452
+ // so the native rail can persist it as a `_display` sibling on the tool
453
+ // result message; replay then rebuilds it for full-fidelity rendering.
454
+ // Display chrome only — never touches the model-facing `content`. The
455
+ // web-activity path above returns its own web-op core (Phase 6c-i) which
456
+ // every replay reader routes to the legacy fallback, so web ops still
457
+ // render via the summary on replay (aggregation lands in 6c-ii).
458
+ return serializeOperation(operation);
286
459
  },
287
460
  onToken: (token) => {
288
461
  if (!opts.showThink && implicitThinkPhase) {
289
- // Check if this token is the closing think tag (Qwen3-style implicit think).
290
- if (/^<\/(think|reasoning|reflection)>$/i.test(token.trim())) {
291
- // Thinking phase is over — discard buffered reasoning, start streaming.
462
+ // NATIVE-RAIL eager-open (live token-by-token narration). Gated on a
463
+ // safety signal so reasoning is NEVER leaked: open the gate eagerly
464
+ // ONLY when this stream is on the native rail AND the model has proven
465
+ // (a) it uses the structured reasoning channel this iteration
466
+ // (reasoningSeen) OR (b) it asserts inline_reasoning:false. In either
467
+ // case the leading content is narration, so we open the gate, drop the
468
+ // (empty) buffer, and fall through to stream THIS token live. The XML
469
+ // rail and the no-signal native case skip this branch entirely and keep
470
+ // the buffered-until-boundary fallback below (no behavior change, no
471
+ // leak). Mirror the think_bubble/orphan-</think> exits' status update.
472
+ if (nativeRail && (reasoningSeen || inlineReasoningFalse)) {
292
473
  implicitThinkPhase = false;
293
474
  implicitThinkBuffer = '';
294
475
  statusBar.update('streaming', 'Streaming response');
476
+ // fall through — stream this and all subsequent tokens live. The
477
+ // orphan-close-tag filter below still runs so a stray </think> that
478
+ // MiniMax inlines alongside reasoning_content never reaches the
479
+ // terminal (regression from 938f583's eager-open, which skipped the
480
+ // else-branch's drop guard for this and every subsequent token).
481
+ } else {
482
+ // Check if this token is the closing think tag (Qwen3-style implicit think).
483
+ if (ORPHAN_CLOSE_TAG_RE.test(token.trim())) {
484
+ // Thinking phase is over — discard buffered reasoning, start streaming.
485
+ implicitThinkPhase = false;
486
+ implicitThinkBuffer = '';
487
+ statusBar.update('streaming', 'Streaming response');
488
+ return;
489
+ }
490
+ // Buffer the token; keep the thinking animation visible.
491
+ implicitThinkBuffer += token;
295
492
  return;
296
493
  }
297
- // Buffer the token; keep the thinking animation visible.
298
- implicitThinkBuffer += token;
299
- return;
300
494
  }
495
+ // Drop any orphan closing reasoning tag on every token, regardless of which
496
+ // branch opened the gate (eager-open or showThink). The StreamParser emits
497
+ // these verbatim, so once the gate is open they would otherwise stream live.
498
+ if (ORPHAN_CLOSE_TAG_RE.test(token.trim())) return;
301
499
  chatHistory.streamToken(token);
302
500
  statusBar.onToken();
303
501
  },
304
- onAssistantMessage: (cleanContent) => {
305
- // If </think> was never seen, the model had no implicit think block —
306
- // flush whatever was buffered as normal streaming content.
502
+ onAssistantMessage: (cleanContent, meta) => {
503
+ // If </think> was never seen, the model had no implicit think block — its
504
+ // leading text was ordinary narration. Drop the raw buffered tokens: the
505
+ // cleaned, canonical narration arrives as `cleanContent` and is rendered by
506
+ // finalizeLastMessage below (as a pre-tool bubble when nothing streamed live),
507
+ // so re-emitting the raw buffer would double it.
307
508
  if (implicitThinkPhase && implicitThinkBuffer) {
308
509
  implicitThinkPhase = false;
309
510
  implicitThinkBuffer = '';
310
511
  }
512
+ // Terminal-iteration signal. agent.js now passes `{ terminal }` explicitly
513
+ // (true only on the final, no-tool-call answer). Fall back to the legacy
514
+ // "content is non-empty" proxy when the flag is absent (older callers / the
515
+ // web-ordering unit tests drive these callbacks directly with one arg).
516
+ const terminal = meta && typeof meta.terminal === 'boolean'
517
+ ? meta.terminal
518
+ : !!(cleanContent && cleanContent.trim());
311
519
  // Web-activity ordering (W.3 regression fix): commit any still-open web
312
520
  // group BEFORE the answer is finalized, so the collapsed "✓ web · …"
313
521
  // summary lands ABOVE the answer in scrollback (pre-W.3 ordering).
314
522
  //
315
- // Guard on non-empty content: that is exactly the "terminal response"
316
- // signal. Intermediate web-tool iterations pass cleanContent === ''
317
- // (suppressed because they carried tool calls — agent.js), so they do
318
- // NOT flush — the group stays open and the multi-step search→fetch
319
- // activity stays collapsed into a single line (the W.3 guarantee).
320
- // The final-answer iteration passes non-empty content — flush once.
321
- // Empty/interrupted turns (no non-empty message ever arrives) fall back
322
- // to the turn-end `finally` flush, which is now the safety net.
323
- if (cleanContent && cleanContent.trim() && webTracker.isOpen()) {
523
+ // Guard on the TERMINAL signal (no tool calls this iteration). Intermediate
524
+ // web-tool iterations are non-terminal — they keep the group open so a
525
+ // multi-step search→fetch still collapses into a single line (the W.3
526
+ // guarantee). Pre-live-narration this used "cleanContent is empty" as the
527
+ // proxy for intermediate; now intermediate iterations carry narration too,
528
+ // so we rely on the explicit `terminal` flag instead — otherwise an
529
+ // intermediate narration would flush the group early and split the line.
530
+ // Empty/interrupted turns (no terminal message ever arrives) fall back to
531
+ // the turn-end `finally` flush, which is the safety net.
532
+ if (terminal && webTracker.isOpen()) {
324
533
  webTracker.flush();
325
534
  }
535
+ // Same terminal-gating for the file group: only the explicit terminal
536
+ // signal flushes, so intermediate-iteration narration does NOT split a
537
+ // multi-iteration read run — it still collapses to one summary.
538
+ if (terminal && fileTracker.isOpen()) {
539
+ fileTracker.flush();
540
+ }
326
541
  chatHistory.finalizeLastMessage(cleanContent);
327
542
  },
328
543
  onMetricsUpdate: (data) => statusBar.updateMetrics(data),
@@ -330,8 +545,8 @@ function createTurnHandler(ctx, slashHandlers) {
330
545
  statusBar.update('thinking', `Retrying (${attempt}/${max})...`);
331
546
  },
332
547
  onDebug: (block) => {
333
- // Render in-history as a tool-style bubble so ctrl+O expand works and
334
- // the RAW RESPONSE text survives TUI redraws (stderr would be clobbered).
548
+ // Render in-history as a tool-style bubble so the RAW RESPONSE text
549
+ // survives TUI redraws (stderr would be clobbered).
335
550
  chatHistory.addMessage({ role: 'tool', tag: 'debug', content: 'DEBUG', output: block });
336
551
  },
337
552
  onError: (err) => {
@@ -430,9 +645,17 @@ function createTurnHandler(ctx, slashHandlers) {
430
645
  statusBar.update('error', err.message || 'Agent error');
431
646
  chatHistory.addMessage({ role: 'system', content: err.message || 'Agent error', isError: true });
432
647
  } finally {
648
+ // Phase 7b boundary — commit any trailing op's held detail band before the
649
+ // turn unwinds (the turn may have ended right after a tool with no
650
+ // following message). No-op when nothing is deferred; ordered before the
651
+ // web flush (the two are mutually exclusive in practice).
652
+ try { chatHistory.commitDeferredDetail(); } catch { /* never block turn teardown */ }
433
653
  // Commit any still-open web-activity summary (the turn may have ended right
434
654
  // after a web op, or been interrupted mid-group) before the turn unwinds.
435
655
  try { webTracker.flush(); } catch { /* never block turn teardown */ }
656
+ // Commit any still-open file-activity group (turn ended right after a read
657
+ // run, or was interrupted mid-group) before the turn unwinds.
658
+ try { fileTracker.flush(); } catch { /* never block turn teardown */ }
436
659
  inputField.removeListener('abort', _onAbort);
437
660
  }
438
661
 
@@ -86,13 +86,11 @@ function createChatCommand(deps) {
86
86
  onRemoveMessage: (id) => chatHistory.removeById(id),
87
87
  // Modal-region API: setModal replaces the modal live band above the
88
88
  // status region; clearModal drops it. Arrow-key redraws go through
89
- // setModal only — no scrollback churn. When the picker resolves we
90
- // clear the modal and push a single summary line to scrollback.
89
+ // setModal only — no scrollback churn. When the picker resolves we just
90
+ // clear the modal — the execution result line is the sole post-approval
91
+ // confirmation, so no summary line is pushed to scrollback (Phase 2 D1).
91
92
  onShowModal: (lines) => writer.setModal(lines),
92
- onCloseModal: (summary) => {
93
- writer.clearModal();
94
- if (summary) chatHistory.addMessage({ role: 'system', content: summary });
95
- },
93
+ onCloseModal: () => { writer.clearModal(); },
96
94
  onCaptureNavigation: (handler) => {
97
95
  inputField.captureNavigation(handler);
98
96
  return () => inputField.releaseNavigation();
@@ -100,8 +98,6 @@ function createChatCommand(deps) {
100
98
  captureSelect: (menu) => inputField.captureSelect(menu),
101
99
  });
102
100
 
103
- inputField.on('expand', () => chatHistory.toggleLastExpand());
104
-
105
101
  const cwd = process.cwd();
106
102
  let currentModel = opts.model || getConfig().default_model;
107
103
  let resolvedTokenLimit = await resolveTokenLimit(currentModel);
@@ -331,6 +327,14 @@ function createChatCommand(deps) {
331
327
 
332
328
 
333
329
  statusBar.update('idle');
330
+ // Re-sync the clock to the input field's actual idle state. update('idle')
331
+ // unconditionally restarts the clock (the not-paused ⇒ clock-running
332
+ // invariant), but if an await above (resume / MCP connectAll) yielded the
333
+ // event loop, the one-shot _goIdle already fired and no active→idle
334
+ // transition will re-fire pause(). Converge both paths: if the field is
335
+ // already idle, pause the clock so the viewport can scroll. On a no-await
336
+ // start the field is not yet idle here, so this is a no-op.
337
+ if (inputField.isIdle()) statusBar.pause();
334
338
 
335
339
  // Slash-command handlers (lib/commands/chat-slash.js), keyed by the canonical
336
340
  // registry name. The parity check below guarantees registry ↔ handler stay
package/lib/config.js CHANGED
@@ -206,6 +206,16 @@ function normalizeConfig(cfg = {}) {
206
206
  // native_tools defaults to true; only explicit false/0/"false"/"0" opts out.
207
207
  const nt = entry.native_tools;
208
208
  normalized.native_tools = !(nt === false || nt === 0 || nt === '0' || nt === 'false');
209
+ // inline_reasoning (live-narration safety signal): an OPTIONAL explicit
210
+ // boolean assertion about whether this model inlines its reasoning into
211
+ // delta.content (Qwen3-style bare text + orphan </think>). Left unset by
212
+ // default → assume it MIGHT inline (safe default: keep buffering until a
213
+ // boundary). Only an explicit `false` asserts "never inlines" → the agent
214
+ // loop may stream narration live from token 1 on the native rail. Only an
215
+ // explicit boolean is persisted; any other value is dropped (stays unset).
216
+ if (typeof entry.inline_reasoning === 'boolean') {
217
+ normalized.inline_reasoning = entry.inline_reasoning;
218
+ }
209
219
  // Multimodal image input (Task 5.4). `vision` (only when an explicit
210
220
  // boolean) marks the profile vision-capable or text-only; a text-only
211
221
  // profile makes an image attach fail LOUD rather than silently drop.
@@ -418,6 +428,22 @@ function isNativeToolsActive(model) {
418
428
  return !(profile && profile.native_tools === false);
419
429
  }
420
430
 
431
+ // Resolves the active profile's `inline_reasoning` assertion (live-narration
432
+ // safety signal b). Returns the explicit boolean if the profile sets one, else
433
+ // `undefined` ("unknown — assume it might inline reasoning"). Mirrors the
434
+ // profile lookup used by isNativeToolsActive. The agent loop treats only an
435
+ // explicit `false` as the safe-to-stream-live signal.
436
+ function getInlineReasoning(model) {
437
+ const cfg = loadConfig();
438
+ if (!Array.isArray(cfg.models)) return undefined;
439
+ const profile = cfg.models.find(
440
+ (p) => p && p.api_base === cfg.api_base && p.model === model
441
+ );
442
+ return profile && typeof profile.inline_reasoning === 'boolean'
443
+ ? profile.inline_reasoning
444
+ : undefined;
445
+ }
446
+
421
447
  const REDACTED_KEYS = new Set(['api_key', 'auth_token']);
422
448
 
423
449
  function configShow(systemPromptOverride = null) {
@@ -457,6 +483,7 @@ module.exports = {
457
483
  configSet,
458
484
  configShow,
459
485
  isNativeToolsActive,
486
+ getInlineReasoning,
460
487
  loadConfig,
461
488
  loadUserConfig,
462
489
  normalizeConfig,
package/lib/constants.js CHANGED
@@ -14,7 +14,7 @@ const DEFAULT_API_TIMEOUT_MS = 15 * 60 * 1000;
14
14
  // even a caller that omits the value gets a real cap rather than an unbounded
15
15
  // loop. A config value of 0 (the "unlimited" sentinel) opts out — see
16
16
  // resolveMaxIterations in lib/config.js.
17
- const DEFAULT_MAX_ITERATIONS = 50;
17
+ const DEFAULT_MAX_ITERATIONS = 125;
18
18
 
19
19
  // Self-verification (Task 4.2). When the agent declares a task done, an optional
20
20
  // configured shell command (e.g. `npm test`) is run and its result fed back.
@@ -110,6 +110,28 @@ const OUTPUT_HEAD_RATIO = 0.6;
110
110
  // line-bounded output — it only catches the pathological few-but-huge-lines case.
111
111
  const DEFAULT_OUTPUT_MAX_TOKENS = 10000;
112
112
 
113
+ // File-edit diff display bound (execution-time diff rendering). Every mutating
114
+ // file edit (write/append/edit_file/replace_in_file) renders its diff at the
115
+ // moment it executes — decoupled from the permission modal, so an auto-approved
116
+ // edit shows its changes just like a manually-approved one. `diff_max_lines`
117
+ // caps the number of CHANGED (+/-) lines shown: a small edit (or a series of
118
+ // small edits) renders in full; one large edit shows head+tail of the changed
119
+ // lines with a `… K more changed lines (N total)` notice (mirrors the W.6
120
+ // shell head+tail discipline). Operator-overridable via config.diff_max_lines.
121
+ const DEFAULT_DIFF_MAX_LINES = 50;
122
+
123
+ // Collapsed output-preview bound (Output Refactor — Phase 5). Shell / MCP /
124
+ // subagent output is shown in MODERATION in the chrome: the first
125
+ // `shell_preview_lines` lines render below the result line, then a static
126
+ // `… N more lines` hint. There is no in-terminal way to expand — full viewing is
127
+ // deferred to the planned transcript viewer. This is DISPLAY-ONLY — the model
128
+ // still receives the full output via boundToolOutput; this cap never touches
129
+ // context.
130
+ // Diffs (file edits) are NOT subject to this — they render expanded to
131
+ // `diff_max_lines` (the user explicitly wants to see diffs). Operator-overridable
132
+ // via config.shell_preview_lines.
133
+ const DEFAULT_SHELL_PREVIEW_LINES = 5;
134
+
113
135
  // MCP & subagent result context bounds (Task W.8). MCP tool results
114
136
  // (lib/mcp/client.js mcpResultToText) and subagent final text (lib/subagents.js)
115
137
  // were the last two UNBOUNDED paths into context — both are fenced as untrusted,
@@ -185,6 +207,11 @@ const DEFAULT_CONFIG = {
185
207
  // head+tail line cap (max_output_lines) bounds the common case; this bounds the
186
208
  // pathological few-but-huge-lines case (a single minified line, a binary cat).
187
209
  max_output_tokens: DEFAULT_OUTPUT_MAX_TOKENS,
210
+ // Changed-line cap for execution-time file-edit diffs (see DEFAULT_DIFF_MAX_LINES).
211
+ diff_max_lines: DEFAULT_DIFF_MAX_LINES,
212
+ // Preview-line count for shell/MCP/subagent output chrome (see
213
+ // DEFAULT_SHELL_PREVIEW_LINES). Display-only — never affects model context.
214
+ shell_preview_lines: DEFAULT_SHELL_PREVIEW_LINES,
188
215
  // Max agent-loop iterations per user turn. A positive integer caps the loop;
189
216
  // 0 means deliberately unbounded (power-user choice). Default 125.
190
217
  max_iterations: DEFAULT_MAX_ITERATIONS,
@@ -359,6 +386,7 @@ const TAG_REGISTRY = {
359
386
  exec: { type: 'tool', streaming: false, label: 'Running command' },
360
387
  shell: { type: 'tool', streaming: false, label: 'Running shell' },
361
388
  read_file: { type: 'tool', streaming: false, label: 'Reading file' },
389
+ view_image: { type: 'tool', streaming: false, label: 'Viewing image' },
362
390
  write_file: { type: 'tool', streaming: false, label: 'Writing file' },
363
391
  create_file: { type: 'tool', streaming: false, label: 'Creating file' },
364
392
  append_file: { type: 'tool', streaming: false, label: 'Appending to file' },
@@ -510,6 +538,7 @@ module.exports = {
510
538
  DEFAULT_MAX_OUTPUT_LINES,
511
539
  OUTPUT_HEAD_RATIO,
512
540
  DEFAULT_OUTPUT_MAX_TOKENS,
541
+ DEFAULT_DIFF_MAX_LINES,
513
542
  DEFAULT_MCP_MAX_RESULT_TOKENS,
514
543
  DEFAULT_SUBAGENT_MAX_RESULT_TOKENS,
515
544
  DEFAULT_WEB_MAX_CONTENT_TOKENS,