npm - @semalt-ai/code - Versions diffs - 1.8.5 → 1.20.0 - Mend

@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192) hide show

package/.claude/settings.local.json +7 -1
package/.github/workflows/ci.yml +69 -0
package/ARCHITECTURE.md +6 -95
package/CLAUDE.md +196 -316
package/README.md +148 -4
package/docs/ARCHITECTURE.md +1321 -0
package/docs/CONFIG.md +340 -0
package/docs/HISTORY.md +245 -0
package/examples/embed.js +74 -0
package/index.js +251 -10
package/lib/agent.js +856 -120
package/lib/api.js +239 -50
package/lib/args.js +74 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +489 -0
package/lib/commands/chat-slash.js +415 -0
package/lib/commands/chat-turn.js +669 -0
package/lib/commands/chat.js +407 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +360 -11
package/lib/constants.js +401 -3
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +202 -0
package/lib/hooks.js +286 -0
package/lib/images.js +270 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +123 -26
package/lib/pricing.js +67 -0
package/lib/proc.js +62 -0
package/lib/prompts.js +99 -8
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2862 -0
package/lib/tool_specs.js +263 -9
package/lib/tools.js +352 -1039
package/lib/ui/anim.js +86 -0
package/lib/ui/ansi.js +17 -27
package/lib/ui/chat-history.js +253 -71
package/lib/ui/create-ui.js +67 -24
package/lib/ui/diff.js +90 -25
package/lib/ui/file-activity.js +236 -0
package/lib/ui/format.js +195 -29
package/lib/ui/input-field.js +21 -11
package/lib/ui/md-stream.js +234 -0
package/lib/ui/render-operation.js +113 -0
package/lib/ui/select.js +1 -4
package/lib/ui/status-bar.js +146 -36
package/lib/ui/stream.js +20 -13
package/lib/ui/theme.js +190 -44
package/lib/ui/tool-operation.js +190 -0
package/lib/ui/utils.js +9 -5
package/lib/ui/web-activity.js +270 -0
package/lib/ui/writer.js +159 -45
package/lib/ui.js +1 -1
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/anim-driver.test.js +153 -0
package/test/ask-user-display.test.js +226 -0
package/test/ask-user-gate.test.js +231 -0
package/test/background.test.js +414 -0
package/test/chat-history-nocolor.test.js +155 -0
package/test/chat-relogin.test.js +207 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/defer-detail-band.test.js +403 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/detail-band-tab-flatten.test.js +242 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/exec-diff.test.js +268 -0
package/test/executors.test.js +599 -0
package/test/extract-tool-calls.test.js +349 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/file-activity.test.js +522 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/grep-path-target.test.js +227 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +143 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +348 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/input-field-ctrl-o.test.js +37 -0
package/test/live-height-physical.test.js +281 -0
package/test/max-iterations.test.js +218 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/md-stream.test.js +183 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +409 -0
package/test/native-live-narration.test.js +254 -0
package/test/output-chokepoint.test.js +188 -0
package/test/output-heredoc-leak.test.js +195 -0
package/test/output-preview.test.js +245 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +362 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/render-operation.test.js +317 -0
package/test/replay-descriptor-xml.test.js +216 -0
package/test/replay-descriptor.test.js +189 -0
package/test/replay-web-aggregate.test.js +291 -0
package/test/replay-web-persist.test.js +241 -0
package/test/result-cap.test.js +233 -0
package/test/running-glyph-anim.test.js +111 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-driver.test.js +93 -0
package/test/status-bar-pause.test.js +164 -0
package/test/status-bar-resync.test.js +188 -0
package/test/stream-parser.test.js +171 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/theme-palette.test.js +166 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/truncate-visible.test.js +78 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/view-image.test.js +199 -0
package/test/web-activity-ordering.test.js +203 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1438
package/path +0 -1

package/test/file-activity.test.js ADDED Viewed

@@ -0,0 +1,522 @@
+'use strict';
+// File-activity grouping — a SECOND INSTANCE of the web-activity collapser for
+// consecutive pure file reads/lists (read_file / list_dir). read_file and
+// list_dir share ONE group key, so a mixed read/list exploration phase collapses
+// into a single summary (neutral "file ×N" verb) instead of fragmenting; a
+// homogeneous run keeps its specific "read ×N" / "list ×N" verb. Covers the live
+// flush sites (driven through the REAL createTurnHandler callbacks, exactly as
+// web-activity-ordering.test.js does for web ops), the flush-time THRESHOLD
+// (1–2 individual lines, 3+ collapsed summary), the merged read/list group, the
+// error-breaks-the-group ordering, terminal-flag gating across iterations, the
+// double-flush guard, and replay re-grouping at the replay terminal width.
+const { test } = require('node:test');
+const assert = require('node:assert');
+// Stable colour env for byte comparisons (node:test isolates each file's process).
+process.stdout.isTTY = true;
+delete process.env.NO_COLOR;
+const { stripAnsi } = require('../lib/ui/utils');
+const { createTurnHandler } = require('../lib/commands/chat-turn');
+const { buildToolOperation, serializeOperation } = require('../lib/ui/tool-operation');
+const {
+  createFileActivityTracker, formatFileSummaryLine, fileSummaryState,
+  isGroupableFileCore, normalizeFileTag, fileGroupKey,
+} = require('../lib/ui/file-activity');
+const { ChatHistory } = require('../lib/ui/chat-history');
+const { createChatSession } = require('../lib/commands/chat-session');
+// Config object handed back by getConfig() when replayCommits builds a chat session.
+const CFG = { diff_max_lines: 50, shell_preview_lines: 5 };
+// ── Live harness: drive the real createTurnHandler callbacks ──────────────────
+// Records every committed line in ONE ordered log. The file summary / individual
+// file lines commit via writerModule.endActivity (from fileTracker.flush); the
+// answer via chatHistory.finalizeLastMessage; an error body via addMessage.
+function harness(opts) {
+  const events = [];
+  const writerModule = {
+    startActivity() {}, updateActivity() {},
+    endActivity(id, line) {
+      // A flush may commit several individual lines joined by '\n' (the <3 case).
+      for (const raw of String(line == null ? '' : line).split('\n')) {
+        if (raw === '') continue;
+        events.push({ kind: 'commit', line: stripAnsi(raw) });
+      }
+    },
+    scrollback(line) { events.push({ kind: 'scrollback', line: stripAnsi(String(line)) }); },
+  };
+  const chatHistory = {
+    addMessage(m) {
+      if (m && m.isError) events.push({ kind: 'error-body', output: m.output });
+    },
+    streamToken() {}, clearStreamingContent() {},
+    deferToolOutput() {}, commitDeferredDetail() {},
+    finalizeLastMessage(content) { if (content && content.trim()) events.push({ kind: 'answer', content }); },
+  };
+  const statusBar = { update() {}, onToken() {}, addPendingTokens() {}, updateMetrics() {}, setCost() {} };
+  const inputField = { on() {}, removeListener() {}, releaseNavigation() {}, setDisabled() {} };
+  let scenario = async () => {};
+  const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
+    await scenario(loopOpts.callbacks);
+    return { messages, metrics: { turns: [] }, withheldActions: [] };
+  };
+  const ctx = {
+    inputField, statusBar, chatHistory, writerModule, runAgentLoop,
+    getConfig: () => ({ auth_token: 'tok', max_iterations: 50, show_cost: false, system_prompt_mode: 'system_role' }),
+    approxTokens: () => 0,
+    resolveCommand: () => null,
+    opts: {},
+    TAG_REGISTRY: {},
+    collapseListMsg() {}, handlePendingSelection() {}, showPendingStep() {},
+    activateNavCapture() {}, finalizeListMsg() {},
+    createChatIfNeeded: async () => {}, saveTurnToDashboard: async () => {}, saveSession() {},
+    messages: [], currentModel: 'm', debugMode: (opts && opts.debugMode) || false, pendingImages: [],
+    chatSync: async () => '', resolvedSystemPrompt: '', resolvedTokenLimit: null, planMode: false,
+  };
+  const handler = createTurnHandler(ctx, {});
+  return { events, handler, setScenario: (fn) => { scenario = fn; } };
+}
+// Simulate one fully-successful groupable file op (one agent iteration's worth).
+function fileOp(cb, tag, path, bytes) {
+  cb.onToolStart(tag, path, { id: `${tag}-${path}`, attrs: { path } });
+  cb.onToolEnd(tag, 'contents', 5, { id: `${tag}-${path}`, attrs: { path }, meta: { bytes: bytes || 10 }, error: null });
+}
+const commits = (events) => events.filter((e) => e.kind === 'commit');
+// Matches a collapsed file summary for any verb: homogeneous (read/reading,
+// list/listing) or the neutral mixed verb (file/accessing).
+const summaries = (events) => commits(events).filter((e) => /file .*(read|reading|list|listing|file|accessing) ×\d+/.test(e.line));
+// ───────────────────────────────────────────────────────────────────────────
+// (a) 10 consecutive read_file ops → ONE "✓ file · read ×10 (…)" summary line.
+// ───────────────────────────────────────────────────────────────────────────
+test('(a) 10 reads collapse to ONE summary; ×10 always present; basenames truncated to width', async () => {
+  // Save the current width; the finally block restores it so the override cannot leak.
+  const prevCols = process.stdout.columns;
+  process.stdout.columns = 60; // narrow → force basename truncation
+  try {
+    const h = harness();
+    // Ten distinct paths with deliberately long basenames.
+    const files = Array.from({ length: 10 }, (_, i) => `/proj/src/file-${i}-with-a-long-name.js`);
+    h.setScenario(async (cb) => {
+      cb.onAssistantMessage('');                       // one tool-call-only iteration
+      for (const f of files) fileOp(cb, 'read', f, 100 + 1);
+      cb.onAssistantMessage('Done reading.');          // terminal
+    });
+    await h.handler('warm up');
+    const s = summaries(h.events);
+    assert.strictEqual(s.length, 1, 'exactly one collapsed summary');
+    assert.match(s[0].line, /file .* read ×10 \(/, 'shows the read verb and the ×10 count');
+    assert.ok(s[0].line.includes('…'), 'the basename list is truncated to width');
+    assert.match(s[0].line, /×10/, 'the ×10 count survives truncation (it is in the fixed prefix)');
+    // Single physical row at the render width.
+    assert.ok(s[0].line.length <= 60, `summary fits one 60-col row (got ${s[0].line.length})`);
+    // No per-op read lines leaked alongside the summary.
+    assert.strictEqual(commits(h.events).filter((e) => /read \//.test(e.line)).length, 0, 'no individual read lines');
+  } finally {
+    process.stdout.columns = prevCols;
+  }
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (b) 2 read ops → two individual lines (threshold: <3 stays per-op).
+// ───────────────────────────────────────────────────────────────────────────
+test('(b) 2 reads commit as two individual lines, no summary', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    // Empty message first, two reads, then the closing message.
+    cb.onAssistantMessage('');
+    fileOp(cb, 'read', '/a.js');
+    fileOp(cb, 'read', '/b.js');
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('read two');
+  assert.strictEqual(summaries(h.events).length, 0, 'no collapsed summary for a 2-op run');
+  // Per-op lines are picked out as commits containing "read /" (verb + absolute path).
+  const indiv = commits(h.events).filter((e) => /read \//.test(e.line));
+  assert.strictEqual(indiv.length, 2, 'two individual per-op result lines');
+  // The lines must also keep the order in which the ops ran.
+  assert.match(indiv[0].line, /read \/a\.js/);
+  assert.match(indiv[1].line, /read \/b\.js/);
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (c) reads and lists INTERLEAVED (read, list, read, list, read) → ONE merged
+//     summary with the neutral "file ×5" verb (was: two separate summaries —
+//     CHANGED by the key-merge: read_file + list_dir now share one group).
+// ───────────────────────────────────────────────────────────────────────────
+test('(c) interleaved reads+lists collapse to ONE merged summary with the neutral verb', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    // Strict alternation read/list/read/list/read: five ops, no other tool in between.
+    fileOp(cb, 'read', '/a.js');
+    fileOp(cb, 'list_dir', '/d0');
+    fileOp(cb, 'read', '/b.js');
+    fileOp(cb, 'list_dir', '/d1');
+    fileOp(cb, 'read', '/c.js');
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('interleaved reads and lists');
+  const s = summaries(h.events);
+  assert.strictEqual(s.length, 1, 'one merged summary — read and list share a group now');
+  assert.match(s[0].line, /file .* file ×5 \(/, 'mixed group uses the neutral "file ×5" verb');
+  assert.doesNotMatch(s[0].line, /read ×|list ×/, 'no homogeneous verb for a mixed group');
+  // All five basenames/dirs listed once in the merged summary.
+  for (const b of ['a.js', 'd0', 'b.js', 'd1', 'c.js']) {
+    assert.ok(s[0].line.includes(b), `merged summary lists ${b}`);
+  }
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (c2) homogeneous list run (5 list_dir, no reads) → still "list ×5".
+// ───────────────────────────────────────────────────────────────────────────
+test('(c2) 5 list_dir ops only → homogeneous "list ×5" summary (specific verb kept)', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    // Five list_dir ops on /dir0 … /dir4, with no read in the run.
+    for (let i = 0; i < 5; i++) fileOp(cb, 'list_dir', `/dir${i}`);
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('five lists');
+  const s = summaries(h.events);
+  assert.strictEqual(s.length, 1, 'one summary');
+  assert.match(s[0].line, /list ×5/, 'homogeneous lists keep the specific "list" verb');
+  // Neither the mixed-group verb nor the read verb may appear for an all-list run.
+  assert.doesNotMatch(s[0].line, /file ×|read ×/, 'no neutral/read verb for an all-list group');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (c3) a mixed read/list run broken by a grep in the middle → TWO groups, split
+//      by the grep (a non-file tool still breaks the run; only read↔list merges).
+// ───────────────────────────────────────────────────────────────────────────
+test('(c3) a grep between two mixed read/list runs splits them into two summaries', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    // group 1: read, list, read → mixed ×3
+    fileOp(cb, 'read', '/g1a.js');
+    fileOp(cb, 'list_dir', '/g1d');
+    fileOp(cb, 'read', '/g1b.js');
+    // a grep breaks the run.
+    cb.onToolStart('grep', 'TODO', { id: 'grep-1', attrs: { pattern: 'TODO' } });
+    cb.onToolEnd('grep', 'match', 7, { id: 'grep-1', attrs: { pattern: 'TODO' }, meta: { matches: 1 }, error: null });
+    // group 2: list, read, list → mixed ×3
+    fileOp(cb, 'list_dir', '/g2d');
+    fileOp(cb, 'read', '/g2a.js');
+    fileOp(cb, 'list_dir', '/g2e');
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('mixed, grep, mixed');
+  const s = summaries(h.events);
+  assert.strictEqual(s.length, 2, 'the grep splits the run into two merged summaries');
+  assert.match(s[0].line, /file ×3/, 'first mixed group is file ×3');
+  assert.match(s[1].line, /file ×3/, 'second mixed group is file ×3');
+  // findIndex returns the FIRST match, so iS0 is the position of the first summary;
+  // the grep line is located by its 'TODO' pattern text.
+  const iS0 = h.events.findIndex((e) => e.kind === 'commit' && /file ×3/.test(e.line));
+  const iGrep = h.events.findIndex((e) => e.kind === 'commit' && /TODO/.test(e.line));
+  assert.ok(iS0 >= 0 && iGrep >= 0 && iS0 < iGrep, 'the first summary lands above the grep line');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (c4) 2 mixed ops (1 read + 1 list) → individual per-op lines (threshold <3,
+//      unchanged — merging the key does NOT lower the threshold).
+// ───────────────────────────────────────────────────────────────────────────
+test('(c4) 1 read + 1 list → two individual lines, no merged summary (threshold 3)', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    fileOp(cb, 'read', '/a.js');
+    fileOp(cb, 'list_dir', '/d');
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('one read one list');
+  assert.strictEqual(summaries(h.events).length, 0, 'no collapsed summary for a 2-op run');
+  // indiv counts every commit that looks like a per-op read line or a per-op list of /d.
+  const indiv = commits(h.events).filter((e) => /read \/|list .*\/d/.test(e.line));
+  const readLine = commits(h.events).find((e) => /read \/a\.js/.test(e.line));
+  // The list line is found as any commit mentioning /d that is not a read line.
+  const listLine = commits(h.events).find((e) => /\/d/.test(e.line) && !/read/.test(e.line));
+  assert.ok(readLine, 'the read commits its own per-op line');
+  assert.ok(listLine, 'the list commits its own per-op line');
+  assert.strictEqual(indiv.length, 2, 'exactly two individual per-op lines');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (d) read run interrupted by a non-file tool → group flushes before the tool row.
+// ───────────────────────────────────────────────────────────────────────────
+test('(d) a non-file tool after a read run flushes the summary before its own line', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    // Three reads: enough to reach the collapse threshold.
+    for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`);
+    // A shell op breaks the group: its onToolStart closes the file group first.
+    cb.onToolStart('shell', 'ls -la', { id: 'sh-1', attrs: { command: 'ls -la' } });
+    cb.onToolEnd('shell', 'Command `ls -la`:\nExit code: 0\nout', 9, { id: 'sh-1', attrs: { command: 'ls -la' }, meta: { exit_code: 0 }, error: null });
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('read then shell');
+  // Locate both landmarks in the single ordered event log, then compare positions.
+  const iSummary = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line));
+  const iShell = h.events.findIndex((e) => e.kind === 'commit' && /ls -la/.test(e.line));
+  assert.ok(iSummary >= 0, 'the read summary committed');
+  assert.ok(iShell >= 0, 'the shell line committed');
+  assert.ok(iSummary < iShell, 'the read summary lands ABOVE the shell line');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (e) read run with op #5 erroring → "read ×4" summary, then standalone error +
+//     body, then a fresh group for the subsequent reads.
+// ───────────────────────────────────────────────────────────────────────────
+test('(e) a mid-run error flushes the success-group, renders error standalone, then a new group starts', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    cb.onAssistantMessage('');
+    for (let i = 0; i < 4; i++) fileOp(cb, 'read', `/ok${i}.js`);
+    // op #5 errors — must NOT join the group.
+    // Driven by hand (not via fileOp) so onToolEnd can carry meta: null and an error object.
+    cb.onToolStart('read', '/bad.js', { id: 'read-bad', attrs: { path: '/bad.js' } });
+    cb.onToolEnd('read', 'Error: boom', 3, { id: 'read-bad', attrs: { path: '/bad.js' }, meta: null, error: { message: 'boom' } });
+    // three more reads → a brand new group.
+    for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/more${i}.js`);
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('reads with an error');
+  const s = summaries(h.events);
+  assert.strictEqual(s.length, 2, 'the 4 successes and the 3 later successes form two summaries');
+  assert.match(s[0].line, /read ×4/, 'the errored op did NOT join the group → ×4 not ×5');
+  assert.match(s[1].line, /read ×3/, 'a new group started after the error');
+  // Four landmarks in the ordered log: ×4 summary, error line, error body
+  // (the harness logs it as an 'error-body' event), and the ×3 summary.
+  const iSummary4 = h.events.findIndex((e) => e.kind === 'commit' && /read ×4/.test(e.line));
+  const iErrLine = h.events.findIndex((e) => e.kind === 'commit' && /read \/bad\.js/.test(e.line));
+  const iErrBody = h.events.findIndex((e) => e.kind === 'error-body');
+  const iSummary3 = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line));
+  assert.ok(iSummary4 >= 0 && iErrLine >= 0 && iErrBody >= 0 && iSummary3 >= 0, 'all four landmarks present');
+  assert.ok(iSummary4 < iErrLine, 'success summary before the error line (never above the reads it followed)');
+  assert.ok(iErrLine < iErrBody, 'error line before its expandable body');
+  assert.ok(iErrBody < iSummary3, 'the new group commits after the error');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (f) multi-iteration read run with intermediate narration → still ONE summary
+//     (terminal-flag gating: intermediate narration must NOT split the group).
+// ───────────────────────────────────────────────────────────────────────────
+test('(f) intermediate-iteration narration does NOT split a multi-iteration read run', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    // iter 1: two reads, then a NON-empty but NON-terminal narration.
+    cb.onAssistantMessage('');
+    fileOp(cb, 'read', '/i1a.js');
+    fileOp(cb, 'read', '/i1b.js');
+    // The explicit { terminal: false } flag is what marks this message as intermediate.
+    cb.onAssistantMessage('Let me read a couple more files.', { terminal: false });
+    // iter 2: two more reads, then the terminal answer.
+    fileOp(cb, 'read', '/i2a.js');
+    fileOp(cb, 'read', '/i2b.js');
+    cb.onAssistantMessage('All read.', { terminal: true });
+  });
+  await h.handler('multi-iteration reads');
+  const s = summaries(h.events);
+  assert.strictEqual(s.length, 1, 'the four reads across two iterations collapse to ONE summary');
+  assert.match(s[0].line, /read ×4/, 'all four reads counted');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (g) replay/resume re-groups to the same summary at the REPLAY width; a grouped
+//     run replayed narrower re-truncates; the >=3 threshold is applied on replay.
+// ───────────────────────────────────────────────────────────────────────────
+// Drive the REAL tracker as the live oracle at a fixed width.
+function liveFileSummary(ops, cols) {
+  const prev = process.stdout.columns;
+  process.stdout.columns = cols;
+  try {
+    const lines = [];
+    const tracker = createFileActivityTracker({ writerModule: {
+      startActivity: () => {}, updateActivity: () => {},
+      endActivity: (_id, line) => lines.push(line),
+    } });
+    for (const op of ops) { tracker.start(op.tag, op.target); tracker.end(op); }
+    tracker.flush();
+    return lines;
+  } finally {
+    process.stdout.columns = prev;
+  }
+}
+function replayCommits(loadedMessages, cols) {
+  const prev = process.stdout.columns;
+  process.stdout.columns = cols;
+  try {
+    const ch = new ChatHistory();
+    const out = [];
+    ch._commit = (t) => out.push(t);
+    const session = createChatSession({ chatHistory: ch, getConfig: () => CFG });
+    session.displayLoadedMessages(loadedMessages);
+    return out;
+  } finally {
+    process.stdout.columns = prev;
+  }
+}
+const fileLineOf = (commitsArr) => commitsArr
+  .map((c) => stripAnsi(c))
+  .filter((c) => /file .* read ×\d+/.test(c));
+test('(g) replay re-groups to the same summary at the replay width; narrower re-truncates; ≥3 threshold applied', () => {
+  // Six successful read operations, built directly rather than driven through the handler.
+  const files = Array.from({ length: 6 }, (_, i) => `/proj/module-${i}/index-file-${i}.js`);
+  const ops = files.map((f) => buildToolOperation({ id: `r${f}`, tag: 'read', arg: f, attrs: { path: f }, status: 'ok', durationMs: 5, meta: { bytes: 200 } }));
+  // Persist exactly as the live path does — one native {role:'tool'} message per op
+  // carrying the normal serialized core (NO storage format change).
+  const loaded = ops.map((op) => ({ role: 'tool', content: 'contents', _display: serializeOperation(op) }))
+    .concat([{ role: 'assistant', content: 'done' }]);
+  // Same width: replay byte-identical to the live committed summary.
+  const oracle200 = liveFileSummary(ops, 200);
+  assert.strictEqual(oracle200.length, 1, 'live commits one summary for 6 reads');
+  const replay200 = fileLineOf(replayCommits(loaded, 200));
+  assert.strictEqual(replay200.length, 1, 'replay commits exactly one file summary');
+  // fileLineOf already strips ANSI from the replay side, so strip the oracle too before comparing.
+  assert.strictEqual(replay200[0], stripAnsi(oracle200[0]), 'replay summary is byte-identical to the live one at the same width');
+  // Narrower terminal: re-truncates at 80 cols, but the ×6 count still shows.
+  const replay80 = fileLineOf(replayCommits(loaded, 80));
+  assert.strictEqual(replay80.length, 1, 'one summary at 80 cols too');
+  assert.match(replay80[0], /×6/, 'the ×6 count survives the narrower re-truncation');
+  assert.ok(replay80[0].length <= 80, 'fits one 80-col row');
+  assert.notStrictEqual(replay80[0], replay200[0], 'the 80-col render re-truncates differently from the 200-col one');
+  // Threshold on replay: a 2-op run replays as individual lines, no summary.
+  const twoLoaded = ops.slice(0, 2).map((op) => ({ role: 'tool', content: 'contents', _display: serializeOperation(op) }))
+    .concat([{ role: 'assistant', content: 'done' }]);
+  const replayTwo = replayCommits(twoLoaded, 200);
+  assert.strictEqual(fileLineOf(replayTwo).length, 0, 'a 2-op run does NOT collapse on replay');
+  assert.strictEqual(replayTwo.filter((c) => /read \//.test(stripAnsi(c))).length, 2, 'it replays as two individual read lines');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (g2) replay of a MIXED read/list run re-groups into the SAME single merged
+//      summary (neutral "file ×N" verb), byte-identical to the live oracle.
+// ───────────────────────────────────────────────────────────────────────────
+test('(g2) replay of a mixed read/list run → identical merged summary at the replay width', () => {
+  // read, list, read, list, read — interleaved, persisted as native cores.
+  const seq = [
+    { tag: 'read', path: '/proj/src/alpha-module.js' },
+    { tag: 'list_dir', path: '/proj/src/components' },
+    { tag: 'read', path: '/proj/src/beta-module.js' },
+    { tag: 'list_dir', path: '/proj/src/utils' },
+    { tag: 'read', path: '/proj/src/gamma-module.js' },
+  ];
+  // meta differs by tag: reads carry { bytes }, list_dir ops carry { entries }.
+  const ops = seq.map((o, i) => buildToolOperation({
+    id: `m${i}`, tag: o.tag, arg: o.path, attrs: { path: o.path },
+    status: 'ok', durationMs: 5, meta: o.tag === 'read' ? { bytes: 200 } : { entries: 3 },
+  }));
+  const loaded = ops.map((op) => ({ role: 'tool', content: 'contents', _display: serializeOperation(op) }))
+    .concat([{ role: 'assistant', content: 'done' }]);
+  // Local filter for the neutral-verb summary ("file … file ×N"); fileLineOf only matches "read ×N".
+  const mixedLineOf = (commitsArr) => commitsArr
+    .map((c) => stripAnsi(c))
+    .filter((c) => /file .* file ×\d+/.test(c));
+  const oracle = liveFileSummary(ops, 200);
+  assert.strictEqual(oracle.length, 1, 'live commits one merged summary for the mixed run');
+  assert.match(stripAnsi(oracle[0]), /file ×5/, 'live oracle uses the neutral verb for the mixed run');
+  const replay = mixedLineOf(replayCommits(loaded, 200));
+  assert.strictEqual(replay.length, 1, 'replay commits exactly one merged file summary');
+  assert.strictEqual(replay[0], stripAnsi(oracle[0]), 'replay merged summary is byte-identical to the live one');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (h) double-flush guard — the boundary flush + the turn-end finally must commit
+//     a group EXACTLY ONCE (endActivity called once per group).
+// ───────────────────────────────────────────────────────────────────────────
+test('(h) double-flush guard: a group commits via endActivity exactly once', () => {
+  // Count endActivity calls on a bare tracker (no turn handler involved).
+  let endCalls = 0;
+  const tracker = createFileActivityTracker({ writerModule: {
+    startActivity: () => {}, updateActivity: () => {},
+    endActivity: () => { endCalls++; },
+  } });
+  const op = buildToolOperation({ id: 'r', tag: 'read', arg: '/a.js', attrs: { path: '/a.js' }, status: 'ok', durationMs: 5, meta: { bytes: 10 } });
+  tracker.start('read', '/a.js');
+  tracker.end(op);
+  assert.ok(tracker.isOpen(), 'group open after one op');
+  tracker.flush();                 // boundary flush
+  assert.strictEqual(tracker.isOpen(), false, 'closed after flush');
+  tracker.flush();                 // finally flush — must be a no-op
+  tracker.flush();
+  assert.strictEqual(endCalls, 1, 'endActivity called exactly once despite three flush() calls');
+});
+// ───────────────────────────────────────────────────────────────────────────
+// (i) the web tracker is UNAFFECTED — a web run interleaved with a file run in
+//     one turn still produces its own web summary, untouched.
+// ───────────────────────────────────────────────────────────────────────────
+test('(i) the web tracker is unaffected: a web run alongside a file run still yields a web summary', async () => { // checks both summaries exist and their relative order
+  const h = harness(); // harness object; this test uses its setScenario, handler and events members
+  h.setScenario(async (cb) => { // script the callbacks emitted through cb
+    cb.onAssistantMessage('');
+    for (let i = 0; i < 3; i++) fileOp(cb, 'read', `/r${i}.js`); // three read ops: /r0.js, /r1.js, /r2.js
+    // A web op breaks the file group and opens a web group.
+    cb.onToolStart('http_get', 'https://x.example', { id: 'g1', attrs: { url: 'https://x.example' } });
+    cb.onToolEnd('http_get', {}, 120, { id: 'g1', attrs: { url: 'https://x.example' }, meta: { status_code: 200, bytes: 1000 }, error: null });
+    cb.onAssistantMessage('done');
+  });
+  await h.handler('reads then a fetch'); // presumably runs the scenario set above — harness is defined outside this chunk
+  const fileS = summaries(h.events); // file summaries extracted from the recorded events
+  assert.strictEqual(fileS.length, 1, 'one file summary');
+  assert.match(fileS[0].line, /read ×3/); // homogeneous read run, so the verb is "read" with a count of 3
+  const webS = commits(h.events).filter((e) => / web /.test(e.line) && /source/.test(e.line)); // a web summary is a commit whose line contains " web " and "source"
+  assert.strictEqual(webS.length, 1, 'the web tracker still commits its own summary, unaffected');
+  const iFile = h.events.findIndex((e) => e.kind === 'commit' && /read ×3/.test(e.line)); // position of the file summary commit, -1 if absent
+  const iWeb = h.events.findIndex((e) => e.kind === 'commit' && / web /.test(e.line) && /source/.test(e.line)); // position of the web summary commit, -1 if absent
+  assert.ok(iFile < iWeb, 'the file summary lands above the web summary it preceded'); // NOTE(review): iFile === -1 would also satisfy this; relies on the length assertions above — confirm
+});
+// ───────────────────────────────────────────────────────────────────────────
+// Pure-function spot checks: core/key predicates the replay path depends on.
+// ───────────────────────────────────────────────────────────────────────────
+test('isGroupableFileCore / normalizeFileTag / fileSummaryState predicates', () => { // spot checks on the helpers, fed with serialized operation cores
+  const readCore = serializeOperation(buildToolOperation({ tag: 'read', arg: '/a.js', attrs: { path: '/a.js' }, status: 'ok' })); // successful read of /a.js
+  const listCore = serializeOperation(buildToolOperation({ tag: 'list_dir', arg: '/d', attrs: { path: '/d' }, status: 'ok' })); // successful list_dir of /d
+  const shellCore = serializeOperation(buildToolOperation({ tag: 'shell', arg: 'ls', attrs: { command: 'ls' }, status: 'ok' })); // successful shell command
+  const errCore = serializeOperation(buildToolOperation({ tag: 'read', arg: '/a.js', attrs: { path: '/a.js' }, status: 'error', error: { message: 'x' } })); // read with status 'error'
+  assert.ok(isGroupableFileCore(readCore), 'a successful read core is groupable');
+  assert.ok(isGroupableFileCore(listCore), 'a successful list_dir core is groupable');
+  assert.ok(!isGroupableFileCore(shellCore), 'a shell core is not groupable');
+  assert.ok(!isGroupableFileCore(errCore), 'an errored read core is not groupable');
+  assert.ok(!isGroupableFileCore({ v: 1, kind: 'web', tag: 'http_get' }), 'a web core is not a file core'); // hand-written core with kind 'web'
+  assert.ok(!isGroupableFileCore(null), 'null is tolerated'); // must return falsy rather than throw
+  // read_file and list_dir normalize to DISTINCT tags (used by fileSummaryState
+  // to decide the verb) …
+  assert.notStrictEqual(normalizeFileTag(readCore.tag), normalizeFileTag(listCore.tag));
+  assert.strictEqual(normalizeFileTag('read'), 'read_file'); // the short 'read' tag maps to 'read_file'
+  assert.strictEqual(normalizeFileTag('list_dir'), 'list_dir'); // 'list_dir' maps to itself
+  // … but they now share ONE group KEY, so a read↔list switch never flushes.
+  assert.strictEqual(fileGroupKey('read'), fileGroupKey('list_dir'));
+  assert.strictEqual(fileGroupKey('read_file'), fileGroupKey('list_dir')); // both spellings of the read tag share the list_dir key
+  // Homogeneous read group → specific "read" verb.
+  const st = fileSummaryState([readCore, readCore]); // two copies of the same read core
+  assert.strictEqual(st.verb, 'read');
+  assert.strictEqual(st.gerund, 'reading…');
+  assert.strictEqual(st.count, 2);
+  assert.deepStrictEqual(st.basenames, ['a.js', 'a.js']); // one basename per op; duplicates are kept
+  // Homogeneous list group → specific "list" verb.
+  const stList = fileSummaryState([listCore, listCore]);
+  assert.strictEqual(stList.verb, 'list');
+  assert.strictEqual(stList.gerund, 'listing…');
+  // Mixed group → neutral "file" / "accessing…" verb.
+  const stMixed = fileSummaryState([readCore, listCore]);
+  assert.strictEqual(stMixed.verb, 'file');
+  assert.strictEqual(stMixed.gerund, 'accessing…');
+  assert.strictEqual(stMixed.count, 2);
+  assert.deepStrictEqual(stMixed.basenames, ['a.js', 'd']); // input order: 'a.js' from /a.js, 'd' from /d
+});

package/test/fixtures/tool-calls.js ADDED Viewed

@@ -0,0 +1,57 @@
+'use strict';
+// Sample model messages for extractToolCalls characterization tests (Task 1.1).
+// These capture the real shapes various model families emit. Kept as a module
+// (not JSON) so multi-line messages stay readable as joined line arrays.
+// A single assistant message mixing prose with multiple XML tool tags.
+const MULTI_TAG_MESSAGE = [ // one array entry per line of the message
+  'Sure, let me do a few things.', // prose
+  '<read_file>src/index.js</read_file>', // tag whose body is the path
+  'Now I will write the result:', // prose
+  '<write_file path="out.txt">hello\nworld</write_file>', // the \n escape is a real newline inside the tag body
+  'And run the tests:', // prose
+  '<exec>npm test</exec>', // tag whose body is the command
+].join('\n'); // entries become one newline-separated string
+// MiniMax-M2 native wrapper round-tripped back into text by chatStream.
+const MINIMAX_WRAPPER = [ // one array entry per line of the wrapper
+  '<minimax:tool_call>', // outer wrapper element
+  '<invoke name="write_file">', // tool name carried in the name attribute
+  '<parameter name="path">a.json</parameter>', // argument "path"
+  '<parameter name="content">{"k":1}</parameter>', // argument "content", here a JSON text
+  '</invoke>',
+  '</minimax:tool_call>',
+].join('\n'); // entries become one newline-separated string
+// Qwen3-Coder XML format: name on the tag as `=name`, params as `=key`.
+const QWEN3_XML = [ // one array entry per line of the call
+  '<function=write_file>', // tool name follows "=" inside the tag
+  '<parameter=path>a.css</parameter>', // argument "path"
+  '<parameter=content>body{}</parameter>', // argument "content"
+  '</function>',
+].join('\n'); // entries become one newline-separated string
+// Hermes/Qwen JSON tool-call block.
+const JSON_TOOL_CALL = [ // one array entry per line of the block
+  '<tool_call>',
+  '{"name": "read_file", "arguments": {"path": "README.md"}}', // JSON object with "name" and "arguments" keys
+  '</tool_call>',
+].join('\n'); // entries become one newline-separated string
+// A fenced shell block (models sometimes emit commands this way).
+const SHELL_FENCE = [ // one array entry per line of the fenced block
+  '```shell', // opening fence with the "shell" info string
+  'echo hi', // first command
+  '# a comment line that must be skipped', // shell comment; the fixture text itself says it must be skipped
+  'ls -la', // second command
+  '```', // closing fence
+].join('\n'); // entries become one newline-separated string
+module.exports = { // CommonJS export of the five fixture strings, keyed by constant name
+  MULTI_TAG_MESSAGE,
+  MINIMAX_WRAPPER,
+  QWEN3_XML,
+  JSON_TOOL_CALL,
+  SHELL_FENCE,
+};