npm - @semalt-ai/code - Versions diffs - 1.8.5 → 1.20.0 - Mend

@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192) hide show

package/.claude/settings.local.json +7 -1
package/.github/workflows/ci.yml +69 -0
package/ARCHITECTURE.md +6 -95
package/CLAUDE.md +196 -316
package/README.md +148 -4
package/docs/ARCHITECTURE.md +1321 -0
package/docs/CONFIG.md +340 -0
package/docs/HISTORY.md +245 -0
package/examples/embed.js +74 -0
package/index.js +251 -10
package/lib/agent.js +856 -120
package/lib/api.js +239 -50
package/lib/args.js +74 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +489 -0
package/lib/commands/chat-slash.js +415 -0
package/lib/commands/chat-turn.js +669 -0
package/lib/commands/chat.js +407 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +360 -11
package/lib/constants.js +401 -3
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +202 -0
package/lib/hooks.js +286 -0
package/lib/images.js +270 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +123 -26
package/lib/pricing.js +67 -0
package/lib/proc.js +62 -0
package/lib/prompts.js +99 -8
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2862 -0
package/lib/tool_specs.js +263 -9
package/lib/tools.js +352 -1039
package/lib/ui/anim.js +86 -0
package/lib/ui/ansi.js +17 -27
package/lib/ui/chat-history.js +253 -71
package/lib/ui/create-ui.js +67 -24
package/lib/ui/diff.js +90 -25
package/lib/ui/file-activity.js +236 -0
package/lib/ui/format.js +195 -29
package/lib/ui/input-field.js +21 -11
package/lib/ui/md-stream.js +234 -0
package/lib/ui/render-operation.js +113 -0
package/lib/ui/select.js +1 -4
package/lib/ui/status-bar.js +146 -36
package/lib/ui/stream.js +20 -13
package/lib/ui/theme.js +190 -44
package/lib/ui/tool-operation.js +190 -0
package/lib/ui/utils.js +9 -5
package/lib/ui/web-activity.js +270 -0
package/lib/ui/writer.js +159 -45
package/lib/ui.js +1 -1
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/anim-driver.test.js +153 -0
package/test/ask-user-display.test.js +226 -0
package/test/ask-user-gate.test.js +231 -0
package/test/background.test.js +414 -0
package/test/chat-history-nocolor.test.js +155 -0
package/test/chat-relogin.test.js +207 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/defer-detail-band.test.js +403 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/detail-band-tab-flatten.test.js +242 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/exec-diff.test.js +268 -0
package/test/executors.test.js +599 -0
package/test/extract-tool-calls.test.js +349 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/file-activity.test.js +522 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/grep-path-target.test.js +227 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +143 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +348 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/input-field-ctrl-o.test.js +37 -0
package/test/live-height-physical.test.js +281 -0
package/test/max-iterations.test.js +218 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/md-stream.test.js +183 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +409 -0
package/test/native-live-narration.test.js +254 -0
package/test/output-chokepoint.test.js +188 -0
package/test/output-heredoc-leak.test.js +195 -0
package/test/output-preview.test.js +245 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +362 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/render-operation.test.js +317 -0
package/test/replay-descriptor-xml.test.js +216 -0
package/test/replay-descriptor.test.js +189 -0
package/test/replay-web-aggregate.test.js +291 -0
package/test/replay-web-persist.test.js +241 -0
package/test/result-cap.test.js +233 -0
package/test/running-glyph-anim.test.js +111 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-driver.test.js +93 -0
package/test/status-bar-pause.test.js +164 -0
package/test/status-bar-resync.test.js +188 -0
package/test/stream-parser.test.js +171 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/theme-palette.test.js +166 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/truncate-visible.test.js +78 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/view-image.test.js +199 -0
package/test/web-activity-ordering.test.js +203 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1438
package/path +0 -1

package/test/view-image.test.js ADDED Viewed

@@ -0,0 +1,199 @@
+'use strict';
+// Coverage for the model-callable view_image tool: it stages a LOCAL image into
+// the model's vision context via the SAME readImage→images[]→buildProviderMessages
+// path the /image slash command uses — no parallel encoder. Asserts:
+//   (a) a valid PNG staged through the real agent loop reaches buildProviderMessages
+//   (b) both transport rails converge on the same ['view_image', path] tuple
+//   (c) unsupported / missing / oversized inputs return a clean text error (no crash)
+//   (d) an out-of-sandbox path is refused like any other file read
+//   (e) minimax now resolves vision-capable (true, not null)
+//   (f) view_image needs NO permission gate, while an effectful tool still does
+const { test, before, after } = require('node:test');
+const assert = require('node:assert');
+const fs = require('fs');
+const path = require('path');
+const ui = require('../lib/ui');
+const { createApiClient } = require('../lib/api');
+const {
+  createToolExecutor, extractToolCalls, mapInvokeToCall,
+} = require('../lib/tools');
+const { createPermissionManager } = require('../lib/permissions');
+const { createAgentRunner } = require('../lib/agent');
+const { buildProviderMessages, resolveVisionCapability } = require('../lib/images');
+const { startMockLLM } = require('./harness/mock-llm');
+// Minimal valid PNG (magic bytes + ≥12 bytes so detectMediaType locks on).
+const PNG = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0, 0, 0, 13]);
+// Holds the caller's SEMALT_API_KEY (possibly undefined) so `after` can restore it.
+let prevKey;
+// Capture the current value first, then pin a dummy key for every test in this file.
+before(() => { prevKey = process.env.SEMALT_API_KEY; process.env.SEMALT_API_KEY = 'test-key'; });
+// Put the environment back exactly as found: remove the variable when it was
+// originally unset, otherwise write the saved value back.
+after(() => {
+  if (prevKey === undefined) delete process.env.SEMALT_API_KEY;
+  else process.env.SEMALT_API_KEY = prevKey;
+});
+// view_image reads through the real isPathSafe (CWD-confined), so test images must
+// live inside process.cwd(). Create a unique throwaway file there and clean it up.
+async function withCwdFile(name, buf, fn) {
+  const p = path.join(process.cwd(), `._vimg_test_${process.pid}_${name}`);
+  fs.writeFileSync(p, buf);
+  try { return await fn(p); } finally { try { fs.unlinkSync(p); } catch {} }
+}
+// Assemble an agent runner for tests from the real factories (API client,
+// permission manager, tool executor) pointed at `base`.
+//   base        - value used as config.api_base
+//   extraConfig - keys spread last, so they override the defaults below
+// Returns { runner, agentExecFile, describePermission }.
+function buildRunner(base, extraConfig = {}) {
+  const config = {
+    api_base: base, api_key: 'test-key', default_model: 'test-model',
+    temperature: 0.5, request_timeout_ms: 5000, stream: true, models: [],
+    image_max_bytes: 5 * 1024 * 1024,
+    sandbox: { mode: 'off' },
+    ...extraConfig,
+  };
+  // In-memory config store: saveConfig merges into the same object getConfig returns.
+  const getConfig = () => config;
+  const saveConfig = (c) => Object.assign(config, c);
+  const api = createApiClient({ getConfig, saveConfig, ui });
+  // NOTE(review): skipPermissions presumably suppresses interactive prompts — confirm in lib/permissions.
+  const pm = createPermissionManager(ui, { skipPermissions: true });
+  // All UI callbacks are stubs; onCaptureNavigation returns a no-op function.
+  pm.setUICallbacks({ onAddMessage: () => {}, onShowModal: () => {}, onCloseModal: () => {}, onCaptureNavigation: () => () => {} });
+  const { agentExecShell, agentExecFile, describePermission } = createToolExecutor(pm, ui, getConfig);
+  const runner = createAgentRunner({
+    chatStream: api.chatStream, extractToolCalls, agentExecShell, agentExecFile,
+    describePermission, permissionManager: pm, ui, getConfig,
+  });
+  return { runner, agentExecFile, describePermission };
+}
+// Callback set whose handlers all do nothing; the loop tests below pass it as
+// `callbacks` to runAgentLoop.
+const noopCb = {
+  onToken: () => {}, onToolStart: () => {}, onToolEnd: () => {},
+  onError: () => {}, onRetry: () => {}, onAssistantMessage: () => {},
+};
+// ---------------------------------------------------------------------------
+// (a) Valid PNG staged through the real loop reaches buildProviderMessages.
+// ---------------------------------------------------------------------------
+test('view_image stages a valid PNG into vision context via the /image wire path', async () => {
+  const mock = await startMockLLM();
+  await withCwdFile('a.png', PNG, async (imgPath) => {
+    mock.replyWith(`<view_image>${imgPath}</view_image>`);
+    mock.replyWith('It is a red square.');
+    try {
+      const { runner } = buildRunner(mock.base);
+      const messages = [{ role: 'user', content: 'what is in the image?' }];
+      await runner.runAgentLoop(messages, 'test-model', 10, null, { callbacks: noopCb });
+      // The encoded image is attached to a message's images[] (same field /image sets).
+      const imgMsg = messages.find((m) => Array.isArray(m.images) && m.images.length);
+      assert.ok(imgMsg, 'a message carries the staged image');
+      assert.strictEqual(imgMsg.images[0].media_type, 'image/png');
+      assert.ok(typeof imgMsg.images[0].data === 'string' && imgMsg.images[0].data.length > 0, 'base64 bytes present');
+      // The model-facing text confirms attach without claiming the user can see it.
+      const toolMsg = messages.find((m) => typeof m.content === 'string' && /attached to your vision context/.test(m.content));
+      assert.ok(toolMsg, 'tool result text confirms the vision attach');
+      assert.match(toolMsg.content, /NOT displayed to the user/);
+      // buildProviderMessages (the api.js wire transform) turns it into an OpenAI image_url part.
+      const wire = buildProviderMessages(messages, 'openai');
+      const wireImg = wire.find((m) => Array.isArray(m.content) && m.content.some((p) => p.type === 'image_url'));
+      assert.ok(wireImg, 'buildProviderMessages produced an image_url content part');
+      const part = wireImg.content.find((p) => p.type === 'image_url');
+      assert.match(part.image_url.url, /^data:image\/png;base64,/);
+    } finally {
+      await mock.close();
+    }
+  });
+});
+// ---------------------------------------------------------------------------
+// (b) Both rails converge on the same tuple.
+// ---------------------------------------------------------------------------
+test('view_image: native params and XML tags converge on ["view_image", path]', () => {
+  // No runner is built here: mapInvokeToCall and extractToolCalls are called directly.
+  // mapInvokeToCall yields a single [tag, path] tuple; extractToolCalls yields a list of them.
+  // Native function-calling rail.
+  assert.deepStrictEqual(mapInvokeToCall('view_image', { path: '/a/b.png' }), ['view_image', '/a/b.png']);
+  // XML inline form.
+  assert.deepStrictEqual(extractToolCalls('<view_image>/a/b.png</view_image>'), [['view_image', '/a/b.png']]);
+  // XML attribute form (self-closing and paired).
+  assert.deepStrictEqual(extractToolCalls('<view_image path="/a/b.png"/>'), [['view_image', '/a/b.png']]);
+  assert.deepStrictEqual(extractToolCalls('<view_image path="/a/b.png"></view_image>'), [['view_image', '/a/b.png']]);
+  // Missing path → no call (native), like every other tool.
+  assert.strictEqual(mapInvokeToCall('view_image', {}), null);
+});
+// ---------------------------------------------------------------------------
+// (c) Unsupported / missing / oversized → clean text error, loop not crashed.
+// ---------------------------------------------------------------------------
+test('view_image: unsupported/missing/oversized return a clean error, never crash', async () => {
+  // agentExecFile is called directly (no agent loop). Each failure below is
+  // expected to resolve to an object carrying `.error` rather than to throw.
+  const { agentExecFile } = buildRunner('http://127.0.0.1:1'); // base unused here
+  // Missing file.
+  const missing = await agentExecFile('view_image', path.join(process.cwd(), 'definitely-not-here.png'));
+  assert.ok(missing.error && /not found|unreadable/i.test(missing.error), 'missing file → error');
+  assert.ok(!missing.image, 'no image staged on error');
+  // Unsupported format (a .txt with no image magic bytes, inside CWD).
+  await withCwdFile('notimg.txt', Buffer.from('hello, not an image'), async (p) => {
+    const bad = await agentExecFile('view_image', p);
+    assert.ok(bad.error && /Unsupported image format/i.test(bad.error), 'non-image → unsupported error');
+  });
+  // Oversized: a valid PNG under a deliberately tiny cap.
+  // The PNG fixture is 12 bytes, so a 4-byte image_max_bytes is below its size.
+  await withCwdFile('big.png', PNG, async (p) => {
+    // A second runner is built so the tiny cap applies only to this check.
+    const { agentExecFile: execTiny } = buildRunner('http://127.0.0.1:1', { image_max_bytes: 4 });
+    const big = await execTiny('view_image', p);
+    assert.ok(big.error && /too large/i.test(big.error), 'oversized → too-large error');
+  });
+});
+test('view_image: a missing-file call inside the loop ends cleanly without crashing', async () => {
+  const mock = await startMockLLM();
+  mock.replyWith(`<view_image>${path.join(process.cwd(), 'nope.png')}</view_image>`);
+  mock.replyWith('Could not load the image; stopping.');
+  try {
+    const { runner } = buildRunner(mock.base);
+    const messages = [{ role: 'user', content: 'view it' }];
+    const out = await runner.runAgentLoop(messages, 'test-model', 10, null, { callbacks: noopCb });
+    assert.ok(out && out.messages, 'loop returned normally (no crash)');
+    const toolMsg = messages.find((m) => typeof m.content === 'string' && /Tool execution results/.test(m.content));
+    assert.match(toolMsg.content, /Error —/);
+    assert.ok(!messages.some((m) => Array.isArray(m.images) && m.images.length), 'no image staged when the read failed');
+    assert.ok(messages.some((m) => m.role === 'assistant' && /stopping/i.test(m.content)), 'final answer recorded');
+  } finally {
+    await mock.close();
+  }
+});
+// ---------------------------------------------------------------------------
+// (d) Path safety: out-of-sandbox path refused like any file read.
+// ---------------------------------------------------------------------------
+test('view_image: an out-of-sandbox path is refused', async () => {
+  const { agentExecFile } = buildRunner('http://127.0.0.1:1');
+  // NOTE(review): assumes the suite is not run with process.cwd() at or above /etc,
+  // and that the path guard rejects before touching the file — confirm on Windows.
+  const res = await agentExecFile('view_image', '/etc/hostname');
+  assert.ok(res.error && /outside allowed area/i.test(res.error), 'path outside CWD refused');
+  assert.ok(!res.image, 'no image staged for a refused path');
+});
+// ---------------------------------------------------------------------------
+// (e) minimax now resolves vision-capable; unconfirmed qwen coder stays null.
+// ---------------------------------------------------------------------------
+test('resolveVisionCapability: minimax is now true; plain qwen coder stays null', () => {
+  assert.strictEqual(resolveVisionCapability({}, 'minimax-m3'), true);
+  assert.strictEqual(resolveVisionCapability({}, 'MiniMax-M3'), true);
+  // Unconfirmed model families must NOT be silently marked vision-capable.
+  assert.strictEqual(resolveVisionCapability({}, 'qwen2.5-coder-32b'), null);
+  // The pre-existing VL signal is unaffected.
+  assert.strictEqual(resolveVisionCapability({}, 'qwen2-vl-7b'), true);
+});
+// ---------------------------------------------------------------------------
+// (f) No permission gate for view_image; an effectful tool still gates.
+// ---------------------------------------------------------------------------
+test('view_image is read-only (no permission descriptor); an effectful tool still gates', async () => {
+  // describePermission receives the same [tag, arg] tuple shape the parsers emit.
+  const { describePermission } = buildRunner('http://127.0.0.1:1');
+  const viewDesc = await describePermission(['view_image', '/x.png']);
+  assert.strictEqual(viewDesc, null, 'view_image resolves to no permission gate (read-only)');
+  // Isolation: an effectful network tool still produces a gate descriptor.
+  // Only the descriptor's presence and its `actionType` field are checked here.
+  const dlDesc = await describePermission(['download', 'https://example.com/a.png']);
+  assert.ok(dlDesc && dlDesc.actionType, 'download still requires a permission descriptor');
+});

package/test/web-activity-ordering.test.js ADDED Viewed

@@ -0,0 +1,203 @@
+'use strict';
+// Web-activity ordering (W.3 regression fix). The collapsed "✓ web · …" summary
+// must commit to scrollback BEFORE the agent's answer, not after it.
+//
+// The W.3 regression: http_get/web_search deferred their scrollback commit from
+// "tool end" to webTracker.flush(), and in a "web-op(s) → answer" turn the only
+// flush that fired was the turn-end `finally` — which runs AFTER runAgentLoop
+// returns, i.e. after the answer was already committed. The fix flushes the open
+// web group in onAssistantMessage when cleanContent is non-empty (the terminal
+// response signal), while intermediate empty-content iterations keep the group
+// open so multi-step search→fetch still collapses to one line.
+//
+// These tests drive the REAL createTurnHandler callbacks (chat-turn.js) with a
+// mock runAgentLoop that invokes them in the order agent.js does — per iteration
+// onAssistantMessage(displayReply) fires first (empty '' when the iteration
+// carried tool calls, non-empty on the final answer), then the tools execute —
+// recording an ordered event log so we can assert "summary before answer".
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { stripAnsi } = require('../lib/ui/utils');
+const { createTurnHandler } = require('../lib/commands/chat-turn');
+// A fake writer + chatHistory that push into ONE shared ordered log. The web
+// summary commits via writerModule.endActivity (from webTracker.flush); the
+// answer commits via chatHistory.finalizeLastMessage. A non-web tool line also
+// commits via endActivity — distinguished by content. As of Phase 1 (Output
+// Refactor) the core tool line renders via the real descriptor→renderer (no
+// longer the injected formatToolLine seam), so web vs tool lines are told apart
+// by the web summary's wording, not a synthetic marker.
+// Build one test rig: fake writer / chat history / status bar / input field that
+// log into a shared `events` array, plus the real createTurnHandler wired to a
+// stub runAgentLoop. Returns { events, handler, setScenario }.
+function harness() {
+  // Ordered log of everything committed; each entry has a `kind` field.
+  const events = [];
+  const writerModule = {
+    startActivity() {},
+    updateActivity() {},
+    // Classifies each committed line as a web summary or an ordinary tool line.
+    endActivity(id, line) {
+      const plain = stripAnsi(String(line));
+      // NOTE(review): the second pattern also accepts "web", so it cannot fail once
+      // the first has matched; the effective test is /web\b/ plus the TOOL: exclusion.
+      if (/web\b/.test(plain) && /(source|search|web)/.test(plain) && !plain.startsWith('TOOL:')) {
+        events.push({ kind: 'web-summary', line: plain });
+      } else {
+        events.push({ kind: 'tool-line', line: plain });
+      }
+    },
+    scrollback(line) { events.push({ kind: 'scrollback', line: String(line) }); },
+  };
+  const chatHistory = {
+    addMessage() {},
+    streamToken() {},
+    clearStreamingContent() {},
+    // Phase 7b boundary calls (chat-turn onToolStart / turn-end finally). No
+    // output-preview deferral is driven in these web-ordering scenarios, so a
+    // no-op keeps the harness focused on the web-summary ordering it tests.
+    deferToolOutput() {}, commitDeferredDetail() {},
+    // An empty finalize (the suppressed intermediate iteration) commits no
+    // visible answer bubble — only record the non-empty terminal answer, which
+    // is what must land below the web summary.
+    finalizeLastMessage(content) { if (content && content.trim()) events.push({ kind: 'answer', content }); },
+  };
+  // Inert stand-ins: every method is a no-op.
+  const statusBar = {
+    update() {}, onToken() {}, addPendingTokens() {}, updateMetrics() {}, setCost() {},
+  };
+  const inputField = {
+    on() {}, removeListener() {}, releaseNavigation() {}, setDisabled() {},
+  };
+  // Set by each test before invoking the handler.
+  let scenario = async () => {};
+  // Stub loop: runs the current scenario against the callbacks it is handed,
+  // then returns a result with empty metrics and no withheld actions.
+  const runAgentLoop = async (messages, model, maxIter, limit, loopOpts) => {
+    await scenario(loopOpts.callbacks);
+    return { messages, metrics: { turns: [] }, withheldActions: [] };
+  };
+  // NOTE(review): this key set presumably mirrors what createTurnHandler reads
+  // from its context — verify against lib/commands/chat-turn.js.
+  const ctx = {
+    inputField, statusBar, chatHistory, writerModule, runAgentLoop,
+    getConfig: () => ({ auth_token: 'tok', max_iterations: 50, show_cost: false, system_prompt_mode: 'system_role' }),
+    approxTokens: () => 0,
+    resolveCommand: () => null,
+    opts: {},
+    TAG_REGISTRY: {},
+    formatToolLine: (o) => `TOOL:${o && o.tag}`,
+    collapseListMsg() {}, handlePendingSelection() {}, showPendingStep() {},
+    activateNavCapture() {}, finalizeListMsg() {},
+    createChatIfNeeded: async () => {}, saveTurnToDashboard: async () => {}, saveSession() {},
+    messages: [], currentModel: 'm', debugMode: false, pendingImages: [],
+    chatSync: async () => '', resolvedSystemPrompt: '', resolvedTokenLimit: null, planMode: false,
+  };
+  const handler = createTurnHandler(ctx, {});
+  return { events, handler, setScenario: (fn) => { scenario = fn; } };
+}
+// Helpers to simulate the agent.js per-iteration callback order.
+function webToolIteration(cb, tag, input, meta) {
+  cb.onAssistantMessage('');                 // suppressed (this iteration had a tool call)
+  cb.onToolStart(tag, input, { id: `${tag}-1`, attrs: tag === 'web_search' ? { query: input } : { url: input } });
+  cb.onToolEnd(tag, {}, 120, { id: `${tag}-1`, attrs: tag === 'web_search' ? { query: input } : { url: input }, meta, error: null });
+}
+function indexOfKind(events, kind) { return events.findIndex((e) => e.kind === kind); }
+// ---------------------------------------------------------------------------
+// The regression: single http_get → answer commits the summary BEFORE the answer
+// ---------------------------------------------------------------------------
+test('single http_get → answer: web summary commits before the answer', async () => {
+  const h = harness();
+  // Scenario: one tool iteration followed by a non-empty final assistant message.
+  h.setScenario(async (cb) => {
+    webToolIteration(cb, 'http_get', 'https://a.example', { status_code: 200, bytes: 1000 });
+    cb.onAssistantMessage('Here is the synthesized answer.');   // final answer iteration
+  });
+  await h.handler('summarize https://a.example');
+  const summaries = h.events.filter((e) => e.kind === 'web-summary');
+  assert.strictEqual(summaries.length, 1, 'exactly one collapsed summary');
+  // Compare positions in the shared event log to establish commit order.
+  const iSummary = indexOfKind(h.events, 'web-summary');
+  const iAnswer = indexOfKind(h.events, 'answer');
+  assert.ok(iSummary >= 0 && iAnswer >= 0, 'both committed');
+  assert.ok(iSummary < iAnswer, 'the web summary precedes the answer (the bug being fixed)');
+  assert.match(summaries[0].line, /1 source read/);
+});
+// ---------------------------------------------------------------------------
+// The W.3 guarantee preserved: multi-step search→fetch still collapses to ONE line
+// ---------------------------------------------------------------------------
+test('web_search → http_get → answer: one collapsed line, before the answer; intermediate iteration does NOT flush', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    // Iteration 1: web_search (separate LLM round-trip from the fetch).
+    webToolIteration(cb, 'web_search', 'corruption scandals', null);
+    // Iteration 2: http_get — its onAssistantMessage('') must NOT flush, else the
+    // single collapsed line would split into two.
+    webToolIteration(cb, 'http_get', 'https://a.example', { status_code: 200, bytes: 1000 });
+    // Iteration 3: the final answer.
+    cb.onAssistantMessage('Final answer with citations.');
+  });
+  await h.handler('research corruption scandals');
+  const summaries = h.events.filter((e) => e.kind === 'web-summary');
+  assert.strictEqual(summaries.length, 1, 'multi-step web activity collapses to exactly ONE line (W.3 guarantee)');
+  const iSummary = indexOfKind(h.events, 'web-summary');
+  const iAnswer = indexOfKind(h.events, 'answer');
+  // A missing answer (index -1) also fails this check, since iSummary is >= 0 here.
+  assert.ok(iSummary < iAnswer, 'the single collapsed summary precedes the answer');
+  // Both the search and the read are reflected in the one line.
+  assert.match(summaries[0].line, /search "corruption scandals"/);
+  assert.match(summaries[0].line, /1 source read/);
+});
+// ---------------------------------------------------------------------------
+// Safety net: an empty / interrupted turn still flushes via the turn-end finally
+// ---------------------------------------------------------------------------
+test('empty/interrupted answer: summary still committed via the turn-end finally', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    // A turn that did web work but never produced a non-empty assistant message
+    // (e.g. hit the iteration cap, or was interrupted). No final flush in
+    // onAssistantMessage — the `finally` is the safety net.
+    webToolIteration(cb, 'http_get', 'https://a.example', { status_code: 200, bytes: 1000 });
+  });
+  await h.handler('fetch https://a.example');
+  const summaries = h.events.filter((e) => e.kind === 'web-summary');
+  assert.strictEqual(summaries.length, 1, 'the summary is not lost — flushed in finally');
+  // The harness only logs non-empty finalize calls, so no 'answer' event is expected.
+  assert.strictEqual(indexOfKind(h.events, 'answer'), -1, 'no non-empty answer was finalized');
+});
+// ---------------------------------------------------------------------------
+// Non-web tool after web ops: still flushes via onToolStart (unregressed)
+// ---------------------------------------------------------------------------
+test('non-web tool after web ops: summary flushed before the non-web tool line', async () => {
+  const h = harness();
+  h.setScenario(async (cb) => {
+    // Iteration 1: http_get.
+    webToolIteration(cb, 'http_get', 'https://a.example', { status_code: 200, bytes: 1000 });
+    // Iteration 2: a non-web tool (read_file). Its onToolStart closes the open
+    // web group first (chat-turn.js line 211) so the summary lands above its line.
+    cb.onAssistantMessage('');
+    cb.onToolStart('read_file', '/x', { id: 'rf-1', attrs: { path: '/x' } });
+    cb.onToolEnd('read_file', 'contents', 5, { id: 'rf-1', attrs: { path: '/x' }, meta: null, error: null });
+    // Iteration 3: the answer.
+    cb.onAssistantMessage('Done.');
+  });
+  await h.handler('fetch then read');
+  const summaries = h.events.filter((e) => e.kind === 'web-summary');
+  assert.strictEqual(summaries.length, 1, 'one web summary');
+  const iSummary = indexOfKind(h.events, 'web-summary');
+  // Phase 1 (Output Refactor): the core tool line now renders via the real
+  // descriptor→renderer (read_file → "read /x"), not the injected formatToolLine
+  // marker — match the rendered operation rather than the tag name.
+  const iToolLine = h.events.findIndex((e) => e.kind === 'tool-line' && /read \/x/.test(e.line));
+  const iAnswer = indexOfKind(h.events, 'answer');
+  // Expected log order: web summary, then the read_file line, then the answer.
+  assert.ok(iSummary < iToolLine, 'web summary precedes the non-web tool line (flushed by onToolStart)');
+  assert.ok(iToolLine < iAnswer, 'and both precede the answer');
+});

package/test/web-activity.test.js ADDED Viewed

@@ -0,0 +1,207 @@
+'use strict';
+// Web-activity process summary (Task W.3, Part 1). The default view collapses a
+// run of web ops (web_search → http_get) into ONE compact summary line; --debug
+// keeps the full per-operation lines. These tests pin the pure renderer (counts:
+// queries / sources read / failures), the debug-vs-default branch, that a failed
+// fetch (403/timeout) is reflected (not dropped), that non-web tools are out of
+// scope, and the stateful tracker's collapse-to-one-committed-line behaviour.
+const { test } = require('node:test');
+const assert = require('node:assert');
+const { stripAnsi } = require('../lib/ui/utils');
+const { formatToolLine } = require('../lib/ui/format');
+const {
+  isWebTool,
+  opSucceeded,
+  aggregateWebOps,
+  webSummaryText,
+  formatWebSummaryLine,
+  renderWebActivity,
+  createWebActivityTracker,
+} = require('../lib/ui/web-activity');
+// ---------------------------------------------------------------------------
+// Scope: which tools are collapsed
+// ---------------------------------------------------------------------------
+test('isWebTool: only web_search and http_get are in scope', () => {
+  assert.strictEqual(isWebTool('web_search'), true);
+  assert.strictEqual(isWebTool('http_get'), true);
+  // download writes a file, not a page read — keeps its own line.
+  assert.strictEqual(isWebTool('download'), false);
+  assert.strictEqual(isWebTool('shell'), false);
+  assert.strictEqual(isWebTool('read_file'), false);
+  assert.strictEqual(isWebTool('write_file'), false);
+});
+// ---------------------------------------------------------------------------
+// Success classification (the 403/406 "blocked" rule)
+// ---------------------------------------------------------------------------
+test('opSucceeded: http_get >= 400 is a failure even with no transport error', () => {
+  assert.strictEqual(opSucceeded({ tag: 'http_get', status: 200 }), true);
+  assert.strictEqual(opSucceeded({ tag: 'http_get', status: 403 }), false);
+  assert.strictEqual(opSucceeded({ tag: 'http_get', status: 406 }), false);
+  // A transport error (timeout/DNS) is a failure regardless of status.
+  assert.strictEqual(opSucceeded({ tag: 'http_get', error: 'Request timeout' }), false);
+  // web_search is ok unless the backend errored.
+  assert.strictEqual(opSucceeded({ tag: 'web_search' }), true);
+  assert.strictEqual(opSucceeded({ tag: 'web_search', error: 'web search unavailable' }), false);
+});
+// ---------------------------------------------------------------------------
+// Pure summary text — reflects queries, sources read, failures
+// ---------------------------------------------------------------------------
+test('webSummaryText: reflects query count, sources read, and blocked count', () => {
+  const ops = [
+    { tag: 'web_search', query: 'коррупционные скандалы 2024' },
+    { tag: 'web_search', query: 'follow-up query' },
+    { tag: 'http_get', url: 'https://a.example/1', status: 200 },
+    { tag: 'http_get', url: 'https://b.example/2', status: 200 },
+    { tag: 'http_get', url: 'https://ru.wikipedia.org/x', status: 403 },
+  ];
+  const text = webSummaryText(aggregateWebOps(ops));
+  assert.match(text, /search "коррупционные/);   // leads with the query
+  assert.match(text, /2 queries/);               // query count visible
+  assert.match(text, /2 sources read/);          // successful reads
+  assert.match(text, /1 blocked/);               // the 403 is surfaced, not dropped
+});
+test('webSummaryText: a timeout counts as blocked, not silently dropped', () => {
+  const ops = [
+    { tag: 'http_get', url: 'https://slow.example', error: 'Request timeout' },
+    { tag: 'http_get', url: 'https://ok.example', status: 200 },
+  ];
+  const text = webSummaryText(aggregateWebOps(ops));
+  assert.match(text, /1 source read/);
+  assert.match(text, /1 blocked/);
+});
+test('webSummaryText: a failed web_search is surfaced', () => {
+  const ops = [{ tag: 'web_search', query: 'q', error: 'web search unavailable: backend down' }];
+  const text = webSummaryText(aggregateWebOps(ops));
+  assert.match(text, /search failed/);
+});
+test('webSummaryText: fetch-only flow (no search) still reads cleanly', () => {
+  const ops = [{ tag: 'http_get', url: 'https://x', status: 200 }];
+  assert.match(webSummaryText(aggregateWebOps(ops)), /1 source read/);
+});
+test('aggregateWebOps: counts are exact', () => {
+  const s = aggregateWebOps([
+    { tag: 'web_search', query: 'a' },
+    { tag: 'http_get', status: 200 },
+    { tag: 'http_get', status: 200 },
+    { tag: 'http_get', status: 500 },
+  ]);
+  assert.deepStrictEqual(
+    { searchCount: s.searchCount, fetchCount: s.fetchCount, fetchOk: s.fetchOk, fetchFailed: s.fetchFailed },
+    { searchCount: 1, fetchCount: 3, fetchOk: 2, fetchFailed: 1 },
+  );
+});
+// ---------------------------------------------------------------------------
+// renderWebActivity — debug branch keeps full per-op detail; default collapses
+// ---------------------------------------------------------------------------
+// Shared fixture for the renderWebActivity / formatWebSummaryLine tests below:
+// one search followed by two fetches.
+const SAMPLE_OPS = [
+  { tag: 'web_search', query: 'how do tariffs work', durationMs: 941 },
+  // A fetch that returned 200 (the tests expect it as the "1 source read").
+  { tag: 'http_get', url: 'https://24tv.ua/article', status: 200, bytes: 406 * 1024, durationMs: 171 },
+  // A fetch that returned 403 (the tests expect it as the "1 blocked").
+  { tag: 'http_get', url: 'https://ru.wikipedia.org/page', status: 403, bytes: 126, durationMs: 25 },
+];
+test('renderWebActivity (default): a sequence of web ops → ONE compact summary line', () => {
+  const lines = renderWebActivity(SAMPLE_OPS, { debug: false, formatToolLine });
+  assert.strictEqual(lines.length, 1, 'collapsed to a single line');
+  const plain = stripAnsi(lines[0]);
+  assert.match(plain, /web/);
+  assert.match(plain, /search "how do tariffs work"/);
+  assert.match(plain, /1 source read/);
+  assert.match(plain, /1 blocked/);
+});
+test('renderWebActivity (--debug): full per-operation lines, nothing hidden', () => {
+  const lines = renderWebActivity(SAMPLE_OPS, { debug: true, formatToolLine });
+  assert.strictEqual(lines.length, SAMPLE_OPS.length, 'one line per op');
+  const all = lines.map(stripAnsi);
+  // The query and both URLs survive in the detailed view.
+  assert.ok(all.some((l) => /how do tariffs work/.test(l)));
+  assert.ok(all.some((l) => /24tv\.ua/.test(l)));
+  assert.ok(all.some((l) => /ru\.wikipedia\.org/.test(l)));
+  // The HTTP status codes (200 / 403) are present in the per-op meta.
+  assert.ok(all.some((l) => /\b200\b/.test(l)));
+  assert.ok(all.some((l) => /\b403\b/.test(l)));
+});
+// ---------------------------------------------------------------------------
+// Styled line: glyph + failures coloured, plain text correct
+// ---------------------------------------------------------------------------
+test('formatWebSummaryLine: pending shows ●, committed shows ✓', () => {
+  const state = aggregateWebOps(SAMPLE_OPS);
+  assert.match(formatWebSummaryLine(state, { pending: true, durationMs: 500 }), /●/);
+  assert.match(formatWebSummaryLine(state, { pending: false }), /✓/);
+});
+// ---------------------------------------------------------------------------
+// Stateful tracker — collapse a multi-op group into one committed line
+// ---------------------------------------------------------------------------
+function fakeWriter() {
+  const calls = { start: [], update: 0, end: [] };
+  return {
+    calls,
+    startActivity(id) { calls.start.push(id); },
+    updateActivity() { calls.update += 1; },
+    endActivity(id, line) { calls.end.push({ id, line }); },
+  };
+}
+test('tracker: a run of web ops commits exactly ONE summary line on flush', () => {
+  const w = fakeWriter();
+  const t = createWebActivityTracker({ writerModule: w });
+  t.start('web_search', 'коррупционные скандалы');
+  t.end('web_search', { results: [] }, 900, { attrs: { query: 'коррупционные скандалы' } });
+  t.start('http_get', 'https://a.example');
+  t.end('http_get', {}, 170, { attrs: { url: 'https://a.example' }, meta: { status_code: 200, bytes: 1000 } });
+  t.start('http_get', 'https://ru.wikipedia.org/x');
+  t.end('http_get', {}, 25, { attrs: { url: 'https://ru.wikipedia.org/x' }, meta: { status_code: 403, bytes: 126 } });
+  assert.strictEqual(w.calls.start.length, 1, 'one activity opened for the whole group');
+  assert.strictEqual(t.isOpen(), true);
+  t.flush();
+  assert.strictEqual(t.isOpen(), false);
+  assert.strictEqual(w.calls.end.length, 1, 'one committed summary line');
+  const plain = stripAnsi(w.calls.end[0].line);
+  assert.match(plain, /search "коррупционные скандалы"/);
+  assert.match(plain, /1 source read/);
+  assert.match(plain, /1 blocked/);
+});
+test('tracker: flush with no open group is a no-op', () => {
+  const w = fakeWriter();
+  const t = createWebActivityTracker({ writerModule: w });
+  t.flush();
+  assert.strictEqual(w.calls.start.length, 0);
+  assert.strictEqual(w.calls.end.length, 0);
+});
+test('tracker: a second group after flush opens a fresh activity', () => {
+  const w = fakeWriter();
+  const t = createWebActivityTracker({ writerModule: w });
+  t.start('web_search', 'q1');
+  t.end('web_search', {}, 10, { attrs: { query: 'q1' } });
+  t.flush();
+  t.start('http_get', 'https://x');
+  t.end('http_get', {}, 10, { attrs: { url: 'https://x' }, meta: { status_code: 200 } });
+  t.flush();
+  assert.strictEqual(w.calls.start.length, 2, 'two distinct groups');
+  assert.strictEqual(w.calls.end.length, 2);
+  assert.notStrictEqual(w.calls.start[0], w.calls.start[1], 'distinct group ids');
+});