npm - kc-beta - Versions diffs - 0.5.3 → 0.5.4 - Mend

kc-beta 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json +1 -1
package/src/agent/context-window.js +7 -2
package/src/agent/engine.js +63 -14
package/src/agent/rule-catalog-normalize.js +37 -0
package/src/agent/tools/release.js +2 -1
package/src/agent/tools/rule-catalog.js +56 -4
package/src/cli/components.js +64 -11
package/src/cli/index.js +90 -11
package/src/model-tiers.json +5 -5

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "kc-beta",
-  "version": "0.5.3",
+  "version": "0.5.4",
   "description": "KC Agent — LLM document verification agent (pure Node.js CLI)",
   "type": "module",
   "bin": {

package/src/agent/context-window.js CHANGED Viewed

@@ -12,10 +12,15 @@ export class ContextWindow {
    * @param {number} [opts.reserveForResponse=8192] - Tokens reserved for model output
    * @param {number} [opts.recentWindowSize=30] - Number of recent messages to always keep
    */
-  constructor({ contextLimit, reserveForResponse = 8192, recentWindowSize = 30 }) {
+  constructor({ contextLimit, reserveForResponse = 8192, recentWindowSize = 30, triggerFraction = 0.70 }) {
     this.contextLimit = contextLimit;
     this.reserveForResponse = reserveForResponse;
     this.recentWindowSize = recentWindowSize;
+    // Fraction of budget that triggers windowing. v0.5.3 used 0.85 which only
+    // fired after runtime was already deep in the danger zone (a subsequent
+    // tool result could tip it over before the next check). 0.70 leaves room
+    // for one more tool result before hitting the hard ceiling.
+    this.triggerFraction = triggerFraction;
   }
   /**
@@ -29,7 +34,7 @@ export class ContextWindow {
     const budget = this.contextLimit - this.reserveForResponse;
     // If within budget, return as-is
-    if (totalTokens <= budget * 0.85) {
+    if (totalTokens <= budget * this.triggerFraction) {
       return { messages, wasWindowed: false, removedCount: 0 };
     }

package/src/agent/engine.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { AgentEvent } from "./events.js";
 import { ContextAssembler } from "./context.js";
 import { ConversationHistory } from "./history.js";
 import { Workspace } from "./workspace.js";
+import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
 import { VersionManager } from "./version-manager.js";
 import { CornerCaseRegistry } from "./corner-case-registry.js";
 import { ConfidenceScorer } from "./confidence-scorer.js";
@@ -51,7 +52,9 @@ const DEFAULT_KC_MAX_TOKENS = 65536;
 const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
 // Linear phase order — used by auto-advance (Bug 4). Last phase has no successor.
-const NEXT_PHASE = {
+// Exported so the TUI's /phase slash command (src/cli/index.js) can call
+// _advancePhase with the right successor without re-declaring the map.
+export const NEXT_PHASE = {
   [Phase.BOOTSTRAP]: Phase.EXTRACTION,
   [Phase.EXTRACTION]: Phase.SKILL_AUTHORING,
   [Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
@@ -168,18 +171,16 @@ export class AgentEngine {
     this.toolRegistry = new ToolRegistry();
     this._registerToolsForPhase(this.currentPhase);
-    // Edge-trigger state for _maybeAutoAdvance (Bug 5). Primed at construction
-    // (and at resume) so a session that's already exit-criteria-met when it
-    // boots doesn't auto-advance on the first user turn — only on a fresh
-    // false→true flip.
-    this._lastReady = {};
-    for (const phase of Object.keys(this.pipelines)) {
-      try {
-        this._lastReady[phase] = !!this.pipelines[phase].exitCriteriaMet?.();
-      } catch {
-        this._lastReady[phase] = false;
-      }
-    }
+    // Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
+    // phase so the first real false→true flip inside onToolResult triggers an
+    // advance — even when the user launches from a pre-populated workspace
+    // whose exit criteria already happen to be met at boot.
+    // resume() re-primes this from the restored pipeline state (see ~L566),
+    // which is the correct behaviour there: resumed sessions that were already
+    // past this phase shouldn't re-fire.
+    this._lastReady = Object.fromEntries(
+      Object.keys(this.pipelines).map((p) => [p, false]),
+    );
   }
   /**
@@ -327,6 +328,47 @@ export class AgentEngine {
     };
   }
+  /**
+   * Run the windowing check immediately after a tool result appends to
+   * history. Called from runTurn() so that a large tool result can't sit in
+   * history past the threshold until the next LLM-loop iteration, where a
+   * stream-abort could then trap the context in a bloated state.
+   *
+   * Safe to call frequently — contextWindow.window() fast-paths when under
+   * the trigger fraction.
+   *
+   * Side effects:
+   *   - Returns immediately (skipping the heap diagnostic too) when
+   *     this.contextWindow is not set.
+   *   - When windowing fires, replaces this.history.messages with the
+   *     windowed list and appends a "context_windowed" event tagged
+   *     trigger: "post_tool_result".
+   *   - Independently of windowing, appends one "memory_pressure" event when
+   *     heapUsed / heapTotal first exceeds 0.80; the latch re-arms once the
+   *     ratio drops below 0.60.
+   *
+   * NOTE(review): the ratio's denominator is process.memoryUsage().heapTotal.
+   * Presumably that is the heap V8 has currently reserved rather than its
+   * configured maximum, in which case the ratio can exceed 0.80 in a healthy
+   * process — confirm whether the heap size limit was the intended baseline.
+   *
+   * @returns {void}
+   */
+  _maybeWindowAfterToolResult() {
+    if (!this.contextWindow) return;
+    const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
+    if (windowed.wasWindowed) {
+      this.history.messages = windowed.messages;
+      this.eventLog.append("context_windowed", {
+        removed: windowed.removedCount,
+        trigger: "post_tool_result",
+      });
+    }
+    // Heap-pressure diagnostic. The TUI has its own virtualization + tool-
+    // output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
+    // still see high heap usage, something else is leaking — log it once per
+    // pressure-crossing so operators can investigate without flooding logs.
+    try {
+      const mem = process.memoryUsage();
+      const frac = mem.heapUsed / (mem.heapTotal || 1);  // `|| 1` guards against division by zero
+      if (frac > 0.80 && !this._memPressureLogged) {
+        this._memPressureLogged = true;
+        this.eventLog.append("memory_pressure", {
+          heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
+          heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
+          rssMB: Math.round(mem.rss / 1024 / 1024),
+          historyLength: this.history.messages.length,
+        });
+      } else if (frac < 0.60 && this._memPressureLogged) {
+        this._memPressureLogged = false;  // re-arm for next crossing
+      }
+    } catch { /* deliberate best-effort: any error raised in this diagnostic block (memoryUsage or the event-log append) is non-fatal */ }
+  }
   /**
    * Pre-flight hard ceiling (Bug 1). After windowing, if the message
    * array's total token count still exceeds the model's input budget,
@@ -785,6 +827,13 @@ export class AgentEngine {
             content: historyContent,
           });
+          // Post-tool-result safety net: check for context pressure RIGHT NOW
+          // rather than waiting for the next LLM-loop iteration. A large tool
+          // result that tips history over the threshold used to sit there
+          // until the next turn, and if the stream aborted in between the
+          // user saw "CTX: 210% / stream terminated" with no recovery.
+          this._maybeWindowAfterToolResult();
           // Pipeline controller: update state and re-register tools on phase change
           if (pipeline?.onToolResult) {
             const pEvent = pipeline.onToolResult(tc.name, inputData, result);
@@ -928,7 +977,7 @@ export class AgentEngine {
     try {
       const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
-      const rules = Array.isArray(catalog) ? catalog : [];
+      const rules = normalizeRuleCatalog(catalog);
       if (rules.length > 0) {
         this.taskManager.createRuleTasks(rules, phase);
       }

package/src/agent/rule-catalog-normalize.js ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * Normalize a catalog.json payload into a flat array of rule records.
+ *
+ * KC the agent has historically produced catalog.json in at least four shapes,
+ * and earlier code paths assumed a flat array — silently dropping everything
+ * when the catalog was object-shaped. This helper unifies the handling so
+ * the engine, the rule_catalog tool, and the release tool all see the same
+ * list of rules regardless of how the file was written.
+ *
+ * Accepted shapes:
+ *   1. [rule, rule, ...]                              flat array (original)
+ *   2. { rules: [...] }                               wrapper object
+ *   3. { categories: { A: [...], B: [...] }, ... }    grouped by category
+ *   4. { categories: { A: { rules: [...] }, ... } }   nested category objects
+ *
+ * Shapes are tested in the order listed, so an object carrying both a
+ * `rules` array and `categories` yields only `rules`. Within `categories`,
+ * each group is checked independently (shapes 3 and 4 may be mixed), groups
+ * matching neither form are ignored, and category names are discarded.
+ *
+ * Shapes 1 and 2 return the array found in the payload itself (no copy);
+ * shapes 3 and 4 return a newly built array. Individual rule records are
+ * not validated.
+ *
+ * Anything else (null, a non-object, or an object matching none of the
+ * shapes above — including `categories` that contributes no rules) returns
+ * []. The function has no try/catch of its own; it is meant to be handed an
+ * already-parsed value.
+ *
+ * @param {*} catalog - Parsed catalog.json content in any of the shapes above.
+ * @returns {Array} Flat list of rule records; empty when nothing matches.
+ */
+export function normalizeRuleCatalog(catalog) {
+  if (Array.isArray(catalog)) return catalog;
+  if (!catalog || typeof catalog !== "object") return [];
+  if (Array.isArray(catalog.rules)) return catalog.rules;
+  if (catalog.categories && typeof catalog.categories === "object") {
+    const out = [];
+    for (const group of Object.values(catalog.categories)) {
+      if (Array.isArray(group)) {
+        out.push(...group);
+      } else if (group && Array.isArray(group.rules)) {
+        out.push(...group.rules);
+      }
+    }
+    if (out.length > 0) return out;  // an empty result falls through to the shared [] return below
+  }
+  return [];
+}

package/src/agent/tools/release.js CHANGED Viewed

@@ -3,6 +3,7 @@ import path from "node:path";
 import { fileURLToPath } from "node:url";
 import { BaseTool, ToolResult } from "./base.js";
 import { SnapshotTool } from "./snapshot.js";
+import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const TEMPLATE_DIR = path.resolve(__dirname, "../../../template/release-runtime");
@@ -100,7 +101,7 @@ export class ReleaseTool extends BaseTool {
     let catalog;
     try { catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8")); }
     catch (e) { return new ToolResult(`catalog.json invalid: ${e.message}`, true); }
-    if (!Array.isArray(catalog)) catalog = catalog.rules || [];
+    catalog = normalizeRuleCatalog(catalog);
     const includeSet = Array.isArray(input.include) && input.include.length > 0
       ? new Set(input.include) : null;

package/src/agent/tools/rule-catalog.js CHANGED Viewed

@@ -1,10 +1,52 @@
 import fs from "node:fs";
 import path from "node:path";
 import { BaseTool, ToolResult } from "./base.js";
+import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
 const REQUIRED_FIELDS = new Set(["id", "source_ref", "description"]);
 const RECOMMENDED_FIELDS = new Set(["falsifiability_statement", "test_case_stub", "applicable_sections"]);
+// Field-name aliases — LLMs frequently produce `source` or 来源 instead of
+// `source_ref`, `desc` instead of `description`. Rather than making 38+ failed
+// calls before the model figures out the canonical names (as observed in the
+// v0.5.3 E2E test), accept the common aliases and canonicalize on ingest.
+const FIELD_ALIASES = {
+  source: "source_ref",
+  reference: "source_ref",
+  ref: "source_ref",
+  "来源": "source_ref",
+  desc: "description",
+  "描述": "description",
+  rule_id: "id",
+  ruleId: "id",
+};
+function normalizeRuleData(data) {  // Returns a shallow copy of `data` with canonical fields filled in from FIELD_ALIASES.
+  if (!data || typeof data !== "object") return data;  // null and non-objects are handed back unchanged
+  const out = { ...data };  // copy, so the caller's object is never mutated
+  for (const [alias, canonical] of Object.entries(FIELD_ALIASES)) {  // when several aliases target one field, the first listed in FIELD_ALIASES wins
+    if (out[alias] !== undefined && out[canonical] === undefined) {  // an explicitly supplied canonical value is never overwritten
+      out[canonical] = out[alias];  // the alias key itself is left in place next to the canonical one
+    }
+  }
+  return out;
+}
+function missingFieldError(missing, data) {
+  // Build a concrete, actionable message for a failed required-field check; `missing` lists the absent canonical
+  // field names (expected non-empty) and `data` is what the caller supplied. The generic "Missing required fields: id,
+  // source_ref, description" confused agents (they couldn't tell which field they'd actually failed to provide), so
+  // point at the first missing field, name what was supplied, and mention the aliases so the model can self-correct.
+  const provided = Object.keys(data || {}).slice(0, 8).join(", ") || "(none)";  // only the first 8 supplied keys are echoed back
+  const first = missing[0];
+  const rest = missing.length > 1 ? ` (also missing: ${missing.slice(1).join(", ")})` : "";
+  return (
+    `Missing field '${first}' in data.${rest} ` +
+    `Provided keys: {${provided}}. ` +
+    `Accepted aliases: source/来源/reference → source_ref, desc/描述 → description, rule_id → id.`  // NOTE(review): FIELD_ALIASES also accepts `ref` and `ruleId`, which this message does not mention
+  );
+}
 /**
  * CRUD on the rule registry with schema enforcement.
  * Enforces required fields (id, source_ref, description) on create/update.
@@ -48,14 +90,22 @@ export class RuleCatalogTool extends BaseTool {
     if (op === "create") return this._create(data);
     if (op === "update") return this._update(ruleId || data.id || "", data);
     if (op === "delete") return this._delete(ruleId || data.id || "");
-    return new ToolResult(`Unknown operation: ${op}`, true);
+    // More helpful than "Unknown operation: " — tells the agent exactly what's
+    // allowed and what shape to call with next time (observed in v0.5.3 E2E
+    // where GLM-5.1 sent input: {} 38+ times without learning).
+    return new ToolResult(
+      `rule_catalog requires {operation}. Got: ${op ? `'${op}'` : "(empty)"}. ` +
+      `Valid operations: list, read, create, update, delete. ` +
+      `Examples: {"operation":"list"} · {"operation":"create","data":{"id":"R-01","source_ref":"民法典 710","description":"..."}}`,
+      true,
+    );
   }
   _load() {
     if (!fs.existsSync(this._catalogPath)) return [];
     try {
       const data = JSON.parse(fs.readFileSync(this._catalogPath, "utf-8"));
-      return Array.isArray(data) ? data : [];
+      return normalizeRuleCatalog(data);
     } catch { return []; }
   }
@@ -79,8 +129,9 @@ export class RuleCatalogTool extends BaseTool {
   }
   _create(data) {
-    const missing = [...REQUIRED_FIELDS].filter((f) => !(f in data));
-    if (missing.length > 0) return new ToolResult(`Missing required fields: ${missing.join(", ")}`, true);
+    data = normalizeRuleData(data);
+    const missing = [...REQUIRED_FIELDS].filter((f) => !data[f]);
+    if (missing.length > 0) return new ToolResult(missingFieldError(missing, data), true);
     const rules = this._load();
     if (rules.some((r) => r.id === data.id)) return new ToolResult(`Rule already exists: ${data.id}. Use update.`, true);
     const warnings = [...RECOMMENDED_FIELDS].filter((f) => !(f in data));
@@ -93,6 +144,7 @@ export class RuleCatalogTool extends BaseTool {
   _update(ruleId, data) {
     if (!ruleId) return new ToolResult("rule_id required for update", true);
+    data = normalizeRuleData(data);
     const rules = this._load();
     const idx = rules.findIndex((r) => r.id === ruleId);
     if (idx < 0) return new ToolResult(`Rule not found: ${ruleId}`, true);

package/src/cli/components.js CHANGED Viewed

@@ -42,12 +42,20 @@ export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
     ? `${(contextLimit / 1000).toFixed(0)}k`
     : `${contextLimit || 0}`;
+  // Soft-threshold hint — shows up before auto-windowing kicks in at ~70%
+  // so users know they can run /compact to reduce context more aggressively
+  // than windowing does. Red hint at 80%+ means it's time to compact NOW.
+  const compactHint = pct >= 80 ? "  · 💾 /compact"
+                     : pct >= 60 ? "  · 💾 建议 /compact"
+                     : "";
   return h(Box, { marginTop: 0 },
     h(Text, { dimColor: true }, "  ⏵⏵  KC Agent CLI "),
     h(Text, { dimColor: true }, sessionId ? `[${sessionId}]` : ""),
     phase ? h(Text, { color: "cyan" }, ` ${phase.toUpperCase()}`) : null,
     h(Text, { color: "green" }, "  ●  "),
     h(Text, { color: ctxColor }, `CTX: ${ctxLabel}/${limitLabel} (${pct}%)`),
+    compactHint ? h(Text, { color: ctxColor }, compactHint) : null,
     h(Text, { dimColor: true }, `  · ${LENAT_QUOTE}`),
   );
 }
@@ -112,29 +120,74 @@ export function WelcomeBanner({ projectDir, pendingInputCount = 0 } = {}) {
 // --- Tool block ---
-export function ToolBlock({ name, input, output, isError, isRunning }) {
+/**
+ * Tool-result block.
+ *
+ * Rendering modes:
+ *   - isRunning       → yellow border, no output (spinner shown elsewhere).
+ *   - isError         → red border, ALWAYS show full output (errors are short + critical).
+ *   - isRecent: true  → green border, show up to ~4 lines + "N lines hidden" footer.
+ *   - isRecent: false → header only (header includes line count + byte count).
+ *
+ * The full output is always on disk in logs/events.jsonl. Keeping the Ink
+ * tree slim is what lets KC handle long sessions without OOM / typing lag.
+ */
+const RECENT_PREVIEW_LINES = 4;
+export function ToolBlock({ name, input, output, isError, isRunning, isRecent = true }) {
   const borderColor = isRunning ? "yellow" : isError ? "red" : "green";
+  const outStr = typeof output === "string" ? output : "";
+  const lines = outStr ? outStr.split("\n") : [];
+  const bytes = outStr.length;
+  const header = h(Box, null,
+    h(Text, { color: borderColor }, "┃ "),
+    h(Text, { dimColor: true }, name),
+    input ? h(Text, { dimColor: true }, ` ${JSON.stringify(input).slice(0, 120)}`) : null,
+    outStr && !isRunning
+      ? h(Text, { dimColor: true }, `  (${lines.length} 行 / ${bytes} 字节)`)
+      : null,
+  );
+  // Errors: always show in full (short + critical).
+  if (isError && outStr) {
+    return h(Box, { flexDirection: "column", marginLeft: 2 },
+      header,
+      h(Box, { flexDirection: "column" },
+        ...lines.map((line, i) =>
+          h(Box, { key: i },
+            h(Text, { color: "red" }, "┃ "),
+            h(Text, { color: "red" }, line),
+          ),
+        ),
+      ),
+    );
+  }
+  // Off-screen / not-recent: header only. Full output remains on disk.
+  if (!isRecent || !outStr) {
+    return h(Box, { marginLeft: 2 }, header);
+  }
+  // Recent + successful: show preview + truncation footer.
+  const previewLines = lines.slice(0, RECENT_PREVIEW_LINES);
+  const remaining = lines.length - previewLines.length;
   return h(Box, { flexDirection: "column", marginLeft: 2 },
-    h(Box, null,
-      h(Text, { color: borderColor }, "┃ "),
-      h(Text, { dimColor: true }, name),
-      input ? h(Text, { dimColor: true }, ` ${JSON.stringify(input)}`) : null,
-    ),
-    output ? h(Box, { flexDirection: "column" },
-      ...output.split("\n").slice(0, 20).map((line, i) =>
+    header,
+    h(Box, { flexDirection: "column" },
+      ...previewLines.map((line, i) =>
         h(Box, { key: i },
           h(Text, { color: borderColor }, "┃ "),
           h(Text, null, line),
         ),
       ),
-      output.split("\n").length > 20
+      remaining > 0
         ? h(Box, null,
             h(Text, { color: borderColor }, "┃ "),
-            h(Text, { dimColor: true }, `... ${output.split("\n").length - 20} more lines`),
+            h(Text, { dimColor: true }, `… ${remaining} 行已省略（在 logs/events.jsonl 中完整保留）`),
           )
         : null,
-    ) : null,
+    ),
   );
 }

package/src/cli/index.js CHANGED Viewed

@@ -2,7 +2,7 @@ import React, { useState, useEffect, useCallback, useRef } from "react";
 import { render, Box, Text, useApp, useInput } from "ink";
 import { loadSettings } from "../config.js";
 import { LLMClient } from "../agent/llm-client.js";
-import { AgentEngine } from "../agent/engine.js";
+import { AgentEngine, NEXT_PHASE } from "../agent/engine.js";
 import { Workspace } from "../agent/workspace.js";
 import { ConversationHistory } from "../agent/history.js";
 import { Scheduler } from "../agent/scheduler.js";
@@ -18,6 +18,18 @@ import {
 const h = React.createElement;
+// Only the last N messages stay in the Ink render tree. Older messages
+// remain in React state (so /compact can summarize them) but aren't
+// diffed on every keystroke — this is what keeps long sessions responsive
+// and prevents the 4 GB heap OOM observed in the v0.5.3 E2E test.
+// Full conversation is persisted to logs/events.jsonl on every event,
+// so dropping from render is purely visual.
+const VISIBLE_WINDOW = 50;
+// How many recent messages render their ToolBlock with full preview.
+// Older ToolBlocks show header only. Both still persist full output to disk.
+const RECENT_TOOL_WINDOW = 10;
 /**
  * Main KC Agent CLI App using Ink (React for terminals).
  */
@@ -159,6 +171,7 @@ function App({ engine, config }) {
             "  /help                Show this help\n" +
             "  /status              Show session info, model, phase, workspace\n" +
             "  /tasks               Show task progress\n" +
+            "  /phase [sub]         advance | status | <name> — manual phase override\n" +
             "  /schedule            Show scheduled ingestion jobs and recent log lines\n" +
             "  /clear               Clear conversation history (keep workspace)\n" +
             "  /compact             Summarize older messages to reduce context\n" +
@@ -195,6 +208,55 @@ function App({ engine, config }) {
         });
         return true;
+      case "/phase": {
+        // User-driven phase override. Useful when auto-advance fails to fire
+        // or when debugging. Subcommands:
+        //   /phase                 → current phase (alias: /phase status)
+        //   /phase advance | next  → move to NEXT_PHASE[current]
+        //   /phase <name>          → force-jump to any phase (forward or back)
+        const engine = engineRef.current;
+        const sub = (parts[1] || "").toLowerCase();
+        if (!sub || sub === "status") {
+          const next = NEXT_PHASE[engine.currentPhase];
+          addMessage({
+            role: "system",
+            content:
+              `Current phase: ${engine.currentPhase.toUpperCase()}` +
+              (next ? `  (next auto: ${next})` : "  (final phase)"),
+          });
+          return true;
+        }
+        if (sub === "advance" || sub === "next") {
+          const next = NEXT_PHASE[engine.currentPhase];
+          if (!next) {
+            addMessage({ role: "system", content: `Already in final phase (${engine.currentPhase}).` });
+            return true;
+          }
+          const ok = engine._advancePhase(next, "manual /phase advance");
+          addMessage({
+            role: "system",
+            content: ok
+              ? `→ phase advanced to ${next.toUpperCase()}.`
+              : `Failed to advance from ${engine.currentPhase}.`,
+          });
+          updateContextStats();
+          return true;
+        }
+        // /phase <name> — force-jump. Uses {force:true} to allow backward jumps.
+        const ok = engine._advancePhase(sub, "manual /phase <name>", { force: true });
+        addMessage({
+          role: "system",
+          content: ok
+            ? `→ phase set to ${sub.toUpperCase()}.`
+            : `Unknown phase: ${sub}. Valid: bootstrap, extraction, skill_authoring, skill_testing, distillation, production_qc`,
+        });
+        updateContextStats();
+        return true;
+      }
       case "/schedule": {
         const sched = new Scheduler(engineRef.current.workspace);
         const jobs = sched.list();
@@ -228,15 +290,22 @@ function App({ engine, config }) {
       case "/compact": {
         addMessage({ role: "system", content: "Compacting conversation history..." });
-        // Run compact asynchronously
         (async () => {
           try {
             const result = await engineRef.current.compact();
             if (result) {
-              addMessage({
+              // Claude Code pattern: after successful compact, clear the
+              // visible TUI messages and start fresh with a single summary
+              // line. The underlying engine.history already contains the
+              // compact-summary message pair; the TUI doesn't need to keep
+              // showing the pre-compact history (it's on disk in
+              // logs/events.jsonl anyway) and clearing it immediately frees
+              // Ink render-tree memory — fixing the lag that builds up over
+              // long sessions.
+              setMessages([{
                 role: "system",
-                content: `Compacted: removed ${result.removedCount} messages, kept ${result.retainedCount}. Summary: ~${result.summaryTokens} tokens.`,
-              });
+                content: `✓ 上下文已压缩：合并了 ${result.removedCount} 条早期消息（摘要约 ${result.summaryTokens} tokens，保留最近 ${result.retainedCount} 条）`,
+              }]);
             } else {
               addMessage({ role: "system", content: "Nothing to compact (conversation is short enough)." });
             }
@@ -392,31 +461,41 @@ function App({ engine, config }) {
     // Task dashboard (ralph-loop)
     taskList.length > 0 ? h(TaskDashboard, { tasks: taskList, progress: taskProgress }) : null,
-    // Message history
-    ...messages.map((msg, i) => {
+    // Message history (virtualized — only last VISIBLE_WINDOW render).
+    // Hidden-count hint for earlier messages, so users know the full
+    // history still exists (on disk) even though the TUI is slim.
+    messages.length > VISIBLE_WINDOW ? h(Box, { key: "hidden-hint" },
+      h(Text, { dimColor: true },
+        `— 前 ${messages.length - VISIBLE_WINDOW} 条消息已折叠，完整记录在 logs/events.jsonl —`),
+    ) : null,
+    ...messages.slice(-VISIBLE_WINDOW).map((msg, i, arr) => {
+      // Global index (for stable React keys) vs visible index (for isRecent).
+      const globalIdx = messages.length - arr.length + i;
+      const visibleIdx = arr.length - 1 - i;  // 0 = most recent
       if (msg.role === "user") {
-        return h(Box, { key: `msg-${i}` },
+        return h(Box, { key: `msg-${globalIdx}` },
           h(Text, { dimColor: true }, "❯ "),
           h(Text, null, msg.content),
         );
       }
       if (msg.role === "agent") {
-        return h(Box, { key: `msg-${i}` },
+        return h(Box, { key: `msg-${globalIdx}` },
           h(Text, null, msg.content),
         );
       }
       if (msg.role === "tool") {
         return h(ToolBlock, {
-          key: `msg-${i}`,
+          key: `msg-${globalIdx}`,
           name: msg.toolName,
           input: msg.toolInput,
           output: msg.toolOutput,
           isError: msg.toolIsError,
           isRunning: false,
+          isRecent: visibleIdx < RECENT_TOOL_WINDOW,
         });
       }
       if (msg.role === "system") {
-        return h(Box, { key: `msg-${i}` },
+        return h(Box, { key: `msg-${globalIdx}` },
           h(Text, { dimColor: true }, msg.content),
         );
       }

package/src/model-tiers.json CHANGED Viewed

@@ -2,17 +2,17 @@
   "_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
   "siliconflow": {
-    "conductor": "Pro/zai-org/GLM-5",
+    "conductor": "Pro/zai-org/GLM-5.1",
     "llm": {
-      "tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
+      "tier1": "Pro/zai-org/GLM-5.1, Pro/moonshotai/Kimi-K2.5",
       "tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
       "tier3": "Qwen/Qwen3.5-122B-A10B",
       "tier4": "Qwen/Qwen3.5-35B-A3B"
     },
     "vlm": {
-      "tier1": "Pro/Qwen/Qwen2.5-VL-72B-Instruct",
-      "tier2": "Qwen/Qwen2.5-VL-32B-Instruct",
-      "tier3": "Qwen/Qwen2.5-VL-7B-Instruct"
+      "tier1": "Qwen/Qwen3-VL-235B-A22B-Instruct",
+      "tier2": "Qwen/Qwen3-VL-30B-A3B-Instruct",
+      "tier3": "Qwen/Qwen3-VL-8B-Instruct"
     }
   },