kc-beta 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kc-beta",
3
- "version": "0.5.3",
3
+ "version": "0.5.4",
4
4
  "description": "KC Agent — LLM document verification agent (pure Node.js CLI)",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,10 +12,15 @@ export class ContextWindow {
12
12
  * @param {number} [opts.reserveForResponse=8192] - Tokens reserved for model output
13
13
  * @param {number} [opts.recentWindowSize=30] - Number of recent messages to always keep
14
14
  */
15
- constructor({ contextLimit, reserveForResponse = 8192, recentWindowSize = 30 }) {
15
+ constructor({ contextLimit, reserveForResponse = 8192, recentWindowSize = 30, triggerFraction = 0.70 }) {
16
16
  this.contextLimit = contextLimit;
17
17
  this.reserveForResponse = reserveForResponse;
18
18
  this.recentWindowSize = recentWindowSize;
19
+ // Fraction of budget that triggers windowing. v0.5.3 used 0.85 which only
20
+ // fired after runtime was already deep in the danger zone (a subsequent
21
+ // tool result could tip it over before the next check). 0.70 leaves room
22
+ // for one more tool result before hitting the hard ceiling.
23
+ this.triggerFraction = triggerFraction;
19
24
  }
20
25
 
21
26
  /**
@@ -29,7 +34,7 @@ export class ContextWindow {
29
34
  const budget = this.contextLimit - this.reserveForResponse;
30
35
 
31
36
  // If within budget, return as-is
32
- if (totalTokens <= budget * 0.85) {
37
+ if (totalTokens <= budget * this.triggerFraction) {
33
38
  return { messages, wasWindowed: false, removedCount: 0 };
34
39
  }
35
40
 
@@ -4,6 +4,7 @@ import { AgentEvent } from "./events.js";
4
4
  import { ContextAssembler } from "./context.js";
5
5
  import { ConversationHistory } from "./history.js";
6
6
  import { Workspace } from "./workspace.js";
7
+ import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
7
8
  import { VersionManager } from "./version-manager.js";
8
9
  import { CornerCaseRegistry } from "./corner-case-registry.js";
9
10
  import { ConfidenceScorer } from "./confidence-scorer.js";
@@ -51,7 +52,9 @@ const DEFAULT_KC_MAX_TOKENS = 65536;
51
52
  const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
52
53
 
53
54
  // Linear phase order — used by auto-advance (Bug 4). Last phase has no successor.
54
- const NEXT_PHASE = {
55
+ // Exported so the TUI's /phase slash command (src/cli/index.js) can call
56
+ // _advancePhase with the right successor without re-declaring the map.
57
+ export const NEXT_PHASE = {
55
58
  [Phase.BOOTSTRAP]: Phase.EXTRACTION,
56
59
  [Phase.EXTRACTION]: Phase.SKILL_AUTHORING,
57
60
  [Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
@@ -168,18 +171,16 @@ export class AgentEngine {
168
171
  this.toolRegistry = new ToolRegistry();
169
172
  this._registerToolsForPhase(this.currentPhase);
170
173
 
171
- // Edge-trigger state for _maybeAutoAdvance (Bug 5). Primed at construction
172
- // (and at resume) so a session that's already exit-criteria-met when it
173
- // boots doesn't auto-advance on the first user turn only on a fresh
174
- // false→true flip.
175
- this._lastReady = {};
176
- for (const phase of Object.keys(this.pipelines)) {
177
- try {
178
- this._lastReady[phase] = !!this.pipelines[phase].exitCriteriaMet?.();
179
- } catch {
180
- this._lastReady[phase] = false;
181
- }
182
- }
174
+ // Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
175
+ // phase so the first real false→true flip inside onToolResult triggers an
176
+ // advance even when the user launches from a pre-populated workspace
177
+ // whose exit criteria already happen to be met at boot.
178
+ // resume() re-primes this from the restored pipeline state (see ~L566),
179
+ // which is the correct behaviour there: resumed sessions that were already
180
+ // past this phase shouldn't re-fire.
181
+ this._lastReady = Object.fromEntries(
182
+ Object.keys(this.pipelines).map((p) => [p, false]),
183
+ );
183
184
  }
184
185
 
185
186
  /**
@@ -327,6 +328,47 @@ export class AgentEngine {
327
328
  };
328
329
  }
329
330
 
331
+ /**
332
+ * Run the windowing check immediately after a tool result appends to
333
+ * history. Called from runTurn() so that a large tool result can't sit in
334
+ * history past the threshold until the next LLM-loop iteration, where a
335
+ * stream-abort could then trap the context in a bloated state.
336
+ *
337
+ * Safe to call frequently — contextWindow.window() fast-paths when under
338
+ * the trigger fraction.
339
+ */
340
+ _maybeWindowAfterToolResult() {
341
+ if (!this.contextWindow) return;
342
+ const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
343
+ if (windowed.wasWindowed) {
344
+ this.history.messages = windowed.messages;
345
+ this.eventLog.append("context_windowed", {
346
+ removed: windowed.removedCount,
347
+ trigger: "post_tool_result",
348
+ });
349
+ }
350
+
351
+ // Heap-pressure diagnostic. The TUI has its own virtualization + tool-
352
+ // output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
353
+ // still see high heap usage, something else is leaking — log it once per
354
+ // pressure-crossing so operators can investigate without flooding logs.
355
+ try {
356
+ const mem = process.memoryUsage();
357
+ const frac = mem.heapUsed / (mem.heapTotal || 1);
358
+ if (frac > 0.80 && !this._memPressureLogged) {
359
+ this._memPressureLogged = true;
360
+ this.eventLog.append("memory_pressure", {
361
+ heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
362
+ heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
363
+ rssMB: Math.round(mem.rss / 1024 / 1024),
364
+ historyLength: this.history.messages.length,
365
+ });
366
+ } else if (frac < 0.60 && this._memPressureLogged) {
367
+ this._memPressureLogged = false; // re-arm for next crossing
368
+ }
369
+ } catch { /* process.memoryUsage failures are non-fatal */ }
370
+ }
371
+
330
372
  /**
331
373
  * Pre-flight hard ceiling (Bug 1). After windowing, if the message
332
374
  * array's total token count still exceeds the model's input budget,
@@ -785,6 +827,13 @@ export class AgentEngine {
785
827
  content: historyContent,
786
828
  });
787
829
 
830
+ // Post-tool-result safety net: check for context pressure RIGHT NOW
831
+ // rather than waiting for the next LLM-loop iteration. A large tool
832
+ // result that tips history over the threshold used to sit there
833
+ // until the next turn, and if the stream aborted in between the
834
+ // user saw "CTX: 210% / stream terminated" with no recovery.
835
+ this._maybeWindowAfterToolResult();
836
+
788
837
  // Pipeline controller: update state and re-register tools on phase change
789
838
  if (pipeline?.onToolResult) {
790
839
  const pEvent = pipeline.onToolResult(tc.name, inputData, result);
@@ -928,7 +977,7 @@ export class AgentEngine {
928
977
 
929
978
  try {
930
979
  const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
931
- const rules = Array.isArray(catalog) ? catalog : [];
980
+ const rules = normalizeRuleCatalog(catalog);
932
981
  if (rules.length > 0) {
933
982
  this.taskManager.createRuleTasks(rules, phase);
934
983
  }
@@ -0,0 +1,37 @@
/**
 * Normalize a parsed catalog.json payload into a flat array of rule records.
 *
 * catalog.json has been written in several shapes; callers that assumed a
 * flat array silently dropped every rule when the file was object-shaped.
 * This helper gives all consumers the same list regardless of shape.
 *
 * Accepted shapes:
 *   1. [rule, rule, ...]                               flat array (returned as-is)
 *   2. { rules: [...] }                                wrapper object
 *   3. { categories: { A: [...], B: [...] }, ... }     grouped by category
 *   4. { categories: { A: { rules: [...] }, ... } }    nested category objects
 *
 * Precedence: a non-empty top-level `rules` array wins. An EMPTY `rules`
 * array does not hide rules found under `categories`; it is only returned
 * when the categories yield nothing.
 *
 * Anything else (null, a primitive, or an unrecognized object shape) yields
 * []. The function performs no I/O and does not catch exceptions — parsing
 * the file is the caller's job.
 *
 * @param {unknown} catalog - Result of JSON.parse on catalog.json.
 * @returns {Array} Flat list of rule records (entries are not validated).
 */
export function normalizeRuleCatalog(catalog) {
  if (Array.isArray(catalog)) return catalog;
  if (catalog === null || typeof catalog !== "object") return [];

  const { rules, categories } = catalog;
  if (Array.isArray(rules) && rules.length > 0) return rules;

  const collected = [];
  if (categories !== null && typeof categories === "object") {
    for (const group of Object.values(categories)) {
      const list = Array.isArray(group) ? group : group?.rules;
      if (!Array.isArray(list)) continue;
      // Element-wise push: spreading a very large group into push() can
      // exceed the engine's argument limit and throw RangeError.
      for (const rule of list) collected.push(rule);
    }
  }
  if (collected.length > 0) return collected;

  // Preserve the original return value for `{ rules: [] }`.
  return Array.isArray(rules) ? rules : [];
}
@@ -3,6 +3,7 @@ import path from "node:path";
3
3
  import { fileURLToPath } from "node:url";
4
4
  import { BaseTool, ToolResult } from "./base.js";
5
5
  import { SnapshotTool } from "./snapshot.js";
6
+ import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
6
7
 
7
8
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
8
9
  const TEMPLATE_DIR = path.resolve(__dirname, "../../../template/release-runtime");
@@ -100,7 +101,7 @@ export class ReleaseTool extends BaseTool {
100
101
  let catalog;
101
102
  try { catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8")); }
102
103
  catch (e) { return new ToolResult(`catalog.json invalid: ${e.message}`, true); }
103
- if (!Array.isArray(catalog)) catalog = catalog.rules || [];
104
+ catalog = normalizeRuleCatalog(catalog);
104
105
 
105
106
  const includeSet = Array.isArray(input.include) && input.include.length > 0
106
107
  ? new Set(input.include) : null;
@@ -1,10 +1,52 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { BaseTool, ToolResult } from "./base.js";
4
+ import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
4
5
 
5
6
  const REQUIRED_FIELDS = new Set(["id", "source_ref", "description"]);
6
7
  const RECOMMENDED_FIELDS = new Set(["falsifiability_statement", "test_case_stub", "applicable_sections"]);
7
8
 
// Field-name aliases — LLMs frequently produce `source` or 来源 instead of
// `source_ref`, and `desc` instead of `description`. Rather than forcing the
// model through repeated failed calls until it finds the canonical names,
// accept the common aliases and fill in the canonical field on ingest.
// Insertion order is the precedence order when several aliases are supplied.
const FIELD_ALIASES = {
  source: "source_ref",
  reference: "source_ref",
  ref: "source_ref",
  "来源": "source_ref",
  desc: "description",
  "描述": "description",
  rule_id: "id",
  ruleId: "id",
};

/** True for undefined, null, and empty / whitespace-only strings. */
function isBlankFieldValue(value) {
  if (value === undefined || value === null) return true;
  return typeof value === "string" && value.trim() === "";
}

/**
 * Return a shallow copy of `data` with canonical fields filled from aliases.
 *
 * A canonical field is filled from an alias only when the canonical field is
 * blank (undefined, null, or an empty string) and the alias carries a
 * non-blank value. This keeps a placeholder such as `source_ref: ""` — or a
 * blank higher-precedence alias — from blocking a usable alias further down.
 * A non-blank canonical value is never overwritten. Alias keys are left in
 * place on the copy; the input object is not mutated.
 *
 * @param {unknown} data - Rule payload as supplied by the caller.
 * @returns {unknown} A normalized copy for plain objects; non-objects and
 *   arrays are returned unchanged so the caller's own validation handles them.
 */
function normalizeRuleData(data) {
  if (!data || typeof data !== "object" || Array.isArray(data)) return data;

  const out = { ...data };
  for (const [alias, canonical] of Object.entries(FIELD_ALIASES)) {
    if (isBlankFieldValue(out[canonical]) && !isBlankFieldValue(out[alias])) {
      out[canonical] = out[alias];
    }
  }
  return out;
}
34
+
/**
 * Build a concrete, actionable "missing field" message for the agent.
 *
 * A generic "Missing required fields: ..." message left agents unable to
 * tell which field they had actually failed to provide. This message names
 * the first missing field, lists any others, shows which keys were supplied,
 * and spells out every accepted alias so the model can self-correct.
 *
 * @param {string[]} missing - Names of the missing required fields (non-empty).
 * @param {unknown} data - The payload that was validated; only plain-object
 *   keys are reported (a string or other primitive reports "(none)").
 * @returns {string} The error message.
 */
function missingFieldError(missing, data) {
  const MAX_KEYS_SHOWN = 8;
  const keys = data !== null && typeof data === "object" ? Object.keys(data) : [];
  const shown = keys.slice(0, MAX_KEYS_SHOWN).join(", ") || "(none)";
  // Say so when the key list was cut, so the model does not assume it is complete.
  const overflow = keys.length > MAX_KEYS_SHOWN ? `, … +${keys.length - MAX_KEYS_SHOWN} more` : "";

  const [first, ...others] = missing;
  const rest = others.length > 0 ? ` (also missing: ${others.join(", ")})` : "";

  return (
    `Missing field '${first}' in data.${rest} ` +
    `Provided keys: {${shown}${overflow}}. ` +
    // Keep this list in step with the alias table the tool actually accepts.
    `Accepted aliases: source/来源/reference/ref → source_ref, desc/描述 → description, rule_id/ruleId → id.`
  );
}
49
+
8
50
  /**
9
51
  * CRUD on the rule registry with schema enforcement.
10
52
  * Enforces required fields (id, source_ref, description) on create/update.
@@ -48,14 +90,22 @@ export class RuleCatalogTool extends BaseTool {
48
90
  if (op === "create") return this._create(data);
49
91
  if (op === "update") return this._update(ruleId || data.id || "", data);
50
92
  if (op === "delete") return this._delete(ruleId || data.id || "");
51
- return new ToolResult(`Unknown operation: ${op}`, true);
93
+ // More helpful than "Unknown operation: " — tells the agent exactly what's
94
+ // allowed and what shape to call with next time (observed in v0.5.3 E2E
95
+ // where GLM-5.1 sent input: {} 38+ times without learning).
96
+ return new ToolResult(
97
+ `rule_catalog requires {operation}. Got: ${op ? `'${op}'` : "(empty)"}. ` +
98
+ `Valid operations: list, read, create, update, delete. ` +
99
+ `Examples: {"operation":"list"} · {"operation":"create","data":{"id":"R-01","source_ref":"民法典 710","description":"..."}}`,
100
+ true,
101
+ );
52
102
  }
53
103
 
54
104
  _load() {
55
105
  if (!fs.existsSync(this._catalogPath)) return [];
56
106
  try {
57
107
  const data = JSON.parse(fs.readFileSync(this._catalogPath, "utf-8"));
58
- return Array.isArray(data) ? data : [];
108
+ return normalizeRuleCatalog(data);
59
109
  } catch { return []; }
60
110
  }
61
111
 
@@ -79,8 +129,9 @@ export class RuleCatalogTool extends BaseTool {
79
129
  }
80
130
 
81
131
  _create(data) {
82
- const missing = [...REQUIRED_FIELDS].filter((f) => !(f in data));
83
- if (missing.length > 0) return new ToolResult(`Missing required fields: ${missing.join(", ")}`, true);
132
+ data = normalizeRuleData(data);
133
+ const missing = [...REQUIRED_FIELDS].filter((f) => !data[f]);
134
+ if (missing.length > 0) return new ToolResult(missingFieldError(missing, data), true);
84
135
  const rules = this._load();
85
136
  if (rules.some((r) => r.id === data.id)) return new ToolResult(`Rule already exists: ${data.id}. Use update.`, true);
86
137
  const warnings = [...RECOMMENDED_FIELDS].filter((f) => !(f in data));
@@ -93,6 +144,7 @@ export class RuleCatalogTool extends BaseTool {
93
144
 
94
145
  _update(ruleId, data) {
95
146
  if (!ruleId) return new ToolResult("rule_id required for update", true);
147
+ data = normalizeRuleData(data);
96
148
  const rules = this._load();
97
149
  const idx = rules.findIndex((r) => r.id === ruleId);
98
150
  if (idx < 0) return new ToolResult(`Rule not found: ${ruleId}`, true);
@@ -42,12 +42,20 @@ export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
42
42
  ? `${(contextLimit / 1000).toFixed(0)}k`
43
43
  : `${contextLimit || 0}`;
44
44
 
45
+ // Soft-threshold hint — shows up before auto-windowing kicks in at ~70%
46
+ // so users know they can run /compact to reduce context more aggressively
47
+ // than windowing does. Red hint at 80%+ means it's time to compact NOW.
48
+ const compactHint = pct >= 80 ? " · 💾 /compact"
49
+ : pct >= 60 ? " · 💾 建议 /compact"
50
+ : "";
51
+
45
52
  return h(Box, { marginTop: 0 },
46
53
  h(Text, { dimColor: true }, " ⏵⏵ KC Agent CLI "),
47
54
  h(Text, { dimColor: true }, sessionId ? `[${sessionId}]` : ""),
48
55
  phase ? h(Text, { color: "cyan" }, ` ${phase.toUpperCase()}`) : null,
49
56
  h(Text, { color: "green" }, " ● "),
50
57
  h(Text, { color: ctxColor }, `CTX: ${ctxLabel}/${limitLabel} (${pct}%)`),
58
+ compactHint ? h(Text, { color: ctxColor }, compactHint) : null,
51
59
  h(Text, { dimColor: true }, ` · ${LENAT_QUOTE}`),
52
60
  );
53
61
  }
@@ -112,29 +120,74 @@ export function WelcomeBanner({ projectDir, pendingInputCount = 0 } = {}) {
112
120
 
113
121
  // --- Tool block ---
114
122
 
115
- export function ToolBlock({ name, input, output, isError, isRunning }) {
123
+ /**
124
+ * Tool-result block.
125
+ *
126
+ * Rendering modes:
127
+ * - isRunning → yellow border, no output (spinner shown elsewhere).
128
+ * - isError → red border, ALWAYS show full output (errors are short + critical).
129
+ * - isRecent: true → green border, show up to ~4 lines + "N lines hidden" footer.
130
+ * - isRecent: false → header only (header includes line count + byte count).
131
+ *
132
+ * The full output is always on disk in logs/events.jsonl. Keeping the Ink
133
+ * tree slim is what lets KC handle long sessions without OOM / typing lag.
134
+ */
135
+ const RECENT_PREVIEW_LINES = 4;
136
+
137
+ export function ToolBlock({ name, input, output, isError, isRunning, isRecent = true }) {
116
138
  const borderColor = isRunning ? "yellow" : isError ? "red" : "green";
139
+ const outStr = typeof output === "string" ? output : "";
140
+ const lines = outStr ? outStr.split("\n") : [];
141
+ const bytes = outStr.length;
142
+
143
+ const header = h(Box, null,
144
+ h(Text, { color: borderColor }, "┃ "),
145
+ h(Text, { dimColor: true }, name),
146
+ input ? h(Text, { dimColor: true }, ` ${JSON.stringify(input).slice(0, 120)}`) : null,
147
+ outStr && !isRunning
148
+ ? h(Text, { dimColor: true }, ` (${lines.length} 行 / ${bytes} 字节)`)
149
+ : null,
150
+ );
117
151
 
152
+ // Errors: always show in full (short + critical).
153
+ if (isError && outStr) {
154
+ return h(Box, { flexDirection: "column", marginLeft: 2 },
155
+ header,
156
+ h(Box, { flexDirection: "column" },
157
+ ...lines.map((line, i) =>
158
+ h(Box, { key: i },
159
+ h(Text, { color: "red" }, "┃ "),
160
+ h(Text, { color: "red" }, line),
161
+ ),
162
+ ),
163
+ ),
164
+ );
165
+ }
166
+
167
+ // Off-screen / not-recent: header only. Full output remains on disk.
168
+ if (!isRecent || !outStr) {
169
+ return h(Box, { marginLeft: 2 }, header);
170
+ }
171
+
172
+ // Recent + successful: show preview + truncation footer.
173
+ const previewLines = lines.slice(0, RECENT_PREVIEW_LINES);
174
+ const remaining = lines.length - previewLines.length;
118
175
  return h(Box, { flexDirection: "column", marginLeft: 2 },
119
- h(Box, null,
120
- h(Text, { color: borderColor }, "┃ "),
121
- h(Text, { dimColor: true }, name),
122
- input ? h(Text, { dimColor: true }, ` ${JSON.stringify(input)}`) : null,
123
- ),
124
- output ? h(Box, { flexDirection: "column" },
125
- ...output.split("\n").slice(0, 20).map((line, i) =>
176
+ header,
177
+ h(Box, { flexDirection: "column" },
178
+ ...previewLines.map((line, i) =>
126
179
  h(Box, { key: i },
127
180
  h(Text, { color: borderColor }, "┃ "),
128
181
  h(Text, null, line),
129
182
  ),
130
183
  ),
131
- output.split("\n").length > 20
184
+ remaining > 0
132
185
  ? h(Box, null,
133
186
  h(Text, { color: borderColor }, "┃ "),
134
- h(Text, { dimColor: true }, `... ${output.split("\n").length - 20} more lines`),
187
+ h(Text, { dimColor: true }, `… ${remaining} 行已省略(在 logs/events.jsonl 中完整保留)`),
135
188
  )
136
189
  : null,
137
- ) : null,
190
+ ),
138
191
  );
139
192
  }
140
193
 
package/src/cli/index.js CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect, useCallback, useRef } from "react";
2
2
  import { render, Box, Text, useApp, useInput } from "ink";
3
3
  import { loadSettings } from "../config.js";
4
4
  import { LLMClient } from "../agent/llm-client.js";
5
- import { AgentEngine } from "../agent/engine.js";
5
+ import { AgentEngine, NEXT_PHASE } from "../agent/engine.js";
6
6
  import { Workspace } from "../agent/workspace.js";
7
7
  import { ConversationHistory } from "../agent/history.js";
8
8
  import { Scheduler } from "../agent/scheduler.js";
@@ -18,6 +18,18 @@ import {
18
18
 
19
19
  const h = React.createElement;
20
20
 
21
+ // Only the last N messages stay in the Ink render tree. Older messages
22
+ // remain in React state (so /compact can summarize them) but aren't
23
+ // diffed on every keystroke — this is what keeps long sessions responsive
24
+ // and prevents the 4 GB heap OOM observed in the v0.5.3 E2E test.
25
+ // Full conversation is persisted to logs/events.jsonl on every event,
26
+ // so dropping from render is purely visual.
27
+ const VISIBLE_WINDOW = 50;
28
+
29
+ // How many recent messages render their ToolBlock with full preview.
30
+ // Older ToolBlocks show header only. Both still persist full output to disk.
31
+ const RECENT_TOOL_WINDOW = 10;
32
+
21
33
  /**
22
34
  * Main KC Agent CLI App using Ink (React for terminals).
23
35
  */
@@ -159,6 +171,7 @@ function App({ engine, config }) {
159
171
  " /help Show this help\n" +
160
172
  " /status Show session info, model, phase, workspace\n" +
161
173
  " /tasks Show task progress\n" +
174
+ " /phase [sub] advance | status | <name> — manual phase override\n" +
162
175
  " /schedule Show scheduled ingestion jobs and recent log lines\n" +
163
176
  " /clear Clear conversation history (keep workspace)\n" +
164
177
  " /compact Summarize older messages to reduce context\n" +
@@ -195,6 +208,55 @@ function App({ engine, config }) {
195
208
  });
196
209
  return true;
197
210
 
211
+ case "/phase": {
212
+ // User-driven phase override. Useful when auto-advance fails to fire
213
+ // or when debugging. Subcommands:
214
+ // /phase → current phase (alias: /phase status)
215
+ // /phase advance | next → move to NEXT_PHASE[current]
216
+ // /phase <name> → force-jump to any phase (forward or back)
217
+ const engine = engineRef.current;
218
+ const sub = (parts[1] || "").toLowerCase();
219
+
220
+ if (!sub || sub === "status") {
221
+ const next = NEXT_PHASE[engine.currentPhase];
222
+ addMessage({
223
+ role: "system",
224
+ content:
225
+ `Current phase: ${engine.currentPhase.toUpperCase()}` +
226
+ (next ? ` (next auto: ${next})` : " (final phase)"),
227
+ });
228
+ return true;
229
+ }
230
+
231
+ if (sub === "advance" || sub === "next") {
232
+ const next = NEXT_PHASE[engine.currentPhase];
233
+ if (!next) {
234
+ addMessage({ role: "system", content: `Already in final phase (${engine.currentPhase}).` });
235
+ return true;
236
+ }
237
+ const ok = engine._advancePhase(next, "manual /phase advance");
238
+ addMessage({
239
+ role: "system",
240
+ content: ok
241
+ ? `→ phase advanced to ${next.toUpperCase()}.`
242
+ : `Failed to advance from ${engine.currentPhase}.`,
243
+ });
244
+ updateContextStats();
245
+ return true;
246
+ }
247
+
248
+ // /phase <name> — force-jump. Uses {force:true} to allow backward jumps.
249
+ const ok = engine._advancePhase(sub, "manual /phase <name>", { force: true });
250
+ addMessage({
251
+ role: "system",
252
+ content: ok
253
+ ? `→ phase set to ${sub.toUpperCase()}.`
254
+ : `Unknown phase: ${sub}. Valid: bootstrap, extraction, skill_authoring, skill_testing, distillation, production_qc`,
255
+ });
256
+ updateContextStats();
257
+ return true;
258
+ }
259
+
198
260
  case "/schedule": {
199
261
  const sched = new Scheduler(engineRef.current.workspace);
200
262
  const jobs = sched.list();
@@ -228,15 +290,22 @@ function App({ engine, config }) {
228
290
 
229
291
  case "/compact": {
230
292
  addMessage({ role: "system", content: "Compacting conversation history..." });
231
- // Run compact asynchronously
232
293
  (async () => {
233
294
  try {
234
295
  const result = await engineRef.current.compact();
235
296
  if (result) {
236
- addMessage({
297
+ // Claude Code pattern: after successful compact, clear the
298
+ // visible TUI messages and start fresh with a single summary
299
+ // line. The underlying engine.history already contains the
300
+ // compact-summary message pair; the TUI doesn't need to keep
301
+ // showing the pre-compact history (it's on disk in
302
+ // logs/events.jsonl anyway) and clearing it immediately frees
303
+ // Ink render-tree memory — fixing the lag that builds up over
304
+ // long sessions.
305
+ setMessages([{
237
306
  role: "system",
238
- content: `Compacted: removed ${result.removedCount} messages, kept ${result.retainedCount}. Summary: ~${result.summaryTokens} tokens.`,
239
- });
307
+ content: `✓ 上下文已压缩:合并了 ${result.removedCount} 条早期消息(摘要约 ${result.summaryTokens} tokens,保留最近 ${result.retainedCount} 条)`,
308
+ }]);
240
309
  } else {
241
310
  addMessage({ role: "system", content: "Nothing to compact (conversation is short enough)." });
242
311
  }
@@ -392,31 +461,41 @@ function App({ engine, config }) {
392
461
  // Task dashboard (ralph-loop)
393
462
  taskList.length > 0 ? h(TaskDashboard, { tasks: taskList, progress: taskProgress }) : null,
394
463
 
395
- // Message history
396
- ...messages.map((msg, i) => {
464
+ // Message history (virtualized — only last VISIBLE_WINDOW render).
465
+ // Hidden-count hint for earlier messages, so users know the full
466
+ // history still exists (on disk) even though the TUI is slim.
467
+ messages.length > VISIBLE_WINDOW ? h(Box, { key: "hidden-hint" },
468
+ h(Text, { dimColor: true },
469
+ `— 前 ${messages.length - VISIBLE_WINDOW} 条消息已折叠,完整记录在 logs/events.jsonl —`),
470
+ ) : null,
471
+ ...messages.slice(-VISIBLE_WINDOW).map((msg, i, arr) => {
472
+ // Global index (for stable React keys) vs visible index (for isRecent).
473
+ const globalIdx = messages.length - arr.length + i;
474
+ const visibleIdx = arr.length - 1 - i; // 0 = most recent
397
475
  if (msg.role === "user") {
398
- return h(Box, { key: `msg-${i}` },
476
+ return h(Box, { key: `msg-${globalIdx}` },
399
477
  h(Text, { dimColor: true }, "❯ "),
400
478
  h(Text, null, msg.content),
401
479
  );
402
480
  }
403
481
  if (msg.role === "agent") {
404
- return h(Box, { key: `msg-${i}` },
482
+ return h(Box, { key: `msg-${globalIdx}` },
405
483
  h(Text, null, msg.content),
406
484
  );
407
485
  }
408
486
  if (msg.role === "tool") {
409
487
  return h(ToolBlock, {
410
- key: `msg-${i}`,
488
+ key: `msg-${globalIdx}`,
411
489
  name: msg.toolName,
412
490
  input: msg.toolInput,
413
491
  output: msg.toolOutput,
414
492
  isError: msg.toolIsError,
415
493
  isRunning: false,
494
+ isRecent: visibleIdx < RECENT_TOOL_WINDOW,
416
495
  });
417
496
  }
418
497
  if (msg.role === "system") {
419
- return h(Box, { key: `msg-${i}` },
498
+ return h(Box, { key: `msg-${globalIdx}` },
420
499
  h(Text, { dimColor: true }, msg.content),
421
500
  );
422
501
  }
@@ -2,17 +2,17 @@
2
2
  "_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
3
3
 
4
4
  "siliconflow": {
5
- "conductor": "Pro/zai-org/GLM-5",
5
+ "conductor": "Pro/zai-org/GLM-5.1",
6
6
  "llm": {
7
- "tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
7
+ "tier1": "Pro/zai-org/GLM-5.1, Pro/moonshotai/Kimi-K2.5",
8
8
  "tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
9
9
  "tier3": "Qwen/Qwen3.5-122B-A10B",
10
10
  "tier4": "Qwen/Qwen3.5-35B-A3B"
11
11
  },
12
12
  "vlm": {
13
- "tier1": "Pro/Qwen/Qwen2.5-VL-72B-Instruct",
14
- "tier2": "Qwen/Qwen2.5-VL-32B-Instruct",
15
- "tier3": "Qwen/Qwen2.5-VL-7B-Instruct"
13
+ "tier1": "Qwen/Qwen3-VL-235B-A22B-Instruct",
14
+ "tier2": "Qwen/Qwen3-VL-30B-A3B-Instruct",
15
+ "tier3": "Qwen/Qwen3-VL-8B-Instruct"
16
16
  }
17
17
  },
18
18