kc-beta 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kc-beta",
3
- "version": "0.5.3",
3
+ "version": "0.5.5",
4
4
  "description": "KC Agent — LLM document verification agent (pure Node.js CLI)",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,10 +12,15 @@ export class ContextWindow {
12
12
  * @param {number} [opts.reserveForResponse=8192] - Tokens reserved for model output
13
13
  * @param {number} [opts.recentWindowSize=30] - Number of recent messages to always keep
14
14
  */
15
- constructor({ contextLimit, reserveForResponse = 8192, recentWindowSize = 30 }) {
15
+ constructor({ contextLimit, reserveForResponse = 8192, recentWindowSize = 30, triggerFraction = 0.70 }) {
16
16
  this.contextLimit = contextLimit;
17
17
  this.reserveForResponse = reserveForResponse;
18
18
  this.recentWindowSize = recentWindowSize;
19
+ // Fraction of budget that triggers windowing. v0.5.3 used 0.85 which only
20
+ // fired after runtime was already deep in the danger zone (a subsequent
21
+ // tool result could tip it over before the next check). 0.70 leaves room
22
+ // for one more tool result before hitting the hard ceiling.
23
+ this.triggerFraction = triggerFraction;
19
24
  }
20
25
 
21
26
  /**
@@ -29,12 +34,22 @@ export class ContextWindow {
29
34
  const budget = this.contextLimit - this.reserveForResponse;
30
35
 
31
36
  // If within budget, return as-is
32
- if (totalTokens <= budget * 0.85) {
37
+ if (totalTokens <= budget * this.triggerFraction) {
33
38
  return { messages, wasWindowed: false, removedCount: 0 };
34
39
  }
35
40
 
36
- // Split into older and recent
37
- const splitPoint = Math.max(0, messages.length - this.recentWindowSize);
41
+ // Split into older and recent. The recent slice is fed directly to the
42
+ // LLM, so it must not begin with an orphan "tool" message — those carry a
43
+ // tool_call_id that references an assistant `tool_calls` entry, and if
44
+ // that assistant message ended up in the compressed older slice the
45
+ // provider rejects the request (OpenAI: "tool messages must follow an
46
+ // assistant with tool_calls"; Anthropic: unpaired tool_use/tool_result).
47
+ // Walk the split point forward past any leading tool rows so the recent
48
+ // window always starts on a turn boundary.
49
+ let splitPoint = Math.max(0, messages.length - this.recentWindowSize);
50
+ while (splitPoint < messages.length && messages[splitPoint]?.role === "tool") {
51
+ splitPoint++;
52
+ }
38
53
  const recentMessages = messages.slice(splitPoint);
39
54
  const olderMessages = messages.slice(0, splitPoint);
40
55
 
@@ -4,6 +4,7 @@ import { AgentEvent } from "./events.js";
4
4
  import { ContextAssembler } from "./context.js";
5
5
  import { ConversationHistory } from "./history.js";
6
6
  import { Workspace } from "./workspace.js";
7
+ import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
7
8
  import { VersionManager } from "./version-manager.js";
8
9
  import { CornerCaseRegistry } from "./corner-case-registry.js";
9
10
  import { ConfidenceScorer } from "./confidence-scorer.js";
@@ -51,7 +52,9 @@ const DEFAULT_KC_MAX_TOKENS = 65536;
51
52
  const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
52
53
 
53
54
  // Linear phase order — used by auto-advance (Bug 4). Last phase has no successor.
54
- const NEXT_PHASE = {
55
+ // Exported so the TUI's /phase slash command (src/cli/index.js) can call
56
+ // _advancePhase with the right successor without re-declaring the map.
57
+ export const NEXT_PHASE = {
55
58
  [Phase.BOOTSTRAP]: Phase.EXTRACTION,
56
59
  [Phase.EXTRACTION]: Phase.SKILL_AUTHORING,
57
60
  [Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
@@ -168,18 +171,16 @@ export class AgentEngine {
168
171
  this.toolRegistry = new ToolRegistry();
169
172
  this._registerToolsForPhase(this.currentPhase);
170
173
 
171
- // Edge-trigger state for _maybeAutoAdvance (Bug 5). Primed at construction
172
- // (and at resume) so a session that's already exit-criteria-met when it
173
- // boots doesn't auto-advance on the first user turn — only on a fresh
174
- // false→true flip.
175
- this._lastReady = {};
176
- for (const phase of Object.keys(this.pipelines)) {
177
- try {
178
- this._lastReady[phase] = !!this.pipelines[phase].exitCriteriaMet?.();
179
- } catch {
180
- this._lastReady[phase] = false;
181
- }
182
- }
174
+ // Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
175
+ // phase so the first real false→true flip inside onToolResult triggers an
176
+ // advance even when the user launches from a pre-populated workspace
177
+ // whose exit criteria already happen to be met at boot.
178
+ // resume() re-primes this from the restored pipeline state (see ~L566),
179
+ // which is the correct behaviour there: resumed sessions that were already
180
+ // past this phase shouldn't re-fire.
181
+ this._lastReady = Object.fromEntries(
182
+ Object.keys(this.pipelines).map((p) => [p, false]),
183
+ );
183
184
  }
184
185
 
185
186
  /**
@@ -327,6 +328,50 @@ export class AgentEngine {
327
328
  };
328
329
  }
329
330
 
331
+ /**
332
+ * Run the windowing check immediately after a tool result appends to
333
+ * history. Called from runTurn() so that a large tool result can't sit in
334
+ * history past the threshold until the next LLM-loop iteration, where a
335
+ * stream-abort could then trap the context in a bloated state.
336
+ *
337
+ * Safe to call frequently — contextWindow.window() fast-paths when under
338
+ * the trigger fraction.
339
+ */
340
+ _maybeWindowAfterToolResult() {
341
+ if (!this.contextWindow) return;
342
+ const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
343
+ if (windowed.wasWindowed) {
344
+ // `messages` is a getter-only property on ConversationHistory; write the
345
+ // backing field and persist (same pattern as compact()).
346
+ this.history._messages = windowed.messages;
347
+ this.history._save();
348
+ this.eventLog.append("context_windowed", {
349
+ removed: windowed.removedCount,
350
+ trigger: "post_tool_result",
351
+ });
352
+ }
353
+
354
+ // Heap-pressure diagnostic. The TUI has its own virtualization + tool-
355
+ // output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
356
+ // still see high heap usage, something else is leaking — log it once per
357
+ // pressure-crossing so operators can investigate without flooding logs.
358
+ try {
359
+ const mem = process.memoryUsage();
360
+ const frac = mem.heapUsed / (mem.heapTotal || 1);
361
+ if (frac > 0.80 && !this._memPressureLogged) {
362
+ this._memPressureLogged = true;
363
+ this.eventLog.append("memory_pressure", {
364
+ heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
365
+ heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
366
+ rssMB: Math.round(mem.rss / 1024 / 1024),
367
+ historyLength: this.history.messages.length,
368
+ });
369
+ } else if (frac < 0.60 && this._memPressureLogged) {
370
+ this._memPressureLogged = false; // re-arm for next crossing
371
+ }
372
+ } catch { /* process.memoryUsage failures are non-fatal */ }
373
+ }
374
+
330
375
  /**
331
376
  * Pre-flight hard ceiling (Bug 1). After windowing, if the message
332
377
  * array's total token count still exceeds the model's input budget,
@@ -785,6 +830,13 @@ export class AgentEngine {
785
830
  content: historyContent,
786
831
  });
787
832
 
833
+ // Post-tool-result safety net: check for context pressure RIGHT NOW
834
+ // rather than waiting for the next LLM-loop iteration. A large tool
835
+ // result that tips history over the threshold used to sit there
836
+ // until the next turn, and if the stream aborted in between the
837
+ // user saw "CTX: 210% / stream terminated" with no recovery.
838
+ this._maybeWindowAfterToolResult();
839
+
788
840
  // Pipeline controller: update state and re-register tools on phase change
789
841
  if (pipeline?.onToolResult) {
790
842
  const pEvent = pipeline.onToolResult(tc.name, inputData, result);
@@ -928,7 +980,7 @@ export class AgentEngine {
928
980
 
929
981
  try {
930
982
  const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
931
- const rules = Array.isArray(catalog) ? catalog : [];
983
+ const rules = normalizeRuleCatalog(catalog);
932
984
  if (rules.length > 0) {
933
985
  this.taskManager.createRuleTasks(rules, phase);
934
986
  }
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Normalize a catalog.json payload into a flat array of rule records.
3
+ *
4
+ * KC the agent has historically produced catalog.json in at least four shapes,
5
+ * and earlier code paths assumed a flat array — silently dropping everything
6
+ * when the catalog was object-shaped. This helper unifies the handling so
7
+ * the engine, the rule_catalog tool, and the release tool all see the same
8
+ * list of rules regardless of how the file was written.
9
+ *
10
+ * Accepted shapes:
11
+ * 1. [rule, rule, ...] flat array (original)
12
+ * 2. { rules: [...] } wrapper object
13
+ * 3. { categories: { A: [...], B: [...] }, ... } grouped by category
14
+ * 4. { categories: { A: { rules: [...] }, ... } } nested category objects
15
+ *
16
+ * Anything else (null, a non-object, or an unrecognized shape) returns [].
17
+ */
18
+ export function normalizeRuleCatalog(catalog) {
19
+ if (Array.isArray(catalog)) return catalog;
20
+ if (!catalog || typeof catalog !== "object") return [];
21
+
22
+ if (Array.isArray(catalog.rules)) return catalog.rules;
23
+
24
+ if (catalog.categories && typeof catalog.categories === "object") {
25
+ const out = [];
26
+ for (const group of Object.values(catalog.categories)) {
27
+ if (Array.isArray(group)) {
28
+ out.push(...group);
29
+ } else if (group && Array.isArray(group.rules)) {
30
+ out.push(...group.rules);
31
+ }
32
+ }
33
+ if (out.length > 0) return out;
34
+ }
35
+
36
+ return [];
37
+ }
@@ -3,6 +3,7 @@ import path from "node:path";
3
3
  import { fileURLToPath } from "node:url";
4
4
  import { BaseTool, ToolResult } from "./base.js";
5
5
  import { SnapshotTool } from "./snapshot.js";
6
+ import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
6
7
 
7
8
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
8
9
  const TEMPLATE_DIR = path.resolve(__dirname, "../../../template/release-runtime");
@@ -100,7 +101,7 @@ export class ReleaseTool extends BaseTool {
100
101
  let catalog;
101
102
  try { catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8")); }
102
103
  catch (e) { return new ToolResult(`catalog.json invalid: ${e.message}`, true); }
103
- if (!Array.isArray(catalog)) catalog = catalog.rules || [];
104
+ catalog = normalizeRuleCatalog(catalog);
104
105
 
105
106
  const includeSet = Array.isArray(input.include) && input.include.length > 0
106
107
  ? new Set(input.include) : null;
@@ -1,10 +1,52 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import { BaseTool, ToolResult } from "./base.js";
4
+ import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
4
5
 
5
6
  const REQUIRED_FIELDS = new Set(["id", "source_ref", "description"]);
6
7
  const RECOMMENDED_FIELDS = new Set(["falsifiability_statement", "test_case_stub", "applicable_sections"]);
7
8
 
9
+ // Field-name aliases — LLMs frequently produce `source` or 来源 instead of
10
+ // `source_ref`, `desc` instead of `description`. Rather than making 38+ failed
11
+ // calls before the model figures out the canonical names (as observed in the
12
+ // v0.5.3 E2E test), accept the common aliases and canonicalize on ingest.
13
+ const FIELD_ALIASES = {
14
+ source: "source_ref",
15
+ reference: "source_ref",
16
+ ref: "source_ref",
17
+ "来源": "source_ref",
18
+ desc: "description",
19
+ "描述": "description",
20
+ rule_id: "id",
21
+ ruleId: "id",
22
+ };
23
+
24
+ function normalizeRuleData(data) {
25
+ if (!data || typeof data !== "object") return data;
26
+ const out = { ...data };
27
+ for (const [alias, canonical] of Object.entries(FIELD_ALIASES)) {
28
+ if (out[alias] !== undefined && out[canonical] === undefined) {
29
+ out[canonical] = out[alias];
30
+ }
31
+ }
32
+ return out;
33
+ }
34
+
35
+ function missingFieldError(missing, data) {
36
+ // Concrete, actionable error. The generic "Missing required fields: id,
37
+ // source_ref, description" confused agents (they couldn't tell which field
38
+ // they'd actually failed to provide). Point at the first missing field, name
39
+ // what was supplied, and mention the aliases so the model can self-correct.
40
+ const provided = Object.keys(data || {}).slice(0, 8).join(", ") || "(none)";
41
+ const first = missing[0];
42
+ const rest = missing.length > 1 ? ` (also missing: ${missing.slice(1).join(", ")})` : "";
43
+ return (
44
+ `Missing field '${first}' in data.${rest} ` +
45
+ `Provided keys: {${provided}}. ` +
46
+ `Accepted aliases: source/来源/reference → source_ref, desc/描述 → description, rule_id → id.`
47
+ );
48
+ }
49
+
8
50
  /**
9
51
  * CRUD on the rule registry with schema enforcement.
10
52
  * Enforces required fields (id, source_ref, description) on create/update.
@@ -48,14 +90,22 @@ export class RuleCatalogTool extends BaseTool {
48
90
  if (op === "create") return this._create(data);
49
91
  if (op === "update") return this._update(ruleId || data.id || "", data);
50
92
  if (op === "delete") return this._delete(ruleId || data.id || "");
51
- return new ToolResult(`Unknown operation: ${op}`, true);
93
+ // More helpful than "Unknown operation: " — tells the agent exactly what's
94
+ // allowed and what shape to call with next time (observed in v0.5.3 E2E
95
+ // where GLM-5.1 sent input: {} 38+ times without learning).
96
+ return new ToolResult(
97
+ `rule_catalog requires {operation}. Got: ${op ? `'${op}'` : "(empty)"}. ` +
98
+ `Valid operations: list, read, create, update, delete. ` +
99
+ `Examples: {"operation":"list"} · {"operation":"create","data":{"id":"R-01","source_ref":"民法典 710","description":"..."}}`,
100
+ true,
101
+ );
52
102
  }
53
103
 
54
104
  _load() {
55
105
  if (!fs.existsSync(this._catalogPath)) return [];
56
106
  try {
57
107
  const data = JSON.parse(fs.readFileSync(this._catalogPath, "utf-8"));
58
- return Array.isArray(data) ? data : [];
108
+ return normalizeRuleCatalog(data);
59
109
  } catch { return []; }
60
110
  }
61
111
 
@@ -79,8 +129,9 @@ export class RuleCatalogTool extends BaseTool {
79
129
  }
80
130
 
81
131
  _create(data) {
82
- const missing = [...REQUIRED_FIELDS].filter((f) => !(f in data));
83
- if (missing.length > 0) return new ToolResult(`Missing required fields: ${missing.join(", ")}`, true);
132
+ data = normalizeRuleData(data);
133
+ const missing = [...REQUIRED_FIELDS].filter((f) => !data[f]);
134
+ if (missing.length > 0) return new ToolResult(missingFieldError(missing, data), true);
84
135
  const rules = this._load();
85
136
  if (rules.some((r) => r.id === data.id)) return new ToolResult(`Rule already exists: ${data.id}. Use update.`, true);
86
137
  const warnings = [...RECOMMENDED_FIELDS].filter((f) => !(f in data));
@@ -93,6 +144,7 @@ export class RuleCatalogTool extends BaseTool {
93
144
 
94
145
  _update(ruleId, data) {
95
146
  if (!ruleId) return new ToolResult("rule_id required for update", true);
147
+ data = normalizeRuleData(data);
96
148
  const rules = this._load();
97
149
  const idx = rules.findIndex((r) => r.id === ruleId);
98
150
  if (idx < 0) return new ToolResult(`Rule not found: ${ruleId}`, true);
@@ -42,12 +42,20 @@ export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
42
42
  ? `${(contextLimit / 1000).toFixed(0)}k`
43
43
  : `${contextLimit || 0}`;
44
44
 
45
+ // Soft-threshold hint — shows up before auto-windowing kicks in at ~70%
46
+ // so users know they can run /compact to reduce context more aggressively
47
+ // than windowing does. Red hint at 80%+ means it's time to compact NOW.
48
+ const compactHint = pct >= 80 ? " · 💾 /compact"
49
+ : pct >= 60 ? " · 💾 建议 /compact"
50
+ : "";
51
+
45
52
  return h(Box, { marginTop: 0 },
46
53
  h(Text, { dimColor: true }, " ⏵⏵ KC Agent CLI "),
47
54
  h(Text, { dimColor: true }, sessionId ? `[${sessionId}]` : ""),
48
55
  phase ? h(Text, { color: "cyan" }, ` ${phase.toUpperCase()}`) : null,
49
56
  h(Text, { color: "green" }, " ● "),
50
57
  h(Text, { color: ctxColor }, `CTX: ${ctxLabel}/${limitLabel} (${pct}%)`),
58
+ compactHint ? h(Text, { color: ctxColor }, compactHint) : null,
51
59
  h(Text, { dimColor: true }, ` · ${LENAT_QUOTE}`),
52
60
  );
53
61
  }
@@ -112,29 +120,74 @@ export function WelcomeBanner({ projectDir, pendingInputCount = 0 } = {}) {
112
120
 
113
121
  // --- Tool block ---
114
122
 
115
- export function ToolBlock({ name, input, output, isError, isRunning }) {
123
+ /**
124
+ * Tool-result block.
125
+ *
126
+ * Rendering modes:
127
+ * - isRunning → yellow border, no output (spinner shown elsewhere).
128
+ * - isError → red border, ALWAYS show full output (errors are short + critical).
129
+ * - isRecent: true → green border, show up to ~4 lines + "N lines hidden" footer.
130
+ * - isRecent: false → header only (header includes line count + byte count).
131
+ *
132
+ * The full output is always on disk in logs/events.jsonl. Keeping the Ink
133
+ * tree slim is what lets KC handle long sessions without OOM / typing lag.
134
+ */
135
+ const RECENT_PREVIEW_LINES = 4;
136
+
137
+ export function ToolBlock({ name, input, output, isError, isRunning, isRecent = true }) {
116
138
  const borderColor = isRunning ? "yellow" : isError ? "red" : "green";
139
+ const outStr = typeof output === "string" ? output : "";
140
+ const lines = outStr ? outStr.split("\n") : [];
141
+ const bytes = outStr.length;
142
+
143
+ const header = h(Box, null,
144
+ h(Text, { color: borderColor }, "┃ "),
145
+ h(Text, { dimColor: true }, name),
146
+ input ? h(Text, { dimColor: true }, ` ${JSON.stringify(input).slice(0, 120)}`) : null,
147
+ outStr && !isRunning
148
+ ? h(Text, { dimColor: true }, ` (${lines.length} 行 / ${bytes} 字节)`)
149
+ : null,
150
+ );
117
151
 
152
+ // Errors: always show in full (short + critical).
153
+ if (isError && outStr) {
154
+ return h(Box, { flexDirection: "column", marginLeft: 2 },
155
+ header,
156
+ h(Box, { flexDirection: "column" },
157
+ ...lines.map((line, i) =>
158
+ h(Box, { key: i },
159
+ h(Text, { color: "red" }, "┃ "),
160
+ h(Text, { color: "red" }, line),
161
+ ),
162
+ ),
163
+ ),
164
+ );
165
+ }
166
+
167
+ // Off-screen / not-recent: header only. Full output remains on disk.
168
+ if (!isRecent || !outStr) {
169
+ return h(Box, { marginLeft: 2 }, header);
170
+ }
171
+
172
+ // Recent + successful: show preview + truncation footer.
173
+ const previewLines = lines.slice(0, RECENT_PREVIEW_LINES);
174
+ const remaining = lines.length - previewLines.length;
118
175
  return h(Box, { flexDirection: "column", marginLeft: 2 },
119
- h(Box, null,
120
- h(Text, { color: borderColor }, "┃ "),
121
- h(Text, { dimColor: true }, name),
122
- input ? h(Text, { dimColor: true }, ` ${JSON.stringify(input)}`) : null,
123
- ),
124
- output ? h(Box, { flexDirection: "column" },
125
- ...output.split("\n").slice(0, 20).map((line, i) =>
176
+ header,
177
+ h(Box, { flexDirection: "column" },
178
+ ...previewLines.map((line, i) =>
126
179
  h(Box, { key: i },
127
180
  h(Text, { color: borderColor }, "┃ "),
128
181
  h(Text, null, line),
129
182
  ),
130
183
  ),
131
- output.split("\n").length > 20
184
+ remaining > 0
132
185
  ? h(Box, null,
133
186
  h(Text, { color: borderColor }, "┃ "),
134
- h(Text, { dimColor: true }, `... ${output.split("\n").length - 20} more lines`),
187
+ h(Text, { dimColor: true }, `… ${remaining} 行已省略(在 logs/events.jsonl 中完整保留)`),
135
188
  )
136
189
  : null,
137
- ) : null,
190
+ ),
138
191
  );
139
192
  }
140
193
 
package/src/cli/index.js CHANGED
@@ -2,7 +2,7 @@ import React, { useState, useEffect, useCallback, useRef } from "react";
2
2
  import { render, Box, Text, useApp, useInput } from "ink";
3
3
  import { loadSettings } from "../config.js";
4
4
  import { LLMClient } from "../agent/llm-client.js";
5
- import { AgentEngine } from "../agent/engine.js";
5
+ import { AgentEngine, NEXT_PHASE } from "../agent/engine.js";
6
6
  import { Workspace } from "../agent/workspace.js";
7
7
  import { ConversationHistory } from "../agent/history.js";
8
8
  import { Scheduler } from "../agent/scheduler.js";
@@ -18,6 +18,18 @@ import {
18
18
 
19
19
  const h = React.createElement;
20
20
 
21
+ // Only the last N messages stay in the Ink render tree. Older messages
22
+ // remain in React state (so /compact can summarize them) but aren't
23
+ // diffed on every keystroke — this is what keeps long sessions responsive
24
+ // and prevents the 4 GB heap OOM observed in the v0.5.3 E2E test.
25
+ // Full conversation is persisted to logs/events.jsonl on every event,
26
+ // so dropping from render is purely visual.
27
+ const VISIBLE_WINDOW = 50;
28
+
29
+ // How many recent messages render their ToolBlock with full preview.
30
+ // Older ToolBlocks show header only. Both still persist full output to disk.
31
+ const RECENT_TOOL_WINDOW = 10;
32
+
21
33
  /**
22
34
  * Main KC Agent CLI App using Ink (React for terminals).
23
35
  */
@@ -159,6 +171,7 @@ function App({ engine, config }) {
159
171
  " /help Show this help\n" +
160
172
  " /status Show session info, model, phase, workspace\n" +
161
173
  " /tasks Show task progress\n" +
174
+ " /phase [sub] advance | status | <name> — manual phase override\n" +
162
175
  " /schedule Show scheduled ingestion jobs and recent log lines\n" +
163
176
  " /clear Clear conversation history (keep workspace)\n" +
164
177
  " /compact Summarize older messages to reduce context\n" +
@@ -195,6 +208,72 @@ function App({ engine, config }) {
195
208
  });
196
209
  return true;
197
210
 
211
+ case "/phase": {
212
+ // User-driven phase override. Useful when auto-advance fails to fire
213
+ // or when debugging. Subcommands:
214
+ // /phase → current phase (alias: /phase status)
215
+ // /phase advance | next → move to NEXT_PHASE[current]
216
+ // /phase <name> → force-jump to any phase (forward or back)
217
+ const engine = engineRef.current;
218
+ const sub = (parts[1] || "").toLowerCase();
219
+
220
+ if (!sub || sub === "status") {
221
+ const next = NEXT_PHASE[engine.currentPhase];
222
+ addMessage({
223
+ role: "system",
224
+ content:
225
+ `Current phase: ${engine.currentPhase.toUpperCase()}` +
226
+ (next ? ` (next auto: ${next})` : " (final phase)"),
227
+ });
228
+ return true;
229
+ }
230
+
231
+ if (sub === "advance" || sub === "next") {
232
+ const next = NEXT_PHASE[engine.currentPhase];
233
+ if (!next) {
234
+ addMessage({ role: "system", content: `Already in final phase (${engine.currentPhase}).` });
235
+ return true;
236
+ }
237
+ const ok = engine._advancePhase(next, "manual /phase advance");
238
+ if (ok) setPhase(engine.currentPhase);
239
+ addMessage({
240
+ role: "system",
241
+ content: ok
242
+ ? `→ phase advanced to ${next.toUpperCase()}.`
243
+ : `Failed to advance from ${engine.currentPhase}.`,
244
+ });
245
+ updateContextStats();
246
+ return true;
247
+ }
248
+
249
+ // /phase <name> — force-jump. Uses {force:true} to allow backward jumps.
250
+ // Whitelist against known phases first so an unknown name doesn't
251
+ // silently corrupt engine state (_advancePhase with {force:true}
252
+ // would otherwise accept any string and mutate currentPhase).
253
+ const validPhases = Object.keys(engine.pipelines);
254
+ if (!validPhases.includes(sub)) {
255
+ addMessage({
256
+ role: "system",
257
+ content: `Unknown phase: ${sub}. Valid: ${validPhases.join(", ")}`,
258
+ });
259
+ return true;
260
+ }
261
+ if (sub === engine.currentPhase) {
262
+ addMessage({ role: "system", content: `Already in phase ${sub.toUpperCase()}.` });
263
+ return true;
264
+ }
265
+ const ok = engine._advancePhase(sub, "manual /phase <name>", { force: true });
266
+ if (ok) setPhase(engine.currentPhase);
267
+ addMessage({
268
+ role: "system",
269
+ content: ok
270
+ ? `→ phase set to ${sub.toUpperCase()}.`
271
+ : `Failed to set phase to ${sub}.`,
272
+ });
273
+ updateContextStats();
274
+ return true;
275
+ }
276
+
198
277
  case "/schedule": {
199
278
  const sched = new Scheduler(engineRef.current.workspace);
200
279
  const jobs = sched.list();
@@ -228,21 +307,45 @@ function App({ engine, config }) {
228
307
 
229
308
  case "/compact": {
230
309
  addMessage({ role: "system", content: "Compacting conversation history..." });
231
- // Run compact asynchronously
310
+ // Gate the prompt while compact() is in flight. Without this,
311
+ // InputPrompt stays active (isActive: !streaming) and a concurrent
312
+ // user submission routes into runTurn → history.addUser(...), which
313
+ // appends to _messages AFTER compact()'s pre-await snapshot. When
314
+ // compact resolves it overwrites _messages with [summary, ack,
315
+ // ...recentMessages] and silently drops the concurrent turn.
316
+ streamingRef.current = true;
317
+ setStreaming(true);
318
+ setSpinnerStatus("Compacting...");
232
319
  (async () => {
233
320
  try {
234
321
  const result = await engineRef.current.compact();
235
322
  if (result) {
236
- addMessage({
323
+ // Claude Code pattern: after successful compact, clear the
324
+ // visible TUI messages and start fresh with a single summary
325
+ // line. The underlying engine.history already contains the
326
+ // compact-summary message pair; the TUI doesn't need to keep
327
+ // showing the pre-compact history (it's on disk in
328
+ // logs/events.jsonl anyway) and clearing it immediately frees
329
+ // Ink render-tree memory — fixing the lag that builds up over
330
+ // long sessions.
331
+ setMessages([{
237
332
  role: "system",
238
- content: `Compacted: removed ${result.removedCount} messages, kept ${result.retainedCount}. Summary: ~${result.summaryTokens} tokens.`,
239
- });
333
+ content: `✓ 上下文已压缩:合并了 ${result.removedCount} 条早期消息(摘要约 ${result.summaryTokens} tokens,保留最近 ${result.retainedCount} 条)`,
334
+ }]);
240
335
  } else {
241
336
  addMessage({ role: "system", content: "Nothing to compact (conversation is short enough)." });
242
337
  }
243
338
  updateContextStats();
244
339
  } catch (err) {
245
340
  addMessage({ role: "system", content: `Compact failed: ${err.message}` });
341
+ } finally {
342
+ streamingRef.current = false;
343
+ setStreaming(false);
344
+ setSpinnerStatus(null);
345
+ if (queueRef.current.length > 0) {
346
+ const next = queueRef.current.shift();
347
+ runTurn(next);
348
+ }
246
349
  }
247
350
  })();
248
351
  return true;
@@ -392,31 +495,41 @@ function App({ engine, config }) {
392
495
  // Task dashboard (ralph-loop)
393
496
  taskList.length > 0 ? h(TaskDashboard, { tasks: taskList, progress: taskProgress }) : null,
394
497
 
395
- // Message history
396
- ...messages.map((msg, i) => {
498
+ // Message history (virtualized — only last VISIBLE_WINDOW render).
499
+ // Hidden-count hint for earlier messages, so users know the full
500
+ // history still exists (on disk) even though the TUI is slim.
501
+ messages.length > VISIBLE_WINDOW ? h(Box, { key: "hidden-hint" },
502
+ h(Text, { dimColor: true },
503
+ `— 前 ${messages.length - VISIBLE_WINDOW} 条消息已折叠,完整记录在 logs/events.jsonl —`),
504
+ ) : null,
505
+ ...messages.slice(-VISIBLE_WINDOW).map((msg, i, arr) => {
506
+ // Global index (for stable React keys) vs visible index (for isRecent).
507
+ const globalIdx = messages.length - arr.length + i;
508
+ const visibleIdx = arr.length - 1 - i; // 0 = most recent
397
509
  if (msg.role === "user") {
398
- return h(Box, { key: `msg-${i}` },
510
+ return h(Box, { key: `msg-${globalIdx}` },
399
511
  h(Text, { dimColor: true }, "❯ "),
400
512
  h(Text, null, msg.content),
401
513
  );
402
514
  }
403
515
  if (msg.role === "agent") {
404
- return h(Box, { key: `msg-${i}` },
516
+ return h(Box, { key: `msg-${globalIdx}` },
405
517
  h(Text, null, msg.content),
406
518
  );
407
519
  }
408
520
  if (msg.role === "tool") {
409
521
  return h(ToolBlock, {
410
- key: `msg-${i}`,
522
+ key: `msg-${globalIdx}`,
411
523
  name: msg.toolName,
412
524
  input: msg.toolInput,
413
525
  output: msg.toolOutput,
414
526
  isError: msg.toolIsError,
415
527
  isRunning: false,
528
+ isRecent: visibleIdx < RECENT_TOOL_WINDOW,
416
529
  });
417
530
  }
418
531
  if (msg.role === "system") {
419
- return h(Box, { key: `msg-${i}` },
532
+ return h(Box, { key: `msg-${globalIdx}` },
420
533
  h(Text, { dimColor: true }, msg.content),
421
534
  );
422
535
  }
@@ -171,13 +171,19 @@ export async function onboard() {
171
171
  }
172
172
 
173
173
  // --- API Key ---
174
- const maskedExisting = existing.api_key ? existing.api_key.slice(0, 6) + "..." + existing.api_key.slice(-4) : "";
174
+ // Only offer to "keep" an existing key when the user hasn't switched
175
+ // providers. Otherwise an accidental Enter would silently save the OLD
176
+ // provider's key against the NEW provider's base URL — silent breakage.
177
+ const keyIsForSameProvider = existing.provider === provider.id;
178
+ const maskedExisting = keyIsForSameProvider && existing.api_key
179
+ ? existing.api_key.slice(0, 6) + "..." + existing.api_key.slice(-4)
180
+ : "";
175
181
  const keyHint = maskedExisting ? t.apiKeyKeep : t.apiKeyRequired;
176
182
  const keyPrompt = maskedExisting
177
183
  ? ` ${CYAN}${t.apiKey}${RESET} ${DIM}(${maskedExisting})${RESET}`
178
184
  : ` ${CYAN}${t.apiKey}${RESET} ${YELLOW}(${t.apiKeyRequired})${RESET}`;
179
185
  const apiKey = await ask(rl, keyPrompt, "", keyHint);
180
- const finalKey = apiKey || existing.api_key || "";
186
+ const finalKey = apiKey || (keyIsForSameProvider ? existing.api_key : "") || "";
181
187
  if (!finalKey) { console.log(` ${RED}${t.apiKeyMissing}${RESET}`); rl.close(); process.exit(1); }
182
188
  console.log();
183
189
 
@@ -245,34 +251,12 @@ export async function onboard() {
245
251
  );
246
252
  console.log();
247
253
 
248
- // --- Worker LLM tiers ---
249
- console.log(` ${CYAN}${t.workerTiers}${RESET} ${DIM}(${t.tierHint})${RESET}`);
250
- const tiers = {};
251
- for (const tier of ["tier1", "tier2", "tier3", "tier4"]) {
252
- const def = suggestedTiers?.[tier] || provider.defaultTiers[tier] || existing?.tiers?.[tier] || "";
253
- tiers[tier] = await ask(
254
- rl,
255
- ` ${tier.toUpperCase()}`,
256
- def,
257
- t.discoveryAccept ? "" : "",
258
- );
259
- }
260
- console.log();
261
-
262
- // --- VLM tiers (vision/OCR) ---
263
- console.log(` ${CYAN}${t.vlmTiers}${RESET} ${DIM}(${t.tierHint})${RESET}`);
264
- const vlmTiers = {};
265
- for (const tier of ["tier1", "tier2", "tier3"]) {
266
- const def = provider.defaultVlm?.[tier] || existing?.vlm_tiers?.[tier] || "";
267
- vlmTiers[tier] = await ask(
268
- rl,
269
- ` ${tier.toUpperCase()}`,
270
- def,
271
- );
272
- }
273
- console.log();
274
-
275
254
  // --- Worker LLM provider (optional) ---
255
+ // Ask worker-provider BEFORE tier prompts so that when worker differs from
256
+ // conductor (e.g. conductor=xfyun single-model, worker=siliconflow) the
257
+ // tier-default suggestions come from the WORKER provider's model-tiers.json
258
+ // entry, not the conductor's. Previous ordering defaulted tiers from the
259
+ // conductor and produced nonsensical defaults for single-model conductors.
276
260
  console.log(` ${CYAN}${t.workerConfig}${RESET}`);
277
261
  const sameProvider = await ask(rl, ` ${t.workerSameProvider}`, "Y", t.yesNo);
278
262
  let workerProvider = "";
@@ -280,6 +264,7 @@ export async function onboard() {
280
264
  let workerBaseUrl = "";
281
265
  let workerAuthType = "";
282
266
  let workerApiFormat = "";
267
+ let tierProviderDef = provider; // where tier defaults come from
283
268
 
284
269
  if (sameProvider.toLowerCase() === "n" || sameProvider.toLowerCase() === "no") {
285
270
  // Pick a different provider for workers
@@ -294,21 +279,58 @@ export async function onboard() {
294
279
  workerAuthType = wp.authType;
295
280
  workerApiFormat = wp.apiFormat;
296
281
  workerBaseUrl = wp.baseUrl;
282
+ tierProviderDef = wp;
297
283
 
298
284
  if (wp.id === "custom") {
299
285
  workerBaseUrl = await ask(rl, ` ${t.baseUrl}`, existing.worker_base_url || "");
300
286
  }
301
287
 
302
- // Worker API key
303
- const wMasked = existing.worker_api_key ? existing.worker_api_key.slice(0, 6) + "..." + existing.worker_api_key.slice(-4) : "";
288
+ // Worker API key. Show masked existing key in the prompt (matches the
289
+ // main-provider prompt style) so the user can confirm what's saved
290
+ // without guessing. Like the main key, only offer to "keep" the existing
291
+ // value if the WORKER provider itself hasn't changed — otherwise Enter
292
+ // would silently carry the previous worker provider's key across.
293
+ const workerKeyIsForSameProvider = existing.worker_provider === wp.id;
294
+ const wMasked = workerKeyIsForSameProvider && existing.worker_api_key
295
+ ? existing.worker_api_key.slice(0, 6) + "..." + existing.worker_api_key.slice(-4)
296
+ : "";
304
297
  const wKeyHint = wMasked ? t.apiKeyKeep : t.apiKeyRequired;
305
- workerApiKey = await ask(
298
+ const wKeyPrompt = wMasked
299
+ ? ` ${CYAN}${t.apiKey} (Worker)${RESET} ${DIM}(${wMasked})${RESET}`
300
+ : ` ${CYAN}${t.apiKey} (Worker)${RESET} ${YELLOW}(${t.apiKeyRequired})${RESET}`;
301
+ workerApiKey = await ask(rl, wKeyPrompt, "", wKeyHint);
302
+ workerApiKey = workerApiKey || (workerKeyIsForSameProvider ? existing.worker_api_key : "") || "";
303
+ }
304
+ console.log();
305
+
306
+ // --- Worker LLM tiers (defaults come from tierProviderDef set above) ---
307
+ // When worker==conductor, these default from the conductor's model-tiers.json
308
+ // entry. When worker is a separate provider, they default from the WORKER
309
+ // provider's entry — so e.g. siliconflow's GLM-5.1 tier1 defaults apply.
310
+ console.log(` ${CYAN}${t.workerTiers}${RESET} ${DIM}(${t.tierHint})${RESET}`);
311
+ const tiers = {};
312
+ const tierSuggested = tierProviderDef.id === provider.id ? suggestedTiers : null;
313
+ for (const tier of ["tier1", "tier2", "tier3", "tier4"]) {
314
+ const def = tierSuggested?.[tier] || tierProviderDef.defaultTiers?.[tier] || existing?.tiers?.[tier] || "";
315
+ tiers[tier] = await ask(
316
+ rl,
317
+ ` ${tier.toUpperCase()}`,
318
+ def,
319
+ t.discoveryAccept ? "" : "",
320
+ );
321
+ }
322
+ console.log();
323
+
324
+ // --- VLM tiers (vision/OCR) — also from worker provider when split ---
325
+ console.log(` ${CYAN}${t.vlmTiers}${RESET} ${DIM}(${t.tierHint})${RESET}`);
326
+ const vlmTiers = {};
327
+ for (const tier of ["tier1", "tier2", "tier3"]) {
328
+ const def = tierProviderDef.defaultVlm?.[tier] || existing?.vlm_tiers?.[tier] || "";
329
+ vlmTiers[tier] = await ask(
306
330
  rl,
307
- ` ${CYAN}${t.apiKey} (Worker)${RESET}`,
308
- "",
309
- wKeyHint,
331
+ ` ${tier.toUpperCase()}`,
332
+ def,
310
333
  );
311
- workerApiKey = workerApiKey || existing.worker_api_key || "";
312
334
  }
313
335
  console.log();
314
336
 
@@ -2,17 +2,17 @@
2
2
  "_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
3
3
 
4
4
  "siliconflow": {
5
- "conductor": "Pro/zai-org/GLM-5",
5
+ "conductor": "Pro/zai-org/GLM-5.1",
6
6
  "llm": {
7
- "tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
7
+ "tier1": "Pro/zai-org/GLM-5.1, Pro/moonshotai/Kimi-K2.5",
8
8
  "tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
9
9
  "tier3": "Qwen/Qwen3.5-122B-A10B",
10
10
  "tier4": "Qwen/Qwen3.5-35B-A3B"
11
11
  },
12
12
  "vlm": {
13
- "tier1": "Pro/Qwen/Qwen2.5-VL-72B-Instruct",
14
- "tier2": "Qwen/Qwen2.5-VL-32B-Instruct",
15
- "tier3": "Qwen/Qwen2.5-VL-7B-Instruct"
13
+ "tier1": "Qwen/Qwen3-VL-235B-A22B-Instruct",
14
+ "tier2": "Qwen/Qwen3-VL-30B-A3B-Instruct",
15
+ "tier3": "Qwen/Qwen3-VL-8B-Instruct"
16
16
  }
17
17
  },
18
18
 
@@ -46,6 +46,22 @@
46
46
  }
47
47
  },
48
48
 
49
+ "xfyun": {
50
+ "_comment": "iFlytek Astro coding plan exposes only astron-code-latest; no VLM.",
51
+ "conductor": "astron-code-latest",
52
+ "llm": {
53
+ "tier1": "astron-code-latest",
54
+ "tier2": "",
55
+ "tier3": "",
56
+ "tier4": ""
57
+ },
58
+ "vlm": {
59
+ "tier1": "",
60
+ "tier2": "",
61
+ "tier3": ""
62
+ }
63
+ },
64
+
49
65
  "anthropic": {
50
66
  "conductor": "claude-sonnet-4-20250514",
51
67
  "llm": {
package/src/providers.js CHANGED
@@ -98,6 +98,27 @@ const PROVIDERS = [
98
98
  zh: "火山云(字节跳动)",
99
99
  },
100
100
  },
101
+ {
102
+ id: "xfyun",
103
+ name: "XfYun Astro",
104
+ // iFlytek Astro coding plan — OpenAI-compatible endpoint. Only exposes
105
+ // a single model (astron-code-latest) today, so no /models discovery and
106
+ // the tier assignment in model-tiers.json only fills tier1 / conductor.
107
+ baseUrl: "https://maas-coding-api.cn-huabei-1.xf-yun.com/v2",
108
+ authType: "bearer",
109
+ apiFormat: "openai",
110
+ modelsEndpoint: null,
111
+ defaultModel: getTierConfig("xfyun").conductor || "astron-code-latest",
112
+ defaultTiers: getTierConfig("xfyun").llm,
113
+ defaultVlm: getTierConfig("xfyun").vlm,
114
+ curatedModels: [
115
+ { id: "astron-code-latest", ownedBy: "iflytek" },
116
+ ],
117
+ labels: {
118
+ en: "iFlytek XfYun Astro (coding plan, single-model)",
119
+ zh: "科大讯飞 Astro 编程套餐(单模型)",
120
+ },
121
+ },
101
122
  {
102
123
  id: "anthropic",
103
124
  name: "Anthropic",
@@ -244,6 +265,8 @@ const MODEL_RANKING = {
244
265
  // Others
245
266
  "kimi-k2.5": 85,
246
267
  "kimi-k2": 80,
268
+ // iFlytek Astro
269
+ "astron-code": 90,
247
270
  "minimax-m2": 80,
248
271
  "deepseek-v3": 85,
249
272
  "deepseek-r1": 90,
Binary file