kc-beta 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/context-window.js +7 -2
- package/src/agent/engine.js +63 -14
- package/src/agent/rule-catalog-normalize.js +37 -0
- package/src/agent/tools/release.js +2 -1
- package/src/agent/tools/rule-catalog.js +56 -4
- package/src/cli/components.js +64 -11
- package/src/cli/index.js +90 -11
- package/src/model-tiers.json +5 -5
package/package.json
CHANGED
|
@@ -12,10 +12,15 @@ export class ContextWindow {
|
|
|
12
12
|
* @param {number} [opts.reserveForResponse=8192] - Tokens reserved for model output
|
|
13
13
|
* @param {number} [opts.recentWindowSize=30] - Number of recent messages to always keep
|
|
14
14
|
*/
|
|
15
|
-
constructor({ contextLimit, reserveForResponse = 8192, recentWindowSize = 30 }) {
|
|
15
|
+
constructor({ contextLimit, reserveForResponse = 8192, recentWindowSize = 30, triggerFraction = 0.70 }) {
|
|
16
16
|
this.contextLimit = contextLimit;
|
|
17
17
|
this.reserveForResponse = reserveForResponse;
|
|
18
18
|
this.recentWindowSize = recentWindowSize;
|
|
19
|
+
// Fraction of budget that triggers windowing. v0.5.3 used 0.85 which only
|
|
20
|
+
// fired after runtime was already deep in the danger zone (a subsequent
|
|
21
|
+
// tool result could tip it over before the next check). 0.70 leaves room
|
|
22
|
+
// for one more tool result before hitting the hard ceiling.
|
|
23
|
+
this.triggerFraction = triggerFraction;
|
|
19
24
|
}
|
|
20
25
|
|
|
21
26
|
/**
|
|
@@ -29,7 +34,7 @@ export class ContextWindow {
|
|
|
29
34
|
const budget = this.contextLimit - this.reserveForResponse;
|
|
30
35
|
|
|
31
36
|
// If within budget, return as-is
|
|
32
|
-
if (totalTokens <= budget *
|
|
37
|
+
if (totalTokens <= budget * this.triggerFraction) {
|
|
33
38
|
return { messages, wasWindowed: false, removedCount: 0 };
|
|
34
39
|
}
|
|
35
40
|
|
package/src/agent/engine.js
CHANGED
|
@@ -4,6 +4,7 @@ import { AgentEvent } from "./events.js";
|
|
|
4
4
|
import { ContextAssembler } from "./context.js";
|
|
5
5
|
import { ConversationHistory } from "./history.js";
|
|
6
6
|
import { Workspace } from "./workspace.js";
|
|
7
|
+
import { normalizeRuleCatalog } from "./rule-catalog-normalize.js";
|
|
7
8
|
import { VersionManager } from "./version-manager.js";
|
|
8
9
|
import { CornerCaseRegistry } from "./corner-case-registry.js";
|
|
9
10
|
import { ConfidenceScorer } from "./confidence-scorer.js";
|
|
@@ -51,7 +52,9 @@ const DEFAULT_KC_MAX_TOKENS = 65536;
|
|
|
51
52
|
const DISTILL_PHASES = new Set([Phase.DISTILLATION, Phase.PRODUCTION_QC]);
|
|
52
53
|
|
|
53
54
|
// Linear phase order — used by auto-advance (Bug 4). Last phase has no successor.
|
|
54
|
-
|
|
55
|
+
// Exported so the TUI's /phase slash command (src/cli/index.js) can call
|
|
56
|
+
// _advancePhase with the right successor without re-declaring the map.
|
|
57
|
+
export const NEXT_PHASE = {
|
|
55
58
|
[Phase.BOOTSTRAP]: Phase.EXTRACTION,
|
|
56
59
|
[Phase.EXTRACTION]: Phase.SKILL_AUTHORING,
|
|
57
60
|
[Phase.SKILL_AUTHORING]: Phase.SKILL_TESTING,
|
|
@@ -168,18 +171,16 @@ export class AgentEngine {
|
|
|
168
171
|
this.toolRegistry = new ToolRegistry();
|
|
169
172
|
this._registerToolsForPhase(this.currentPhase);
|
|
170
173
|
|
|
171
|
-
// Edge-trigger state for _maybeAutoAdvance
|
|
172
|
-
//
|
|
173
|
-
//
|
|
174
|
-
//
|
|
175
|
-
this
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
}
|
|
182
|
-
}
|
|
174
|
+
// Edge-trigger state for _maybeAutoAdvance. Initialize to false for every
|
|
175
|
+
// phase so the first real false→true flip inside onToolResult triggers an
|
|
176
|
+
// advance — even when the user launches from a pre-populated workspace
|
|
177
|
+
// whose exit criteria already happen to be met at boot.
|
|
178
|
+
// resume() re-primes this from the restored pipeline state (see ~L566),
|
|
179
|
+
// which is the correct behaviour there: resumed sessions that were already
|
|
180
|
+
// past this phase shouldn't re-fire.
|
|
181
|
+
this._lastReady = Object.fromEntries(
|
|
182
|
+
Object.keys(this.pipelines).map((p) => [p, false]),
|
|
183
|
+
);
|
|
183
184
|
}
|
|
184
185
|
|
|
185
186
|
/**
|
|
@@ -327,6 +328,47 @@ export class AgentEngine {
|
|
|
327
328
|
};
|
|
328
329
|
}
|
|
329
330
|
|
|
331
|
+
/**
|
|
332
|
+
* Run the windowing check immediately after a tool result appends to
|
|
333
|
+
* history. Called from runTurn() so that a large tool result can't sit in
|
|
334
|
+
* history past the threshold until the next LLM-loop iteration, where a
|
|
335
|
+
* stream-abort could then trap the context in a bloated state.
|
|
336
|
+
*
|
|
337
|
+
* Safe to call frequently — contextWindow.window() fast-paths when under
|
|
338
|
+
* the trigger fraction.
|
|
339
|
+
*/
|
|
340
|
+
_maybeWindowAfterToolResult() {
|
|
341
|
+
if (!this.contextWindow) return;
|
|
342
|
+
const windowed = this.contextWindow.window(this.history.messages, this._phaseSummaries);
|
|
343
|
+
if (windowed.wasWindowed) {
|
|
344
|
+
this.history.messages = windowed.messages;
|
|
345
|
+
this.eventLog.append("context_windowed", {
|
|
346
|
+
removed: windowed.removedCount,
|
|
347
|
+
trigger: "post_tool_result",
|
|
348
|
+
});
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// Heap-pressure diagnostic. The TUI has its own virtualization + tool-
|
|
352
|
+
// output truncation (Bug 3 fixes), so Ink itself should never OOM. If we
|
|
353
|
+
// still see high heap usage, something else is leaking — log it once per
|
|
354
|
+
// pressure-crossing so operators can investigate without flooding logs.
|
|
355
|
+
try {
|
|
356
|
+
const mem = process.memoryUsage();
|
|
357
|
+
const frac = mem.heapUsed / (mem.heapTotal || 1);
|
|
358
|
+
if (frac > 0.80 && !this._memPressureLogged) {
|
|
359
|
+
this._memPressureLogged = true;
|
|
360
|
+
this.eventLog.append("memory_pressure", {
|
|
361
|
+
heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024),
|
|
362
|
+
heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024),
|
|
363
|
+
rssMB: Math.round(mem.rss / 1024 / 1024),
|
|
364
|
+
historyLength: this.history.messages.length,
|
|
365
|
+
});
|
|
366
|
+
} else if (frac < 0.60 && this._memPressureLogged) {
|
|
367
|
+
this._memPressureLogged = false; // re-arm for next crossing
|
|
368
|
+
}
|
|
369
|
+
} catch { /* process.memoryUsage failures are non-fatal */ }
|
|
370
|
+
}
|
|
371
|
+
|
|
330
372
|
/**
|
|
331
373
|
* Pre-flight hard ceiling (Bug 1). After windowing, if the message
|
|
332
374
|
* array's total token count still exceeds the model's input budget,
|
|
@@ -785,6 +827,13 @@ export class AgentEngine {
|
|
|
785
827
|
content: historyContent,
|
|
786
828
|
});
|
|
787
829
|
|
|
830
|
+
// Post-tool-result safety net: check for context pressure RIGHT NOW
|
|
831
|
+
// rather than waiting for the next LLM-loop iteration. A large tool
|
|
832
|
+
// result that tips history over the threshold used to sit there
|
|
833
|
+
// until the next turn, and if the stream aborted in between the
|
|
834
|
+
// user saw "CTX: 210% / stream terminated" with no recovery.
|
|
835
|
+
this._maybeWindowAfterToolResult();
|
|
836
|
+
|
|
788
837
|
// Pipeline controller: update state and re-register tools on phase change
|
|
789
838
|
if (pipeline?.onToolResult) {
|
|
790
839
|
const pEvent = pipeline.onToolResult(tc.name, inputData, result);
|
|
@@ -928,7 +977,7 @@ export class AgentEngine {
|
|
|
928
977
|
|
|
929
978
|
try {
|
|
930
979
|
const catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
|
|
931
|
-
const rules =
|
|
980
|
+
const rules = normalizeRuleCatalog(catalog);
|
|
932
981
|
if (rules.length > 0) {
|
|
933
982
|
this.taskManager.createRuleTasks(rules, phase);
|
|
934
983
|
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
 * Normalize a catalog.json payload into a flat array of rule records.
 *
 * The KC agent has historically written catalog.json in at least four
 * shapes, and earlier code paths assumed a flat array — silently dropping
 * everything when the catalog was object-shaped. This helper gives every
 * caller the same flat list regardless of how the file was written.
 *
 * Accepted shapes:
 *   1. [rule, rule, ...]                            flat array (original)
 *   2. { rules: [...] }                             wrapper object
 *   3. { categories: { A: [...], B: [...] }, ... }  grouped by category
 *   4. { categories: { A: { rules: [...] }, ... } } nested category objects
 *
 * Shapes 1 and 2 return the stored array itself (no copy); shapes 3 and 4
 * return a newly built array. Anything else (null, primitives, unrecognized
 * objects, categories that hold no rule arrays) returns [].
 *
 * @param {unknown} catalog - Parsed catalog.json content.
 * @returns {Array} Flat list of rule records; never null.
 */
export function normalizeRuleCatalog(catalog) {
  if (Array.isArray(catalog)) return catalog;
  if (!catalog || typeof catalog !== "object") return [];

  if (Array.isArray(catalog.rules)) return catalog.rules;

  const { categories } = catalog;
  if (!categories || typeof categories !== "object") return [];

  const out = [];
  for (const group of Object.values(categories)) {
    const list = Array.isArray(group) ? group : group?.rules;
    if (!Array.isArray(list)) continue;
    // Append element by element: out.push(...list) passes every element as a
    // call argument and throws RangeError once a group gets very large.
    for (const rule of list) out.push(rule);
  }
  return out;
}
|
|
@@ -3,6 +3,7 @@ import path from "node:path";
|
|
|
3
3
|
import { fileURLToPath } from "node:url";
|
|
4
4
|
import { BaseTool, ToolResult } from "./base.js";
|
|
5
5
|
import { SnapshotTool } from "./snapshot.js";
|
|
6
|
+
import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
|
|
6
7
|
|
|
7
8
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
8
9
|
const TEMPLATE_DIR = path.resolve(__dirname, "../../../template/release-runtime");
|
|
@@ -100,7 +101,7 @@ export class ReleaseTool extends BaseTool {
|
|
|
100
101
|
let catalog;
|
|
101
102
|
try { catalog = JSON.parse(fs.readFileSync(catalogPath, "utf-8")); }
|
|
102
103
|
catch (e) { return new ToolResult(`catalog.json invalid: ${e.message}`, true); }
|
|
103
|
-
|
|
104
|
+
catalog = normalizeRuleCatalog(catalog);
|
|
104
105
|
|
|
105
106
|
const includeSet = Array.isArray(input.include) && input.include.length > 0
|
|
106
107
|
? new Set(input.include) : null;
|
|
@@ -1,10 +1,52 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { BaseTool, ToolResult } from "./base.js";
|
|
4
|
+
import { normalizeRuleCatalog } from "../rule-catalog-normalize.js";
|
|
4
5
|
|
|
5
6
|
const REQUIRED_FIELDS = new Set(["id", "source_ref", "description"]);
|
|
6
7
|
const RECOMMENDED_FIELDS = new Set(["falsifiability_statement", "test_case_stub", "applicable_sections"]);
|
|
7
8
|
|
|
9
|
+
// Field-name aliases — LLMs frequently produce `source` or 来源 instead of
// `source_ref`, `desc` instead of `description`. Rather than letting the model
// burn dozens of failed calls before it discovers the canonical names (as
// observed in the v0.5.3 E2E test), accept the common aliases and
// canonicalize on ingest. Keys are aliases, values are canonical field names.
const FIELD_ALIASES = {
  source: "source_ref",
  reference: "source_ref",
  ref: "source_ref",
  "来源": "source_ref",
  desc: "description",
  "描述": "description",
  rule_id: "id",
  ruleId: "id",
};

/**
 * Copy alias fields onto their canonical names.
 *
 * Returns a shallow copy of `data` in which each canonical field that is
 * absent (undefined, null or "") is filled from the first alias in
 * FIELD_ALIASES order that carries a non-blank value. Alias keys are left in
 * place and the input object is never mutated.
 *
 * Blank values are treated as absent on both sides so that:
 *   - { source_ref: "", source: "X" } resolves to source_ref "X" instead of
 *     staying empty and failing the required-field check, and
 *   - { source: null } does not write a null source_ref.
 *
 * @param {*} data - Rule payload supplied by the caller.
 * @returns {*} Normalized copy, or `data` itself when it is not an object.
 */
function normalizeRuleData(data) {
  if (!data || typeof data !== "object") return data;

  const isBlank = (value) => value == null || value === "";
  const out = { ...data };
  for (const [alias, canonical] of Object.entries(FIELD_ALIASES)) {
    if (isBlank(out[canonical]) && !isBlank(out[alias])) {
      out[canonical] = out[alias];
    }
  }
  return out;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Build a concrete, actionable "missing field" message for the agent.
 *
 * A generic "Missing required fields: id, source_ref, description" confused
 * agents: they could not tell which field they had actually failed to
 * provide. This message names the first missing field, lists the rest, shows
 * up to eight keys that were supplied, and spells out the accepted aliases so
 * the model can self-correct.
 *
 * The alias hint is derived from the alias table rather than hand-written,
 * so it cannot drift out of sync when aliases are added.
 *
 * @param {string[]} missing - Names of the missing required fields (non-empty).
 * @param {*} data - Payload that was validated; non-objects report "(none)".
 * @param {Object<string, string>} [aliases=FIELD_ALIASES] - Alias → canonical map.
 * @returns {string} Human-readable error text.
 */
function missingFieldError(missing, data, aliases = FIELD_ALIASES) {
  const suppliedKeys = data && typeof data === "object" ? Object.keys(data) : [];
  const provided = suppliedKeys.slice(0, 8).join(", ") || "(none)";

  const [first, ...others] = missing;
  const rest = others.length > 0 ? ` (also missing: ${others.join(", ")})` : "";

  // Group aliases by canonical name, preserving the table's insertion order.
  const byCanonical = new Map();
  for (const [alias, canonical] of Object.entries(aliases)) {
    if (!byCanonical.has(canonical)) byCanonical.set(canonical, []);
    byCanonical.get(canonical).push(alias);
  }
  const aliasHint = [...byCanonical]
    .map(([canonical, names]) => `${names.join("/")} → ${canonical}`)
    .join(", ");

  return (
    `Missing field '${first}' in data.${rest} ` +
    `Provided keys: {${provided}}. ` +
    `Accepted aliases: ${aliasHint}.`
  );
}
|
|
49
|
+
|
|
8
50
|
/**
|
|
9
51
|
* CRUD on the rule registry with schema enforcement.
|
|
10
52
|
* Enforces required fields (id, source_ref, description) on create/update.
|
|
@@ -48,14 +90,22 @@ export class RuleCatalogTool extends BaseTool {
|
|
|
48
90
|
if (op === "create") return this._create(data);
|
|
49
91
|
if (op === "update") return this._update(ruleId || data.id || "", data);
|
|
50
92
|
if (op === "delete") return this._delete(ruleId || data.id || "");
|
|
51
|
-
|
|
93
|
+
// More helpful than "Unknown operation: " — tells the agent exactly what's
|
|
94
|
+
// allowed and what shape to call with next time (observed in v0.5.3 E2E
|
|
95
|
+
// where GLM-5.1 sent input: {} 38+ times without learning).
|
|
96
|
+
return new ToolResult(
|
|
97
|
+
`rule_catalog requires {operation}. Got: ${op ? `'${op}'` : "(empty)"}. ` +
|
|
98
|
+
`Valid operations: list, read, create, update, delete. ` +
|
|
99
|
+
`Examples: {"operation":"list"} · {"operation":"create","data":{"id":"R-01","source_ref":"民法典 710","description":"..."}}`,
|
|
100
|
+
true,
|
|
101
|
+
);
|
|
52
102
|
}
|
|
53
103
|
|
|
54
104
|
_load() {
|
|
55
105
|
if (!fs.existsSync(this._catalogPath)) return [];
|
|
56
106
|
try {
|
|
57
107
|
const data = JSON.parse(fs.readFileSync(this._catalogPath, "utf-8"));
|
|
58
|
-
return
|
|
108
|
+
return normalizeRuleCatalog(data);
|
|
59
109
|
} catch { return []; }
|
|
60
110
|
}
|
|
61
111
|
|
|
@@ -79,8 +129,9 @@ export class RuleCatalogTool extends BaseTool {
|
|
|
79
129
|
}
|
|
80
130
|
|
|
81
131
|
_create(data) {
|
|
82
|
-
|
|
83
|
-
|
|
132
|
+
data = normalizeRuleData(data);
|
|
133
|
+
const missing = [...REQUIRED_FIELDS].filter((f) => !data[f]);
|
|
134
|
+
if (missing.length > 0) return new ToolResult(missingFieldError(missing, data), true);
|
|
84
135
|
const rules = this._load();
|
|
85
136
|
if (rules.some((r) => r.id === data.id)) return new ToolResult(`Rule already exists: ${data.id}. Use update.`, true);
|
|
86
137
|
const warnings = [...RECOMMENDED_FIELDS].filter((f) => !(f in data));
|
|
@@ -93,6 +144,7 @@ export class RuleCatalogTool extends BaseTool {
|
|
|
93
144
|
|
|
94
145
|
_update(ruleId, data) {
|
|
95
146
|
if (!ruleId) return new ToolResult("rule_id required for update", true);
|
|
147
|
+
data = normalizeRuleData(data);
|
|
96
148
|
const rules = this._load();
|
|
97
149
|
const idx = rules.findIndex((r) => r.id === ruleId);
|
|
98
150
|
if (idx < 0) return new ToolResult(`Rule not found: ${ruleId}`, true);
|
package/src/cli/components.js
CHANGED
|
@@ -42,12 +42,20 @@ export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
|
|
|
42
42
|
? `${(contextLimit / 1000).toFixed(0)}k`
|
|
43
43
|
: `${contextLimit || 0}`;
|
|
44
44
|
|
|
45
|
+
// Soft-threshold hint — shows up before auto-windowing kicks in at ~70%
|
|
46
|
+
// so users know they can run /compact to reduce context more aggressively
|
|
47
|
+
// than windowing does. Red hint at 80%+ means it's time to compact NOW.
|
|
48
|
+
const compactHint = pct >= 80 ? " · 💾 /compact"
|
|
49
|
+
: pct >= 60 ? " · 💾 建议 /compact"
|
|
50
|
+
: "";
|
|
51
|
+
|
|
45
52
|
return h(Box, { marginTop: 0 },
|
|
46
53
|
h(Text, { dimColor: true }, " ⏵⏵ KC Agent CLI "),
|
|
47
54
|
h(Text, { dimColor: true }, sessionId ? `[${sessionId}]` : ""),
|
|
48
55
|
phase ? h(Text, { color: "cyan" }, ` ${phase.toUpperCase()}`) : null,
|
|
49
56
|
h(Text, { color: "green" }, " ● "),
|
|
50
57
|
h(Text, { color: ctxColor }, `CTX: ${ctxLabel}/${limitLabel} (${pct}%)`),
|
|
58
|
+
compactHint ? h(Text, { color: ctxColor }, compactHint) : null,
|
|
51
59
|
h(Text, { dimColor: true }, ` · ${LENAT_QUOTE}`),
|
|
52
60
|
);
|
|
53
61
|
}
|
|
@@ -112,29 +120,74 @@ export function WelcomeBanner({ projectDir, pendingInputCount = 0 } = {}) {
|
|
|
112
120
|
|
|
113
121
|
// --- Tool block ---
|
|
114
122
|
|
|
115
|
-
|
|
123
|
+
// Maximum number of output lines previewed for a recent, successful tool call.
const RECENT_PREVIEW_LINES = 4;

/**
 * Tool-result block.
 *
 * Rendering modes:
 *   - isRunning        → yellow border; the size suffix is left off the header.
 *   - isError + output → red border, every output line rendered in red.
 *   - isRecent: true   → green border, up to RECENT_PREVIEW_LINES lines plus
 *                        an "N lines hidden" footer when output was cut.
 *   - isRecent: false, or no output → header only.
 *
 * The header shows the tool name, the JSON-encoded input clipped to 120
 * characters, and (when output exists and the call is not running) the
 * output's line count and UTF-8 byte size.
 *
 * Only a slice of the output is rendered so the Ink tree stays slim during
 * long sessions; the truncation footer points the user at logs/events.jsonl
 * for the complete text.
 *
 * @param {object} props
 * @param {string} props.name - Tool name shown in the header.
 * @param {*} [props.input] - Tool input; JSON-encoded for display.
 * @param {string} [props.output] - Tool output; non-strings are treated as empty.
 * @param {boolean} [props.isError] - Render as an error.
 * @param {boolean} [props.isRunning] - Tool is still executing.
 * @param {boolean} [props.isRecent=true] - Whether to show the output preview.
 */
export function ToolBlock({ name, input, output, isError, isRunning, isRecent = true }) {
  const borderColor = isRunning ? "yellow" : isError ? "red" : "green";
  const outStr = typeof output === "string" ? output : "";
  const lines = outStr ? outStr.split("\n") : [];
  // The header labels this figure as bytes (字节), so report the UTF-8 encoded
  // size. String#length counts UTF-16 code units and under-reports CJK/emoji.
  const bytes = Buffer.byteLength(outStr, "utf8");

  const header = h(Box, null,
    h(Text, { color: borderColor }, "┃ "),
    h(Text, { dimColor: true }, name),
    input ? h(Text, { dimColor: true }, ` ${JSON.stringify(input).slice(0, 120)}`) : null,
    outStr && !isRunning
      ? h(Text, { dimColor: true }, ` (${lines.length} 行 / ${bytes} 字节)`)
      : null,
  );

  // Errors: always show in full (short + critical).
  if (isError && outStr) {
    return h(Box, { flexDirection: "column", marginLeft: 2 },
      header,
      h(Box, { flexDirection: "column" },
        ...lines.map((line, i) =>
          h(Box, { key: i },
            h(Text, { color: "red" }, "┃ "),
            h(Text, { color: "red" }, line),
          ),
        ),
      ),
    );
  }

  // Not recent, or nothing to show: header only.
  if (!isRecent || !outStr) {
    return h(Box, { marginLeft: 2 }, header);
  }

  // Recent + successful: show preview + truncation footer.
  const previewLines = lines.slice(0, RECENT_PREVIEW_LINES);
  const remaining = lines.length - previewLines.length;
  return h(Box, { flexDirection: "column", marginLeft: 2 },
    header,
    h(Box, { flexDirection: "column" },
      ...previewLines.map((line, i) =>
        h(Box, { key: i },
          h(Text, { color: borderColor }, "┃ "),
          h(Text, null, line),
        ),
      ),
      remaining > 0
        ? h(Box, null,
            h(Text, { color: borderColor }, "┃ "),
            h(Text, { dimColor: true }, `… ${remaining} 行已省略(在 logs/events.jsonl 中完整保留)`),
          )
        : null,
    ),
  );
}
|
|
140
193
|
|
package/src/cli/index.js
CHANGED
|
@@ -2,7 +2,7 @@ import React, { useState, useEffect, useCallback, useRef } from "react";
|
|
|
2
2
|
import { render, Box, Text, useApp, useInput } from "ink";
|
|
3
3
|
import { loadSettings } from "../config.js";
|
|
4
4
|
import { LLMClient } from "../agent/llm-client.js";
|
|
5
|
-
import { AgentEngine } from "../agent/engine.js";
|
|
5
|
+
import { AgentEngine, NEXT_PHASE } from "../agent/engine.js";
|
|
6
6
|
import { Workspace } from "../agent/workspace.js";
|
|
7
7
|
import { ConversationHistory } from "../agent/history.js";
|
|
8
8
|
import { Scheduler } from "../agent/scheduler.js";
|
|
@@ -18,6 +18,18 @@ import {
|
|
|
18
18
|
|
|
19
19
|
const h = React.createElement;
|
|
20
20
|
|
|
21
|
+
// Only the last N messages stay in the Ink render tree. Older messages
|
|
22
|
+
// remain in React state (so /compact can summarize them) but aren't
|
|
23
|
+
// diffed on every keystroke — this is what keeps long sessions responsive
|
|
24
|
+
// and prevents the 4 GB heap OOM observed in the v0.5.3 E2E test.
|
|
25
|
+
// Full conversation is persisted to logs/events.jsonl on every event,
|
|
26
|
+
// so dropping from render is purely visual.
|
|
27
|
+
const VISIBLE_WINDOW = 50;
|
|
28
|
+
|
|
29
|
+
// How many recent messages render their ToolBlock with full preview.
|
|
30
|
+
// Older ToolBlocks show header only. Both still persist full output to disk.
|
|
31
|
+
const RECENT_TOOL_WINDOW = 10;
|
|
32
|
+
|
|
21
33
|
/**
|
|
22
34
|
* Main KC Agent CLI App using Ink (React for terminals).
|
|
23
35
|
*/
|
|
@@ -159,6 +171,7 @@ function App({ engine, config }) {
|
|
|
159
171
|
" /help Show this help\n" +
|
|
160
172
|
" /status Show session info, model, phase, workspace\n" +
|
|
161
173
|
" /tasks Show task progress\n" +
|
|
174
|
+
" /phase [sub] advance | status | <name> — manual phase override\n" +
|
|
162
175
|
" /schedule Show scheduled ingestion jobs and recent log lines\n" +
|
|
163
176
|
" /clear Clear conversation history (keep workspace)\n" +
|
|
164
177
|
" /compact Summarize older messages to reduce context\n" +
|
|
@@ -195,6 +208,55 @@ function App({ engine, config }) {
|
|
|
195
208
|
});
|
|
196
209
|
return true;
|
|
197
210
|
|
|
211
|
+
case "/phase": {
|
|
212
|
+
// User-driven phase override. Useful when auto-advance fails to fire
|
|
213
|
+
// or when debugging. Subcommands:
|
|
214
|
+
// /phase → current phase (alias: /phase status)
|
|
215
|
+
// /phase advance | next → move to NEXT_PHASE[current]
|
|
216
|
+
// /phase <name> → force-jump to any phase (forward or back)
|
|
217
|
+
const engine = engineRef.current;
|
|
218
|
+
const sub = (parts[1] || "").toLowerCase();
|
|
219
|
+
|
|
220
|
+
if (!sub || sub === "status") {
|
|
221
|
+
const next = NEXT_PHASE[engine.currentPhase];
|
|
222
|
+
addMessage({
|
|
223
|
+
role: "system",
|
|
224
|
+
content:
|
|
225
|
+
`Current phase: ${engine.currentPhase.toUpperCase()}` +
|
|
226
|
+
(next ? ` (next auto: ${next})` : " (final phase)"),
|
|
227
|
+
});
|
|
228
|
+
return true;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
if (sub === "advance" || sub === "next") {
|
|
232
|
+
const next = NEXT_PHASE[engine.currentPhase];
|
|
233
|
+
if (!next) {
|
|
234
|
+
addMessage({ role: "system", content: `Already in final phase (${engine.currentPhase}).` });
|
|
235
|
+
return true;
|
|
236
|
+
}
|
|
237
|
+
const ok = engine._advancePhase(next, "manual /phase advance");
|
|
238
|
+
addMessage({
|
|
239
|
+
role: "system",
|
|
240
|
+
content: ok
|
|
241
|
+
? `→ phase advanced to ${next.toUpperCase()}.`
|
|
242
|
+
: `Failed to advance from ${engine.currentPhase}.`,
|
|
243
|
+
});
|
|
244
|
+
updateContextStats();
|
|
245
|
+
return true;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// /phase <name> — force-jump. Uses {force:true} to allow backward jumps.
|
|
249
|
+
const ok = engine._advancePhase(sub, "manual /phase <name>", { force: true });
|
|
250
|
+
addMessage({
|
|
251
|
+
role: "system",
|
|
252
|
+
content: ok
|
|
253
|
+
? `→ phase set to ${sub.toUpperCase()}.`
|
|
254
|
+
: `Unknown phase: ${sub}. Valid: bootstrap, extraction, skill_authoring, skill_testing, distillation, production_qc`,
|
|
255
|
+
});
|
|
256
|
+
updateContextStats();
|
|
257
|
+
return true;
|
|
258
|
+
}
|
|
259
|
+
|
|
198
260
|
case "/schedule": {
|
|
199
261
|
const sched = new Scheduler(engineRef.current.workspace);
|
|
200
262
|
const jobs = sched.list();
|
|
@@ -228,15 +290,22 @@ function App({ engine, config }) {
|
|
|
228
290
|
|
|
229
291
|
case "/compact": {
|
|
230
292
|
addMessage({ role: "system", content: "Compacting conversation history..." });
|
|
231
|
-
// Run compact asynchronously
|
|
232
293
|
(async () => {
|
|
233
294
|
try {
|
|
234
295
|
const result = await engineRef.current.compact();
|
|
235
296
|
if (result) {
|
|
236
|
-
|
|
297
|
+
// Claude Code pattern: after successful compact, clear the
|
|
298
|
+
// visible TUI messages and start fresh with a single summary
|
|
299
|
+
// line. The underlying engine.history already contains the
|
|
300
|
+
// compact-summary message pair; the TUI doesn't need to keep
|
|
301
|
+
// showing the pre-compact history (it's on disk in
|
|
302
|
+
// logs/events.jsonl anyway) and clearing it immediately frees
|
|
303
|
+
// Ink render-tree memory — fixing the lag that builds up over
|
|
304
|
+
// long sessions.
|
|
305
|
+
setMessages([{
|
|
237
306
|
role: "system",
|
|
238
|
-
content:
|
|
239
|
-
});
|
|
307
|
+
content: `✓ 上下文已压缩:合并了 ${result.removedCount} 条早期消息(摘要约 ${result.summaryTokens} tokens,保留最近 ${result.retainedCount} 条)`,
|
|
308
|
+
}]);
|
|
240
309
|
} else {
|
|
241
310
|
addMessage({ role: "system", content: "Nothing to compact (conversation is short enough)." });
|
|
242
311
|
}
|
|
@@ -392,31 +461,41 @@ function App({ engine, config }) {
|
|
|
392
461
|
// Task dashboard (ralph-loop)
|
|
393
462
|
taskList.length > 0 ? h(TaskDashboard, { tasks: taskList, progress: taskProgress }) : null,
|
|
394
463
|
|
|
395
|
-
// Message history
|
|
396
|
-
|
|
464
|
+
// Message history (virtualized — only last VISIBLE_WINDOW render).
|
|
465
|
+
// Hidden-count hint for earlier messages, so users know the full
|
|
466
|
+
// history still exists (on disk) even though the TUI is slim.
|
|
467
|
+
messages.length > VISIBLE_WINDOW ? h(Box, { key: "hidden-hint" },
|
|
468
|
+
h(Text, { dimColor: true },
|
|
469
|
+
`— 前 ${messages.length - VISIBLE_WINDOW} 条消息已折叠,完整记录在 logs/events.jsonl —`),
|
|
470
|
+
) : null,
|
|
471
|
+
...messages.slice(-VISIBLE_WINDOW).map((msg, i, arr) => {
|
|
472
|
+
// Global index (for stable React keys) vs visible index (for isRecent).
|
|
473
|
+
const globalIdx = messages.length - arr.length + i;
|
|
474
|
+
const visibleIdx = arr.length - 1 - i; // 0 = most recent
|
|
397
475
|
if (msg.role === "user") {
|
|
398
|
-
return h(Box, { key: `msg-${
|
|
476
|
+
return h(Box, { key: `msg-${globalIdx}` },
|
|
399
477
|
h(Text, { dimColor: true }, "❯ "),
|
|
400
478
|
h(Text, null, msg.content),
|
|
401
479
|
);
|
|
402
480
|
}
|
|
403
481
|
if (msg.role === "agent") {
|
|
404
|
-
return h(Box, { key: `msg-${
|
|
482
|
+
return h(Box, { key: `msg-${globalIdx}` },
|
|
405
483
|
h(Text, null, msg.content),
|
|
406
484
|
);
|
|
407
485
|
}
|
|
408
486
|
if (msg.role === "tool") {
|
|
409
487
|
return h(ToolBlock, {
|
|
410
|
-
key: `msg-${
|
|
488
|
+
key: `msg-${globalIdx}`,
|
|
411
489
|
name: msg.toolName,
|
|
412
490
|
input: msg.toolInput,
|
|
413
491
|
output: msg.toolOutput,
|
|
414
492
|
isError: msg.toolIsError,
|
|
415
493
|
isRunning: false,
|
|
494
|
+
isRecent: visibleIdx < RECENT_TOOL_WINDOW,
|
|
416
495
|
});
|
|
417
496
|
}
|
|
418
497
|
if (msg.role === "system") {
|
|
419
|
-
return h(Box, { key: `msg-${
|
|
498
|
+
return h(Box, { key: `msg-${globalIdx}` },
|
|
420
499
|
h(Text, { dimColor: true }, msg.content),
|
|
421
500
|
);
|
|
422
501
|
}
|
package/src/model-tiers.json
CHANGED
|
@@ -2,17 +2,17 @@
|
|
|
2
2
|
"_comment": "Model selections per provider. LLM tiers 1-4, VLM tiers 1-3. Edit this file directly to update model assignments.",
|
|
3
3
|
|
|
4
4
|
"siliconflow": {
|
|
5
|
-
"conductor": "Pro/zai-org/GLM-5",
|
|
5
|
+
"conductor": "Pro/zai-org/GLM-5.1",
|
|
6
6
|
"llm": {
|
|
7
|
-
"tier1": "Pro/zai-org/GLM-5, Pro/moonshotai/Kimi-K2.5",
|
|
7
|
+
"tier1": "Pro/zai-org/GLM-5.1, Pro/moonshotai/Kimi-K2.5",
|
|
8
8
|
"tier2": "Pro/deepseek-ai/DeepSeek-V3.2, Pro/MiniMaxAI/MiniMax-M2.5",
|
|
9
9
|
"tier3": "Qwen/Qwen3.5-122B-A10B",
|
|
10
10
|
"tier4": "Qwen/Qwen3.5-35B-A3B"
|
|
11
11
|
},
|
|
12
12
|
"vlm": {
|
|
13
|
-
"tier1": "
|
|
14
|
-
"tier2": "Qwen/
|
|
15
|
-
"tier3": "Qwen/
|
|
13
|
+
"tier1": "Qwen/Qwen3-VL-235B-A22B-Instruct",
|
|
14
|
+
"tier2": "Qwen/Qwen3-VL-30B-A3B-Instruct",
|
|
15
|
+
"tier3": "Qwen/Qwen3-VL-8B-Instruct"
|
|
16
16
|
}
|
|
17
17
|
},
|
|
18
18
|
|