kc-beta 0.5.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QUICKSTART.md +17 -4
- package/README.md +58 -11
- package/bin/kc-beta.js +35 -1
- package/package.json +1 -1
- package/src/agent/bundle-tree.js +553 -0
- package/src/agent/context.js +40 -1
- package/src/agent/engine.js +644 -28
- package/src/agent/llm-client.js +67 -18
- package/src/agent/pipelines/finalization.js +186 -0
- package/src/agent/pipelines/index.js +8 -0
- package/src/agent/pipelines/initializer.js +40 -0
- package/src/agent/pipelines/skill-authoring.js +100 -6
- package/src/agent/skill-loader.js +54 -4
- package/src/agent/task-manager.js +66 -3
- package/src/agent/tools/agent-tool.js +283 -35
- package/src/agent/tools/bundle-search.js +146 -0
- package/src/agent/tools/document-chunk.js +246 -0
- package/src/agent/tools/document-classify.js +311 -0
- package/src/agent/tools/document-parse.js +8 -1
- package/src/agent/tools/phase-advance.js +30 -7
- package/src/agent/tools/registry.js +10 -0
- package/src/agent/tools/rule-catalog.js +17 -3
- package/src/agent/tools/sandbox-exec.js +30 -0
- package/src/agent/workspace.js +168 -14
- package/src/cli/components.js +165 -17
- package/src/cli/index.js +166 -19
- package/src/cli/meme.js +58 -0
- package/src/config.js +39 -2
- package/src/model-tiers.json +3 -2
- package/src/providers.js +34 -1
- package/template/skills/en/meta-meta/evolution-loop/SKILL.md +13 -1
- package/template/skills/en/meta-meta/rule-extraction/SKILL.md +74 -0
- package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +7 -1
- package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +73 -0
package/src/cli/index.js
CHANGED
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
HRule,
|
|
16
16
|
InputPrompt,
|
|
17
17
|
} from "./components.js";
|
|
18
|
+
import { MemeOverlay } from "./meme.js"; // F6
|
|
18
19
|
|
|
19
20
|
const h = React.createElement;
|
|
20
21
|
|
|
@@ -30,6 +31,17 @@ const VISIBLE_WINDOW = 50;
|
|
|
30
31
|
// Older ToolBlocks show header only. Both still persist full output to disk.
|
|
31
32
|
const RECENT_TOOL_WINDOW = 10;
|
|
32
33
|
|
|
34
|
+
// B0.3: Hard cap on the React `messages` array. Without this, the array
|
|
35
|
+
// grows forever (setMessages((prev) => [...prev, msg]) via addMessage) —
|
|
36
|
+
// the VISIBLE_WINDOW virtualization hides old entries from render but
|
|
37
|
+
// they still sit in state. Over a 17 h session with 2-4 messages per
|
|
38
|
+
// turn, that's 1000s of entries holding tool-result digest strings and
|
|
39
|
+
// pipeline messages. /compact resets messages to a 1-item summary, so
|
|
40
|
+
// this cap is really a safety net between compacts. On cap hit, drop
|
|
41
|
+
// oldest non-system entries (system messages carry session-level
|
|
42
|
+
// context — pipeline transitions, errors — that users want retained).
|
|
43
|
+
const MAX_RETAINED_MESSAGES = 500;
|
|
44
|
+
|
|
33
45
|
/**
|
|
34
46
|
* Main KC Agent CLI App using Ink (React for terminals).
|
|
35
47
|
*/
|
|
@@ -43,6 +55,7 @@ function App({ engine, config }) {
|
|
|
43
55
|
const [sessionId, setSessionId] = useState(engine.workspace.sessionId);
|
|
44
56
|
const [phase, setPhase] = useState(engine.currentPhase);
|
|
45
57
|
const [showWelcome, setShowWelcome] = useState(true);
|
|
58
|
+
const [showMeme, setShowMeme] = useState(false); // F6
|
|
46
59
|
const [spinnerStatus, setSpinnerStatus] = useState(null);
|
|
47
60
|
const [contextTokens, setContextTokens] = useState(0);
|
|
48
61
|
const [contextLimit, setContextLimit] = useState(config.kcContextLimit || 200000);
|
|
@@ -63,7 +76,16 @@ function App({ engine, config }) {
|
|
|
63
76
|
}, []);
|
|
64
77
|
|
|
65
78
|
const addMessage = useCallback((msg) => {
|
|
66
|
-
setMessages((prev) =>
|
|
79
|
+
setMessages((prev) => {
|
|
80
|
+
if (prev.length < MAX_RETAINED_MESSAGES) return [...prev, msg];
|
|
81
|
+
// Cap hit: drop the oldest non-system entry. If everything is system
|
|
82
|
+
// (unlikely but possible), fall back to dropping the very oldest.
|
|
83
|
+
const dropIdx = prev.findIndex((m) => m.role !== "system");
|
|
84
|
+
const next = dropIdx >= 0
|
|
85
|
+
? [...prev.slice(0, dropIdx), ...prev.slice(dropIdx + 1), msg]
|
|
86
|
+
: [...prev.slice(1), msg];
|
|
87
|
+
return next;
|
|
88
|
+
});
|
|
67
89
|
}, []);
|
|
68
90
|
|
|
69
91
|
const runTurn = useCallback(async (text) => {
|
|
@@ -76,7 +98,9 @@ function App({ engine, config }) {
|
|
|
76
98
|
let accumulated = "";
|
|
77
99
|
|
|
78
100
|
try {
|
|
79
|
-
for await (const event of engineRef.current.runTaskLoop(text
|
|
101
|
+
for await (const event of engineRef.current.runTaskLoop(text, {
|
|
102
|
+
parallelism: config.effectiveParallelism?.() ?? 1,
|
|
103
|
+
})) {
|
|
80
104
|
switch (event.type) {
|
|
81
105
|
case "text_delta":
|
|
82
106
|
accumulated += event.text ?? "";
|
|
@@ -117,6 +141,13 @@ function App({ engine, config }) {
|
|
|
117
141
|
});
|
|
118
142
|
setCurrentTool(null);
|
|
119
143
|
setSpinnerStatus("Analyzing results...");
|
|
144
|
+
// H4: Refresh the CTX indicator after every tool_result. Without
|
|
145
|
+
// this, contextTokens only updates on turn_complete, which never
|
|
146
|
+
// fires in long tool-heavy sessions — we observed 908 events with
|
|
147
|
+
// zero turn_complete in session 6304673afaa0, CTX stuck at 0/131k
|
|
148
|
+
// for 30+ minutes. getContextStats() is a cheap pure calc over
|
|
149
|
+
// the history array; safe to call on every tool call.
|
|
150
|
+
updateContextStats();
|
|
120
151
|
break;
|
|
121
152
|
|
|
122
153
|
case "pipeline_event": {
|
|
@@ -153,7 +184,10 @@ function App({ engine, config }) {
|
|
|
153
184
|
// Process queue
|
|
154
185
|
if (queueRef.current.length > 0) {
|
|
155
186
|
const next = queueRef.current.shift();
|
|
187
|
+
setQueueSize(queueRef.current.length); // F2
|
|
156
188
|
runTurn(next);
|
|
189
|
+
} else {
|
|
190
|
+
setQueueSize(0); // F2
|
|
157
191
|
}
|
|
158
192
|
}, [addMessage, updateContextStats]);
|
|
159
193
|
|
|
@@ -173,6 +207,8 @@ function App({ engine, config }) {
|
|
|
173
207
|
" /tasks Show task progress\n" +
|
|
174
208
|
" /phase [sub] advance | status | <name> — manual phase override\n" +
|
|
175
209
|
" /schedule Show scheduled ingestion jobs and recent log lines\n" +
|
|
210
|
+
" /tools List all registered tools and which phase gates them\n" +
|
|
211
|
+
" /parallelism [N] Show or set parallel ralph-loop worker count (1-8)\n" +
|
|
176
212
|
" /clear Clear conversation history (keep workspace)\n" +
|
|
177
213
|
" /compact Summarize older messages to reduce context\n" +
|
|
178
214
|
" /sessions List all sessions\n" +
|
|
@@ -184,19 +220,90 @@ function App({ engine, config }) {
|
|
|
184
220
|
|
|
185
221
|
case "/status": {
|
|
186
222
|
const stats = engineRef.current.getContextStats();
|
|
223
|
+
const par = config.effectiveParallelism?.() ?? 1;
|
|
224
|
+
const parLine = par > 1
|
|
225
|
+
? `${par} (verified)`
|
|
226
|
+
: `${config.parallelismRequested || 1} requested` +
|
|
227
|
+
(config.parallelismRequested > 1 && !config.parallelismVerified
|
|
228
|
+
? ` — clamped to 1 (KC_PARALLELISM_VERIFIED not set; run heap baseline first)`
|
|
229
|
+
: "");
|
|
230
|
+
addMessage({
|
|
231
|
+
role: "system",
|
|
232
|
+
content:
|
|
233
|
+
`Session: ${engineRef.current.workspace.sessionId}\n` +
|
|
234
|
+
`Phase: ${engineRef.current.currentPhase.toUpperCase()}\n` +
|
|
235
|
+
`Model: ${config.kcModel}\n` +
|
|
236
|
+
`Provider: ${config.provider || "unknown"}\n` +
|
|
237
|
+
`LLM URL: ${config.llmBaseUrl}\n` +
|
|
238
|
+
`Project: ${engineRef.current.workspace.projectDir || "(none)"}\n` +
|
|
239
|
+
`Workspace: ${engineRef.current.workspace.cwd}\n` +
|
|
240
|
+
`Tools: ${engineRef.current.toolRegistry.size} registered\n` +
|
|
241
|
+
`History: ${engineRef.current.history.messages.length} messages\n` +
|
|
242
|
+
`Context: ~${stats.totalTokens} tokens (${stats.percentage}% of ${stats.limit})\n` +
|
|
243
|
+
`Parallelism: ${parLine}`,
|
|
244
|
+
});
|
|
245
|
+
return true;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
case "/meme":
|
|
249
|
+
// F6: easter egg. Not in /help.
|
|
250
|
+
setShowMeme(true);
|
|
251
|
+
return true;
|
|
252
|
+
|
|
253
|
+
case "/tools": {
|
|
254
|
+
// F5: list all registered tools + which phase gates them. Reads
|
|
255
|
+
// from the live toolRegistry so what you see is what the agent
|
|
256
|
+
// currently has available. Also names the distill-only tools
|
|
257
|
+
// explicitly so users understand why some tools "come and go"
|
|
258
|
+
// as phases advance.
|
|
259
|
+
const reg = engineRef.current.toolRegistry;
|
|
260
|
+
const names = reg?.names?.() || [];
|
|
261
|
+
const core = engineRef.current._buildTools?.core?.map((t) => t?.name).filter(Boolean) || [];
|
|
262
|
+
const distill = engineRef.current._buildTools?.distill?.map((t) => t?.name).filter(Boolean) || [];
|
|
263
|
+
const phase = engineRef.current.currentPhase.toUpperCase();
|
|
264
|
+
const lines = [
|
|
265
|
+
`Tools registered for phase ${phase}: ${names.length}`,
|
|
266
|
+
"",
|
|
267
|
+
`Core (always available, ${core.length}):`,
|
|
268
|
+
...core.map((n) => ` • ${n}${names.includes(n) ? "" : " [not currently registered]"}`),
|
|
269
|
+
];
|
|
270
|
+
if (distill.length > 0) {
|
|
271
|
+
lines.push("", `Distill-only (DISTILLATION / PRODUCTION_QC / FINALIZATION, ${distill.length}):`);
|
|
272
|
+
for (const n of distill) {
|
|
273
|
+
lines.push(` • ${n}${names.includes(n) ? "" : " [gated out of this phase]"}`);
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
lines.push("", "Tools are not separately installable — they ship with the KC release. To see what each tool does, invoke it or ask the agent.");
|
|
277
|
+
addMessage({ role: "system", content: lines.join("\n") });
|
|
278
|
+
return true;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
case "/parallelism": {
|
|
282
|
+
// B3: set parallelism at runtime. Respects the B0.6 guard —
|
|
283
|
+
// takes effect only if KC_PARALLELISM_VERIFIED is already set.
|
|
284
|
+
const n = parseInt(arg, 10);
|
|
285
|
+
if (!Number.isFinite(n) || n < 1) {
|
|
286
|
+
addMessage({
|
|
287
|
+
role: "system",
|
|
288
|
+
content:
|
|
289
|
+
`Usage: /parallelism <N> (1-8)\n` +
|
|
290
|
+
`Current: requested=${config.parallelismRequested || 1}, ` +
|
|
291
|
+
`effective=${config.effectiveParallelism?.() ?? 1}. ` +
|
|
292
|
+
(config.parallelismVerified
|
|
293
|
+
? "Verified — new value takes effect next /run."
|
|
294
|
+
: "Unverified — clamped to 1. Set KC_PARALLELISM_VERIFIED=1 after a clean 2h heap-baseline run."),
|
|
295
|
+
});
|
|
296
|
+
return true;
|
|
297
|
+
}
|
|
298
|
+
const clamped = Math.min(Math.max(n, 1), 8);
|
|
299
|
+
config.parallelismRequested = clamped;
|
|
187
300
|
addMessage({
|
|
188
301
|
role: "system",
|
|
189
302
|
content:
|
|
190
|
-
`
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
`LLM URL: ${config.llmBaseUrl}\n` +
|
|
195
|
-
`Project: ${engineRef.current.workspace.projectDir || "(none)"}\n` +
|
|
196
|
-
`Workspace: ${engineRef.current.workspace.cwd}\n` +
|
|
197
|
-
`Tools: ${engineRef.current.toolRegistry.size} registered\n` +
|
|
198
|
-
`History: ${engineRef.current.history.messages.length} messages\n` +
|
|
199
|
-
`Context: ~${stats.totalTokens} tokens (${stats.percentage}% of ${stats.limit})`,
|
|
303
|
+
`Parallelism requested=${clamped}. ` +
|
|
304
|
+
(config.parallelismVerified
|
|
305
|
+
? `Effective=${config.effectiveParallelism()} (verified).`
|
|
306
|
+
: `Effective=1 (verified flag not set — see /status).`),
|
|
200
307
|
});
|
|
201
308
|
return true;
|
|
202
309
|
}
|
|
@@ -339,10 +446,25 @@ function App({ engine, config }) {
|
|
|
339
446
|
} catch (err) {
|
|
340
447
|
addMessage({ role: "system", content: `Compact failed: ${err.message}` });
|
|
341
448
|
} finally {
|
|
449
|
+
// F8: Spinner-race fix. If a queued task is about to kick off
|
|
450
|
+
// via runTurn(next), DO NOT clear the streaming/spinner state
|
|
451
|
+
// here — runTurn's own entry sets streamingRef=true + spinner
|
|
452
|
+
// immediately, but there's a brief React-render window between
|
|
453
|
+
// our `setStreaming(false)` and its `setStreaming(true)` where
|
|
454
|
+
// the TUI paints "no spinner, no streaming" for 1-2 frames.
|
|
455
|
+
// Over long sessions that looked like a dead TUI when a user
|
|
456
|
+
// watched the moment /compact auto-chained to the next task.
|
|
457
|
+
// Order now: IF next task is queued, let runTurn(next) set all
|
|
458
|
+
// streaming state in one atomic render; we just reset the ref
|
|
459
|
+
// flags to avoid the input-is-locked issue. Otherwise do the
|
|
460
|
+
// full clear (idle-TUI case).
|
|
461
|
+
const hasQueuedWork = queueRef.current.length > 0;
|
|
342
462
|
streamingRef.current = false;
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
463
|
+
if (!hasQueuedWork) {
|
|
464
|
+
setStreaming(false);
|
|
465
|
+
setSpinnerStatus(null);
|
|
466
|
+
}
|
|
467
|
+
if (hasQueuedWork) {
|
|
346
468
|
const next = queueRef.current.shift();
|
|
347
469
|
runTurn(next);
|
|
348
470
|
}
|
|
@@ -436,8 +558,9 @@ function App({ engine, config }) {
|
|
|
436
558
|
|
|
437
559
|
case "/exit":
|
|
438
560
|
case "/quit":
|
|
439
|
-
// Save state before exit
|
|
561
|
+
// Save state + stop diagnostics before exit
|
|
440
562
|
try { engineRef.current.saveState(); } catch { /* ignore */ }
|
|
563
|
+
try { engineRef.current.stop(); } catch { /* ignore */ }
|
|
441
564
|
exit();
|
|
442
565
|
return true;
|
|
443
566
|
|
|
@@ -446,6 +569,8 @@ function App({ engine, config }) {
|
|
|
446
569
|
}
|
|
447
570
|
}, [addMessage, config, exit, updateContextStats]);
|
|
448
571
|
|
|
572
|
+
const [queueSize, setQueueSize] = useState(0); // F2: count for TUI indicator
|
|
573
|
+
|
|
449
574
|
const handleSubmit = useCallback((text) => {
|
|
450
575
|
const trimmed = text.trim();
|
|
451
576
|
setInputValue("");
|
|
@@ -460,6 +585,11 @@ function App({ engine, config }) {
|
|
|
460
585
|
|
|
461
586
|
if (streamingRef.current) {
|
|
462
587
|
queueRef.current.push(trimmed);
|
|
588
|
+
setQueueSize(queueRef.current.length); // F2
|
|
589
|
+
addMessage({
|
|
590
|
+
role: "system",
|
|
591
|
+
content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
|
|
592
|
+
});
|
|
463
593
|
} else {
|
|
464
594
|
runTurn(trimmed);
|
|
465
595
|
}
|
|
@@ -473,15 +603,23 @@ function App({ engine, config }) {
|
|
|
473
603
|
addMessage({ role: "system", content: "[Queue cleared]" });
|
|
474
604
|
} else {
|
|
475
605
|
try { engineRef.current.saveState(); } catch { /* ignore */ }
|
|
606
|
+
try { engineRef.current.stop(); } catch { /* ignore */ }
|
|
476
607
|
exit();
|
|
477
608
|
}
|
|
478
609
|
}
|
|
479
610
|
if (key.ctrl && input === "d") {
|
|
480
611
|
try { engineRef.current.saveState(); } catch { /* ignore */ }
|
|
612
|
+
try { engineRef.current.stop(); } catch { /* ignore */ }
|
|
481
613
|
exit();
|
|
482
614
|
}
|
|
483
615
|
});
|
|
484
616
|
|
|
617
|
+
// F6: /meme overlay short-circuits the rest of the UI until dismissed.
|
|
618
|
+
// Its own useInput handler owns ESC / Enter while it's up.
|
|
619
|
+
if (showMeme) {
|
|
620
|
+
return h(MemeOverlay, { onDismiss: () => setShowMeme(false) });
|
|
621
|
+
}
|
|
622
|
+
|
|
485
623
|
return h(Box, { flexDirection: "column" },
|
|
486
624
|
// Welcome banner
|
|
487
625
|
showWelcome ? h(WelcomeBanner, {
|
|
@@ -558,11 +696,16 @@ function App({ engine, config }) {
|
|
|
558
696
|
|
|
559
697
|
// Separator + Input
|
|
560
698
|
h(HRule),
|
|
699
|
+
// F2: Input stays active during streaming. Submissions while the
|
|
700
|
+
// agent is busy get queued (handleSubmit checks streamingRef) and
|
|
701
|
+
// flushed at the next natural turn boundary. Matches Claude Code's
|
|
702
|
+
// type-ahead behavior.
|
|
561
703
|
h(InputPrompt, {
|
|
562
704
|
value: inputValue,
|
|
563
705
|
onChange: setInputValue,
|
|
564
706
|
onSubmit: handleSubmit,
|
|
565
|
-
isActive:
|
|
707
|
+
isActive: true,
|
|
708
|
+
placeholderRight: queueSize > 0 ? `(${queueSize} queued)` : null,
|
|
566
709
|
}),
|
|
567
710
|
h(HRule),
|
|
568
711
|
h(StatusBar, { sessionId, phase, contextTokens, contextLimit }),
|
|
@@ -611,8 +754,12 @@ export async function main({ languageOverride } = {}) {
|
|
|
611
754
|
|
|
612
755
|
const engine = new AgentEngine({ client, config });
|
|
613
756
|
|
|
614
|
-
// Save state on process exit
|
|
615
|
-
|
|
757
|
+
// Save state on process exit + stop background diagnostics (B0.1 heap
|
|
758
|
+
// sampler). saveState is idempotent; stop() is safe to call twice.
|
|
759
|
+
const saveOnExit = () => {
|
|
760
|
+
try { engine.saveState(); } catch { /* ignore */ }
|
|
761
|
+
try { engine.stop(); } catch { /* ignore */ }
|
|
762
|
+
};
|
|
616
763
|
process.on("SIGINT", saveOnExit);
|
|
617
764
|
process.on("SIGTERM", saveOnExit);
|
|
618
765
|
|
package/src/cli/meme.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import React, { useState } from "react";
|
|
2
|
+
import { Box, Text, useInput } from "ink";
|
|
3
|
+
|
|
4
|
+
const h = React.createElement;
|
|
5
|
+
|
|
6
|
+
// F6: /meme easter egg. Intentionally not listed in /help — discovery
|
|
7
|
+
// is the point. Press ESC or Enter to dismiss. Content per the v0.6.0
|
|
8
|
+
// plan (item 15) — lyrics + team credit.
|
|
9
|
+
|
|
10
|
+
// Lyric lines shown at the top of the overlay, one row per entry, in order.
// Frozen so no other code can mutate this shared module-level list.
const LYRICS = Object.freeze([
  "I'll wait and soon",
  "We're stranded on the beach",
  "In our dream",
  "We part too soon",
  "But in our lies",
  "There's a truth to find",
  "The end is new",
  "A tomorrow we must reach for",
  "To be heard",
]);

// Team handles shown in the credit section as a comma-separated list.
// Frozen for the same reason as LYRICS.
const TEAM = Object.freeze([
  "@kitchen-engineer42", "@Xigua", "@Amelia", "@01Fish",
  "@zyxthetroll", "@theon", "@DivisionDirectorXu",
  "@AnselKocen", "@CarolineCRL", "@GraceGuo",
  "@XY🌟", "@HalfM", "@GreenOrange",
  "@LilyHuang", "@Qianlili", "@songmao",
  "@zoezoe", "@yhhm",
]);
|
|
30
|
+
|
|
31
|
+
/**
 * Easter-egg overlay: the lyric lines, a divider, the team credit list and a
 * dismissal hint, laid out in a bordered column.
 *
 * @param {object} props
 * @param {() => void} props.onDismiss - Called when ESC or Enter is pressed.
 * @returns {object} The element tree built with React.createElement (`h`).
 */
export function MemeOverlay({ onDismiss }) {
  // Only ESC and Enter close the overlay; every other key is ignored.
  useInput((_input, key) => {
    const wantsClose = key.escape || key.return;
    if (wantsClose) onDismiss();
  });

  const frameProps = {
    flexDirection: "column",
    borderStyle: "round",
    borderColor: "magenta",
    paddingLeft: 2,
    paddingRight: 2,
    paddingTop: 1,
    paddingBottom: 1,
    marginTop: 1,
    marginBottom: 1,
  };

  // One italic cyan row per lyric line.
  const lyricRows = LYRICS.map((line, i) =>
    h(Text, { key: `l-${i}`, color: "cyan", italic: true }, line),
  );

  // Handles are joined visually with ", " — no separator after the final one.
  const lastIdx = TEAM.length - 1;
  const creditCells = TEAM.map((handle, i) => {
    const sep = i < lastIdx ? ", " : "";
    return h(Text, { key: `t-${i}`, color: "green" }, `${handle}${sep}`);
  });

  // Empty text row used as vertical spacing between sections.
  const spacer = () => h(Text, null, "");

  return h(Box, frameProps,
    // Lyrics block
    h(Box, { flexDirection: "column" }, ...lyricRows),
    spacer(),
    h(Text, { dimColor: true }, "─".repeat(60)),
    spacer(),
    // Team credit
    h(Text, { color: "yellow", bold: true },
      "Here's to all the smart minds that are/were part of our team:"),
    spacer(),
    h(Box, { flexWrap: "wrap" }, ...creditCells),
    spacer(),
    h(Text, { dimColor: true }, "Press ESC or Enter to dismiss."),
  );
}
|
package/src/config.js
CHANGED
|
@@ -109,8 +109,17 @@ export function loadSettings(workspacePath) {
|
|
|
109
109
|
// Web search
|
|
110
110
|
tavilyApiKey: env.TAVILY_API_KEY || gc.tavily_api_key || "",
|
|
111
111
|
|
|
112
|
-
// Context management
|
|
113
|
-
|
|
112
|
+
// Context management — A2: prefer per-provider cap from providers.js
|
|
113
|
+
// over the generic 200000 default. KC_CONTEXT_LIMIT env still wins.
|
|
114
|
+
// gc.kc_context_limit (global config) is next. Then provider.contextLimit.
|
|
115
|
+
// Then a safe 200000 fallback for unknown/custom providers.
|
|
116
|
+
kcContextLimit: parseInt(
|
|
117
|
+
env.KC_CONTEXT_LIMIT ||
|
|
118
|
+
gc.kc_context_limit?.toString() ||
|
|
119
|
+
providerDef?.contextLimit?.toString() ||
|
|
120
|
+
"200000",
|
|
121
|
+
10,
|
|
122
|
+
),
|
|
114
123
|
toolOutputOffloadTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_TOKENS || gc.tool_output_offload_tokens?.toString() || "2000", 10),
|
|
115
124
|
toolOutputOffloadErrorTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_ERROR_TOKENS || gc.tool_output_offload_error_tokens?.toString() || "500", 10),
|
|
116
125
|
maxMessageTokens: parseInt(env.MAX_MESSAGE_TOKENS || gc.max_message_tokens?.toString() || "60000", 10),
|
|
@@ -123,8 +132,36 @@ export function loadSettings(workspacePath) {
|
|
|
123
132
|
|
|
124
133
|
// Language
|
|
125
134
|
language: env.LANGUAGE || gc.language || "en",
|
|
135
|
+
|
|
136
|
+
// B0.6: Parallel ralph-loop guard. Parallelism > 1 is a LOADED footgun
|
|
137
|
+
// until the heap-safety conformance gate (B0.7) passes. Unsetting the
|
|
138
|
+
// verified flag forces serial execution — KC_PARALLELISM_VERIFIED must
|
|
139
|
+
// be set explicitly after heap.jsonl shows a flat RSS trajectory over
|
|
140
|
+
// ≥ 2h. This prevents accidental $100+ runaway runs.
|
|
141
|
+
//
|
|
142
|
+
// Source priority (highest first): process.env (B3 CLI flag sets this)
|
|
143
|
+
// → workspace .env → global config. Parsed here; the actual effective
|
|
144
|
+
// value is computed by a helper below that downgrades to 1 if the
|
|
145
|
+
// verified flag isn't set.
|
|
146
|
+
parallelismVerified: (() => {
|
|
147
|
+
const raw = (process.env.KC_PARALLELISM_VERIFIED ||
|
|
148
|
+
env.KC_PARALLELISM_VERIFIED || gc.parallelism_verified || "").toString();
|
|
149
|
+
return raw === "1" || raw.toLowerCase() === "true";
|
|
150
|
+
})(),
|
|
151
|
+
parallelismRequested: (() => {
|
|
152
|
+
const raw = process.env.KC_PARALLELISM || env.KC_PARALLELISM || gc.parallelism;
|
|
153
|
+
const n = Number.parseInt(raw, 10);
|
|
154
|
+
if (!Number.isFinite(n) || n < 1) return 1;
|
|
155
|
+
return Math.min(n, 8); // max 8 per plan — prevents API-spend runaway
|
|
156
|
+
})(),
|
|
126
157
|
};
|
|
127
158
|
|
|
159
|
+
// Effective parallelism is silently clamped to 1 unless KC_PARALLELISM_VERIFIED
|
|
160
|
+
// is set. Callers (engine.runTaskLoop, /parallelism slash command, CLI flag)
|
|
161
|
+
// should read this instead of parallelismRequested.
|
|
162
|
+
settings.effectiveParallelism = () =>
|
|
163
|
+
settings.parallelismVerified ? settings.parallelismRequested : 1;
|
|
164
|
+
|
|
128
165
|
// Effective worker config (falls back to conductor config)
|
|
129
166
|
settings.effectiveWorkerProvider = () => settings.workerProvider || settings.provider;
|
|
130
167
|
settings.effectiveWorkerApiKey = () => settings.workerApiKey || settings.llmApiKey;
|
package/src/model-tiers.json
CHANGED
|
@@ -32,9 +32,10 @@
|
|
|
32
32
|
},
|
|
33
33
|
|
|
34
34
|
"volcanocloud": {
|
|
35
|
-
"
|
|
35
|
+
"_comment": "Coding plan (api/coding/v3) serves glm-5.1 only. Regular plan (api/v3) serves doubao/deepseek/glm-4-7. Pick conductor per the plan you onboarded.",
|
|
36
|
+
"conductor": "glm-5.1",
|
|
36
37
|
"llm": {
|
|
37
|
-
"tier1": "doubao-seed-2-0-pro-260215, deepseek-v3-2-251201",
|
|
38
|
+
"tier1": "glm-5.1, doubao-seed-2-0-pro-260215, deepseek-v3-2-251201",
|
|
38
39
|
"tier2": "glm-4-7-251222, doubao-1-5-pro-32k-250115",
|
|
39
40
|
"tier3": "doubao-seed-2-0-mini-260215",
|
|
40
41
|
"tier4": "doubao-seed-2-0-lite-260215, doubao-1-5-lite-32k-250115"
|
package/src/providers.js
CHANGED
|
@@ -28,6 +28,16 @@ function getTierConfig(providerId) {
|
|
|
28
28
|
return MODEL_TIERS[providerId] || { conductor: "", llm: {}, vlm: {} };
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
// A2: Per-provider context-window caps. Without these, every provider
|
|
32
|
+
// inherited the generic 200000-token default from config.js, which caused
|
|
33
|
+
// silent empty-response failures on smaller-window models (xfyun
|
|
34
|
+
// astron-code-latest behaves like it has ~32K during E2E #3). The
|
|
35
|
+
// _maybeWindowAfterToolResult threshold only fires around 70% of budget, so
|
|
36
|
+
// with a 200K budget on a 32K-limit model windowing never fires in time.
|
|
37
|
+
// These numbers are conservative minimums — users can still override via
|
|
38
|
+
// KC_CONTEXT_LIMIT env or kc_context_limit in global config.
|
|
39
|
+
const DEFAULT_CONTEXT_LIMIT = 200000;
|
|
40
|
+
|
|
31
41
|
const PROVIDERS = [
|
|
32
42
|
{
|
|
33
43
|
id: "siliconflow",
|
|
@@ -36,6 +46,7 @@ const PROVIDERS = [
|
|
|
36
46
|
authType: "bearer",
|
|
37
47
|
apiFormat: "openai",
|
|
38
48
|
modelsEndpoint: "/models",
|
|
49
|
+
contextLimit: 200000, // GLM-5.1, Kimi-K2.5 — 200K native
|
|
39
50
|
defaultModel: getTierConfig("siliconflow").conductor || "glm-5",
|
|
40
51
|
defaultTiers: getTierConfig("siliconflow").llm,
|
|
41
52
|
defaultVlm: getTierConfig("siliconflow").vlm,
|
|
@@ -54,6 +65,7 @@ const PROVIDERS = [
|
|
|
54
65
|
apiFormat: "openai",
|
|
55
66
|
modelsEndpoint: null, // Aliyun coding plan doesn't support /models
|
|
56
67
|
supportsCodingPlanKey: true,
|
|
68
|
+
contextLimit: 131072, // Qwen3.x family — 128K on the coding plan
|
|
57
69
|
defaultModel: getTierConfig("aliyun").conductor || "qwen3.6-plus",
|
|
58
70
|
defaultTiers: getTierConfig("aliyun").llm,
|
|
59
71
|
defaultVlm: getTierConfig("aliyun").vlm,
|
|
@@ -77,14 +89,21 @@ const PROVIDERS = [
|
|
|
77
89
|
{
|
|
78
90
|
id: "volcanocloud",
|
|
79
91
|
name: "VolcanoCloud",
|
|
92
|
+
// Regular Ark API — serves doubao / deepseek / glm-4-7-251222.
|
|
93
|
+
// Coding plan uses api/coding/v3 and serves glm-5.1 (aliased to glm-4.7
|
|
94
|
+
// server-side, thinking model).
|
|
80
95
|
baseUrl: "https://ark.cn-beijing.volces.com/api/v3",
|
|
96
|
+
codingPlanUrl: "https://ark.cn-beijing.volces.com/api/coding/v3",
|
|
81
97
|
authType: "bearer",
|
|
82
98
|
apiFormat: "openai",
|
|
83
|
-
modelsEndpoint: null, // VolcanoCloud
|
|
99
|
+
modelsEndpoint: null, // VolcanoCloud — use curated list
|
|
100
|
+
supportsCodingPlanKey: true,
|
|
101
|
+
contextLimit: 200000, // H2: glm-5.1 on coding plan has 200K native
|
|
84
102
|
defaultModel: getTierConfig("volcanocloud").conductor || "doubao-seed-2-0-pro-260215",
|
|
85
103
|
defaultTiers: getTierConfig("volcanocloud").llm,
|
|
86
104
|
defaultVlm: getTierConfig("volcanocloud").vlm,
|
|
87
105
|
curatedModels: [
|
|
106
|
+
{ id: "glm-5.1", ownedBy: "zhipu" },
|
|
88
107
|
{ id: "doubao-seed-2-0-pro-260215", ownedBy: "bytedance" },
|
|
89
108
|
{ id: "deepseek-v3-2-251201", ownedBy: "deepseek" },
|
|
90
109
|
{ id: "glm-4-7-251222", ownedBy: "zhipu" },
|
|
@@ -108,6 +127,10 @@ const PROVIDERS = [
|
|
|
108
127
|
authType: "bearer",
|
|
109
128
|
apiFormat: "openai",
|
|
110
129
|
modelsEndpoint: null,
|
|
130
|
+
// xfyun astron-code-latest — empirical ~32K-64K window per E2E #3. Set
|
|
131
|
+
// conservatively at 32K so windowing fires early and the provider never
|
|
132
|
+
// sees a request it will silently fail on.
|
|
133
|
+
contextLimit: 32768,
|
|
111
134
|
defaultModel: getTierConfig("xfyun").conductor || "astron-code-latest",
|
|
112
135
|
defaultTiers: getTierConfig("xfyun").llm,
|
|
113
136
|
defaultVlm: getTierConfig("xfyun").vlm,
|
|
@@ -126,6 +149,7 @@ const PROVIDERS = [
|
|
|
126
149
|
authType: "x-api-key",
|
|
127
150
|
apiFormat: "anthropic",
|
|
128
151
|
modelsEndpoint: null, // Use curated list
|
|
152
|
+
contextLimit: 400000, // Claude 4.x family — 400K on current long-context tier
|
|
129
153
|
defaultModel: getTierConfig("anthropic").conductor || "claude-sonnet-4-20250514",
|
|
130
154
|
defaultTiers: getTierConfig("anthropic").llm,
|
|
131
155
|
defaultVlm: getTierConfig("anthropic").vlm,
|
|
@@ -146,6 +170,7 @@ const PROVIDERS = [
|
|
|
146
170
|
authType: "bearer",
|
|
147
171
|
apiFormat: "openai",
|
|
148
172
|
modelsEndpoint: "/models",
|
|
173
|
+
contextLimit: 128000, // gpt-4o — 128K
|
|
149
174
|
defaultModel: getTierConfig("openai").conductor || "gpt-4o",
|
|
150
175
|
defaultTiers: getTierConfig("openai").llm,
|
|
151
176
|
defaultVlm: getTierConfig("openai").vlm,
|
|
@@ -161,6 +186,7 @@ const PROVIDERS = [
|
|
|
161
186
|
authType: "bearer",
|
|
162
187
|
apiFormat: "openai",
|
|
163
188
|
modelsEndpoint: "/models",
|
|
189
|
+
contextLimit: 200000, // GLM official (bigmodel.cn) — 200K on GLM-4.x/5.x tiers
|
|
164
190
|
defaultModel: getTierConfig("zhipu").conductor || "glm-4-plus",
|
|
165
191
|
defaultTiers: getTierConfig("zhipu").llm,
|
|
166
192
|
defaultVlm: getTierConfig("zhipu").vlm,
|
|
@@ -176,6 +202,7 @@ const PROVIDERS = [
|
|
|
176
202
|
authType: "bearer",
|
|
177
203
|
apiFormat: "openai",
|
|
178
204
|
modelsEndpoint: "/models",
|
|
205
|
+
contextLimit: 245760, // MiniMax-M2.5 — 240K
|
|
179
206
|
defaultModel: getTierConfig("minimax").conductor || "MiniMax-M2.5",
|
|
180
207
|
defaultTiers: getTierConfig("minimax").llm,
|
|
181
208
|
defaultVlm: getTierConfig("minimax").vlm,
|
|
@@ -191,6 +218,10 @@ const PROVIDERS = [
|
|
|
191
218
|
authType: "bearer",
|
|
192
219
|
apiFormat: "openai",
|
|
193
220
|
modelsEndpoint: "/models",
|
|
221
|
+
// OpenRouter proxies many models; defaulting to 200K matches the underlying
|
|
222
|
+
// frontier Anthropic/Google routes most users pick. Lower-context models
|
|
223
|
+
// behind OpenRouter will still work, just won't benefit from early windowing.
|
|
224
|
+
contextLimit: 200000,
|
|
194
225
|
defaultModel: getTierConfig("openrouter").conductor || "anthropic/claude-sonnet-4-20250514",
|
|
195
226
|
defaultTiers: getTierConfig("openrouter").llm,
|
|
196
227
|
defaultVlm: getTierConfig("openrouter").vlm,
|
|
@@ -206,6 +237,7 @@ const PROVIDERS = [
|
|
|
206
237
|
authType: "aws-sigv4",
|
|
207
238
|
apiFormat: "anthropic",
|
|
208
239
|
modelsEndpoint: null,
|
|
240
|
+
contextLimit: 200000, // Bedrock Anthropic routes mirror native Claude 200K
|
|
209
241
|
defaultModel: getTierConfig("bedrock").conductor || "anthropic.claude-sonnet-4-20250514-v1:0",
|
|
210
242
|
defaultTiers: getTierConfig("bedrock").llm,
|
|
211
243
|
defaultVlm: getTierConfig("bedrock").vlm,
|
|
@@ -259,6 +291,7 @@ const MODEL_RANKING = {
|
|
|
259
291
|
"qwen3.5-122b": 75,
|
|
260
292
|
"qwen3.5-35b": 65,
|
|
261
293
|
// Zhipu
|
|
294
|
+
"glm-5.1": 92,
|
|
262
295
|
"glm-5": 90,
|
|
263
296
|
"glm-4.7": 80,
|
|
264
297
|
"glm-4": 75,
|
|
@@ -163,12 +163,24 @@ Track three metrics per iteration to know when to stop:
|
|
|
163
163
|
|
|
164
164
|
### Stopping Criteria
|
|
165
165
|
|
|
166
|
-
Stop the loop when ALL three conditions hold for one iteration:
|
|
166
|
+
Stop the loop when **ALL** three conditions hold for one iteration:
|
|
167
167
|
|
|
168
168
|
1. Correction volume < 5% of total test cases.
|
|
169
169
|
2. New pattern count = 0.
|
|
170
170
|
3. Regression count = 0.
|
|
171
171
|
|
|
172
|
+
**OR** when the standalone accuracy-convergence condition holds (D5, added
|
|
173
|
+
2026-04-23):
|
|
174
|
+
|
|
175
|
+
4. Overall accuracy changed by less than 1 percentage point between the last two
|
|
176
|
+
iterations — i.e. `|accuracy[N+1] − accuracy[N]| < 0.01`.
|
|
177
|
+
|
|
178
|
+
Condition 4 prevents the observed over-iteration pattern of v5 → v12,
|
|
179
|
+
where each iteration oscillated within a ~0.5% accuracy window. Once the
|
|
180
|
+
model has reached "good enough," continuing burns tokens without
|
|
181
|
+
delivering real improvement. When accuracy has plateaued, proceed to the
|
|
182
|
+
next phase (distillation / production).
|
|
183
|
+
|
|
172
184
|
If correction volume *increases* between consecutive iterations, this is a regression signal. Pause the loop and diagnose before continuing — the last fix may be destabilizing the system.
|
|
173
185
|
|
|
174
186
|
### Expected Convergence
|
|
@@ -59,6 +59,80 @@ Rules will be distilled into workflows (see `skill-to-workflow`). Design with di
|
|
|
59
59
|
### Catalog Versioning
|
|
60
60
|
When rules change (additions, modifications, deprecations), version the entire rule catalog as a unit. Individual rule versions track specific rules; the catalog version tracks the coherent set. Record the catalog version in `versions.json` alongside individual rule versions.
|
|
61
61
|
|
|
62
|
+
## Granularity Calibration (read before extracting)
|
|
63
|
+
|
|
64
|
+
A well-extracted rule catalog has **10-20 rules per typical regulation PDF**
|
|
65
|
+
(a 30-80 page disclosure regulation). Over-extraction into 60-100 rules per
|
|
66
|
+
regulation signals you're treating every clause as its own rule — downstream
|
|
67
|
+
consumers (skill-authoring, workflow-run) can't distinguish meaningful
|
|
68
|
+
checks from boilerplate.
|
|
69
|
+
|
|
70
|
+
If your first pass produces more than ~25 rules for a single regulation:
|
|
71
|
+
- **Merge rules that share evidence and fail together** (e.g., "must
|
|
72
|
+
disclose X" and "must disclose Y" where both come from the same
|
|
73
|
+
required-fields table → one rule: "must disclose the required-fields
|
|
74
|
+
list including X, Y").
|
|
75
|
+
- **Drop procedural language** that isn't checkable against a report
|
|
76
|
+
(definitions, scope statements, references to other regs that just
|
|
77
|
+
transitively apply).
|
|
78
|
+
- **Keep only checkable obligations, prohibitions, and thresholds** —
|
|
79
|
+
things where you can read a sample report and say pass or fail.
|
|
80
|
+
|
|
81
|
+
### Sample "good" rule
|
|
82
|
+
|
|
83
|
+
```json
|
|
84
|
+
{
|
|
85
|
+
"id": "R014",
|
|
86
|
+
"source_ref": "Disclosure Reg §15.2",
|
|
87
|
+
"description": "Quarterly reports must be disclosed within 15 business days after quarter-end.",
|
|
88
|
+
"applicable_sections": ["public funds"],
|
|
89
|
+
"severity": "high",
|
|
90
|
+
"machine_checkable": true,
|
|
91
|
+
"falsifiability_statement": "If disclosure date is later than 15th business day after quarter-end, the rule fails.",
|
|
92
|
+
"test_case_stub": "Read the quarterly report's disclosure date + the quarter-end date, compute business-day difference."
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Note: one pass/fail outcome, a single `source_ref` to a specific clause,
|
|
97
|
+
clear applicability scope. Skill-authoring can write `check_r014.py` from
|
|
98
|
+
this alone.
|
|
99
|
+
|
|
100
|
+
### Cross-regulation dedup (when working across multiple PDFs)
|
|
101
|
+
|
|
102
|
+
If the developer user provides N regulations, rules from later regs often
|
|
103
|
+
duplicate cross-cutting requirements already captured by earlier ones
|
|
104
|
+
(e.g., a 2018 generic disclosure rule vs. a 2025 specific version).
|
|
105
|
+
Before emitting a rule from reg N:
|
|
106
|
+
|
|
107
|
+
1. **Check the existing catalog.** Use `rule_catalog` (operation: list)
|
|
108
|
+
to see what's already there. Skip the new rule if one with equivalent scope +
|
|
109
|
+
intent exists.
|
|
110
|
+
2. **Prefer the newer / more specific source_ref** when rules overlap.
|
|
111
|
+
3. **If you merged rules**, record the consolidated sources in
|
|
112
|
+
`source_ref`: e.g., `"New Reg §15.2 + Old Reg §24"`.
|
|
113
|
+
|
|
114
|
+
### Delegation to sub-agents
|
|
115
|
+
|
|
116
|
+
If you dispatch extraction to sub-agents (one per regulation), the
|
|
117
|
+
sub-agent inherits ONLY its `task_description` — it cannot see your
|
|
118
|
+
conversation or existing catalog. Therefore, when composing the brief:
|
|
119
|
+
|
|
120
|
+
- **Specify the target count band** explicitly: "Extract 10-20 atomic
|
|
121
|
+
rules from this regulation."
|
|
122
|
+
- **Include a sample rule** in the brief body (paste the JSON above
|
|
123
|
+
verbatim) so the sub-agent's calibration matches yours.
|
|
124
|
+
- **Name every regulation the sub-agent should process.** If AGENT.md
|
|
125
|
+
lists 10 core regulations, the brief must list all 10 by name, not
|
|
126
|
+
"the core regs" as shorthand — LLMs composing long structured briefs
|
|
127
|
+
frequently drop items (observed in session 6304673afaa0 where reg 02
|
|
128
|
+
was silently omitted).
|
|
129
|
+
- **State the dedup contract**: "Rules already in the parent's catalog
|
|
130
|
+
(R001–Rnnn) should NOT be re-extracted. If a requirement is already
|
|
131
|
+
covered, skip it." Then pass the current catalog's ID ranges.
|
|
132
|
+
- **Prefer `rule_catalog` create operations over sandbox_exec writes to
|
|
133
|
+
catalog.json.** rule_catalog uses workspace file locking;
|
|
134
|
+
sandbox_exec bypasses it and races with other writers.
|
|
135
|
+
|
|
62
136
|
## Extraction Strategies
|
|
63
137
|
|
|
64
138
|
### Strategy 1: Structured Input (Developer User Provides Rules)
|