kc-beta 0.5.6 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QUICKSTART.md +17 -4
- package/README.md +58 -11
- package/bin/kc-beta.js +35 -1
- package/package.json +1 -1
- package/src/agent/bundle-tree.js +553 -0
- package/src/agent/context.js +40 -1
- package/src/agent/engine.js +644 -28
- package/src/agent/llm-client.js +67 -18
- package/src/agent/pipelines/finalization.js +186 -0
- package/src/agent/pipelines/index.js +8 -0
- package/src/agent/pipelines/initializer.js +40 -0
- package/src/agent/pipelines/skill-authoring.js +100 -6
- package/src/agent/skill-loader.js +54 -4
- package/src/agent/task-manager.js +66 -3
- package/src/agent/tools/agent-tool.js +283 -35
- package/src/agent/tools/bundle-search.js +146 -0
- package/src/agent/tools/document-chunk.js +246 -0
- package/src/agent/tools/document-classify.js +311 -0
- package/src/agent/tools/document-parse.js +8 -1
- package/src/agent/tools/phase-advance.js +30 -7
- package/src/agent/tools/registry.js +10 -0
- package/src/agent/tools/rule-catalog.js +17 -3
- package/src/agent/tools/sandbox-exec.js +30 -0
- package/src/agent/workspace.js +168 -14
- package/src/cli/components.js +165 -17
- package/src/cli/index.js +166 -19
- package/src/cli/meme.js +58 -0
- package/src/config.js +39 -2
- package/src/providers.js +26 -0
- package/template/skills/en/meta-meta/evolution-loop/SKILL.md +13 -1
- package/template/skills/en/meta-meta/rule-extraction/SKILL.md +74 -0
- package/template/skills/zh/meta-meta/evolution-loop/SKILL.md +7 -1
- package/template/skills/zh/meta-meta/rule-extraction/SKILL.md +73 -0
package/src/cli/index.js
CHANGED
@@ -15,6 +15,7 @@ import {
   HRule,
   InputPrompt,
 } from "./components.js";
+import { MemeOverlay } from "./meme.js"; // F6
 
 const h = React.createElement;
 
@@ -30,6 +31,17 @@ const VISIBLE_WINDOW = 50;
 // Older ToolBlocks show header only. Both still persist full output to disk.
 const RECENT_TOOL_WINDOW = 10;
 
+// B0.3: Hard cap on the React `messages` array. Without this, the array
+// grows forever (setMessages((prev) => [...prev, msg]) via addMessage) —
+// the VISIBLE_WINDOW virtualization hides old entries from render but
+// they still sit in state. Over a 17 h session with 2-4 messages per
+// turn, that's 1000s of entries holding tool-result digest strings and
+// pipeline messages. /compact resets messages to a 1-item summary, so
+// this cap is really a safety net between compacts. On cap hit, drop
+// oldest non-system entries (system messages carry session-level
+// context — pipeline transitions, errors — that users want retained).
+const MAX_RETAINED_MESSAGES = 500;
+
 /**
  * Main KC Agent CLI App using Ink (React for terminals).
  */
@@ -43,6 +55,7 @@ function App({ engine, config }) {
   const [sessionId, setSessionId] = useState(engine.workspace.sessionId);
   const [phase, setPhase] = useState(engine.currentPhase);
   const [showWelcome, setShowWelcome] = useState(true);
+  const [showMeme, setShowMeme] = useState(false); // F6
   const [spinnerStatus, setSpinnerStatus] = useState(null);
   const [contextTokens, setContextTokens] = useState(0);
   const [contextLimit, setContextLimit] = useState(config.kcContextLimit || 200000);
@@ -63,7 +76,16 @@ function App({ engine, config }) {
   }, []);
 
   const addMessage = useCallback((msg) => {
-    setMessages((prev) =>
+    setMessages((prev) => {
+      if (prev.length < MAX_RETAINED_MESSAGES) return [...prev, msg];
+      // Cap hit: drop the oldest non-system entry. If everything is system
+      // (unlikely but possible), fall back to dropping the very oldest.
+      const dropIdx = prev.findIndex((m) => m.role !== "system");
+      const next = dropIdx >= 0
+        ? [...prev.slice(0, dropIdx), ...prev.slice(dropIdx + 1), msg]
+        : [...prev.slice(1), msg];
+      return next;
+    });
   }, []);
 
   const runTurn = useCallback(async (text) => {
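The B0.3 cap is easiest to see in isolation. A minimal sketch of the drop-oldest-non-system logic from the `addMessage` hunk above; `capAppend` is a hypothetical standalone name, not part of the package:

```js
const MAX_RETAINED_MESSAGES = 500;

// Append under the B0.3 cap: below the cap, plain append; at the cap, evict
// the oldest non-system entry so pipeline/error context survives, falling
// back to the very oldest entry if the buffer is all system messages.
function capAppend(prev, msg) {
  if (prev.length < MAX_RETAINED_MESSAGES) return [...prev, msg];
  const dropIdx = prev.findIndex((m) => m.role !== "system");
  return dropIdx >= 0
    ? [...prev.slice(0, dropIdx), ...prev.slice(dropIdx + 1), msg]
    : [...prev.slice(1), msg];
}

// A full buffer stays at the cap after each append:
const full = Array.from({ length: 500 }, (_, i) => ({ role: "user", content: `m${i}` }));
console.log(capAppend(full, { role: "assistant", content: "new" }).length); // 500
```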
@@ -76,7 +98,9 @@ function App({ engine, config }) {
     let accumulated = "";
 
     try {
-      for await (const event of engineRef.current.runTaskLoop(text
+      for await (const event of engineRef.current.runTaskLoop(text, {
+        parallelism: config.effectiveParallelism?.() ?? 1,
+      })) {
         switch (event.type) {
           case "text_delta":
             accumulated += event.text ?? "";
@@ -117,6 +141,13 @@ function App({ engine, config }) {
             });
             setCurrentTool(null);
             setSpinnerStatus("Analyzing results...");
+            // H4: Refresh the CTX indicator after every tool_result. Without
+            // this, contextTokens only updates on turn_complete, which never
+            // fires in long tool-heavy sessions — we observed 908 events with
+            // zero turn_complete in session 6304673afaa0, CTX stuck at 0/131k
+            // for 30+ minutes. getContextStats() is a cheap pure calc over
+            // the history array; safe to call on every tool call.
+            updateContextStats();
             break;
 
           case "pipeline_event": {
@@ -153,7 +184,10 @@ function App({ engine, config }) {
     // Process queue
     if (queueRef.current.length > 0) {
       const next = queueRef.current.shift();
+      setQueueSize(queueRef.current.length); // F2
       runTurn(next);
+    } else {
+      setQueueSize(0); // F2
     }
   }, [addMessage, updateContextStats]);
 
@@ -173,6 +207,8 @@ function App({ engine, config }) {
       " /tasks Show task progress\n" +
       " /phase [sub] advance | status | <name> — manual phase override\n" +
       " /schedule Show scheduled ingestion jobs and recent log lines\n" +
+      " /tools List all registered tools and which phase gates them\n" +
+      " /parallelism [N] Show or set parallel ralph-loop worker count (1-8)\n" +
       " /clear Clear conversation history (keep workspace)\n" +
       " /compact Summarize older messages to reduce context\n" +
       " /sessions List all sessions\n" +
@@ -184,19 +220,90 @@ function App({ engine, config }) {
 
       case "/status": {
         const stats = engineRef.current.getContextStats();
+        const par = config.effectiveParallelism?.() ?? 1;
+        const parLine = par > 1
+          ? `${par} (verified)`
+          : `${config.parallelismRequested || 1} requested` +
+            (config.parallelismRequested > 1 && !config.parallelismVerified
+              ? ` — clamped to 1 (KC_PARALLELISM_VERIFIED not set; run heap baseline first)`
+              : "");
+        addMessage({
+          role: "system",
+          content:
+            `Session: ${engineRef.current.workspace.sessionId}\n` +
+            `Phase: ${engineRef.current.currentPhase.toUpperCase()}\n` +
+            `Model: ${config.kcModel}\n` +
+            `Provider: ${config.provider || "unknown"}\n` +
+            `LLM URL: ${config.llmBaseUrl}\n` +
+            `Project: ${engineRef.current.workspace.projectDir || "(none)"}\n` +
+            `Workspace: ${engineRef.current.workspace.cwd}\n` +
+            `Tools: ${engineRef.current.toolRegistry.size} registered\n` +
+            `History: ${engineRef.current.history.messages.length} messages\n` +
+            `Context: ~${stats.totalTokens} tokens (${stats.percentage}% of ${stats.limit})\n` +
+            `Parallelism: ${parLine}`,
+        });
+        return true;
+      }
+
+      case "/meme":
+        // F6: easter egg. Not in /help.
+        setShowMeme(true);
+        return true;
+
+      case "/tools": {
+        // F5: list all registered tools + which phase gates them. Reads
+        // from the live toolRegistry so what you see is what the agent
+        // currently has available. Also names the distill-only tools
+        // explicitly so users understand why some tools "come and go"
+        // as phases advance.
+        const reg = engineRef.current.toolRegistry;
+        const names = reg?.names?.() || [];
+        const core = engineRef.current._buildTools?.core?.map((t) => t?.name).filter(Boolean) || [];
+        const distill = engineRef.current._buildTools?.distill?.map((t) => t?.name).filter(Boolean) || [];
+        const phase = engineRef.current.currentPhase.toUpperCase();
+        const lines = [
+          `Tools registered for phase ${phase}: ${names.length}`,
+          "",
+          `Core (always available, ${core.length}):`,
+          ...core.map((n) => ` • ${n}${names.includes(n) ? "" : " [not currently registered]"}`),
+        ];
+        if (distill.length > 0) {
+          lines.push("", `Distill-only (DISTILLATION / PRODUCTION_QC / FINALIZATION, ${distill.length}):`);
+          for (const n of distill) {
+            lines.push(` • ${n}${names.includes(n) ? "" : " [gated out of this phase]"}`);
+          }
+        }
+        lines.push("", "Tools are not separately installable — they ship with the KC release. To see what each tool does, invoke it or ask the agent.");
+        addMessage({ role: "system", content: lines.join("\n") });
+        return true;
+      }
+
+      case "/parallelism": {
+        // B3: set parallelism at runtime. Respects the B0.6 guard —
+        // takes effect only if KC_PARALLELISM_VERIFIED is already set.
+        const n = parseInt(arg, 10);
+        if (!Number.isFinite(n) || n < 1) {
+          addMessage({
+            role: "system",
+            content:
+              `Usage: /parallelism <N> (1-8)\n` +
+              `Current: requested=${config.parallelismRequested || 1}, ` +
+              `effective=${config.effectiveParallelism?.() ?? 1}. ` +
+              (config.parallelismVerified
+                ? "Verified — new value takes effect next /run."
+                : "Unverified — clamped to 1. Set KC_PARALLELISM_VERIFIED=1 after a clean 2h heap-baseline run."),
+          });
+          return true;
+        }
+        const clamped = Math.min(Math.max(n, 1), 8);
+        config.parallelismRequested = clamped;
         addMessage({
           role: "system",
           content:
-            `
-
-
-
-            `LLM URL: ${config.llmBaseUrl}\n` +
-            `Project: ${engineRef.current.workspace.projectDir || "(none)"}\n` +
-            `Workspace: ${engineRef.current.workspace.cwd}\n` +
-            `Tools: ${engineRef.current.toolRegistry.size} registered\n` +
-            `History: ${engineRef.current.history.messages.length} messages\n` +
-            `Context: ~${stats.totalTokens} tokens (${stats.percentage}% of ${stats.limit})`,
+            `Parallelism requested=${clamped}. ` +
+            (config.parallelismVerified
+              ? `Effective=${config.effectiveParallelism()} (verified).`
+              : `Effective=1 (verified flag not set — see /status).`),
         });
         return true;
       }
@@ -339,10 +446,25 @@ function App({ engine, config }) {
     } catch (err) {
       addMessage({ role: "system", content: `Compact failed: ${err.message}` });
     } finally {
+      // F8: Spinner-race fix. If a queued task is about to kick off
+      // via runTurn(next), DO NOT clear the streaming/spinner state
+      // here — runTurn's own entry sets streamingRef=true + spinner
+      // immediately, but there's a brief React-render window between
+      // our `setStreaming(false)` and its `setStreaming(true)` where
+      // the TUI paints "no spinner, no streaming" for 1-2 frames.
+      // Over long sessions that looked like a dead TUI when a user
+      // watched the moment /compact auto-chained to the next task.
+      // Order now: IF next task is queued, let runTurn(next) set all
+      // streaming state in one atomic render; we just reset the ref
+      // flags to avoid the input-is-locked issue. Otherwise do the
+      // full clear (idle-TUI case).
+      const hasQueuedWork = queueRef.current.length > 0;
       streamingRef.current = false;
-
-
-
+      if (!hasQueuedWork) {
+        setStreaming(false);
+        setSpinnerStatus(null);
+      }
+      if (hasQueuedWork) {
        const next = queueRef.current.shift();
        runTurn(next);
      }
@@ -436,8 +558,9 @@ function App({ engine, config }) {
 
       case "/exit":
       case "/quit":
-        // Save state before exit
+        // Save state + stop diagnostics before exit
        try { engineRef.current.saveState(); } catch { /* ignore */ }
+        try { engineRef.current.stop(); } catch { /* ignore */ }
        exit();
        return true;
 
@@ -446,6 +569,8 @@ function App({ engine, config }) {
     }
   }, [addMessage, config, exit, updateContextStats]);
 
+  const [queueSize, setQueueSize] = useState(0); // F2: count for TUI indicator
+
   const handleSubmit = useCallback((text) => {
     const trimmed = text.trim();
     setInputValue("");
@@ -460,6 +585,11 @@ function App({ engine, config }) {
 
     if (streamingRef.current) {
       queueRef.current.push(trimmed);
+      setQueueSize(queueRef.current.length); // F2
+      addMessage({
+        role: "system",
+        content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
+      });
     } else {
       runTurn(trimmed);
     }
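The F2 and F8 hunks together implement type-ahead: submissions during a turn are queued, and the queue drains at turn boundaries. A stripped-down sketch of that flow under those assumptions; the timeout is a stand-in for an agent turn, not the real engine:

```js
const queue = [];
let streaming = false;

async function runTurn(text) {
  streaming = true;
  await new Promise((resolve) => setTimeout(resolve, 10)); // stand-in for one agent turn
  streaming = false;
  // Turn boundary: drain one queued submission, as the F2 hunks do.
  if (queue.length > 0) runTurn(queue.shift());
}

function handleSubmit(text) {
  if (streaming) queue.push(text); // surfaced in the TUI as "(N queued)"
  else runTurn(text);
}

handleSubmit("first");  // runs immediately
handleSubmit("second"); // queued until "first" completes
```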
@@ -473,15 +603,23 @@ function App({ engine, config }) {
       addMessage({ role: "system", content: "[Queue cleared]" });
     } else {
       try { engineRef.current.saveState(); } catch { /* ignore */ }
+      try { engineRef.current.stop(); } catch { /* ignore */ }
       exit();
     }
   }
   if (key.ctrl && input === "d") {
     try { engineRef.current.saveState(); } catch { /* ignore */ }
+    try { engineRef.current.stop(); } catch { /* ignore */ }
     exit();
   }
 });
 
+  // F6: /meme overlay short-circuits the rest of the UI until dismissed.
+  // Its own useInput handler owns ESC / Enter while it's up.
+  if (showMeme) {
+    return h(MemeOverlay, { onDismiss: () => setShowMeme(false) });
+  }
+
   return h(Box, { flexDirection: "column" },
     // Welcome banner
     showWelcome ? h(WelcomeBanner, {
@@ -558,11 +696,16 @@ function App({ engine, config }) {
 
     // Separator + Input
     h(HRule),
+    // F2: Input stays active during streaming. Submissions while the
+    // agent is busy get queued (handleSubmit checks streamingRef) and
+    // flushed at the next natural turn boundary. Matches Claude Code's
+    // type-ahead behavior.
     h(InputPrompt, {
       value: inputValue,
      onChange: setInputValue,
      onSubmit: handleSubmit,
-      isActive:
+      isActive: true,
+      placeholderRight: queueSize > 0 ? `(${queueSize} queued)` : null,
     }),
     h(HRule),
     h(StatusBar, { sessionId, phase, contextTokens, contextLimit }),
@@ -611,8 +754,12 @@ export async function main({ languageOverride } = {}) {
 
   const engine = new AgentEngine({ client, config });
 
-  // Save state on process exit
-
+  // Save state on process exit + stop background diagnostics (B0.1 heap
+  // sampler). saveState is idempotent; stop() is safe to call twice.
+  const saveOnExit = () => {
+    try { engine.saveState(); } catch { /* ignore */ }
+    try { engine.stop(); } catch { /* ignore */ }
+  };
   process.on("SIGINT", saveOnExit);
   process.on("SIGTERM", saveOnExit);
 
package/src/cli/meme.js
ADDED
@@ -0,0 +1,58 @@
+import React, { useState } from "react";
+import { Box, Text, useInput } from "ink";
+
+const h = React.createElement;
+
+// F6: /meme easter egg. Intentionally not listed in /help — discovery
+// is the point. Press ESC or Enter to dismiss. Content per the v0.6.0
+// plan (item 15) — lyrics + team credit.
+
+const LYRICS = [
+  "I'll wait and soon",
+  "We're stranded on the beach",
+  "In our dream",
+  "We part too soon",
+  "But in our lies",
+  "There's a truth to find",
+  "The end is new",
+  "A tomorrow we must reach for",
+  "To be heard",
+];
+
+const TEAM = [
+  "@kitchen-engineer42", "@Xigua", "@Amelia", "@01Fish",
+  "@zyxthetroll", "@theon", "@DivisionDirectorXu",
+  "@AnselKocen", "@CarolineCRL", "@GraceGuo",
+  "@XY🌟", "@HalfM", "@GreenOrange",
+  "@LilyHuang", "@Qianlili", "@songmao",
+  "@zoezoe", "@yhhm",
+];
+
+export function MemeOverlay({ onDismiss }) {
+  useInput((input, key) => {
+    if (key.escape || key.return) onDismiss();
+  });
+
+  return h(Box, { flexDirection: "column", borderStyle: "round", borderColor: "magenta", paddingLeft: 2, paddingRight: 2, paddingTop: 1, paddingBottom: 1, marginTop: 1, marginBottom: 1 },
+    // Lyrics block
+    h(Box, { flexDirection: "column" },
+      ...LYRICS.map((line, i) =>
+        h(Text, { key: `l-${i}`, color: "cyan", italic: true }, line),
+      ),
+    ),
+    h(Text, null, ""),
+    h(Text, { dimColor: true }, "─".repeat(60)),
+    h(Text, null, ""),
+    // Team credit
+    h(Text, { color: "yellow", bold: true },
+      "Here's to all the smart minds that are/were part of our team:"),
+    h(Text, null, ""),
+    h(Box, { flexWrap: "wrap" },
+      ...TEAM.map((handle, i) =>
+        h(Text, { key: `t-${i}`, color: "green" }, `${handle}${i < TEAM.length - 1 ? ", " : ""}`),
+      ),
+    ),
+    h(Text, null, ""),
+    h(Text, { dimColor: true }, "Press ESC or Enter to dismiss."),
+  );
+}
package/src/config.js
CHANGED
@@ -109,8 +109,17 @@ export function loadSettings(workspacePath) {
     // Web search
     tavilyApiKey: env.TAVILY_API_KEY || gc.tavily_api_key || "",
 
-    // Context management
-
+    // Context management — A2: prefer per-provider cap from providers.js
+    // over the generic 200000 default. KC_CONTEXT_LIMIT env still wins.
+    // gc.kc_context_limit (global config) is next. Then provider.contextLimit.
+    // Then a safe 200000 fallback for unknown/custom providers.
+    kcContextLimit: parseInt(
+      env.KC_CONTEXT_LIMIT ||
+        gc.kc_context_limit?.toString() ||
+        providerDef?.contextLimit?.toString() ||
+        "200000",
+      10,
+    ),
     toolOutputOffloadTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_TOKENS || gc.tool_output_offload_tokens?.toString() || "2000", 10),
     toolOutputOffloadErrorTokens: parseInt(env.TOOL_OUTPUT_OFFLOAD_ERROR_TOKENS || gc.tool_output_offload_error_tokens?.toString() || "500", 10),
     maxMessageTokens: parseInt(env.MAX_MESSAGE_TOKENS || gc.max_message_tokens?.toString() || "60000", 10),
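The A2 precedence chain, restated as a hypothetical standalone function (`resolveContextLimit` is not part of the package; it mirrors the expression in the hunk above):

```js
function resolveContextLimit(env, gc, providerDef) {
  // env var beats global config, which beats the provider cap, which beats 200000
  return parseInt(
    env.KC_CONTEXT_LIMIT ||
      gc.kc_context_limit?.toString() ||
      providerDef?.contextLimit?.toString() ||
      "200000",
    10,
  );
}

console.log(resolveContextLimit({}, {}, { contextLimit: 32768 })); // 32768 (provider cap wins)
console.log(resolveContextLimit({ KC_CONTEXT_LIMIT: "65536" }, {}, { contextLimit: 32768 })); // 65536 (env still wins)
console.log(resolveContextLimit({}, {}, undefined)); // 200000 (unknown/custom provider)
```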
@@ -123,8 +132,36 @@ export function loadSettings(workspacePath) {
 
     // Language
     language: env.LANGUAGE || gc.language || "en",
+
+    // B0.6: Parallel ralph-loop guard. Parallelism > 1 is a LOADED footgun
+    // until the heap-safety conformance gate (B0.7) passes. Unsetting the
+    // verified flag forces serial execution — KC_PARALLELISM_VERIFIED must
+    // be set explicitly after heap.jsonl shows a flat RSS trajectory over
+    // ≥ 2h. This prevents accidental $100+ runaway runs.
+    //
+    // Source priority (highest first): process.env (B3 CLI flag sets this)
+    // → workspace .env → global config. Parsed here; the actual effective
+    // value is computed by a helper below that downgrades to 1 if the
+    // verified flag isn't set.
+    parallelismVerified: (() => {
+      const raw = (process.env.KC_PARALLELISM_VERIFIED ||
+        env.KC_PARALLELISM_VERIFIED || gc.parallelism_verified || "").toString();
+      return raw === "1" || raw.toLowerCase() === "true";
+    })(),
+    parallelismRequested: (() => {
+      const raw = process.env.KC_PARALLELISM || env.KC_PARALLELISM || gc.parallelism;
+      const n = Number.parseInt(raw, 10);
+      if (!Number.isFinite(n) || n < 1) return 1;
+      return Math.min(n, 8); // max 8 per plan — prevents API-spend runaway
+    })(),
   };
 
+  // Effective parallelism is silently clamped to 1 unless KC_PARALLELISM_VERIFIED
+  // is set. Callers (engine.runTaskLoop, /parallelism slash command, CLI flag)
+  // should read this instead of parallelismRequested.
+  settings.effectiveParallelism = () =>
+    settings.parallelismVerified ? settings.parallelismRequested : 1;
+
   // Effective worker config (falls back to conductor config)
   settings.effectiveWorkerProvider = () => settings.workerProvider || settings.provider;
   settings.effectiveWorkerApiKey = () => settings.workerApiKey || settings.llmApiKey;
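Behaviorally, the B0.6 guard reduces to a two-input function. This sketch folds the parse-time clamp (1-8) and the verified check into one place for illustration; in the package they live separately in `parallelismRequested` and `effectiveParallelism`:

```js
const effectiveParallelism = (requested, verified) =>
  verified ? Math.min(Math.max(requested, 1), 8) : 1;

console.log(effectiveParallelism(4, false)); // 1: KC_PARALLELISM=4 without KC_PARALLELISM_VERIFIED
console.log(effectiveParallelism(4, true));  // 4: verified after a clean 2h heap baseline
console.log(effectiveParallelism(99, true)); // 8: hard ceiling per the plan
```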
package/src/providers.js
CHANGED
@@ -28,6 +28,16 @@ function getTierConfig(providerId) {
   return MODEL_TIERS[providerId] || { conductor: "", llm: {}, vlm: {} };
 }
 
+// A2: Per-provider context-window caps. Without these, every provider
+// inherited the generic 200000-token default from config.js, which caused
+// silent empty-response failures on smaller-window models (xfyun
+// astron-code-latest behaves like it has ~32K during E2E #3). The
+// _maybeWindowAfterToolResult threshold only fires around 70% of budget, so
+// with a 200K budget on a 32K-limit model windowing never fires in time.
+// These numbers are conservative minimums — users can still override via
+// KC_CONTEXT_LIMIT env or kc_context_limit in global config.
+const DEFAULT_CONTEXT_LIMIT = 200000;
+
 const PROVIDERS = [
   {
     id: "siliconflow",
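The arithmetic behind the A2 fix, assuming the ~70% windowing threshold the comment describes (the exact factor lives in engine.js and is an assumption here):

```js
const windowAt = (contextLimit) => Math.floor(contextLimit * 0.7); // assumed factor

console.log(windowAt(200000)); // 140000: never reached before a ~32K-window model starts failing
console.log(windowAt(32768));  // 22937: with the xfyun cap, windowing fires in time
```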
@@ -36,6 +46,7 @@ const PROVIDERS = [
     authType: "bearer",
     apiFormat: "openai",
     modelsEndpoint: "/models",
+    contextLimit: 200000, // GLM-5.1, Kimi-K2.5 — 200K native
     defaultModel: getTierConfig("siliconflow").conductor || "glm-5",
     defaultTiers: getTierConfig("siliconflow").llm,
     defaultVlm: getTierConfig("siliconflow").vlm,
@@ -54,6 +65,7 @@ const PROVIDERS = [
     apiFormat: "openai",
     modelsEndpoint: null, // Aliyun coding plan doesn't support /models
     supportsCodingPlanKey: true,
+    contextLimit: 131072, // Qwen3.x family — 128K on the coding plan
     defaultModel: getTierConfig("aliyun").conductor || "qwen3.6-plus",
     defaultTiers: getTierConfig("aliyun").llm,
     defaultVlm: getTierConfig("aliyun").vlm,
@@ -86,6 +98,7 @@ const PROVIDERS = [
     apiFormat: "openai",
     modelsEndpoint: null, // VolcanoCloud — use curated list
     supportsCodingPlanKey: true,
+    contextLimit: 200000, // H2: glm-5.1 on coding plan has 200K native
     defaultModel: getTierConfig("volcanocloud").conductor || "doubao-seed-2-0-pro-260215",
     defaultTiers: getTierConfig("volcanocloud").llm,
     defaultVlm: getTierConfig("volcanocloud").vlm,
@@ -114,6 +127,10 @@ const PROVIDERS = [
     authType: "bearer",
     apiFormat: "openai",
     modelsEndpoint: null,
+    // xfyun astron-code-latest — empirical ~32K-64K window per E2E #3. Set
+    // conservatively at 32K so windowing fires early and the provider never
+    // sees a request it will silently fail on.
+    contextLimit: 32768,
     defaultModel: getTierConfig("xfyun").conductor || "astron-code-latest",
     defaultTiers: getTierConfig("xfyun").llm,
     defaultVlm: getTierConfig("xfyun").vlm,
@@ -132,6 +149,7 @@ const PROVIDERS = [
     authType: "x-api-key",
     apiFormat: "anthropic",
     modelsEndpoint: null, // Use curated list
+    contextLimit: 400000, // Claude 4.x family — 400K on current long-context tier
     defaultModel: getTierConfig("anthropic").conductor || "claude-sonnet-4-20250514",
     defaultTiers: getTierConfig("anthropic").llm,
     defaultVlm: getTierConfig("anthropic").vlm,
@@ -152,6 +170,7 @@ const PROVIDERS = [
     authType: "bearer",
     apiFormat: "openai",
     modelsEndpoint: "/models",
+    contextLimit: 128000, // gpt-4o — 128K
     defaultModel: getTierConfig("openai").conductor || "gpt-4o",
     defaultTiers: getTierConfig("openai").llm,
     defaultVlm: getTierConfig("openai").vlm,
@@ -167,6 +186,7 @@ const PROVIDERS = [
     authType: "bearer",
     apiFormat: "openai",
     modelsEndpoint: "/models",
+    contextLimit: 200000, // GLM official (bigmodel.cn) — 200K on GLM-4.x/5.x tiers
     defaultModel: getTierConfig("zhipu").conductor || "glm-4-plus",
     defaultTiers: getTierConfig("zhipu").llm,
     defaultVlm: getTierConfig("zhipu").vlm,
@@ -182,6 +202,7 @@ const PROVIDERS = [
     authType: "bearer",
     apiFormat: "openai",
     modelsEndpoint: "/models",
+    contextLimit: 245760, // MiniMax-M2.5 — 240K
     defaultModel: getTierConfig("minimax").conductor || "MiniMax-M2.5",
     defaultTiers: getTierConfig("minimax").llm,
     defaultVlm: getTierConfig("minimax").vlm,
@@ -197,6 +218,10 @@ const PROVIDERS = [
     authType: "bearer",
     apiFormat: "openai",
     modelsEndpoint: "/models",
+    // OpenRouter proxies many models; defaulting to 200K matches the underlying
+    // frontier Anthropic/Google routes most users pick. Lower-context models
+    // behind OpenRouter will still work, just won't benefit from early windowing.
+    contextLimit: 200000,
     defaultModel: getTierConfig("openrouter").conductor || "anthropic/claude-sonnet-4-20250514",
     defaultTiers: getTierConfig("openrouter").llm,
     defaultVlm: getTierConfig("openrouter").vlm,
@@ -212,6 +237,7 @@ const PROVIDERS = [
     authType: "aws-sigv4",
     apiFormat: "anthropic",
     modelsEndpoint: null,
+    contextLimit: 200000, // Bedrock Anthropic routes mirror native Claude 200K
     defaultModel: getTierConfig("bedrock").conductor || "anthropic.claude-sonnet-4-20250514-v1:0",
     defaultTiers: getTierConfig("bedrock").llm,
     defaultVlm: getTierConfig("bedrock").vlm,
package/template/skills/en/meta-meta/evolution-loop/SKILL.md
CHANGED
@@ -163,12 +163,24 @@ Track three metrics per iteration to know when to stop:
 
 ### Stopping Criteria
 
-Stop the loop when ALL three conditions hold for one iteration:
+Stop the loop when **ALL** three conditions hold for one iteration:
 
 1. Correction volume < 5% of total test cases.
 2. New pattern count = 0.
 3. Regression count = 0.
 
+**OR** when the standalone accuracy-convergence condition holds (D5, added
+2026-04-23):
+
+4. Overall accuracy changed by less than 1% between the last two
+   iterations — i.e. `|accuracy[N+1] − accuracy[N]| < 0.01`.
+
+Condition 4 prevents the observed over-iteration pattern of v5 → v12,
+where each iteration oscillated within a ~0.5% accuracy window. Once the
+model has reached "good enough," continuing burns tokens without
+delivering real improvement. When accuracy has plateaued, proceed to the
+next phase (distillation / production).
+
 If correction volume *increases* between consecutive iterations, this is a regression signal. Pause the loop and diagnose before continuing — the last fix may be destabilizing the system.
 
 ### Expected Convergence
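The combined stopping test now reads as "conditions 1-3 together, or the D5 plateau alone". A sketch with hypothetical field names:

```js
function shouldStop(iter) {
  const converged =
    iter.correctionVolume < 0.05 * iter.totalCases && // 1. corrections < 5%
    iter.newPatterns === 0 &&                         // 2. no new patterns
    iter.regressions === 0;                           // 3. no regressions
  const plateaued = Math.abs(iter.accuracy - iter.prevAccuracy) < 0.01; // 4. D5
  return converged || plateaued;
}

// The v5 -> v12 oscillation case: a 0.2% accuracy move trips condition 4
// even though corrections and new patterns are still nonzero.
console.log(shouldStop({
  correctionVolume: 12, totalCases: 100, newPatterns: 1, regressions: 0,
  accuracy: 0.921, prevAccuracy: 0.923,
})); // true
```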
package/template/skills/en/meta-meta/rule-extraction/SKILL.md
CHANGED
@@ -59,6 +59,80 @@ Rules will be distilled into workflows (see `skill-to-workflow`). Design with di
 ### Catalog Versioning
 When rules change (additions, modifications, deprecations), version the entire rule catalog as a unit. Individual rule versions track specific rules; the catalog version tracks the coherent set. Record the catalog version in `versions.json` alongside individual rule versions.
 
+## Granularity Calibration (read before extracting)
+
+A well-extracted rule catalog has **10-20 rules per typical regulation PDF**
+(a 30-80 page disclosure regulation). Over-extraction into 60-100 rules per
+regulation signals you're treating every clause as its own rule — downstream
+consumers (skill-authoring, workflow-run) can't distinguish meaningful
+checks from boilerplate.
+
+If your first pass produces more than ~25 rules for a single regulation:
+- **Merge rules that share evidence and fail together** (e.g., "must
+  disclose X" and "must disclose Y" where both come from the same
+  required-fields table → one rule: "must disclose the required-fields
+  list including X, Y").
+- **Drop procedural language** that isn't checkable against a report
+  (definitions, scope statements, references to other regs that just
+  transitively apply).
+- **Keep only checkable obligations, prohibitions, and thresholds** —
+  things where you can read a sample report and say pass or fail.
+
+### Sample "good" rule
+
+```json
+{
+  "id": "R014",
+  "source_ref": "Disclosure Reg §15.2",
+  "description": "Quarterly reports must be disclosed within 15 business days after quarter-end.",
+  "applicable_sections": ["public funds"],
+  "severity": "high",
+  "machine_checkable": true,
+  "falsifiability_statement": "If disclosure date is later than 15th business day after quarter-end, the rule fails.",
+  "test_case_stub": "Read the quarterly report's disclosure date + the quarter-end date, compute business-day difference."
+}
+```
+
+Note: one pass/fail outcome, a single `source_ref` to a specific clause,
+clear applicability scope. Skill-authoring can write `check_r014.py` from
+this alone.
+
+### Cross-regulation dedup (when working across multiple PDFs)
+
+If the developer user provides N regulations, rules from later regs often
+duplicate cross-cutting requirements already captured by earlier ones
+(e.g., a 2018 generic disclosure rule vs. a 2025 specific version).
+Before emitting a rule from reg N:
+
+1. **Check the existing catalog.** Use `rule_catalog` (operation: list)
+   to see what's already there. Skip if a rule with equivalent scope +
+   intent exists.
+2. **Prefer the newer / more specific source_ref** when rules overlap.
+3. **If you merged rules**, record the consolidated sources in
+   `source_ref`: e.g., `"New Reg §15.2 + Old Reg §24"`.
+
+### Delegation to sub-agents
+
+If you dispatch extraction to sub-agents (one per regulation), the
+sub-agent inherits ONLY its `task_description` — it cannot see your
+conversation or existing catalog. Therefore, when composing the brief:
+
+- **Specify the target count band** explicitly: "Extract 10-20 atomic
+  rules from this regulation."
+- **Include a sample rule** in the brief body (paste the JSON above
+  verbatim) so the sub-agent's calibration matches yours.
+- **Name every regulation the sub-agent should process.** If AGENT.md
+  lists 10 core regulations, the brief must list all 10 by name, not
+  "the core regs" as a pronoun — LLMs composing long structured briefs
+  frequently drop items (observed in session 6304673afaa0 where reg 02
+  was silently omitted).
+- **State the dedup contract**: "Rules already in the parent's catalog
+  (R001–Rnnn) should NOT be re-extracted. If a requirement is already
+  covered, skip it." Then pass the current catalog's ID ranges.
+- **Prefer `rule_catalog` create operations over sandbox_exec writes to
+  catalog.json.** rule_catalog uses workspace file locking;
+  sandbox_exec bypasses it and races with other writers.
+
 ## Extraction Strategies
 
 ### Strategy 1: Structured Input (Developer User Provides Rules)
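A catalog lint in the spirit of the calibration band above: flag sources with more than ~25 rules and rules without a falsifiability statement. Field names follow the sample rule; grouping `source_ref` by the text before "§" is an assumption, not the package's behavior:

```js
function lintCatalog(rules) {
  const countsBySource = new Map();
  for (const r of rules) {
    const reg = (r.source_ref || "unknown").split("§")[0].trim();
    countsBySource.set(reg, (countsBySource.get(reg) || 0) + 1);
  }
  const warnings = [];
  for (const [reg, count] of countsBySource) {
    if (count > 25) warnings.push(`${reg}: ${count} rules, merge toward the 10-20 band`);
  }
  for (const r of rules) {
    if (!r.falsifiability_statement) {
      warnings.push(`${r.id}: no falsifiability statement; is it actually checkable?`);
    }
  }
  return warnings;
}
```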
package/template/skills/zh/meta-meta/evolution-loop/SKILL.md
CHANGED
@@ -241,12 +241,18 @@ description: Drive continuous improvement of skills and workflows through the di
 
 ### 停止条件
 
-
+当一轮迭代**同时**满足以下三个条件时,停止循环:
 
 1. 修正量 < 总测试案例的 5%。
 2. 新模式数 = 0。
 3. 回归数 = 0。
 
+**或者**满足单独的准确率收敛条件(D5,2026-04-23 新增):
+
+4. 连续两轮迭代之间的整体准确率变化 < 1%。即 `|accuracy[N+1] - accuracy[N]| < 0.01`。
+
+条件 4 是为了防止观察到的过度迭代模式——从 v5 一直迭代到 v12,每轮都在 0.5% 的精度范围内来回波动。当模型已经达到"足够好"时,继续迭代只会消耗 token,不会带来实质改进。一旦准确率趋于稳定,应当进入下一阶段(蒸馏/生产)。
+
 如果修正量在连续两轮迭代之间**增加**,这是回归信号。暂停循环,先诊断原因再继续——上一轮的修复可能正在破坏系统的稳定性。
 
 ### 预期收敛速度