kc-beta 0.7.5 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/package.json +3 -2
- package/src/agent/engine.js +390 -100
- package/src/agent/pipelines/_advance-hints.js +92 -0
- package/src/agent/pipelines/_milestone-derive.js +247 -13
- package/src/agent/pipelines/skill-authoring.js +30 -1
- package/src/agent/tools/agent-tool.js +2 -2
- package/src/agent/tools/consult-skill.js +15 -0
- package/src/agent/tools/dashboard-render.js +48 -1
- package/src/agent/tools/document-parse.js +31 -2
- package/src/agent/tools/phase-advance.js +17 -13
- package/src/agent/tools/release.js +250 -7
- package/src/agent/tools/sandbox-exec.js +65 -8
- package/src/agent/tools/worker-llm-call.js +95 -15
- package/src/agent/workspace.js +25 -4
- package/src/cli/components.js +4 -1
- package/src/cli/index.js +97 -1
- package/src/config.js +19 -2
- package/src/marathon/driver.js +217 -0
- package/src/marathon/prompts.js +93 -0
- package/template/.env.template +16 -0
- package/template/skills/en/bootstrap-workspace/SKILL.md +14 -0
- package/template/skills/en/quality-control/SKILL.md +9 -0
- package/template/skills/en/skill-authoring/SKILL.md +39 -0
- package/template/skills/en/skill-to-workflow/SKILL.md +53 -0
- package/template/skills/en/work-decomposition/SKILL.md +34 -0
- package/template/skills/phase_skills.yaml +5 -0
- package/template/skills/zh/bootstrap-workspace/SKILL.md +14 -0
- package/template/skills/zh/compliance-judgment/SKILL.md +37 -37
- package/template/skills/zh/document-chunking/SKILL.md +21 -14
- package/template/skills/zh/document-parsing/SKILL.md +65 -65
- package/template/skills/zh/entity-extraction/SKILL.md +68 -68
- package/template/skills/zh/quality-control/SKILL.md +9 -0
- package/template/skills/zh/skill-authoring/SKILL.md +39 -0
- package/template/skills/zh/skill-creator/SKILL.md +204 -200
- package/template/skills/zh/skill-to-workflow/SKILL.md +53 -0
- package/template/skills/zh/tree-processing/SKILL.md +67 -63
- package/template/skills/zh/work-decomposition/SKILL.md +34 -0
- package/template/workflows/common/llm_client.py +168 -0
- package/template/workflows/common/utils.py +132 -0
package/src/cli/index.js
CHANGED
|
@@ -59,6 +59,8 @@ function App({ engine, config }) {
|
|
|
59
59
|
const [spinnerStatus, setSpinnerStatus] = useState(null);
|
|
60
60
|
const [contextTokens, setContextTokens] = useState(0);
|
|
61
61
|
const [contextLimit, setContextLimit] = useState(config.kcContextLimit || 200000);
|
|
62
|
+
// v0.8.1 P8-A: marathon-mode indicator for StatusBar.
|
|
63
|
+
const [marathonActive, setMarathonActive] = useState(false);
|
|
62
64
|
const [taskList, setTaskList] = useState([]);
|
|
63
65
|
const [taskProgress, setTaskProgress] = useState(null);
|
|
64
66
|
|
|
@@ -124,6 +126,11 @@ function App({ engine, config }) {
|
|
|
124
126
|
setCurrentTool(null);
|
|
125
127
|
setSpinnerStatus(null);
|
|
126
128
|
updateContextStats();
|
|
129
|
+
// v0.8.1 P8-A: refresh marathon indicator. If the driver
|
|
130
|
+
// self-terminated (max_wallclock / finalization_settled),
|
|
131
|
+
// engine clears marathonDriver on next decideNext loop;
|
|
132
|
+
// we sync the TUI state here.
|
|
133
|
+
setMarathonActive(engineRef.current.isMarathonActive());
|
|
127
134
|
break;
|
|
128
135
|
|
|
129
136
|
case "tool_start":
|
|
@@ -221,6 +228,9 @@ function App({ engine, config }) {
|
|
|
221
228
|
" /sessions List all sessions\n" +
|
|
222
229
|
" /resume <name> Resume a previous session\n" +
|
|
223
230
|
" /rename <name> Rename current session\n" +
|
|
231
|
+
" /marathon <goal> Activate marathon mode (chains turns automatically)\n" +
|
|
232
|
+
" /marathon off Deactivate marathon (return to interactive)\n" +
|
|
233
|
+
" /marathon status Show marathon driver state\n" +
|
|
224
234
|
" /exit Quit",
|
|
225
235
|
});
|
|
226
236
|
return true;
|
|
@@ -593,6 +603,84 @@ function App({ engine, config }) {
|
|
|
593
603
|
}
|
|
594
604
|
return true;
|
|
595
605
|
|
|
606
|
+
case "/marathon": {
|
|
607
|
+
// v0.8.1 P8-A: inline marathon mode. `/marathon <goal>` activates;
|
|
608
|
+
// `/marathon off` deactivates; `/marathon status` shows snapshot.
|
|
609
|
+
const sub = arg.split(/\s+/)[0]?.toLowerCase();
|
|
610
|
+
if (sub === "off" || sub === "stop") {
|
|
611
|
+
const final = engineRef.current.exitMarathonMode("user_off");
|
|
612
|
+
setMarathonActive(false);
|
|
613
|
+
if (final) {
|
|
614
|
+
addMessage({
|
|
615
|
+
role: "system",
|
|
616
|
+
content: `Marathon mode OFF.\n decisions: ${final.decisionCount}\n runtime: ${Math.round(final.runtimeMs / 1000)}s\n last phase: ${final.currentPhase}`,
|
|
617
|
+
});
|
|
618
|
+
} else {
|
|
619
|
+
addMessage({ role: "system", content: "Marathon was not active." });
|
|
620
|
+
}
|
|
621
|
+
return true;
|
|
622
|
+
}
|
|
623
|
+
if (sub === "status") {
|
|
624
|
+
if (!engineRef.current.isMarathonActive()) {
|
|
625
|
+
addMessage({ role: "system", content: "Marathon mode is OFF." });
|
|
626
|
+
return true;
|
|
627
|
+
}
|
|
628
|
+
const s = engineRef.current.marathonDriver.getStatus();
|
|
629
|
+
const lines = [
|
|
630
|
+
`Marathon mode ON`,
|
|
631
|
+
` goal: ${s.goal.slice(0, 100)}${s.goal.length > 100 ? "..." : ""}`,
|
|
632
|
+
` language: ${s.language}`,
|
|
633
|
+
` started: ${s.startedAt} (${Math.round(s.runtimeMs / 60000)} min ago)`,
|
|
634
|
+
` current_phase: ${s.currentPhase}`,
|
|
635
|
+
` turns this phase: ${s.turnsThisPhase}`,
|
|
636
|
+
` total decisions: ${s.decisionCount}`,
|
|
637
|
+
];
|
|
638
|
+
if (s.recentDecisions?.length) {
|
|
639
|
+
lines.push(` recent decisions:`);
|
|
640
|
+
for (const d of s.recentDecisions.slice(-3)) {
|
|
641
|
+
lines.push(` ${d.ts.slice(11, 19)} [${d.template}] ${d.reason}`);
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
addMessage({ role: "system", content: lines.join("\n") });
|
|
645
|
+
return true;
|
|
646
|
+
}
|
|
647
|
+
// `/marathon <goal>` — activate
|
|
648
|
+
if (!arg) {
|
|
649
|
+
addMessage({
|
|
650
|
+
role: "system",
|
|
651
|
+
content:
|
|
652
|
+
"Usage:\n" +
|
|
653
|
+
" /marathon <goal description> Activate marathon mode with the given goal\n" +
|
|
654
|
+
" /marathon off Deactivate (return to interactive)\n" +
|
|
655
|
+
" /marathon status Show current driver state\n\n" +
|
|
656
|
+
"Marathon mode chains turns automatically using templated continuation prompts.\n" +
|
|
657
|
+
"F5 strict one-phase-per-prompt is bypassed while active. /resume after a crash\n" +
|
|
658
|
+
"does NOT auto-restore marathon — re-type /marathon to re-engage.",
|
|
659
|
+
});
|
|
660
|
+
return true;
|
|
661
|
+
}
|
|
662
|
+
try {
|
|
663
|
+
const status = engineRef.current.enterMarathonMode(arg);
|
|
664
|
+
setMarathonActive(true);
|
|
665
|
+
addMessage({
|
|
666
|
+
role: "system",
|
|
667
|
+
content:
|
|
668
|
+
`🏃 Marathon mode ON.\n` +
|
|
669
|
+
` goal: ${arg.slice(0, 200)}${arg.length > 200 ? "..." : ""}\n` +
|
|
670
|
+
` language: ${status.language}\n` +
|
|
671
|
+
` stop conditions: ${Math.round(status.maxWallclockMs / 3600000)}h wall-clock OR 5 turns settled in finalization\n\n` +
|
|
672
|
+
`Next turn will use the marathon initial prompt. Type /marathon off to disengage.`,
|
|
673
|
+
});
|
|
674
|
+
// Immediately trigger a turn with the initial prompt
|
|
675
|
+
const initialPrompt = engineRef.current.marathonDriver.getInitialPrompt();
|
|
676
|
+
// Hand the initial prompt to the same runTurn path as a user message
|
|
677
|
+
runTurn(initialPrompt);
|
|
678
|
+
} catch (e) {
|
|
679
|
+
addMessage({ role: "system", content: `Marathon activation failed: ${e.message}` });
|
|
680
|
+
}
|
|
681
|
+
return true;
|
|
682
|
+
}
|
|
683
|
+
|
|
596
684
|
case "/exit":
|
|
597
685
|
case "/quit":
|
|
598
686
|
// Save state + stop diagnostics before exit
|
|
@@ -752,7 +840,7 @@ function App({ engine, config }) {
|
|
|
752
840
|
placeholderRight: queueSize > 0 ? `(${queueSize} queued)` : null,
|
|
753
841
|
}),
|
|
754
842
|
h(HRule),
|
|
755
|
-
h(StatusBar, { sessionId, phase, contextTokens, contextLimit }),
|
|
843
|
+
h(StatusBar, { sessionId, phase, contextTokens, contextLimit, marathonActive }),
|
|
756
844
|
);
|
|
757
845
|
}
|
|
758
846
|
|
|
@@ -821,6 +909,14 @@ export async function main({ languageOverride } = {}) {
|
|
|
821
909
|
};
|
|
822
910
|
process.on("SIGINT", saveOnExit);
|
|
823
911
|
process.on("SIGTERM", saveOnExit);
|
|
912
|
+
// v0.8.1 P8-B: SIGHUP coverage. E2E #11 found macOS sends signals to
|
|
913
|
+
// descendant processes when a Terminal.app window closes or quits;
|
|
914
|
+
// nohup masks SIGHUP but not SIGTERM, and we already cover SIGTERM.
|
|
915
|
+
// Adding SIGHUP makes the kc-beta process robust against terminal
|
|
916
|
+
// teardown even if it's not nohup'd. Without this, a closed terminal
|
|
917
|
+
// can leave KC half-shut-down (events.jsonl flushed, but no
|
|
918
|
+
// marathon_detach event, no clean session-state save).
|
|
919
|
+
process.on("SIGHUP", saveOnExit);
|
|
824
920
|
|
|
825
921
|
const instance = render(h(App, { engine, config }));
|
|
826
922
|
await instance.waitUntilExit();
|
package/src/config.js
CHANGED
|
@@ -21,7 +21,11 @@ function loadGlobalConfig() {
|
|
|
21
21
|
* Parse a .env file into a key-value object.
|
|
22
22
|
* Handles KEY=VALUE lines, ignores comments and blank lines.
|
|
23
23
|
*/
|
|
24
|
-
|
|
24
|
+
// v0.8 P1-B: exported so engine.js can re-overlay workspace .env after
|
|
25
|
+
// the workspace directory is known (cli/index.js calls loadSettings()
|
|
26
|
+
// without a workspace path because the path isn't known until the engine
|
|
27
|
+
// constructs the Workspace object).
|
|
28
|
+
export function loadEnvFile(envPath) {
|
|
25
29
|
if (!fs.existsSync(envPath)) return {};
|
|
26
30
|
// v0.7.0 H9: defend bootstrap against a .env that exists but isn't
|
|
27
31
|
// readable (permission denied, unexpected directory, encoding error,
|
|
@@ -110,7 +114,20 @@ export function loadSettings(workspacePath) {
|
|
|
110
114
|
|
|
111
115
|
// Workspace (process.env wins — for parallel benchmark runs)
|
|
112
116
|
kcWorkspaceRoot: penv.KC_WORKSPACE_ROOT || gc.workspace_root || path.join(os.homedir(), ".kc_agent", "workspaces"),
|
|
113
|
-
|
|
117
|
+
// v0.8 P1-F sandbox_exec timeout model. Default 120s (Claude Code parity),
|
|
118
|
+
// max 600s (10 min) ceiling. Agent can pass per-call timeout_ms up to max.
|
|
119
|
+
// Legacy KC_EXEC_TIMEOUT (seconds) is accepted as a deprecated alias for the default; it is converted to ms.
|
|
120
|
+
kcExecDefaultTimeoutMs: parseInt(
|
|
121
|
+
env.KC_EXEC_DEFAULT_TIMEOUT_MS ||
|
|
122
|
+
(env.KC_EXEC_TIMEOUT ? String(parseInt(env.KC_EXEC_TIMEOUT, 10) * 1000) : "") ||
|
|
123
|
+
"120000",
|
|
124
|
+
10,
|
|
125
|
+
),
|
|
126
|
+
kcExecMaxTimeoutMs: parseInt(env.KC_EXEC_MAX_TIMEOUT_MS || "600000", 10),
|
|
127
|
+
// Legacy alias kept for any consumer reading it directly. Computed
|
|
128
|
+
// independently from KC_EXEC_TIMEOUT (seconds, default 120), not from the ms-based field. New code should read
|
|
129
|
+
// kcExecDefaultTimeoutMs / kcExecMaxTimeoutMs.
|
|
130
|
+
kcExecTimeout: parseInt(env.KC_EXEC_TIMEOUT || "120", 10),
|
|
114
131
|
|
|
115
132
|
// Accuracy thresholds
|
|
116
133
|
skillAccuracy: parseFloat(env.SKILL_ACCURACY || gc.accuracy_threshold?.toString() || "0.9"),
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
// v0.8.1 P8-A — marathon driver as inline state machine.
|
|
2
|
+
//
|
|
3
|
+
// v0.8.0 shipped this as a separate-process driver (bin/kc-marathon.js)
|
|
4
|
+
// that tailed events.jsonl + wrote prompts to .kc_marathon/inbox.jsonl.
|
|
5
|
+
// E2E #11 audits found both drivers died silently within 10 min when
|
|
6
|
+
// the terminal closed or laptop slept (SIGHUP/SIGTERM unhandled). The
|
|
7
|
+
// engine survived both deaths because it lives in a different process.
|
|
8
|
+
//
|
|
9
|
+
// v0.8.1 redesign per user proposal (2026-05-15):
|
|
10
|
+
// - Single process: driver runs inline as part of the engine
|
|
11
|
+
// - Activated via `/marathon <goal>` slash command in kc-beta TUI
|
|
12
|
+
// - Engine calls decideNext(state) after each turn_complete to get
|
|
13
|
+
// the next continuation prompt (or null if marathon should end)
|
|
14
|
+
// - No filesystem IPC (no inbox, no active marker, no state.json)
|
|
15
|
+
//   - State is serialized into the engine's existing session-state.json (audit only; not auto-resumed)
|
|
16
|
+
//
|
|
17
|
+
// The state machine logic from v0.8.0 is preserved verbatim — only
|
|
18
|
+
// the I/O wrapper changes. Templates (renderPrompt) unchanged.
|
|
19
|
+
|
|
20
|
+
import { renderPrompt } from "./prompts.js";
|
|
21
|
+
|
|
22
|
+
const DEFAULT_STUCK_AFTER_MS = 30 * 60 * 1000; // 30 min
|
|
23
|
+
const DEFAULT_MAX_WALLCLOCK_MS = 12 * 60 * 60 * 1000; // 12 h
|
|
24
|
+
|
|
25
|
+
export class MarathonDriver {
|
|
26
|
+
/**
|
|
27
|
+
* @param {object} opts
|
|
28
|
+
* @param {string} opts.goal — the marathon goal-description prompt
|
|
29
|
+
* @param {string} [opts.language] — "en" or "zh"
|
|
30
|
+
* @param {number} [opts.maxWallclockMs] — stop after this much wall time
|
|
31
|
+
* @param {number} [opts.stuckAfterMs] — emit unstick prompt after idle
|
|
32
|
+
*/
|
|
33
|
+
constructor(opts = {}) {
|
|
34
|
+
if (!opts.goal || typeof opts.goal !== "string") {
|
|
35
|
+
throw new Error("MarathonDriver requires a non-empty `goal` string");
|
|
36
|
+
}
|
|
37
|
+
this.goal = opts.goal;
|
|
38
|
+
this.language = opts.language === "zh" ? "zh" : "en";
|
|
39
|
+
this.maxWallclockMs = opts.maxWallclockMs ?? DEFAULT_MAX_WALLCLOCK_MS;
|
|
40
|
+
this.stuckAfterMs = opts.stuckAfterMs ?? DEFAULT_STUCK_AFTER_MS;
|
|
41
|
+
|
|
42
|
+
this.startedAt = Date.now();
|
|
43
|
+
this.lastDecisionAt = 0;
|
|
44
|
+
this.decisionCount = 0;
|
|
45
|
+
this.currentPhase = "bootstrap";
|
|
46
|
+
this.lastMilestones = {};
|
|
47
|
+
this.turnsThisPhase = 0;
|
|
48
|
+
this.lastEventTs = Date.now();
|
|
49
|
+
this.initialDelivered = false;
|
|
50
|
+
this.stopped = false;
|
|
51
|
+
this.stopReason = null;
|
|
52
|
+
|
|
53
|
+
// Decision history (kept in-memory; surfaced in /marathon status).
|
|
54
|
+
// Bounded to last 100 to cap memory.
|
|
55
|
+
this.decisions = [];
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Engine calls this once BEFORE the initial turn after /marathon was
|
|
60
|
+
* typed. Returns the goal-description prompt to feed into runTurn.
|
|
61
|
+
*/
|
|
62
|
+
getInitialPrompt() {
|
|
63
|
+
const out = renderPrompt(
|
|
64
|
+
"initial",
|
|
65
|
+
this._stateSnapshot(),
|
|
66
|
+
this.language,
|
|
67
|
+
);
|
|
68
|
+
this._recordDecision("initial", "marathon kickoff", out);
|
|
69
|
+
this.initialDelivered = true;
|
|
70
|
+
return out;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Engine calls decideNext(state) after each turn_complete event.
|
|
75
|
+
* Returns { prompt, template, reason } if marathon should continue,
|
|
76
|
+
* or null once the driver has already stopped. The call that first meets a stop condition still returns one final "stop" prompt (engine will then exit marathon mode).
|
|
77
|
+
*
|
|
78
|
+
* @param {object} state — engine snapshot:
|
|
79
|
+
* {currentPhase, milestones, phaseChanged, errorSeen, turnsThisPhase}
|
|
80
|
+
*/
|
|
81
|
+
decideNext(state = {}) {
|
|
82
|
+
if (this.stopped) return null;
|
|
83
|
+
|
|
84
|
+
// Update tracked state from engine
|
|
85
|
+
if (state.currentPhase && state.currentPhase !== this.currentPhase) {
|
|
86
|
+
this.currentPhase = state.currentPhase;
|
|
87
|
+
this.turnsThisPhase = 0;
|
|
88
|
+
}
|
|
89
|
+
if (state.milestones) this.lastMilestones = state.milestones;
|
|
90
|
+
if (typeof state.turnsThisPhase === "number") {
|
|
91
|
+
this.turnsThisPhase = state.turnsThisPhase;
|
|
92
|
+
} else {
|
|
93
|
+
this.turnsThisPhase += 1;
|
|
94
|
+
}
|
|
95
|
+
this.lastEventTs = Date.now();
|
|
96
|
+
|
|
97
|
+
// Stop conditions
|
|
98
|
+
if (this._shouldStop()) {
|
|
99
|
+
this.stopped = true;
|
|
100
|
+
// Emit one final "stop" prompt so the agent has a chance to wrap up.
|
|
101
|
+
const out = renderPrompt("stop", this._stateSnapshot(), this.language);
|
|
102
|
+
this._recordDecision("stop", this.stopReason, out);
|
|
103
|
+
return { prompt: out, template: "stop", reason: this.stopReason };
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
let template = "continue_phase";
|
|
107
|
+
let reason = "turn_complete in same phase";
|
|
108
|
+
|
|
109
|
+
if (state.errorSeen) {
|
|
110
|
+
template = "unstick";
|
|
111
|
+
reason = "engine emitted error event";
|
|
112
|
+
} else if (state.phaseChanged) {
|
|
113
|
+
if (this.currentPhase === "finalization") {
|
|
114
|
+
template = "finalize";
|
|
115
|
+
reason = "reached finalization";
|
|
116
|
+
} else {
|
|
117
|
+
template = "continue_phase";
|
|
118
|
+
reason = `entered ${this.currentPhase}`;
|
|
119
|
+
}
|
|
120
|
+
} else {
|
|
121
|
+
const idleMs = Date.now() - this.lastEventTs;
|
|
122
|
+
if (idleMs > this.stuckAfterMs) {
|
|
123
|
+
template = "unstick";
|
|
124
|
+
reason = `idle for ${Math.round(idleMs / 60000)} min`;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
const out = renderPrompt(template, this._stateSnapshot(), this.language);
|
|
129
|
+
this._recordDecision(template, reason, out);
|
|
130
|
+
return { prompt: out, template, reason };
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/** User-invoked manual stop (e.g., `/marathon off`). */
|
|
134
|
+
stop(reason = "user_off") {
|
|
135
|
+
this.stopped = true;
|
|
136
|
+
this.stopReason = reason;
|
|
137
|
+
this._recordDecision("manual_stop", reason, "");
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
/** Snapshot for /marathon status command + audit. */
|
|
141
|
+
getStatus() {
|
|
142
|
+
return {
|
|
143
|
+
active: !this.stopped,
|
|
144
|
+
goal: this.goal,
|
|
145
|
+
language: this.language,
|
|
146
|
+
startedAt: new Date(this.startedAt).toISOString(),
|
|
147
|
+
runtimeMs: Date.now() - this.startedAt,
|
|
148
|
+
currentPhase: this.currentPhase,
|
|
149
|
+
turnsThisPhase: this.turnsThisPhase,
|
|
150
|
+
decisionCount: this.decisionCount,
|
|
151
|
+
lastDecisionAt: this.lastDecisionAt ? new Date(this.lastDecisionAt).toISOString() : null,
|
|
152
|
+
stopReason: this.stopReason,
|
|
153
|
+
maxWallclockMs: this.maxWallclockMs,
|
|
154
|
+
stuckAfterMs: this.stuckAfterMs,
|
|
155
|
+
recentDecisions: this.decisions.slice(-5),
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/** Serialize for session-state.json persistence (NOT used for auto-resume per user-locked decision; included for audit visibility only). */
|
|
160
|
+
toJSON() {
|
|
161
|
+
return {
|
|
162
|
+
goal: this.goal,
|
|
163
|
+
language: this.language,
|
|
164
|
+
maxWallclockMs: this.maxWallclockMs,
|
|
165
|
+
stuckAfterMs: this.stuckAfterMs,
|
|
166
|
+
startedAt: this.startedAt,
|
|
167
|
+
currentPhase: this.currentPhase,
|
|
168
|
+
turnsThisPhase: this.turnsThisPhase,
|
|
169
|
+
decisionCount: this.decisionCount,
|
|
170
|
+
initialDelivered: this.initialDelivered,
|
|
171
|
+
stopped: this.stopped,
|
|
172
|
+
stopReason: this.stopReason,
|
|
173
|
+
// Note: decisions array not persisted (memory-only)
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// ─── internals ──────────────────────────────────────────────────
|
|
178
|
+
|
|
179
|
+
_stateSnapshot() {
|
|
180
|
+
return {
|
|
181
|
+
goal: this.goal,
|
|
182
|
+
currentPhase: this.currentPhase,
|
|
183
|
+
milestones: this.lastMilestones,
|
|
184
|
+
idleSec: Math.round((Date.now() - this.lastEventTs) / 1000),
|
|
185
|
+
lastEventType: this._lastEventType || null,
|
|
186
|
+
};
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
_shouldStop() {
|
|
190
|
+
if (this.stopped) return true;
|
|
191
|
+
if (Date.now() - this.startedAt > this.maxWallclockMs) {
|
|
192
|
+
this.stopReason = "max_wallclock";
|
|
193
|
+
return true;
|
|
194
|
+
}
|
|
195
|
+
if (
|
|
196
|
+
this.currentPhase === "finalization" &&
|
|
197
|
+
this.turnsThisPhase >= 5
|
|
198
|
+
) {
|
|
199
|
+
this.stopReason = "finalization_settled";
|
|
200
|
+
return true;
|
|
201
|
+
}
|
|
202
|
+
return false;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
_recordDecision(template, reason, prompt) {
|
|
206
|
+
this.decisionCount += 1;
|
|
207
|
+
this.lastDecisionAt = Date.now();
|
|
208
|
+
this.decisions.push({
|
|
209
|
+
ts: new Date().toISOString(),
|
|
210
|
+
template,
|
|
211
|
+
reason,
|
|
212
|
+
currentPhase: this.currentPhase,
|
|
213
|
+
promptPreview: (prompt || "").slice(0, 200),
|
|
214
|
+
});
|
|
215
|
+
if (this.decisions.length > 100) this.decisions.shift();
|
|
216
|
+
}
|
|
217
|
+
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
// v0.8 P4 — templated continuation prompts for the marathon driver.
|
|
2
|
+
//
|
|
3
|
+
// Driver state → prompt template mapping. Templates are deterministic
|
|
4
|
+
// (no LLM in the driver), bilingual (en + zh per workspace LANGUAGE).
|
|
5
|
+
// Goal: surface to KC's main conductor the smallest useful nudge to
|
|
6
|
+
// keep the pipeline moving without leaking marathon-implementation
|
|
7
|
+
// details into the agent's context.
|
|
8
|
+
//
|
|
9
|
+
// Each template is a function (state) → string; the goal is read from state.goal. State
|
|
10
|
+
// fields used:
|
|
11
|
+
// currentPhase, milestones, idleSec, lastEventType, goal
|
|
12
|
+
//
|
|
13
|
+
// Add new templates here as the driver state machine grows.
|
|
14
|
+
|
|
15
|
+
const TEMPLATES_EN = {
|
|
16
|
+
initial: (s) => `Goal: ${s.goal}\n\n` +
|
|
17
|
+
`You are running in marathon mode (no manual user check-ins). Advance the pipeline phase by phase. ` +
|
|
18
|
+
`Engine derives milestones from filesystem facts; produce real artifacts, then call phase_advance. ` +
|
|
19
|
+
`If you get stuck on a specific phase, surface the blocker in your next response and the driver will ` +
|
|
20
|
+
`inject a diagnostic prompt next turn.`,
|
|
21
|
+
|
|
22
|
+
continue_phase: (s) => `Continue with ${s.currentPhase} work. ` +
|
|
23
|
+
`Engine status: ${formatMilestones(s.milestones)}.`,
|
|
24
|
+
|
|
25
|
+
advance_phase: (s) => `${s.currentPhase} milestones look complete (${formatMilestones(s.milestones)}). ` +
|
|
26
|
+
`Verify the gate conditions then call \`phase_advance\` to the next phase.`,
|
|
27
|
+
|
|
28
|
+
unstick: (s) => `No phase progress in the last ${Math.round(s.idleSec / 60)} minutes. ` +
|
|
29
|
+
`Either (1) surface the blocker explicitly so the developer user can intervene, or (2) ` +
|
|
30
|
+
`consult the relevant meta-skill for the current phase and try a different approach. ` +
|
|
31
|
+
`If you've genuinely finished and the engine gate is wrong, force phase_advance with reason.`,
|
|
32
|
+
|
|
33
|
+
finalize: (s) => `You've reached finalization. Wrap the deliverable bundle: ` +
|
|
34
|
+
`verify rule_skills/coverage_report.md is substantive, output/releases/<slug>/ is current ` +
|
|
35
|
+
`(re-run release tool if workflows changed after the last snapshot), and final_dashboard.html ` +
|
|
36
|
+
`reflects the latest QC data. When done, just say so — the marathon will exit.`,
|
|
37
|
+
|
|
38
|
+
stop: () => `Marathon stop condition reached. Save state and summarize what was accomplished.`,
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
const TEMPLATES_ZH = {
|
|
42
|
+
initial: (s) => `目标:${s.goal}\n\n` +
|
|
43
|
+
`你正运行在 marathon 模式(无人工 check-in)。按阶段推进整条流水线。` +
|
|
44
|
+
`引擎从文件系统事实派生里程碑;先把真实交付物产出来,再调用 phase_advance。` +
|
|
45
|
+
`如果在某个阶段卡住了,直接在下一回合的回复里把阻塞点说清楚,驱动器会在下一回合注入诊断提示。`,
|
|
46
|
+
|
|
47
|
+
continue_phase: (s) => `继续 ${s.currentPhase} 阶段的工作。` +
|
|
48
|
+
`引擎状态:${formatMilestones(s.milestones)}。`,
|
|
49
|
+
|
|
50
|
+
advance_phase: (s) => `${s.currentPhase} 阶段的里程碑看起来已经完成(${formatMilestones(s.milestones)})。` +
|
|
51
|
+
`核对一遍门控条件,然后调用 \`phase_advance\` 进入下一阶段。`,
|
|
52
|
+
|
|
53
|
+
unstick: (s) => `已经 ${Math.round(s.idleSec / 60)} 分钟没有阶段推进了。` +
|
|
54
|
+
`两条路:(1) 明确说出阻塞在哪里、让开发者用户介入;(2) 查阅当前阶段相关的 meta-skill 换个思路再试。` +
|
|
55
|
+
`如果你确实已经做完、但引擎门控判断错误,用 reason 强制 phase_advance。`,
|
|
56
|
+
|
|
57
|
+
finalize: (s) => `已经进入 finalization。收尾打包:` +
|
|
58
|
+
`确认 rule_skills/coverage_report.md 内容充实、output/releases/<slug>/ 是最新的(` +
|
|
59
|
+
`如果 workflows 在最近一次快照之后还有改动,重新跑 release 工具),final_dashboard.html 反映最新 QC 数据。` +
|
|
60
|
+
`做完之后直接说一声,marathon 会退出。`,
|
|
61
|
+
|
|
62
|
+
stop: () => `Marathon 停止条件已触发。保存状态、总结已完成的工作。`,
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
function formatMilestones(m) {
|
|
66
|
+
if (!m || typeof m !== "object") return "(unknown)";
|
|
67
|
+
const parts = [];
|
|
68
|
+
for (const [k, v] of Object.entries(m)) {
|
|
69
|
+
if (typeof v === "number") parts.push(`${k}:${v}`);
|
|
70
|
+
else if (typeof v === "boolean") parts.push(`${k}:${v ? "yes" : "no"}`);
|
|
71
|
+
else if (Array.isArray(v)) parts.push(`${k}:${v.length}`);
|
|
72
|
+
}
|
|
73
|
+
return parts.slice(0, 6).join(", ") || "(empty)";
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Render a continuation prompt for the given driver state.
|
|
78
|
+
*
|
|
79
|
+
* @param {string} templateKey — one of: initial, continue_phase, advance_phase, unstick, finalize, stop
|
|
80
|
+
* @param {object} state — {goal, currentPhase, milestones, idleSec, lastEventType}
|
|
81
|
+
* @param {string} [language] — "en" or "zh" (defaults to "en")
|
|
82
|
+
* @returns {string}
|
|
83
|
+
*/
|
|
84
|
+
export function renderPrompt(templateKey, state, language = "en") {
|
|
85
|
+
const tmpls = language === "zh" ? TEMPLATES_ZH : TEMPLATES_EN;
|
|
86
|
+
const tmpl = tmpls[templateKey];
|
|
87
|
+
if (!tmpl) {
|
|
88
|
+
throw new Error(`Unknown marathon prompt template: ${templateKey}`);
|
|
89
|
+
}
|
|
90
|
+
return tmpl(state);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
export const PROMPT_TEMPLATES = Object.freeze(Object.keys(TEMPLATES_EN));
|
package/template/.env.template
CHANGED
|
@@ -29,3 +29,19 @@ MONITOR_FREQUENCY=mid
|
|
|
29
29
|
# === Evolution Control ===
|
|
30
30
|
# Maximum evolution iterations per rule before escalating to developer user
|
|
31
31
|
MAX_ITERATIONS=20
|
|
32
|
+
|
|
33
|
+
# === sandbox_exec Timeout (v0.8 P1-F) ===
|
|
34
|
+
# Default timeout when the agent doesn't pass timeout_ms (ms).
|
|
35
|
+
# Claude Code parity = 120000 (2 min). Raise if your default workloads
|
|
36
|
+
# routinely exceed 2 min (e.g., document-parsing benchmarks).
|
|
37
|
+
# KC_EXEC_DEFAULT_TIMEOUT_MS=120000
|
|
38
|
+
#
|
|
39
|
+
# Hard ceiling — agent's timeout_ms is clamped to this (ms). Raise for
|
|
40
|
+
# v0.9 jyppx integration where parser_building can take 10-30 min per
|
|
41
|
+
# corpus. Don't go above 1800000 (30 min) without a specific reason.
|
|
42
|
+
# KC_EXEC_MAX_TIMEOUT_MS=600000
|
|
43
|
+
#
|
|
44
|
+
# Legacy alias (seconds) for KC_EXEC_DEFAULT_TIMEOUT_MS. Deprecated as of
|
|
45
|
+
# v0.8 — prefer the ms-based name. The seconds value is multiplied by
|
|
46
|
+
# 1000 if KC_EXEC_DEFAULT_TIMEOUT_MS isn't set.
|
|
47
|
+
# KC_EXEC_TIMEOUT=120
|
|
@@ -73,6 +73,20 @@ Once a project is past bootstrap and into production, fresh documents often arri
|
|
|
73
73
|
|
|
74
74
|
Discuss the cadence with the developer user during bootstrap — knowing the production input rhythm shapes how skills and workflows should be written (batch vs streaming, idempotency requirements, etc.).
|
|
75
75
|
|
|
76
|
+
## Per-project memory: keep AGENT.md alive
|
|
77
|
+
|
|
78
|
+
`AGENT.md` at the workspace root has per-project memory sections (`Project`, `Decisions`, `Domain Notes`, `User Preferences`). These are intentionally placeholder comments at bootstrap — they're for YOU to fill in as the work surfaces things worth remembering across phases or future sessions.
|
|
79
|
+
|
|
80
|
+
What belongs there:
|
|
81
|
+
- **Project**: corpus identity (regulation name + scope), language, primary vs auxiliary rules, sample doc set composition.
|
|
82
|
+
- **Decisions**: design choices that aren't obvious from code — "non-标 35% limit is bank-level not per-product, so single-doc reports get WARNING not FAIL", "季报 not applicable for R02-06/R02-08 per regulation §39", etc.
|
|
83
|
+
- **Domain Notes**: regulatory or business-domain nuance worth surfacing — "PT/RT/LZ are three distinct product types with different disclosure templates", terminology disambiguation.
|
|
84
|
+
- **User Preferences**: how the developer user wants you to operate on THIS project — verbosity, naming conventions, when to ask vs proceed.
|
|
85
|
+
|
|
86
|
+
Update AGENT.md at natural checkpoints: after the developer user gives you a substantive clarification, after you finish a phase, after you discover a design constraint that affects subsequent phases. Don't wait for a `/remember` instruction — the memory is yours to maintain.
|
|
87
|
+
|
|
88
|
+
A future session resumes by reading AGENT.md first. The richer it is, the less re-explanation the developer user has to do.
|
|
89
|
+
|
|
76
90
|
## When to Re-Bootstrap
|
|
77
91
|
|
|
78
92
|
Return to this skill when:
|
|
@@ -121,6 +121,15 @@ There are two distinct dashboards in this system:
|
|
|
121
121
|
|
|
122
122
|
When a release is built, point end users at the bundled dashboard, not the workspace one. Workspace dashboard stays your developer surface.
|
|
123
123
|
|
|
124
|
+
## Re-release after substantive changes
|
|
125
|
+
|
|
126
|
+
A release bundle is a snapshot of `workflows/` and `rule_skills/` at the moment the `release` tool ran. If you modify any `workflows/<rule>/workflow_v*.py`, `rule_skills/<id>/SKILL.md`, or `check.py` AFTER the release was built, the shipped artifact no longer reflects your actual work. Engine's milestone derivation will surface `releaseIsStale: true` with the divergent file list.
|
|
127
|
+
|
|
128
|
+
When this fires:
|
|
129
|
+
- **Substantive change** (new hybrid path, fixed verdict logic, added rule): re-run the `release` tool to produce a fresh bundle.
|
|
130
|
+
- **Cosmetic edit only** (typo, comment, formatting): create an empty `.accept_stale_release` marker file in the release directory to acknowledge the staleness — `touch output/releases/<slug>/.accept_stale_release`.
|
|
131
|
+
- **DON'T** declare finalization done while a stale release ships. Downstream consumers (other agents, deployed verification systems) read the bundled `parser_v*.py` / `workflows/`, not the workspace.
|
|
132
|
+
|
|
124
133
|
## Developer User Involvement
|
|
125
134
|
|
|
126
135
|
The developer user should see QC results through the dashboard (see `dashboard-reporting`). Key metrics to surface:
|
|
@@ -28,6 +28,8 @@ rule-skills/
|
|
|
28
28
|
|
|
29
29
|
Not every rule needs all of these. A simple threshold check might only need SKILL.md and a script. A complex semantic rule might need detailed references and many samples. Start minimal, add as needed during testing.
|
|
30
30
|
|
|
31
|
+
**Filename case matters.** Use uppercase `SKILL.md` (matching the meta-skill convention you see in `template/skills/`). Linux filesystems are case-sensitive, and engine path-matching, audit scripts, and downstream tooling all assume uppercase. Do not write `skill.md`, `Skill.md`, or any other case variant.
|
|
32
|
+
|
|
31
33
|
## Granularity: 1 rule = 1 skill directory (default)
|
|
32
34
|
|
|
33
35
|
Default to **one rule per skill directory**. Group rules into the same file ONLY when they meet BOTH:
|
|
@@ -47,6 +49,25 @@ E2E #4 demonstrated the cost: an agent wrote `unified_qc.py` to bypass 110 indiv
|
|
|
47
49
|
|
|
48
50
|
If individual skills aren't running cleanly, the right response is to identify which ones break and fix them, not consolidate. The whole pipeline (extraction → skill_testing → distillation → production_qc) assumes one rule = one verifiable artifact.
|
|
49
51
|
|
|
52
|
+
### Anti-pattern: stub SKILL.md OR stub check.py
|
|
53
|
+
|
|
54
|
+
Each rule_skill folder MUST have BOTH a substantive `SKILL.md` AND a substantive `check.py` (or a `check.py` that imports + calls a workflow that does the real work). One side being a stub breaks the contract.
|
|
55
|
+
|
|
56
|
+
**Variant 1 (v0.7.5 贷款 audit § 9.1)**: stub `SKILL.md` (templated 19 lines with `检查逻辑: N/A`) paired with real `check.py` (44-131 LOC of regex methodology). SKILL.md is supposed to be the human-readable methodology document. A reader scanning the rule folder for "what does this verify and why" gets nothing. The agent put all the methodology into `check.py` comments, which works for the engine but loses the deliverable framing.
|
|
57
|
+
|
|
58
|
+
**Variant 2 (v0.7.5 资管 audit § 3.4)**: substantive `SKILL.md` (real methodology, PASS/FAIL criteria, regulation cross-refs) paired with stub `check.py` (29-line scaffold returning `{"verdict": "NOT_APPLICABLE", "evidence": "Check requires worker LLM execution"}`). The real check logic lives in `workflows/<rule_id>/workflow.py` — but `check.py` doesn't import or call it. A user running `python rule_skills/R01-01/check.py document.txt` gets `NOT_APPLICABLE` on every input, which is misleading.
|
|
59
|
+
|
|
60
|
+
**Variant 3 (legacy v0.7.0)**: stub `check.py` returning `{"pass": null, "method": "stub"}` paired with otherwise-real SKILL.md. Methodology described but never executable.
|
|
61
|
+
|
|
62
|
+
**The contract**:
|
|
63
|
+
- ✓ DO: SKILL.md describes WHAT to check + WHY + WHEN to flag it. Substantive — typically 50-300 lines, not 19.
|
|
64
|
+
- ✓ DO: check.py implements the check. EITHER substantive direct logic OR `from workflows.<rule_id>.workflow_v1 import verify` + delegate. Returns concrete verdicts.
|
|
65
|
+
- ✗ DON'T: stub SKILL.md with methodology in check.py comments (variant 1).
|
|
66
|
+
- ✗ DON'T: substantive SKILL.md with check.py that returns NOT_APPLICABLE without delegating to a workflow (variant 2).
|
|
67
|
+
- ✗ DON'T: stub check.py returning null verdict (variant 3, legacy).
|
|
68
|
+
|
|
69
|
+
A future engine milestone check (v0.8 P2-F) may refuse phase advance if too many check.py files are stub-shaped. Better to author them substantively now.
|
|
70
|
+
|
|
50
71
|
## Writing SKILL.md
|
|
51
72
|
|
|
52
73
|
### Frontmatter
|
|
@@ -102,6 +123,24 @@ Scripts should be self-contained Python files that can be imported or executed.
|
|
|
102
123
|
|
|
103
124
|
Do not put LLM prompts in scripts. LLM interactions belong in the SKILL.md body or in the workflow (later phase).
|
|
104
125
|
|
|
126
|
+
### Strip reviewer annotations before keyword matching
|
|
127
|
+
|
|
128
|
+
Sample documents often carry reviewer-annotation footers (`预期命中点: ...`, `标注: ...`, `Expected: ...`) that mark the ground-truth verdict for testing. If your check.py uses keyword/regex matching against the document body, these annotations will leak into the match — producing false-positive PASS on violation samples (your rule "finds" the disclosure keyword inside the annotation itself, not the actual document content).
|
|
129
|
+
|
|
130
|
+
The canonical helper ships at `workflows/common/utils.py` and is auto-populated into every workspace at engine init:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from workflows.common.utils import strip_annotations
|
|
134
|
+
|
|
135
|
+
def check(document_text):
|
|
136
|
+
    text = strip_annotations(document_text)
|
|
137
|
+
    # ... your real check logic against `text`, not document_text
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Recognized prefixes (Chinese + English variants): 预期命中点, 预期结果, 预期判定, 预期验证, 标注, 审核标注, Expected, expected, EXPECTED, Annotation, annotation. Pass `extra_prefixes=("...", "...")` if your project uses different labels.
|
|
141
|
+
|
|
142
|
+
E2E #11 贷款 v0.8 audit: 4/14 rules had standalone check.py false-positive PASS on violation samples because they matched the `预期命中点: ...年化利率` footer instead of the document body. v0.8.1 ships the helper as a template file so this trap is one import away from being avoided.
|
|
143
|
+
|
|
105
144
|
## Writing References
|
|
106
145
|
|
|
107
146
|
`references/` holds content that the coding agent reads on demand:
|
|
@@ -187,3 +187,56 @@ Worker LLMs are accessed via SiliconFlow API. Connection details are in `.env`:
|
|
|
187
187
|
- Model names in `TIER1` through `TIER4`
|
|
188
188
|
|
|
189
189
|
See `references/worker-llm-catalog.md` for current model capabilities and context window sizes.
|
|
190
|
+
|
|
191
|
+
## Two access paths: `worker_llm_call` tool (preferred) vs direct HTTP
|
|
192
|
+
|
|
193
|
+
KC ships a `worker_llm_call` tool. Use it whenever possible — the engine sees every call, can track cost + token spend, applies rate limiting, and surfaces the calls in audits. v0.8 P2-B added a batch mode:
|
|
194
|
+
|
|
195
|
+
```
|
|
196
|
+
worker_llm_call({
|
|
197
|
+
tier: "tier1",
|
|
198
|
+
prompts: ["check doc A...", "check doc B...", "check doc C..."],
|
|
199
|
+
system_prompt: "You are a compliance assistant. Reply with JSON {verdict, evidence, confidence}.",
|
|
200
|
+
concurrency: 5 // 1-10, default 5
|
|
201
|
+
})
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Returns a `{n_total, n_succeeded, n_failed, total_tokens_in, total_tokens_out, results: [...]}` summary. Partial failures don't fail the whole batch.
|
|
205
|
+
|
|
206
|
+
### The canonical `workflows/common/llm_client.py` (v0.8.1 — ship from template)
|
|
207
|
+
|
|
208
|
+
For a workflow that runs **standalone** (no KC session — e.g., a customer deploys the release bundle and runs `python run.py doc.pdf`), the workflow has no access to `worker_llm_call`. The canonical HTTP client shim ships as a template file and is auto-populated into every workspace's `workflows/common/llm_client.py` at engine init. **Do not write your own.** Use the file that's already there:
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from workflows.common.llm_client import call
|
|
212
|
+
|
|
213
|
+
result = call(
|
|
214
|
+
tier="tier2",
|
|
215
|
+
prompt=user_prompt,
|
|
216
|
+
system_prompt="You are a compliance assistant. Reply with JSON.",
|
|
217
|
+
max_tokens=2048,
|
|
218
|
+
)
|
|
219
|
+
# result = {"response": "...", "model_used": "...", "tier": "tier2",
|
|
220
|
+
# "tokens_in": N, "tokens_out": N}
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
What the shim does:
|
|
224
|
+
- Reads `LLM_API_KEY` + `LLM_BASE_URL` + `TIER1..4` from `.env` (provider-agnostic — works with any provider that exposes an OpenAI-compatible endpoint: SiliconFlow, OpenAI, Anthropic, Aliyun, etc.)
|
|
225
|
+
- Sends OpenAI-format chat completions to the configured base URL
|
|
226
|
+
- Writes a line to `output/llm_ledger.jsonl` per call so KC audits can reconstruct cost even when worker_llm_call wasn't used
|
|
227
|
+
- Raises an explicit error if `LLM_BASE_URL` is missing (no silent fallback to a hardcoded vendor)
|
|
228
|
+
|
|
229
|
+
**Don't write your own llm_client.py from scratch.** Three v0.7.x/v0.8 sessions in a row had agents roll their own shim — buggy (stale model IDs, hardcoded SiliconFlow URL, no ledger) and invisible to the engine. Use the canonical shim; if it's missing for some reason, copy it from `template/workflows/common/llm_client.py` in the kc-beta install (the engine also auto-populates it at init — check for the `workflows_common_populated` event in events.jsonl).
|
|
230
|
+
|
|
231
|
+
## sandbox_exec timeout for known-slow commands
|
|
232
|
+
|
|
233
|
+
The default `sandbox_exec` timeout is 120 seconds. For commands you expect to take longer — LLM batch processing, large regression runs, document parsing — pass an explicit `timeout_ms` (up to 600000ms = 10 minutes). Don't fight the default by re-batching into artificially small chunks; that wastes turns and obscures intent.
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
sandbox_exec({
|
|
237
|
+
command: "python scripts/v2_full_test.py",
|
|
238
|
+
timeout_ms: 480000 // 8 minutes for 14 rules × 6 docs through worker LLM
|
|
239
|
+
})
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
If you're at the 10-minute ceiling and still timing out, split the work into multiple invocations OR delegate to a subagent (subagent timeouts are independent of the parent's).
|