kc-beta 0.7.5 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/README.md +47 -0
  2. package/package.json +3 -2
  3. package/src/agent/context.js +17 -1
  4. package/src/agent/engine.js +467 -100
  5. package/src/agent/llm-client.js +24 -1
  6. package/src/agent/pipelines/_advance-hints.js +92 -0
  7. package/src/agent/pipelines/_milestone-derive.js +325 -20
  8. package/src/agent/pipelines/skill-authoring.js +49 -3
  9. package/src/agent/tools/agent-tool.js +2 -2
  10. package/src/agent/tools/consult-skill.js +15 -0
  11. package/src/agent/tools/dashboard-render.js +48 -1
  12. package/src/agent/tools/document-parse.js +31 -2
  13. package/src/agent/tools/phase-advance.js +17 -13
  14. package/src/agent/tools/release.js +343 -7
  15. package/src/agent/tools/sandbox-exec.js +65 -8
  16. package/src/agent/tools/worker-llm-call.js +95 -15
  17. package/src/agent/workspace.js +25 -4
  18. package/src/cli/components.js +4 -1
  19. package/src/cli/index.js +125 -8
  20. package/src/config.js +19 -2
  21. package/src/marathon/driver.js +217 -0
  22. package/src/marathon/prompts.js +93 -0
  23. package/template/.env.template +17 -1
  24. package/template/AGENT.md +2 -2
  25. package/template/skills/en/auto-model-selection/SKILL.md +55 -35
  26. package/template/skills/en/bootstrap-workspace/SKILL.md +27 -0
  27. package/template/skills/en/compliance-judgment/SKILL.md +14 -0
  28. package/template/skills/en/confidence-system/SKILL.md +30 -8
  29. package/template/skills/en/corner-case-management/SKILL.md +53 -33
  30. package/template/skills/en/cross-document-verification/SKILL.md +88 -83
  31. package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
  32. package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
  33. package/template/skills/en/data-sensibility/SKILL.md +19 -12
  34. package/template/skills/en/document-chunking/SKILL.md +99 -15
  35. package/template/skills/en/entity-extraction/SKILL.md +14 -4
  36. package/template/skills/en/quality-control/SKILL.md +23 -0
  37. package/template/skills/en/rule-extraction/SKILL.md +92 -94
  38. package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
  39. package/template/skills/en/skill-authoring/SKILL.md +85 -2
  40. package/template/skills/en/skill-creator/SKILL.md +25 -3
  41. package/template/skills/en/skill-to-workflow/SKILL.md +73 -1
  42. package/template/skills/en/task-decomposition/SKILL.md +1 -1
  43. package/template/skills/en/tree-processing/SKILL.md +1 -1
  44. package/template/skills/en/version-control/SKILL.md +15 -0
  45. package/template/skills/en/work-decomposition/SKILL.md +52 -32
  46. package/template/skills/phase_skills.yaml +5 -0
  47. package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
  48. package/template/skills/zh/bootstrap-workspace/SKILL.md +27 -0
  49. package/template/skills/zh/compliance-judgment/SKILL.md +51 -37
  50. package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
  51. package/template/skills/zh/confidence-system/SKILL.md +34 -9
  52. package/template/skills/zh/corner-case-management/SKILL.md +71 -104
  53. package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
  54. package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
  55. package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
  56. package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
  57. package/template/skills/zh/data-sensibility/SKILL.md +13 -0
  58. package/template/skills/zh/document-chunking/SKILL.md +101 -18
  59. package/template/skills/zh/document-parsing/SKILL.md +65 -65
  60. package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
  61. package/template/skills/zh/entity-extraction/SKILL.md +78 -68
  62. package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
  63. package/template/skills/zh/quality-control/SKILL.md +23 -0
  64. package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
  65. package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
  66. package/template/skills/zh/rule-extraction/SKILL.md +199 -188
  67. package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
  68. package/template/skills/zh/skill-authoring/SKILL.md +136 -58
  69. package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
  70. package/template/skills/zh/skill-creator/SKILL.md +215 -201
  71. package/template/skills/zh/skill-creator/references/schemas.md +60 -60
  72. package/template/skills/zh/skill-to-workflow/SKILL.md +73 -1
  73. package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
  74. package/template/skills/zh/task-decomposition/SKILL.md +1 -1
  75. package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
  76. package/template/skills/zh/tree-processing/SKILL.md +67 -63
  77. package/template/skills/zh/version-control/SKILL.md +15 -0
  78. package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
  79. package/template/skills/zh/work-decomposition/SKILL.md +52 -30
  80. package/template/workflows/common/llm_client.py +168 -0
  81. package/template/workflows/common/utils.py +132 -0
@@ -25,16 +25,38 @@ function detectSharedFileWrites(command) {
25
25
  * Execute shell commands in the workspace directory.
26
26
  * Uses child_process.spawn so pipes, redirects, && all work.
27
27
  * Output (stdout + stderr combined) is capped at 10K chars.
28
+ *
29
+ * v0.8 P1-F timeout model:
30
+ * - Default: KC_EXEC_DEFAULT_TIMEOUT_MS (env) or 120000ms (2 min)
31
+ * - Hard cap: KC_EXEC_MAX_TIMEOUT_MS (env) or 600000ms (10 min)
32
+ * - Per-call `timeout_ms` overrides default, clamped to [1000, max]
33
+ * - Legacy `KC_EXEC_TIMEOUT` (seconds) still accepted as a deprecation
34
+ * alias for the default; emits a warning to stderr on first read.
28
35
  */
29
36
  export class SandboxExecTool extends BaseTool {
30
37
  /**
31
38
  * @param {import('../workspace.js').Workspace} workspace
32
- * @param {number} [timeout=30]
39
+ * @param {object|number} [opts] — either a config object (new) OR
40
+ * a number meaning the legacy timeout-in-seconds (old). The number
41
+ * form is preserved for callers that haven't been updated yet.
42
+ * @param {number} [opts.defaultTimeoutMs] — default 120000
43
+ * @param {number} [opts.maxTimeoutMs] — default 600000
33
44
  */
34
- constructor(workspace, timeout = 30) {
45
+ constructor(workspace, opts = {}) {
35
46
  super();
36
47
  this._workspace = workspace;
37
- this._timeout = timeout;
48
+
49
+ // Legacy: opts is a bare number = seconds. Convert to ms.
50
+ if (typeof opts === "number") {
51
+ this._defaultTimeoutMs = opts * 1000;
52
+ this._maxTimeoutMs = Math.max(this._defaultTimeoutMs, 600_000);
53
+ } else {
54
+ this._defaultTimeoutMs = opts.defaultTimeoutMs ?? 120_000;
55
+ this._maxTimeoutMs = opts.maxTimeoutMs ?? 600_000;
56
+ }
57
+ // Floor: keep at least 1s. Cap: max can't be below default.
58
+ this._defaultTimeoutMs = Math.max(1000, this._defaultTimeoutMs);
59
+ this._maxTimeoutMs = Math.max(this._defaultTimeoutMs, this._maxTimeoutMs);
38
60
  }
39
61
 
40
62
  get name() { return "sandbox_exec"; }
@@ -47,7 +69,10 @@ export class SandboxExecTool extends BaseTool {
47
69
  "Pipes, redirects, and chained commands (&&) are supported. " +
48
70
  "stdout + stderr combined are capped at 10,000 chars; longer output is truncated. " +
49
71
  "For reading individual files larger than ~10 KB (e.g. regulation documents), " +
50
- "prefer workspace_file (operation=read) which has a larger 50 KB cap."
72
+ "prefer workspace_file (operation=read) which has a larger 50 KB cap. " +
73
+ `Default timeout ${Math.round(this._defaultTimeoutMs / 1000)}s; pass timeout_ms ` +
74
+ `to extend up to ${Math.round(this._maxTimeoutMs / 1000)}s for known-slow commands ` +
75
+ `(LLM batch processing, document parsing, large regression runs).`
51
76
  );
52
77
  }
53
78
 
@@ -64,6 +89,10 @@ export class SandboxExecTool extends BaseTool {
64
89
  enum: ["workspace", "project"],
65
90
  description: "Working directory. 'workspace' (default) = KC's workspace. 'project' = user's project directory.",
66
91
  },
92
+ timeout_ms: {
93
+ type: "integer",
94
+ description: `Optional per-call timeout in milliseconds. Default ${this._defaultTimeoutMs}ms; clamped to [1000, ${this._maxTimeoutMs}]. Pass for commands you expect to take longer than the default (LLM batches, parsing, regressions).`,
95
+ },
67
96
  },
68
97
  required: ["command"],
69
98
  };
@@ -76,6 +105,22 @@ export class SandboxExecTool extends BaseTool {
76
105
  return new ToolResult("No command provided", true);
77
106
  }
78
107
 
108
+ // v0.8 P1-F: per-call timeout clamping
109
+ let effectiveTimeoutMs = this._defaultTimeoutMs;
110
+ let clampedMessage = null;
111
+ if (Number.isFinite(input.timeout_ms) && input.timeout_ms > 0) {
112
+ const requested = Math.floor(input.timeout_ms);
113
+ if (requested < 1000) {
114
+ effectiveTimeoutMs = 1000;
115
+ clampedMessage = `timeout_ms=${requested} below 1000ms floor; using 1000ms.`;
116
+ } else if (requested > this._maxTimeoutMs) {
117
+ effectiveTimeoutMs = this._maxTimeoutMs;
118
+ clampedMessage = `timeout_ms=${requested} above ${this._maxTimeoutMs}ms ceiling; clamped to ${this._maxTimeoutMs}ms.`;
119
+ } else {
120
+ effectiveTimeoutMs = requested;
121
+ }
122
+ }
123
+
79
124
  const effectiveCwd = (cwdScope === "project" && this._workspace.projectDir)
80
125
  ? this._workspace.projectDir
81
126
  : this._workspace.cwd;
@@ -86,7 +131,7 @@ export class SandboxExecTool extends BaseTool {
86
131
  const sharedHits = detectSharedFileWrites(command);
87
132
 
88
133
  try {
89
- const { output, code } = await this._run(command, effectiveCwd);
134
+ const { output, code } = await this._run(command, effectiveCwd, effectiveTimeoutMs);
90
135
  let result = output;
91
136
  if (result.length > MAX_OUTPUT) {
92
137
  result = result.slice(0, MAX_OUTPUT) + "\n[truncated]";
@@ -101,10 +146,20 @@ export class SandboxExecTool extends BaseTool {
101
146
  ` Under concurrent subagents this races — use workspace_file or rule_catalog instead.\n\n`;
102
147
  result = prefix + result;
103
148
  }
149
+ if (clampedMessage) {
150
+ result = `[note] ${clampedMessage}\n\n` + result;
151
+ }
104
152
  return new ToolResult(result, code !== 0);
105
153
  } catch (err) {
106
154
  if (err.message === "timeout") {
107
- return new ToolResult(`Command timed out after ${this._timeout}s`, true);
155
+ const seconds = Math.round(effectiveTimeoutMs / 1000);
156
+ const hint = effectiveTimeoutMs < this._maxTimeoutMs
157
+ ? ` Pass timeout_ms (up to ${this._maxTimeoutMs}) for known-slow commands.`
158
+ : ` Already at max timeout (${this._maxTimeoutMs}ms); consider splitting the command into smaller batches or running it via a subagent.`;
159
+ return new ToolResult(
160
+ `Command timed out after ${seconds}s (${effectiveTimeoutMs}ms).${hint}`,
161
+ true,
162
+ );
108
163
  }
109
164
  return new ToolResult(`Execution error: ${err.message}`, true);
110
165
  }
@@ -112,9 +167,11 @@ export class SandboxExecTool extends BaseTool {
112
167
 
113
168
  /**
114
169
  * @param {string} command
170
+ * @param {string} cwd
171
+ * @param {number} timeoutMs
115
172
  * @returns {Promise<{output: string, code: number}>}
116
173
  */
117
- _run(command, cwd) {
174
+ _run(command, cwd, timeoutMs) {
118
175
  return new Promise((resolve, reject) => {
119
176
  const controller = new AbortController();
120
177
  const proc = spawn("sh", ["-c", command], {
@@ -130,7 +187,7 @@ export class SandboxExecTool extends BaseTool {
130
187
  const timer = setTimeout(() => {
131
188
  controller.abort();
132
189
  reject(new Error("timeout"));
133
- }, this._timeout * 1000);
190
+ }, timeoutMs);
134
191
 
135
192
  proc.on("close", (code) => {
136
193
  clearTimeout(timer);
@@ -49,7 +49,10 @@ export class WorkerLLMCallTool extends BaseTool {
49
49
  return (
50
50
  "Call a worker LLM at a specified tier (tier1-tier4) for extraction, " +
51
51
  "judgment, or other verification tasks. Tier1 is most capable/expensive, " +
52
- "tier4 is cheapest. Returns response with model used and token counts."
52
+ "tier4 is cheapest. Pass `prompt` for a single call OR `prompts: [...]` " +
53
+ "for batch (parallel up to concurrency=5). Returns response(s) with " +
54
+ "model used and token counts. v0.8 P2-B: batch mode keeps the engine " +
55
+ "visible to LLM usage instead of agents bypassing via direct HTTP."
53
56
  );
54
57
  }
55
58
 
@@ -58,29 +61,105 @@ export class WorkerLLMCallTool extends BaseTool {
58
61
  type: "object",
59
62
  properties: {
60
63
  tier: { type: "string", enum: ["tier1", "tier2", "tier3", "tier4"], description: "Worker LLM tier to use" },
61
- prompt: { type: "string", description: "The user/task prompt to send" },
62
- system_prompt: { type: "string", description: "Optional system prompt for context" },
63
- max_tokens: { type: "integer", description: "Maximum tokens in response (default 4096)" },
64
+ prompt: { type: "string", description: "The user/task prompt to send (single-call mode)" },
65
+ prompts: {
66
+ type: "array",
67
+ items: { type: "string" },
68
+ description: "Batch mode: array of prompts processed in parallel (up to concurrency=5). All share the same tier + system_prompt. Mutually exclusive with `prompt`.",
69
+ },
70
+ system_prompt: { type: "string", description: "Optional system prompt for context (shared across all prompts in batch mode)" },
71
+ max_tokens: { type: "integer", description: "Maximum tokens per response (default 4096)" },
72
+ concurrency: { type: "integer", description: "Batch mode only: max parallel requests (default 5, max 10)" },
64
73
  },
65
- required: ["tier", "prompt"],
74
+ required: ["tier"],
66
75
  };
67
76
  }
68
77
 
69
78
  async execute(input) {
70
79
  const tier = input.tier || "tier2";
71
- const prompt = input.prompt || "";
72
80
  const systemPrompt = input.system_prompt;
73
81
  const maxTokens = input.max_tokens || 4096;
74
82
 
75
- if (!prompt) return new ToolResult("No prompt provided", true);
76
83
  if (!this._apiKey) return new ToolResult("Worker LLM API key not configured", true);
77
84
 
85
+ // v0.8 P2-B: batch mode dispatch
86
+ if (Array.isArray(input.prompts)) {
87
+ return this._executeBatch(input.prompts, { tier, systemPrompt, maxTokens, concurrency: input.concurrency });
88
+ }
89
+
90
+ const prompt = input.prompt || "";
91
+ if (!prompt) return new ToolResult("No prompt provided (pass `prompt` for single-call or `prompts: [...]` for batch)", true);
92
+
93
+ const result = await this._executeOne({ prompt, tier, systemPrompt, maxTokens });
94
+ if (result.error) return new ToolResult(result.error, true);
95
+ return new ToolResult(JSON.stringify(result.payload, null, 2));
96
+ }
97
+
98
+ /**
99
+ * v0.8 P2-B: process N prompts in parallel with concurrency control.
100
+ * Returns a JSON object holding the per-prompt outcomes under "results" plus
101
+ * summary stats (n_total, n_succeeded, n_failed, total_tokens_in, total_tokens_out).
102
+ * Partial failures don't fail the whole call: a failed entry carries its own `error`, and the call is flagged as an error only when every prompt fails.
103
+ */
104
+ async _executeBatch(prompts, { tier, systemPrompt, maxTokens, concurrency }) {
105
+ if (prompts.length === 0) return new ToolResult("Empty prompts array", true);
78
106
  this._loadTiers();
79
107
  const models = this._tierModels[tier] || [];
80
108
  if (models.length === 0) {
81
109
  return new ToolResult(`No models configured for ${tier}. Check .env TIER1-TIER4 settings.`, true);
82
110
  }
83
111
 
112
+ const limit = Math.max(1, Math.min(10, Number.isFinite(concurrency) ? concurrency : 5));
113
+ const results = new Array(prompts.length);
114
+ let cursor = 0;
115
+ let tokensIn = 0;
116
+ let tokensOut = 0;
117
+ let nFailed = 0;
118
+
119
+ const worker = async () => {
120
+ while (true) {
121
+ const idx = cursor++;
122
+ if (idx >= prompts.length) break;
123
+ const r = await this._executeOne({ prompt: prompts[idx], tier, systemPrompt, maxTokens });
124
+ if (r.error) {
125
+ results[idx] = { index: idx, error: r.error };
126
+ nFailed++;
127
+ } else {
128
+ results[idx] = { index: idx, ...r.payload };
129
+ tokensIn += r.payload.tokens_in || 0;
130
+ tokensOut += r.payload.tokens_out || 0;
131
+ }
132
+ }
133
+ };
134
+
135
+ await Promise.all(Array.from({ length: limit }, () => worker()));
136
+
137
+ const summary = {
138
+ n_total: prompts.length,
139
+ n_succeeded: prompts.length - nFailed,
140
+ n_failed: nFailed,
141
+ total_tokens_in: tokensIn,
142
+ total_tokens_out: tokensOut,
143
+ tier,
144
+ concurrency: limit,
145
+ results,
146
+ };
147
+ return new ToolResult(JSON.stringify(summary, null, 2), nFailed > 0 && nFailed === prompts.length);
148
+ }
149
+
150
+ /**
151
+ * Single-prompt path. Returns {error?: string, payload?: {...}}.
152
+ * Used by both single-call and batch modes. Each call repeats the tier
153
+ * lookup itself; batch mode runs several of these calls concurrently.
154
+ */
155
+ async _executeOne({ prompt, tier, systemPrompt, maxTokens }) {
156
+ if (!prompt) return { error: "Empty prompt" };
157
+ this._loadTiers();
158
+ const models = this._tierModels[tier] || [];
159
+ if (models.length === 0) {
160
+ return { error: `No models configured for ${tier}. Check .env TIER1-TIER4 settings.` };
161
+ }
162
+
84
163
  const messages = [];
85
164
  if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
86
165
  messages.push({ role: "user", content: prompt });
@@ -98,14 +177,15 @@ export class WorkerLLMCallTool extends BaseTool {
98
177
  if (resp.ok) {
99
178
  const data = await resp.json();
100
179
  const usage = data.usage || {};
101
- const result = {
102
- response: data.choices[0].message.content,
103
- model_used: model,
104
- tier,
105
- tokens_in: usage.prompt_tokens || 0,
106
- tokens_out: usage.completion_tokens || 0,
180
+ return {
181
+ payload: {
182
+ response: data.choices[0].message.content,
183
+ model_used: model,
184
+ tier,
185
+ tokens_in: usage.prompt_tokens || 0,
186
+ tokens_out: usage.completion_tokens || 0,
187
+ },
107
188
  };
108
- return new ToolResult(JSON.stringify(result, null, 2));
109
189
  }
110
190
  lastError = `${model}: HTTP ${resp.status}`;
111
191
  } catch (e) {
@@ -113,6 +193,6 @@ export class WorkerLLMCallTool extends BaseTool {
113
193
  }
114
194
  }
115
195
 
116
- return new ToolResult(`All models for ${tier} failed. Last error: ${lastError}`, true);
196
+ return { error: `All models for ${tier} failed. Last error: ${lastError}` };
117
197
  }
118
198
  }
@@ -170,11 +170,12 @@ export class Workspace {
170
170
  * @param {{timeoutMs?: number, retryMs?: number, staleMs?: number}} [opts]
171
171
  * @returns {Promise<T>}
172
172
  */
173
- async withFileLock(relPath, fn, { timeoutMs = 10_000, retryMs = 50, staleMs = 60_000 } = {}) {
173
+ async withFileLock(relPath, fn, { timeoutMs = 10_000, retryMs = 50, staleMs = 60_000, eventLog = null, blockedWarnMs = 5_000 } = {}) {
174
174
  const target = this.resolvePath(relPath);
175
175
  fs.mkdirSync(path.dirname(target), { recursive: true });
176
176
  const lockPath = target + ".lock";
177
177
  const start = Date.now();
178
+ let blockedWarned = false;
178
179
 
179
180
  while (true) {
180
181
  let fd;
@@ -193,7 +194,24 @@ export class Workspace {
193
194
  // Lockfile vanished between EEXIST and stat — retry to acquire.
194
195
  continue;
195
196
  }
196
- if (Date.now() - start > timeoutMs) {
197
+ // v0.8 P4-C: emit lock_blocked event once when wait crosses
198
+ // blockedWarnMs (default 5s). Lets parent see subagent contention
199
+ // before the call fails. 贷款 v0.7.5 audit: subagent burned 5 min
200
+ // on silent lock contention; parent only saw it as a long-running
201
+ // subagent. Now there's a visible signal.
202
+ const waited = Date.now() - start;
203
+ if (!blockedWarned && waited > blockedWarnMs && eventLog?.append) {
204
+ try {
205
+ eventLog.append("lock_blocked", {
206
+ path: relPath,
207
+ waited_ms: waited,
208
+ session_id: this.sessionId,
209
+ pid: process.pid,
210
+ });
211
+ } catch { /* best-effort */ }
212
+ blockedWarned = true;
213
+ }
214
+ if (waited > timeoutMs) {
197
215
  throw new Error(`Timeout acquiring lock on ${relPath} after ${timeoutMs}ms (held by another engine)`);
198
216
  }
199
217
  await new Promise((r) => setTimeout(r, retryMs));
@@ -221,8 +239,11 @@ export class Workspace {
221
239
  * Lets callsites uniformly wrap their writes without knowing which
222
240
  * paths are shared.
223
241
  */
224
- async withSharedLockIfApplicable(relPath, fn) {
225
- if (isSharedCoordinationPath(relPath)) return this.withFileLock(relPath, fn);
242
+ async withSharedLockIfApplicable(relPath, fn, opts = {}) {
243
+ // v0.8 P4-C: forward optional {eventLog, ...} through to withFileLock
244
+ // so lock_blocked events can fire from any call site (workspace_file,
245
+ // rule_catalog, etc.) once they pass their engine's eventLog.
246
+ if (isSharedCoordinationPath(relPath)) return this.withFileLock(relPath, fn, opts);
226
247
  return fn();
227
248
  }
228
249
 
@@ -89,7 +89,7 @@ function truncateVisual(s, maxCells) {
89
89
  return head + "…" + tail;
90
90
  }
91
91
 
92
- export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
92
+ export function StatusBar({ sessionId, phase, contextTokens, contextLimit, marathonActive }) {
93
93
  const samplesRef = useRef([]);
94
94
  const peakRef = useRef(0);
95
95
 
@@ -136,6 +136,9 @@ export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
136
136
  h(Text, { dimColor: true, wrap: "truncate-end" }, " ⏵⏵ KC "),
137
137
  h(Text, { dimColor: true, wrap: "truncate-end" }, displaySessionId ? `[${displaySessionId}]` : ""),
138
138
  phase ? h(Text, { color: "cyan", wrap: "truncate-end" }, ` ${phase.toUpperCase()}`) : null,
139
+ // v0.8.1 P8-A: marathon-mode indicator. Only renders when active —
140
+ // normal interactive mode shows no indicator (avoid clutter).
141
+ marathonActive ? h(Text, { color: "magenta", bold: true, wrap: "truncate-end" }, " 🏃 MARATHON") : null,
139
142
  h(Text, { color: "green", wrap: "truncate-end" }, " ● "),
140
143
  h(Text, { color: ctxColor, wrap: "truncate-end" }, `CTX: ${ctxLabel}/${limitLabel} (${pct}%)`),
141
144
  showPeak ? h(Text, { dimColor: true, wrap: "truncate-end" }, ` · peak ${fmt(peak)}`) : null,
package/src/cli/index.js CHANGED
@@ -59,6 +59,8 @@ function App({ engine, config }) {
59
59
  const [spinnerStatus, setSpinnerStatus] = useState(null);
60
60
  const [contextTokens, setContextTokens] = useState(0);
61
61
  const [contextLimit, setContextLimit] = useState(config.kcContextLimit || 200000);
62
+ // v0.8.1 P8-A: marathon-mode indicator for StatusBar.
63
+ const [marathonActive, setMarathonActive] = useState(false);
62
64
  const [taskList, setTaskList] = useState([]);
63
65
  const [taskProgress, setTaskProgress] = useState(null);
64
66
 
@@ -124,6 +126,11 @@ function App({ engine, config }) {
124
126
  setCurrentTool(null);
125
127
  setSpinnerStatus(null);
126
128
  updateContextStats();
129
+ // v0.8.1 P8-A: refresh marathon indicator. If the driver
130
+ // self-terminated (max_wallclock / finalization_settled),
131
+ // engine clears marathonDriver on next decideNext loop;
132
+ // we sync the TUI state here.
133
+ setMarathonActive(engineRef.current.isMarathonActive());
127
134
  break;
128
135
 
129
136
  case "tool_start":
@@ -221,6 +228,9 @@ function App({ engine, config }) {
221
228
  " /sessions List all sessions\n" +
222
229
  " /resume <name> Resume a previous session\n" +
223
230
  " /rename <name> Rename current session\n" +
231
+ " /marathon <goal> Activate marathon mode (chains turns automatically)\n" +
232
+ " /marathon off Deactivate marathon (return to interactive)\n" +
233
+ " /marathon status Show marathon driver state\n" +
224
234
  " /exit Quit",
225
235
  });
226
236
  return true;
@@ -593,6 +603,84 @@ function App({ engine, config }) {
593
603
  }
594
604
  return true;
595
605
 
606
+ case "/marathon": {
607
+ // v0.8.1 P8-A: inline marathon mode. `/marathon <goal>` activates;
608
+ // `/marathon off` deactivates; `/marathon status` shows snapshot.
609
+ const sub = arg.split(/\s+/)[0]?.toLowerCase();
610
+ if (sub === "off" || sub === "stop") {
611
+ const final = engineRef.current.exitMarathonMode("user_off");
612
+ setMarathonActive(false);
613
+ if (final) {
614
+ addMessage({
615
+ role: "system",
616
+ content: `Marathon mode OFF.\n decisions: ${final.decisionCount}\n runtime: ${Math.round(final.runtimeMs / 1000)}s\n last phase: ${final.currentPhase}`,
617
+ });
618
+ } else {
619
+ addMessage({ role: "system", content: "Marathon was not active." });
620
+ }
621
+ return true;
622
+ }
623
+ if (sub === "status") {
624
+ if (!engineRef.current.isMarathonActive()) {
625
+ addMessage({ role: "system", content: "Marathon mode is OFF." });
626
+ return true;
627
+ }
628
+ const s = engineRef.current.marathonDriver.getStatus();
629
+ const lines = [
630
+ `Marathon mode ON`,
631
+ ` goal: ${s.goal.slice(0, 100)}${s.goal.length > 100 ? "..." : ""}`,
632
+ ` language: ${s.language}`,
633
+ ` started: ${s.startedAt} (${Math.round(s.runtimeMs / 60000)} min ago)`,
634
+ ` current_phase: ${s.currentPhase}`,
635
+ ` turns this phase: ${s.turnsThisPhase}`,
636
+ ` total decisions: ${s.decisionCount}`,
637
+ ];
638
+ if (s.recentDecisions?.length) {
639
+ lines.push(` recent decisions:`);
640
+ for (const d of s.recentDecisions.slice(-3)) {
641
+ lines.push(` ${d.ts.slice(11, 19)} [${d.template}] ${d.reason}`);
642
+ }
643
+ }
644
+ addMessage({ role: "system", content: lines.join("\n") });
645
+ return true;
646
+ }
647
+ // `/marathon <goal>` — activate
648
+ if (!arg) {
649
+ addMessage({
650
+ role: "system",
651
+ content:
652
+ "Usage:\n" +
653
+ " /marathon <goal description> Activate marathon mode with the given goal\n" +
654
+ " /marathon off Deactivate (return to interactive)\n" +
655
+ " /marathon status Show current driver state\n\n" +
656
+ "Marathon mode chains turns automatically using templated continuation prompts.\n" +
657
+ "F5 strict one-phase-per-prompt is bypassed while active. /resume after a crash\n" +
658
+ "does NOT auto-restore marathon — re-type /marathon to re-engage.",
659
+ });
660
+ return true;
661
+ }
662
+ try {
663
+ const status = engineRef.current.enterMarathonMode(arg);
664
+ setMarathonActive(true);
665
+ addMessage({
666
+ role: "system",
667
+ content:
668
+ `🏃 Marathon mode ON.\n` +
669
+ ` goal: ${arg.slice(0, 200)}${arg.length > 200 ? "..." : ""}\n` +
670
+ ` language: ${status.language}\n` +
671
+ ` stop conditions: ${Math.round(status.maxWallclockMs / 3600000)}h wall-clock OR 5 turns settled in finalization\n\n` +
672
+ `Next turn will use the marathon initial prompt. Type /marathon off to disengage.`,
673
+ });
674
+ // Immediately trigger a turn with the initial prompt
675
+ const initialPrompt = engineRef.current.marathonDriver.getInitialPrompt();
676
+ // Hand the initial prompt to the same runTurn path as a user message
677
+ runTurn(initialPrompt);
678
+ } catch (e) {
679
+ addMessage({ role: "system", content: `Marathon activation failed: ${e.message}` });
680
+ }
681
+ return true;
682
+ }
683
+
596
684
  case "/exit":
597
685
  case "/quit":
598
686
  // Save state + stop diagnostics before exit
@@ -628,12 +716,27 @@ function App({ engine, config }) {
628
716
  }
629
717
 
630
718
  if (streamingRef.current) {
631
- queueRef.current.push(trimmed);
632
- setQueueSize(queueRef.current.length); // F2
633
- addMessage({
634
- role: "system",
635
- content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
636
- });
719
+ // v0.8.2 P12-B: in marathon mode, hand off to engine's input queue
720
+ // instead of the TUI-local queueRef. The engine's marathon decision
721
+ // loop drains it FIRST at each turn boundary, so the user's nudge
722
+ // wins over the driver's continuation. Outside marathon, keep the
723
+ // existing TUI-local queue (drained after runTurn returns).
724
+ const marathonActive = engineRef.current?.isMarathonActive?.() ?? false;
725
+ if (marathonActive && engineRef.current?.queueUserInput) {
726
+ engineRef.current.queueUserInput(trimmed);
727
+ const depth = engineRef.current.getQueueDepth?.() ?? 1;
728
+ addMessage({
729
+ role: "system",
730
+ content: `⏳ Queued for marathon (${depth} waiting). Will be sent before the next driver continuation.`,
731
+ });
732
+ } else {
733
+ queueRef.current.push(trimmed);
734
+ setQueueSize(queueRef.current.length); // F2
735
+ addMessage({
736
+ role: "system",
737
+ content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
738
+ });
739
+ }
637
740
  } else {
638
741
  runTurn(trimmed);
639
742
  }
@@ -752,7 +855,7 @@ function App({ engine, config }) {
752
855
  placeholderRight: queueSize > 0 ? `(${queueSize} queued)` : null,
753
856
  }),
754
857
  h(HRule),
755
- h(StatusBar, { sessionId, phase, contextTokens, contextLimit }),
858
+ h(StatusBar, { sessionId, phase, contextTokens, contextLimit, marathonActive }),
756
859
  );
757
860
  }
758
861
 
@@ -762,9 +865,15 @@ export async function main({ languageOverride } = {}) {
762
865
  // Capture user's project directory (CWD at launch)
763
866
  config.projectDir = process.cwd();
764
867
 
765
- // Session-only language override (does NOT persist to config)
868
+ // Session-only language override (does NOT persist to config).
869
+ // v0.8.3 P20-B3 (Task #218): also set process.env.LANGUAGE so the
870
+ // engine's _overlayWorkspaceEnv() penvWon check honors the CLI flag.
871
+ // Pre-v0.8.3, workspace .env LANGUAGE=en would overwrite a CLI --zh
872
+ // override during engine construction because the overlay only
873
+ // checked process.env, not in-memory config.language.
766
874
  if (languageOverride) {
767
875
  config.language = languageOverride;
876
+ process.env.LANGUAGE = languageOverride;
768
877
  }
769
878
 
770
879
  if (!config.llmApiKey) {
@@ -821,6 +930,14 @@ export async function main({ languageOverride } = {}) {
821
930
  };
822
931
  process.on("SIGINT", saveOnExit);
823
932
  process.on("SIGTERM", saveOnExit);
933
+ // v0.8.1 P8-B: SIGHUP coverage. E2E #11 found macOS sends signals to
934
+ // descendant processes when a Terminal.app window closes or quits;
935
+ // nohup masks SIGHUP but not SIGTERM, and we already cover SIGTERM.
936
+ // Adding SIGHUP makes the kc-beta process robust against terminal
937
+ // teardown even if it's not nohup'd. Without this, a closed terminal
938
+ // can leave KC half-shut-down (events.jsonl flushed, but no
939
+ // marathon_detach event, no clean session-state save).
940
+ process.on("SIGHUP", saveOnExit);
824
941
 
825
942
  const instance = render(h(App, { engine, config }));
826
943
  await instance.waitUntilExit();
package/src/config.js CHANGED
@@ -21,7 +21,11 @@ function loadGlobalConfig() {
21
21
  * Parse a .env file into a key-value object.
22
22
  * Handles KEY=VALUE lines, ignores comments and blank lines.
23
23
  */
24
- function loadEnvFile(envPath) {
24
+ // v0.8 P1-B: exported so engine.js can re-overlay workspace .env after
25
+ // the workspace directory is known (cli/index.js calls loadSettings()
26
+ // without a workspace path because the path isn't known until the engine
27
+ // constructs the Workspace object).
28
+ export function loadEnvFile(envPath) {
25
29
  if (!fs.existsSync(envPath)) return {};
26
30
  // v0.7.0 H9: defend bootstrap against a .env that exists but isn't
27
31
  // readable (permission denied, unexpected directory, encoding error,
@@ -110,7 +114,20 @@ export function loadSettings(workspacePath) {
110
114
 
111
115
  // Workspace (process.env wins — for parallel benchmark runs)
112
116
  kcWorkspaceRoot: penv.KC_WORKSPACE_ROOT || gc.workspace_root || path.join(os.homedir(), ".kc_agent", "workspaces"),
113
- kcExecTimeout: parseInt(env.KC_EXEC_TIMEOUT || "30", 10),
117
+ // v0.8 P1-F sandbox_exec timeout model. Default 120s (Claude Code parity),
118
+ // max 600s (10 min) ceiling. Agent can pass per-call timeout_ms up to max.
119
+ // Legacy KC_EXEC_TIMEOUT (seconds) accepted as deprecation alias for default.
120
+ kcExecDefaultTimeoutMs: parseInt(
121
+ env.KC_EXEC_DEFAULT_TIMEOUT_MS ||
122
+ (env.KC_EXEC_TIMEOUT ? String(parseInt(env.KC_EXEC_TIMEOUT, 10) * 1000) : "") ||
123
+ "120000",
124
+ 10,
125
+ ),
126
+ kcExecMaxTimeoutMs: parseInt(env.KC_EXEC_MAX_TIMEOUT_MS || "600000", 10),
127
+ // Legacy alias (seconds) kept for any consumer reading it directly. Read
128
+ // straight from KC_EXEC_TIMEOUT (default 120); it is NOT derived from the
129
+ // new ms-based field. New code should read kcExecDefaultTimeoutMs / kcExecMaxTimeoutMs.
130
+ kcExecTimeout: parseInt(env.KC_EXEC_TIMEOUT || "120", 10),
114
131
 
115
132
  // Accuracy thresholds
116
133
  skillAccuracy: parseFloat(env.SKILL_ACCURACY || gc.accuracy_threshold?.toString() || "0.9"),