npm - kc-beta - Versions diffs - 0.7.5 → 0.8.3 - Mend

kc-beta 0.7.5 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

package/README.md +47 -0
package/package.json +3 -2
package/src/agent/context.js +17 -1
package/src/agent/engine.js +467 -100
package/src/agent/llm-client.js +24 -1
package/src/agent/pipelines/_advance-hints.js +92 -0
package/src/agent/pipelines/_milestone-derive.js +325 -20
package/src/agent/pipelines/skill-authoring.js +49 -3
package/src/agent/tools/agent-tool.js +2 -2
package/src/agent/tools/consult-skill.js +15 -0
package/src/agent/tools/dashboard-render.js +48 -1
package/src/agent/tools/document-parse.js +31 -2
package/src/agent/tools/phase-advance.js +17 -13
package/src/agent/tools/release.js +343 -7
package/src/agent/tools/sandbox-exec.js +65 -8
package/src/agent/tools/worker-llm-call.js +95 -15
package/src/agent/workspace.js +25 -4
package/src/cli/components.js +4 -1
package/src/cli/index.js +125 -8
package/src/config.js +19 -2
package/src/marathon/driver.js +217 -0
package/src/marathon/prompts.js +93 -0
package/template/.env.template +17 -1
package/template/AGENT.md +2 -2
package/template/skills/en/auto-model-selection/SKILL.md +55 -35
package/template/skills/en/bootstrap-workspace/SKILL.md +27 -0
package/template/skills/en/compliance-judgment/SKILL.md +14 -0
package/template/skills/en/confidence-system/SKILL.md +30 -8
package/template/skills/en/corner-case-management/SKILL.md +53 -33
package/template/skills/en/cross-document-verification/SKILL.md +88 -83
package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
package/template/skills/en/data-sensibility/SKILL.md +19 -12
package/template/skills/en/document-chunking/SKILL.md +99 -15
package/template/skills/en/entity-extraction/SKILL.md +14 -4
package/template/skills/en/quality-control/SKILL.md +23 -0
package/template/skills/en/rule-extraction/SKILL.md +92 -94
package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
package/template/skills/en/skill-authoring/SKILL.md +85 -2
package/template/skills/en/skill-creator/SKILL.md +25 -3
package/template/skills/en/skill-to-workflow/SKILL.md +73 -1
package/template/skills/en/task-decomposition/SKILL.md +1 -1
package/template/skills/en/tree-processing/SKILL.md +1 -1
package/template/skills/en/version-control/SKILL.md +15 -0
package/template/skills/en/work-decomposition/SKILL.md +52 -32
package/template/skills/phase_skills.yaml +5 -0
package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
package/template/skills/zh/bootstrap-workspace/SKILL.md +27 -0
package/template/skills/zh/compliance-judgment/SKILL.md +51 -37
package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
package/template/skills/zh/confidence-system/SKILL.md +34 -9
package/template/skills/zh/corner-case-management/SKILL.md +71 -104
package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
package/template/skills/zh/data-sensibility/SKILL.md +13 -0
package/template/skills/zh/document-chunking/SKILL.md +101 -18
package/template/skills/zh/document-parsing/SKILL.md +65 -65
package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
package/template/skills/zh/entity-extraction/SKILL.md +78 -68
package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
package/template/skills/zh/quality-control/SKILL.md +23 -0
package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
package/template/skills/zh/rule-extraction/SKILL.md +199 -188
package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
package/template/skills/zh/skill-authoring/SKILL.md +136 -58
package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
package/template/skills/zh/skill-creator/SKILL.md +215 -201
package/template/skills/zh/skill-creator/references/schemas.md +60 -60
package/template/skills/zh/skill-to-workflow/SKILL.md +73 -1
package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
package/template/skills/zh/task-decomposition/SKILL.md +1 -1
package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
package/template/skills/zh/tree-processing/SKILL.md +67 -63
package/template/skills/zh/version-control/SKILL.md +15 -0
package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
package/template/skills/zh/work-decomposition/SKILL.md +52 -30
package/template/workflows/common/llm_client.py +168 -0
package/template/workflows/common/utils.py +132 -0

package/src/agent/tools/sandbox-exec.js CHANGED

@@ -25,16 +25,38 @@ function detectSharedFileWrites(command) {
  * Execute shell commands in the workspace directory.
  * Uses child_process.spawn so pipes, redirects, && all work.
  * Output (stdout + stderr combined) is capped at 10K chars.
+ *
+ * v0.8 P1-F timeout model:
+ *   - Default: KC_EXEC_DEFAULT_TIMEOUT_MS (env) or 120000ms (2 min)
+ *   - Hard cap: KC_EXEC_MAX_TIMEOUT_MS (env) or 600000ms (10 min)
+ *   - Per-call `timeout_ms` overrides default, clamped to [1000, max]
+ *   - Legacy `KC_EXEC_TIMEOUT` (seconds) still accepted as a deprecation
+ *     alias for the default; emits a warning to stderr on first read.
  */
 export class SandboxExecTool extends BaseTool {
   /**
    * @param {import('../workspace.js').Workspace} workspace
-   * @param {number} [timeout=30]
+   * @param {object|number} [opts] — either a config object (new) OR
+   *   a number meaning the legacy timeout-in-seconds (old). The number
+   *   form is preserved for callers that haven't been updated yet.
+   * @param {number} [opts.defaultTimeoutMs] — default 120000
+   * @param {number} [opts.maxTimeoutMs] — default 600000
    */
-  constructor(workspace, timeout = 30) {
+  constructor(workspace, opts = {}) {
     super();
     this._workspace = workspace;
-    this._timeout = timeout;
+    // Legacy: opts is a bare number = seconds. Convert to ms.
+    if (typeof opts === "number") {
+      this._defaultTimeoutMs = opts * 1000;
+      this._maxTimeoutMs = Math.max(this._defaultTimeoutMs, 600_000);
+    } else {
+      this._defaultTimeoutMs = opts.defaultTimeoutMs ?? 120_000;
+      this._maxTimeoutMs = opts.maxTimeoutMs ?? 600_000;
+    }
+    // Floor: keep at least 1s. Cap: max can't be below default.
+    this._defaultTimeoutMs = Math.max(1000, this._defaultTimeoutMs);
+    this._maxTimeoutMs = Math.max(this._defaultTimeoutMs, this._maxTimeoutMs);
   }
   get name() { return "sandbox_exec"; }
@@ -47,7 +69,10 @@ export class SandboxExecTool extends BaseTool {
       "Pipes, redirects, and chained commands (&&) are supported. " +
       "stdout + stderr combined are capped at 10,000 chars; longer output is truncated. " +
       "For reading individual files larger than ~10 KB (e.g. regulation documents), " +
-      "prefer workspace_file (operation=read) which has a larger 50 KB cap."
+      "prefer workspace_file (operation=read) which has a larger 50 KB cap. " +
+      `Default timeout ${Math.round(this._defaultTimeoutMs / 1000)}s; pass timeout_ms ` +
+      `to extend up to ${Math.round(this._maxTimeoutMs / 1000)}s for known-slow commands ` +
+      `(LLM batch processing, document parsing, large regression runs).`
     );
   }
@@ -64,6 +89,10 @@ export class SandboxExecTool extends BaseTool {
           enum: ["workspace", "project"],
           description: "Working directory. 'workspace' (default) = KC's workspace. 'project' = user's project directory.",
         },
+        timeout_ms: {
+          type: "integer",
+          description: `Optional per-call timeout in milliseconds. Default ${this._defaultTimeoutMs}ms; clamped to [1000, ${this._maxTimeoutMs}]. Pass for commands you expect to take longer than the default (LLM batches, parsing, regressions).`,
+        },
       },
       required: ["command"],
     };
@@ -76,6 +105,22 @@ export class SandboxExecTool extends BaseTool {
       return new ToolResult("No command provided", true);
     }
+    // v0.8 P1-F: per-call timeout clamping
+    let effectiveTimeoutMs = this._defaultTimeoutMs;
+    let clampedMessage = null;
+    if (Number.isFinite(input.timeout_ms) && input.timeout_ms > 0) {
+      const requested = Math.floor(input.timeout_ms);
+      if (requested < 1000) {
+        effectiveTimeoutMs = 1000;
+        clampedMessage = `timeout_ms=${requested} below 1000ms floor; using 1000ms.`;
+      } else if (requested > this._maxTimeoutMs) {
+        effectiveTimeoutMs = this._maxTimeoutMs;
+        clampedMessage = `timeout_ms=${requested} above ${this._maxTimeoutMs}ms ceiling; clamped to ${this._maxTimeoutMs}ms.`;
+      } else {
+        effectiveTimeoutMs = requested;
+      }
+    }
     const effectiveCwd = (cwdScope === "project" && this._workspace.projectDir)
       ? this._workspace.projectDir
       : this._workspace.cwd;
@@ -86,7 +131,7 @@ export class SandboxExecTool extends BaseTool {
     const sharedHits = detectSharedFileWrites(command);
     try {
-      const { output, code } = await this._run(command, effectiveCwd);
+      const { output, code } = await this._run(command, effectiveCwd, effectiveTimeoutMs);
       let result = output;
       if (result.length > MAX_OUTPUT) {
         result = result.slice(0, MAX_OUTPUT) + "\n[truncated]";
@@ -101,10 +146,20 @@ export class SandboxExecTool extends BaseTool {
           `   Under concurrent subagents this races — use workspace_file or rule_catalog instead.\n\n`;
         result = prefix + result;
       }
+      if (clampedMessage) {
+        result = `[note] ${clampedMessage}\n\n` + result;
+      }
       return new ToolResult(result, code !== 0);
     } catch (err) {
       if (err.message === "timeout") {
-        return new ToolResult(`Command timed out after ${this._timeout}s`, true);
+        const seconds = Math.round(effectiveTimeoutMs / 1000);
+        const hint = effectiveTimeoutMs < this._maxTimeoutMs
+          ? ` Pass timeout_ms (up to ${this._maxTimeoutMs}) for known-slow commands.`
+          : ` Already at max timeout (${this._maxTimeoutMs}ms); consider splitting the command into smaller batches or running it via a subagent.`;
+        return new ToolResult(
+          `Command timed out after ${seconds}s (${effectiveTimeoutMs}ms).${hint}`,
+          true,
+        );
       }
       return new ToolResult(`Execution error: ${err.message}`, true);
     }
@@ -112,9 +167,11 @@ export class SandboxExecTool extends BaseTool {
   /**
    * @param {string} command
+   * @param {string} cwd
+   * @param {number} timeoutMs
    * @returns {Promise<{output: string, code: number}>}
    */
-  _run(command, cwd) {
+  _run(command, cwd, timeoutMs) {
     return new Promise((resolve, reject) => {
       const controller = new AbortController();
       const proc = spawn("sh", ["-c", command], {
@@ -130,7 +187,7 @@ export class SandboxExecTool extends BaseTool {
       const timer = setTimeout(() => {
         controller.abort();
         reject(new Error("timeout"));
-      }, this._timeout * 1000);
+      }, timeoutMs);
       proc.on("close", (code) => {
         clearTimeout(timer);

package/src/agent/tools/worker-llm-call.js CHANGED

@@ -49,7 +49,10 @@ export class WorkerLLMCallTool extends BaseTool {
     return (
       "Call a worker LLM at a specified tier (tier1-tier4) for extraction, " +
       "judgment, or other verification tasks. Tier1 is most capable/expensive, " +
-      "tier4 is cheapest. Returns response with model used and token counts."
+      "tier4 is cheapest. Pass `prompt` for a single call OR `prompts: [...]` " +
+      "for batch (parallel up to concurrency=5). Returns response(s) with " +
+      "model used and token counts. v0.8 P2-B: batch mode keeps the engine " +
+      "visible to LLM usage instead of agents bypassing via direct HTTP."
     );
   }
@@ -58,29 +61,105 @@ export class WorkerLLMCallTool extends BaseTool {
       type: "object",
       properties: {
         tier: { type: "string", enum: ["tier1", "tier2", "tier3", "tier4"], description: "Worker LLM tier to use" },
-        prompt: { type: "string", description: "The user/task prompt to send" },
-        system_prompt: { type: "string", description: "Optional system prompt for context" },
-        max_tokens: { type: "integer", description: "Maximum tokens in response (default 4096)" },
+        prompt: { type: "string", description: "The user/task prompt to send (single-call mode)" },
+        prompts: {
+          type: "array",
+          items: { type: "string" },
+          description: "Batch mode: array of prompts processed in parallel (up to concurrency=5). All share the same tier + system_prompt. Mutually exclusive with `prompt`.",
+        },
+        system_prompt: { type: "string", description: "Optional system prompt for context (shared across all prompts in batch mode)" },
+        max_tokens: { type: "integer", description: "Maximum tokens per response (default 4096)" },
+        concurrency: { type: "integer", description: "Batch mode only: max parallel requests (default 5, max 10)" },
       },
-      required: ["tier", "prompt"],
+      required: ["tier"],
     };
   }
   async execute(input) {
     const tier = input.tier || "tier2";
-    const prompt = input.prompt || "";
     const systemPrompt = input.system_prompt;
     const maxTokens = input.max_tokens || 4096;
-    if (!prompt) return new ToolResult("No prompt provided", true);
     if (!this._apiKey) return new ToolResult("Worker LLM API key not configured", true);
+    // v0.8 P2-B: batch mode dispatch
+    if (Array.isArray(input.prompts)) {
+      return this._executeBatch(input.prompts, { tier, systemPrompt, maxTokens, concurrency: input.concurrency });
+    }
+    const prompt = input.prompt || "";
+    if (!prompt) return new ToolResult("No prompt provided (pass `prompt` for single-call or `prompts: [...]` for batch)", true);
+    const result = await this._executeOne({ prompt, tier, systemPrompt, maxTokens });
+    if (result.error) return new ToolResult(result.error, true);
+    return new ToolResult(JSON.stringify(result.payload, null, 2));
+  }
+  /**
+   * v0.8 P2-B: process N prompts in parallel with concurrency control.
+   * Returns aggregated results as a JSON array under "results" with
+   * summary stats (total_in, total_out, n_failed). Partial failures don't
+   * fail the whole call — individual results carry their own error flag.
+   */
+  async _executeBatch(prompts, { tier, systemPrompt, maxTokens, concurrency }) {
+    if (prompts.length === 0) return new ToolResult("Empty prompts array", true);
     this._loadTiers();
     const models = this._tierModels[tier] || [];
     if (models.length === 0) {
       return new ToolResult(`No models configured for ${tier}. Check .env TIER1-TIER4 settings.`, true);
     }
+    const limit = Math.max(1, Math.min(10, Number.isFinite(concurrency) ? concurrency : 5));
+    const results = new Array(prompts.length);
+    let cursor = 0;
+    let tokensIn = 0;
+    let tokensOut = 0;
+    let nFailed = 0;
+    const worker = async () => {
+      while (true) {
+        const idx = cursor++;
+        if (idx >= prompts.length) break;
+        const r = await this._executeOne({ prompt: prompts[idx], tier, systemPrompt, maxTokens });
+        if (r.error) {
+          results[idx] = { index: idx, error: r.error };
+          nFailed++;
+        } else {
+          results[idx] = { index: idx, ...r.payload };
+          tokensIn += r.payload.tokens_in || 0;
+          tokensOut += r.payload.tokens_out || 0;
+        }
+      }
+    };
+    await Promise.all(Array.from({ length: limit }, () => worker()));
+    const summary = {
+      n_total: prompts.length,
+      n_succeeded: prompts.length - nFailed,
+      n_failed: nFailed,
+      total_tokens_in: tokensIn,
+      total_tokens_out: tokensOut,
+      tier,
+      concurrency: limit,
+      results,
+    };
+    return new ToolResult(JSON.stringify(summary, null, 2), nFailed > 0 && nFailed === prompts.length);
+  }
+  /**
+   * Single-prompt path. Returns {error?: string, payload?: {...}}.
+   * Used by both single-call and batch modes; batch dedups the tier
+   * lookup and shares concurrency with multiple in-flight invocations.
+   */
+  async _executeOne({ prompt, tier, systemPrompt, maxTokens }) {
+    if (!prompt) return { error: "Empty prompt" };
+    this._loadTiers();
+    const models = this._tierModels[tier] || [];
+    if (models.length === 0) {
+      return { error: `No models configured for ${tier}. Check .env TIER1-TIER4 settings.` };
+    }
     const messages = [];
     if (systemPrompt) messages.push({ role: "system", content: systemPrompt });
     messages.push({ role: "user", content: prompt });
@@ -98,14 +177,15 @@ export class WorkerLLMCallTool extends BaseTool {
         if (resp.ok) {
           const data = await resp.json();
           const usage = data.usage || {};
-          const result = {
-            response: data.choices[0].message.content,
-            model_used: model,
-            tier,
-            tokens_in: usage.prompt_tokens || 0,
-            tokens_out: usage.completion_tokens || 0,
+          return {
+            payload: {
+              response: data.choices[0].message.content,
+              model_used: model,
+              tier,
+              tokens_in: usage.prompt_tokens || 0,
+              tokens_out: usage.completion_tokens || 0,
+            },
           };
-          return new ToolResult(JSON.stringify(result, null, 2));
         }
         lastError = `${model}: HTTP ${resp.status}`;
       } catch (e) {
@@ -113,6 +193,6 @@ export class WorkerLLMCallTool extends BaseTool {
       }
     }
-    return new ToolResult(`All models for ${tier} failed. Last error: ${lastError}`, true);
+    return { error: `All models for ${tier} failed. Last error: ${lastError}` };
   }
 }

package/src/agent/workspace.js CHANGED

@@ -170,11 +170,12 @@ export class Workspace {
    * @param {{timeoutMs?: number, retryMs?: number, staleMs?: number}} [opts]
    * @returns {Promise<T>}
    */
-  async withFileLock(relPath, fn, { timeoutMs = 10_000, retryMs = 50, staleMs = 60_000 } = {}) {
+  async withFileLock(relPath, fn, { timeoutMs = 10_000, retryMs = 50, staleMs = 60_000, eventLog = null, blockedWarnMs = 5_000 } = {}) {
     const target = this.resolvePath(relPath);
     fs.mkdirSync(path.dirname(target), { recursive: true });
     const lockPath = target + ".lock";
     const start = Date.now();
+    let blockedWarned = false;
     while (true) {
       let fd;
@@ -193,7 +194,24 @@ export class Workspace {
           // Lockfile vanished between EEXIST and stat — retry to acquire.
           continue;
         }
-        if (Date.now() - start > timeoutMs) {
+        // v0.8 P4-C: emit lock_blocked event once when wait crosses
+        // blockedWarnMs (default 5s). Lets parent see subagent contention
+        // before the call fails. 贷款 v0.7.5 audit: subagent burned 5 min
+        // on silent lock contention; parent only saw it as a long-running
+        // subagent. Now there's a visible signal.
+        const waited = Date.now() - start;
+        if (!blockedWarned && waited > blockedWarnMs && eventLog?.append) {
+          try {
+            eventLog.append("lock_blocked", {
+              path: relPath,
+              waited_ms: waited,
+              session_id: this.sessionId,
+              pid: process.pid,
+            });
+          } catch { /* best-effort */ }
+          blockedWarned = true;
+        }
+        if (waited > timeoutMs) {
           throw new Error(`Timeout acquiring lock on ${relPath} after ${timeoutMs}ms (held by another engine)`);
         }
         await new Promise((r) => setTimeout(r, retryMs));
@@ -221,8 +239,11 @@ export class Workspace {
    * Lets callsites uniformly wrap their writes without knowing which
    * paths are shared.
    */
-  async withSharedLockIfApplicable(relPath, fn) {
-    if (isSharedCoordinationPath(relPath)) return this.withFileLock(relPath, fn);
+  async withSharedLockIfApplicable(relPath, fn, opts = {}) {
+    // v0.8 P4-C: forward optional {eventLog, ...} through to withFileLock
+    // so lock_blocked events can fire from any call site (workspace_file,
+    // rule_catalog, etc.) once they pass their engine's eventLog.
+    if (isSharedCoordinationPath(relPath)) return this.withFileLock(relPath, fn, opts);
     return fn();
   }

package/src/cli/components.js CHANGED

@@ -89,7 +89,7 @@ function truncateVisual(s, maxCells) {
   return head + "…" + tail;
 }
-export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
+export function StatusBar({ sessionId, phase, contextTokens, contextLimit, marathonActive }) {
   const samplesRef = useRef([]);
   const peakRef = useRef(0);
@@ -136,6 +136,9 @@ export function StatusBar({ sessionId, phase, contextTokens, contextLimit }) {
     h(Text, { dimColor: true, wrap: "truncate-end" }, "  ⏵⏵  KC "),
     h(Text, { dimColor: true, wrap: "truncate-end" }, displaySessionId ? `[${displaySessionId}]` : ""),
     phase ? h(Text, { color: "cyan", wrap: "truncate-end" }, ` ${phase.toUpperCase()}`) : null,
+    // v0.8.1 P8-A: marathon-mode indicator. Only renders when active —
+    // normal interactive mode shows no indicator (avoid clutter).
+    marathonActive ? h(Text, { color: "magenta", bold: true, wrap: "truncate-end" }, "  🏃 MARATHON") : null,
     h(Text, { color: "green", wrap: "truncate-end" }, "  ●  "),
     h(Text, { color: ctxColor, wrap: "truncate-end" }, `CTX: ${ctxLabel}/${limitLabel} (${pct}%)`),
     showPeak ? h(Text, { dimColor: true, wrap: "truncate-end" }, ` · peak ${fmt(peak)}`) : null,

package/src/cli/index.js CHANGED

@@ -59,6 +59,8 @@ function App({ engine, config }) {
   const [spinnerStatus, setSpinnerStatus] = useState(null);
   const [contextTokens, setContextTokens] = useState(0);
   const [contextLimit, setContextLimit] = useState(config.kcContextLimit || 200000);
+  // v0.8.1 P8-A: marathon-mode indicator for StatusBar.
+  const [marathonActive, setMarathonActive] = useState(false);
   const [taskList, setTaskList] = useState([]);
   const [taskProgress, setTaskProgress] = useState(null);
@@ -124,6 +126,11 @@ function App({ engine, config }) {
             setCurrentTool(null);
             setSpinnerStatus(null);
             updateContextStats();
+            // v0.8.1 P8-A: refresh marathon indicator. If the driver
+            // self-terminated (max_wallclock / finalization_settled),
+            // engine clears marathonDriver on next decideNext loop;
+            // we sync the TUI state here.
+            setMarathonActive(engineRef.current.isMarathonActive());
             break;
           case "tool_start":
@@ -221,6 +228,9 @@ function App({ engine, config }) {
             "  /sessions            List all sessions\n" +
             "  /resume <name>       Resume a previous session\n" +
             "  /rename <name>       Rename current session\n" +
+            "  /marathon <goal>     Activate marathon mode (chains turns automatically)\n" +
+            "  /marathon off        Deactivate marathon (return to interactive)\n" +
+            "  /marathon status     Show marathon driver state\n" +
             "  /exit                Quit",
         });
         return true;
@@ -593,6 +603,84 @@ function App({ engine, config }) {
         }
         return true;
+      case "/marathon": {
+        // v0.8.1 P8-A: inline marathon mode. `/marathon <goal>` activates;
+        // `/marathon off` deactivates; `/marathon status` shows snapshot.
+        const sub = arg.split(/\s+/)[0]?.toLowerCase();
+        if (sub === "off" || sub === "stop") {
+          const final = engineRef.current.exitMarathonMode("user_off");
+          setMarathonActive(false);
+          if (final) {
+            addMessage({
+              role: "system",
+              content: `Marathon mode OFF.\n  decisions: ${final.decisionCount}\n  runtime: ${Math.round(final.runtimeMs / 1000)}s\n  last phase: ${final.currentPhase}`,
+            });
+          } else {
+            addMessage({ role: "system", content: "Marathon was not active." });
+          }
+          return true;
+        }
+        if (sub === "status") {
+          if (!engineRef.current.isMarathonActive()) {
+            addMessage({ role: "system", content: "Marathon mode is OFF." });
+            return true;
+          }
+          const s = engineRef.current.marathonDriver.getStatus();
+          const lines = [
+            `Marathon mode ON`,
+            `  goal: ${s.goal.slice(0, 100)}${s.goal.length > 100 ? "..." : ""}`,
+            `  language: ${s.language}`,
+            `  started: ${s.startedAt}  (${Math.round(s.runtimeMs / 60000)} min ago)`,
+            `  current_phase: ${s.currentPhase}`,
+            `  turns this phase: ${s.turnsThisPhase}`,
+            `  total decisions: ${s.decisionCount}`,
+          ];
+          if (s.recentDecisions?.length) {
+            lines.push(`  recent decisions:`);
+            for (const d of s.recentDecisions.slice(-3)) {
+              lines.push(`    ${d.ts.slice(11, 19)} [${d.template}] ${d.reason}`);
+            }
+          }
+          addMessage({ role: "system", content: lines.join("\n") });
+          return true;
+        }
+        // `/marathon <goal>` — activate
+        if (!arg) {
+          addMessage({
+            role: "system",
+            content:
+              "Usage:\n" +
+              "  /marathon <goal description>   Activate marathon mode with the given goal\n" +
+              "  /marathon off                  Deactivate (return to interactive)\n" +
+              "  /marathon status               Show current driver state\n\n" +
+              "Marathon mode chains turns automatically using templated continuation prompts.\n" +
+              "F5 strict one-phase-per-prompt is bypassed while active. /resume after a crash\n" +
+              "does NOT auto-restore marathon — re-type /marathon to re-engage.",
+          });
+          return true;
+        }
+        try {
+          const status = engineRef.current.enterMarathonMode(arg);
+          setMarathonActive(true);
+          addMessage({
+            role: "system",
+            content:
+              `🏃 Marathon mode ON.\n` +
+              `  goal: ${arg.slice(0, 200)}${arg.length > 200 ? "..." : ""}\n` +
+              `  language: ${status.language}\n` +
+              `  stop conditions: ${Math.round(status.maxWallclockMs / 3600000)}h wall-clock OR 5 turns settled in finalization\n\n` +
+              `Next turn will use the marathon initial prompt. Type /marathon off to disengage.`,
+          });
+          // Immediately trigger a turn with the initial prompt
+          const initialPrompt = engineRef.current.marathonDriver.getInitialPrompt();
+          // Hand the initial prompt to the same runTurn path as a user message
+          runTurn(initialPrompt);
+        } catch (e) {
+          addMessage({ role: "system", content: `Marathon activation failed: ${e.message}` });
+        }
+        return true;
+      }
       case "/exit":
       case "/quit":
         // Save state + stop diagnostics before exit
@@ -628,12 +716,27 @@ function App({ engine, config }) {
     }
     if (streamingRef.current) {
-      queueRef.current.push(trimmed);
-      setQueueSize(queueRef.current.length); // F2
-      addMessage({
-        role: "system",
-        content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
-      });
+      // v0.8.2 P12-B: in marathon mode, hand off to engine's input queue
+      // instead of the TUI-local queueRef. The engine's marathon decision
+      // loop drains it FIRST at each turn boundary, so the user's nudge
+      // wins over the driver's continuation. Outside marathon, keep the
+      // existing TUI-local queue (drained after runTurn returns).
+      const marathonActive = engineRef.current?.isMarathonActive?.() ?? false;
+      if (marathonActive && engineRef.current?.queueUserInput) {
+        engineRef.current.queueUserInput(trimmed);
+        const depth = engineRef.current.getQueueDepth?.() ?? 1;
+        addMessage({
+          role: "system",
+          content: `⏳ Queued for marathon (${depth} waiting). Will be sent before the next driver continuation.`,
+        });
+      } else {
+        queueRef.current.push(trimmed);
+        setQueueSize(queueRef.current.length); // F2
+        addMessage({
+          role: "system",
+          content: `⏳ Queued (${queueRef.current.length} waiting). Will be sent to KC on next turn boundary.`,
+        });
+      }
     } else {
       runTurn(trimmed);
     }
@@ -752,7 +855,7 @@ function App({ engine, config }) {
       placeholderRight: queueSize > 0 ? `(${queueSize} queued)` : null,
     }),
     h(HRule),
-    h(StatusBar, { sessionId, phase, contextTokens, contextLimit }),
+    h(StatusBar, { sessionId, phase, contextTokens, contextLimit, marathonActive }),
   );
 }
@@ -762,9 +865,15 @@ export async function main({ languageOverride } = {}) {
   // Capture user's project directory (CWD at launch)
   config.projectDir = process.cwd();
-  // Session-only language override (does NOT persist to config)
+  // Session-only language override (does NOT persist to config).
+  // v0.8.3 P20-B3 (Task #218): also set process.env.LANGUAGE so the
+  // engine's _overlayWorkspaceEnv() penvWon check honors the CLI flag.
+  // Pre-v0.8.3, workspace .env LANGUAGE=en would overwrite a CLI --zh
+  // override during engine construction because the overlay only
+  // checked process.env, not in-memory config.language.
   if (languageOverride) {
     config.language = languageOverride;
+    process.env.LANGUAGE = languageOverride;
   }
   if (!config.llmApiKey) {
@@ -821,6 +930,14 @@ export async function main({ languageOverride } = {}) {
   };
   process.on("SIGINT", saveOnExit);
   process.on("SIGTERM", saveOnExit);
+  // v0.8.1 P8-B: SIGHUP coverage. E2E #11 found macOS sends signals to
+  // descendant processes when a Terminal.app window closes or quits;
+  // nohup masks SIGHUP but not SIGTERM, and we already cover SIGTERM.
+  // Adding SIGHUP makes the kc-beta process robust against terminal
+  // teardown even if it's not nohup'd. Without this, a closed terminal
+  // can leave KC half-shut-down (events.jsonl flushed, but no
+  // marathon_detach event, no clean session-state save).
+  process.on("SIGHUP", saveOnExit);
   const instance = render(h(App, { engine, config }));
   await instance.waitUntilExit();

package/src/config.js CHANGED

@@ -21,7 +21,11 @@ function loadGlobalConfig() {
  * Parse a .env file into a key-value object.
  * Handles KEY=VALUE lines, ignores comments and blank lines.
  */
-function loadEnvFile(envPath) {
+// v0.8 P1-B: exported so engine.js can re-overlay workspace .env after
+// the workspace directory is known (cli/index.js calls loadSettings()
+// without a workspace path because the path isn't known until the engine
+// constructs the Workspace object).
+export function loadEnvFile(envPath) {
   if (!fs.existsSync(envPath)) return {};
   // v0.7.0 H9: defend bootstrap against a .env that exists but isn't
   // readable (permission denied, unexpected directory, encoding error,
@@ -110,7 +114,20 @@ export function loadSettings(workspacePath) {
     // Workspace (process.env wins — for parallel benchmark runs)
     kcWorkspaceRoot: penv.KC_WORKSPACE_ROOT || gc.workspace_root || path.join(os.homedir(), ".kc_agent", "workspaces"),
-    kcExecTimeout: parseInt(env.KC_EXEC_TIMEOUT || "30", 10),
+    // v0.8 P1-F sandbox_exec timeout model. Default 120s (Claude Code parity),
+    // max 600s (10 min) ceiling. Agent can pass per-call timeout_ms up to max.
+    // Legacy KC_EXEC_TIMEOUT (seconds) accepted as deprecation alias for default.
+    kcExecDefaultTimeoutMs: parseInt(
+      env.KC_EXEC_DEFAULT_TIMEOUT_MS ||
+      (env.KC_EXEC_TIMEOUT ? String(parseInt(env.KC_EXEC_TIMEOUT, 10) * 1000) : "") ||
+      "120000",
+      10,
+    ),
+    kcExecMaxTimeoutMs: parseInt(env.KC_EXEC_MAX_TIMEOUT_MS || "600000", 10),
+    // Legacy alias kept for any consumer reading it directly. Computed
+    // from the new ms-based field for consistency. New code should read
+    // kcExecDefaultTimeoutMs / kcExecMaxTimeoutMs.
+    kcExecTimeout: parseInt(env.KC_EXEC_TIMEOUT || "120", 10),
     // Accuracy thresholds
     skillAccuracy: parseFloat(env.SKILL_ACCURACY || gc.accuracy_threshold?.toString() || "0.9"),