npm - jeo-code - Versions diffs - 0.4.5 → 0.4.7 - Mend

jeo-code 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.ja.md +2 -2
package/README.ko.md +2 -2
package/README.md +2 -2
package/README.zh.md +2 -2
package/package.json +1 -1
package/src/agent/dev/evolution-bridge.ts +36 -3
package/src/agent/dev/self-analysis.ts +6 -1
package/src/agent/engine.ts +76 -71
package/src/agent/loop.ts +2 -0
package/src/agent/step-budget.ts +10 -0
package/src/agent/subagent-registry.ts +131 -0
package/src/agent/subagent-tool.ts +89 -0
package/src/agent/subagents.ts +22 -3
package/src/agent/task-tool.ts +123 -19
package/src/agent/tool-output.ts +115 -0
package/src/agent/tools.ts +42 -8
package/src/ai/model-manager.ts +9 -14
package/src/ai/model-registry.ts +8 -3
package/src/ai/providers/antigravity.ts +11 -2
package/src/ai/providers/gemini.ts +12 -2
package/src/ai/register-providers.ts +21 -0
package/src/ai/types.ts +4 -0
package/src/cli/runner.ts +0 -9
package/src/commands/launch.ts +157 -52
package/src/commands/team.ts +13 -6
package/src/skills/catalog.ts +0 -2
package/src/tui/app.ts +131 -20
package/src/tui/components/forge.ts +25 -7
package/src/tui/components/input-box.ts +8 -3
package/src/tui/components/markdown-text.ts +10 -1
package/src/tui/components/themes.ts +57 -1
package/src/tui/components/todo-card.ts +44 -13
package/src/tui/monitoring/hud-view.ts +53 -30
package/src/util/update-check.ts +53 -0
package/src/commands/gjc.ts +0 -52
package/src/prompts/skills/gjc/AGENTS.md +0 -31
package/src/prompts/skills/gjc/SKILL.md +0 -15

package/README.ja.md CHANGED Viewed

@@ -150,11 +150,11 @@ CI は `.github/workflows/npm-publish.yml` で公開します — GitHub リリ
 ## 変更履歴 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
+- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
 - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
 - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
 - **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
-- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
-- **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.ko.md CHANGED Viewed

@@ -150,11 +150,11 @@ CI는 `.github/workflows/npm-publish.yml`로 배포합니다 — GitHub 릴리
 ## 변경 이력 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
+- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
 - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
 - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
 - **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
-- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
-- **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.md CHANGED Viewed

@@ -150,11 +150,11 @@ Required npm token permissions (repository secret `NPM_TOKEN`):
 ## Changelog
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
+- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
 - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
 - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
 - **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
-- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
-- **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.zh.md CHANGED Viewed

@@ -150,11 +150,11 @@ CI 通过 `.github/workflows/npm-publish.yml` 发布 — GitHub 发布 release
 ## 更新日志 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
+- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
 - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
 - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
 - **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
-- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
-- **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "jeo-code",
-  "version": "0.4.5",
+  "version": "0.4.7",
   "description": "Clean, highly optimized AI coding agent using spec-first loop",
   "type": "module",
   "main": "src/cli.ts",

package/src/agent/dev/evolution-bridge.ts CHANGED Viewed

@@ -1,7 +1,40 @@
-import { runGjcCommand } from "../../commands/gjc";
+import { runAgentLoop, executorSystemPrompt, DEFAULT_TOOLS } from "../engine";
+import { readGlobalConfig } from "../state";
+import { runPostImplementationHooks } from "../hooks";
 import { runAdvancedAnalysis } from "./advanced-analyzer";
 import { logEvolution } from "./evolution-logger";
+async function runEvolutionLoop(intent: string, cwd: string): Promise<void> { // runs one executor agent loop for intent in cwd, verifies it with the post-implementation hooks, and makes a single repair attempt on failure
+  const config = await readGlobalConfig();
+  const model = config.defaultModel || "fast"; // falls back to fast when defaultModel is unset or empty
+  const systemPrompt = executorSystemPrompt();
+  await runAgentLoop([{ role: "user", content: intent }], {
+    cwd,
+    systemPrompt,
+    model,
+    tools: DEFAULT_TOOLS,
+    maxSteps: 50,
+  });
+  console.log("\n[jeo] Verifying implementation...");
+  const verify = await runPostImplementationHooks(cwd, intent);
+  if (!verify.success) {
+    console.error("\n[jeo] Verification FAILED. Auto-repairing...");
+    const repairTask = `Previous implementation failed verification.\nErrors:\n${verify.output}\n\nPlease fix.`; // the hook output is fed back verbatim as the repair prompt
+    await runAgentLoop([{ role: "user", content: repairTask }], { // NOTE(review): the repair pass starts from a fresh history and is not re-verified afterwards, so a second failure goes unreported here
+      cwd,
+      systemPrompt,
+      model,
+      tools: DEFAULT_TOOLS,
+      maxSteps: 30,
+    });
+  } else {
+    console.log("\n[jeo] Verification SUCCESSFUL.");
+  }
+}
 export async function consultGjcForAdvancedEvolution(cwd: string) {
   const report = await runAdvancedAnalysis(cwd);
   const timestamp = new Date().toISOString();
@@ -27,7 +60,7 @@ As my implementation guide (gjc), please:
   `;
   try {
-    await runGjcCommand([request]);
+    await runEvolutionLoop(request, cwd);
     await logEvolution({
       timestamp: new Date().toISOString(),
@@ -62,7 +95,7 @@ export async function consultGjcForEvolution(cwd: string) {
   console.log();
   try {
-    await runGjcCommand([report]);
+    await runEvolutionLoop(report, cwd);
     await logEvolution({
       timestamp: new Date().toISOString(),
       target: "src/agent/engine.ts",

package/src/agent/dev/self-analysis.ts CHANGED Viewed

@@ -22,7 +22,12 @@ export async function runSelfAnalysis(cwd: string): Promise<string> {
   const content = await fs.readFile(targetPath, "utf-8");
   const lineCount = content.split("\n").length;
-  const hasTooManyResponsibilities = content.includes("runAgentLoop") && content.includes("truncateToolOutput") && content.includes("spillToolResult");
+  // Ownership-accurate SRP check: the loop drives steps, while output shaping
+  // (truncate/spill) lives in tool-output.ts. Flag only when those are DEFINED
+  // here again, not merely imported or re-exported for backward compatibility.
+  const definesOutputShaping =
+    /\bfunction\s+truncateToolOutput\b/.test(content) && /\bfunction\s+spillToolResult\b/.test(content);
+  const hasTooManyResponsibilities = content.includes("runAgentLoop") && definesOutputShaping;
   let report = "Analysis of src/agent/engine.ts:\n";
   report += "- File length: " + lineCount + " lines.\n";

package/src/agent/engine.ts CHANGED Viewed

@@ -16,7 +16,8 @@ import { webSearchTool, setWebSearchActiveModel } from "./web-search";
 import { friendlyProviderError, isContextOverflowError, isRefusalError } from "../util/provider-error";
 import { isRateLimitError } from "../util/retry";
 import { runPreToolHooks, runPostTurnHooks } from "./hooks";
-import { minimizeToolOutput } from "./output-minimizer";
+import { truncateToolOutput, formatToolResultBody } from "./tool-output";
+export { TOOL_OUTPUT_MAX, READ_OUTPUT_MAX, TOOL_SPILL_THRESHOLD, MAX_TOOL_ARTIFACTS, truncateToolOutput, spillToolResult } from "./tool-output";
 import { StepBudget, dynamicStepBudgetConfig, resolveStepBudgetConfig, hashSignature, type StepBudgetConfig } from "./step-budget";
 import { historyTokens, trimToolResultsInPlace } from "./compaction";
 import { jeoEnv } from "../util/env";
@@ -30,6 +31,7 @@ async function invokeCallLlm(history: Message[], options: {
   onUsage?: (u: { inputTokens?: number; outputTokens?: number }) => void;
   onRetry?: (attempt: number, err: unknown, delayMs: number) => void;
   onToken?: (delta: string) => void;
+  onReasoning?: (delta: string) => void;
 }): Promise<string> {
   const mod = await import("./loop");
   return mod.callLlm(history, options);
@@ -39,14 +41,14 @@ export interface ToolInvocation {
   arguments?: Record<string, any>;
 }
-export type ToolHandler = (args: Record<string, any>, cwd: string) => Promise<ToolResult>;
+export type ToolHandler = (args: Record<string, any>, cwd: string, onProgress?: (partialOutput: string) => void) => Promise<ToolResult>;
-/** The default executor toolset (read / write / edit / bash / find / search). */
+/** The default executor toolset (read / write / edit / bash / find / search / ls / mkdir / delete / web_search). */
 export const DEFAULT_TOOLS: Record<string, ToolHandler> = {
   read: (a, cwd) => readTool(a.filePath ?? a.path, a.lineRange ?? a.range, cwd, !!a.raw),
   write: (a, cwd) => writeTool(a.filePath ?? a.path, a.content ?? "", cwd),
   edit: (a, cwd) => editTool(a.filePath ?? a.path, a.editBlock ?? a.edit ?? "", cwd),
-  bash: (a, cwd) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined),
+  bash: (a, cwd, onProgress) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined, onProgress),
   find: (a, cwd) => findTool(a.globPattern ?? a.pattern, cwd),
   search: (a, cwd) => searchTool(a.pattern, a.globPattern ?? "*", cwd, !!(a.ignoreCase ?? a.i), { before: a.before, after: a.after, context: a.context, maxMatches: a.maxMatches }),
   ls: (a, cwd) => lsTool(a.dirPath ?? a.path ?? a.dir ?? ".", cwd),
@@ -134,6 +136,10 @@ export interface AgentLoopEvents {
   onStep?(step: number): void | Promise<void>;
   onAssistant?(raw: string, invocation: ToolInvocation | null): void;
   onToolResult?(tool: string, success: boolean, output: string): void;
+  /** Streaming partial output of the currently-running tool (e.g. bash stdout as it
+   *  arrives) — drives a live DIMMED output view that the final formatted result
+   *  replaces on onToolResult. Only bash emits today; other tools are unaffected. */
+  onToolProgress?(tool: string, partial: string): void;
   /** Transient progress notice (e.g. "rate limited — retrying in Ns"); NOT a terminal error. */
   onNotice?(message: string): void;
   /** Cumulative token usage after each LLM call — drives live usage meters. */
@@ -141,6 +147,9 @@ export interface AgentLoopEvents {
   /** Accumulated streamed model response so far — drives the live reasoning view. Only
    *  requested when a consumer sets it (the engine streams solely for the TUI). */
   onModelStream?(textSoFar: string): void;
+  /** Accumulated native reasoning/thinking text so far — drives a transient dimmed
+   *  "thinking" view. Only requested when a consumer (TUI) attaches. */
+  onReasoningStream?(textSoFar: string): void;
   /** Step-budget change (gjc-style retry flow): the limit was extended because the
    *  turn is making progress. `limit` is the new max; `reason` is display-ready. */
   onBudget?(limit: number, reason: string): void;
@@ -149,6 +158,10 @@ export interface AgentLoopEvents {
    *  first"); return null to let the turn finish. The engine guarantees at most
    *  one bounce per turn, so a stubborn model can never loop here. */
   onBeforeDone?(reason: string): string | null;
+  /** Fired when a mid-turn steering message (an additional user query typed while
+   *  the turn is running) is injected into the live history. `text` is the raw
+   *  user line — drives a TUI notice so the user sees their input was picked up. */
+  onSteer?(text: string): void;
 }
 export interface AgentLoopOptions {
@@ -173,6 +186,11 @@ export interface AgentLoopOptions {
   /** Step-budget overrides (gjc-style retry flow). `{ maxExtensions: 0 }` restores the
    *  legacy fixed counter — used by bounded subagent delegation. */
   budget?: Partial<StepBudgetConfig>;
+  /** Mid-turn steering drain (gjc parity): called at each step boundary. Any strings
+   *  returned are appended to `history` as user messages BEFORE the next model call,
+   *  so an additional query typed while the turn runs steers the live turn instead of
+   *  waiting for the next prompt. Return [] when nothing is pending. */
+  steer?: () => string[];
 }
 export interface AgentLoopResult {
@@ -183,14 +201,6 @@ export interface AgentLoopResult {
   usage?: { inputTokens: number; outputTokens: number };
 }
-/** Env-tunable output budget (plan/gjc-inheritance.md B10, gjc settings-driven
- *  output handling 계승): JEO_TOOL_OUTPUT_MAX caps the model-visible tool result;
- *  the spill threshold tracks it so anything truncated stays artifact-recoverable. */
-function envOutputMax(): number {
-  const raw = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
-  return Number.isFinite(raw) && raw >= 500 && raw <= 200_000 ? Math.trunc(raw) : 4_000;
-}
-export const TOOL_OUTPUT_MAX = envOutputMax();
 /** Wall-clock budget for ONE agent turn (ms). JEO_TURN_MAX_MS overrides; 0 disables.
  *  Default 30 minutes: long autonomous runs stay alive, while a turn that spins in
@@ -205,55 +215,6 @@ export function turnMaxMs(env: Record<string, string | undefined> = process.env)
   return 30 * 60 * 1000;
 }
-/**
- * Cap a tool result fed back to the model, keeping both ends: the head holds the
- * start (e.g. a file's top / a command's invocation) and the tail holds what's
- * usually decisive (test summaries, the final error). A pure head-cut loses that.
- */
-export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX): string {
-  if (s.length <= max) return s;
-  const head = Math.floor(max * 0.6);
-  const tail = max - head;
-  return `${s.slice(0, head)}\n…(${s.length - max} chars truncated)…\n${s.slice(s.length - tail)}`;
-}
-/** Tool output larger than this is spilled to a recoverable artifact file. Aligned
- *  with `truncateToolOutput`'s cap so that whenever the model-visible result drops
- *  content, the full output is recoverable via the artifact. */
-export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
-/**
- * Write an oversized tool result verbatim under `.jeo/artifacts/tool-results/` and
- * return the workspace-relative path (for the model to `read`). Best-effort: throws
- * are caught by the caller, which simply omits the artifact note.
- */
-/** Most recent tool-result artifacts to keep; older ones are pruned on each spill. */
-export const MAX_TOOL_ARTIFACTS = 50;
-/** Best-effort retention: keep the newest `MAX_TOOL_ARTIFACTS` files in `dir`, delete the rest. */
-async function pruneToolArtifacts(dir: string): Promise<void> {
-  const files = await fs.readdir(dir).catch(() => [] as string[]);
-  if (files.length <= MAX_TOOL_ARTIFACTS) return;
-  const stamped = await Promise.all(
-    files.map(async f => ({ f, m: (await fs.stat(path.join(dir, f)).catch(() => null))?.mtimeMs ?? 0 })),
-  );
-  stamped.sort((a, b) => b.m - a.m); // newest first
-  for (const { f } of stamped.slice(MAX_TOOL_ARTIFACTS)) {
-    await fs.rm(path.join(dir, f), { force: true }).catch(() => {});
-  }
-}
-export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
-  const dir = path.join(cwd, ".jeo", "artifacts", "tool-results");
-  await fs.mkdir(dir, { recursive: true });
-  const safeTool = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32) || "tool";
-  const stamp = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
-  const rel = path.join(".jeo", "artifacts", "tool-results", `${stamp}-${safeTool}.txt`);
-  await fs.writeFile(path.join(cwd, rel), output, "utf-8");
-  // Retention so a long session can't grow the artifact dir without bound.
-  await pruneToolArtifacts(dir);
-  return rel;
-}
 /** Levenshtein distance (small inputs: tool/command names). */
 function editDistance(a: string, b: string): number {
@@ -400,6 +361,29 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
     }
     await ev.onStep?.(step);
+    // MID-TURN steering (gjc parity): drain any additional user queries typed while
+    // the turn is running and inject them as user messages BEFORE this step's model
+    // call, so the live turn adapts immediately instead of deferring to the next
+    // prompt. A genuine new instruction resets the stall/failure guards (it is fresh
+    // progress, not a repeat) and earns a budget extension so the loop has room to act.
+    if (opts.steer) {
+      const pending = opts.steer();
+      for (const raw of pending) {
+        const text = (raw ?? "").trim();
+        if (!text) continue;
+        history.push({
+          role: "user",
+          content: `[mid-turn steering — additional instruction from the user; incorporate it now]\n${text}`,
+        });
+        ev.onSteer?.(text);
+        repeatCount = 0;
+        lastSig = "";
+        consecutiveFailures = 0;
+        recentStepSigs.length = 0;
+        budget.noteSteer?.();
+      }
+    }
     // MID-TURN context guard: a single long turn (60+ steps) otherwise grows the
     // history without bound — turn-boundary compaction never runs inside a turn,
     // and field evidence shows multi-million-token prompts degrading the model
@@ -421,6 +405,10 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
     const onToken = ev.onModelStream
       ? (delta: string) => { streamBuf += delta; ev.onModelStream!(streamBuf); }
       : undefined;
+    let reasonBuf = "";
+    const onReasoning = ev.onReasoningStream
+      ? (delta: string) => { reasonBuf += delta; ev.onReasoningStream!(reasonBuf); }
+      : undefined;
     let responseText: string;
     try {
       responseText = await invokeCallLlm(history, {
@@ -430,6 +418,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
               signal: opts.signal,
               onUsage: u => { acc.inputTokens += u.inputTokens ?? 0; acc.outputTokens += u.outputTokens ?? 0; sawUsage = true; },
               onToken,
+              onReasoning,
               // Make provider auto-retry visible: previously a rate-limited call sat in a
               // silent backoff wait, then surfaced "auto-retry was exhausted" with no trace
               // of the retries that DID happen.
@@ -623,6 +612,29 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
           continue;
         }
       }
+      // Steering that arrived DURING this final step (after the top-of-loop drain,
+      // while the model was generating its `done`): reopen the turn and handle it now
+      // instead of letting it bounce to the next prompt. Bounded by the step/time budget.
+      if (opts.steer) {
+        const pending = opts.steer().map(s => (s ?? "").trim()).filter(Boolean);
+        if (pending.length) {
+          history.push({ role: "assistant", content: responseText });
+          for (const text of pending) {
+            history.push({
+              role: "user",
+              content: `[mid-turn steering — additional instruction from the user; incorporate it now before finishing]\n${text}`,
+            });
+            ev.onSteer?.(text);
+          }
+          repeatCount = 0;
+          lastSig = "";
+          consecutiveFailures = 0;
+          recentStepSigs.length = 0;
+          budget.noteSteer();
+          step++;
+          continue;
+        }
+      }
       return finish({ done: true, steps: step, doneReason: (toolCalls[0].arguments?.reason as string) ?? "" });
     }
@@ -728,7 +740,8 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
             output = preHookResult.error + (preHookResult.output ? `\n${preHookResult.output}` : "");
           } else {
             try {
-              const res = await handler(args ?? {}, cwd);
+              const onProgress = ev.onToolProgress ? (partial: string) => ev.onToolProgress!(tool, partial) : undefined;
+              const res = await handler(args ?? {}, cwd, onProgress);
               success = res.success;
               output = res.success ? res.output : (res.error ? (res.output ? `${res.error}\n${res.output}` : res.error) : res.output);
             } catch (err: any) {
@@ -820,15 +833,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
         ev.onToolResult?.(call.tool, res.success, res.output);
-        const minimized = minimizeToolOutput(res.output, call.tool);
-        const visible = minimized.text;
-        let resultBody = truncateToolOutput(visible);
-        if (res.output.length > TOOL_SPILL_THRESHOLD) {
-          const artifact = await spillToolResult(call.tool, res.output, cwd).catch(() => null);
-          if (artifact) {
-            resultBody += `\n[full output (${res.output.length} chars) saved to ${artifact} — read it for the elided middle]`;
-          }
-        }
+        const resultBody = await formatToolResultBody(call.tool, res.output, cwd);
         const { diags: hookDiags, ran: hooksRan } = await runPostTurnHooks(
           cwd,

package/src/agent/loop.ts CHANGED Viewed

@@ -19,6 +19,8 @@ export interface ChatOptions {
    *  delivered here (concatenation equals the returned string). Absent ⇒ a single
    *  non-streaming `call()` (unchanged behavior for non-interactive/test callers). */
   onToken?: (delta: string) => void;
+  /** Streaming sink for native reasoning/thinking deltas (drives the dimmed live view). */
+  onReasoning?: (delta: string) => void;
 }
 const manager = createModelManager();

package/src/agent/step-budget.ts CHANGED Viewed

@@ -180,6 +180,16 @@ export class StepBudget {
     if (this.window.length > this.cfg.windowSize) this.window.shift();
   }
+  /** A mid-turn steering message arrived — fresh, user-driven work. Grant headroom
+   *  (capped at the hard cap, without consuming the extension budget) and clear the
+   *  scoring window so the new instruction is never declined by the previous
+   *  sub-task's stall/failure signals. */
+  noteSteer(): void {
+    this.window.length = 0; // empty the recent-results window so earlier outcomes no longer feed progress()
+    this.novelSinceExtension = 0;
+    this.currentLimit = Math.min(this.currentLimit + this.cfg.extensionSteps, this.cfg.hardCap); // raise the limit by one extension step count, never past hardCap
+  }
   /** Progress over the recent window: ok count, total, distinct signatures. */
   progress(): { ok: number; total: number; distinct: number } {
     const ok = this.window.filter(r => r.success).length;

package/src/agent/subagent-registry.ts ADDED Viewed

@@ -0,0 +1,131 @@
+/**
+ * In-process detached-subagent registry (gjc `subagent`/`job` parity, scoped down
+ * to one runtime). The synchronous `task` tool blocks the parent until a subagent
+ * finishes; a DETACHED launch registers the run here and returns immediately, so
+ * the parent can keep working and later list / inspect / await / cancel it via the
+ * `subagent` control tool. Concurrency is real (JS event loop): a detached run's
+ * awaits interleave with the parent's between steps.
+ *
+ * Lifecycle is bounded to the turn that created the registry — `cancelAll()` on
+ * turn teardown guarantees no background promise leaks into the next turn.
+ */
+import type { ToolResult } from "./tools";
+export type SubagentStatus = "running" | "completed" | "failed" | "cancelled";
+export interface SubagentRecord {
+  /** Stable id, e.g. "executor-1". */
+  id: string;
+  role: string;
+  /** The assignment text (trimmed for display). */
+  task: string;
+  status: SubagentStatus;
+  startedAt: number;
+  finishedAt?: number;
+  /** Whether the finished run reported success (contract satisfied). */
+  success?: boolean;
+  /** Final subagent report/output, set once the run settles. */
+  result?: string;
+}
+interface Entry {
+  record: SubagentRecord; // the same mutable object returned by launch/list/get; updated in place as the run settles or is cancelled
+  promise: Promise<void>; // resolves when the run finishes; the wrapper in launch catches runner errors, so it does not reject
+  abort: AbortController; // its signal is handed to the runner; cancel triggers it
+}
+/** A detached run: receives its own AbortSignal and resolves to the subagent's
+ *  final ToolResult. The runner is responsible for streaming live events itself. */
+export type DetachedRunner = (signal: AbortSignal) => Promise<ToolResult>;
+export class SubagentRegistry {
+  private readonly entries = new Map<string, Entry>();
+  private readonly seq = new Map<string, number>(); // per-role launch counter used to build ids
+  /** Register and START a detached run; returns the (running) record immediately. */
+  launch(role: string, task: string, runner: DetachedRunner): SubagentRecord {
+    const n = (this.seq.get(role) ?? 0) + 1;
+    this.seq.set(role, n);
+    const id = `${role}-${n}`;
+    const abort = new AbortController();
+    const record: SubagentRecord = {
+      id,
+      role,
+      task: task.length > 200 ? task.slice(0, 197) + "…" : task, // display copy only: capped at 198 characters including the ellipsis
+      status: "running",
+      startedAt: Date.now(),
+    };
+    const promise = (async () => {
+      try {
+        const res = await runner(abort.signal);
+        // A cancel that already fired wins — don't overwrite the terminal state.
+        if (record.status === "cancelled") return;
+        record.status = res.success ? "completed" : "failed";
+        record.success = res.success;
+        record.result = res.output || res.error || "";
+      } catch (err) {
+        if (record.status === "cancelled") return;
+        record.status = "failed";
+        record.result = err instanceof Error ? err.message : String(err);
+      } finally {
+        if (record.finishedAt === undefined) record.finishedAt = Date.now(); // cancel() already stamped finishedAt for cancelled runs; keep that earlier time
+      }
+    })();
+    this.entries.set(id, { record, promise, abort });
+    return record;
+  }
+  list(): SubagentRecord[] {
+    return [...this.entries.values()].map(e => e.record);
+  }
+  get(id: string): SubagentRecord | undefined {
+    return this.entries.get(id)?.record;
+  }
+  running(): SubagentRecord[] {
+    return this.list().filter(r => r.status === "running");
+  }
+  /** Wait for the given ids to settle. Unknown ids are skipped, and an empty list
+   *  resolves immediately with [] — this method does not expand it to "all running";
+   *  callers that want that pass the ids from running(). With a positive `timeoutMs`
+   *  the wait is bounded — unfinished runs simply stay "running" in the returned snapshot. */
+  async awaitIds(ids: string[], timeoutMs?: number): Promise<SubagentRecord[]> {
+    const targets = ids
+      .map(id => this.entries.get(id))
+      .filter((e): e is Entry => e !== undefined);
+    const all = Promise.all(targets.map(e => e.promise)).then(() => {});
+    if (timeoutMs !== undefined && timeoutMs > 0) {
+      let handle: ReturnType<typeof setTimeout> | undefined;
+      const timer = new Promise<void>(resolve => {
+        handle = setTimeout(resolve, timeoutMs);
+      });
+      await Promise.race([all, timer]);
+      if (handle !== undefined) clearTimeout(handle); // stop the timer once the race is decided so it does not linger after an early finish
+    } else {
+      await all;
+    }
+    return targets.map(e => e.record);
+  }
+  /** Cancel the given ids: aborts each still-running run and marks its record cancelled.
+   *  Unknown ids are skipped and already-terminal records are returned unchanged. An
+   *  empty list is a no-op that returns [] — use cancelAll() to target every running run. */
+  cancel(ids: string[]): SubagentRecord[] {
+    const out: SubagentRecord[] = [];
+    for (const id of ids) {
+      const e = this.entries.get(id);
+      if (!e) continue;
+      if (e.record.status === "running") {
+        e.record.status = "cancelled"; // set before abort() so the wrapper in launch sees the terminal state and leaves it alone
+        e.record.finishedAt = Date.now();
+        e.abort.abort();
+      }
+      out.push(e.record);
+    }
+    return out;
+  }
+  /** Abort every still-running subagent (turn teardown / Ctrl-C). */
+  cancelAll(): SubagentRecord[] {
+    return this.cancel(this.running().map(r => r.id));
+  }
+}

package/src/agent/subagent-tool.ts ADDED Viewed

@@ -0,0 +1,89 @@
+/**
+ * `subagent` control tool (#9) — the parent's handle on DETACHED subagents launched
+ * via `task {detached:true}`. Mirrors gjc's `subagent`/`job` control surface, scoped
+ * to an in-process registry: list, inspect, await (optionally bounded), and cancel.
+ *
+ * Out of scope here (separate subsystems, not stubbed): live peer messaging (IRC)
+ * and pause/resume — a step-budget loop has no safe mid-step checkpoint to resume
+ * from, so those are intentionally absent rather than faked.
+ */
+import type { ToolHandler } from "./engine";
+import type { ToolResult } from "./tools";
+import type { SubagentRegistry, SubagentRecord } from "./subagent-registry";
+/** One-line protocol description appended to the launch system prompt. */
+export const SUBAGENT_TOOL_PROTOCOL_LINE =
+  `subagent {action:"list"|"inspect"|"await"|"cancel", ids?, timeoutMs?} — control DETACHED ` +
+  `subagents started with task{detached:true}. 'await' blocks (optionally up to timeoutMs ms) and ` +
+  `returns their reports; 'inspect' shows status + result; 'cancel' aborts them. Omit ids to target all running.`;
+function elapsed(rec: SubagentRecord): string { // run duration in whole seconds with an s suffix; measured up to now while finishedAt is unset
+  const end = rec.finishedAt ?? Date.now();
+  return `${Math.max(0, Math.round((end - rec.startedAt) / 1000))}s`; // clamped at 0 so the printed duration is never negative
+}
+function rowLine(rec: SubagentRecord): string { // one-line summary of a record: id, upper-cased status, elapsed time, task text
+  return `- ${rec.id} [${rec.status.toUpperCase()}] ${elapsed(rec)} · ${rec.task}`;
+}
+function detailBlock(rec: SubagentRecord): string { // summary row, followed by the stored result on the next lines once there is one
+  const head = rowLine(rec);
+  if (rec.status === "running" || !rec.result) return head; // nothing to append while still running or when the result is empty or unset
+  return `${head}\n${rec.result}`;
+}
+function idsOf(args: Record<string, any>): string[] { // normalizes the requested targets to strings: an ids array takes precedence, then a single id, otherwise an empty list
+  if (Array.isArray(args.ids)) return args.ids.map((x: unknown) => String(x));
+  if (args.id !== undefined) return [String(args.id)];
+  return [];
+}
+export function createSubagentTool(registry: SubagentRegistry): ToolHandler { // builds the subagent tool handler bound to one registry; the handler dispatches on args.action
+  return async (args: Record<string, any>, _cwd: string): Promise<ToolResult> => {
+    const action = String(args.action ?? "list").trim().toLowerCase(); // a missing action defaults to list; matching is case-insensitive
+    const ids = idsOf(args);
+    if (action === "list") { // list: every record in the registry, one row each; ids are ignored
+      const rows = registry.list();
+      if (rows.length === 0) {
+        return { success: true, output: "No detached subagents this turn. Launch one with task {detached:true}." };
+      }
+      const running = rows.filter(r => r.status === "running").length;
+      return { success: true, output: `${rows.length} subagent(s), ${running} running:\n${rows.map(rowLine).join("\n")}` };
+    }
+    if (action === "inspect") { // inspect: detail blocks for the given ids, or for ALL records (not only running ones) when no ids are given
+      const targets = (ids.length ? ids.map(id => registry.get(id)) : registry.list())
+        .filter((r): r is SubagentRecord => r !== undefined);
+      if (targets.length === 0) {
+        return { success: false, output: "", error: ids.length ? `No subagent matches ${ids.join(", ")}.` : "No detached subagents this turn." };
+      }
+      return { success: true, output: targets.map(detailBlock).join("\n\n") };
+    }
+    if (action === "await") { // await: block on the given ids, or on every running subagent when no ids are given
+      const targets = ids.length ? ids : registry.running().map(r => r.id);
+      if (targets.length === 0) {
+        return { success: true, output: "No running subagents to await." };
+      }
+      const timeoutMs = typeof args.timeoutMs === "number" && args.timeoutMs > 0 ? args.timeoutMs : undefined; // only a positive number bounds the wait; anything else waits without limit
+      const recs = await registry.awaitIds(targets, timeoutMs); // NOTE(review): awaitIds drops ids it does not know, so a call with only unknown ids reports 0 awaited and success
+      const stillRunning = recs.filter(r => r.status === "running").length;
+      const head = stillRunning > 0
+        ? `Awaited ${recs.length} subagent(s); ${stillRunning} still running after the ${timeoutMs}ms timeout — await again or cancel.`
+        : `Awaited ${recs.length} subagent(s); all settled.`;
+      return { success: stillRunning === 0, output: `${head}\n\n${recs.map(detailBlock).join("\n\n")}` }; // reported as a failure while any awaited run is still running
+    }
+    if (action === "cancel") { // cancel: abort the given ids, or every running subagent when no ids are given
+      const targets = ids.length ? ids : registry.running().map(r => r.id);
+      if (targets.length === 0) {
+        return { success: true, output: "No running subagents to cancel." };
+      }
+      const recs = registry.cancel(targets); // NOTE(review): the registry also returns records that had already finished, so the count in the header below can include runs that were not actually cancelled
+      return { success: true, output: `Cancelled ${recs.length} subagent(s):\n${recs.map(rowLine).join("\n")}` };
+    }
+    return { success: false, output: "", error: `Unknown subagent action '${action}'. Use list | inspect | await | cancel.` };
+  };
+}