npm - jeo-code - Versions diffs - 0.4.6 → 0.4.8 - Mend

jeo-code 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/README.ja.md +2 -2
package/README.ko.md +2 -2
package/README.md +2 -2
package/README.zh.md +2 -2
package/package.json +1 -1
package/src/agent/dev/evolution-bridge.ts +36 -3
package/src/agent/dev/self-analysis.ts +6 -1
package/src/agent/engine.ts +21 -71
package/src/agent/loop.ts +2 -0
package/src/agent/subagent-registry.ts +131 -0
package/src/agent/subagent-tool.ts +89 -0
package/src/agent/subagents.ts +22 -3
package/src/agent/task-tool.ts +119 -27
package/src/agent/tool-output.ts +115 -0
package/src/agent/tools.ts +42 -8
package/src/ai/model-manager.ts +2 -11
package/src/ai/providers/antigravity.ts +11 -2
package/src/ai/providers/gemini.ts +12 -2
package/src/ai/register-providers.ts +21 -0
package/src/ai/types.ts +4 -0
package/src/cli/runner.ts +0 -9
package/src/commands/launch.ts +47 -9
package/src/commands/team.ts +13 -6
package/src/skills/catalog.ts +0 -2
package/src/tui/app.ts +120 -14
package/src/tui/components/forge.ts +18 -1
package/src/tui/components/markdown-text.ts +10 -1
package/src/tui/components/themes.ts +46 -0
package/src/tui/components/todo-card.ts +44 -13
package/src/tui/components/width.ts +51 -0
package/src/tui/renderer.ts +38 -12
package/src/util/update-check.ts +53 -0
package/src/commands/gjc.ts +0 -52
package/src/prompts/skills/gjc/AGENTS.md +0 -31
package/src/prompts/skills/gjc/SKILL.md +0 -15

package/README.ja.md CHANGED Viewed

@@ -150,11 +150,11 @@ CI は `.github/workflows/npm-publish.yml` で公開します — GitHub リリ
 ## 変更履歴 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.4.8]** (2026-06-14) — Live-frame stability: constant-height live turn, renderer self-heal off-by-one fix, and frame-safe child-stdout sanitizing — no more duplicate model bar or torn escapes.
+- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
 - **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
 - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
 - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
-- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
-- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.ko.md CHANGED Viewed

@@ -150,11 +150,11 @@ CI는 `.github/workflows/npm-publish.yml`로 배포합니다 — GitHub 릴리
 ## 변경 이력 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.4.8]** (2026-06-14) — Live-frame stability: constant-height live turn, renderer self-heal off-by-one fix, and frame-safe child-stdout sanitizing — no more duplicate model bar or torn escapes.
+- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
 - **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
 - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
 - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
-- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
-- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.md CHANGED Viewed

@@ -150,11 +150,11 @@ Required npm token permissions (repository secret `NPM_TOKEN`):
 ## Changelog
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.4.8]** (2026-06-14) — Live-frame stability: constant-height live turn, renderer self-heal off-by-one fix, and frame-safe child-stdout sanitizing — no more duplicate model bar or torn escapes.
+- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
 - **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
 - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
 - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
-- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
-- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/README.zh.md CHANGED Viewed

@@ -150,11 +150,11 @@ CI 通过 `.github/workflows/npm-publish.yml` 发布 — GitHub 发布 release
 ## 更新日志 (Changelog)
 <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
+- **[0.4.8]** (2026-06-14) — Live-frame stability: constant-height live turn, renderer self-heal off-by-one fix, and frame-safe child-stdout sanitizing — no more duplicate model bar or torn escapes.
+- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
 - **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
 - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
 - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
-- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
-- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
 See [CHANGELOG.md](CHANGELOG.md) for the full history.
 <!-- CHANGELOG:END -->

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "jeo-code",
-  "version": "0.4.6",
+  "version": "0.4.8",
   "description": "Clean, highly optimized AI coding agent using spec-first loop",
   "type": "module",
   "main": "src/cli.ts",

package/src/agent/dev/evolution-bridge.ts CHANGED Viewed

@@ -1,7 +1,40 @@
-import { runGjcCommand } from "../../commands/gjc";
+import { runAgentLoop, executorSystemPrompt, DEFAULT_TOOLS } from "../engine";
+import { readGlobalConfig } from "../state";
+import { runPostImplementationHooks } from "../hooks";
 import { runAdvancedAnalysis } from "./advanced-analyzer";
 import { logEvolution } from "./evolution-logger";
+async function runEvolutionLoop(intent: string, cwd: string): Promise<void> {
+  const config = await readGlobalConfig();
+  const model = config.defaultModel || "fast";
+  const systemPrompt = executorSystemPrompt();
+  await runAgentLoop([{ role: "user", content: intent }], {
+    cwd,
+    systemPrompt,
+    model,
+    tools: DEFAULT_TOOLS,
+    maxSteps: 50,
+  });
+  console.log("\n[jeo] Verifying implementation...");
+  const verify = await runPostImplementationHooks(cwd, intent);
+  if (!verify.success) {
+    console.error("\n[jeo] Verification FAILED. Auto-repairing...");
+    const repairTask = `Previous implementation failed verification.\nErrors:\n${verify.output}\n\nPlease fix.`;
+    await runAgentLoop([{ role: "user", content: repairTask }], {
+      cwd,
+      systemPrompt,
+      model,
+      tools: DEFAULT_TOOLS,
+      maxSteps: 30,
+    });
+  } else {
+    console.log("\n[jeo] Verification SUCCESSFUL.");
+  }
+}
 export async function consultGjcForAdvancedEvolution(cwd: string) {
   const report = await runAdvancedAnalysis(cwd);
   const timestamp = new Date().toISOString();
@@ -27,7 +60,7 @@ As my implementation guide (gjc), please:
   `;
   try {
-    await runGjcCommand([request]);
+    await runEvolutionLoop(request, cwd);
     await logEvolution({
       timestamp: new Date().toISOString(),
@@ -62,7 +95,7 @@ export async function consultGjcForEvolution(cwd: string) {
   console.log();
   try {
-    await runGjcCommand([report]);
+    await runEvolutionLoop(report, cwd);
     await logEvolution({
       timestamp: new Date().toISOString(),
       target: "src/agent/engine.ts",

package/src/agent/dev/self-analysis.ts CHANGED Viewed

@@ -22,7 +22,12 @@ export async function runSelfAnalysis(cwd: string): Promise<string> {
   const content = await fs.readFile(targetPath, "utf-8");
   const lineCount = content.split("\n").length;
-  const hasTooManyResponsibilities = content.includes("runAgentLoop") && content.includes("truncateToolOutput") && content.includes("spillToolResult");
+  // Ownership-accurate SRP check: the loop drives steps, while output shaping
+  // (truncate/spill) lives in tool-output.ts. Flag only when those are DEFINED
+  // here again, not merely imported or re-exported for backward compatibility.
+  const definesOutputShaping =
+    /\bfunction\s+truncateToolOutput\b/.test(content) && /\bfunction\s+spillToolResult\b/.test(content);
+  const hasTooManyResponsibilities = content.includes("runAgentLoop") && definesOutputShaping;
   let report = "Analysis of src/agent/engine.ts:\n";
   report += "- File length: " + lineCount + " lines.\n";

package/src/agent/engine.ts CHANGED Viewed

@@ -16,7 +16,8 @@ import { webSearchTool, setWebSearchActiveModel } from "./web-search";
 import { friendlyProviderError, isContextOverflowError, isRefusalError } from "../util/provider-error";
 import { isRateLimitError } from "../util/retry";
 import { runPreToolHooks, runPostTurnHooks } from "./hooks";
-import { minimizeToolOutput } from "./output-minimizer";
+import { truncateToolOutput, formatToolResultBody } from "./tool-output";
+export { TOOL_OUTPUT_MAX, READ_OUTPUT_MAX, TOOL_SPILL_THRESHOLD, MAX_TOOL_ARTIFACTS, truncateToolOutput, spillToolResult } from "./tool-output";
 import { StepBudget, dynamicStepBudgetConfig, resolveStepBudgetConfig, hashSignature, type StepBudgetConfig } from "./step-budget";
 import { historyTokens, trimToolResultsInPlace } from "./compaction";
 import { jeoEnv } from "../util/env";
@@ -30,6 +31,7 @@ async function invokeCallLlm(history: Message[], options: {
   onUsage?: (u: { inputTokens?: number; outputTokens?: number }) => void;
   onRetry?: (attempt: number, err: unknown, delayMs: number) => void;
   onToken?: (delta: string) => void;
+  onReasoning?: (delta: string) => void;
 }): Promise<string> {
   const mod = await import("./loop");
   return mod.callLlm(history, options);
@@ -39,14 +41,14 @@ export interface ToolInvocation {
   arguments?: Record<string, any>;
 }
-export type ToolHandler = (args: Record<string, any>, cwd: string) => Promise<ToolResult>;
+export type ToolHandler = (args: Record<string, any>, cwd: string, onProgress?: (partialOutput: string) => void) => Promise<ToolResult>;
-/** The default executor toolset (read / write / edit / bash / find / search). */
+/** The default executor toolset (read / write / edit / bash / find / search / ls / mkdir / delete / web_search). */
 export const DEFAULT_TOOLS: Record<string, ToolHandler> = {
   read: (a, cwd) => readTool(a.filePath ?? a.path, a.lineRange ?? a.range, cwd, !!a.raw),
   write: (a, cwd) => writeTool(a.filePath ?? a.path, a.content ?? "", cwd),
   edit: (a, cwd) => editTool(a.filePath ?? a.path, a.editBlock ?? a.edit ?? "", cwd),
-  bash: (a, cwd) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined),
+  bash: (a, cwd, onProgress) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined, onProgress),
   find: (a, cwd) => findTool(a.globPattern ?? a.pattern, cwd),
   search: (a, cwd) => searchTool(a.pattern, a.globPattern ?? "*", cwd, !!(a.ignoreCase ?? a.i), { before: a.before, after: a.after, context: a.context, maxMatches: a.maxMatches }),
   ls: (a, cwd) => lsTool(a.dirPath ?? a.path ?? a.dir ?? ".", cwd),
@@ -134,6 +136,10 @@ export interface AgentLoopEvents {
   onStep?(step: number): void | Promise<void>;
   onAssistant?(raw: string, invocation: ToolInvocation | null): void;
   onToolResult?(tool: string, success: boolean, output: string): void;
+  /** Streaming partial output of the currently-running tool (e.g. bash stdout as it
+   *  arrives) — drives a live DIMMED output view that the final formatted result
+   *  replaces on onToolResult. Only bash emits today; other tools are unaffected. */
+  onToolProgress?(tool: string, partial: string): void;
   /** Transient progress notice (e.g. "rate limited — retrying in Ns"); NOT a terminal error. */
   onNotice?(message: string): void;
   /** Cumulative token usage after each LLM call — drives live usage meters. */
@@ -141,6 +147,9 @@ export interface AgentLoopEvents {
   /** Accumulated streamed model response so far — drives the live reasoning view. Only
    *  requested when a consumer sets it (the engine streams solely for the TUI). */
   onModelStream?(textSoFar: string): void;
+  /** Accumulated native reasoning/thinking text so far — drives a transient dimmed
+   *  "thinking" view. Only requested when a consumer (TUI) attaches. */
+  onReasoningStream?(textSoFar: string): void;
   /** Step-budget change (gjc-style retry flow): the limit was extended because the
    *  turn is making progress. `limit` is the new max; `reason` is display-ready. */
   onBudget?(limit: number, reason: string): void;
@@ -192,14 +201,6 @@ export interface AgentLoopResult {
   usage?: { inputTokens: number; outputTokens: number };
 }
-/** Env-tunable output budget (plan/gjc-inheritance.md B10, gjc settings-driven
- *  output handling 계승): JEO_TOOL_OUTPUT_MAX caps the model-visible tool result;
- *  the spill threshold tracks it so anything truncated stays artifact-recoverable. */
-function envOutputMax(): number {
-  const raw = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
-  return Number.isFinite(raw) && raw >= 500 && raw <= 200_000 ? Math.trunc(raw) : 4_000;
-}
-export const TOOL_OUTPUT_MAX = envOutputMax();
 /** Wall-clock budget for ONE agent turn (ms). JEO_TURN_MAX_MS overrides; 0 disables.
  *  Default 30 minutes: long autonomous runs stay alive, while a turn that spins in
@@ -214,55 +215,6 @@ export function turnMaxMs(env: Record<string, string | undefined> = process.env)
   return 30 * 60 * 1000;
 }
-/**
- * Cap a tool result fed back to the model, keeping both ends: the head holds the
- * start (e.g. a file's top / a command's invocation) and the tail holds what's
- * usually decisive (test summaries, the final error). A pure head-cut loses that.
- */
-export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX): string {
-  if (s.length <= max) return s;
-  const head = Math.floor(max * 0.6);
-  const tail = max - head;
-  return `${s.slice(0, head)}\n…(${s.length - max} chars truncated)…\n${s.slice(s.length - tail)}`;
-}
-/** Tool output larger than this is spilled to a recoverable artifact file. Aligned
- *  with `truncateToolOutput`'s cap so that whenever the model-visible result drops
- *  content, the full output is recoverable via the artifact. */
-export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
-/**
- * Write an oversized tool result verbatim under `.jeo/artifacts/tool-results/` and
- * return the workspace-relative path (for the model to `read`). Best-effort: throws
- * are caught by the caller, which simply omits the artifact note.
- */
-/** Most recent tool-result artifacts to keep; older ones are pruned on each spill. */
-export const MAX_TOOL_ARTIFACTS = 50;
-/** Best-effort retention: keep the newest `MAX_TOOL_ARTIFACTS` files in `dir`, delete the rest. */
-async function pruneToolArtifacts(dir: string): Promise<void> {
-  const files = await fs.readdir(dir).catch(() => [] as string[]);
-  if (files.length <= MAX_TOOL_ARTIFACTS) return;
-  const stamped = await Promise.all(
-    files.map(async f => ({ f, m: (await fs.stat(path.join(dir, f)).catch(() => null))?.mtimeMs ?? 0 })),
-  );
-  stamped.sort((a, b) => b.m - a.m); // newest first
-  for (const { f } of stamped.slice(MAX_TOOL_ARTIFACTS)) {
-    await fs.rm(path.join(dir, f), { force: true }).catch(() => {});
-  }
-}
-export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
-  const dir = path.join(cwd, ".jeo", "artifacts", "tool-results");
-  await fs.mkdir(dir, { recursive: true });
-  const safeTool = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32) || "tool";
-  const stamp = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
-  const rel = path.join(".jeo", "artifacts", "tool-results", `${stamp}-${safeTool}.txt`);
-  await fs.writeFile(path.join(cwd, rel), output, "utf-8");
-  // Retention so a long session can't grow the artifact dir without bound.
-  await pruneToolArtifacts(dir);
-  return rel;
-}
 /** Levenshtein distance (small inputs: tool/command names). */
 function editDistance(a: string, b: string): number {
@@ -453,6 +405,10 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
     const onToken = ev.onModelStream
       ? (delta: string) => { streamBuf += delta; ev.onModelStream!(streamBuf); }
       : undefined;
+    let reasonBuf = "";
+    const onReasoning = ev.onReasoningStream
+      ? (delta: string) => { reasonBuf += delta; ev.onReasoningStream!(reasonBuf); }
+      : undefined;
     let responseText: string;
     try {
       responseText = await invokeCallLlm(history, {
@@ -462,6 +418,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
               signal: opts.signal,
               onUsage: u => { acc.inputTokens += u.inputTokens ?? 0; acc.outputTokens += u.outputTokens ?? 0; sawUsage = true; },
               onToken,
+              onReasoning,
               // Make provider auto-retry visible: previously a rate-limited call sat in a
               // silent backoff wait, then surfaced "auto-retry was exhausted" with no trace
               // of the retries that DID happen.
@@ -783,7 +740,8 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
             output = preHookResult.error + (preHookResult.output ? `\n${preHookResult.output}` : "");
           } else {
             try {
-              const res = await handler(args ?? {}, cwd);
+              const onProgress = ev.onToolProgress ? (partial: string) => ev.onToolProgress!(tool, partial) : undefined;
+              const res = await handler(args ?? {}, cwd, onProgress);
               success = res.success;
               output = res.success ? res.output : (res.error ? (res.output ? `${res.error}\n${res.output}` : res.error) : res.output);
             } catch (err: any) {
@@ -875,15 +833,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
         ev.onToolResult?.(call.tool, res.success, res.output);
-        const minimized = minimizeToolOutput(res.output, call.tool);
-        const visible = minimized.text;
-        let resultBody = truncateToolOutput(visible);
-        if (res.output.length > TOOL_SPILL_THRESHOLD) {
-          const artifact = await spillToolResult(call.tool, res.output, cwd).catch(() => null);
-          if (artifact) {
-            resultBody += `\n[full output (${res.output.length} chars) saved to ${artifact} — read it for the elided middle]`;
-          }
-        }
+        const resultBody = await formatToolResultBody(call.tool, res.output, cwd);
         const { diags: hookDiags, ran: hooksRan } = await runPostTurnHooks(
           cwd,

package/src/agent/loop.ts CHANGED Viewed

@@ -19,6 +19,8 @@ export interface ChatOptions {
    *  delivered here (concatenation equals the returned string). Absent ⇒ a single
    *  non-streaming `call()` (unchanged behavior for non-interactive/test callers). */
   onToken?: (delta: string) => void;
+  /** Streaming sink for native reasoning/thinking deltas (drives the dimmed live view). */
+  onReasoning?: (delta: string) => void;
 }
 const manager = createModelManager();

package/src/agent/subagent-registry.ts ADDED Viewed

@@ -0,0 +1,131 @@
+/**
+ * In-process detached-subagent registry (gjc `subagent`/`job` parity, scoped down
+ * to one runtime). The synchronous `task` tool blocks the parent until a subagent
+ * finishes; a DETACHED launch registers the run here and returns immediately, so
+ * the parent can keep working and later list / inspect / await / cancel it via the
+ * `subagent` control tool. Concurrency is real (JS event loop): a detached run's
+ * awaits interleave with the parent's between steps.
+ *
+ * Lifecycle is bounded to the turn that created the registry — `cancelAll()` on
+ * turn teardown guarantees no background promise leaks into the next turn.
+ */
+import type { ToolResult } from "./tools";
+export type SubagentStatus = "running" | "completed" | "failed" | "cancelled";
+export interface SubagentRecord {
+  /** Stable id, e.g. "executor-1". */
+  id: string;
+  role: string;
+  /** The assignment text (trimmed for display). */
+  task: string;
+  status: SubagentStatus;
+  startedAt: number;
+  finishedAt?: number;
+  /** Whether the finished run reported success (contract satisfied). */
+  success?: boolean;
+  /** Final subagent report/output, set once the run settles. */
+  result?: string;
+}
+interface Entry {
+  record: SubagentRecord;
+  promise: Promise<void>;
+  abort: AbortController;
+}
+/** A detached run: receives its own AbortSignal and resolves to the subagent's
+ *  final ToolResult. The runner is responsible for streaming live events itself. */
+export type DetachedRunner = (signal: AbortSignal) => Promise<ToolResult>;
+export class SubagentRegistry {
+  private readonly entries = new Map<string, Entry>();
+  private readonly seq = new Map<string, number>();
+  /** Register and START a detached run; returns the (running) record immediately. */
+  launch(role: string, task: string, runner: DetachedRunner): SubagentRecord {
+    const n = (this.seq.get(role) ?? 0) + 1;
+    this.seq.set(role, n);
+    const id = `${role}-${n}`;
+    const abort = new AbortController();
+    const record: SubagentRecord = {
+      id,
+      role,
+      task: task.length > 200 ? task.slice(0, 197) + "…" : task,
+      status: "running",
+      startedAt: Date.now(),
+    };
+    const promise = (async () => {
+      try {
+        const res = await runner(abort.signal);
+        // A cancel that already fired wins — don't overwrite the terminal state.
+        if (record.status === "cancelled") return;
+        record.status = res.success ? "completed" : "failed";
+        record.success = res.success;
+        record.result = res.output || res.error || "";
+      } catch (err) {
+        if (record.status === "cancelled") return;
+        record.status = "failed";
+        record.result = err instanceof Error ? err.message : String(err);
+      } finally {
+        if (record.finishedAt === undefined) record.finishedAt = Date.now();
+      }
+    })();
+    this.entries.set(id, { record, promise, abort });
+    return record;
+  }
+  list(): SubagentRecord[] {
+    return [...this.entries.values()].map(e => e.record);
+  }
+  get(id: string): SubagentRecord | undefined {
+    return this.entries.get(id)?.record;
+  }
+  running(): SubagentRecord[] {
+    return this.list().filter(r => r.status === "running");
+  }
+  /** Wait for the given ids (unknown ids are skipped; an empty list returns []). With `timeoutMs`
+   *  the wait is bounded — unfinished runs simply stay "running" in the returned snapshot. */
+  async awaitIds(ids: string[], timeoutMs?: number): Promise<SubagentRecord[]> {
+    const targets = ids
+      .map(id => this.entries.get(id))
+      .filter((e): e is Entry => e !== undefined);
+    const all = Promise.all(targets.map(e => e.promise)).then(() => {});
+    if (timeoutMs !== undefined && timeoutMs > 0) {
+      let handle: ReturnType<typeof setTimeout> | undefined;
+      const timer = new Promise<void>(resolve => {
+        handle = setTimeout(resolve, timeoutMs);
+      });
+      await Promise.race([all, timer]);
+      if (handle !== undefined) clearTimeout(handle);
+    } else {
+      await all;
+    }
+    return targets.map(e => e.record);
+  }
+  /** Cancel the given ids (unknown ids are skipped; an empty list is a no-op): aborts the run
+   *  and marks the record cancelled. Already-terminal records are returned unchanged. */
+  cancel(ids: string[]): SubagentRecord[] {
+    const out: SubagentRecord[] = [];
+    for (const id of ids) {
+      const e = this.entries.get(id);
+      if (!e) continue;
+      if (e.record.status === "running") {
+        e.record.status = "cancelled";
+        e.record.finishedAt = Date.now();
+        e.abort.abort();
+      }
+      out.push(e.record);
+    }
+    return out;
+  }
+  /** Abort every still-running subagent (turn teardown / Ctrl-C). */
+  cancelAll(): SubagentRecord[] {
+    return this.cancel(this.running().map(r => r.id));
+  }
+}

package/src/agent/subagent-tool.ts ADDED Viewed

@@ -0,0 +1,89 @@
+/**
+ * `subagent` control tool (#9) — the parent's handle on DETACHED subagents launched
+ * via `task {detached:true}`. Mirrors gjc's `subagent`/`job` control surface, scoped
+ * to an in-process registry: list, inspect, await (optionally bounded), and cancel.
+ *
+ * Out of scope here (separate subsystems, not stubbed): live peer messaging (IRC)
+ * and pause/resume — a step-budget loop has no safe mid-step checkpoint to resume
+ * from, so those are intentionally absent rather than faked.
+ */
+import type { ToolHandler } from "./engine";
+import type { ToolResult } from "./tools";
+import type { SubagentRegistry, SubagentRecord } from "./subagent-registry";
+/** One-line protocol description appended to the launch system prompt. */
+export const SUBAGENT_TOOL_PROTOCOL_LINE =
+  `subagent {action:"list"|"inspect"|"await"|"cancel", ids?, timeoutMs?} — control DETACHED ` +
+  `subagents started with task{detached:true}. 'await' blocks (optionally up to timeoutMs ms) and ` +
+  `returns their reports; 'inspect' shows status + result; 'cancel' aborts them. Omit ids to target all running.`;
+function elapsed(rec: SubagentRecord): string {
+  const end = rec.finishedAt ?? Date.now();
+  return `${Math.max(0, Math.round((end - rec.startedAt) / 1000))}s`;
+}
+function rowLine(rec: SubagentRecord): string {
+  return `- ${rec.id} [${rec.status.toUpperCase()}] ${elapsed(rec)} · ${rec.task}`;
+}
+function detailBlock(rec: SubagentRecord): string {
+  const head = rowLine(rec);
+  if (rec.status === "running" || !rec.result) return head;
+  return `${head}\n${rec.result}`;
+}
+function idsOf(args: Record<string, any>): string[] {
+  if (Array.isArray(args.ids)) return args.ids.map((x: unknown) => String(x));
+  if (args.id !== undefined) return [String(args.id)];
+  return [];
+}
+export function createSubagentTool(registry: SubagentRegistry): ToolHandler {
+  return async (args: Record<string, any>, _cwd: string): Promise<ToolResult> => {
+    const action = String(args.action ?? "list").trim().toLowerCase();
+    const ids = idsOf(args);
+    if (action === "list") {
+      const rows = registry.list();
+      if (rows.length === 0) {
+        return { success: true, output: "No detached subagents this turn. Launch one with task {detached:true}." };
+      }
+      const running = rows.filter(r => r.status === "running").length;
+      return { success: true, output: `${rows.length} subagent(s), ${running} running:\n${rows.map(rowLine).join("\n")}` };
+    }
+    if (action === "inspect") {
+      const targets = (ids.length ? ids.map(id => registry.get(id)) : registry.list())
+        .filter((r): r is SubagentRecord => r !== undefined);
+      if (targets.length === 0) {
+        return { success: false, output: "", error: ids.length ? `No subagent matches ${ids.join(", ")}.` : "No detached subagents this turn." };
+      }
+      return { success: true, output: targets.map(detailBlock).join("\n\n") };
+    }
+    if (action === "await") {
+      const targets = ids.length ? ids : registry.running().map(r => r.id);
+      if (targets.length === 0) {
+        return { success: true, output: "No running subagents to await." };
+      }
+      const timeoutMs = typeof args.timeoutMs === "number" && args.timeoutMs > 0 ? args.timeoutMs : undefined;
+      const recs = await registry.awaitIds(targets, timeoutMs);
+      const stillRunning = recs.filter(r => r.status === "running").length;
+      const head = stillRunning > 0
+        ? `Awaited ${recs.length} subagent(s); ${stillRunning} still running after the ${timeoutMs}ms timeout — await again or cancel.`
+        : `Awaited ${recs.length} subagent(s); all settled.`;
+      return { success: stillRunning === 0, output: `${head}\n\n${recs.map(detailBlock).join("\n\n")}` };
+    }
+    if (action === "cancel") {
+      const targets = ids.length ? ids : registry.running().map(r => r.id);
+      if (targets.length === 0) {
+        return { success: true, output: "No running subagents to cancel." };
+      }
+      const recs = registry.cancel(targets);
+      return { success: true, output: `Cancelled ${recs.length} subagent(s):\n${recs.map(rowLine).join("\n")}` };
+    }
+    return { success: false, output: "", error: `Unknown subagent action '${action}'. Use list | inspect | await | cancel.` };
+  };
+}

package/src/agent/subagents.ts CHANGED Viewed

@@ -178,20 +178,39 @@ function renderRolePrompt(template: string, role: SubagentRole): string {
     .trim();
 }
+/** True when `marker` is present in `text` AND the span between it and the next
+ *  required marker (or end of text) carries non-whitespace content. A label-only
+ *  section ("Summary:" with an empty body) is not a real report, so it fails. */
+function markerHasContent(text: string, marker: string, allMarkers: string[]): boolean {
+  const start = text.indexOf(marker);
+  if (start < 0) return false;
+  const after = start + marker.length;
+  let end = text.length;
+  for (const other of allMarkers) {
+    if (other === marker) continue;
+    const j = text.indexOf(other, after);
+    if (j >= 0 && j < end) end = j;
+  }
+  return text.slice(after, end).trim().length > 0;
+}
 export function validateSubagentDoneReason(role: SubagentRole, reason: string | undefined): { ok: boolean; missing?: string[] } {
   const trimmed = (reason ?? "").trim();
   if (!trimmed) return { ok: false, missing: ["done.reason"] };
+  const markers = role.requiredDoneMarkers ?? [];
+  // Each required section must be PRESENT and carry non-empty content — a report of
+  // bare labels (no prose) is rejected; the old substring-presence check let it pass.
+  const sectionMissing = markers.filter(m => !markerHasContent(trimmed, m, markers));
   if (role.id === "critic") {
     const verdicts = ["[OKAY]", "[ITERATE]", "[REJECT]"];
     const hasVerdict = verdicts.some(marker => trimmed.startsWith(marker));
     const missing = [
       ...(hasVerdict ? [] : ["[OKAY]|[ITERATE]|[REJECT]"]),
-      ...((role.requiredDoneMarkers ?? []).filter(marker => !trimmed.includes(marker))),
+      ...sectionMissing,
     ];
     return { ok: missing.length === 0, missing };
   }
-  const missing = (role.requiredDoneMarkers ?? []).filter(marker => !trimmed.includes(marker));
-  return { ok: missing.length === 0, missing };
+  return { ok: sectionMissing.length === 0, missing: sectionMissing };
 }
 /** Build a role-specific system prompt from its dedicated template. */