jeo-code 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.ja.md CHANGED
@@ -150,11 +150,11 @@ CI は `.github/workflows/npm-publish.yml` で公開します — GitHub リリ
150
150
  ## 変更履歴 (Changelog)
151
151
 
152
152
  <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
153
+ - **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
154
+ - **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
153
155
  - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
154
156
  - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
155
157
  - **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
156
- - **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
157
- - **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
158
158
 
159
159
  See [CHANGELOG.md](CHANGELOG.md) for the full history.
160
160
  <!-- CHANGELOG:END -->
package/README.ko.md CHANGED
@@ -150,11 +150,11 @@ CI는 `.github/workflows/npm-publish.yml`로 배포합니다 — GitHub 릴리
150
150
  ## 변경 이력 (Changelog)
151
151
 
152
152
  <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
153
+ - **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
154
+ - **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
153
155
  - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
154
156
  - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
155
157
  - **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
156
- - **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
157
- - **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
158
158
 
159
159
  See [CHANGELOG.md](CHANGELOG.md) for the full history.
160
160
  <!-- CHANGELOG:END -->
package/README.md CHANGED
@@ -150,11 +150,11 @@ Required npm token permissions (repository secret `NPM_TOKEN`):
150
150
  ## Changelog
151
151
 
152
152
  <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
153
+ - **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
154
+ - **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
153
155
  - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
154
156
  - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
155
157
  - **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
156
- - **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
157
- - **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
158
158
 
159
159
  See [CHANGELOG.md](CHANGELOG.md) for the full history.
160
160
  <!-- CHANGELOG:END -->
package/README.zh.md CHANGED
@@ -150,11 +150,11 @@ CI 通过 `.github/workflows/npm-publish.yml` 发布 — GitHub 发布 release
150
150
  ## 更新日志 (Changelog)
151
151
 
152
152
  <!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
153
+ - **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
154
+ - **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
153
155
  - **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
154
156
  - **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
155
157
  - **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
156
- - **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
157
- - **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
158
158
 
159
159
  See [CHANGELOG.md](CHANGELOG.md) for the full history.
160
160
  <!-- CHANGELOG:END -->
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "jeo-code",
3
- "version": "0.4.5",
3
+ "version": "0.4.7",
4
4
  "description": "Clean, highly optimized AI coding agent using spec-first loop",
5
5
  "type": "module",
6
6
  "main": "src/cli.ts",
@@ -1,7 +1,40 @@
1
- import { runGjcCommand } from "../../commands/gjc";
1
+ import { runAgentLoop, executorSystemPrompt, DEFAULT_TOOLS } from "../engine";
2
+ import { readGlobalConfig } from "../state";
3
+ import { runPostImplementationHooks } from "../hooks";
2
4
  import { runAdvancedAnalysis } from "./advanced-analyzer";
3
5
  import { logEvolution } from "./evolution-logger";
4
6
 
7
+ async function runEvolutionLoop(intent: string, cwd: string): Promise<void> {
8
+ const config = await readGlobalConfig();
9
+ const model = config.defaultModel || "fast";
10
+ const systemPrompt = executorSystemPrompt();
11
+
12
+ await runAgentLoop([{ role: "user", content: intent }], {
13
+ cwd,
14
+ systemPrompt,
15
+ model,
16
+ tools: DEFAULT_TOOLS,
17
+ maxSteps: 50,
18
+ });
19
+
20
+ console.log("\n[jeo] Verifying implementation...");
21
+ const verify = await runPostImplementationHooks(cwd, intent);
22
+
23
+ if (!verify.success) {
24
+ console.error("\n[jeo] Verification FAILED. Auto-repairing...");
25
+ const repairTask = `Previous implementation failed verification.\nErrors:\n${verify.output}\n\nPlease fix.`;
26
+ await runAgentLoop([{ role: "user", content: repairTask }], {
27
+ cwd,
28
+ systemPrompt,
29
+ model,
30
+ tools: DEFAULT_TOOLS,
31
+ maxSteps: 30,
32
+ });
33
+ } else {
34
+ console.log("\n[jeo] Verification SUCCESSFUL.");
35
+ }
36
+ }
37
+
5
38
  export async function consultGjcForAdvancedEvolution(cwd: string) {
6
39
  const report = await runAdvancedAnalysis(cwd);
7
40
  const timestamp = new Date().toISOString();
@@ -27,7 +60,7 @@ As my implementation guide (gjc), please:
27
60
  `;
28
61
 
29
62
  try {
30
- await runGjcCommand([request]);
63
+ await runEvolutionLoop(request, cwd);
31
64
 
32
65
  await logEvolution({
33
66
  timestamp: new Date().toISOString(),
@@ -62,7 +95,7 @@ export async function consultGjcForEvolution(cwd: string) {
62
95
 
63
96
  console.log();
64
97
  try {
65
- await runGjcCommand([report]);
98
+ await runEvolutionLoop(report, cwd);
66
99
  await logEvolution({
67
100
  timestamp: new Date().toISOString(),
68
101
  target: "src/agent/engine.ts",
@@ -22,7 +22,12 @@ export async function runSelfAnalysis(cwd: string): Promise<string> {
22
22
  const content = await fs.readFile(targetPath, "utf-8");
23
23
 
24
24
  const lineCount = content.split("\n").length;
25
- const hasTooManyResponsibilities = content.includes("runAgentLoop") && content.includes("truncateToolOutput") && content.includes("spillToolResult");
25
+ // Ownership-accurate SRP check: the loop drives steps, while output shaping
26
+ // (truncate/spill) lives in tool-output.ts. Flag only when those are DEFINED
27
+ // here again, not merely imported or re-exported for backward compatibility.
28
+ const definesOutputShaping =
29
+ /\bfunction\s+truncateToolOutput\b/.test(content) && /\bfunction\s+spillToolResult\b/.test(content);
30
+ const hasTooManyResponsibilities = content.includes("runAgentLoop") && definesOutputShaping;
26
31
 
27
32
  let report = "Analysis of src/agent/engine.ts:\n";
28
33
  report += "- File length: " + lineCount + " lines.\n";
@@ -16,7 +16,8 @@ import { webSearchTool, setWebSearchActiveModel } from "./web-search";
16
16
  import { friendlyProviderError, isContextOverflowError, isRefusalError } from "../util/provider-error";
17
17
  import { isRateLimitError } from "../util/retry";
18
18
  import { runPreToolHooks, runPostTurnHooks } from "./hooks";
19
- import { minimizeToolOutput } from "./output-minimizer";
19
+ import { truncateToolOutput, formatToolResultBody } from "./tool-output";
20
+ export { TOOL_OUTPUT_MAX, READ_OUTPUT_MAX, TOOL_SPILL_THRESHOLD, MAX_TOOL_ARTIFACTS, truncateToolOutput, spillToolResult } from "./tool-output";
20
21
  import { StepBudget, dynamicStepBudgetConfig, resolveStepBudgetConfig, hashSignature, type StepBudgetConfig } from "./step-budget";
21
22
  import { historyTokens, trimToolResultsInPlace } from "./compaction";
22
23
  import { jeoEnv } from "../util/env";
@@ -30,6 +31,7 @@ async function invokeCallLlm(history: Message[], options: {
30
31
  onUsage?: (u: { inputTokens?: number; outputTokens?: number }) => void;
31
32
  onRetry?: (attempt: number, err: unknown, delayMs: number) => void;
32
33
  onToken?: (delta: string) => void;
34
+ onReasoning?: (delta: string) => void;
33
35
  }): Promise<string> {
34
36
  const mod = await import("./loop");
35
37
  return mod.callLlm(history, options);
@@ -39,14 +41,14 @@ export interface ToolInvocation {
39
41
  arguments?: Record<string, any>;
40
42
  }
41
43
 
42
- export type ToolHandler = (args: Record<string, any>, cwd: string) => Promise<ToolResult>;
44
+ export type ToolHandler = (args: Record<string, any>, cwd: string, onProgress?: (partialOutput: string) => void) => Promise<ToolResult>;
43
45
 
44
- /** The default executor toolset (read / write / edit / bash / find / search). */
46
+ /** The default executor toolset (read / write / edit / bash / find / search / ls / mkdir / delete / web_search). */
45
47
  export const DEFAULT_TOOLS: Record<string, ToolHandler> = {
46
48
  read: (a, cwd) => readTool(a.filePath ?? a.path, a.lineRange ?? a.range, cwd, !!a.raw),
47
49
  write: (a, cwd) => writeTool(a.filePath ?? a.path, a.content ?? "", cwd),
48
50
  edit: (a, cwd) => editTool(a.filePath ?? a.path, a.editBlock ?? a.edit ?? "", cwd),
49
- bash: (a, cwd) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined),
51
+ bash: (a, cwd, onProgress) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined, onProgress),
50
52
  find: (a, cwd) => findTool(a.globPattern ?? a.pattern, cwd),
51
53
  search: (a, cwd) => searchTool(a.pattern, a.globPattern ?? "*", cwd, !!(a.ignoreCase ?? a.i), { before: a.before, after: a.after, context: a.context, maxMatches: a.maxMatches }),
52
54
  ls: (a, cwd) => lsTool(a.dirPath ?? a.path ?? a.dir ?? ".", cwd),
@@ -134,6 +136,10 @@ export interface AgentLoopEvents {
134
136
  onStep?(step: number): void | Promise<void>;
135
137
  onAssistant?(raw: string, invocation: ToolInvocation | null): void;
136
138
  onToolResult?(tool: string, success: boolean, output: string): void;
139
+ /** Streaming partial output of the currently-running tool (e.g. bash stdout as it
140
+ * arrives) — drives a live DIMMED output view that the final formatted result
141
+ * replaces on onToolResult. Only bash emits today; other tools are unaffected. */
142
+ onToolProgress?(tool: string, partial: string): void;
137
143
  /** Transient progress notice (e.g. "rate limited — retrying in Ns"); NOT a terminal error. */
138
144
  onNotice?(message: string): void;
139
145
  /** Cumulative token usage after each LLM call — drives live usage meters. */
@@ -141,6 +147,9 @@ export interface AgentLoopEvents {
141
147
  /** Accumulated streamed model response so far — drives the live reasoning view. Only
142
148
  * requested when a consumer sets it (the engine streams solely for the TUI). */
143
149
  onModelStream?(textSoFar: string): void;
150
+ /** Accumulated native reasoning/thinking text so far — drives a transient dimmed
151
+ * "thinking" view. Only requested when a consumer (TUI) attaches. */
152
+ onReasoningStream?(textSoFar: string): void;
144
153
  /** Step-budget change (gjc-style retry flow): the limit was extended because the
145
154
  * turn is making progress. `limit` is the new max; `reason` is display-ready. */
146
155
  onBudget?(limit: number, reason: string): void;
@@ -149,6 +158,10 @@ export interface AgentLoopEvents {
149
158
  * first"); return null to let the turn finish. The engine guarantees at most
150
159
  * one bounce per turn, so a stubborn model can never loop here. */
151
160
  onBeforeDone?(reason: string): string | null;
161
+ /** Fired when a mid-turn steering message (an additional user query typed while
162
+ * the turn is running) is injected into the live history. `text` is the raw
163
+ * user line — drives a TUI notice so the user sees their input was picked up. */
164
+ onSteer?(text: string): void;
152
165
  }
153
166
 
154
167
  export interface AgentLoopOptions {
@@ -173,6 +186,11 @@ export interface AgentLoopOptions {
173
186
  /** Step-budget overrides (gjc-style retry flow). `{ maxExtensions: 0 }` restores the
174
187
  * legacy fixed counter — used by bounded subagent delegation. */
175
188
  budget?: Partial<StepBudgetConfig>;
189
+ /** Mid-turn steering drain (gjc parity): called at each step boundary. Any strings
190
+ * returned are appended to `history` as user messages BEFORE the next model call,
191
+ * so an additional query typed while the turn runs steers the live turn instead of
192
+ * waiting for the next prompt. Return [] when nothing is pending. */
193
+ steer?: () => string[];
176
194
  }
177
195
 
178
196
  export interface AgentLoopResult {
@@ -183,14 +201,6 @@ export interface AgentLoopResult {
183
201
  usage?: { inputTokens: number; outputTokens: number };
184
202
  }
185
203
 
186
- /** Env-tunable output budget (plan/gjc-inheritance.md B10, gjc settings-driven
187
- * output handling 계승): JEO_TOOL_OUTPUT_MAX caps the model-visible tool result;
188
- * the spill threshold tracks it so anything truncated stays artifact-recoverable. */
189
- function envOutputMax(): number {
190
- const raw = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
191
- return Number.isFinite(raw) && raw >= 500 && raw <= 200_000 ? Math.trunc(raw) : 4_000;
192
- }
193
- export const TOOL_OUTPUT_MAX = envOutputMax();
194
204
 
195
205
  /** Wall-clock budget for ONE agent turn (ms). JEO_TURN_MAX_MS overrides; 0 disables.
196
206
  * Default 30 minutes: long autonomous runs stay alive, while a turn that spins in
@@ -205,55 +215,6 @@ export function turnMaxMs(env: Record<string, string | undefined> = process.env)
205
215
  return 30 * 60 * 1000;
206
216
  }
207
217
 
208
- /**
209
- * Cap a tool result fed back to the model, keeping both ends: the head holds the
210
- * start (e.g. a file's top / a command's invocation) and the tail holds what's
211
- * usually decisive (test summaries, the final error). A pure head-cut loses that.
212
- */
213
- export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX): string {
214
- if (s.length <= max) return s;
215
- const head = Math.floor(max * 0.6);
216
- const tail = max - head;
217
- return `${s.slice(0, head)}\n…(${s.length - max} chars truncated)…\n${s.slice(s.length - tail)}`;
218
- }
219
-
220
- /** Tool output larger than this is spilled to a recoverable artifact file. Aligned
221
- * with `truncateToolOutput`'s cap so that whenever the model-visible result drops
222
- * content, the full output is recoverable via the artifact. */
223
- export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
224
-
225
- /**
226
- * Write an oversized tool result verbatim under `.jeo/artifacts/tool-results/` and
227
- * return the workspace-relative path (for the model to `read`). Best-effort: throws
228
- * are caught by the caller, which simply omits the artifact note.
229
- */
230
- /** Most recent tool-result artifacts to keep; older ones are pruned on each spill. */
231
- export const MAX_TOOL_ARTIFACTS = 50;
232
-
233
- /** Best-effort retention: keep the newest `MAX_TOOL_ARTIFACTS` files in `dir`, delete the rest. */
234
- async function pruneToolArtifacts(dir: string): Promise<void> {
235
- const files = await fs.readdir(dir).catch(() => [] as string[]);
236
- if (files.length <= MAX_TOOL_ARTIFACTS) return;
237
- const stamped = await Promise.all(
238
- files.map(async f => ({ f, m: (await fs.stat(path.join(dir, f)).catch(() => null))?.mtimeMs ?? 0 })),
239
- );
240
- stamped.sort((a, b) => b.m - a.m); // newest first
241
- for (const { f } of stamped.slice(MAX_TOOL_ARTIFACTS)) {
242
- await fs.rm(path.join(dir, f), { force: true }).catch(() => {});
243
- }
244
- }
245
-
246
- export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
247
- const dir = path.join(cwd, ".jeo", "artifacts", "tool-results");
248
- await fs.mkdir(dir, { recursive: true });
249
- const safeTool = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32) || "tool";
250
- const stamp = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
251
- const rel = path.join(".jeo", "artifacts", "tool-results", `${stamp}-${safeTool}.txt`);
252
- await fs.writeFile(path.join(cwd, rel), output, "utf-8");
253
- // Retention so a long session can't grow the artifact dir without bound.
254
- await pruneToolArtifacts(dir);
255
- return rel;
256
- }
257
218
 
258
219
  /** Levenshtein distance (small inputs: tool/command names). */
259
220
  function editDistance(a: string, b: string): number {
@@ -400,6 +361,29 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
400
361
  }
401
362
  await ev.onStep?.(step);
402
363
 
364
+ // MID-TURN steering (gjc parity): drain any additional user queries typed while
365
+ // the turn is running and inject them as user messages BEFORE this step's model
366
+ // call, so the live turn adapts immediately instead of deferring to the next
367
+ // prompt. A genuine new instruction resets the stall/failure guards (it is fresh
368
+ // progress, not a repeat) and earns a budget extension so the loop has room to act.
369
+ if (opts.steer) {
370
+ const pending = opts.steer();
371
+ for (const raw of pending) {
372
+ const text = (raw ?? "").trim();
373
+ if (!text) continue;
374
+ history.push({
375
+ role: "user",
376
+ content: `[mid-turn steering — additional instruction from the user; incorporate it now]\n${text}`,
377
+ });
378
+ ev.onSteer?.(text);
379
+ repeatCount = 0;
380
+ lastSig = "";
381
+ consecutiveFailures = 0;
382
+ recentStepSigs.length = 0;
383
+ budget.noteSteer?.();
384
+ }
385
+ }
386
+
403
387
  // MID-TURN context guard: a single long turn (60+ steps) otherwise grows the
404
388
  // history without bound — turn-boundary compaction never runs inside a turn,
405
389
  // and field evidence shows multi-million-token prompts degrading the model
@@ -421,6 +405,10 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
421
405
  const onToken = ev.onModelStream
422
406
  ? (delta: string) => { streamBuf += delta; ev.onModelStream!(streamBuf); }
423
407
  : undefined;
408
+ let reasonBuf = "";
409
+ const onReasoning = ev.onReasoningStream
410
+ ? (delta: string) => { reasonBuf += delta; ev.onReasoningStream!(reasonBuf); }
411
+ : undefined;
424
412
  let responseText: string;
425
413
  try {
426
414
  responseText = await invokeCallLlm(history, {
@@ -430,6 +418,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
430
418
  signal: opts.signal,
431
419
  onUsage: u => { acc.inputTokens += u.inputTokens ?? 0; acc.outputTokens += u.outputTokens ?? 0; sawUsage = true; },
432
420
  onToken,
421
+ onReasoning,
433
422
  // Make provider auto-retry visible: previously a rate-limited call sat in a
434
423
  // silent backoff wait, then surfaced "auto-retry was exhausted" with no trace
435
424
  // of the retries that DID happen.
@@ -623,6 +612,29 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
623
612
  continue;
624
613
  }
625
614
  }
615
+ // Steering that arrived DURING this final step (after the top-of-loop drain,
616
+ // while the model was generating its `done`): reopen the turn and handle it now
617
+ // instead of letting it bounce to the next prompt. Bounded by the step/time budget.
618
+ if (opts.steer) {
619
+ const pending = opts.steer().map(s => (s ?? "").trim()).filter(Boolean);
620
+ if (pending.length) {
621
+ history.push({ role: "assistant", content: responseText });
622
+ for (const text of pending) {
623
+ history.push({
624
+ role: "user",
625
+ content: `[mid-turn steering — additional instruction from the user; incorporate it now before finishing]\n${text}`,
626
+ });
627
+ ev.onSteer?.(text);
628
+ }
629
+ repeatCount = 0;
630
+ lastSig = "";
631
+ consecutiveFailures = 0;
632
+ recentStepSigs.length = 0;
633
+ budget.noteSteer();
634
+ step++;
635
+ continue;
636
+ }
637
+ }
626
638
  return finish({ done: true, steps: step, doneReason: (toolCalls[0].arguments?.reason as string) ?? "" });
627
639
  }
628
640
 
@@ -728,7 +740,8 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
728
740
  output = preHookResult.error + (preHookResult.output ? `\n${preHookResult.output}` : "");
729
741
  } else {
730
742
  try {
731
- const res = await handler(args ?? {}, cwd);
743
+ const onProgress = ev.onToolProgress ? (partial: string) => ev.onToolProgress!(tool, partial) : undefined;
744
+ const res = await handler(args ?? {}, cwd, onProgress);
732
745
  success = res.success;
733
746
  output = res.success ? res.output : (res.error ? (res.output ? `${res.error}\n${res.output}` : res.error) : res.output);
734
747
  } catch (err: any) {
@@ -820,15 +833,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
820
833
 
821
834
  ev.onToolResult?.(call.tool, res.success, res.output);
822
835
 
823
- const minimized = minimizeToolOutput(res.output, call.tool);
824
- const visible = minimized.text;
825
- let resultBody = truncateToolOutput(visible);
826
- if (res.output.length > TOOL_SPILL_THRESHOLD) {
827
- const artifact = await spillToolResult(call.tool, res.output, cwd).catch(() => null);
828
- if (artifact) {
829
- resultBody += `\n[full output (${res.output.length} chars) saved to ${artifact} — read it for the elided middle]`;
830
- }
831
- }
836
+ const resultBody = await formatToolResultBody(call.tool, res.output, cwd);
832
837
 
833
838
  const { diags: hookDiags, ran: hooksRan } = await runPostTurnHooks(
834
839
  cwd,
package/src/agent/loop.ts CHANGED
@@ -19,6 +19,8 @@ export interface ChatOptions {
19
19
  * delivered here (concatenation equals the returned string). Absent ⇒ a single
20
20
  * non-streaming `call()` (unchanged behavior for non-interactive/test callers). */
21
21
  onToken?: (delta: string) => void;
22
+ /** Streaming sink for native reasoning/thinking deltas (drives the dimmed live view). */
23
+ onReasoning?: (delta: string) => void;
22
24
  }
23
25
 
24
26
  const manager = createModelManager();
@@ -180,6 +180,16 @@ export class StepBudget {
180
180
  if (this.window.length > this.cfg.windowSize) this.window.shift();
181
181
  }
182
182
 
183
+ /** A mid-turn steering message arrived — fresh, user-driven work. Grant headroom
184
+ * (capped at the hard cap, without consuming the extension budget) and clear the
185
+ * scoring window so the new instruction is never declined by the previous
186
+ * sub-task's stall/failure signals. */
187
+ noteSteer(): void {
188
+ this.window.length = 0;
189
+ this.novelSinceExtension = 0;
190
+ this.currentLimit = Math.min(this.currentLimit + this.cfg.extensionSteps, this.cfg.hardCap);
191
+ }
192
+
183
193
  /** Progress over the recent window: ok count, total, distinct signatures. */
184
194
  progress(): { ok: number; total: number; distinct: number } {
185
195
  const ok = this.window.filter(r => r.success).length;
@@ -0,0 +1,131 @@
1
+ /**
2
+ * In-process detached-subagent registry (gjc `subagent`/`job` parity, scoped down
3
+ * to one runtime). The synchronous `task` tool blocks the parent until a subagent
4
+ * finishes; a DETACHED launch registers the run here and returns immediately, so
5
+ * the parent can keep working and later list / inspect / await / cancel it via the
6
+ * `subagent` control tool. Concurrency is real (JS event loop): a detached run's
7
+ * awaits interleave with the parent's between steps.
8
+ *
9
+ * Lifecycle is bounded to the turn that created the registry — `cancelAll()` on
10
+ * turn teardown aborts every still-running subagent so none carries over into the next turn (each runner must honor its AbortSignal to actually stop).
11
+ */
12
+ import type { ToolResult } from "./tools";
13
+
14
+ export type SubagentStatus = "running" | "completed" | "failed" | "cancelled";
15
+
16
+ export interface SubagentRecord {
17
+ /** Stable id, e.g. "executor-1". */
18
+ id: string;
19
+ role: string;
20
+ /** The assignment text (trimmed for display). */
21
+ task: string;
22
+ status: SubagentStatus;
23
+ startedAt: number;
24
+ finishedAt?: number;
25
+ /** Whether the finished run reported success (contract satisfied). */
26
+ success?: boolean;
27
+ /** Final subagent report/output, set once the run settles. */
28
+ result?: string;
29
+ }
30
+
31
+ interface Entry {
32
+ record: SubagentRecord;
33
+ promise: Promise<void>;
34
+ abort: AbortController;
35
+ }
36
+
37
+ /** A detached run: receives its own AbortSignal and resolves to the subagent's
38
+ * final ToolResult. The runner is responsible for streaming live events itself. */
39
+ export type DetachedRunner = (signal: AbortSignal) => Promise<ToolResult>;
40
+
41
+ export class SubagentRegistry {
42
+ private readonly entries = new Map<string, Entry>();
43
+ private readonly seq = new Map<string, number>();
44
+
45
+ /** Register and START a detached run; returns the (running) record immediately. */
46
+ launch(role: string, task: string, runner: DetachedRunner): SubagentRecord {
47
+ const n = (this.seq.get(role) ?? 0) + 1;
48
+ this.seq.set(role, n);
49
+ const id = `${role}-${n}`;
50
+ const abort = new AbortController();
51
+ const record: SubagentRecord = {
52
+ id,
53
+ role,
54
+ task: task.length > 200 ? task.slice(0, 197) + "…" : task,
55
+ status: "running",
56
+ startedAt: Date.now(),
57
+ };
58
+ const promise = (async () => {
59
+ try {
60
+ const res = await runner(abort.signal);
61
+ // A cancel that already fired wins — don't overwrite the terminal state.
62
+ if (record.status === "cancelled") return;
63
+ record.status = res.success ? "completed" : "failed";
64
+ record.success = res.success;
65
+ record.result = res.output || res.error || "";
66
+ } catch (err) {
67
+ if (record.status === "cancelled") return;
68
+ record.status = "failed";
69
+ record.result = err instanceof Error ? err.message : String(err);
70
+ } finally {
71
+ if (record.finishedAt === undefined) record.finishedAt = Date.now();
72
+ }
73
+ })();
74
+ this.entries.set(id, { record, promise, abort });
75
+ return record;
76
+ }
77
+
78
+ list(): SubagentRecord[] {
79
+ return [...this.entries.values()].map(e => e.record);
80
+ }
81
+
82
+ get(id: string): SubagentRecord | undefined {
83
+ return this.entries.get(id)?.record;
84
+ }
85
+
86
+ running(): SubagentRecord[] {
87
+ return this.list().filter(r => r.status === "running");
88
+ }
89
+
90
+ /** Wait for the given ids (or all running, when empty). With `timeoutMs` the wait
91
+ * is bounded — unfinished runs simply stay "running" in the returned snapshot. */
92
+ async awaitIds(ids: string[], timeoutMs?: number): Promise<SubagentRecord[]> {
93
+ const targets = ids
94
+ .map(id => this.entries.get(id))
95
+ .filter((e): e is Entry => e !== undefined);
96
+ const all = Promise.all(targets.map(e => e.promise)).then(() => {});
97
+ if (timeoutMs !== undefined && timeoutMs > 0) {
98
+ let handle: ReturnType<typeof setTimeout> | undefined;
99
+ const timer = new Promise<void>(resolve => {
100
+ handle = setTimeout(resolve, timeoutMs);
101
+ });
102
+ await Promise.race([all, timer]);
103
+ if (handle !== undefined) clearTimeout(handle);
104
+ } else {
105
+ await all;
106
+ }
107
+ return targets.map(e => e.record);
108
+ }
109
+
110
+ /** Cancel the given ids (or all running, when empty): aborts the run and marks the
111
+ * record cancelled. Already-terminal records are returned unchanged. */
112
+ cancel(ids: string[]): SubagentRecord[] {
113
+ const out: SubagentRecord[] = [];
114
+ for (const id of ids) {
115
+ const e = this.entries.get(id);
116
+ if (!e) continue;
117
+ if (e.record.status === "running") {
118
+ e.record.status = "cancelled";
119
+ e.record.finishedAt = Date.now();
120
+ e.abort.abort();
121
+ }
122
+ out.push(e.record);
123
+ }
124
+ return out;
125
+ }
126
+
127
+ /** Abort every still-running subagent (turn teardown / Ctrl-C). */
128
+ cancelAll(): SubagentRecord[] {
129
+ return this.cancel(this.running().map(r => r.id));
130
+ }
131
+ }
@@ -0,0 +1,89 @@
1
+ /**
2
+ * `subagent` control tool (#9) — the parent's handle on DETACHED subagents launched
3
+ * via `task {detached:true}`. Mirrors gjc's `subagent`/`job` control surface, scoped
4
+ * to an in-process registry: list, inspect, await (optionally bounded), and cancel.
5
+ *
6
+ * Out of scope here (separate subsystems, not stubbed): live peer messaging (IRC)
7
+ * and pause/resume — a step-budget loop has no safe mid-step checkpoint to resume
8
+ * from, so those are intentionally absent rather than faked.
9
+ */
10
+ import type { ToolHandler } from "./engine";
11
+ import type { ToolResult } from "./tools";
12
+ import type { SubagentRegistry, SubagentRecord } from "./subagent-registry";
13
+
14
+ /** One-line protocol description appended to the launch system prompt. */
15
+ export const SUBAGENT_TOOL_PROTOCOL_LINE =
16
+ `subagent {action:"list"|"inspect"|"await"|"cancel", ids?, timeoutMs?} — control DETACHED ` +
17
+ `subagents started with task{detached:true}. 'await' blocks (optionally up to timeoutMs ms) and ` +
18
+ `returns their reports; 'inspect' shows status + result; 'cancel' aborts them. Omit ids to target all running.`;
19
+
20
+ function elapsed(rec: SubagentRecord): string {
21
+ const end = rec.finishedAt ?? Date.now();
22
+ return `${Math.max(0, Math.round((end - rec.startedAt) / 1000))}s`;
23
+ }
24
+
25
+ function rowLine(rec: SubagentRecord): string {
26
+ return `- ${rec.id} [${rec.status.toUpperCase()}] ${elapsed(rec)} · ${rec.task}`;
27
+ }
28
+
29
+ function detailBlock(rec: SubagentRecord): string {
30
+ const head = rowLine(rec);
31
+ if (rec.status === "running" || !rec.result) return head;
32
+ return `${head}\n${rec.result}`;
33
+ }
34
+
35
+ function idsOf(args: Record<string, any>): string[] {
36
+ if (Array.isArray(args.ids)) return args.ids.map((x: unknown) => String(x));
37
+ if (args.id !== undefined) return [String(args.id)];
38
+ return [];
39
+ }
40
+
41
+ export function createSubagentTool(registry: SubagentRegistry): ToolHandler {
42
+ return async (args: Record<string, any>, _cwd: string): Promise<ToolResult> => {
43
+ const action = String(args.action ?? "list").trim().toLowerCase();
44
+ const ids = idsOf(args);
45
+
46
+ if (action === "list") {
47
+ const rows = registry.list();
48
+ if (rows.length === 0) {
49
+ return { success: true, output: "No detached subagents this turn. Launch one with task {detached:true}." };
50
+ }
51
+ const running = rows.filter(r => r.status === "running").length;
52
+ return { success: true, output: `${rows.length} subagent(s), ${running} running:\n${rows.map(rowLine).join("\n")}` };
53
+ }
54
+
55
+ if (action === "inspect") {
56
+ const targets = (ids.length ? ids.map(id => registry.get(id)) : registry.list())
57
+ .filter((r): r is SubagentRecord => r !== undefined);
58
+ if (targets.length === 0) {
59
+ return { success: false, output: "", error: ids.length ? `No subagent matches ${ids.join(", ")}.` : "No detached subagents this turn." };
60
+ }
61
+ return { success: true, output: targets.map(detailBlock).join("\n\n") };
62
+ }
63
+
64
+ if (action === "await") {
65
+ const targets = ids.length ? ids : registry.running().map(r => r.id);
66
+ if (targets.length === 0) {
67
+ return { success: true, output: "No running subagents to await." };
68
+ }
69
+ const timeoutMs = typeof args.timeoutMs === "number" && args.timeoutMs > 0 ? args.timeoutMs : undefined;
70
+ const recs = await registry.awaitIds(targets, timeoutMs);
71
+ const stillRunning = recs.filter(r => r.status === "running").length;
72
+ const head = stillRunning > 0
73
+ ? `Awaited ${recs.length} subagent(s); ${stillRunning} still running after the ${timeoutMs}ms timeout — await again or cancel.`
74
+ : `Awaited ${recs.length} subagent(s); all settled.`;
75
+ return { success: stillRunning === 0, output: `${head}\n\n${recs.map(detailBlock).join("\n\n")}` };
76
+ }
77
+
78
+ if (action === "cancel") {
79
+ const targets = ids.length ? ids : registry.running().map(r => r.id);
80
+ if (targets.length === 0) {
81
+ return { success: true, output: "No running subagents to cancel." };
82
+ }
83
+ const recs = registry.cancel(targets);
84
+ return { success: true, output: `Cancelled ${recs.length} subagent(s):\n${recs.map(rowLine).join("\n")}` };
85
+ }
86
+
87
+ return { success: false, output: "", error: `Unknown subagent action '${action}'. Use list | inspect | await | cancel.` };
88
+ };
89
+ }