jeo-code 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -178,20 +178,39 @@ function renderRolePrompt(template: string, role: SubagentRole): string {
178
178
  .trim();
179
179
  }
180
180
 
181
/**
 * Report whether a required report section is present AND non-empty.
 *
 * The section body is the span between the first occurrence of `marker` in
 * `text` and the nearest following occurrence of any other required marker (or
 * the end of `text`). A label-only section ("Summary:" with an empty body) is
 * not a real report, so it fails.
 *
 * @param text       The report to inspect.
 * @param marker     The section label to look for.
 * @param allMarkers Every required label; used to find where the section ends.
 * @returns `true` when `marker` occurs and its section carries non-whitespace content.
 */
function markerHasContent(text: string, marker: string, allMarkers: readonly string[]): boolean {
  const start = text.indexOf(marker);
  if (start < 0) return false;
  const bodyStart = start + marker.length;
  let bodyEnd = text.length;
  for (const other of allMarkers) {
    // An empty label matches at every offset (`indexOf("", n) === n`), which would
    // collapse every section to zero length. It cannot delimit anything, so skip it.
    if (other === "" || other === marker) continue;
    const next = text.indexOf(other, bodyStart);
    if (next >= 0 && next < bodyEnd) bodyEnd = next;
  }
  return text.slice(bodyStart, bodyEnd).trim().length > 0;
}
196
+
181
197
  export function validateSubagentDoneReason(role: SubagentRole, reason: string | undefined): { ok: boolean; missing?: string[] } {
182
198
  const trimmed = (reason ?? "").trim();
183
199
  if (!trimmed) return { ok: false, missing: ["done.reason"] };
200
+ const markers = role.requiredDoneMarkers ?? [];
201
+ // Each required section must be PRESENT and carry non-empty content — a report of
202
+ // bare labels (no prose) is rejected, which the substring-presence check let pass.
203
+ const sectionMissing = markers.filter(m => !markerHasContent(trimmed, m, markers));
184
204
  if (role.id === "critic") {
185
205
  const verdicts = ["[OKAY]", "[ITERATE]", "[REJECT]"];
186
206
  const hasVerdict = verdicts.some(marker => trimmed.startsWith(marker));
187
207
  const missing = [
188
208
  ...(hasVerdict ? [] : ["[OKAY]|[ITERATE]|[REJECT]"]),
189
- ...((role.requiredDoneMarkers ?? []).filter(marker => !trimmed.includes(marker))),
209
+ ...sectionMissing,
190
210
  ];
191
211
  return { ok: missing.length === 0, missing };
192
212
  }
193
- const missing = (role.requiredDoneMarkers ?? []).filter(marker => !trimmed.includes(marker));
194
- return { ok: missing.length === 0, missing };
213
+ return { ok: sectionMissing.length === 0, missing: sectionMissing };
195
214
  }
196
215
 
197
216
  /** Build a role-specific system prompt from its dedicated template. */
@@ -26,6 +26,7 @@ import {
26
26
  validateSubagentDoneReason,
27
27
  } from "./subagents";
28
28
  import { thinkingMaxTokens } from "../ai/model-manager";
29
+ import type { SubagentRegistry } from "./subagent-registry";
29
30
 
30
31
  /** Lifecycle event emitted while a delegated subagent runs. */
31
32
  export interface TaskSubEvent {
@@ -41,6 +42,12 @@ export interface TaskSubEvent {
41
42
  summary?: string;
42
43
  /** Model selected for this subagent run. */
43
44
  model?: string;
45
+ /** 1-based task position within a fan-out batch (omitted for single-task runs). */
46
+ index?: number;
47
+ /** Total tasks in the fan-out batch (omitted for single-task runs). */
48
+ total?: number;
49
+ /** Provider token usage for the finished subagent (done events only). */
50
+ tokens?: { input: number; output: number };
44
51
  }
45
52
 
46
53
  export interface TaskToolOptions {
@@ -51,11 +58,46 @@ export interface TaskToolOptions {
51
58
  signal?: AbortSignal;
52
59
  /** Optional live sink (e.g. plain-stream rendering of nested progress). */
53
60
  onEvent?: (ev: TaskSubEvent) => void;
61
+ /** Mid-turn steering drain (gjc parity): an additional user query typed while a
62
+ * subagent works is forwarded live. Single-task runs and the SERIAL executor
63
+ * batch (concurrency 1) forward to the one active subagent. A parallel read-only
64
+ * batch routes through a broadcast hub (createSteerHub) so every running worker
65
+ * sees each message exactly once. Unconsumed messages stay for the parent. */
66
+ steer?: () => string[];
67
+ /** When present, a `task` call with `detached: true` registers a background run
68
+ * here and returns immediately; the parent controls it via the `subagent` tool. */
69
+ registry?: SubagentRegistry;
54
70
  }
55
71
 
56
72
  /** Max concurrent read-only subagents in a fan-out batch. */
57
73
  const MAX_FANOUT = 4;
58
74
 
75
+ /** Hard cap on a SERIAL (mutating executor) fan-out batch: it runs one task at a
76
+ * time inside one blocking tool call, so an unbounded queue would monopolize the
77
+ * parent turn. Split larger efforts into sequential task calls. */
78
+ const MAX_SERIAL_EXECUTOR = 6;
79
+
80
/**
 * Broadcast steering hub for a fan-out batch.
 *
 * Every message pulled from `drain` is appended to one shared, append-only
 * history. Each reader obtained from `worker()` keeps its own position in that
 * history, so every reader receives each message exactly once — no matter which
 * reader happened to pull it out of the parent inbox.
 *
 * @param drain Parent inbox drain; when omitted, `worker()` yields `undefined`.
 * @returns An object whose `worker()` creates one independent reader.
 */
function createSteerHub(drain?: () => string[]) {
  const history: string[] = [];

  const worker = (): (() => string[]) | undefined => {
    if (!drain) return undefined;
    const source = drain;
    // Position of the first history entry this reader has not yet returned.
    let seen = 0;
    return function pull(): string[] {
      for (const message of source()) history.push(message);
      const unseen = history.slice(seen);
      seen = history.length;
      return unseen;
    };
  };

  return { worker };
}
100
+
59
101
  /** One-line protocol description appended to the launch system prompt. Pass a
60
102
  * config so CONFIG-DECLARED custom roles are advertised to the model too. */
61
103
  export function taskToolProtocolLine(config?: Pick<Config, "subagents">): string {
@@ -134,13 +176,26 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
134
176
  taskText: string,
135
177
  context: string,
136
178
  cwd: string,
179
+ extra: {
180
+ steer?: () => string[];
181
+ slot?: { index: number; total: number };
182
+ projectContext?: Awaited<ReturnType<typeof loadProjectContext>>;
183
+ /** Overrides opts.signal — a detached run uses its own registry signal so it
184
+ * is cancellable independently of the parent turn. */
185
+ signal?: AbortSignal;
186
+ } = {},
137
187
  ): Promise<ToolResult> => {
188
+ const { steer, slot, projectContext: preloadedContext, signal: signalOverride } = extra;
189
+ // Tag every live event with its fan-out slot so a parent monitor can tell
190
+ // task 1 from task 3 when several same-role subagents stream concurrently.
191
+ const emit = (ev: TaskSubEvent) =>
192
+ opts.onEvent?.(slot ? { ...ev, index: slot.index, total: slot.total } : ev);
138
193
  const model = resolveSubagentModel(role.id, opts.config);
139
194
  const maxSteps = resolveSubagentMaxSteps(role.id, opts.config);
140
195
  // gjc parity: a role may pin its own reasoning budget; absent = inherit the
141
196
  // session/global thinking level (the "(inherit)" row in the picker).
142
197
  const thinking = resolveSubagentThinking(role.id, opts.config) ?? opts.config.thinkingLevel;
143
- const projectContext = await loadProjectContext(cwd);
198
+ const projectContext = preloadedContext ?? await loadProjectContext(cwd);
144
199
  const history: Message[] = [
145
200
  { role: "system", content: withProjectContext(subagentSystemPrompt(role), projectContext) },
146
201
  { role: "user", content: `${taskText}${context}` },
@@ -149,10 +204,13 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
149
204
  let lastTarget = "";
150
205
  let currentStep = 0;
151
206
  // Round-8 (architect ref 7-Round7Workflow): count the subagent's SUCCESSFUL
152
- // mutating calls so the parent can audit a "Changed Files:" claim against
153
- // observed reality instead of trusting the report's substring markers.
154
- let mutationsOk = 0;
155
- opts.onEvent?.({ role: role.id, kind: "start", detail: taskText, maxSteps, model });
207
+ // calls so the parent can audit a "Changed Files:" claim against observed
208
+ // reality. File-writing tools (write/edit/mkdir/delete) are tracked apart from
209
+ // bash: read-only bash (e.g. `bun test`) MUST NOT count as edit evidence, but
210
+ // bash CAN mutate, so the audit message distinguishes the two cases.
211
+ let fileMutations = 0;
212
+ let bashRuns = 0;
213
+ emit({ role: role.id, kind: "start", detail: taskText, maxSteps, model });
156
214
  const result = await runAgentLoop(history, {
157
215
  cwd,
158
216
  model,
@@ -161,7 +219,8 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
161
219
  // Bounded delegation: a subagent's step contract stays exact — the parent
162
220
  // owns any retry/extension decision, so the gjc retry flow is disabled here.
163
221
  budget: { maxExtensions: 0 },
164
- signal: opts.signal,
222
+ signal: signalOverride ?? opts.signal,
223
+ steer,
165
224
  tools: subagentToolset(role),
166
225
  events: {
167
226
  onStep: n => { currentStep = n; },
@@ -169,35 +228,45 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
169
228
  if (invocation && invocation.tool && invocation.tool !== "done") {
170
229
  lastTarget = toolTarget(invocation.tool, invocation.arguments);
171
230
  trace.push(` step ${currentStep}/${maxSteps}: ${lastTarget}`);
172
- opts.onEvent?.({ role: role.id, kind: "step", detail: lastTarget, step: currentStep, maxSteps, model });
231
+ emit({ role: role.id, kind: "step", detail: lastTarget, step: currentStep, maxSteps, model });
173
232
  }
174
233
  },
175
234
  onToolResult: (tool, success, output) => {
176
- if (success && (tool === "write" || tool === "edit" || tool === "bash")) mutationsOk++;
235
+ if (success) {
236
+ if (tool === "write" || tool === "edit" || tool === "mkdir" || tool === "delete") fileMutations++;
237
+ else if (tool === "bash") bashRuns++;
238
+ }
177
239
  const label = lastTarget || tool;
178
240
  const summary = firstUsefulLine(output);
179
241
  const suffix = summary ? ` — ${summary}` : "";
180
242
  trace.push(` ${success ? "✓" : "✗"} ${label}${suffix}`);
181
- opts.onEvent?.({ role: role.id, kind: "tool", detail: label, success, summary, step: currentStep, maxSteps, model });
243
+ emit({ role: role.id, kind: "tool", detail: label, success, summary, step: currentStep, maxSteps, model });
182
244
  lastTarget = "";
183
245
  },
184
246
  // Retry notices (rate-limit backoff etc.) surface as live "step" beats so the
185
247
  // parent's monitor shows WHY a subagent is pausing instead of going silent.
186
- onNotice: msg => opts.onEvent?.({ role: role.id, kind: "step", detail: msg, step: currentStep, maxSteps, model }),
248
+ onNotice: msg => emit({ role: role.id, kind: "step", detail: msg, step: currentStep, maxSteps, model }),
249
+ // Mid-turn steering reached this subagent: surface it as a live beat so the
250
+ // parent's monitor shows the redirect instead of an unexplained behavior change.
251
+ onSteer: text => emit({ role: role.id, kind: "step", detail: `↳ steer: ${text}`, step: currentStep, maxSteps, model }),
187
252
  },
188
253
  });
189
254
  const reason = result.doneReason?.trim() || `(subagent reached the ${result.steps}-step limit without signaling done)`;
190
255
  const validation = validateSubagentDoneReason(role, reason);
191
256
  const complete = result.done && validation.ok;
192
257
  const detail = validation.ok ? reason : `${reason}\n\n[contract incomplete: missing ${validation.missing?.join(", ")}]`;
193
- opts.onEvent?.({ role: role.id, kind: "done", detail, success: complete, step: result.steps, maxSteps, model });
194
- const header = `[${role.title} subagent] ${complete ? "completed" : "stopped"} in ${result.steps} step(s) on ${model}.`;
258
+ emit({ role: role.id, kind: "done", detail, success: complete, step: result.steps, maxSteps, model, tokens: result.usage ? { input: result.usage.inputTokens, output: result.usage.outputTokens } : undefined });
259
+ const tokNote = result.usage ? `, ${result.usage.inputTokens + result.usage.outputTokens} tok` : "";
260
+ const header = `[${role.title} subagent] ${complete ? "completed" : "stopped"} in ${result.steps} step(s) on ${model}${tokNote}.`;
195
261
  const body = trace.length ? `\nSteps:\n${trace.join("\n")}` : "";
196
- // Parent-side audit: a mutating role that "completed" without ONE successful
197
- // write/edit/bash cannot have changed anything — flag the claim as unverified
198
- // (the report's markers prove formatting, not work).
199
- const audit = complete && !role.readOnly && mutationsOk === 0
200
- ? `\n[parent audit] No successful write/edit/bash was observed in this run — treat any "Changed Files:" claims above as UNVERIFIED.`
262
+ // Parent-side audit: a mutating role that "completed" without a successful file
263
+ // mutation (write/edit/mkdir/delete) likely changed nothing — flag the claim.
264
+ // bash is tracked separately: it CAN mutate, so an only-bash run downgrades to
265
+ // "verify independently" instead of the stronger UNVERIFIED.
266
+ const audit = complete && !role.readOnly && fileMutations === 0
267
+ ? bashRuns === 0
268
+ ? `\n[parent audit] No successful write/edit/bash was observed in this run — treat any "Changed Files:" claims above as UNVERIFIED.`
269
+ : `\n[parent audit] No successful write/edit was observed (only bash ran); bash may or may not have mutated files — verify any "Changed Files:" claims above independently.`
201
270
  : "";
202
271
  return { success: complete, output: `${header}${body}\n\nResult:\n${fenceSubagentReport(detail)}${audit}` };
203
272
  };
@@ -225,6 +294,18 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
225
294
  if (items.length === 0) {
226
295
  return { success: false, output: "", error: "task fan-out requires a non-empty 'tasks' array of assignments." };
227
296
  }
297
+ // #5: the mutating executor fan-out is SERIAL (concurrency 1) and blocks the
298
+ // turn; cap it regardless of justification so a huge queue can't monopolize
299
+ // the parent. Split larger efforts into sequential task calls.
300
+ if (!role.readOnly && items.length > MAX_SERIAL_EXECUTOR) {
301
+ return {
302
+ success: false,
303
+ output: "",
304
+ error:
305
+ `Executor fan-out of ${items.length} exceeds the serial cap of ${MAX_SERIAL_EXECUTOR}. ` +
306
+ `The mutating executor runs one task at a time and blocks the turn — split into ≤${MAX_SERIAL_EXECUTOR}-task batches or sequential task calls.`,
307
+ };
308
+ }
228
309
  // Spawn-gate lite (plan/gjc-inheritance.md B9, gjc spawn-gate 계승): a batch
229
310
  // wider than MAX_FANOUT is refused BEFORE any subagent launches unless the
230
311
  // model justifies the parallelism — silent capping hid the cost decision.
@@ -245,13 +326,22 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
245
326
  // Read-only roles fan out concurrently (bounded). The mutating executor is serialized
246
327
  // (concurrency 1) so parallel subagents can't race on the same files.
247
328
  const limit = role.readOnly ? Math.min(items.length, MAX_FANOUT) : 1;
329
+ // Load project context ONCE per batch instead of re-scanning AGENTS.md for
330
+ // every fan-out task (redundant IO + duplicated tokens).
331
+ const batchContext = await loadProjectContext(cwd);
248
332
  const results: ToolResult[] = new Array(items.length);
249
333
  let next = 0;
334
+ // #7: broadcast steering hub — each concurrent worker sees every parent
335
+ // steer message exactly once (safe even for parallel read-only fan-out).
336
+ const steerHub = createSteerHub(opts.steer);
250
337
  const worker = async () => {
338
+ // One steer cursor per concurrent worker (not per item) so a worker that
339
+ // processes several items sees each parent message once across them all.
340
+ const workerSteer = steerHub.worker();
251
341
  while (true) {
252
342
  const i = next++;
253
343
  if (i >= items.length) return;
254
- results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd);
344
+ results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd, { slot: { index: i + 1, total: items.length }, projectContext: batchContext, steer: workerSteer });
255
345
  }
256
346
  };
257
347
  await Promise.all(Array.from({ length: limit }, () => worker()));
@@ -267,6 +357,20 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
267
357
  if (!taskText) {
268
358
  return { success: false, output: "", error: `task tool requires a non-empty 'task' (or a 'tasks' array). Valid roles: ${subagentRoleIds(opts.config).join(", ")}.` };
269
359
  }
270
- return runOne(role, taskText, ctx(args.context), cwd);
360
+ // Detached form (#9): register a background run and return immediately so the
361
+ // parent can keep working, then list/inspect/await/cancel via the `subagent`
362
+ // tool. Steering is not forwarded to a detached run (no single active drainer).
363
+ if (args.detached === true && opts.registry) {
364
+ const rec = opts.registry.launch(role.id, taskText, signal =>
365
+ runOne(role, taskText, ctx(args.context), cwd, { signal }),
366
+ );
367
+ return {
368
+ success: true,
369
+ output:
370
+ `[detached] launched ${role.title} subagent '${rec.id}'. It runs in the background — ` +
371
+ `keep working, then use the 'subagent' tool ({action:"await"|"list"|"inspect"|"cancel", ids?}) to collect its result.`,
372
+ };
373
+ }
374
+ return runOne(role, taskText, ctx(args.context), cwd, { steer: opts.steer });
271
375
  };
272
376
  }
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Tool-result output handling — the model-visible output budget, both-ends
3
+ * truncation, recoverable artifact spilling, and the minimize→truncate→spill
4
+ * orchestration the agent loop applies to every tool result.
5
+ *
6
+ * Extracted from `engine.ts` (single-responsibility: the loop drives steps; this
7
+ * module owns how a tool's raw output is shaped before it re-enters context).
8
+ * `engine.ts` re-exports the public surface for backward compatibility.
9
+ */
10
+ import * as fs from "node:fs/promises";
11
+ import * as path from "node:path";
12
+ import { jeoEnv } from "../util/env";
13
+ import { minimizeToolOutput } from "./output-minimizer";
14
+
15
/**
 * Accept `raw` as a character budget only when it is a finite number inside
 * `[min, max]`; the accepted value is truncated to an integer. Anything else
 * (unset, non-numeric, out of range) yields `fallback`.
 */
function budgetOrDefault(raw: number, min: number, max: number, fallback: number): number {
  if (!Number.isFinite(raw)) return fallback;
  if (raw < min || raw > max) return fallback;
  return Math.trunc(raw);
}

/** Env-tunable output budget (plan/gjc-inheritance.md B10, inherited from gjc's
 * settings-driven output handling): JEO_TOOL_OUTPUT_MAX caps the model-visible
 * tool result (accepted range 500..200k, default 4k); the spill threshold tracks
 * it so anything truncated stays artifact-recoverable. */
function envOutputMax(): number {
  return budgetOrDefault(Number(jeoEnv("TOOL_OUTPUT_MAX") ?? ""), 500, 200_000, 4_000);
}
export const TOOL_OUTPUT_MAX = envOutputMax();

/** Read results are deliberate, contiguous file slices the model explicitly asked
 * for (via lineRange), already line-capped by the read tool and recoverable via
 * spill. They get a much larger model-visible budget than the generic
 * noise-control cap, so a 500-line read is not silently re-shrunk to ~100 lines.
 * JEO_READ_OUTPUT_MAX overrides (accepted range 1k..200k, default 32k). */
function envReadOutputMax(): number {
  return budgetOrDefault(Number(jeoEnv("READ_OUTPUT_MAX") ?? ""), 1_000, 200_000, 32_000);
}
export const READ_OUTPUT_MAX = envReadOutputMax();
34
+
35
/** True when cutting `s` at `index` would separate a UTF-16 surrogate pair. */
function splitsSurrogatePair(s: string, index: number): boolean {
  if (index <= 0 || index >= s.length) return false;
  const before = s.charCodeAt(index - 1);
  const after = s.charCodeAt(index);
  return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

/**
 * Cap a tool result fed back to the model. Default mode keeps both ends: the head
 * holds the start (e.g. a command's invocation) and the tail holds what's usually
 * decisive (test summaries, the final error). A pure head-cut loses that.
 *
 * `headOnly` keeps only the start — for `read` results, which are a contiguous
 * file slice the model explicitly requested; head/tail splitting would mangle the
 * code into two non-adjacent fragments.
 *
 * Cut points never land inside a surrogate pair: a cut that would split one is
 * moved so the whole pair is dropped, which keeps lone surrogates out of the
 * model-visible text. The reported count is the number of UTF-16 code units
 * actually removed.
 *
 * @param s        Raw text to cap.
 * @param max      Budget in UTF-16 code units. `NaN` and negative values are
 *                 treated as 0; fractional values are floored.
 * @param headOnly Keep only the start of `s` instead of both ends.
 * @returns `s` unchanged when it fits, otherwise the kept text plus a truncation notice.
 */
export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX, headOnly = false): string {
  // Normalise the budget: a NaN budget would otherwise make `slice` return the
  // whole string, and a negative one would slice relative to the end.
  const limit = Number.isNaN(max) ? 0 : Math.max(0, Math.floor(max));
  if (s.length <= limit) return s;
  if (headOnly) {
    const cut = splitsSurrogatePair(s, limit) ? limit - 1 : limit;
    return `${s.slice(0, cut)}\n…(${s.length - cut} chars truncated; narrow the lineRange or read the spilled artifact)…`;
  }
  const head = Math.floor(limit * 0.6);
  const tail = limit - head;
  // Shrink the head / advance the tail by one unit when the nominal cut would
  // leave half of a surrogate pair behind.
  const headEnd = splitsSurrogatePair(s, head) ? head - 1 : head;
  const nominalTailStart = s.length - tail;
  const tailStart = splitsSurrogatePair(s, nominalTailStart) ? nominalTailStart + 1 : nominalTailStart;
  return `${s.slice(0, headEnd)}\n…(${tailStart - headEnd} chars truncated)…\n${s.slice(tailStart)}`;
}
53
+
54
+ /** Non-read tool output larger than this is spilled to a recoverable artifact file.
55
+ * Aligned with `truncateToolOutput`'s generic cap so that whenever the model-visible
56
+ * result drops content, the full output is recoverable via the artifact. (`read`
57
+ * spills against the larger READ_OUTPUT_MAX in the result loop.) */
58
+ export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
59
+
60
/** Most recent tool-result artifacts to keep; older ones are pruned on each spill. */
export const MAX_TOOL_ARTIFACTS = 50;

/**
 * Best-effort retention: keep the newest `MAX_TOOL_ARTIFACTS` entries in `dir`
 * and delete the rest.
 *
 * Entries are ranked by modification time, newest first. Entries whose mtime is
 * identical (or could not be read, which counts as 0) are ranked by name in
 * descending order, so the outcome does not depend on directory listing order.
 * Every failure (unreadable directory, failed stat, failed delete) is swallowed
 * on purpose: retention must never fail the spill that triggered it.
 *
 * @param dir Directory holding the artifacts.
 */
async function pruneToolArtifacts(dir: string): Promise<void> {
  const names = await fs.readdir(dir).catch(() => [] as string[]);
  if (names.length <= MAX_TOOL_ARTIFACTS) return;
  const stamped = await Promise.all(
    names.map(async name => {
      const info = await fs.stat(path.join(dir, name)).catch(() => null);
      return { name, mtimeMs: info?.mtimeMs ?? 0 };
    }),
  );
  stamped.sort((a, b) => {
    if (a.mtimeMs !== b.mtimeMs) return b.mtimeMs - a.mtimeMs; // newest first
    // Tie-break on the name, descending (plain code-unit comparison, locale-independent).
    if (a.name === b.name) return 0;
    return a.name < b.name ? 1 : -1;
  });
  // Deletions are independent of each other, so issue them together.
  await Promise.all(
    stamped
      .slice(MAX_TOOL_ARTIFACTS)
      .map(({ name }) => fs.rm(path.join(dir, name), { force: true }).catch(() => {})),
  );
}
75
+
76
/**
 * Persist an oversized tool result verbatim under `.jeo/artifacts/tool-results/`
 * inside `cwd`, then apply artifact retention.
 *
 * The file name is `<epoch-ms>-<random base36>-<tool>.txt`, where the tool name
 * is reduced to `[a-zA-Z0-9_-]` and at most 32 characters ("tool" when that
 * leaves nothing).
 *
 * @param tool   Name of the tool that produced the output.
 * @param output Full raw output to store.
 * @param cwd    Workspace root the artifact directory lives under.
 * @returns The artifact path relative to `cwd` (for the model to `read`).
 * Directory-creation and write failures propagate to the caller.
 */
export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
  const segments = [".jeo", "artifacts", "tool-results"];
  const artifactDir = path.join(cwd, ...segments);
  await fs.mkdir(artifactDir, { recursive: true });

  const sanitized = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32);
  const toolPart = sanitized || "tool";
  const timePart = Date.now();
  const randomPart = Math.random().toString(36).slice(2, 8);
  const relativePath = path.join(...segments, `${timePart}-${randomPart}-${toolPart}.txt`);

  await fs.writeFile(path.join(cwd, relativePath), output, "utf-8");
  // Retention so a long session can't grow the artifact dir without bound.
  await pruneToolArtifacts(artifactDir);
  return relativePath;
}
92
+
93
/**
 * Turn one tool's raw output into the body the model sees.
 *
 * Steps: minimize the raw output, cap the minimized text to the tool's budget,
 * and — when the RAW output is larger than that budget — spill the raw output
 * to an artifact file and append a note pointing at it. `read` results use
 * `READ_OUTPUT_MAX` with a head-only cut; every other tool uses
 * `TOOL_OUTPUT_MAX` with both-ends truncation.
 *
 * @param tool      Name of the tool that produced the output.
 * @param rawOutput Unmodified tool output.
 * @param cwd       Workspace root used for spilled artifacts.
 * @returns The capped body, plus the artifact note when a spill succeeded.
 */
export async function formatToolResultBody(tool: string, rawOutput: string, cwd: string): Promise<string> {
  const minimized = minimizeToolOutput(rawOutput, tool).text;
  // A read is a contiguous slice the model asked for: bigger budget, never split.
  const forRead = tool === "read";
  const budget = forRead ? READ_OUTPUT_MAX : TOOL_OUTPUT_MAX;
  const capped = truncateToolOutput(minimized, budget, forRead);

  if (rawOutput.length <= budget) return capped;

  // Spilling is best-effort: on failure the artifact note is simply omitted.
  const artifactPath = await spillToolResult(tool, rawOutput, cwd).catch(() => null);
  if (!artifactPath) return capped;
  return `${capped}\n[full output (${rawOutput.length} chars) saved to ${artifactPath} — read it for the truncated remainder]`;
}
@@ -3,6 +3,7 @@ import * as fs from "node:fs/promises";
3
3
  import * as path from "node:path";
4
4
  import { readWorkflowState, readWorkflowStateStrict, type WorkflowState } from "./state";
5
5
  import { jeoEnv } from "../util/env";
6
+ import { READ_OUTPUT_MAX } from "./tool-output";
6
7
 
7
8
  /** Read the deep-interview lock; on corrupt state fail CLOSED (treat as active lock). */
8
9
  async function readMutationLock(cwd: string): Promise<WorkflowState | null> {
@@ -291,10 +292,27 @@ export async function readTool(
291
292
  return { success: true, output: out.join("\n") };
292
293
  }
293
294
 
294
- const MAX_LINES = 500;
295
- const annotated = lines.slice(0, MAX_LINES).map((l, i) => `${i + 1}${lineAnchor(l)}|${l}`).join("\n");
296
- if (lines.length > MAX_LINES) {
297
- const notice = `\n…(showing lines 1-${MAX_LINES} of ${lines.length}; pass lineRange "${MAX_LINES + 1}-" to read the rest)`;
295
+ // Default (no lineRange): fill the model-visible read budget with WHOLE lines
296
+ // instead of a fixed 500-line cap that left half the 32k budget unused and forced
297
+ // needless pagination (the read tool's biggest "reads too little per call" pain).
298
+ // READ_OUTPUT_MAX is the real cap; a hard line ceiling (JEO_READ_MAX_LINES) guards
299
+ // pathological files, and a small reserve keeps the pagination notice inside the
300
+ // budget so it is never trimmed by the downstream head-only truncation.
301
+ const HARD_LINE_CEILING = Math.max(500, Number(jeoEnv("READ_MAX_LINES") ?? "") || 5000);
302
+ const charBudget = Math.max(1_000, READ_OUTPUT_MAX - 256);
303
+ const shownLines: string[] = [];
304
+ let usedChars = 0;
305
+ for (let i = 0; i < lines.length && shownLines.length < HARD_LINE_CEILING; i++) {
306
+ const annotatedLine = `${i + 1}${lineAnchor(lines[i]!)}|${lines[i]}`;
307
+ const cost = annotatedLine.length + 1; // + newline
308
+ if (shownLines.length > 0 && usedChars + cost > charBudget) break; // always emit ≥1 line
309
+ shownLines.push(annotatedLine);
310
+ usedChars += cost;
311
+ }
312
+ const annotated = shownLines.join("\n");
313
+ if (shownLines.length < lines.length) {
314
+ const shown = shownLines.length;
315
+ const notice = `\n…(showing lines 1-${shown} of ${lines.length}; pass lineRange "${shown + 1}-" to read the rest)`;
298
316
  return { success: true, output: annotated + notice };
299
317
  }
300
318
  return { success: true, output: annotated };
@@ -574,7 +592,8 @@ export async function bashTool(
574
592
  cwd: string = process.cwd(),
575
593
  timeoutMs: number = 120_000,
576
594
  subdir?: string,
577
- env?: Record<string, string>
595
+ env?: Record<string, string>,
596
+ onProgress?: (partialOutput: string) => void,
578
597
  ): Promise<ToolResult> {
579
598
  if (jeoEnv("BASH_FIXUPS") === "1") {
580
599
  const fx = applyBashFixups(command);
@@ -608,12 +627,27 @@ export async function bashTool(
608
627
  killTimer = setTimeout(() => { try { proc.kill(9); } catch {} }, 3_000);
609
628
  }, TIMEOUT_MS);
610
629
 
630
+ // Stream stdout incrementally when a progress sink is attached (drives the live
631
+ // DIMMED bash output view); read stderr fully in parallel. Without a sink, fall
632
+ // back to a single post-exit read (identical content, no streaming overhead).
633
+ const stderrPromise = new Response(proc.stderr).text();
634
+ let stdout = "";
635
+ if (onProgress) {
636
+ const decoder = new TextDecoder();
637
+ let lastEmit = 0;
638
+ for await (const chunk of proc.stdout as unknown as AsyncIterable<Uint8Array>) {
639
+ stdout += decoder.decode(chunk, { stream: true });
640
+ const now = Date.now();
641
+ if (now - lastEmit >= 80) { lastEmit = now; onProgress(stdout); }
642
+ }
643
+ stdout += decoder.decode();
644
+ onProgress(stdout);
645
+ }
611
646
  await proc.exited;
612
647
  clearTimeout(timer);
613
648
  if (killTimer) clearTimeout(killTimer);
614
-
615
- const stdout = await new Response(proc.stdout).text();
616
- const stderr = await new Response(proc.stderr).text();
649
+ if (!onProgress) stdout = await new Response(proc.stdout).text();
650
+ const stderr = await stderrPromise;
617
651
 
618
652
  let output = [stdout, stderr].filter(Boolean).join("\n");
619
653
  const MAX_OUTPUT = 100_000;
@@ -2,11 +2,7 @@ import { providerRegistry } from "./provider-registry";
2
2
  import { OAUTH_FLOW_REGISTRY } from "../auth/flows";
3
3
  import { readGlobalConfig } from "../agent/state";
4
4
  import { resolveCredential, type AuthProvider, type Credential } from "../auth";
5
- import { anthropicAdapter } from "./providers/anthropic";
6
- import { openaiAdapter } from "./providers/openai";
7
- import { geminiAdapter } from "./providers/gemini";
8
- import { ollamaAdapter } from "./providers/ollama";
9
- import { antigravityAdapter } from "./providers/antigravity";
5
+ import "./register-providers"; // side-effect: registers built-in adapters into providerRegistry
10
6
  import type { CallOptions, Message, ProviderAdapter, ProviderName } from "./types";
11
7
  import { expandAlias, resolveModelId, effectiveAliasesFor } from "./model-registry";
12
8
  import { findCatalogEntry, type ModelCatalogEntry } from "./model-catalog-compat";
@@ -16,12 +12,6 @@ import { jeoEnv } from "../util/env";
16
12
  import type { Config } from "../agent/state";
17
13
 
18
14
 
19
- // Initialize Provider Registry
20
- providerRegistry.register("anthropic", anthropicAdapter);
21
- providerRegistry.register("openai", openaiAdapter);
22
- providerRegistry.register("gemini", geminiAdapter);
23
- providerRegistry.register("antigravity", antigravityAdapter);
24
- providerRegistry.register("ollama", ollamaAdapter);
25
15
 
26
16
 
27
17
  export function resolveProvider(model: string): ProviderName {
@@ -96,9 +86,13 @@ export function thinkingToReasoningEffort(
96
86
  return "medium";
97
87
  }
98
88
 
99
- /** Describe a model id: alias expansion + the provider it routes to. For `/model` + diagnostics. */
100
- export async function describeModel(input: string): Promise<{ input: string; resolved: string; provider: ProviderName }> {
101
- const resolved = await resolveModelId(input);
89
+ /** Describe a model id: alias expansion + the provider it routes to. For `/model` + diagnostics.
90
+ * Pass an already-read `config` to skip a redundant readGlobalConfig() on the turn hot path. */
91
+ export async function describeModel(
92
+ input: string,
93
+ config?: { modelAliases?: Record<string, string> },
94
+ ): Promise<{ input: string; resolved: string; provider: ProviderName }> {
95
+ const resolved = await resolveModelId(input, config);
102
96
  return { input, resolved, provider: resolveProvider(resolved) };
103
97
  }
104
98
 
@@ -311,6 +305,7 @@ async function resolveCall(options: Partial<CallOptions>, kind: "request" | "str
311
305
  onUsage: options.onUsage,
312
306
  signal: options.signal,
313
307
  reasoningEffort: options.reasoningEffort ?? thinkingToReasoningEffort(config.thinkingLevel),
308
+ onReasoning: options.onReasoning,
314
309
  };
315
310
  // Caller-supplied retry sink rides on the config-derived retry budget so the
316
311
  // engine/TUI can surface "rate limited — retrying in Ns" instead of a silent wait.
@@ -25,9 +25,14 @@ export function expandAlias(input: string, aliases: ModelAliases = BUILTIN_ALIAS
25
25
  }
26
26
 
27
27
  // Async: merge BUILTIN_ALIASES with config.modelAliases (config wins) and expand.
28
- export async function resolveModelId(input: string): Promise<string> {
29
- const config = await readGlobalConfig();
30
- const modelAliases = (config as any).modelAliases ?? {};
28
+ // Pass an already-read `config` to skip the readGlobalConfig() round-trip (turn
29
+ // hot path: avoids re-reading the config file mid-turn for model resolution).
30
+ export async function resolveModelId(
31
+ input: string,
32
+ config?: { modelAliases?: ModelAliases },
33
+ ): Promise<string> {
34
+ const cfg = config ?? (await readGlobalConfig());
35
+ const modelAliases = (cfg as any).modelAliases ?? {};
31
36
  const merged: ModelAliases = { ...BUILTIN_ALIASES, ...modelAliases };
32
37
  return expandAlias(input, merged);
33
38
  }
@@ -160,13 +160,18 @@ export function antigravityRequest(messages: Message[], options: CallOptions, cr
160
160
  type CcaUsage = { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
161
161
  interface CcaChunk {
162
162
  response?: {
163
- candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
163
+ candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[]; // parts flagged `thought` are read by thoughtOf(); all other parts by textOf()
164
164
  usageMetadata?: CcaUsage;
165
165
  };
166
166
  }
167
167
 
168
168
  function textOf(chunk: CcaChunk): string {
169
- return chunk.response?.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
169
+ return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? ""; // first candidate only; parts flagged `thought` are excluded, "" when nothing is present
170
+ }
171
+
172
+ /** Native thinking text: joins the `text` of the first candidate's parts flagged `thought` ("" when there are none). Kept separate from textOf so it never pollutes the JSON tool call. */
173
+ function thoughtOf(chunk: CcaChunk): string {
174
+ return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
170
175
  }
171
176
 
172
177
  async function fetchAntigravity(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
@@ -194,6 +199,8 @@ export const antigravityAdapter: ProviderAdapter = {
194
199
  for await (const data of readSse(response.body)) {
195
200
  let chunk: CcaChunk;
196
201
  try { chunk = JSON.parse(data); } catch { continue; }
202
+ const thought = thoughtOf(chunk);
203
+ if (thought) options.onReasoning?.(thought);
197
204
  out += textOf(chunk);
198
205
  if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
199
206
  }
@@ -209,6 +216,8 @@ export const antigravityAdapter: ProviderAdapter = {
209
216
  for await (const data of readSse(response.body)) {
210
217
  let chunk: CcaChunk;
211
218
  try { chunk = JSON.parse(data); } catch { continue; }
219
+ const thought = thoughtOf(chunk);
220
+ if (thought) options.onReasoning?.(thought);
212
221
  const delta = textOf(chunk);
213
222
  if (delta) { yielded = true; yield delta; }
214
223
  if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
@@ -119,7 +119,7 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
119
119
  }
120
120
 
121
121
  interface GeminiChunk {
122
- candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
122
+ candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[]; // parts flagged `thought` are read by thoughtOf(); all other parts by textOf()
123
123
  promptFeedback?: { blockReason?: string };
124
124
  usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
125
125
  }
@@ -130,7 +130,13 @@ interface CcaChunk {
130
130
  }
131
131
 
132
132
  function textOf(chunk: GeminiChunk): string {
133
- return chunk.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
133
+ return chunk.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? ""; // first candidate only; parts flagged `thought` are excluded, "" when nothing is present
134
+ }
135
+
136
+ /** Native thinking text: joins the `text` of the first candidate's parts flagged `thought`, which are present only when
137
+ * the model emits thought summaries ("" otherwise). Kept SEPARATE from textOf so thoughts never pollute the JSON tool call. */
138
+ function thoughtOf(chunk: GeminiChunk): string {
139
+ return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
134
140
  }
135
141
 
136
142
  /** When Gemini returns HTTP 200 with no text, surface the real cause (safety block /
@@ -176,6 +182,8 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
176
182
  }
177
183
  const inner = chunk.response;
178
184
  if (!inner) continue;
185
+ const thought = thoughtOf(inner);
186
+ if (thought) options.onReasoning?.(thought);
179
187
  const delta = textOf(inner);
180
188
  if (delta) {
181
189
  yieldedAny = true;
@@ -239,6 +247,8 @@ export const geminiAdapter: ProviderAdapter = {
239
247
  } catch {
240
248
  continue;
241
249
  }
250
+ const thought = thoughtOf(chunk);
251
+ if (thought) options.onReasoning?.(thought);
242
252
  const delta = textOf(chunk);
243
253
  if (delta) {
244
254
  yieldedAny = true;