jeo-code 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,6 +26,7 @@ import {
26
26
  validateSubagentDoneReason,
27
27
  } from "./subagents";
28
28
  import { thinkingMaxTokens } from "../ai/model-manager";
29
+ import type { SubagentRegistry } from "./subagent-registry";
29
30
 
30
31
  /** Lifecycle event emitted while a delegated subagent runs. */
31
32
  export interface TaskSubEvent {
@@ -41,6 +42,12 @@ export interface TaskSubEvent {
41
42
  summary?: string;
42
43
  /** Model selected for this subagent run. */
43
44
  model?: string;
45
+ /** 1-based task position within a fan-out batch (omitted for single-task runs). */
46
+ index?: number;
47
+ /** Total tasks in the fan-out batch (omitted for single-task runs). */
48
+ total?: number;
49
+ /** Provider token usage for the finished subagent (done events only). */
50
+ tokens?: { input: number; output: number };
44
51
  }
45
52
 
46
53
  export interface TaskToolOptions {
@@ -51,18 +58,46 @@ export interface TaskToolOptions {
51
58
  signal?: AbortSignal;
52
59
  /** Optional live sink (e.g. plain-stream rendering of nested progress). */
53
60
  onEvent?: (ev: TaskSubEvent) => void;
54
- /** Mid-turn steering drain (gjc parity). Forwarded to a SINGLE running subagent so
55
- * an additional user query typed while the subagent works reaches it live. While
56
- * the subagent runs the parent loop is blocked inside this tool call, so the
57
- * subagent is the only active drainer the message is not double-consumed.
58
- * Fan-out batches do NOT forward it (parallel drains would deliver to one arbitrary
59
- * subagent); pending steering stays for the parent after the batch returns. */
61
+ /** Mid-turn steering drain (gjc parity): an additional user query typed while a
62
+ * subagent works is forwarded live. Single-task runs and the SERIAL executor
63
+ * batch (concurrency 1) forward to the one active subagent. A parallel read-only
64
+ * batch routes through a broadcast hub (createSteerHub) so every running worker
65
+ * sees each message exactly once. Unconsumed messages stay for the parent. */
60
66
  steer?: () => string[];
67
+ /** When present, a `task` call with `detached: true` registers a background run
68
+ * here and returns immediately; the parent controls it via the `subagent` tool. */
69
+ registry?: SubagentRegistry;
61
70
  }
62
71
 
63
72
  /** Max concurrent read-only subagents in a fan-out batch. */
64
73
  const MAX_FANOUT = 4;
65
74
 
75
+ /** Hard cap on a SERIAL (mutating executor) fan-out batch: it runs one task at a
76
+ * time inside one blocking tool call, so an unbounded queue would monopolize the
77
+ * parent turn. Split larger efforts into sequential task calls. */
78
+ const MAX_SERIAL_EXECUTOR = 6;
79
+
80
/**
 * Build a steering broadcaster shared by the workers of one fan-out batch.
 *
 * Each `worker()` call hands back an independent reader over one append-only
 * message log. A reader first moves whatever `drain` currently returns into
 * that log, then returns the log entries it has not returned before. Because
 * every reader keeps its own position, a message pulled in by one worker is
 * still delivered to the others, and no reader receives the same entry twice.
 *
 * @param drain Source of pending steer messages. When omitted there is nothing
 *   to broadcast and `worker()` returns `undefined`.
 * @returns An object whose `worker()` creates a per-worker reader (or `undefined`).
 */
function createSteerHub(drain?: () => string[]) {
  // Shared by every reader created from this hub; only ever appended to.
  const messages: string[] = [];

  const worker = (): (() => string[]) | undefined => {
    if (!drain) return undefined;
    // Number of log entries this particular reader has already handed out.
    let seen = 0;
    return function readUnseen(): string[] {
      const incoming = drain();
      if (incoming.length) {
        messages.push(...incoming);
      }
      const unseen = messages.slice(seen);
      seen = messages.length;
      return unseen;
    };
  };

  return { worker };
}
100
+
66
101
  /** One-line protocol description appended to the launch system prompt. Pass a
67
102
  * config so CONFIG-DECLARED custom roles are advertised to the model too. */
68
103
  export function taskToolProtocolLine(config?: Pick<Config, "subagents">): string {
@@ -141,14 +176,26 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
141
176
  taskText: string,
142
177
  context: string,
143
178
  cwd: string,
144
- steer?: () => string[],
179
+ extra: {
180
+ steer?: () => string[];
181
+ slot?: { index: number; total: number };
182
+ projectContext?: Awaited<ReturnType<typeof loadProjectContext>>;
183
+ /** Overrides opts.signal — a detached run uses its own registry signal so it
184
+ * is cancellable independently of the parent turn. */
185
+ signal?: AbortSignal;
186
+ } = {},
145
187
  ): Promise<ToolResult> => {
188
+ const { steer, slot, projectContext: preloadedContext, signal: signalOverride } = extra;
189
+ // Tag every live event with its fan-out slot so a parent monitor can tell
190
+ // task 1 from task 3 when several same-role subagents stream concurrently.
191
+ const emit = (ev: TaskSubEvent) =>
192
+ opts.onEvent?.(slot ? { ...ev, index: slot.index, total: slot.total } : ev);
146
193
  const model = resolveSubagentModel(role.id, opts.config);
147
194
  const maxSteps = resolveSubagentMaxSteps(role.id, opts.config);
148
195
  // gjc parity: a role may pin its own reasoning budget; absent = inherit the
149
196
  // session/global thinking level (the "(inherit)" row in the picker).
150
197
  const thinking = resolveSubagentThinking(role.id, opts.config) ?? opts.config.thinkingLevel;
151
- const projectContext = await loadProjectContext(cwd);
198
+ const projectContext = preloadedContext ?? await loadProjectContext(cwd);
152
199
  const history: Message[] = [
153
200
  { role: "system", content: withProjectContext(subagentSystemPrompt(role), projectContext) },
154
201
  { role: "user", content: `${taskText}${context}` },
@@ -157,10 +204,13 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
157
204
  let lastTarget = "";
158
205
  let currentStep = 0;
159
206
  // Round-8 (architect ref 7-Round7Workflow): count the subagent's SUCCESSFUL
160
- // mutating calls so the parent can audit a "Changed Files:" claim against
161
- // observed reality instead of trusting the report's substring markers.
162
- let mutationsOk = 0;
163
- opts.onEvent?.({ role: role.id, kind: "start", detail: taskText, maxSteps, model });
207
+ // calls so the parent can audit a "Changed Files:" claim against observed
208
+ // reality. File-writing tools (write/edit/mkdir/delete) are tracked apart from
209
+ // bash: read-only bash (e.g. `bun test`) MUST NOT count as edit evidence, but
210
+ // bash CAN mutate, so the audit message distinguishes the two cases.
211
+ let fileMutations = 0;
212
+ let bashRuns = 0;
213
+ emit({ role: role.id, kind: "start", detail: taskText, maxSteps, model });
164
214
  const result = await runAgentLoop(history, {
165
215
  cwd,
166
216
  model,
@@ -169,7 +219,7 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
169
219
  // Bounded delegation: a subagent's step contract stays exact — the parent
170
220
  // owns any retry/extension decision, so the gjc retry flow is disabled here.
171
221
  budget: { maxExtensions: 0 },
172
- signal: opts.signal,
222
+ signal: signalOverride ?? opts.signal,
173
223
  steer,
174
224
  tools: subagentToolset(role),
175
225
  events: {
@@ -178,38 +228,45 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
178
228
  if (invocation && invocation.tool && invocation.tool !== "done") {
179
229
  lastTarget = toolTarget(invocation.tool, invocation.arguments);
180
230
  trace.push(` step ${currentStep}/${maxSteps}: ${lastTarget}`);
181
- opts.onEvent?.({ role: role.id, kind: "step", detail: lastTarget, step: currentStep, maxSteps, model });
231
+ emit({ role: role.id, kind: "step", detail: lastTarget, step: currentStep, maxSteps, model });
182
232
  }
183
233
  },
184
234
  onToolResult: (tool, success, output) => {
185
- if (success && (tool === "write" || tool === "edit" || tool === "bash")) mutationsOk++;
235
+ if (success) {
236
+ if (tool === "write" || tool === "edit" || tool === "mkdir" || tool === "delete") fileMutations++;
237
+ else if (tool === "bash") bashRuns++;
238
+ }
186
239
  const label = lastTarget || tool;
187
240
  const summary = firstUsefulLine(output);
188
241
  const suffix = summary ? ` — ${summary}` : "";
189
242
  trace.push(` ${success ? "✓" : "✗"} ${label}${suffix}`);
190
- opts.onEvent?.({ role: role.id, kind: "tool", detail: label, success, summary, step: currentStep, maxSteps, model });
243
+ emit({ role: role.id, kind: "tool", detail: label, success, summary, step: currentStep, maxSteps, model });
191
244
  lastTarget = "";
192
245
  },
193
246
  // Retry notices (rate-limit backoff etc.) surface as live "step" beats so the
194
247
  // parent's monitor shows WHY a subagent is pausing instead of going silent.
195
- onNotice: msg => opts.onEvent?.({ role: role.id, kind: "step", detail: msg, step: currentStep, maxSteps, model }),
248
+ onNotice: msg => emit({ role: role.id, kind: "step", detail: msg, step: currentStep, maxSteps, model }),
196
249
  // Mid-turn steering reached this subagent: surface it as a live beat so the
197
250
  // parent's monitor shows the redirect instead of an unexplained behavior change.
198
- onSteer: text => opts.onEvent?.({ role: role.id, kind: "step", detail: `↳ steer: ${text}`, step: currentStep, maxSteps, model }),
251
+ onSteer: text => emit({ role: role.id, kind: "step", detail: `↳ steer: ${text}`, step: currentStep, maxSteps, model }),
199
252
  },
200
253
  });
201
254
  const reason = result.doneReason?.trim() || `(subagent reached the ${result.steps}-step limit without signaling done)`;
202
255
  const validation = validateSubagentDoneReason(role, reason);
203
256
  const complete = result.done && validation.ok;
204
257
  const detail = validation.ok ? reason : `${reason}\n\n[contract incomplete: missing ${validation.missing?.join(", ")}]`;
205
- opts.onEvent?.({ role: role.id, kind: "done", detail, success: complete, step: result.steps, maxSteps, model });
206
- const header = `[${role.title} subagent] ${complete ? "completed" : "stopped"} in ${result.steps} step(s) on ${model}.`;
258
+ emit({ role: role.id, kind: "done", detail, success: complete, step: result.steps, maxSteps, model, tokens: result.usage ? { input: result.usage.inputTokens, output: result.usage.outputTokens } : undefined });
259
+ const tokNote = result.usage ? `, ${result.usage.inputTokens + result.usage.outputTokens} tok` : "";
260
+ const header = `[${role.title} subagent] ${complete ? "completed" : "stopped"} in ${result.steps} step(s) on ${model}${tokNote}.`;
207
261
  const body = trace.length ? `\nSteps:\n${trace.join("\n")}` : "";
208
- // Parent-side audit: a mutating role that "completed" without ONE successful
209
- // write/edit/bash cannot have changed anything — flag the claim as unverified
210
- // (the report's markers prove formatting, not work).
211
- const audit = complete && !role.readOnly && mutationsOk === 0
212
- ? `\n[parent audit] No successful write/edit/bash was observed in this run — treat any "Changed Files:" claims above as UNVERIFIED.`
262
+ // Parent-side audit: a mutating role that "completed" without a successful file
263
+ // mutation (write/edit/mkdir/delete) likely changed nothing — flag the claim.
264
+ // bash is tracked separately: it CAN mutate, so an only-bash run downgrades to
265
+ // "verify independently" instead of the stronger UNVERIFIED.
266
+ const audit = complete && !role.readOnly && fileMutations === 0
267
+ ? bashRuns === 0
268
+ ? `\n[parent audit] No successful write/edit/bash was observed in this run — treat any "Changed Files:" claims above as UNVERIFIED.`
269
+ : `\n[parent audit] No successful write/edit was observed (only bash ran); bash may or may not have mutated files — verify any "Changed Files:" claims above independently.`
213
270
  : "";
214
271
  return { success: complete, output: `${header}${body}\n\nResult:\n${fenceSubagentReport(detail)}${audit}` };
215
272
  };
@@ -237,6 +294,18 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
237
294
  if (items.length === 0) {
238
295
  return { success: false, output: "", error: "task fan-out requires a non-empty 'tasks' array of assignments." };
239
296
  }
297
+ // #5: the mutating executor fan-out is SERIAL (concurrency 1) and blocks the
298
+ // turn; cap it regardless of justification so a huge queue can't monopolize
299
+ // the parent. Split larger efforts into sequential task calls.
300
+ if (!role.readOnly && items.length > MAX_SERIAL_EXECUTOR) {
301
+ return {
302
+ success: false,
303
+ output: "",
304
+ error:
305
+ `Executor fan-out of ${items.length} exceeds the serial cap of ${MAX_SERIAL_EXECUTOR}. ` +
306
+ `The mutating executor runs one task at a time and blocks the turn — split into ≤${MAX_SERIAL_EXECUTOR}-task batches or sequential task calls.`,
307
+ };
308
+ }
240
309
  // Spawn-gate lite (plan/gjc-inheritance.md B9, gjc spawn-gate 계승): a batch
241
310
  // wider than MAX_FANOUT is refused BEFORE any subagent launches unless the
242
311
  // model justifies the parallelism — silent capping hid the cost decision.
@@ -257,13 +326,22 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
257
326
  // Read-only roles fan out concurrently (bounded). The mutating executor is serialized
258
327
  // (concurrency 1) so parallel subagents can't race on the same files.
259
328
  const limit = role.readOnly ? Math.min(items.length, MAX_FANOUT) : 1;
329
+ // Load project context ONCE per batch instead of re-scanning AGENTS.md for
330
+ // every fan-out task (redundant IO + duplicated tokens).
331
+ const batchContext = await loadProjectContext(cwd);
260
332
  const results: ToolResult[] = new Array(items.length);
261
333
  let next = 0;
334
+ // #7: broadcast steering hub — each concurrent worker sees every parent
335
+ // steer message exactly once (safe even for parallel read-only fan-out).
336
+ const steerHub = createSteerHub(opts.steer);
262
337
  const worker = async () => {
338
+ // One steer cursor per concurrent worker (not per item) so a worker that
339
+ // processes several items sees each parent message once across them all.
340
+ const workerSteer = steerHub.worker();
263
341
  while (true) {
264
342
  const i = next++;
265
343
  if (i >= items.length) return;
266
- results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd);
344
+ results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd, { slot: { index: i + 1, total: items.length }, projectContext: batchContext, steer: workerSteer });
267
345
  }
268
346
  };
269
347
  await Promise.all(Array.from({ length: limit }, () => worker()));
@@ -279,6 +357,20 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
279
357
  if (!taskText) {
280
358
  return { success: false, output: "", error: `task tool requires a non-empty 'task' (or a 'tasks' array). Valid roles: ${subagentRoleIds(opts.config).join(", ")}.` };
281
359
  }
282
- return runOne(role, taskText, ctx(args.context), cwd, opts.steer);
360
+ // Detached form (#9): register a background run and return immediately so the
361
+ // parent can keep working, then list/inspect/await/cancel via the `subagent`
362
+ // tool. Steering is not forwarded to a detached run (no single active drainer).
363
+ if (args.detached === true && opts.registry) {
364
+ const rec = opts.registry.launch(role.id, taskText, signal =>
365
+ runOne(role, taskText, ctx(args.context), cwd, { signal }),
366
+ );
367
+ return {
368
+ success: true,
369
+ output:
370
+ `[detached] launched ${role.title} subagent '${rec.id}'. It runs in the background — ` +
371
+ `keep working, then use the 'subagent' tool ({action:"await"|"list"|"inspect"|"cancel", ids?}) to collect its result.`,
372
+ };
373
+ }
374
+ return runOne(role, taskText, ctx(args.context), cwd, { steer: opts.steer });
283
375
  };
284
376
  }
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Tool-result output handling — the model-visible output budget, both-ends
3
+ * truncation, recoverable artifact spilling, and the minimize→truncate→spill
4
+ * orchestration the agent loop applies to every tool result.
5
+ *
6
+ * Extracted from `engine.ts` (single-responsibility: the loop drives steps; this
7
+ * module owns how a tool's raw output is shaped before it re-enters context).
8
+ * `engine.ts` re-exports the public surface for backward compatibility.
9
+ */
10
+ import * as fs from "node:fs/promises";
11
+ import * as path from "node:path";
12
+ import { jeoEnv } from "../util/env";
13
+ import { minimizeToolOutput } from "./output-minimizer";
14
+
15
/**
 * Resolve the generic model-visible tool-result cap from the environment
 * (plan/gjc-inheritance.md B10 — inherits gjc's settings-driven output handling).
 *
 * Reads the `TOOL_OUTPUT_MAX` setting through `jeoEnv` (documented as
 * JEO_TOOL_OUTPUT_MAX). A finite value within 500..200 000 is accepted and
 * truncated to an integer; anything else falls back to 4 000. The spill
 * threshold is derived from this value, so truncated output stays recoverable
 * from its artifact.
 */
function envOutputMax(): number {
  const fallback = 4_000;
  const parsed = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
  if (!Number.isFinite(parsed)) return fallback;
  if (parsed < 500 || parsed > 200_000) return fallback;
  return Math.trunc(parsed);
}
export const TOOL_OUTPUT_MAX = envOutputMax();
23
+
24
/**
 * Resolve the model-visible budget for `read` results from the environment.
 *
 * A read result is a contiguous file slice the model asked for, so it gets a
 * much larger budget than the generic noise-control cap (default 32 000 here
 * versus 4 000 there); otherwise a long read would be shrunk again after the
 * read tool had already bounded it.
 *
 * Reads the `READ_OUTPUT_MAX` setting through `jeoEnv` (documented as
 * JEO_READ_OUTPUT_MAX). A finite value within 1 000..200 000 is accepted and
 * truncated to an integer; anything else falls back to 32 000.
 */
function envReadOutputMax(): number {
  const fallback = 32_000;
  const parsed = Number(jeoEnv("READ_OUTPUT_MAX") ?? "");
  const usable = Number.isFinite(parsed) && !(parsed < 1_000) && !(parsed > 200_000);
  if (usable) {
    return Math.trunc(parsed);
  }
  return fallback;
}
export const READ_OUTPUT_MAX = envReadOutputMax();
34
+
35
/** True when cutting `s` at `index` would separate a UTF-16 surrogate pair
 * (a high surrogate at `index - 1` followed by a low surrogate at `index`). */
function splitsSurrogatePair(s: string, index: number): boolean {
  if (index <= 0 || index >= s.length) return false;
  const before = s.charCodeAt(index - 1);
  const after = s.charCodeAt(index);
  return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

/**
 * Cap a tool result fed back to the model.
 *
 * Default mode keeps both ends: the head holds the start (e.g. a command's
 * invocation) and the tail holds what is usually decisive (test summaries, the
 * final error). A pure head-cut loses that. The head receives 60% of the budget
 * and the tail the remainder.
 *
 * `headOnly` keeps only the front — for `read` results, which are a contiguous
 * file slice the model explicitly requested; head/tail splitting would mangle
 * the code into two non-adjacent fragments.
 *
 * Cut points never fall inside a surrogate pair: a cut that would split one is
 * moved by one code unit so the dangling half is dropped instead of kept. This
 * keeps the result free of lone surrogates, which do not survive UTF-8 encoding
 * and which strict JSON consumers may reject. The "chars truncated" figure is
 * the number of UTF-16 code units actually removed.
 *
 * @param s Raw text to cap.
 * @param max Budget in UTF-16 code units for the kept text (the truncation
 *   marker is added on top). Non-finite or negative values are treated as 0;
 *   fractional values are truncated.
 * @param headOnly When true, keep only the start of `s`.
 * @returns `s` unchanged when it fits, otherwise the kept part(s) joined by a
 *   truncation marker.
 */
export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX, headOnly = false): string {
  if (s.length <= max) return s;
  // Normalize the budget so odd inputs cannot become negative/NaN slice indices.
  const budget = Number.isFinite(max) ? Math.max(0, Math.trunc(max)) : 0;
  if (headOnly) {
    const end = splitsSurrogatePair(s, budget) ? budget - 1 : budget;
    return `${s.slice(0, end)}\n…(${s.length - end} chars truncated; narrow the lineRange or read the spilled artifact)…`;
  }
  const head = Math.floor(budget * 0.6);
  const tail = budget - head;
  // Head: drop a dangling high surrogate. Tail: skip an orphaned low surrogate.
  const headEnd = splitsSurrogatePair(s, head) ? head - 1 : head;
  const rawTailStart = s.length - tail;
  const tailStart = splitsSurrogatePair(s, rawTailStart) ? rawTailStart + 1 : rawTailStart;
  return `${s.slice(0, headEnd)}\n…(${tailStart - headEnd} chars truncated)…\n${s.slice(tailStart)}`;
}
53
+
54
+ /** Non-read tool output larger than this is spilled to a recoverable artifact file.
55
+ * Aligned with `truncateToolOutput`'s generic cap so that whenever the model-visible
56
+ * result drops content, the full output is recoverable via the artifact. (`read`
57
+ * spills against the larger READ_OUTPUT_MAX in the result loop.) */
58
+ export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
59
+
60
/** How many of the newest tool-result artifacts survive a prune pass. */
export const MAX_TOOL_ARTIFACTS = 50;

/**
 * Best-effort retention for the artifact directory: keep the
 * `MAX_TOOL_ARTIFACTS` entries of `dir` with the newest modification time and
 * remove the rest.
 *
 * Nothing here throws: an unreadable directory is treated as empty, an entry
 * whose `stat` fails is ranked as the oldest (mtime 0), and a failed removal is
 * ignored.
 *
 * @param dir Directory whose entries are pruned.
 */
async function pruneToolArtifacts(dir: string): Promise<void> {
  let names: string[];
  try {
    names = await fs.readdir(dir);
  } catch {
    names = [];
  }
  if (names.length <= MAX_TOOL_ARTIFACTS) return;

  // Resolve one entry's mtime, falling back to 0 when it cannot be stat'ed.
  const mtimeOf = async (name: string): Promise<number> => {
    try {
      const info = await fs.stat(path.join(dir, name));
      return info.mtimeMs ?? 0;
    } catch {
      return 0;
    }
  };

  const entries = await Promise.all(
    names.map(async name => ({ name, mtime: await mtimeOf(name) })),
  );
  // Newest first, so everything past the retention count is expired.
  entries.sort((left, right) => right.mtime - left.mtime);
  const expired = entries.slice(MAX_TOOL_ARTIFACTS);
  for (const entry of expired) {
    try {
      await fs.rm(path.join(dir, entry.name), { force: true });
    } catch {
      // Best-effort: a file that cannot be removed is simply left behind.
    }
  }
}
75
+
76
/**
 * Persist an oversized tool result verbatim under `.jeo/artifacts/tool-results/`
 * inside `cwd` and return the path relative to `cwd`, so the model can `read` it.
 *
 * The file name is `<epoch-ms>-<random base36>-<tool>.txt`, where the tool name
 * is reduced to `[a-zA-Z0-9_-]` (other characters become `_`), capped at 32
 * characters, and replaced by `tool` when empty. After writing, the directory is
 * pruned so a long session cannot grow it without bound.
 *
 * Failures of `mkdir`/`writeFile` propagate; the caller is expected to catch them
 * and omit the artifact note.
 *
 * @param tool Name of the tool that produced the output.
 * @param output Full text to store.
 * @param cwd Workspace root the artifact directory lives under.
 * @returns The artifact path relative to `cwd`.
 */
export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
  const relDir = path.join(".jeo", "artifacts", "tool-results");
  const absDir = path.join(cwd, relDir);
  await fs.mkdir(absDir, { recursive: true });

  const sanitized = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32);
  const toolLabel = sanitized || "tool";
  const millis = Date.now();
  const nonce = Math.random().toString(36).slice(2, 8);
  const rel = path.join(relDir, `${millis}-${nonce}-${toolLabel}.txt`);

  await fs.writeFile(path.join(cwd, rel), output, "utf-8");
  // Retention so a long session can't grow the artifact dir without bound.
  await pruneToolArtifacts(absDir);
  return rel;
}
92
+
93
/**
 * Turn one tool's raw output into the body the model will see.
 *
 * Steps, in order:
 *  1. Pass the raw output through `minimizeToolOutput` and keep its `text`.
 *  2. Cap that text with `truncateToolOutput`. `read` results use
 *     `READ_OUTPUT_MAX` and a head-only cut (splitting a file slice into head and
 *     tail would mangle the code); every other tool uses `TOOL_OUTPUT_MAX` with
 *     both-ends truncation.
 *  3. When the RAW output (not the minimized text) is longer than the chosen
 *     budget, spill it verbatim via `spillToolResult` and append a note pointing
 *     at the artifact. A failed spill is swallowed and the note is omitted.
 *
 * @param tool Name of the tool that produced the output.
 * @param rawOutput Unmodified tool output.
 * @param cwd Workspace root used for the artifact directory.
 * @returns The model-visible result body.
 */
export async function formatToolResultBody(tool: string, rawOutput: string, cwd: string): Promise<string> {
  const { text: minimized } = minimizeToolOutput(rawOutput, tool);
  const readResult = tool === "read";
  const budget = readResult ? READ_OUTPUT_MAX : TOOL_OUTPUT_MAX;
  const shown = truncateToolOutput(minimized, budget, readResult);

  // Within budget: nothing to recover, so no artifact is written.
  if (rawOutput.length <= budget) return shown;

  let artifact: string | null;
  try {
    artifact = await spillToolResult(tool, rawOutput, cwd);
  } catch {
    artifact = null;
  }
  if (!artifact) return shown;
  return `${shown}\n[full output (${rawOutput.length} chars) saved to ${artifact} — read it for the truncated remainder]`;
}
@@ -3,6 +3,7 @@ import * as fs from "node:fs/promises";
3
3
  import * as path from "node:path";
4
4
  import { readWorkflowState, readWorkflowStateStrict, type WorkflowState } from "./state";
5
5
  import { jeoEnv } from "../util/env";
6
+ import { READ_OUTPUT_MAX } from "./tool-output";
6
7
 
7
8
  /** Read the deep-interview lock; on corrupt state fail CLOSED (treat as active lock). */
8
9
  async function readMutationLock(cwd: string): Promise<WorkflowState | null> {
@@ -291,10 +292,27 @@ export async function readTool(
291
292
  return { success: true, output: out.join("\n") };
292
293
  }
293
294
 
294
- const MAX_LINES = 500;
295
- const annotated = lines.slice(0, MAX_LINES).map((l, i) => `${i + 1}${lineAnchor(l)}|${l}`).join("\n");
296
- if (lines.length > MAX_LINES) {
297
- const notice = `\n…(showing lines 1-${MAX_LINES} of ${lines.length}; pass lineRange "${MAX_LINES + 1}-" to read the rest)`;
295
+ // Default (no lineRange): fill the model-visible read budget with WHOLE lines
296
+ // instead of a fixed 500-line cap that left half the 32k budget unused and forced
297
+ // needless pagination (the read tool's biggest "reads too little per call" pain).
298
+ // READ_OUTPUT_MAX is the real cap; a hard line ceiling (JEO_READ_MAX_LINES) guards
299
+ // pathological files, and a small reserve keeps the pagination notice inside the
300
+ // budget so it is never trimmed by the downstream head-only truncation.
301
+ const HARD_LINE_CEILING = Math.max(500, Number(jeoEnv("READ_MAX_LINES") ?? "") || 5000);
302
+ const charBudget = Math.max(1_000, READ_OUTPUT_MAX - 256);
303
+ const shownLines: string[] = [];
304
+ let usedChars = 0;
305
+ for (let i = 0; i < lines.length && shownLines.length < HARD_LINE_CEILING; i++) {
306
+ const annotatedLine = `${i + 1}${lineAnchor(lines[i]!)}|${lines[i]}`;
307
+ const cost = annotatedLine.length + 1; // + newline
308
+ if (shownLines.length > 0 && usedChars + cost > charBudget) break; // always emit ≥1 line
309
+ shownLines.push(annotatedLine);
310
+ usedChars += cost;
311
+ }
312
+ const annotated = shownLines.join("\n");
313
+ if (shownLines.length < lines.length) {
314
+ const shown = shownLines.length;
315
+ const notice = `\n…(showing lines 1-${shown} of ${lines.length}; pass lineRange "${shown + 1}-" to read the rest)`;
298
316
  return { success: true, output: annotated + notice };
299
317
  }
300
318
  return { success: true, output: annotated };
@@ -574,7 +592,8 @@ export async function bashTool(
574
592
  cwd: string = process.cwd(),
575
593
  timeoutMs: number = 120_000,
576
594
  subdir?: string,
577
- env?: Record<string, string>
595
+ env?: Record<string, string>,
596
+ onProgress?: (partialOutput: string) => void,
578
597
  ): Promise<ToolResult> {
579
598
  if (jeoEnv("BASH_FIXUPS") === "1") {
580
599
  const fx = applyBashFixups(command);
@@ -608,12 +627,27 @@ export async function bashTool(
608
627
  killTimer = setTimeout(() => { try { proc.kill(9); } catch {} }, 3_000);
609
628
  }, TIMEOUT_MS);
610
629
 
630
+ // Stream stdout incrementally when a progress sink is attached (drives the live
631
+ // DIMMED bash output view); read stderr fully in parallel. Without a sink, fall
632
+ // back to a single post-exit read (identical content, no streaming overhead).
633
+ const stderrPromise = new Response(proc.stderr).text();
634
+ let stdout = "";
635
+ if (onProgress) {
636
+ const decoder = new TextDecoder();
637
+ let lastEmit = 0;
638
+ for await (const chunk of proc.stdout as unknown as AsyncIterable<Uint8Array>) {
639
+ stdout += decoder.decode(chunk, { stream: true });
640
+ const now = Date.now();
641
+ if (now - lastEmit >= 80) { lastEmit = now; onProgress(stdout); }
642
+ }
643
+ stdout += decoder.decode();
644
+ onProgress(stdout);
645
+ }
611
646
  await proc.exited;
612
647
  clearTimeout(timer);
613
648
  if (killTimer) clearTimeout(killTimer);
614
-
615
- const stdout = await new Response(proc.stdout).text();
616
- const stderr = await new Response(proc.stderr).text();
649
+ if (!onProgress) stdout = await new Response(proc.stdout).text();
650
+ const stderr = await stderrPromise;
617
651
 
618
652
  let output = [stdout, stderr].filter(Boolean).join("\n");
619
653
  const MAX_OUTPUT = 100_000;
@@ -2,11 +2,7 @@ import { providerRegistry } from "./provider-registry";
2
2
  import { OAUTH_FLOW_REGISTRY } from "../auth/flows";
3
3
  import { readGlobalConfig } from "../agent/state";
4
4
  import { resolveCredential, type AuthProvider, type Credential } from "../auth";
5
- import { anthropicAdapter } from "./providers/anthropic";
6
- import { openaiAdapter } from "./providers/openai";
7
- import { geminiAdapter } from "./providers/gemini";
8
- import { ollamaAdapter } from "./providers/ollama";
9
- import { antigravityAdapter } from "./providers/antigravity";
5
+ import "./register-providers"; // side-effect: registers built-in adapters into providerRegistry
10
6
  import type { CallOptions, Message, ProviderAdapter, ProviderName } from "./types";
11
7
  import { expandAlias, resolveModelId, effectiveAliasesFor } from "./model-registry";
12
8
  import { findCatalogEntry, type ModelCatalogEntry } from "./model-catalog-compat";
@@ -16,12 +12,6 @@ import { jeoEnv } from "../util/env";
16
12
  import type { Config } from "../agent/state";
17
13
 
18
14
 
19
- // Initialize Provider Registry
20
- providerRegistry.register("anthropic", anthropicAdapter);
21
- providerRegistry.register("openai", openaiAdapter);
22
- providerRegistry.register("gemini", geminiAdapter);
23
- providerRegistry.register("antigravity", antigravityAdapter);
24
- providerRegistry.register("ollama", ollamaAdapter);
25
15
 
26
16
 
27
17
  export function resolveProvider(model: string): ProviderName {
@@ -315,6 +305,7 @@ async function resolveCall(options: Partial<CallOptions>, kind: "request" | "str
315
305
  onUsage: options.onUsage,
316
306
  signal: options.signal,
317
307
  reasoningEffort: options.reasoningEffort ?? thinkingToReasoningEffort(config.thinkingLevel),
308
+ onReasoning: options.onReasoning,
318
309
  };
319
310
  // Caller-supplied retry sink rides on the config-derived retry budget so the
320
311
  // engine/TUI can surface "rate limited — retrying in Ns" instead of a silent wait.
@@ -160,13 +160,18 @@ export function antigravityRequest(messages: Message[], options: CallOptions, cr
160
160
  type CcaUsage = { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
161
161
  interface CcaChunk {
162
162
  response?: {
163
- candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
163
+ candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[];
164
164
  usageMetadata?: CcaUsage;
165
165
  };
166
166
  }
167
167
 
168
168
  function textOf(chunk: CcaChunk): string {
169
- return chunk.response?.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
169
+ return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? "";
170
+ }
171
+
172
/**
 * Collect the native thinking text of a chunk: the `text` of every part of the
 * first candidate that is flagged `thought`, concatenated in order. Parts without
 * `text` contribute nothing; a chunk with no parts yields "". Kept apart from the
 * answer text so thoughts never pollute the JSON tool call.
 */
function thoughtOf(chunk: CcaChunk): string {
  const parts = chunk.response?.candidates?.[0]?.content?.parts;
  if (!parts) return "";
  let thinking = "";
  for (const part of parts) {
    if (part.thought) thinking += part.text ?? "";
  }
  return thinking;
}
171
176
 
172
177
  async function fetchAntigravity(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
@@ -194,6 +199,8 @@ export const antigravityAdapter: ProviderAdapter = {
194
199
  for await (const data of readSse(response.body)) {
195
200
  let chunk: CcaChunk;
196
201
  try { chunk = JSON.parse(data); } catch { continue; }
202
+ const thought = thoughtOf(chunk);
203
+ if (thought) options.onReasoning?.(thought);
197
204
  out += textOf(chunk);
198
205
  if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
199
206
  }
@@ -209,6 +216,8 @@ export const antigravityAdapter: ProviderAdapter = {
209
216
  for await (const data of readSse(response.body)) {
210
217
  let chunk: CcaChunk;
211
218
  try { chunk = JSON.parse(data); } catch { continue; }
219
+ const thought = thoughtOf(chunk);
220
+ if (thought) options.onReasoning?.(thought);
212
221
  const delta = textOf(chunk);
213
222
  if (delta) { yielded = true; yield delta; }
214
223
  if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
@@ -119,7 +119,7 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
119
119
  }
120
120
 
121
121
  interface GeminiChunk {
122
- candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
122
+ candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[];
123
123
  promptFeedback?: { blockReason?: string };
124
124
  usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
125
125
  }
@@ -130,7 +130,13 @@ interface CcaChunk {
130
130
  }
131
131
 
132
132
  function textOf(chunk: GeminiChunk): string {
133
- return chunk.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
133
+ return chunk.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? "";
134
+ }
135
+
136
/**
 * Collect the native thinking text of a chunk: the `text` of every part of the
 * first candidate that is flagged `thought`, concatenated in order. Such parts
 * are present only when the model emits thought summaries; otherwise the result
 * is "". Kept SEPARATE from the answer text so thoughts never pollute the JSON
 * tool call.
 */
function thoughtOf(chunk: GeminiChunk): string {
  const parts = chunk.candidates?.[0]?.content?.parts;
  if (!parts) return "";
  let thinking = "";
  for (const part of parts) {
    if (part.thought) thinking += part.text ?? "";
  }
  return thinking;
}
135
141
 
136
142
  /** When Gemini returns HTTP 200 with no text, surface the real cause (safety block /
@@ -176,6 +182,8 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
176
182
  }
177
183
  const inner = chunk.response;
178
184
  if (!inner) continue;
185
+ const thought = thoughtOf(inner);
186
+ if (thought) options.onReasoning?.(thought);
179
187
  const delta = textOf(inner);
180
188
  if (delta) {
181
189
  yieldedAny = true;
@@ -239,6 +247,8 @@ export const geminiAdapter: ProviderAdapter = {
239
247
  } catch {
240
248
  continue;
241
249
  }
250
+ const thought = thoughtOf(chunk);
251
+ if (thought) options.onReasoning?.(thought);
242
252
  const delta = textOf(chunk);
243
253
  if (delta) {
244
254
  yieldedAny = true;
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Built-in provider registration (the registry bootstrap).
3
+ *
4
+ * Importing this module for its side effect registers every bundled LLM adapter
5
+ * into the shared `providerRegistry`. `model-manager` then resolves adapters
6
+ * through the registry alone — it no longer imports, or even names, concrete
7
+ * providers. To add a new built-in provider, register it HERE only; nothing in
8
+ * `model-manager` changes.
9
+ */
10
+ import { providerRegistry } from "./provider-registry";
11
+ import { anthropicAdapter } from "./providers/anthropic";
12
+ import { openaiAdapter } from "./providers/openai";
13
+ import { geminiAdapter } from "./providers/gemini";
14
+ import { ollamaAdapter } from "./providers/ollama";
15
+ import { antigravityAdapter } from "./providers/antigravity";
16
+
17
// Built-in adapters paired with the provider name they are registered under.
// They are registered in the order listed.
const builtInAdapters = [
  ["anthropic", anthropicAdapter],
  ["openai", openaiAdapter],
  ["gemini", geminiAdapter],
  ["antigravity", antigravityAdapter],
  ["ollama", ollamaAdapter],
] as const;

for (const [providerName, adapter] of builtInAdapters) {
  providerRegistry.register(providerName, adapter);
}
package/src/ai/types.ts CHANGED
@@ -43,6 +43,10 @@ export interface CallOptions {
43
43
  /** Notified before each auto-retry backoff wait (rate limits / transient errors).
44
44
  * NOT forwarded to provider adapters — consumed by the manager's retry layer. */
45
45
  onRetry?: (attempt: number, err: unknown, delayMs: number) => void;
46
+ /** Streaming sink for native model reasoning/thinking text deltas (separate from the
47
+ * answer text). Surfaced as a transient dimmed view; absent for models that emit no
48
+ * thought text. */
49
+ onReasoning?: (delta: string) => void;
46
50
  }
47
51
 
48
52
  export interface ProviderAdapter {
package/src/cli/runner.ts CHANGED
@@ -172,15 +172,6 @@ export const COMMANDS: readonly CommandSpec[] = [
172
172
  return args => m.runUpdateCommand(args);
173
173
  },
174
174
  },
175
- {
176
- name: "gjc",
177
- summary: "Run the gjc workflow skill as an autonomous build loop (plan → implement → verify).",
178
- usage: "gjc \"<intent>\"",
179
- loader: async () => {
180
- const m = await import("../commands/gjc");
181
- return args => m.runGjcCommand(args);
182
- },
183
- },
184
175
  {
185
176
  name: "ooo-seed",
186
177
  summary: "Generate an immutable ooo seed from a specification (spec-first automation).",