npm - jeo-code - Versions diffs - 0.4.5 → 0.4.7 - Mend

jeo-code 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/README.ja.md +2 -2
package/README.ko.md +2 -2
package/README.md +2 -2
package/README.zh.md +2 -2
package/package.json +1 -1
package/src/agent/dev/evolution-bridge.ts +36 -3
package/src/agent/dev/self-analysis.ts +6 -1
package/src/agent/engine.ts +76 -71
package/src/agent/loop.ts +2 -0
package/src/agent/step-budget.ts +10 -0
package/src/agent/subagent-registry.ts +131 -0
package/src/agent/subagent-tool.ts +89 -0
package/src/agent/subagents.ts +22 -3
package/src/agent/task-tool.ts +123 -19
package/src/agent/tool-output.ts +115 -0
package/src/agent/tools.ts +42 -8
package/src/ai/model-manager.ts +9 -14
package/src/ai/model-registry.ts +8 -3
package/src/ai/providers/antigravity.ts +11 -2
package/src/ai/providers/gemini.ts +12 -2
package/src/ai/register-providers.ts +21 -0
package/src/ai/types.ts +4 -0
package/src/cli/runner.ts +0 -9
package/src/commands/launch.ts +157 -52
package/src/commands/team.ts +13 -6
package/src/skills/catalog.ts +0 -2
package/src/tui/app.ts +131 -20
package/src/tui/components/forge.ts +25 -7
package/src/tui/components/input-box.ts +8 -3
package/src/tui/components/markdown-text.ts +10 -1
package/src/tui/components/themes.ts +57 -1
package/src/tui/components/todo-card.ts +44 -13
package/src/tui/monitoring/hud-view.ts +53 -30
package/src/util/update-check.ts +53 -0
package/src/commands/gjc.ts +0 -52
package/src/prompts/skills/gjc/AGENTS.md +0 -31
package/src/prompts/skills/gjc/SKILL.md +0 -15

package/src/agent/subagents.ts CHANGED Viewed

@@ -178,20 +178,39 @@ function renderRolePrompt(template: string, role: SubagentRole): string {
     .trim();
 }
+/** Report whether a required section of a subagent report is present AND non-empty.
+ *
+ *  Only the FIRST occurrence of `marker` in `text` is considered. The section body
+ *  runs from the end of that occurrence to the nearest occurrence of any other
+ *  entry of `allMarkers` found at or after that point, or to the end of `text`
+ *  when none follows. A label-only section ("Summary:" with an empty body) is not
+ *  a real report, so it fails.
+ *
+ *  @param text       Report text to inspect.
+ *  @param marker     Section label that must be present.
+ *  @param allMarkers Every required label; entries equal to `marker` are skipped.
+ *  @returns `true` when `marker` occurs and its section body contains
+ *           non-whitespace content; `false` when the marker is absent or the
+ *           body is blank. */
+function markerHasContent(text: string, marker: string, allMarkers: string[]): boolean {
+  const start = text.indexOf(marker); // first occurrence only
+  if (start < 0) return false;
+  const after = start + marker.length;
+  let end = text.length;
+  for (const other of allMarkers) {
+    if (other === marker) continue;
+    const j = text.indexOf(other, after);
+    if (j >= 0 && j < end) end = j; // keep the nearest following marker
+  }
+  return text.slice(after, end).trim().length > 0;
+}
 export function validateSubagentDoneReason(role: SubagentRole, reason: string | undefined): { ok: boolean; missing?: string[] } {
   const trimmed = (reason ?? "").trim();
   if (!trimmed) return { ok: false, missing: ["done.reason"] };
+  const markers = role.requiredDoneMarkers ?? [];
+  // Each required section must be PRESENT and carry non-empty content — a report of
+  // bare labels (no prose) is rejected, which the substring-presence check let pass.
+  const sectionMissing = markers.filter(m => !markerHasContent(trimmed, m, markers));
   if (role.id === "critic") {
     const verdicts = ["[OKAY]", "[ITERATE]", "[REJECT]"];
     const hasVerdict = verdicts.some(marker => trimmed.startsWith(marker));
     const missing = [
       ...(hasVerdict ? [] : ["[OKAY]|[ITERATE]|[REJECT]"]),
-      ...((role.requiredDoneMarkers ?? []).filter(marker => !trimmed.includes(marker))),
+      ...sectionMissing,
     ];
     return { ok: missing.length === 0, missing };
   }
-  const missing = (role.requiredDoneMarkers ?? []).filter(marker => !trimmed.includes(marker));
-  return { ok: missing.length === 0, missing };
+  return { ok: sectionMissing.length === 0, missing: sectionMissing };
 }
 /** Build a role-specific system prompt from its dedicated template. */

package/src/agent/task-tool.ts CHANGED Viewed

@@ -26,6 +26,7 @@ import {
   validateSubagentDoneReason,
 } from "./subagents";
 import { thinkingMaxTokens } from "../ai/model-manager";
+import type { SubagentRegistry } from "./subagent-registry";
 /** Lifecycle event emitted while a delegated subagent runs. */
 export interface TaskSubEvent {
@@ -41,6 +42,12 @@ export interface TaskSubEvent {
   summary?: string;
   /** Model selected for this subagent run. */
   model?: string;
+  /** 1-based task position within a fan-out batch (omitted for single-task runs). */
+  index?: number;
+  /** Total tasks in the fan-out batch (omitted for single-task runs). */
+  total?: number;
+  /** Provider token usage for the finished subagent (done events only). */
+  tokens?: { input: number; output: number };
 }
 export interface TaskToolOptions {
@@ -51,11 +58,46 @@ export interface TaskToolOptions {
   signal?: AbortSignal;
   /** Optional live sink (e.g. plain-stream rendering of nested progress). */
   onEvent?: (ev: TaskSubEvent) => void;
+  /** Mid-turn steering drain (gjc parity): an additional user query typed while a
+   *  subagent works is forwarded live. Single-task runs and the SERIAL executor
+   *  batch (concurrency 1) forward to the one active subagent. A parallel read-only
+   *  batch routes through a broadcast hub (createSteerHub) so every running worker
+   *  sees each message exactly once. Unconsumed messages stay for the parent. */
+  steer?: () => string[];
+  /** When present, a `task` call with `detached: true` registers a background run
+   *  here and returns immediately; the parent controls it via the `subagent` tool. */
+  registry?: SubagentRegistry;
 }
 /** Max concurrent read-only subagents in a fan-out batch. */
 const MAX_FANOUT = 4;
+/** Hard cap on a SERIAL (mutating executor) fan-out batch: it runs one task at a
+ *  time inside one blocking tool call, so an unbounded queue would monopolize the
+ *  parent turn. Split larger efforts into sequential task calls. */
+const MAX_SERIAL_EXECUTOR = 6;
+/** Broadcast steering hub for a fan-out batch. Each concurrent worker registers
+ *  ONCE and then sees every parent steer message exactly once (append-only log +
+ *  per-worker cursor), so a mid-batch redirect reaches all running subagents
+ *  without the double-consume hazard of several workers draining one inbox.
+ *
+ *  `worker()` returns `undefined` when no `drain` was supplied, so callers can
+ *  pass the result straight through as an optional steer callback. Otherwise it
+ *  returns a poll function: each call drains the parent inbox into the shared
+ *  log and returns the log entries this worker has not yet seen. A worker's
+ *  cursor starts at 0, so its first poll also returns messages that other
+ *  workers already pulled into the log. The log is never trimmed; it lives as
+ *  long as the hub does.
+ *
+ *  @param drain Optional parent-side drain returning newly queued steer messages. */
+function createSteerHub(drain?: () => string[]) {
+  const log: string[] = [];
+  return {
+    worker(): (() => string[]) | undefined {
+      if (!drain) return undefined;
+      let cursor = 0;
+      return () => {
+        const fresh = drain();
+        if (fresh.length) log.push(...fresh); // whichever worker polls first records the message for all
+        const out = log.slice(cursor);
+        cursor = log.length;
+        return out;
+      };
+    },
+  };
+}
 /** One-line protocol description appended to the launch system prompt. Pass a
  *  config so CONFIG-DECLARED custom roles are advertised to the model too. */
 export function taskToolProtocolLine(config?: Pick<Config, "subagents">): string {
@@ -134,13 +176,26 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
     taskText: string,
     context: string,
     cwd: string,
+    extra: {
+      steer?: () => string[];
+      slot?: { index: number; total: number };
+      projectContext?: Awaited<ReturnType<typeof loadProjectContext>>;
+      /** Overrides opts.signal — a detached run uses its own registry signal so it
+       *  is cancellable independently of the parent turn. */
+      signal?: AbortSignal;
+    } = {},
   ): Promise<ToolResult> => {
+    const { steer, slot, projectContext: preloadedContext, signal: signalOverride } = extra;
+    // Tag every live event with its fan-out slot so a parent monitor can tell
+    // task 1 from task 3 when several same-role subagents stream concurrently.
+    const emit = (ev: TaskSubEvent) =>
+      opts.onEvent?.(slot ? { ...ev, index: slot.index, total: slot.total } : ev);
     const model = resolveSubagentModel(role.id, opts.config);
     const maxSteps = resolveSubagentMaxSteps(role.id, opts.config);
     // gjc parity: a role may pin its own reasoning budget; absent = inherit the
     // session/global thinking level (the "(inherit)" row in the picker).
     const thinking = resolveSubagentThinking(role.id, opts.config) ?? opts.config.thinkingLevel;
-    const projectContext = await loadProjectContext(cwd);
+    const projectContext = preloadedContext ?? await loadProjectContext(cwd);
     const history: Message[] = [
       { role: "system", content: withProjectContext(subagentSystemPrompt(role), projectContext) },
       { role: "user", content: `${taskText}${context}` },
@@ -149,10 +204,13 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
     let lastTarget = "";
     let currentStep = 0;
     // Round-8 (architect ref 7-Round7Workflow): count the subagent's SUCCESSFUL
-    // mutating calls so the parent can audit a "Changed Files:" claim against
-    // observed reality instead of trusting the report's substring markers.
-    let mutationsOk = 0;
-    opts.onEvent?.({ role: role.id, kind: "start", detail: taskText, maxSteps, model });
+    // calls so the parent can audit a "Changed Files:" claim against observed
+    // reality. File-writing tools (write/edit/mkdir/delete) are tracked apart from
+    // bash: read-only bash (e.g. `bun test`) MUST NOT count as edit evidence, but
+    // bash CAN mutate, so the audit message distinguishes the two cases.
+    let fileMutations = 0;
+    let bashRuns = 0;
+    emit({ role: role.id, kind: "start", detail: taskText, maxSteps, model });
     const result = await runAgentLoop(history, {
       cwd,
       model,
@@ -161,7 +219,8 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
       // Bounded delegation: a subagent's step contract stays exact — the parent
       // owns any retry/extension decision, so the gjc retry flow is disabled here.
       budget: { maxExtensions: 0 },
-      signal: opts.signal,
+      signal: signalOverride ?? opts.signal,
+      steer,
       tools: subagentToolset(role),
       events: {
         onStep: n => { currentStep = n; },
@@ -169,35 +228,45 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
           if (invocation && invocation.tool && invocation.tool !== "done") {
             lastTarget = toolTarget(invocation.tool, invocation.arguments);
             trace.push(`  step ${currentStep}/${maxSteps}: ${lastTarget}`);
-            opts.onEvent?.({ role: role.id, kind: "step", detail: lastTarget, step: currentStep, maxSteps, model });
+            emit({ role: role.id, kind: "step", detail: lastTarget, step: currentStep, maxSteps, model });
           }
         },
         onToolResult: (tool, success, output) => {
-          if (success && (tool === "write" || tool === "edit" || tool === "bash")) mutationsOk++;
+          if (success) {
+            if (tool === "write" || tool === "edit" || tool === "mkdir" || tool === "delete") fileMutations++;
+            else if (tool === "bash") bashRuns++;
+          }
           const label = lastTarget || tool;
           const summary = firstUsefulLine(output);
           const suffix = summary ? ` — ${summary}` : "";
           trace.push(`  ${success ? "✓" : "✗"} ${label}${suffix}`);
-          opts.onEvent?.({ role: role.id, kind: "tool", detail: label, success, summary, step: currentStep, maxSteps, model });
+          emit({ role: role.id, kind: "tool", detail: label, success, summary, step: currentStep, maxSteps, model });
           lastTarget = "";
         },
         // Retry notices (rate-limit backoff etc.) surface as live "step" beats so the
         // parent's monitor shows WHY a subagent is pausing instead of going silent.
-        onNotice: msg => opts.onEvent?.({ role: role.id, kind: "step", detail: msg, step: currentStep, maxSteps, model }),
+        onNotice: msg => emit({ role: role.id, kind: "step", detail: msg, step: currentStep, maxSteps, model }),
+        // Mid-turn steering reached this subagent: surface it as a live beat so the
+        // parent's monitor shows the redirect instead of an unexplained behavior change.
+        onSteer: text => emit({ role: role.id, kind: "step", detail: `↳ steer: ${text}`, step: currentStep, maxSteps, model }),
       },
     });
     const reason = result.doneReason?.trim() || `(subagent reached the ${result.steps}-step limit without signaling done)`;
     const validation = validateSubagentDoneReason(role, reason);
     const complete = result.done && validation.ok;
     const detail = validation.ok ? reason : `${reason}\n\n[contract incomplete: missing ${validation.missing?.join(", ")}]`;
-    opts.onEvent?.({ role: role.id, kind: "done", detail, success: complete, step: result.steps, maxSteps, model });
-    const header = `[${role.title} subagent] ${complete ? "completed" : "stopped"} in ${result.steps} step(s) on ${model}.`;
+    emit({ role: role.id, kind: "done", detail, success: complete, step: result.steps, maxSteps, model, tokens: result.usage ? { input: result.usage.inputTokens, output: result.usage.outputTokens } : undefined });
+    const tokNote = result.usage ? `, ${result.usage.inputTokens + result.usage.outputTokens} tok` : "";
+    const header = `[${role.title} subagent] ${complete ? "completed" : "stopped"} in ${result.steps} step(s) on ${model}${tokNote}.`;
     const body = trace.length ? `\nSteps:\n${trace.join("\n")}` : "";
-    // Parent-side audit: a mutating role that "completed" without ONE successful
-    // write/edit/bash cannot have changed anything — flag the claim as unverified
-    // (the report's markers prove formatting, not work).
-    const audit = complete && !role.readOnly && mutationsOk === 0
-      ? `\n[parent audit] No successful write/edit/bash was observed in this run — treat any "Changed Files:" claims above as UNVERIFIED.`
+    // Parent-side audit: a mutating role that "completed" without a successful file
+    // mutation (write/edit/mkdir/delete) likely changed nothing — flag the claim.
+    // bash is tracked separately: it CAN mutate, so an only-bash run downgrades to
+    // "verify independently" instead of the stronger UNVERIFIED.
+    const audit = complete && !role.readOnly && fileMutations === 0
+      ? bashRuns === 0
+        ? `\n[parent audit] No successful write/edit/bash was observed in this run — treat any "Changed Files:" claims above as UNVERIFIED.`
+        : `\n[parent audit] No successful write/edit was observed (only bash ran); bash may or may not have mutated files — verify any "Changed Files:" claims above independently.`
       : "";
     return { success: complete, output: `${header}${body}\n\nResult:\n${fenceSubagentReport(detail)}${audit}` };
   };
@@ -225,6 +294,18 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
       if (items.length === 0) {
         return { success: false, output: "", error: "task fan-out requires a non-empty 'tasks' array of assignments." };
       }
+      // #5: the mutating executor fan-out is SERIAL (concurrency 1) and blocks the
+      // turn; cap it regardless of justification so a huge queue can't monopolize
+      // the parent. Split larger efforts into sequential task calls.
+      if (!role.readOnly && items.length > MAX_SERIAL_EXECUTOR) {
+        return {
+          success: false,
+          output: "",
+          error:
+            `Executor fan-out of ${items.length} exceeds the serial cap of ${MAX_SERIAL_EXECUTOR}. ` +
+            `The mutating executor runs one task at a time and blocks the turn — split into ≤${MAX_SERIAL_EXECUTOR}-task batches or sequential task calls.`,
+        };
+      }
       // Spawn-gate lite (plan/gjc-inheritance.md B9, inherited from gjc's spawn-gate): a batch
       // wider than MAX_FANOUT is refused BEFORE any subagent launches unless the
       // model justifies the parallelism — silent capping hid the cost decision.
@@ -245,13 +326,22 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
       // Read-only roles fan out concurrently (bounded). The mutating executor is serialized
       // (concurrency 1) so parallel subagents can't race on the same files.
       const limit = role.readOnly ? Math.min(items.length, MAX_FANOUT) : 1;
+      // Load project context ONCE per batch instead of re-scanning AGENTS.md for
+      // every fan-out task (redundant IO + duplicated tokens).
+      const batchContext = await loadProjectContext(cwd);
       const results: ToolResult[] = new Array(items.length);
       let next = 0;
+      // #7: broadcast steering hub — each concurrent worker sees every parent
+      // steer message exactly once (safe even for parallel read-only fan-out).
+      const steerHub = createSteerHub(opts.steer);
       const worker = async () => {
+        // One steer cursor per concurrent worker (not per item) so a worker that
+        // processes several items sees each parent message once across them all.
+        const workerSteer = steerHub.worker();
         while (true) {
           const i = next++;
           if (i >= items.length) return;
-          results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd);
+          results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd, { slot: { index: i + 1, total: items.length }, projectContext: batchContext, steer: workerSteer });
         }
       };
       await Promise.all(Array.from({ length: limit }, () => worker()));
@@ -267,6 +357,20 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
     if (!taskText) {
       return { success: false, output: "", error: `task tool requires a non-empty 'task' (or a 'tasks' array). Valid roles: ${subagentRoleIds(opts.config).join(", ")}.` };
     }
-    return runOne(role, taskText, ctx(args.context), cwd);
+    // Detached form (#9): register a background run and return immediately so the
+    // parent can keep working, then list/inspect/await/cancel via the `subagent`
+    // tool. Steering is not forwarded to a detached run (no single active drainer).
+    if (args.detached === true && opts.registry) {
+      const rec = opts.registry.launch(role.id, taskText, signal =>
+        runOne(role, taskText, ctx(args.context), cwd, { signal }),
+      );
+      return {
+        success: true,
+        output:
+          `[detached] launched ${role.title} subagent '${rec.id}'. It runs in the background — ` +
+          `keep working, then use the 'subagent' tool ({action:"await"|"list"|"inspect"|"cancel", ids?}) to collect its result.`,
+      };
+    }
+    return runOne(role, taskText, ctx(args.context), cwd, { steer: opts.steer });
   };
 }

package/src/agent/tool-output.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * Tool-result output handling — the model-visible output budget, both-ends
+ * truncation, recoverable artifact spilling, and the minimize→truncate→spill
+ * orchestration the agent loop applies to every tool result.
+ *
+ * Extracted from `engine.ts` (single-responsibility: the loop drives steps; this
+ * module owns how a tool's raw output is shaped before it re-enters context).
+ * `engine.ts` re-exports the public surface for backward compatibility.
+ */
+import * as fs from "node:fs/promises";
+import * as path from "node:path";
+import { jeoEnv } from "../util/env";
+import { minimizeToolOutput } from "./output-minimizer";
+/** Env-tunable output budget (plan/gjc-inheritance.md B10, inherited from gjc's
+ *  settings-driven output handling): JEO_TOOL_OUTPUT_MAX caps the model-visible
+ *  tool result; the spill threshold tracks it so anything truncated stays
+ *  artifact-recoverable.
+ *
+ *  The value is read through `jeoEnv("TOOL_OUTPUT_MAX")`. It is accepted only when
+ *  it parses to a finite number in the range 500..200_000, and is then truncated
+ *  to an integer. An unset, empty, non-numeric, or out-of-range value falls back
+ *  to 4_000. `TOOL_OUTPUT_MAX` is computed once, when this module is loaded. */
+function envOutputMax(): number {
+  const raw = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
+  return Number.isFinite(raw) && raw >= 500 && raw <= 200_000 ? Math.trunc(raw) : 4_000;
+}
+export const TOOL_OUTPUT_MAX = envOutputMax();
+/** Read results are deliberate, contiguous file slices the model explicitly asked
+ *  for (via lineRange), already line-capped by the read tool and recoverable via
+ *  spill. They get a much larger model-visible budget than the generic
+ *  noise-control cap, so a 500-line read is not silently re-shrunk to ~100 lines.
+ *  JEO_READ_OUTPUT_MAX overrides (1k..200k).
+ *
+ *  The override is read through `jeoEnv("READ_OUTPUT_MAX")` and truncated to an
+ *  integer. An unset, empty, non-numeric, or out-of-range value falls back to
+ *  32_000. `READ_OUTPUT_MAX` is computed once, when this module is loaded. */
+function envReadOutputMax(): number {
+  const raw = Number(jeoEnv("READ_OUTPUT_MAX") ?? "");
+  return Number.isFinite(raw) && raw >= 1_000 && raw <= 200_000 ? Math.trunc(raw) : 32_000;
+}
+export const READ_OUTPUT_MAX = envReadOutputMax();
+/**
+ * Cap a tool result fed back to the model. Default mode keeps both ends: the head
+ * holds the start (e.g. a command's invocation) and the tail holds what's usually
+ * decisive (test summaries, the final error). A pure head-cut loses that. In this
+ * mode the first 60% of `max` (rounded down) is kept from the start and the
+ * remaining budget from the end, joined by a marker line.
+ *
+ * `headOnly` keeps only the leading `max` characters and drops everything after
+ * them — for `read` results, which are a contiguous file slice the model
+ * explicitly requested; head/tail splitting would mangle the code into two
+ * non-adjacent fragments.
+ *
+ * The truncation marker is appended on top of the kept text, so a truncated
+ * result is longer than `max` by the length of that marker.
+ *
+ * @param s        Raw text to cap.
+ * @param max      Number of characters of `s` to keep (defaults to TOOL_OUTPUT_MAX).
+ * @param headOnly When true, keep the head only instead of head + tail.
+ * @returns `s` unchanged when it already fits within `max`; otherwise the kept
+ *          portion(s) plus a marker stating how many characters were dropped.
+ */
+export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX, headOnly = false): string {
+  if (s.length <= max) return s;
+  if (headOnly) {
+    return `${s.slice(0, max)}\n…(${s.length - max} chars truncated; narrow the lineRange or read the spilled artifact)…`;
+  }
+  const head = Math.floor(max * 0.6);
+  const tail = max - head;
+  return `${s.slice(0, head)}\n…(${s.length - max} chars truncated)…\n${s.slice(s.length - tail)}`;
+}
+/** Non-read tool output larger than this is spilled to a recoverable artifact file.
+ *  Aligned with `truncateToolOutput`'s generic cap so that whenever the model-visible
+ *  result drops content, the full output is recoverable via the artifact. (`read`
+ *  spills against the larger READ_OUTPUT_MAX in the result loop.)
+ *  NOTE(review): `formatToolResultBody` in this module does not reference this
+ *  constant; it compares against the per-tool budget, which is TOOL_OUTPUT_MAX for
+ *  non-read tools. Confirm whether other modules still consume this export. */
+export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
+/** Most recent tool-result artifacts to keep; older ones are pruned on each spill. */
+export const MAX_TOOL_ARTIFACTS = 50;
+/** Best-effort retention: keep the newest `MAX_TOOL_ARTIFACTS` files in `dir`, delete the rest.
+ *
+ *  "Newest" is decided by modification time. Filesystem errors are swallowed at
+ *  every step: a directory that cannot be listed is treated as empty, an entry
+ *  whose `stat` fails is assigned mtime 0 (so it sorts as oldest and is removed
+ *  first), and a removal that fails is ignored. Does nothing when the directory
+ *  holds `MAX_TOOL_ARTIFACTS` entries or fewer.
+ *
+ *  @param dir Absolute path of the artifact directory to prune. */
+async function pruneToolArtifacts(dir: string): Promise<void> {
+  const files = await fs.readdir(dir).catch(() => [] as string[]);
+  if (files.length <= MAX_TOOL_ARTIFACTS) return;
+  const stamped = await Promise.all(
+    files.map(async f => ({ f, m: (await fs.stat(path.join(dir, f)).catch(() => null))?.mtimeMs ?? 0 })),
+  );
+  stamped.sort((a, b) => b.m - a.m); // newest first
+  for (const { f } of stamped.slice(MAX_TOOL_ARTIFACTS)) {
+    await fs.rm(path.join(dir, f), { force: true }).catch(() => {});
+  }
+}
+/**
+ * Write an oversized tool result verbatim under `.jeo/artifacts/tool-results/` and
+ * return the workspace-relative path (for the model to `read`). Best-effort: throws
+ * are caught by the caller, which simply omits the artifact note.
+ *
+ * The file is named `<epoch-ms>-<random base36 suffix>-<tool>.txt`. In the tool
+ * name, every character outside `[a-zA-Z0-9_-]` is replaced by `_`, the result is
+ * cut to 32 characters, and an empty name becomes `tool`. The directory is created
+ * if missing, and older artifacts are pruned after the write.
+ *
+ * @param tool   Name of the tool that produced the output (used in the file name).
+ * @param output Full, untruncated output to persist.
+ * @param cwd    Workspace root that the returned path is relative to.
+ * @returns The artifact path relative to `cwd`.
+ * @throws Rejects when creating the directory or writing the file fails.
+ */
+export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
+  const dir = path.join(cwd, ".jeo", "artifacts", "tool-results");
+  await fs.mkdir(dir, { recursive: true });
+  const safeTool = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32) || "tool";
+  const stamp = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+  const rel = path.join(".jeo", "artifacts", "tool-results", `${stamp}-${safeTool}.txt`);
+  await fs.writeFile(path.join(cwd, rel), output, "utf-8");
+  // Retention so a long session can't grow the artifact dir without bound.
+  await pruneToolArtifacts(dir);
+  return rel;
+}
+/**
+ * Shape one tool's raw output into the model-visible result body: strip runner
+ * noise (minimize), cap to the per-tool budget (`read` gets the larger read budget
+ * and a head-only cut), and spill the full output to a recoverable artifact when it
+ * exceeds the budget. Behavior-identical to the inline logic it replaces in
+ * `runAgentLoop`.
+ *
+ * Truncation is applied to the MINIMIZED text, but the spill decision compares the
+ * RAW output length against the budget. The artifact note can therefore be
+ * appended even when minimizing alone brought the visible text under the budget.
+ * If spilling fails, the note is simply omitted.
+ *
+ * @param tool      Tool name; `"read"` selects the read budget and head-only cut.
+ * @param rawOutput Unprocessed tool output.
+ * @param cwd       Workspace root used for the spilled artifact.
+ * @returns The model-visible body, with an artifact pointer appended when the raw
+ *          output was spilled successfully.
+ */
+export async function formatToolResultBody(tool: string, rawOutput: string, cwd: string): Promise<string> {
+  const visible = minimizeToolOutput(rawOutput, tool).text;
+  // `read` is a deliberate, contiguous file slice: give it the larger read budget
+  // and truncate head-only (head/tail splitting mangles code). Other tools keep the
+  // generic noise-control cap + both-ends truncation.
+  const isReadResult = tool === "read";
+  const outputBudget = isReadResult ? READ_OUTPUT_MAX : TOOL_OUTPUT_MAX;
+  let body = truncateToolOutput(visible, outputBudget, isReadResult);
+  if (rawOutput.length > outputBudget) {
+    const artifact = await spillToolResult(tool, rawOutput, cwd).catch(() => null);
+    if (artifact) {
+      body += `\n[full output (${rawOutput.length} chars) saved to ${artifact} — read it for the truncated remainder]`;
+    }
+  }
+  return body;
+}

package/src/agent/tools.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import * as fs from "node:fs/promises";
 import * as path from "node:path";
 import { readWorkflowState, readWorkflowStateStrict, type WorkflowState } from "./state";
 import { jeoEnv } from "../util/env";
+import { READ_OUTPUT_MAX } from "./tool-output";
 /** Read the deep-interview lock; on corrupt state fail CLOSED (treat as active lock). */
 async function readMutationLock(cwd: string): Promise<WorkflowState | null> {
@@ -291,10 +292,27 @@ export async function readTool(
       return { success: true, output: out.join("\n") };
     }
-    const MAX_LINES = 500;
-    const annotated = lines.slice(0, MAX_LINES).map((l, i) => `${i + 1}${lineAnchor(l)}|${l}`).join("\n");
-    if (lines.length > MAX_LINES) {
-      const notice = `\n…(showing lines 1-${MAX_LINES} of ${lines.length}; pass lineRange "${MAX_LINES + 1}-" to read the rest)`;
+    // Default (no lineRange): fill the model-visible read budget with WHOLE lines
+    // instead of a fixed 500-line cap that left half the 32k budget unused and forced
+    // needless pagination (the read tool's biggest "reads too little per call" pain).
+    // READ_OUTPUT_MAX is the real cap; a hard line ceiling (JEO_READ_MAX_LINES) guards
+    // pathological files, and a small reserve keeps the pagination notice inside the
+    // budget so it is never trimmed by the downstream head-only truncation.
+    const HARD_LINE_CEILING = Math.max(500, Number(jeoEnv("READ_MAX_LINES") ?? "") || 5000);
+    const charBudget = Math.max(1_000, READ_OUTPUT_MAX - 256);
+    const shownLines: string[] = [];
+    let usedChars = 0;
+    for (let i = 0; i < lines.length && shownLines.length < HARD_LINE_CEILING; i++) {
+      const annotatedLine = `${i + 1}${lineAnchor(lines[i]!)}|${lines[i]}`;
+      const cost = annotatedLine.length + 1; // + newline
+      if (shownLines.length > 0 && usedChars + cost > charBudget) break; // always emit ≥1 line
+      shownLines.push(annotatedLine);
+      usedChars += cost;
+    }
+    const annotated = shownLines.join("\n");
+    if (shownLines.length < lines.length) {
+      const shown = shownLines.length;
+      const notice = `\n…(showing lines 1-${shown} of ${lines.length}; pass lineRange "${shown + 1}-" to read the rest)`;
       return { success: true, output: annotated + notice };
     }
     return { success: true, output: annotated };
@@ -574,7 +592,8 @@ export async function bashTool(
   cwd: string = process.cwd(),
   timeoutMs: number = 120_000,
   subdir?: string,
-  env?: Record<string, string>
+  env?: Record<string, string>,
+  onProgress?: (partialOutput: string) => void,
 ): Promise<ToolResult> {
   if (jeoEnv("BASH_FIXUPS") === "1") {
     const fx = applyBashFixups(command);
@@ -608,12 +627,27 @@ export async function bashTool(
       killTimer = setTimeout(() => { try { proc.kill(9); } catch {} }, 3_000);
     }, TIMEOUT_MS);
+    // Stream stdout incrementally when a progress sink is attached (drives the live
+    // DIMMED bash output view); read stderr fully in parallel. Without a sink, fall
+    // back to a single post-exit read (identical content, no streaming overhead).
+    const stderrPromise = new Response(proc.stderr).text();
+    let stdout = "";
+    if (onProgress) {
+      const decoder = new TextDecoder();
+      let lastEmit = 0;
+      for await (const chunk of proc.stdout as unknown as AsyncIterable<Uint8Array>) {
+        stdout += decoder.decode(chunk, { stream: true });
+        const now = Date.now();
+        if (now - lastEmit >= 80) { lastEmit = now; onProgress(stdout); }
+      }
+      stdout += decoder.decode();
+      onProgress(stdout);
+    }
     await proc.exited;
     clearTimeout(timer);
     if (killTimer) clearTimeout(killTimer);
-    const stdout = await new Response(proc.stdout).text();
-    const stderr = await new Response(proc.stderr).text();
+    if (!onProgress) stdout = await new Response(proc.stdout).text();
+    const stderr = await stderrPromise;
     let output = [stdout, stderr].filter(Boolean).join("\n");
     const MAX_OUTPUT = 100_000;

package/src/ai/model-manager.ts CHANGED Viewed

@@ -2,11 +2,7 @@ import { providerRegistry } from "./provider-registry";
 import { OAUTH_FLOW_REGISTRY } from "../auth/flows";
 import { readGlobalConfig } from "../agent/state";
 import { resolveCredential, type AuthProvider, type Credential } from "../auth";
-import { anthropicAdapter } from "./providers/anthropic";
-import { openaiAdapter } from "./providers/openai";
-import { geminiAdapter } from "./providers/gemini";
-import { ollamaAdapter } from "./providers/ollama";
-import { antigravityAdapter } from "./providers/antigravity";
+import "./register-providers"; // side-effect: registers built-in adapters into providerRegistry
 import type { CallOptions, Message, ProviderAdapter, ProviderName } from "./types";
 import { expandAlias, resolveModelId, effectiveAliasesFor } from "./model-registry";
 import { findCatalogEntry, type ModelCatalogEntry } from "./model-catalog-compat";
@@ -16,12 +12,6 @@ import { jeoEnv } from "../util/env";
 import type { Config } from "../agent/state";
-// Initialize Provider Registry
-providerRegistry.register("anthropic", anthropicAdapter);
-providerRegistry.register("openai", openaiAdapter);
-providerRegistry.register("gemini", geminiAdapter);
-providerRegistry.register("antigravity", antigravityAdapter);
-providerRegistry.register("ollama", ollamaAdapter);
 export function resolveProvider(model: string): ProviderName {
@@ -96,9 +86,13 @@ export function thinkingToReasoningEffort(
   return "medium";
 }
-/** Describe a model id: alias expansion + the provider it routes to. For `/model` + diagnostics. */
-export async function describeModel(input: string): Promise<{ input: string; resolved: string; provider: ProviderName }> {
-  const resolved = await resolveModelId(input);
+/** Describe a model id: alias expansion + the provider it routes to. For `/model` + diagnostics.
+ *  Pass an already-read `config` to skip a redundant readGlobalConfig() on the turn hot path. */
+export async function describeModel(
+  input: string,
+  config?: { modelAliases?: Record<string, string> },
+): Promise<{ input: string; resolved: string; provider: ProviderName }> {
+  const resolved = await resolveModelId(input, config);
   return { input, resolved, provider: resolveProvider(resolved) };
 }
@@ -311,6 +305,7 @@ async function resolveCall(options: Partial<CallOptions>, kind: "request" | "str
     onUsage: options.onUsage,
     signal: options.signal,
     reasoningEffort: options.reasoningEffort ?? thinkingToReasoningEffort(config.thinkingLevel),
+    onReasoning: options.onReasoning,
   };
   // Caller-supplied retry sink rides on the config-derived retry budget so the
   // engine/TUI can surface "rate limited — retrying in Ns" instead of a silent wait.

package/src/ai/model-registry.ts CHANGED Viewed

@@ -25,9 +25,14 @@ export function expandAlias(input: string, aliases: ModelAliases = BUILTIN_ALIAS
 }
 // Async: merge BUILTIN_ALIASES with config.modelAliases (config wins) and expand.
-export async function resolveModelId(input: string): Promise<string> {
-  const config = await readGlobalConfig();
-  const modelAliases = (config as any).modelAliases ?? {};
+// When `config` is supplied it is used as-is and readGlobalConfig() is not awaited (turn
+// hot path: avoids re-reading the config file mid-turn for model resolution).
+export async function resolveModelId(
+  input: string,
+  config?: { modelAliases?: ModelAliases },
+): Promise<string> {
+  const cfg = config ?? (await readGlobalConfig()); // falls back to the global config only when none was passed
+  const modelAliases = (cfg as any).modelAliases ?? {}; // a missing alias map is treated as empty
   const merged: ModelAliases = { ...BUILTIN_ALIASES, ...modelAliases }; // later spread wins: config aliases override built-ins
   return expandAlias(input, merged);
 }

package/src/ai/providers/antigravity.ts CHANGED Viewed

@@ -160,13 +160,18 @@ export function antigravityRequest(messages: Message[], options: CallOptions, cr
 type CcaUsage = { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
 interface CcaChunk {
   response?: {
-    candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
+    candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[]; // `thought` marks a part as native thinking text
     usageMetadata?: CcaUsage;
   };
 }
 function textOf(chunk: CcaChunk): string {
-  return chunk.response?.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
+  return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? ""; // first candidate only; `thought` parts are dropped; a missing link in the chain yields ""
+}
+/** Native thinking text (`thought` parts) — kept separate so it never pollutes the JSON tool call.
+ *  Joins the `text` of the first candidate's parts whose `thought` flag is truthy (a part
+ *  without `text` contributes ""); returns "" when no such parts exist or any link of the
+ *  optional chain (response, candidates, content, parts) is missing. */
+function thoughtOf(chunk: CcaChunk): string {
+  return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
 }
 async function fetchAntigravity(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
@@ -194,6 +199,8 @@ export const antigravityAdapter: ProviderAdapter = {
     for await (const data of readSse(response.body)) {
       let chunk: CcaChunk;
       try { chunk = JSON.parse(data); } catch { continue; }
+      const thought = thoughtOf(chunk);
+      if (thought) options.onReasoning?.(thought);
       out += textOf(chunk);
       if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
     }
@@ -209,6 +216,8 @@ export const antigravityAdapter: ProviderAdapter = {
     for await (const data of readSse(response.body)) {
       let chunk: CcaChunk;
       try { chunk = JSON.parse(data); } catch { continue; }
+      const thought = thoughtOf(chunk);
+      if (thought) options.onReasoning?.(thought);
       const delta = textOf(chunk);
       if (delta) { yielded = true; yield delta; }
       if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;

package/src/ai/providers/gemini.ts CHANGED Viewed

@@ -119,7 +119,7 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
 }
 interface GeminiChunk {
-  candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
+  candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[]; // `thought` marks a part as native thinking text
   promptFeedback?: { blockReason?: string };
   usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
 }
@@ -130,7 +130,13 @@ interface CcaChunk {
 }
 function textOf(chunk: GeminiChunk): string {
-  return chunk.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
+  return chunk.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? ""; // first candidate only; `thought` parts are dropped; a missing link in the chain yields ""
+}
+/** Native thinking text (`thought` parts), present only when the model emits thought
+ *  summaries. Kept SEPARATE from textOf so thoughts never pollute the JSON tool call.
+ *  Joins the `text` of the first candidate's parts whose `thought` flag is truthy (a part
+ *  without `text` contributes ""); returns "" when no such parts exist or any link of the
+ *  optional chain (candidates, content, parts) is missing. */
+function thoughtOf(chunk: GeminiChunk): string {
+  return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
 }
 /** When Gemini returns HTTP 200 with no text, surface the real cause (safety block /
@@ -176,6 +182,8 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
     }
     const inner = chunk.response;
     if (!inner) continue;
+    const thought = thoughtOf(inner);
+    if (thought) options.onReasoning?.(thought);
     const delta = textOf(inner);
     if (delta) {
       yieldedAny = true;
@@ -239,6 +247,8 @@ export const geminiAdapter: ProviderAdapter = {
       } catch {
         continue;
       }
+      const thought = thoughtOf(chunk);
+      if (thought) options.onReasoning?.(thought);
       const delta = textOf(chunk);
       if (delta) {
         yieldedAny = true;