jeo-code 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +2 -2
- package/README.ko.md +2 -2
- package/README.md +2 -2
- package/README.zh.md +2 -2
- package/package.json +1 -1
- package/src/agent/dev/evolution-bridge.ts +36 -3
- package/src/agent/dev/self-analysis.ts +6 -1
- package/src/agent/engine.ts +76 -71
- package/src/agent/loop.ts +2 -0
- package/src/agent/step-budget.ts +10 -0
- package/src/agent/subagent-registry.ts +131 -0
- package/src/agent/subagent-tool.ts +89 -0
- package/src/agent/subagents.ts +22 -3
- package/src/agent/task-tool.ts +123 -19
- package/src/agent/tool-output.ts +115 -0
- package/src/agent/tools.ts +42 -8
- package/src/ai/model-manager.ts +9 -14
- package/src/ai/model-registry.ts +8 -3
- package/src/ai/providers/antigravity.ts +11 -2
- package/src/ai/providers/gemini.ts +12 -2
- package/src/ai/register-providers.ts +21 -0
- package/src/ai/types.ts +4 -0
- package/src/cli/runner.ts +0 -9
- package/src/commands/launch.ts +157 -52
- package/src/commands/team.ts +13 -6
- package/src/skills/catalog.ts +0 -2
- package/src/tui/app.ts +131 -20
- package/src/tui/components/forge.ts +25 -7
- package/src/tui/components/input-box.ts +8 -3
- package/src/tui/components/markdown-text.ts +10 -1
- package/src/tui/components/themes.ts +57 -1
- package/src/tui/components/todo-card.ts +44 -13
- package/src/tui/monitoring/hud-view.ts +53 -30
- package/src/util/update-check.ts +53 -0
- package/src/commands/gjc.ts +0 -52
- package/src/prompts/skills/gjc/AGENTS.md +0 -31
- package/src/prompts/skills/gjc/SKILL.md +0 -15
package/src/agent/subagents.ts
CHANGED
|
@@ -178,20 +178,39 @@ function renderRolePrompt(template: string, role: SubagentRole): string {
|
|
|
178
178
|
.trim();
|
|
179
179
|
}
|
|
180
180
|
|
|
181
|
+
/**
 * Reports whether a required report section is present AND non-empty.
 *
 * The section body is the span of `text` that starts right after the first
 * occurrence of `marker` and ends at the nearest following occurrence of any
 * other required marker (or at the end of `text`). A label-only section
 * ("Summary:" followed by nothing but whitespace) is not a real report, so it
 * fails.
 *
 * @param text       The full report text to inspect.
 * @param marker     The section label being checked.
 * @param allMarkers Every required section label; used to find where this
 *                   section ends. Empty-string entries are ignored as
 *                   terminators.
 * @returns `true` only when `marker` occurs and its body has non-whitespace content.
 */
function markerHasContent(text: string, marker: string, allMarkers: string[]): boolean {
  const start = text.indexOf(marker);
  if (start < 0) return false;
  const after = start + marker.length;
  let end = text.length;
  for (const other of allMarkers) {
    // Skip the marker itself. Also skip empty markers: `indexOf("", after)`
    // returns `after`, which would shrink every section to zero length and make
    // all real markers look empty.
    if (other === marker || other.length === 0) continue;
    const j = text.indexOf(other, after);
    if (j >= 0 && j < end) end = j;
  }
  return text.slice(after, end).trim().length > 0;
}
|
|
196
|
+
|
|
181
197
|
/**
 * Checks a subagent's final `done` reason against its role contract.
 *
 * - A missing or whitespace-only reason fails with `missing: ["done.reason"]`.
 * - Every entry in `role.requiredDoneMarkers` must be present with a non-empty
 *   body (see `markerHasContent`); failing labels are listed in `missing`.
 * - For the `critic` role the report must additionally START with one of the
 *   verdict tags `[OKAY]`, `[ITERATE]` or `[REJECT]`; when absent, the combined
 *   tag `"[OKAY]|[ITERATE]|[REJECT]"` is listed first in `missing`.
 *
 * @returns `ok` is true exactly when `missing` is empty.
 */
export function validateSubagentDoneReason(role: SubagentRole, reason: string | undefined): { ok: boolean; missing?: string[] } {
  const report = (reason ?? "").trim();
  if (report.length === 0) {
    return { ok: false, missing: ["done.reason"] };
  }

  const required = role.requiredDoneMarkers ?? [];
  const gaps: string[] = [];

  // Critic reports lead with a verdict tag; its absence is reported before any section gaps.
  if (role.id === "critic") {
    const opensWithVerdict = ["[OKAY]", "[ITERATE]", "[REJECT]"].some(tag => report.startsWith(tag));
    if (!opensWithVerdict) gaps.push("[OKAY]|[ITERATE]|[REJECT]");
  }

  // A bare label with no prose counts as missing, not just an absent label.
  for (const label of required) {
    if (!markerHasContent(report, label, required)) gaps.push(label);
  }

  return { ok: gaps.length === 0, missing: gaps };
}
|
|
196
215
|
|
|
197
216
|
/** Build a role-specific system prompt from its dedicated template. */
|
package/src/agent/task-tool.ts
CHANGED
|
@@ -26,6 +26,7 @@ import {
|
|
|
26
26
|
validateSubagentDoneReason,
|
|
27
27
|
} from "./subagents";
|
|
28
28
|
import { thinkingMaxTokens } from "../ai/model-manager";
|
|
29
|
+
import type { SubagentRegistry } from "./subagent-registry";
|
|
29
30
|
|
|
30
31
|
/** Lifecycle event emitted while a delegated subagent runs. */
|
|
31
32
|
export interface TaskSubEvent {
|
|
@@ -41,6 +42,12 @@ export interface TaskSubEvent {
|
|
|
41
42
|
summary?: string;
|
|
42
43
|
/** Model selected for this subagent run. */
|
|
43
44
|
model?: string;
|
|
45
|
+
/** 1-based task position within a fan-out batch (omitted for single-task runs). */
|
|
46
|
+
index?: number;
|
|
47
|
+
/** Total tasks in the fan-out batch (omitted for single-task runs). */
|
|
48
|
+
total?: number;
|
|
49
|
+
/** Provider token usage for the finished subagent (done events only). */
|
|
50
|
+
tokens?: { input: number; output: number };
|
|
44
51
|
}
|
|
45
52
|
|
|
46
53
|
export interface TaskToolOptions {
|
|
@@ -51,11 +58,46 @@ export interface TaskToolOptions {
|
|
|
51
58
|
signal?: AbortSignal;
|
|
52
59
|
/** Optional live sink (e.g. plain-stream rendering of nested progress). */
|
|
53
60
|
onEvent?: (ev: TaskSubEvent) => void;
|
|
61
|
+
/** Mid-turn steering drain (gjc parity): an additional user query typed while a
|
|
62
|
+
* subagent works is forwarded live. Single-task runs and the SERIAL executor
|
|
63
|
+
* batch (concurrency 1) forward to the one active subagent. A parallel read-only
|
|
64
|
+
* batch routes through a broadcast hub (createSteerHub) so every running worker
|
|
65
|
+
* sees each message exactly once. Unconsumed messages stay for the parent. */
|
|
66
|
+
steer?: () => string[];
|
|
67
|
+
/** When present, a `task` call with `detached: true` registers a background run
|
|
68
|
+
* here and returns immediately; the parent controls it via the `subagent` tool. */
|
|
69
|
+
registry?: SubagentRegistry;
|
|
54
70
|
}
|
|
55
71
|
|
|
56
72
|
/** Max concurrent read-only subagents in a fan-out batch. */
|
|
57
73
|
const MAX_FANOUT = 4;
|
|
58
74
|
|
|
75
|
+
/** Hard cap on a SERIAL (mutating executor) fan-out batch: it runs one task at a
|
|
76
|
+
* time inside one blocking tool call, so an unbounded queue would monopolize the
|
|
77
|
+
* parent turn. Split larger efforts into sequential task calls. */
|
|
78
|
+
const MAX_SERIAL_EXECUTOR = 6;
|
|
79
|
+
|
|
80
|
+
/**
 * Builds a broadcast hub that fans parent steering messages out to several
 * concurrent workers.
 *
 * Messages pulled from `drain` are appended to a shared, append-only backlog.
 * Each reader returned by `worker()` keeps its own position in that backlog, so
 * every reader receives each message exactly once, no matter which reader
 * happened to pull it from `drain`.
 *
 * @param drain Source of newly arrived steer messages. When omitted,
 *              `worker()` returns `undefined` (steering disabled).
 * @returns An object whose `worker()` registers one reader and returns its
 *          poll function.
 */
function createSteerHub(drain?: () => string[]) {
  const backlog: string[] = [];

  // One reader = one private cursor over the shared backlog.
  const makeReader = (pull: () => string[]) => {
    let seen = 0;
    return (): string[] => {
      for (const message of pull()) backlog.push(message);
      const unseen = backlog.slice(seen);
      seen = backlog.length;
      return unseen;
    };
  };

  return {
    worker(): (() => string[]) | undefined {
      return drain ? makeReader(drain) : undefined;
    },
  };
}
|
|
100
|
+
|
|
59
101
|
/** One-line protocol description appended to the launch system prompt. Pass a
|
|
60
102
|
* config so CONFIG-DECLARED custom roles are advertised to the model too. */
|
|
61
103
|
export function taskToolProtocolLine(config?: Pick<Config, "subagents">): string {
|
|
@@ -134,13 +176,26 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
134
176
|
taskText: string,
|
|
135
177
|
context: string,
|
|
136
178
|
cwd: string,
|
|
179
|
+
extra: {
|
|
180
|
+
steer?: () => string[];
|
|
181
|
+
slot?: { index: number; total: number };
|
|
182
|
+
projectContext?: Awaited<ReturnType<typeof loadProjectContext>>;
|
|
183
|
+
/** Overrides opts.signal — a detached run uses its own registry signal so it
|
|
184
|
+
* is cancellable independently of the parent turn. */
|
|
185
|
+
signal?: AbortSignal;
|
|
186
|
+
} = {},
|
|
137
187
|
): Promise<ToolResult> => {
|
|
188
|
+
const { steer, slot, projectContext: preloadedContext, signal: signalOverride } = extra;
|
|
189
|
+
// Tag every live event with its fan-out slot so a parent monitor can tell
|
|
190
|
+
// task 1 from task 3 when several same-role subagents stream concurrently.
|
|
191
|
+
const emit = (ev: TaskSubEvent) =>
|
|
192
|
+
opts.onEvent?.(slot ? { ...ev, index: slot.index, total: slot.total } : ev);
|
|
138
193
|
const model = resolveSubagentModel(role.id, opts.config);
|
|
139
194
|
const maxSteps = resolveSubagentMaxSteps(role.id, opts.config);
|
|
140
195
|
// gjc parity: a role may pin its own reasoning budget; absent = inherit the
|
|
141
196
|
// session/global thinking level (the "(inherit)" row in the picker).
|
|
142
197
|
const thinking = resolveSubagentThinking(role.id, opts.config) ?? opts.config.thinkingLevel;
|
|
143
|
-
const projectContext = await loadProjectContext(cwd);
|
|
198
|
+
const projectContext = preloadedContext ?? await loadProjectContext(cwd);
|
|
144
199
|
const history: Message[] = [
|
|
145
200
|
{ role: "system", content: withProjectContext(subagentSystemPrompt(role), projectContext) },
|
|
146
201
|
{ role: "user", content: `${taskText}${context}` },
|
|
@@ -149,10 +204,13 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
149
204
|
let lastTarget = "";
|
|
150
205
|
let currentStep = 0;
|
|
151
206
|
// Round-8 (architect ref 7-Round7Workflow): count the subagent's SUCCESSFUL
|
|
152
|
-
//
|
|
153
|
-
//
|
|
154
|
-
|
|
155
|
-
|
|
207
|
+
// calls so the parent can audit a "Changed Files:" claim against observed
|
|
208
|
+
// reality. File-writing tools (write/edit/mkdir/delete) are tracked apart from
|
|
209
|
+
// bash: read-only bash (e.g. `bun test`) MUST NOT count as edit evidence, but
|
|
210
|
+
// bash CAN mutate, so the audit message distinguishes the two cases.
|
|
211
|
+
let fileMutations = 0;
|
|
212
|
+
let bashRuns = 0;
|
|
213
|
+
emit({ role: role.id, kind: "start", detail: taskText, maxSteps, model });
|
|
156
214
|
const result = await runAgentLoop(history, {
|
|
157
215
|
cwd,
|
|
158
216
|
model,
|
|
@@ -161,7 +219,8 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
161
219
|
// Bounded delegation: a subagent's step contract stays exact — the parent
|
|
162
220
|
// owns any retry/extension decision, so the gjc retry flow is disabled here.
|
|
163
221
|
budget: { maxExtensions: 0 },
|
|
164
|
-
signal: opts.signal,
|
|
222
|
+
signal: signalOverride ?? opts.signal,
|
|
223
|
+
steer,
|
|
165
224
|
tools: subagentToolset(role),
|
|
166
225
|
events: {
|
|
167
226
|
onStep: n => { currentStep = n; },
|
|
@@ -169,35 +228,45 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
169
228
|
if (invocation && invocation.tool && invocation.tool !== "done") {
|
|
170
229
|
lastTarget = toolTarget(invocation.tool, invocation.arguments);
|
|
171
230
|
trace.push(` step ${currentStep}/${maxSteps}: ${lastTarget}`);
|
|
172
|
-
|
|
231
|
+
emit({ role: role.id, kind: "step", detail: lastTarget, step: currentStep, maxSteps, model });
|
|
173
232
|
}
|
|
174
233
|
},
|
|
175
234
|
onToolResult: (tool, success, output) => {
|
|
176
|
-
if (success
|
|
235
|
+
if (success) {
|
|
236
|
+
if (tool === "write" || tool === "edit" || tool === "mkdir" || tool === "delete") fileMutations++;
|
|
237
|
+
else if (tool === "bash") bashRuns++;
|
|
238
|
+
}
|
|
177
239
|
const label = lastTarget || tool;
|
|
178
240
|
const summary = firstUsefulLine(output);
|
|
179
241
|
const suffix = summary ? ` — ${summary}` : "";
|
|
180
242
|
trace.push(` ${success ? "✓" : "✗"} ${label}${suffix}`);
|
|
181
|
-
|
|
243
|
+
emit({ role: role.id, kind: "tool", detail: label, success, summary, step: currentStep, maxSteps, model });
|
|
182
244
|
lastTarget = "";
|
|
183
245
|
},
|
|
184
246
|
// Retry notices (rate-limit backoff etc.) surface as live "step" beats so the
|
|
185
247
|
// parent's monitor shows WHY a subagent is pausing instead of going silent.
|
|
186
|
-
onNotice: msg =>
|
|
248
|
+
onNotice: msg => emit({ role: role.id, kind: "step", detail: msg, step: currentStep, maxSteps, model }),
|
|
249
|
+
// Mid-turn steering reached this subagent: surface it as a live beat so the
|
|
250
|
+
// parent's monitor shows the redirect instead of an unexplained behavior change.
|
|
251
|
+
onSteer: text => emit({ role: role.id, kind: "step", detail: `↳ steer: ${text}`, step: currentStep, maxSteps, model }),
|
|
187
252
|
},
|
|
188
253
|
});
|
|
189
254
|
const reason = result.doneReason?.trim() || `(subagent reached the ${result.steps}-step limit without signaling done)`;
|
|
190
255
|
const validation = validateSubagentDoneReason(role, reason);
|
|
191
256
|
const complete = result.done && validation.ok;
|
|
192
257
|
const detail = validation.ok ? reason : `${reason}\n\n[contract incomplete: missing ${validation.missing?.join(", ")}]`;
|
|
193
|
-
|
|
194
|
-
const
|
|
258
|
+
emit({ role: role.id, kind: "done", detail, success: complete, step: result.steps, maxSteps, model, tokens: result.usage ? { input: result.usage.inputTokens, output: result.usage.outputTokens } : undefined });
|
|
259
|
+
const tokNote = result.usage ? `, ${result.usage.inputTokens + result.usage.outputTokens} tok` : "";
|
|
260
|
+
const header = `[${role.title} subagent] ${complete ? "completed" : "stopped"} in ${result.steps} step(s) on ${model}${tokNote}.`;
|
|
195
261
|
const body = trace.length ? `\nSteps:\n${trace.join("\n")}` : "";
|
|
196
|
-
// Parent-side audit: a mutating role that "completed" without
|
|
197
|
-
// write/edit/
|
|
198
|
-
//
|
|
199
|
-
|
|
200
|
-
|
|
262
|
+
// Parent-side audit: a mutating role that "completed" without a successful file
|
|
263
|
+
// mutation (write/edit/mkdir/delete) likely changed nothing — flag the claim.
|
|
264
|
+
// bash is tracked separately: it CAN mutate, so an only-bash run downgrades to
|
|
265
|
+
// "verify independently" instead of the stronger UNVERIFIED.
|
|
266
|
+
const audit = complete && !role.readOnly && fileMutations === 0
|
|
267
|
+
? bashRuns === 0
|
|
268
|
+
? `\n[parent audit] No successful write/edit/bash was observed in this run — treat any "Changed Files:" claims above as UNVERIFIED.`
|
|
269
|
+
: `\n[parent audit] No successful write/edit was observed (only bash ran); bash may or may not have mutated files — verify any "Changed Files:" claims above independently.`
|
|
201
270
|
: "";
|
|
202
271
|
return { success: complete, output: `${header}${body}\n\nResult:\n${fenceSubagentReport(detail)}${audit}` };
|
|
203
272
|
};
|
|
@@ -225,6 +294,18 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
225
294
|
if (items.length === 0) {
|
|
226
295
|
return { success: false, output: "", error: "task fan-out requires a non-empty 'tasks' array of assignments." };
|
|
227
296
|
}
|
|
297
|
+
// #5: the mutating executor fan-out is SERIAL (concurrency 1) and blocks the
|
|
298
|
+
// turn; cap it regardless of justification so a huge queue can't monopolize
|
|
299
|
+
// the parent. Split larger efforts into sequential task calls.
|
|
300
|
+
if (!role.readOnly && items.length > MAX_SERIAL_EXECUTOR) {
|
|
301
|
+
return {
|
|
302
|
+
success: false,
|
|
303
|
+
output: "",
|
|
304
|
+
error:
|
|
305
|
+
`Executor fan-out of ${items.length} exceeds the serial cap of ${MAX_SERIAL_EXECUTOR}. ` +
|
|
306
|
+
`The mutating executor runs one task at a time and blocks the turn — split into ≤${MAX_SERIAL_EXECUTOR}-task batches or sequential task calls.`,
|
|
307
|
+
};
|
|
308
|
+
}
|
|
228
309
|
// Spawn-gate lite (plan/gjc-inheritance.md B9, gjc spawn-gate 계승): a batch
|
|
229
310
|
// wider than MAX_FANOUT is refused BEFORE any subagent launches unless the
|
|
230
311
|
// model justifies the parallelism — silent capping hid the cost decision.
|
|
@@ -245,13 +326,22 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
245
326
|
// Read-only roles fan out concurrently (bounded). The mutating executor is serialized
|
|
246
327
|
// (concurrency 1) so parallel subagents can't race on the same files.
|
|
247
328
|
const limit = role.readOnly ? Math.min(items.length, MAX_FANOUT) : 1;
|
|
329
|
+
// Load project context ONCE per batch instead of re-scanning AGENTS.md for
|
|
330
|
+
// every fan-out task (redundant IO + duplicated tokens).
|
|
331
|
+
const batchContext = await loadProjectContext(cwd);
|
|
248
332
|
const results: ToolResult[] = new Array(items.length);
|
|
249
333
|
let next = 0;
|
|
334
|
+
// #7: broadcast steering hub — each concurrent worker sees every parent
|
|
335
|
+
// steer message exactly once (safe even for parallel read-only fan-out).
|
|
336
|
+
const steerHub = createSteerHub(opts.steer);
|
|
250
337
|
const worker = async () => {
|
|
338
|
+
// One steer cursor per concurrent worker (not per item) so a worker that
|
|
339
|
+
// processes several items sees each parent message once across them all.
|
|
340
|
+
const workerSteer = steerHub.worker();
|
|
251
341
|
while (true) {
|
|
252
342
|
const i = next++;
|
|
253
343
|
if (i >= items.length) return;
|
|
254
|
-
results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd);
|
|
344
|
+
results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd, { slot: { index: i + 1, total: items.length }, projectContext: batchContext, steer: workerSteer });
|
|
255
345
|
}
|
|
256
346
|
};
|
|
257
347
|
await Promise.all(Array.from({ length: limit }, () => worker()));
|
|
@@ -267,6 +357,20 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
267
357
|
if (!taskText) {
|
|
268
358
|
return { success: false, output: "", error: `task tool requires a non-empty 'task' (or a 'tasks' array). Valid roles: ${subagentRoleIds(opts.config).join(", ")}.` };
|
|
269
359
|
}
|
|
270
|
-
|
|
360
|
+
// Detached form (#9): register a background run and return immediately so the
|
|
361
|
+
// parent can keep working, then list/inspect/await/cancel via the `subagent`
|
|
362
|
+
// tool. Steering is not forwarded to a detached run (no single active drainer).
|
|
363
|
+
if (args.detached === true && opts.registry) {
|
|
364
|
+
const rec = opts.registry.launch(role.id, taskText, signal =>
|
|
365
|
+
runOne(role, taskText, ctx(args.context), cwd, { signal }),
|
|
366
|
+
);
|
|
367
|
+
return {
|
|
368
|
+
success: true,
|
|
369
|
+
output:
|
|
370
|
+
`[detached] launched ${role.title} subagent '${rec.id}'. It runs in the background — ` +
|
|
371
|
+
`keep working, then use the 'subagent' tool ({action:"await"|"list"|"inspect"|"cancel", ids?}) to collect its result.`,
|
|
372
|
+
};
|
|
373
|
+
}
|
|
374
|
+
return runOne(role, taskText, ctx(args.context), cwd, { steer: opts.steer });
|
|
271
375
|
};
|
|
272
376
|
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool-result output handling — the model-visible output budget, both-ends
|
|
3
|
+
* truncation, recoverable artifact spilling, and the minimize→truncate→spill
|
|
4
|
+
* orchestration the agent loop applies to every tool result.
|
|
5
|
+
*
|
|
6
|
+
* Extracted from `engine.ts` (single-responsibility: the loop drives steps; this
|
|
7
|
+
* module owns how a tool's raw output is shaped before it re-enters context).
|
|
8
|
+
* `engine.ts` re-exports the public surface for backward compatibility.
|
|
9
|
+
*/
|
|
10
|
+
import * as fs from "node:fs/promises";
|
|
11
|
+
import * as path from "node:path";
|
|
12
|
+
import { jeoEnv } from "../util/env";
|
|
13
|
+
import { minimizeToolOutput } from "./output-minimizer";
|
|
14
|
+
|
|
15
|
+
/**
 * Resolves the model-visible tool-output budget from the environment.
 *
 * Reads the `TOOL_OUTPUT_MAX` setting through `jeoEnv` (documented upstream as
 * the `JEO_TOOL_OUTPUT_MAX` variable). A finite value within 500..200000 is
 * accepted and truncated to an integer; anything else (unset, non-numeric,
 * out of range) yields the default of 4000.
 */
function envOutputMax(): number {
  const fallback = 4_000;
  const parsed = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
  if (!Number.isFinite(parsed)) return fallback;
  if (parsed < 500 || parsed > 200_000) return fallback;
  return Math.trunc(parsed);
}

/** Cap on a generic tool result fed back to the model; fixed at module load. */
export const TOOL_OUTPUT_MAX = envOutputMax();
|
|
23
|
+
|
|
24
|
+
/**
 * Resolves the larger model-visible budget used for `read` results.
 *
 * Read results are contiguous file slices the model asked for, so they get a
 * bigger budget than the generic noise-control cap. Reads the
 * `READ_OUTPUT_MAX` setting through `jeoEnv` (documented upstream as
 * `JEO_READ_OUTPUT_MAX`). A finite value within 1000..200000 is accepted and
 * truncated to an integer; anything else yields the default of 32000.
 */
function envReadOutputMax(): number {
  const fallback = 32_000;
  const parsed = Number(jeoEnv("READ_OUTPUT_MAX") ?? "");
  if (!Number.isFinite(parsed)) return fallback;
  if (parsed < 1_000 || parsed > 200_000) return fallback;
  return Math.trunc(parsed);
}

/** Cap on a `read` tool result fed back to the model; fixed at module load. */
export const READ_OUTPUT_MAX = envReadOutputMax();
|
|
34
|
+
|
|
35
|
+
/** True when `code` is a UTF-16 high (leading) surrogate code unit. */
function isHighSurrogate(code: number): boolean {
  return code >= 0xd800 && code <= 0xdbff;
}

/** True when `code` is a UTF-16 low (trailing) surrogate code unit. */
function isLowSurrogate(code: number): boolean {
  return code >= 0xdc00 && code <= 0xdfff;
}

/** Pulls a head cut back by one unit when it would land between the halves of a surrogate pair. */
function safeHeadEnd(s: string, end: number): number {
  const splitsPair =
    end > 0 && end < s.length && isHighSurrogate(s.charCodeAt(end - 1)) && isLowSurrogate(s.charCodeAt(end));
  return splitsPair ? end - 1 : end;
}

/** Pushes a tail start forward by one unit when it would begin on the low half of a surrogate pair. */
function safeTailStart(s: string, start: number): number {
  const splitsPair =
    start > 0 && start < s.length && isLowSurrogate(s.charCodeAt(start)) && isHighSurrogate(s.charCodeAt(start - 1));
  return splitsPair ? start + 1 : start;
}

/**
 * Cap a tool result fed back to the model.
 *
 * Default mode keeps both ends: roughly the first 60% of the budget from the
 * start (e.g. a command's invocation) and the remainder from the end, where the
 * decisive part usually is (test summaries, the final error), joined by a
 * "chars truncated" marker line.
 *
 * `headOnly` keeps only the start — intended for `read` results, which are one
 * contiguous file slice; a head/tail split would yield two non-adjacent
 * fragments.
 *
 * Cuts never land inside a UTF-16 surrogate pair: a cut that would split one is
 * moved so the whole character is dropped, and the reported truncated count
 * reflects what was actually removed. For text without such characters at the
 * cut points the output is unchanged.
 *
 * @param s        Raw text to cap.
 * @param max      Budget in UTF-16 code units (expected to be a non-negative integer).
 * @param headOnly Keep only the head instead of head + tail.
 * @returns `s` unchanged when it fits; otherwise the kept part(s) plus a marker
 *          (so the result may exceed `max` by the marker's length).
 */
export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX, headOnly = false): string {
  if (s.length <= max) return s;
  if (headOnly) {
    const kept = s.slice(0, safeHeadEnd(s, max));
    return `${kept}\n…(${s.length - kept.length} chars truncated; narrow the lineRange or read the spilled artifact)…`;
  }
  const head = Math.floor(max * 0.6);
  const tail = max - head;
  const keptHead = s.slice(0, safeHeadEnd(s, head));
  const keptTail = s.slice(safeTailStart(s, s.length - tail));
  const dropped = s.length - keptHead.length - keptTail.length;
  return `${keptHead}\n…(${dropped} chars truncated)…\n${keptTail}`;
}
|
|
53
|
+
|
|
54
|
+
/** Non-read tool output larger than this is spilled to a recoverable artifact file.
|
|
55
|
+
* Aligned with `truncateToolOutput`'s generic cap so that whenever the model-visible
|
|
56
|
+
* result drops content, the full output is recoverable via the artifact. (`read`
|
|
57
|
+
* spills against the larger READ_OUTPUT_MAX in the result loop.) */
|
|
58
|
+
export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
|
|
59
|
+
|
|
60
|
+
/** How many of the most recent tool-result artifacts are retained; older ones are pruned on each spill. */
export const MAX_TOOL_ARTIFACTS = 50;

/**
 * Best-effort retention for the artifact directory: keeps the
 * `MAX_TOOL_ARTIFACTS` entries with the newest modification time in `dir` and
 * removes the rest.
 *
 * Never throws: an unreadable directory is treated as empty, an entry whose
 * `stat` fails is ranked as oldest (mtime 0), and removal failures are ignored.
 */
async function pruneToolArtifacts(dir: string): Promise<void> {
  let names: string[];
  try {
    names = await fs.readdir(dir);
  } catch {
    return;
  }
  if (names.length <= MAX_TOOL_ARTIFACTS) return;

  const entries = await Promise.all(
    names.map(async name => {
      const full = path.join(dir, name);
      let mtime = 0;
      try {
        mtime = (await fs.stat(full)).mtimeMs ?? 0;
      } catch {
        // Unreadable entry: keep mtime 0 so it sorts as the oldest.
      }
      return { full, mtime };
    }),
  );

  // Newest first; everything past the retention window is stale.
  entries.sort((x, y) => y.mtime - x.mtime);
  const stale = entries.slice(MAX_TOOL_ARTIFACTS);
  for (const { full } of stale) {
    try {
      await fs.rm(full, { force: true });
    } catch {
      // Best-effort: a failed delete is retried implicitly on the next spill.
    }
  }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Saves an oversized tool result verbatim under
 * `.jeo/artifacts/tool-results/` inside `cwd` and returns the path relative to
 * `cwd`, so the model can `read` the full text later.
 *
 * The file name is `<epoch-ms>-<random>-<tool>.txt`, where the tool name is
 * reduced to `[a-zA-Z0-9_-]`, capped at 32 characters, and replaced by `tool`
 * when nothing remains. After writing, older artifacts are pruned so the
 * directory cannot grow without bound.
 *
 * @throws Propagates `mkdir`/`writeFile` failures; callers treat the spill as
 *         best-effort and omit the artifact note on error.
 */
export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
  const relDir = path.join(".jeo", "artifacts", "tool-results");
  const absDir = path.join(cwd, relDir);
  await fs.mkdir(absDir, { recursive: true });

  const sanitized = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32);
  const toolTag = sanitized || "tool";
  const millis = Date.now();
  const nonce = Math.random().toString(36).slice(2, 8);
  const rel = path.join(relDir, `${millis}-${nonce}-${toolTag}.txt`);

  await fs.writeFile(path.join(cwd, rel), output, "utf-8");
  // Retention pass so a long session cannot accumulate artifacts indefinitely.
  await pruneToolArtifacts(absDir);
  return rel;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Turns one tool's raw output into the body the model will see.
 *
 * Steps, in order:
 * 1. Minimize the raw output via `minimizeToolOutput`.
 * 2. Cap it to the per-tool budget: `read` uses `READ_OUTPUT_MAX` with a
 *    head-only cut (it is one contiguous file slice); every other tool uses
 *    `TOOL_OUTPUT_MAX` with a head + tail cut.
 * 3. When the RAW output exceeds that budget, save it in full via
 *    `spillToolResult` and append a note pointing at the artifact. A failed
 *    spill is ignored and the note is simply left out.
 *
 * @param tool      Tool name; selects the budget and truncation mode.
 * @param rawOutput Unmodified tool output.
 * @param cwd       Workspace root under which the artifact is written.
 */
export async function formatToolResultBody(tool: string, rawOutput: string, cwd: string): Promise<string> {
  const minimized = minimizeToolOutput(rawOutput, tool).text;
  const fromRead = tool === "read";
  const budget = fromRead ? READ_OUTPUT_MAX : TOOL_OUTPUT_MAX;
  const shown = truncateToolOutput(minimized, budget, fromRead);

  // The spill decision is based on the raw size, not the minimized size.
  if (rawOutput.length <= budget) return shown;

  let savedTo: string | null;
  try {
    savedTo = await spillToolResult(tool, rawOutput, cwd);
  } catch {
    savedTo = null;
  }
  if (!savedTo) return shown;

  return `${shown}\n[full output (${rawOutput.length} chars) saved to ${savedTo} — read it for the truncated remainder]`;
}
|
package/src/agent/tools.ts
CHANGED
|
@@ -3,6 +3,7 @@ import * as fs from "node:fs/promises";
|
|
|
3
3
|
import * as path from "node:path";
|
|
4
4
|
import { readWorkflowState, readWorkflowStateStrict, type WorkflowState } from "./state";
|
|
5
5
|
import { jeoEnv } from "../util/env";
|
|
6
|
+
import { READ_OUTPUT_MAX } from "./tool-output";
|
|
6
7
|
|
|
7
8
|
/** Read the deep-interview lock; on corrupt state fail CLOSED (treat as active lock). */
|
|
8
9
|
async function readMutationLock(cwd: string): Promise<WorkflowState | null> {
|
|
@@ -291,10 +292,27 @@ export async function readTool(
|
|
|
291
292
|
return { success: true, output: out.join("\n") };
|
|
292
293
|
}
|
|
293
294
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
295
|
+
// Default (no lineRange): fill the model-visible read budget with WHOLE lines
|
|
296
|
+
// instead of a fixed 500-line cap that left half the 32k budget unused and forced
|
|
297
|
+
// needless pagination (the read tool's biggest "reads too little per call" pain).
|
|
298
|
+
// READ_OUTPUT_MAX is the real cap; a hard line ceiling (JEO_READ_MAX_LINES) guards
|
|
299
|
+
// pathological files, and a small reserve keeps the pagination notice inside the
|
|
300
|
+
// budget so it is never trimmed by the downstream head-only truncation.
|
|
301
|
+
const HARD_LINE_CEILING = Math.max(500, Number(jeoEnv("READ_MAX_LINES") ?? "") || 5000);
|
|
302
|
+
const charBudget = Math.max(1_000, READ_OUTPUT_MAX - 256);
|
|
303
|
+
const shownLines: string[] = [];
|
|
304
|
+
let usedChars = 0;
|
|
305
|
+
for (let i = 0; i < lines.length && shownLines.length < HARD_LINE_CEILING; i++) {
|
|
306
|
+
const annotatedLine = `${i + 1}${lineAnchor(lines[i]!)}|${lines[i]}`;
|
|
307
|
+
const cost = annotatedLine.length + 1; // + newline
|
|
308
|
+
if (shownLines.length > 0 && usedChars + cost > charBudget) break; // always emit ≥1 line
|
|
309
|
+
shownLines.push(annotatedLine);
|
|
310
|
+
usedChars += cost;
|
|
311
|
+
}
|
|
312
|
+
const annotated = shownLines.join("\n");
|
|
313
|
+
if (shownLines.length < lines.length) {
|
|
314
|
+
const shown = shownLines.length;
|
|
315
|
+
const notice = `\n…(showing lines 1-${shown} of ${lines.length}; pass lineRange "${shown + 1}-" to read the rest)`;
|
|
298
316
|
return { success: true, output: annotated + notice };
|
|
299
317
|
}
|
|
300
318
|
return { success: true, output: annotated };
|
|
@@ -574,7 +592,8 @@ export async function bashTool(
|
|
|
574
592
|
cwd: string = process.cwd(),
|
|
575
593
|
timeoutMs: number = 120_000,
|
|
576
594
|
subdir?: string,
|
|
577
|
-
env?: Record<string, string
|
|
595
|
+
env?: Record<string, string>,
|
|
596
|
+
onProgress?: (partialOutput: string) => void,
|
|
578
597
|
): Promise<ToolResult> {
|
|
579
598
|
if (jeoEnv("BASH_FIXUPS") === "1") {
|
|
580
599
|
const fx = applyBashFixups(command);
|
|
@@ -608,12 +627,27 @@ export async function bashTool(
|
|
|
608
627
|
killTimer = setTimeout(() => { try { proc.kill(9); } catch {} }, 3_000);
|
|
609
628
|
}, TIMEOUT_MS);
|
|
610
629
|
|
|
630
|
+
// Stream stdout incrementally when a progress sink is attached (drives the live
|
|
631
|
+
// DIMMED bash output view); read stderr fully in parallel. Without a sink, fall
|
|
632
|
+
// back to a single post-exit read (identical content, no streaming overhead).
|
|
633
|
+
const stderrPromise = new Response(proc.stderr).text();
|
|
634
|
+
let stdout = "";
|
|
635
|
+
if (onProgress) {
|
|
636
|
+
const decoder = new TextDecoder();
|
|
637
|
+
let lastEmit = 0;
|
|
638
|
+
for await (const chunk of proc.stdout as unknown as AsyncIterable<Uint8Array>) {
|
|
639
|
+
stdout += decoder.decode(chunk, { stream: true });
|
|
640
|
+
const now = Date.now();
|
|
641
|
+
if (now - lastEmit >= 80) { lastEmit = now; onProgress(stdout); }
|
|
642
|
+
}
|
|
643
|
+
stdout += decoder.decode();
|
|
644
|
+
onProgress(stdout);
|
|
645
|
+
}
|
|
611
646
|
await proc.exited;
|
|
612
647
|
clearTimeout(timer);
|
|
613
648
|
if (killTimer) clearTimeout(killTimer);
|
|
614
|
-
|
|
615
|
-
const
|
|
616
|
-
const stderr = await new Response(proc.stderr).text();
|
|
649
|
+
if (!onProgress) stdout = await new Response(proc.stdout).text();
|
|
650
|
+
const stderr = await stderrPromise;
|
|
617
651
|
|
|
618
652
|
let output = [stdout, stderr].filter(Boolean).join("\n");
|
|
619
653
|
const MAX_OUTPUT = 100_000;
|
package/src/ai/model-manager.ts
CHANGED
|
@@ -2,11 +2,7 @@ import { providerRegistry } from "./provider-registry";
|
|
|
2
2
|
import { OAUTH_FLOW_REGISTRY } from "../auth/flows";
|
|
3
3
|
import { readGlobalConfig } from "../agent/state";
|
|
4
4
|
import { resolveCredential, type AuthProvider, type Credential } from "../auth";
|
|
5
|
-
import
|
|
6
|
-
import { openaiAdapter } from "./providers/openai";
|
|
7
|
-
import { geminiAdapter } from "./providers/gemini";
|
|
8
|
-
import { ollamaAdapter } from "./providers/ollama";
|
|
9
|
-
import { antigravityAdapter } from "./providers/antigravity";
|
|
5
|
+
import "./register-providers"; // side-effect: registers built-in adapters into providerRegistry
|
|
10
6
|
import type { CallOptions, Message, ProviderAdapter, ProviderName } from "./types";
|
|
11
7
|
import { expandAlias, resolveModelId, effectiveAliasesFor } from "./model-registry";
|
|
12
8
|
import { findCatalogEntry, type ModelCatalogEntry } from "./model-catalog-compat";
|
|
@@ -16,12 +12,6 @@ import { jeoEnv } from "../util/env";
|
|
|
16
12
|
import type { Config } from "../agent/state";
|
|
17
13
|
|
|
18
14
|
|
|
19
|
-
// Initialize Provider Registry
|
|
20
|
-
providerRegistry.register("anthropic", anthropicAdapter);
|
|
21
|
-
providerRegistry.register("openai", openaiAdapter);
|
|
22
|
-
providerRegistry.register("gemini", geminiAdapter);
|
|
23
|
-
providerRegistry.register("antigravity", antigravityAdapter);
|
|
24
|
-
providerRegistry.register("ollama", ollamaAdapter);
|
|
25
15
|
|
|
26
16
|
|
|
27
17
|
export function resolveProvider(model: string): ProviderName {
|
|
@@ -96,9 +86,13 @@ export function thinkingToReasoningEffort(
|
|
|
96
86
|
return "medium";
|
|
97
87
|
}
|
|
98
88
|
|
|
99
|
-
/** Describe a model id: alias expansion + the provider it routes to. For `/model` + diagnostics.
|
|
100
|
-
|
|
101
|
-
|
|
89
|
+
/** Describe a model id: alias expansion + the provider it routes to. For `/model` + diagnostics.
|
|
90
|
+
* Pass an already-read `config` to skip a redundant readGlobalConfig() on the turn hot path. */
|
|
91
|
+
export async function describeModel(
|
|
92
|
+
input: string,
|
|
93
|
+
config?: { modelAliases?: Record<string, string> },
|
|
94
|
+
): Promise<{ input: string; resolved: string; provider: ProviderName }> {
|
|
95
|
+
const resolved = await resolveModelId(input, config);
|
|
102
96
|
return { input, resolved, provider: resolveProvider(resolved) };
|
|
103
97
|
}
|
|
104
98
|
|
|
@@ -311,6 +305,7 @@ async function resolveCall(options: Partial<CallOptions>, kind: "request" | "str
|
|
|
311
305
|
onUsage: options.onUsage,
|
|
312
306
|
signal: options.signal,
|
|
313
307
|
reasoningEffort: options.reasoningEffort ?? thinkingToReasoningEffort(config.thinkingLevel),
|
|
308
|
+
onReasoning: options.onReasoning,
|
|
314
309
|
};
|
|
315
310
|
// Caller-supplied retry sink rides on the config-derived retry budget so the
|
|
316
311
|
// engine/TUI can surface "rate limited — retrying in Ns" instead of a silent wait.
|
package/src/ai/model-registry.ts
CHANGED
|
@@ -25,9 +25,14 @@ export function expandAlias(input: string, aliases: ModelAliases = BUILTIN_ALIAS
|
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
// Async: merge BUILTIN_ALIASES with config.modelAliases (config wins) and expand.
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
// Pass an already-read `config` to skip the readGlobalConfig() round-trip (turn
|
|
29
|
+
// hot path: avoids re-reading the config file mid-turn for model resolution).
|
|
30
|
+
export async function resolveModelId(
|
|
31
|
+
input: string,
|
|
32
|
+
config?: { modelAliases?: ModelAliases },
|
|
33
|
+
): Promise<string> {
|
|
34
|
+
const cfg = config ?? (await readGlobalConfig());
|
|
35
|
+
const modelAliases = (cfg as any).modelAliases ?? {};
|
|
31
36
|
const merged: ModelAliases = { ...BUILTIN_ALIASES, ...modelAliases };
|
|
32
37
|
return expandAlias(input, merged);
|
|
33
38
|
}
|
|
@@ -160,13 +160,18 @@ export function antigravityRequest(messages: Message[], options: CallOptions, cr
|
|
|
160
160
|
type CcaUsage = { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
|
|
161
161
|
interface CcaChunk {
|
|
162
162
|
response?: {
|
|
163
|
-
candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
|
|
163
|
+
candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[];
|
|
164
164
|
usageMetadata?: CcaUsage;
|
|
165
165
|
};
|
|
166
166
|
}
|
|
167
167
|
|
|
168
168
|
function textOf(chunk: CcaChunk): string {
|
|
169
|
-
return chunk.response?.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
|
|
169
|
+
return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/** Native thinking text (`thought` parts) — kept separate so it never pollutes the JSON tool call. */
|
|
173
|
+
function thoughtOf(chunk: CcaChunk): string {
|
|
174
|
+
return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
170
175
|
}
|
|
171
176
|
|
|
172
177
|
async function fetchAntigravity(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
|
|
@@ -194,6 +199,8 @@ export const antigravityAdapter: ProviderAdapter = {
|
|
|
194
199
|
for await (const data of readSse(response.body)) {
|
|
195
200
|
let chunk: CcaChunk;
|
|
196
201
|
try { chunk = JSON.parse(data); } catch { continue; }
|
|
202
|
+
const thought = thoughtOf(chunk);
|
|
203
|
+
if (thought) options.onReasoning?.(thought);
|
|
197
204
|
out += textOf(chunk);
|
|
198
205
|
if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
|
|
199
206
|
}
|
|
@@ -209,6 +216,8 @@ export const antigravityAdapter: ProviderAdapter = {
|
|
|
209
216
|
for await (const data of readSse(response.body)) {
|
|
210
217
|
let chunk: CcaChunk;
|
|
211
218
|
try { chunk = JSON.parse(data); } catch { continue; }
|
|
219
|
+
const thought = thoughtOf(chunk);
|
|
220
|
+
if (thought) options.onReasoning?.(thought);
|
|
212
221
|
const delta = textOf(chunk);
|
|
213
222
|
if (delta) { yielded = true; yield delta; }
|
|
214
223
|
if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
|
|
@@ -119,7 +119,7 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
|
|
|
119
119
|
}
|
|
120
120
|
|
|
121
121
|
interface GeminiChunk {
|
|
122
|
-
candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
|
|
122
|
+
candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[];
|
|
123
123
|
promptFeedback?: { blockReason?: string };
|
|
124
124
|
usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
|
|
125
125
|
}
|
|
@@ -130,7 +130,13 @@ interface CcaChunk {
|
|
|
130
130
|
}
|
|
131
131
|
|
|
132
132
|
function textOf(chunk: GeminiChunk): string {
|
|
133
|
-
return chunk.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
|
|
133
|
+
return chunk.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/** Native thinking text (`thought` parts), present only when the model emits thought
|
|
137
|
+
* summaries. Kept SEPARATE from textOf so thoughts never pollute the JSON tool call. */
|
|
138
|
+
function thoughtOf(chunk: GeminiChunk): string {
|
|
139
|
+
return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
134
140
|
}
|
|
135
141
|
|
|
136
142
|
/** When Gemini returns HTTP 200 with no text, surface the real cause (safety block /
|
|
@@ -176,6 +182,8 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
|
|
|
176
182
|
}
|
|
177
183
|
const inner = chunk.response;
|
|
178
184
|
if (!inner) continue;
|
|
185
|
+
const thought = thoughtOf(inner);
|
|
186
|
+
if (thought) options.onReasoning?.(thought);
|
|
179
187
|
const delta = textOf(inner);
|
|
180
188
|
if (delta) {
|
|
181
189
|
yieldedAny = true;
|
|
@@ -239,6 +247,8 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
239
247
|
} catch {
|
|
240
248
|
continue;
|
|
241
249
|
}
|
|
250
|
+
const thought = thoughtOf(chunk);
|
|
251
|
+
if (thought) options.onReasoning?.(thought);
|
|
242
252
|
const delta = textOf(chunk);
|
|
243
253
|
if (delta) {
|
|
244
254
|
yieldedAny = true;
|