jeo-code 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +1 -1
- package/README.ko.md +1 -1
- package/README.md +1 -1
- package/README.zh.md +1 -1
- package/package.json +1 -1
- package/src/agent/dev/evolution-bridge.ts +36 -3
- package/src/agent/dev/self-analysis.ts +6 -1
- package/src/agent/engine.ts +21 -71
- package/src/agent/loop.ts +2 -0
- package/src/agent/subagent-registry.ts +131 -0
- package/src/agent/subagent-tool.ts +89 -0
- package/src/agent/subagents.ts +22 -3
- package/src/agent/task-tool.ts +119 -27
- package/src/agent/tool-output.ts +115 -0
- package/src/agent/tools.ts +42 -8
- package/src/ai/model-manager.ts +2 -11
- package/src/ai/providers/antigravity.ts +11 -2
- package/src/ai/providers/gemini.ts +12 -2
- package/src/ai/register-providers.ts +21 -0
- package/src/ai/types.ts +4 -0
- package/src/cli/runner.ts +0 -9
- package/src/commands/launch.ts +47 -9
- package/src/commands/team.ts +13 -6
- package/src/skills/catalog.ts +0 -2
- package/src/tui/app.ts +97 -11
- package/src/tui/components/forge.ts +18 -1
- package/src/tui/components/markdown-text.ts +10 -1
- package/src/tui/components/themes.ts +46 -0
- package/src/tui/components/todo-card.ts +44 -13
- package/src/util/update-check.ts +53 -0
- package/src/commands/gjc.ts +0 -52
- package/src/prompts/skills/gjc/AGENTS.md +0 -31
- package/src/prompts/skills/gjc/SKILL.md +0 -15
package/src/agent/task-tool.ts
CHANGED
|
@@ -26,6 +26,7 @@ import {
|
|
|
26
26
|
validateSubagentDoneReason,
|
|
27
27
|
} from "./subagents";
|
|
28
28
|
import { thinkingMaxTokens } from "../ai/model-manager";
|
|
29
|
+
import type { SubagentRegistry } from "./subagent-registry";
|
|
29
30
|
|
|
30
31
|
/** Lifecycle event emitted while a delegated subagent runs. */
|
|
31
32
|
export interface TaskSubEvent {
|
|
@@ -41,6 +42,12 @@ export interface TaskSubEvent {
|
|
|
41
42
|
summary?: string;
|
|
42
43
|
/** Model selected for this subagent run. */
|
|
43
44
|
model?: string;
|
|
45
|
+
/** 1-based task position within a fan-out batch (omitted for single-task runs). */
|
|
46
|
+
index?: number;
|
|
47
|
+
/** Total tasks in the fan-out batch (omitted for single-task runs). */
|
|
48
|
+
total?: number;
|
|
49
|
+
/** Provider token usage for the finished subagent (done events only). */
|
|
50
|
+
tokens?: { input: number; output: number };
|
|
44
51
|
}
|
|
45
52
|
|
|
46
53
|
export interface TaskToolOptions {
|
|
@@ -51,18 +58,46 @@ export interface TaskToolOptions {
|
|
|
51
58
|
signal?: AbortSignal;
|
|
52
59
|
/** Optional live sink (e.g. plain-stream rendering of nested progress). */
|
|
53
60
|
onEvent?: (ev: TaskSubEvent) => void;
|
|
54
|
-
/** Mid-turn steering drain (gjc parity)
|
|
55
|
-
*
|
|
56
|
-
*
|
|
57
|
-
*
|
|
58
|
-
*
|
|
59
|
-
* subagent); pending steering stays for the parent after the batch returns. */
|
|
61
|
+
/** Mid-turn steering drain (gjc parity): an additional user query typed while a
|
|
62
|
+
* subagent works is forwarded live. Single-task runs and the SERIAL executor
|
|
63
|
+
* batch (concurrency 1) forward to the one active subagent. A parallel read-only
|
|
64
|
+
* batch routes through a broadcast hub (createSteerHub) so every running worker
|
|
65
|
+
* sees each message exactly once. Unconsumed messages stay for the parent. */
|
|
60
66
|
steer?: () => string[];
|
|
67
|
+
/** When present, a `task` call with `detached: true` registers a background run
|
|
68
|
+
* here and returns immediately; the parent controls it via the `subagent` tool. */
|
|
69
|
+
registry?: SubagentRegistry;
|
|
61
70
|
}
|
|
62
71
|
|
|
63
72
|
/** Max concurrent read-only subagents in a fan-out batch. */
|
|
64
73
|
const MAX_FANOUT = 4;
|
|
65
74
|
|
|
75
|
+
/** Hard cap on a SERIAL (mutating executor) fan-out batch: it runs one task at a
|
|
76
|
+
* time inside one blocking tool call, so an unbounded queue would monopolize the
|
|
77
|
+
* parent turn. Split larger efforts into sequential task calls. */
|
|
78
|
+
const MAX_SERIAL_EXECUTOR = 6;
|
|
79
|
+
|
|
80
|
+
/**
 * Steering broadcaster for a fan-out batch.
 *
 * Messages pulled from the parent via `drain` are appended to one shared,
 * append-only log. Every reader handed out by `worker()` remembers how far into
 * that log it has read, so each reader receives every message exactly once even
 * though only one of them physically pulled it from the parent inbox.
 *
 * @param drain Pulls pending parent steer messages. When omitted, `worker()`
 *   returns `undefined` (there is nothing to forward).
 * @returns An object whose `worker()` creates one independent reader.
 */
function createSteerHub(drain?: () => string[]) {
  const messages: string[] = [];

  function worker(): (() => string[]) | undefined {
    const pull = drain;
    if (!pull) return undefined;

    // Position of this reader in the shared log.
    let seen = 0;

    return function readUnseen(): string[] {
      // Move whatever the parent has queued into the shared log first, so that
      // readers which call later still observe it.
      const incoming = pull();
      if (incoming.length > 0) {
        messages.push(...incoming);
      }
      const unseen = messages.slice(seen);
      seen = messages.length;
      return unseen;
    };
  }

  return { worker };
}
|
|
100
|
+
|
|
66
101
|
/** One-line protocol description appended to the launch system prompt. Pass a
|
|
67
102
|
* config so CONFIG-DECLARED custom roles are advertised to the model too. */
|
|
68
103
|
export function taskToolProtocolLine(config?: Pick<Config, "subagents">): string {
|
|
@@ -141,14 +176,26 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
141
176
|
taskText: string,
|
|
142
177
|
context: string,
|
|
143
178
|
cwd: string,
|
|
144
|
-
|
|
179
|
+
extra: {
|
|
180
|
+
steer?: () => string[];
|
|
181
|
+
slot?: { index: number; total: number };
|
|
182
|
+
projectContext?: Awaited<ReturnType<typeof loadProjectContext>>;
|
|
183
|
+
/** Overrides opts.signal — a detached run uses its own registry signal so it
|
|
184
|
+
* is cancellable independently of the parent turn. */
|
|
185
|
+
signal?: AbortSignal;
|
|
186
|
+
} = {},
|
|
145
187
|
): Promise<ToolResult> => {
|
|
188
|
+
const { steer, slot, projectContext: preloadedContext, signal: signalOverride } = extra;
|
|
189
|
+
// Tag every live event with its fan-out slot so a parent monitor can tell
|
|
190
|
+
// task 1 from task 3 when several same-role subagents stream concurrently.
|
|
191
|
+
const emit = (ev: TaskSubEvent) =>
|
|
192
|
+
opts.onEvent?.(slot ? { ...ev, index: slot.index, total: slot.total } : ev);
|
|
146
193
|
const model = resolveSubagentModel(role.id, opts.config);
|
|
147
194
|
const maxSteps = resolveSubagentMaxSteps(role.id, opts.config);
|
|
148
195
|
// gjc parity: a role may pin its own reasoning budget; absent = inherit the
|
|
149
196
|
// session/global thinking level (the "(inherit)" row in the picker).
|
|
150
197
|
const thinking = resolveSubagentThinking(role.id, opts.config) ?? opts.config.thinkingLevel;
|
|
151
|
-
const projectContext = await loadProjectContext(cwd);
|
|
198
|
+
const projectContext = preloadedContext ?? await loadProjectContext(cwd);
|
|
152
199
|
const history: Message[] = [
|
|
153
200
|
{ role: "system", content: withProjectContext(subagentSystemPrompt(role), projectContext) },
|
|
154
201
|
{ role: "user", content: `${taskText}${context}` },
|
|
@@ -157,10 +204,13 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
157
204
|
let lastTarget = "";
|
|
158
205
|
let currentStep = 0;
|
|
159
206
|
// Round-8 (architect ref 7-Round7Workflow): count the subagent's SUCCESSFUL
|
|
160
|
-
//
|
|
161
|
-
//
|
|
162
|
-
|
|
163
|
-
|
|
207
|
+
// calls so the parent can audit a "Changed Files:" claim against observed
|
|
208
|
+
// reality. File-writing tools (write/edit/mkdir/delete) are tracked apart from
|
|
209
|
+
// bash: read-only bash (e.g. `bun test`) MUST NOT count as edit evidence, but
|
|
210
|
+
// bash CAN mutate, so the audit message distinguishes the two cases.
|
|
211
|
+
let fileMutations = 0;
|
|
212
|
+
let bashRuns = 0;
|
|
213
|
+
emit({ role: role.id, kind: "start", detail: taskText, maxSteps, model });
|
|
164
214
|
const result = await runAgentLoop(history, {
|
|
165
215
|
cwd,
|
|
166
216
|
model,
|
|
@@ -169,7 +219,7 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
169
219
|
// Bounded delegation: a subagent's step contract stays exact — the parent
|
|
170
220
|
// owns any retry/extension decision, so the gjc retry flow is disabled here.
|
|
171
221
|
budget: { maxExtensions: 0 },
|
|
172
|
-
signal: opts.signal,
|
|
222
|
+
signal: signalOverride ?? opts.signal,
|
|
173
223
|
steer,
|
|
174
224
|
tools: subagentToolset(role),
|
|
175
225
|
events: {
|
|
@@ -178,38 +228,45 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
178
228
|
if (invocation && invocation.tool && invocation.tool !== "done") {
|
|
179
229
|
lastTarget = toolTarget(invocation.tool, invocation.arguments);
|
|
180
230
|
trace.push(` step ${currentStep}/${maxSteps}: ${lastTarget}`);
|
|
181
|
-
|
|
231
|
+
emit({ role: role.id, kind: "step", detail: lastTarget, step: currentStep, maxSteps, model });
|
|
182
232
|
}
|
|
183
233
|
},
|
|
184
234
|
onToolResult: (tool, success, output) => {
|
|
185
|
-
if (success
|
|
235
|
+
if (success) {
|
|
236
|
+
if (tool === "write" || tool === "edit" || tool === "mkdir" || tool === "delete") fileMutations++;
|
|
237
|
+
else if (tool === "bash") bashRuns++;
|
|
238
|
+
}
|
|
186
239
|
const label = lastTarget || tool;
|
|
187
240
|
const summary = firstUsefulLine(output);
|
|
188
241
|
const suffix = summary ? ` — ${summary}` : "";
|
|
189
242
|
trace.push(` ${success ? "✓" : "✗"} ${label}${suffix}`);
|
|
190
|
-
|
|
243
|
+
emit({ role: role.id, kind: "tool", detail: label, success, summary, step: currentStep, maxSteps, model });
|
|
191
244
|
lastTarget = "";
|
|
192
245
|
},
|
|
193
246
|
// Retry notices (rate-limit backoff etc.) surface as live "step" beats so the
|
|
194
247
|
// parent's monitor shows WHY a subagent is pausing instead of going silent.
|
|
195
|
-
onNotice: msg =>
|
|
248
|
+
onNotice: msg => emit({ role: role.id, kind: "step", detail: msg, step: currentStep, maxSteps, model }),
|
|
196
249
|
// Mid-turn steering reached this subagent: surface it as a live beat so the
|
|
197
250
|
// parent's monitor shows the redirect instead of an unexplained behavior change.
|
|
198
|
-
onSteer: text =>
|
|
251
|
+
onSteer: text => emit({ role: role.id, kind: "step", detail: `↳ steer: ${text}`, step: currentStep, maxSteps, model }),
|
|
199
252
|
},
|
|
200
253
|
});
|
|
201
254
|
const reason = result.doneReason?.trim() || `(subagent reached the ${result.steps}-step limit without signaling done)`;
|
|
202
255
|
const validation = validateSubagentDoneReason(role, reason);
|
|
203
256
|
const complete = result.done && validation.ok;
|
|
204
257
|
const detail = validation.ok ? reason : `${reason}\n\n[contract incomplete: missing ${validation.missing?.join(", ")}]`;
|
|
205
|
-
|
|
206
|
-
const
|
|
258
|
+
emit({ role: role.id, kind: "done", detail, success: complete, step: result.steps, maxSteps, model, tokens: result.usage ? { input: result.usage.inputTokens, output: result.usage.outputTokens } : undefined });
|
|
259
|
+
const tokNote = result.usage ? `, ${result.usage.inputTokens + result.usage.outputTokens} tok` : "";
|
|
260
|
+
const header = `[${role.title} subagent] ${complete ? "completed" : "stopped"} in ${result.steps} step(s) on ${model}${tokNote}.`;
|
|
207
261
|
const body = trace.length ? `\nSteps:\n${trace.join("\n")}` : "";
|
|
208
|
-
// Parent-side audit: a mutating role that "completed" without
|
|
209
|
-
// write/edit/
|
|
210
|
-
//
|
|
211
|
-
|
|
212
|
-
|
|
262
|
+
// Parent-side audit: a mutating role that "completed" without a successful file
|
|
263
|
+
// mutation (write/edit/mkdir/delete) likely changed nothing — flag the claim.
|
|
264
|
+
// bash is tracked separately: it CAN mutate, so an only-bash run downgrades to
|
|
265
|
+
// "verify independently" instead of the stronger UNVERIFIED.
|
|
266
|
+
const audit = complete && !role.readOnly && fileMutations === 0
|
|
267
|
+
? bashRuns === 0
|
|
268
|
+
? `\n[parent audit] No successful write/edit/bash was observed in this run — treat any "Changed Files:" claims above as UNVERIFIED.`
|
|
269
|
+
: `\n[parent audit] No successful write/edit was observed (only bash ran); bash may or may not have mutated files — verify any "Changed Files:" claims above independently.`
|
|
213
270
|
: "";
|
|
214
271
|
return { success: complete, output: `${header}${body}\n\nResult:\n${fenceSubagentReport(detail)}${audit}` };
|
|
215
272
|
};
|
|
@@ -237,6 +294,18 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
237
294
|
if (items.length === 0) {
|
|
238
295
|
return { success: false, output: "", error: "task fan-out requires a non-empty 'tasks' array of assignments." };
|
|
239
296
|
}
|
|
297
|
+
// #5: the mutating executor fan-out is SERIAL (concurrency 1) and blocks the
|
|
298
|
+
// turn; cap it regardless of justification so a huge queue can't monopolize
|
|
299
|
+
// the parent. Split larger efforts into sequential task calls.
|
|
300
|
+
if (!role.readOnly && items.length > MAX_SERIAL_EXECUTOR) {
|
|
301
|
+
return {
|
|
302
|
+
success: false,
|
|
303
|
+
output: "",
|
|
304
|
+
error:
|
|
305
|
+
`Executor fan-out of ${items.length} exceeds the serial cap of ${MAX_SERIAL_EXECUTOR}. ` +
|
|
306
|
+
`The mutating executor runs one task at a time and blocks the turn — split into ≤${MAX_SERIAL_EXECUTOR}-task batches or sequential task calls.`,
|
|
307
|
+
};
|
|
308
|
+
}
|
|
240
309
|
// Spawn-gate lite (plan/gjc-inheritance.md B9, gjc spawn-gate 계승): a batch
|
|
241
310
|
// wider than MAX_FANOUT is refused BEFORE any subagent launches unless the
|
|
242
311
|
// model justifies the parallelism — silent capping hid the cost decision.
|
|
@@ -257,13 +326,22 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
257
326
|
// Read-only roles fan out concurrently (bounded). The mutating executor is serialized
|
|
258
327
|
// (concurrency 1) so parallel subagents can't race on the same files.
|
|
259
328
|
const limit = role.readOnly ? Math.min(items.length, MAX_FANOUT) : 1;
|
|
329
|
+
// Load project context ONCE per batch instead of re-scanning AGENTS.md for
|
|
330
|
+
// every fan-out task (redundant IO + duplicated tokens).
|
|
331
|
+
const batchContext = await loadProjectContext(cwd);
|
|
260
332
|
const results: ToolResult[] = new Array(items.length);
|
|
261
333
|
let next = 0;
|
|
334
|
+
// #7: broadcast steering hub — each concurrent worker sees every parent
|
|
335
|
+
// steer message exactly once (safe even for parallel read-only fan-out).
|
|
336
|
+
const steerHub = createSteerHub(opts.steer);
|
|
262
337
|
const worker = async () => {
|
|
338
|
+
// One steer cursor per concurrent worker (not per item) so a worker that
|
|
339
|
+
// processes several items sees each parent message once across them all.
|
|
340
|
+
const workerSteer = steerHub.worker();
|
|
263
341
|
while (true) {
|
|
264
342
|
const i = next++;
|
|
265
343
|
if (i >= items.length) return;
|
|
266
|
-
results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd);
|
|
344
|
+
results[i] = await runOne(role, items[i]!.task, items[i]!.context, cwd, { slot: { index: i + 1, total: items.length }, projectContext: batchContext, steer: workerSteer });
|
|
267
345
|
}
|
|
268
346
|
};
|
|
269
347
|
await Promise.all(Array.from({ length: limit }, () => worker()));
|
|
@@ -279,6 +357,20 @@ export function createTaskTool(opts: TaskToolOptions): ToolHandler {
|
|
|
279
357
|
if (!taskText) {
|
|
280
358
|
return { success: false, output: "", error: `task tool requires a non-empty 'task' (or a 'tasks' array). Valid roles: ${subagentRoleIds(opts.config).join(", ")}.` };
|
|
281
359
|
}
|
|
282
|
-
|
|
360
|
+
// Detached form (#9): register a background run and return immediately so the
|
|
361
|
+
// parent can keep working, then list/inspect/await/cancel via the `subagent`
|
|
362
|
+
// tool. Steering is not forwarded to a detached run (no single active drainer).
|
|
363
|
+
if (args.detached === true && opts.registry) {
|
|
364
|
+
const rec = opts.registry.launch(role.id, taskText, signal =>
|
|
365
|
+
runOne(role, taskText, ctx(args.context), cwd, { signal }),
|
|
366
|
+
);
|
|
367
|
+
return {
|
|
368
|
+
success: true,
|
|
369
|
+
output:
|
|
370
|
+
`[detached] launched ${role.title} subagent '${rec.id}'. It runs in the background — ` +
|
|
371
|
+
`keep working, then use the 'subagent' tool ({action:"await"|"list"|"inspect"|"cancel", ids?}) to collect its result.`,
|
|
372
|
+
};
|
|
373
|
+
}
|
|
374
|
+
return runOne(role, taskText, ctx(args.context), cwd, { steer: opts.steer });
|
|
283
375
|
};
|
|
284
376
|
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool-result output handling — the model-visible output budget, both-ends
|
|
3
|
+
* truncation, recoverable artifact spilling, and the minimize→truncate→spill
|
|
4
|
+
* orchestration the agent loop applies to every tool result.
|
|
5
|
+
*
|
|
6
|
+
* Extracted from `engine.ts` (single-responsibility: the loop drives steps; this
|
|
7
|
+
* module owns how a tool's raw output is shaped before it re-enters context).
|
|
8
|
+
* `engine.ts` re-exports the public surface for backward compatibility.
|
|
9
|
+
*/
|
|
10
|
+
import * as fs from "node:fs/promises";
|
|
11
|
+
import * as path from "node:path";
|
|
12
|
+
import { jeoEnv } from "../util/env";
|
|
13
|
+
import { minimizeToolOutput } from "./output-minimizer";
|
|
14
|
+
|
|
15
|
+
/**
 * Env-tunable output budget (plan/gjc-inheritance.md B10, inherited from gjc's
 * settings-driven output handling). JEO_TOOL_OUTPUT_MAX caps the model-visible
 * tool result; the spill threshold tracks it so anything truncated stays
 * artifact-recoverable.
 *
 * A setting that is not a finite number within 500..200000 is ignored and the
 * 4000-character default applies. Fractional values are truncated toward zero.
 */
function envOutputMax(): number {
  const fallback = 4_000;
  const configured = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
  if (!Number.isFinite(configured)) return fallback;
  if (configured < 500 || configured > 200_000) return fallback;
  return Math.trunc(configured);
}
export const TOOL_OUTPUT_MAX = envOutputMax();
|
|
23
|
+
|
|
24
|
+
/**
 * Model-visible budget for `read` results. A read is a deliberate, contiguous
 * file slice the model asked for (via lineRange), already line-capped by the
 * read tool and recoverable via spill, so it gets a much larger budget than the
 * generic noise-control cap — a 500-line read is not silently re-shrunk to
 * roughly 100 lines.
 *
 * JEO_READ_OUTPUT_MAX overrides the 32000-character default when it is a finite
 * number within 1000..200000; anything else is ignored. Fractional values are
 * truncated toward zero.
 */
function envReadOutputMax(): number {
  const fallback = 32_000;
  const configured = Number(jeoEnv("READ_OUTPUT_MAX") ?? "");
  if (!Number.isFinite(configured)) return fallback;
  if (configured < 1_000 || configured > 200_000) return fallback;
  return Math.trunc(configured);
}
export const READ_OUTPUT_MAX = envReadOutputMax();
|
|
34
|
+
|
|
35
|
+
/**
 * Cap a tool result fed back to the model.
 *
 * Default mode keeps both ends: the head holds the start (e.g. a command's
 * invocation) and the tail holds what is usually decisive (test summaries, the
 * final error). A pure head-cut loses that.
 *
 * `headOnly` truncates from the front only — for `read` results, which are a
 * contiguous file slice the model explicitly requested; head/tail splitting would
 * mangle the code into two non-adjacent fragments.
 *
 * The budget is normalised before use: `NaN` falls back to `TOOL_OUTPUT_MAX`,
 * fractional values are truncated toward zero, and negative values count as 0.
 * Cuts never land between the two halves of a UTF-16 surrogate pair; when a cut
 * would, one code unit fewer is kept on that side, so the result stays within
 * the budget and contains no lone surrogates.
 *
 * @param s Raw text to cap.
 * @param max Maximum number of characters of `s` to keep (marker text excluded).
 * @param headOnly Keep only the start of `s` instead of both ends.
 * @returns `s` unchanged when it fits, otherwise the kept part(s) joined by a
 *   marker stating how many characters were dropped.
 */
export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX, headOnly = false): string {
  // A NaN budget used to defeat the cap entirely (`s.slice(NaN)` is the whole
  // string); negative or fractional budgets produced wrong slices and counts.
  const budget = Number.isNaN(max) ? TOOL_OUTPUT_MAX : Math.max(0, Math.trunc(max));
  if (s.length <= budget) return s;

  // True when cutting right before `index` would separate a surrogate pair.
  const splitsPair = (index: number): boolean => {
    if (index <= 0 || index >= s.length) return false;
    const before = s.charCodeAt(index - 1);
    const after = s.charCodeAt(index);
    return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  };

  if (headOnly) {
    const end = splitsPair(budget) ? budget - 1 : budget;
    return `${s.slice(0, end)}\n…(${s.length - end} chars truncated; narrow the lineRange or read the spilled artifact)…`;
  }

  // 60% of the budget goes to the head, the remainder to the tail.
  let headEnd = Math.floor(budget * 0.6);
  let tailStart = s.length - (budget - headEnd);
  if (splitsPair(headEnd)) headEnd -= 1;
  if (splitsPair(tailStart)) tailStart += 1;
  return `${s.slice(0, headEnd)}\n…(${tailStart - headEnd} chars truncated)…\n${s.slice(tailStart)}`;
}
|
|
53
|
+
|
|
54
|
+
/** Non-read tool output larger than this is spilled to a recoverable artifact file.
|
|
55
|
+
* Aligned with `truncateToolOutput`'s generic cap so that whenever the model-visible
|
|
56
|
+
* result drops content, the full output is recoverable via the artifact. (`read`
|
|
57
|
+
* spills against the larger READ_OUTPUT_MAX in the result loop.) */
|
|
58
|
+
export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
|
|
59
|
+
|
|
60
|
+
/** Retention limit: how many of the newest tool-result artifacts survive each prune. */
export const MAX_TOOL_ARTIFACTS = 50;

/**
 * Best-effort retention sweep over `dir`: keeps the `MAX_TOOL_ARTIFACTS` entries
 * with the newest modification time and removes the rest.
 *
 * Nothing here throws. An unreadable directory is treated as empty, an entry
 * whose stat fails is ranked with mtime 0 (i.e. oldest), and a failed removal
 * is ignored.
 *
 * @param dir Directory holding the spilled tool-result artifacts.
 */
async function pruneToolArtifacts(dir: string): Promise<void> {
  let names: string[];
  try {
    names = await fs.readdir(dir);
  } catch {
    names = [];
  }
  if (names.length <= MAX_TOOL_ARTIFACTS) return;

  const mtimeOf = async (name: string): Promise<number> => {
    try {
      return (await fs.stat(path.join(dir, name))).mtimeMs;
    } catch {
      return 0;
    }
  };

  const entries = await Promise.all(
    names.map(async name => ({ name, mtime: await mtimeOf(name) })),
  );
  // Newest first, so everything past the retention limit is the oldest surplus.
  entries.sort((x, y) => y.mtime - x.mtime);

  const expired = entries.slice(MAX_TOOL_ARTIFACTS);
  for (const entry of expired) {
    try {
      await fs.rm(path.join(dir, entry.name), { force: true });
    } catch {
      // Removal is best-effort; a leftover file is retried on the next spill.
    }
  }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Persist an oversized tool result verbatim under `.jeo/artifacts/tool-results/`
 * inside `cwd` and hand back the workspace-relative path so the model can `read`
 * it later.
 *
 * The file name is `<epoch-ms>-<random base36>-<tool>.txt`, where the tool name
 * is reduced to `[a-zA-Z0-9_-]`, capped at 32 characters, and replaced by
 * `tool` when nothing is left. After writing, old artifacts are pruned so the
 * directory cannot grow without bound over a long session.
 *
 * Best-effort: directory creation and the write may throw; the caller catches
 * that and simply omits the artifact note.
 *
 * @param tool Name of the tool that produced the output.
 * @param output Full, untruncated tool output.
 * @param cwd Workspace root the artifact directory is created under.
 * @returns Path of the written artifact, relative to `cwd`.
 */
export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
  const relDir = path.join(".jeo", "artifacts", "tool-results");
  const absDir = path.join(cwd, relDir);
  await fs.mkdir(absDir, { recursive: true });

  const sanitized = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32);
  const safeTool = sanitized || "tool";
  const millis = Date.now();
  const nonce = Math.random().toString(36).slice(2, 8);
  const rel = path.join(relDir, `${millis}-${nonce}-${safeTool}.txt`);

  await fs.writeFile(path.join(cwd, rel), output, "utf-8");
  await pruneToolArtifacts(absDir);
  return rel;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Turn one tool's raw output into the body the model actually sees.
 *
 * Steps, in order:
 *  1. minimize — pass the raw output through `minimizeToolOutput` and use its `text`;
 *  2. truncate — cap to the per-tool budget: `read` gets `READ_OUTPUT_MAX` with a
 *     head-only cut (head/tail splitting mangles code), every other tool gets
 *     `TOOL_OUTPUT_MAX` with both-ends truncation;
 *  3. spill — when the RAW output exceeds that budget, save it verbatim as an
 *     artifact and append a note pointing at it. A failed spill is swallowed and
 *     the note is simply left out.
 *
 * @param tool Name of the tool that produced the output.
 * @param rawOutput Unprocessed tool output.
 * @param cwd Workspace root used for the artifact directory.
 * @returns The model-visible result body.
 */
export async function formatToolResultBody(tool: string, rawOutput: string, cwd: string): Promise<string> {
  const readResult = tool === "read";
  const budget = readResult ? READ_OUTPUT_MAX : TOOL_OUTPUT_MAX;

  const minimized = minimizeToolOutput(rawOutput, tool).text;
  const shown = truncateToolOutput(minimized, budget, readResult);

  // The spill decision is made on the raw length, not the minimized one.
  if (rawOutput.length <= budget) return shown;

  let artifactPath: string | null;
  try {
    artifactPath = await spillToolResult(tool, rawOutput, cwd);
  } catch {
    artifactPath = null;
  }
  if (!artifactPath) return shown;

  return `${shown}\n[full output (${rawOutput.length} chars) saved to ${artifactPath} — read it for the truncated remainder]`;
}
|
package/src/agent/tools.ts
CHANGED
|
@@ -3,6 +3,7 @@ import * as fs from "node:fs/promises";
|
|
|
3
3
|
import * as path from "node:path";
|
|
4
4
|
import { readWorkflowState, readWorkflowStateStrict, type WorkflowState } from "./state";
|
|
5
5
|
import { jeoEnv } from "../util/env";
|
|
6
|
+
import { READ_OUTPUT_MAX } from "./tool-output";
|
|
6
7
|
|
|
7
8
|
/** Read the deep-interview lock; on corrupt state fail CLOSED (treat as active lock). */
|
|
8
9
|
async function readMutationLock(cwd: string): Promise<WorkflowState | null> {
|
|
@@ -291,10 +292,27 @@ export async function readTool(
|
|
|
291
292
|
return { success: true, output: out.join("\n") };
|
|
292
293
|
}
|
|
293
294
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
295
|
+
// Default (no lineRange): fill the model-visible read budget with WHOLE lines
|
|
296
|
+
// instead of a fixed 500-line cap that left half the 32k budget unused and forced
|
|
297
|
+
// needless pagination (the read tool's biggest "reads too little per call" pain).
|
|
298
|
+
// READ_OUTPUT_MAX is the real cap; a hard line ceiling (JEO_READ_MAX_LINES) guards
|
|
299
|
+
// pathological files, and a small reserve keeps the pagination notice inside the
|
|
300
|
+
// budget so it is never trimmed by the downstream head-only truncation.
|
|
301
|
+
const HARD_LINE_CEILING = Math.max(500, Number(jeoEnv("READ_MAX_LINES") ?? "") || 5000);
|
|
302
|
+
const charBudget = Math.max(1_000, READ_OUTPUT_MAX - 256);
|
|
303
|
+
const shownLines: string[] = [];
|
|
304
|
+
let usedChars = 0;
|
|
305
|
+
for (let i = 0; i < lines.length && shownLines.length < HARD_LINE_CEILING; i++) {
|
|
306
|
+
const annotatedLine = `${i + 1}${lineAnchor(lines[i]!)}|${lines[i]}`;
|
|
307
|
+
const cost = annotatedLine.length + 1; // + newline
|
|
308
|
+
if (shownLines.length > 0 && usedChars + cost > charBudget) break; // always emit ≥1 line
|
|
309
|
+
shownLines.push(annotatedLine);
|
|
310
|
+
usedChars += cost;
|
|
311
|
+
}
|
|
312
|
+
const annotated = shownLines.join("\n");
|
|
313
|
+
if (shownLines.length < lines.length) {
|
|
314
|
+
const shown = shownLines.length;
|
|
315
|
+
const notice = `\n…(showing lines 1-${shown} of ${lines.length}; pass lineRange "${shown + 1}-" to read the rest)`;
|
|
298
316
|
return { success: true, output: annotated + notice };
|
|
299
317
|
}
|
|
300
318
|
return { success: true, output: annotated };
|
|
@@ -574,7 +592,8 @@ export async function bashTool(
|
|
|
574
592
|
cwd: string = process.cwd(),
|
|
575
593
|
timeoutMs: number = 120_000,
|
|
576
594
|
subdir?: string,
|
|
577
|
-
env?: Record<string, string
|
|
595
|
+
env?: Record<string, string>,
|
|
596
|
+
onProgress?: (partialOutput: string) => void,
|
|
578
597
|
): Promise<ToolResult> {
|
|
579
598
|
if (jeoEnv("BASH_FIXUPS") === "1") {
|
|
580
599
|
const fx = applyBashFixups(command);
|
|
@@ -608,12 +627,27 @@ export async function bashTool(
|
|
|
608
627
|
killTimer = setTimeout(() => { try { proc.kill(9); } catch {} }, 3_000);
|
|
609
628
|
}, TIMEOUT_MS);
|
|
610
629
|
|
|
630
|
+
// Stream stdout incrementally when a progress sink is attached (drives the live
|
|
631
|
+
// DIMMED bash output view); read stderr fully in parallel. Without a sink, fall
|
|
632
|
+
// back to a single post-exit read (identical content, no streaming overhead).
|
|
633
|
+
const stderrPromise = new Response(proc.stderr).text();
|
|
634
|
+
let stdout = "";
|
|
635
|
+
if (onProgress) {
|
|
636
|
+
const decoder = new TextDecoder();
|
|
637
|
+
let lastEmit = 0;
|
|
638
|
+
for await (const chunk of proc.stdout as unknown as AsyncIterable<Uint8Array>) {
|
|
639
|
+
stdout += decoder.decode(chunk, { stream: true });
|
|
640
|
+
const now = Date.now();
|
|
641
|
+
if (now - lastEmit >= 80) { lastEmit = now; onProgress(stdout); }
|
|
642
|
+
}
|
|
643
|
+
stdout += decoder.decode();
|
|
644
|
+
onProgress(stdout);
|
|
645
|
+
}
|
|
611
646
|
await proc.exited;
|
|
612
647
|
clearTimeout(timer);
|
|
613
648
|
if (killTimer) clearTimeout(killTimer);
|
|
614
|
-
|
|
615
|
-
const
|
|
616
|
-
const stderr = await new Response(proc.stderr).text();
|
|
649
|
+
if (!onProgress) stdout = await new Response(proc.stdout).text();
|
|
650
|
+
const stderr = await stderrPromise;
|
|
617
651
|
|
|
618
652
|
let output = [stdout, stderr].filter(Boolean).join("\n");
|
|
619
653
|
const MAX_OUTPUT = 100_000;
|
package/src/ai/model-manager.ts
CHANGED
|
@@ -2,11 +2,7 @@ import { providerRegistry } from "./provider-registry";
|
|
|
2
2
|
import { OAUTH_FLOW_REGISTRY } from "../auth/flows";
|
|
3
3
|
import { readGlobalConfig } from "../agent/state";
|
|
4
4
|
import { resolveCredential, type AuthProvider, type Credential } from "../auth";
|
|
5
|
-
import
|
|
6
|
-
import { openaiAdapter } from "./providers/openai";
|
|
7
|
-
import { geminiAdapter } from "./providers/gemini";
|
|
8
|
-
import { ollamaAdapter } from "./providers/ollama";
|
|
9
|
-
import { antigravityAdapter } from "./providers/antigravity";
|
|
5
|
+
import "./register-providers"; // side-effect: registers built-in adapters into providerRegistry
|
|
10
6
|
import type { CallOptions, Message, ProviderAdapter, ProviderName } from "./types";
|
|
11
7
|
import { expandAlias, resolveModelId, effectiveAliasesFor } from "./model-registry";
|
|
12
8
|
import { findCatalogEntry, type ModelCatalogEntry } from "./model-catalog-compat";
|
|
@@ -16,12 +12,6 @@ import { jeoEnv } from "../util/env";
|
|
|
16
12
|
import type { Config } from "../agent/state";
|
|
17
13
|
|
|
18
14
|
|
|
19
|
-
// Initialize Provider Registry
|
|
20
|
-
providerRegistry.register("anthropic", anthropicAdapter);
|
|
21
|
-
providerRegistry.register("openai", openaiAdapter);
|
|
22
|
-
providerRegistry.register("gemini", geminiAdapter);
|
|
23
|
-
providerRegistry.register("antigravity", antigravityAdapter);
|
|
24
|
-
providerRegistry.register("ollama", ollamaAdapter);
|
|
25
15
|
|
|
26
16
|
|
|
27
17
|
export function resolveProvider(model: string): ProviderName {
|
|
@@ -315,6 +305,7 @@ async function resolveCall(options: Partial<CallOptions>, kind: "request" | "str
|
|
|
315
305
|
onUsage: options.onUsage,
|
|
316
306
|
signal: options.signal,
|
|
317
307
|
reasoningEffort: options.reasoningEffort ?? thinkingToReasoningEffort(config.thinkingLevel),
|
|
308
|
+
onReasoning: options.onReasoning,
|
|
318
309
|
};
|
|
319
310
|
// Caller-supplied retry sink rides on the config-derived retry budget so the
|
|
320
311
|
// engine/TUI can surface "rate limited — retrying in Ns" instead of a silent wait.
|
|
@@ -160,13 +160,18 @@ export function antigravityRequest(messages: Message[], options: CallOptions, cr
|
|
|
160
160
|
type CcaUsage = { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
|
|
161
161
|
interface CcaChunk {
|
|
162
162
|
response?: {
|
|
163
|
-
candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
|
|
163
|
+
candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[];
|
|
164
164
|
usageMetadata?: CcaUsage;
|
|
165
165
|
};
|
|
166
166
|
}
|
|
167
167
|
|
|
168
168
|
function textOf(chunk: CcaChunk): string {
|
|
169
|
-
return chunk.response?.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
|
|
169
|
+
return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/** Native thinking text (`thought` parts) — kept separate so it never pollutes the JSON tool call. */
|
|
173
|
+
function thoughtOf(chunk: CcaChunk): string {
|
|
174
|
+
return chunk.response?.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
170
175
|
}
|
|
171
176
|
|
|
172
177
|
async function fetchAntigravity(messages: Message[], options: CallOptions, credential: Credential): Promise<Response> {
|
|
@@ -194,6 +199,8 @@ export const antigravityAdapter: ProviderAdapter = {
|
|
|
194
199
|
for await (const data of readSse(response.body)) {
|
|
195
200
|
let chunk: CcaChunk;
|
|
196
201
|
try { chunk = JSON.parse(data); } catch { continue; }
|
|
202
|
+
const thought = thoughtOf(chunk);
|
|
203
|
+
if (thought) options.onReasoning?.(thought);
|
|
197
204
|
out += textOf(chunk);
|
|
198
205
|
if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
|
|
199
206
|
}
|
|
@@ -209,6 +216,8 @@ export const antigravityAdapter: ProviderAdapter = {
|
|
|
209
216
|
for await (const data of readSse(response.body)) {
|
|
210
217
|
let chunk: CcaChunk;
|
|
211
218
|
try { chunk = JSON.parse(data); } catch { continue; }
|
|
219
|
+
const thought = thoughtOf(chunk);
|
|
220
|
+
if (thought) options.onReasoning?.(thought);
|
|
212
221
|
const delta = textOf(chunk);
|
|
213
222
|
if (delta) { yielded = true; yield delta; }
|
|
214
223
|
if (chunk.response?.usageMetadata) usage = chunk.response.usageMetadata;
|
package/src/ai/providers/gemini.ts
CHANGED
|
@@ -119,7 +119,7 @@ export function geminiCliRequest(messages: Message[], options: CallOptions, acce
|
|
|
119
119
|
}
|
|
120
120
|
|
|
121
121
|
interface GeminiChunk {
|
|
122
|
-
candidates?: { content?: { parts?: { text?: string }[] }; finishReason?: string }[];
|
|
122
|
+
candidates?: { content?: { parts?: { text?: string; thought?: boolean }[] }; finishReason?: string }[];
|
|
123
123
|
promptFeedback?: { blockReason?: string };
|
|
124
124
|
usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number; thoughtsTokenCount?: number };
|
|
125
125
|
}
|
|
@@ -130,7 +130,13 @@ interface CcaChunk {
|
|
|
130
130
|
}
|
|
131
131
|
|
|
132
132
|
function textOf(chunk: GeminiChunk): string {
|
|
133
|
-
return chunk.candidates?.[0]?.content?.parts?.map(p => p.text ?? "").join("") ?? "";
|
|
133
|
+
return chunk.candidates?.[0]?.content?.parts?.filter(p => !p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/** Native thinking text (`thought` parts), present only when the model emits thought
|
|
137
|
+
* summaries. Kept SEPARATE from textOf so thoughts never pollute the JSON tool call. */
|
|
138
|
+
function thoughtOf(chunk: GeminiChunk): string {
|
|
139
|
+
return chunk.candidates?.[0]?.content?.parts?.filter(p => p.thought).map(p => p.text ?? "").join("") ?? "";
|
|
134
140
|
}
|
|
135
141
|
|
|
136
142
|
/** When Gemini returns HTTP 200 with no text, surface the real cause (safety block /
|
|
@@ -176,6 +182,8 @@ async function* ccaTurn(messages: Message[], options: CallOptions, credential: C
|
|
|
176
182
|
}
|
|
177
183
|
const inner = chunk.response;
|
|
178
184
|
if (!inner) continue;
|
|
185
|
+
const thought = thoughtOf(inner);
|
|
186
|
+
if (thought) options.onReasoning?.(thought);
|
|
179
187
|
const delta = textOf(inner);
|
|
180
188
|
if (delta) {
|
|
181
189
|
yieldedAny = true;
|
|
@@ -239,6 +247,8 @@ export const geminiAdapter: ProviderAdapter = {
|
|
|
239
247
|
} catch {
|
|
240
248
|
continue;
|
|
241
249
|
}
|
|
250
|
+
const thought = thoughtOf(chunk);
|
|
251
|
+
if (thought) options.onReasoning?.(thought);
|
|
242
252
|
const delta = textOf(chunk);
|
|
243
253
|
if (delta) {
|
|
244
254
|
yieldedAny = true;
|
package/src/ai/register-providers.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Built-in provider registration (the registry bootstrap).
|
|
3
|
+
*
|
|
4
|
+
* Importing this module for its side effect registers every bundled LLM adapter
|
|
5
|
+
* into the shared `providerRegistry`. `model-manager` then resolves adapters
|
|
6
|
+
* through the registry alone — it no longer imports, or even names, concrete
|
|
7
|
+
* providers. To add a new built-in provider, register it HERE only; nothing in
|
|
8
|
+
* `model-manager` changes.
|
|
9
|
+
*/
|
|
10
|
+
import { providerRegistry } from "./provider-registry";
|
|
11
|
+
import { anthropicAdapter } from "./providers/anthropic";
|
|
12
|
+
import { openaiAdapter } from "./providers/openai";
|
|
13
|
+
import { geminiAdapter } from "./providers/gemini";
|
|
14
|
+
import { ollamaAdapter } from "./providers/ollama";
|
|
15
|
+
import { antigravityAdapter } from "./providers/antigravity";
|
|
16
|
+
|
|
17
|
+
providerRegistry.register("anthropic", anthropicAdapter);
|
|
18
|
+
providerRegistry.register("openai", openaiAdapter);
|
|
19
|
+
providerRegistry.register("gemini", geminiAdapter);
|
|
20
|
+
providerRegistry.register("antigravity", antigravityAdapter);
|
|
21
|
+
providerRegistry.register("ollama", ollamaAdapter);
|
package/src/ai/types.ts
CHANGED
|
@@ -43,6 +43,10 @@ export interface CallOptions {
|
|
|
43
43
|
/** Notified before each auto-retry backoff wait (rate limits / transient errors).
|
|
44
44
|
* NOT forwarded to provider adapters — consumed by the manager's retry layer. */
|
|
45
45
|
onRetry?: (attempt: number, err: unknown, delayMs: number) => void;
|
|
46
|
+
/** Streaming sink for native model reasoning/thinking text deltas (separate from the
|
|
47
|
+
* answer text). Surfaced as a transient dimmed view; absent for models that emit no
|
|
48
|
+
* thought text. */
|
|
49
|
+
onReasoning?: (delta: string) => void;
|
|
46
50
|
}
|
|
47
51
|
|
|
48
52
|
export interface ProviderAdapter {
|
package/src/cli/runner.ts
CHANGED
|
@@ -172,15 +172,6 @@ export const COMMANDS: readonly CommandSpec[] = [
|
|
|
172
172
|
return args => m.runUpdateCommand(args);
|
|
173
173
|
},
|
|
174
174
|
},
|
|
175
|
-
{
|
|
176
|
-
name: "gjc",
|
|
177
|
-
summary: "Run the gjc workflow skill as an autonomous build loop (plan → implement → verify).",
|
|
178
|
-
usage: "gjc \"<intent>\"",
|
|
179
|
-
loader: async () => {
|
|
180
|
-
const m = await import("../commands/gjc");
|
|
181
|
-
return args => m.runGjcCommand(args);
|
|
182
|
-
},
|
|
183
|
-
},
|
|
184
175
|
{
|
|
185
176
|
name: "ooo-seed",
|
|
186
177
|
summary: "Generate an immutable ooo seed from a specification (spec-first automation).",
|