jeo-code 0.4.6 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +2 -2
- package/README.ko.md +2 -2
- package/README.md +2 -2
- package/README.zh.md +2 -2
- package/package.json +1 -1
- package/src/agent/dev/evolution-bridge.ts +36 -3
- package/src/agent/dev/self-analysis.ts +6 -1
- package/src/agent/engine.ts +21 -71
- package/src/agent/loop.ts +2 -0
- package/src/agent/subagent-registry.ts +131 -0
- package/src/agent/subagent-tool.ts +89 -0
- package/src/agent/subagents.ts +22 -3
- package/src/agent/task-tool.ts +119 -27
- package/src/agent/tool-output.ts +115 -0
- package/src/agent/tools.ts +42 -8
- package/src/ai/model-manager.ts +2 -11
- package/src/ai/providers/antigravity.ts +11 -2
- package/src/ai/providers/gemini.ts +12 -2
- package/src/ai/register-providers.ts +21 -0
- package/src/ai/types.ts +4 -0
- package/src/cli/runner.ts +0 -9
- package/src/commands/launch.ts +47 -9
- package/src/commands/team.ts +13 -6
- package/src/skills/catalog.ts +0 -2
- package/src/tui/app.ts +120 -14
- package/src/tui/components/forge.ts +18 -1
- package/src/tui/components/markdown-text.ts +10 -1
- package/src/tui/components/themes.ts +46 -0
- package/src/tui/components/todo-card.ts +44 -13
- package/src/tui/components/width.ts +51 -0
- package/src/tui/renderer.ts +38 -12
- package/src/util/update-check.ts +53 -0
- package/src/commands/gjc.ts +0 -52
- package/src/prompts/skills/gjc/AGENTS.md +0 -31
- package/src/prompts/skills/gjc/SKILL.md +0 -15
package/README.ja.md
CHANGED
|
@@ -150,11 +150,11 @@ CI は `.github/workflows/npm-publish.yml` で公開します — GitHub リリ
|
|
|
150
150
|
## 変更履歴 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.4.8]** (2026-06-14) — Live-frame stability: constant-height live turn, renderer self-heal off-by-one fix, and frame-safe child-stdout sanitizing — no more duplicate model bar or torn escapes.
|
|
154
|
+
- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
|
|
153
155
|
- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
|
|
154
156
|
- **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
|
|
155
157
|
- **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
|
|
156
|
-
- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
|
|
157
|
-
- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.ko.md
CHANGED
|
@@ -150,11 +150,11 @@ CI는 `.github/workflows/npm-publish.yml`로 배포합니다 — GitHub 릴리
|
|
|
150
150
|
## 변경 이력 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.4.8]** (2026-06-14) — Live-frame stability: constant-height live turn, renderer self-heal off-by-one fix, and frame-safe child-stdout sanitizing — no more duplicate model bar or torn escapes.
|
|
154
|
+
- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
|
|
153
155
|
- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
|
|
154
156
|
- **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
|
|
155
157
|
- **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
|
|
156
|
-
- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
|
|
157
|
-
- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.md
CHANGED
|
@@ -150,11 +150,11 @@ Required npm token permissions (repository secret `NPM_TOKEN`):
|
|
|
150
150
|
## Changelog
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.4.8]** (2026-06-14) — Live-frame stability: constant-height live turn, renderer self-heal off-by-one fix, and frame-safe child-stdout sanitizing — no more duplicate model bar or torn escapes.
|
|
154
|
+
- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
|
|
153
155
|
- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
|
|
154
156
|
- **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
|
|
155
157
|
- **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
|
|
156
|
-
- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
|
|
157
|
-
- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.zh.md
CHANGED
|
@@ -150,11 +150,11 @@ CI 通过 `.github/workflows/npm-publish.yml` 发布 — GitHub 发布 release
|
|
|
150
150
|
## 更新日志 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.4.8]** (2026-06-14) — Live-frame stability: constant-height live turn, renderer self-heal off-by-one fix, and frame-safe child-stdout sanitizing — no more duplicate model bar or torn escapes.
|
|
154
|
+
- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
|
|
153
155
|
- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
|
|
154
156
|
- **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
|
|
155
157
|
- **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
|
|
156
|
-
- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
|
|
157
|
-
- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/package.json
CHANGED
|
package/src/agent/dev/evolution-bridge.ts
CHANGED
|
@@ -1,7 +1,40 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { runAgentLoop, executorSystemPrompt, DEFAULT_TOOLS } from "../engine";
|
|
2
|
+
import { readGlobalConfig } from "../state";
|
|
3
|
+
import { runPostImplementationHooks } from "../hooks";
|
|
2
4
|
import { runAdvancedAnalysis } from "./advanced-analyzer";
|
|
3
5
|
import { logEvolution } from "./evolution-logger";
|
|
4
6
|
|
|
7
|
+
async function runEvolutionLoop(intent: string, cwd: string): Promise<void> {
|
|
8
|
+
const config = await readGlobalConfig();
|
|
9
|
+
const model = config.defaultModel || "fast";
|
|
10
|
+
const systemPrompt = executorSystemPrompt();
|
|
11
|
+
|
|
12
|
+
await runAgentLoop([{ role: "user", content: intent }], {
|
|
13
|
+
cwd,
|
|
14
|
+
systemPrompt,
|
|
15
|
+
model,
|
|
16
|
+
tools: DEFAULT_TOOLS,
|
|
17
|
+
maxSteps: 50,
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
console.log("\n[jeo] Verifying implementation...");
|
|
21
|
+
const verify = await runPostImplementationHooks(cwd, intent);
|
|
22
|
+
|
|
23
|
+
if (!verify.success) {
|
|
24
|
+
console.error("\n[jeo] Verification FAILED. Auto-repairing...");
|
|
25
|
+
const repairTask = `Previous implementation failed verification.\nErrors:\n${verify.output}\n\nPlease fix.`;
|
|
26
|
+
await runAgentLoop([{ role: "user", content: repairTask }], {
|
|
27
|
+
cwd,
|
|
28
|
+
systemPrompt,
|
|
29
|
+
model,
|
|
30
|
+
tools: DEFAULT_TOOLS,
|
|
31
|
+
maxSteps: 30,
|
|
32
|
+
});
|
|
33
|
+
} else {
|
|
34
|
+
console.log("\n[jeo] Verification SUCCESSFUL.");
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
5
38
|
export async function consultGjcForAdvancedEvolution(cwd: string) {
|
|
6
39
|
const report = await runAdvancedAnalysis(cwd);
|
|
7
40
|
const timestamp = new Date().toISOString();
|
|
@@ -27,7 +60,7 @@ As my implementation guide (gjc), please:
|
|
|
27
60
|
`;
|
|
28
61
|
|
|
29
62
|
try {
|
|
30
|
-
await
|
|
63
|
+
await runEvolutionLoop(request, cwd);
|
|
31
64
|
|
|
32
65
|
await logEvolution({
|
|
33
66
|
timestamp: new Date().toISOString(),
|
|
@@ -62,7 +95,7 @@ export async function consultGjcForEvolution(cwd: string) {
|
|
|
62
95
|
|
|
63
96
|
console.log();
|
|
64
97
|
try {
|
|
65
|
-
await
|
|
98
|
+
await runEvolutionLoop(report, cwd);
|
|
66
99
|
await logEvolution({
|
|
67
100
|
timestamp: new Date().toISOString(),
|
|
68
101
|
target: "src/agent/engine.ts",
|
package/src/agent/dev/self-analysis.ts
CHANGED
|
@@ -22,7 +22,12 @@ export async function runSelfAnalysis(cwd: string): Promise<string> {
|
|
|
22
22
|
const content = await fs.readFile(targetPath, "utf-8");
|
|
23
23
|
|
|
24
24
|
const lineCount = content.split("\n").length;
|
|
25
|
-
|
|
25
|
+
// Ownership-accurate SRP check: the loop drives steps, while output shaping
|
|
26
|
+
// (truncate/spill) lives in tool-output.ts. Flag only when those are DEFINED
|
|
27
|
+
// here again, not merely imported or re-exported for backward compatibility.
|
|
28
|
+
const definesOutputShaping =
|
|
29
|
+
/\bfunction\s+truncateToolOutput\b/.test(content) && /\bfunction\s+spillToolResult\b/.test(content);
|
|
30
|
+
const hasTooManyResponsibilities = content.includes("runAgentLoop") && definesOutputShaping;
|
|
26
31
|
|
|
27
32
|
let report = "Analysis of src/agent/engine.ts:\n";
|
|
28
33
|
report += "- File length: " + lineCount + " lines.\n";
|
package/src/agent/engine.ts
CHANGED
|
@@ -16,7 +16,8 @@ import { webSearchTool, setWebSearchActiveModel } from "./web-search";
|
|
|
16
16
|
import { friendlyProviderError, isContextOverflowError, isRefusalError } from "../util/provider-error";
|
|
17
17
|
import { isRateLimitError } from "../util/retry";
|
|
18
18
|
import { runPreToolHooks, runPostTurnHooks } from "./hooks";
|
|
19
|
-
import {
|
|
19
|
+
import { truncateToolOutput, formatToolResultBody } from "./tool-output";
|
|
20
|
+
export { TOOL_OUTPUT_MAX, READ_OUTPUT_MAX, TOOL_SPILL_THRESHOLD, MAX_TOOL_ARTIFACTS, truncateToolOutput, spillToolResult } from "./tool-output";
|
|
20
21
|
import { StepBudget, dynamicStepBudgetConfig, resolveStepBudgetConfig, hashSignature, type StepBudgetConfig } from "./step-budget";
|
|
21
22
|
import { historyTokens, trimToolResultsInPlace } from "./compaction";
|
|
22
23
|
import { jeoEnv } from "../util/env";
|
|
@@ -30,6 +31,7 @@ async function invokeCallLlm(history: Message[], options: {
|
|
|
30
31
|
onUsage?: (u: { inputTokens?: number; outputTokens?: number }) => void;
|
|
31
32
|
onRetry?: (attempt: number, err: unknown, delayMs: number) => void;
|
|
32
33
|
onToken?: (delta: string) => void;
|
|
34
|
+
onReasoning?: (delta: string) => void;
|
|
33
35
|
}): Promise<string> {
|
|
34
36
|
const mod = await import("./loop");
|
|
35
37
|
return mod.callLlm(history, options);
|
|
@@ -39,14 +41,14 @@ export interface ToolInvocation {
|
|
|
39
41
|
arguments?: Record<string, any>;
|
|
40
42
|
}
|
|
41
43
|
|
|
42
|
-
export type ToolHandler = (args: Record<string, any>, cwd: string) => Promise<ToolResult>;
|
|
44
|
+
export type ToolHandler = (args: Record<string, any>, cwd: string, onProgress?: (partialOutput: string) => void) => Promise<ToolResult>;
|
|
43
45
|
|
|
44
|
-
/** The default executor toolset (read / write / edit / bash / find / search). */
|
|
46
|
+
/** The default executor toolset (read / write / edit / bash / find / search / ls / mkdir / delete / web_search). */
|
|
45
47
|
export const DEFAULT_TOOLS: Record<string, ToolHandler> = {
|
|
46
48
|
read: (a, cwd) => readTool(a.filePath ?? a.path, a.lineRange ?? a.range, cwd, !!a.raw),
|
|
47
49
|
write: (a, cwd) => writeTool(a.filePath ?? a.path, a.content ?? "", cwd),
|
|
48
50
|
edit: (a, cwd) => editTool(a.filePath ?? a.path, a.editBlock ?? a.edit ?? "", cwd),
|
|
49
|
-
bash: (a, cwd) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined),
|
|
51
|
+
bash: (a, cwd, onProgress) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined, onProgress),
|
|
50
52
|
find: (a, cwd) => findTool(a.globPattern ?? a.pattern, cwd),
|
|
51
53
|
search: (a, cwd) => searchTool(a.pattern, a.globPattern ?? "*", cwd, !!(a.ignoreCase ?? a.i), { before: a.before, after: a.after, context: a.context, maxMatches: a.maxMatches }),
|
|
52
54
|
ls: (a, cwd) => lsTool(a.dirPath ?? a.path ?? a.dir ?? ".", cwd),
|
|
@@ -134,6 +136,10 @@ export interface AgentLoopEvents {
|
|
|
134
136
|
onStep?(step: number): void | Promise<void>;
|
|
135
137
|
onAssistant?(raw: string, invocation: ToolInvocation | null): void;
|
|
136
138
|
onToolResult?(tool: string, success: boolean, output: string): void;
|
|
139
|
+
/** Streaming partial output of the currently-running tool (e.g. bash stdout as it
|
|
140
|
+
* arrives) — drives a live DIMMED output view that the final formatted result
|
|
141
|
+
* replaces on onToolResult. Only bash emits today; other tools are unaffected. */
|
|
142
|
+
onToolProgress?(tool: string, partial: string): void;
|
|
137
143
|
/** Transient progress notice (e.g. "rate limited — retrying in Ns"); NOT a terminal error. */
|
|
138
144
|
onNotice?(message: string): void;
|
|
139
145
|
/** Cumulative token usage after each LLM call — drives live usage meters. */
|
|
@@ -141,6 +147,9 @@ export interface AgentLoopEvents {
|
|
|
141
147
|
/** Accumulated streamed model response so far — drives the live reasoning view. Only
|
|
142
148
|
* requested when a consumer sets it (the engine streams solely for the TUI). */
|
|
143
149
|
onModelStream?(textSoFar: string): void;
|
|
150
|
+
/** Accumulated native reasoning/thinking text so far — drives a transient dimmed
|
|
151
|
+
* "thinking" view. Only requested when a consumer (TUI) attaches. */
|
|
152
|
+
onReasoningStream?(textSoFar: string): void;
|
|
144
153
|
/** Step-budget change (gjc-style retry flow): the limit was extended because the
|
|
145
154
|
* turn is making progress. `limit` is the new max; `reason` is display-ready. */
|
|
146
155
|
onBudget?(limit: number, reason: string): void;
|
|
@@ -192,14 +201,6 @@ export interface AgentLoopResult {
|
|
|
192
201
|
usage?: { inputTokens: number; outputTokens: number };
|
|
193
202
|
}
|
|
194
203
|
|
|
195
|
-
/** Env-tunable output budget (plan/gjc-inheritance.md B10, gjc settings-driven
|
|
196
|
-
* output handling 계승): JEO_TOOL_OUTPUT_MAX caps the model-visible tool result;
|
|
197
|
-
* the spill threshold tracks it so anything truncated stays artifact-recoverable. */
|
|
198
|
-
function envOutputMax(): number {
|
|
199
|
-
const raw = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
|
|
200
|
-
return Number.isFinite(raw) && raw >= 500 && raw <= 200_000 ? Math.trunc(raw) : 4_000;
|
|
201
|
-
}
|
|
202
|
-
export const TOOL_OUTPUT_MAX = envOutputMax();
|
|
203
204
|
|
|
204
205
|
/** Wall-clock budget for ONE agent turn (ms). JEO_TURN_MAX_MS overrides; 0 disables.
|
|
205
206
|
* Default 30 minutes: long autonomous runs stay alive, while a turn that spins in
|
|
@@ -214,55 +215,6 @@ export function turnMaxMs(env: Record<string, string | undefined> = process.env)
|
|
|
214
215
|
return 30 * 60 * 1000;
|
|
215
216
|
}
|
|
216
217
|
|
|
217
|
-
/**
|
|
218
|
-
* Cap a tool result fed back to the model, keeping both ends: the head holds the
|
|
219
|
-
* start (e.g. a file's top / a command's invocation) and the tail holds what's
|
|
220
|
-
* usually decisive (test summaries, the final error). A pure head-cut loses that.
|
|
221
|
-
*/
|
|
222
|
-
export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX): string {
|
|
223
|
-
if (s.length <= max) return s;
|
|
224
|
-
const head = Math.floor(max * 0.6);
|
|
225
|
-
const tail = max - head;
|
|
226
|
-
return `${s.slice(0, head)}\n…(${s.length - max} chars truncated)…\n${s.slice(s.length - tail)}`;
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
/** Tool output larger than this is spilled to a recoverable artifact file. Aligned
|
|
230
|
-
* with `truncateToolOutput`'s cap so that whenever the model-visible result drops
|
|
231
|
-
* content, the full output is recoverable via the artifact. */
|
|
232
|
-
export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
|
|
233
|
-
|
|
234
|
-
/**
|
|
235
|
-
* Write an oversized tool result verbatim under `.jeo/artifacts/tool-results/` and
|
|
236
|
-
* return the workspace-relative path (for the model to `read`). Best-effort: throws
|
|
237
|
-
* are caught by the caller, which simply omits the artifact note.
|
|
238
|
-
*/
|
|
239
|
-
/** Most recent tool-result artifacts to keep; older ones are pruned on each spill. */
|
|
240
|
-
export const MAX_TOOL_ARTIFACTS = 50;
|
|
241
|
-
|
|
242
|
-
/** Best-effort retention: keep the newest `MAX_TOOL_ARTIFACTS` files in `dir`, delete the rest. */
|
|
243
|
-
async function pruneToolArtifacts(dir: string): Promise<void> {
|
|
244
|
-
const files = await fs.readdir(dir).catch(() => [] as string[]);
|
|
245
|
-
if (files.length <= MAX_TOOL_ARTIFACTS) return;
|
|
246
|
-
const stamped = await Promise.all(
|
|
247
|
-
files.map(async f => ({ f, m: (await fs.stat(path.join(dir, f)).catch(() => null))?.mtimeMs ?? 0 })),
|
|
248
|
-
);
|
|
249
|
-
stamped.sort((a, b) => b.m - a.m); // newest first
|
|
250
|
-
for (const { f } of stamped.slice(MAX_TOOL_ARTIFACTS)) {
|
|
251
|
-
await fs.rm(path.join(dir, f), { force: true }).catch(() => {});
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
|
|
256
|
-
const dir = path.join(cwd, ".jeo", "artifacts", "tool-results");
|
|
257
|
-
await fs.mkdir(dir, { recursive: true });
|
|
258
|
-
const safeTool = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32) || "tool";
|
|
259
|
-
const stamp = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
260
|
-
const rel = path.join(".jeo", "artifacts", "tool-results", `${stamp}-${safeTool}.txt`);
|
|
261
|
-
await fs.writeFile(path.join(cwd, rel), output, "utf-8");
|
|
262
|
-
// Retention so a long session can't grow the artifact dir without bound.
|
|
263
|
-
await pruneToolArtifacts(dir);
|
|
264
|
-
return rel;
|
|
265
|
-
}
|
|
266
218
|
|
|
267
219
|
/** Levenshtein distance (small inputs: tool/command names). */
|
|
268
220
|
function editDistance(a: string, b: string): number {
|
|
@@ -453,6 +405,10 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
453
405
|
const onToken = ev.onModelStream
|
|
454
406
|
? (delta: string) => { streamBuf += delta; ev.onModelStream!(streamBuf); }
|
|
455
407
|
: undefined;
|
|
408
|
+
let reasonBuf = "";
|
|
409
|
+
const onReasoning = ev.onReasoningStream
|
|
410
|
+
? (delta: string) => { reasonBuf += delta; ev.onReasoningStream!(reasonBuf); }
|
|
411
|
+
: undefined;
|
|
456
412
|
let responseText: string;
|
|
457
413
|
try {
|
|
458
414
|
responseText = await invokeCallLlm(history, {
|
|
@@ -462,6 +418,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
462
418
|
signal: opts.signal,
|
|
463
419
|
onUsage: u => { acc.inputTokens += u.inputTokens ?? 0; acc.outputTokens += u.outputTokens ?? 0; sawUsage = true; },
|
|
464
420
|
onToken,
|
|
421
|
+
onReasoning,
|
|
465
422
|
// Make provider auto-retry visible: previously a rate-limited call sat in a
|
|
466
423
|
// silent backoff wait, then surfaced "auto-retry was exhausted" with no trace
|
|
467
424
|
// of the retries that DID happen.
|
|
@@ -783,7 +740,8 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
783
740
|
output = preHookResult.error + (preHookResult.output ? `\n${preHookResult.output}` : "");
|
|
784
741
|
} else {
|
|
785
742
|
try {
|
|
786
|
-
const
|
|
743
|
+
const onProgress = ev.onToolProgress ? (partial: string) => ev.onToolProgress!(tool, partial) : undefined;
|
|
744
|
+
const res = await handler(args ?? {}, cwd, onProgress);
|
|
787
745
|
success = res.success;
|
|
788
746
|
output = res.success ? res.output : (res.error ? (res.output ? `${res.error}\n${res.output}` : res.error) : res.output);
|
|
789
747
|
} catch (err: any) {
|
|
@@ -875,15 +833,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
875
833
|
|
|
876
834
|
ev.onToolResult?.(call.tool, res.success, res.output);
|
|
877
835
|
|
|
878
|
-
const
|
|
879
|
-
const visible = minimized.text;
|
|
880
|
-
let resultBody = truncateToolOutput(visible);
|
|
881
|
-
if (res.output.length > TOOL_SPILL_THRESHOLD) {
|
|
882
|
-
const artifact = await spillToolResult(call.tool, res.output, cwd).catch(() => null);
|
|
883
|
-
if (artifact) {
|
|
884
|
-
resultBody += `\n[full output (${res.output.length} chars) saved to ${artifact} — read it for the elided middle]`;
|
|
885
|
-
}
|
|
886
|
-
}
|
|
836
|
+
const resultBody = await formatToolResultBody(call.tool, res.output, cwd);
|
|
887
837
|
|
|
888
838
|
const { diags: hookDiags, ran: hooksRan } = await runPostTurnHooks(
|
|
889
839
|
cwd,
|
package/src/agent/loop.ts
CHANGED
|
@@ -19,6 +19,8 @@ export interface ChatOptions {
|
|
|
19
19
|
* delivered here (concatenation equals the returned string). Absent ⇒ a single
|
|
20
20
|
* non-streaming `call()` (unchanged behavior for non-interactive/test callers). */
|
|
21
21
|
onToken?: (delta: string) => void;
|
|
22
|
+
/** Streaming sink for native reasoning/thinking deltas (drives the dimmed live view). */
|
|
23
|
+
onReasoning?: (delta: string) => void;
|
|
22
24
|
}
|
|
23
25
|
|
|
24
26
|
const manager = createModelManager();
|
package/src/agent/subagent-registry.ts
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process detached-subagent registry (gjc `subagent`/`job` parity, scoped down
|
|
3
|
+
* to one runtime). The synchronous `task` tool blocks the parent until a subagent
|
|
4
|
+
* finishes; a DETACHED launch registers the run here and returns immediately, so
|
|
5
|
+
* the parent can keep working and later list / inspect / await / cancel it via the
|
|
6
|
+
* `subagent` control tool. Concurrency is real (JS event loop): a detached run's
|
|
7
|
+
* awaits interleave with the parent's between steps.
|
|
8
|
+
*
|
|
9
|
+
* Lifecycle is bounded to the turn that created the registry — `cancelAll()` on
|
|
10
|
+
* turn teardown guarantees no background promise leaks into the next turn.
|
|
11
|
+
*/
|
|
12
|
+
import type { ToolResult } from "./tools";
|
|
13
|
+
|
|
14
|
+
export type SubagentStatus = "running" | "completed" | "failed" | "cancelled";
|
|
15
|
+
|
|
16
|
+
export interface SubagentRecord {
|
|
17
|
+
/** Stable id, e.g. "executor-1". */
|
|
18
|
+
id: string;
|
|
19
|
+
role: string;
|
|
20
|
+
/** The assignment text (trimmed for display). */
|
|
21
|
+
task: string;
|
|
22
|
+
status: SubagentStatus;
|
|
23
|
+
startedAt: number;
|
|
24
|
+
finishedAt?: number;
|
|
25
|
+
/** Whether the finished run reported success (contract satisfied). */
|
|
26
|
+
success?: boolean;
|
|
27
|
+
/** Final subagent report/output, set once the run settles. */
|
|
28
|
+
result?: string;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
interface Entry {
|
|
32
|
+
record: SubagentRecord;
|
|
33
|
+
promise: Promise<void>;
|
|
34
|
+
abort: AbortController;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/** A detached run: receives its own AbortSignal and resolves to the subagent's
|
|
38
|
+
* final ToolResult. The runner is responsible for streaming live events itself. */
|
|
39
|
+
export type DetachedRunner = (signal: AbortSignal) => Promise<ToolResult>;
|
|
40
|
+
|
|
41
|
+
export class SubagentRegistry {
|
|
42
|
+
private readonly entries = new Map<string, Entry>();
|
|
43
|
+
private readonly seq = new Map<string, number>();
|
|
44
|
+
|
|
45
|
+
/** Register and START a detached run; returns the (running) record immediately. */
|
|
46
|
+
launch(role: string, task: string, runner: DetachedRunner): SubagentRecord {
|
|
47
|
+
const n = (this.seq.get(role) ?? 0) + 1;
|
|
48
|
+
this.seq.set(role, n);
|
|
49
|
+
const id = `${role}-${n}`;
|
|
50
|
+
const abort = new AbortController();
|
|
51
|
+
const record: SubagentRecord = {
|
|
52
|
+
id,
|
|
53
|
+
role,
|
|
54
|
+
task: task.length > 200 ? task.slice(0, 197) + "…" : task,
|
|
55
|
+
status: "running",
|
|
56
|
+
startedAt: Date.now(),
|
|
57
|
+
};
|
|
58
|
+
const promise = (async () => {
|
|
59
|
+
try {
|
|
60
|
+
const res = await runner(abort.signal);
|
|
61
|
+
// A cancel that already fired wins — don't overwrite the terminal state.
|
|
62
|
+
if (record.status === "cancelled") return;
|
|
63
|
+
record.status = res.success ? "completed" : "failed";
|
|
64
|
+
record.success = res.success;
|
|
65
|
+
record.result = res.output || res.error || "";
|
|
66
|
+
} catch (err) {
|
|
67
|
+
if (record.status === "cancelled") return;
|
|
68
|
+
record.status = "failed";
|
|
69
|
+
record.result = err instanceof Error ? err.message : String(err);
|
|
70
|
+
} finally {
|
|
71
|
+
if (record.finishedAt === undefined) record.finishedAt = Date.now();
|
|
72
|
+
}
|
|
73
|
+
})();
|
|
74
|
+
this.entries.set(id, { record, promise, abort });
|
|
75
|
+
return record;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
list(): SubagentRecord[] {
|
|
79
|
+
return [...this.entries.values()].map(e => e.record);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
get(id: string): SubagentRecord | undefined {
|
|
83
|
+
return this.entries.get(id)?.record;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
running(): SubagentRecord[] {
|
|
87
|
+
return this.list().filter(r => r.status === "running");
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/** Wait for the given ids; unknown ids are skipped and an empty list resolves immediately. With `timeoutMs` the wait
|
|
91
|
+
* is bounded — unfinished runs simply stay "running" in the returned snapshot. */
|
|
92
|
+
async awaitIds(ids: string[], timeoutMs?: number): Promise<SubagentRecord[]> {
|
|
93
|
+
const targets = ids
|
|
94
|
+
.map(id => this.entries.get(id))
|
|
95
|
+
.filter((e): e is Entry => e !== undefined);
|
|
96
|
+
const all = Promise.all(targets.map(e => e.promise)).then(() => {});
|
|
97
|
+
if (timeoutMs !== undefined && timeoutMs > 0) {
|
|
98
|
+
let handle: ReturnType<typeof setTimeout> | undefined;
|
|
99
|
+
const timer = new Promise<void>(resolve => {
|
|
100
|
+
handle = setTimeout(resolve, timeoutMs);
|
|
101
|
+
});
|
|
102
|
+
await Promise.race([all, timer]);
|
|
103
|
+
if (handle !== undefined) clearTimeout(handle);
|
|
104
|
+
} else {
|
|
105
|
+
await all;
|
|
106
|
+
}
|
|
107
|
+
return targets.map(e => e.record);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/** Cancel the given ids (unknown ids are skipped; an empty list is a no-op — use `cancelAll()` for every running run): aborts each run and marks the
|
|
111
|
+
* record cancelled. Already-terminal records are returned unchanged. */
|
|
112
|
+
cancel(ids: string[]): SubagentRecord[] {
|
|
113
|
+
const out: SubagentRecord[] = [];
|
|
114
|
+
for (const id of ids) {
|
|
115
|
+
const e = this.entries.get(id);
|
|
116
|
+
if (!e) continue;
|
|
117
|
+
if (e.record.status === "running") {
|
|
118
|
+
e.record.status = "cancelled";
|
|
119
|
+
e.record.finishedAt = Date.now();
|
|
120
|
+
e.abort.abort();
|
|
121
|
+
}
|
|
122
|
+
out.push(e.record);
|
|
123
|
+
}
|
|
124
|
+
return out;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
/** Abort every still-running subagent (turn teardown / Ctrl-C). */
|
|
128
|
+
cancelAll(): SubagentRecord[] {
|
|
129
|
+
return this.cancel(this.running().map(r => r.id));
|
|
130
|
+
}
|
|
131
|
+
}
|
package/src/agent/subagent-tool.ts
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `subagent` control tool (#9) — the parent's handle on DETACHED subagents launched
|
|
3
|
+
* via `task {detached:true}`. Mirrors gjc's `subagent`/`job` control surface, scoped
|
|
4
|
+
* to an in-process registry: list, inspect, await (optionally bounded), and cancel.
|
|
5
|
+
*
|
|
6
|
+
* Out of scope here (separate subsystems, not stubbed): live peer messaging (IRC)
|
|
7
|
+
* and pause/resume — a step-budget loop has no safe mid-step checkpoint to resume
|
|
8
|
+
* from, so those are intentionally absent rather than faked.
|
|
9
|
+
*/
|
|
10
|
+
import type { ToolHandler } from "./engine";
|
|
11
|
+
import type { ToolResult } from "./tools";
|
|
12
|
+
import type { SubagentRegistry, SubagentRecord } from "./subagent-registry";
|
|
13
|
+
|
|
14
|
+
/** One-line protocol description appended to the launch system prompt. */
|
|
15
|
+
export const SUBAGENT_TOOL_PROTOCOL_LINE =
|
|
16
|
+
`subagent {action:"list"|"inspect"|"await"|"cancel", ids?, timeoutMs?} — control DETACHED ` +
|
|
17
|
+
`subagents started with task{detached:true}. 'await' blocks (optionally up to timeoutMs ms) and ` +
|
|
18
|
+
`returns their reports; 'inspect' shows status + result; 'cancel' aborts them. Omit ids to target all running.`;
|
|
19
|
+
|
|
20
|
+
function elapsed(rec: SubagentRecord): string {
|
|
21
|
+
const end = rec.finishedAt ?? Date.now();
|
|
22
|
+
return `${Math.max(0, Math.round((end - rec.startedAt) / 1000))}s`;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
function rowLine(rec: SubagentRecord): string {
|
|
26
|
+
return `- ${rec.id} [${rec.status.toUpperCase()}] ${elapsed(rec)} · ${rec.task}`;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
function detailBlock(rec: SubagentRecord): string {
|
|
30
|
+
const head = rowLine(rec);
|
|
31
|
+
if (rec.status === "running" || !rec.result) return head;
|
|
32
|
+
return `${head}\n${rec.result}`;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function idsOf(args: Record<string, any>): string[] {
|
|
36
|
+
if (Array.isArray(args.ids)) return args.ids.map((x: unknown) => String(x));
|
|
37
|
+
if (args.id !== undefined) return [String(args.id)];
|
|
38
|
+
return [];
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
 * Builds the `subagent` tool handler bound to `registry`.
 *
 * The handler dispatches on `args.action` (case-insensitive, defaults to
 * `"list"`):
 *  - `list`    — one summary line per registered subagent.
 *  - `inspect` — summary plus result for the given ids, or for all when none given.
 *  - `await`   — waits on the given ids (or all running), optionally bounded by
 *                `timeoutMs`; `success` is false if any are still running afterwards.
 *  - `cancel`  — cancels the given ids (or all running).
 *
 * Explicit ids that resolve to no record are reported as an error for
 * `inspect`, `await` and `cancel` alike, so a mistyped id is never mistaken
 * for a successful operation. Unknown actions return an error result.
 */
export function createSubagentTool(registry: SubagentRegistry): ToolHandler {
  return async (args: Record<string, any>, _cwd: string): Promise<ToolResult> => {
    const action = String(args.action ?? "list").trim().toLowerCase();
    const ids = idsOf(args);

    if (action === "list") {
      const rows = registry.list();
      if (rows.length === 0) {
        return { success: true, output: "No detached subagents this turn. Launch one with task {detached:true}." };
      }
      const running = rows.filter(r => r.status === "running").length;
      return { success: true, output: `${rows.length} subagent(s), ${running} running:\n${rows.map(rowLine).join("\n")}` };
    }

    if (action === "inspect") {
      const targets = (ids.length ? ids.map(id => registry.get(id)) : registry.list())
        .filter((r): r is SubagentRecord => r !== undefined);
      if (targets.length === 0) {
        return { success: false, output: "", error: ids.length ? `No subagent matches ${ids.join(", ")}.` : "No detached subagents this turn." };
      }
      return { success: true, output: targets.map(detailBlock).join("\n\n") };
    }

    if (action === "await") {
      const targets = ids.length ? ids : registry.running().map(r => r.id);
      if (targets.length === 0) {
        return { success: true, output: "No running subagents to await." };
      }
      // Tool arguments are model-produced: tolerate a numeric string, and reject
      // NaN / Infinity / non-positive values so only a usable millisecond count
      // is forwarded. Anything else means "no timeout".
      const requested = typeof args.timeoutMs === "string" ? Number(args.timeoutMs) : args.timeoutMs;
      const timeoutMs = typeof requested === "number" && Number.isFinite(requested) && requested > 0
        ? requested
        : undefined;
      const recs = await registry.awaitIds(targets, timeoutMs);
      if (recs.length === 0) {
        // Mirrors `inspect`: nothing resolved for the requested ids.
        return { success: false, output: "", error: `No subagent matches ${targets.join(", ")}.` };
      }
      const stillRunning = recs.filter(r => r.status === "running").length;
      // Only mention the timeout when one was actually applied (avoids "undefinedms").
      const timeoutNote = timeoutMs !== undefined ? ` after the ${timeoutMs}ms timeout` : "";
      const head = stillRunning > 0
        ? `Awaited ${recs.length} subagent(s); ${stillRunning} still running${timeoutNote} — await again or cancel.`
        : `Awaited ${recs.length} subagent(s); all settled.`;
      return { success: stillRunning === 0, output: `${head}\n\n${recs.map(detailBlock).join("\n\n")}` };
    }

    if (action === "cancel") {
      const targets = ids.length ? ids : registry.running().map(r => r.id);
      if (targets.length === 0) {
        return { success: true, output: "No running subagents to cancel." };
      }
      const recs = registry.cancel(targets);
      if (recs.length === 0) {
        // Mirrors `inspect`: do not report "Cancelled 0" as a success.
        return { success: false, output: "", error: `No subagent matches ${targets.join(", ")}.` };
      }
      return { success: true, output: `Cancelled ${recs.length} subagent(s):\n${recs.map(rowLine).join("\n")}` };
    }

    return { success: false, output: "", error: `Unknown subagent action '${action}'. Use list | inspect | await | cancel.` };
  };
}
|
package/src/agent/subagents.ts
CHANGED
|
@@ -178,20 +178,39 @@ function renderRolePrompt(template: string, role: SubagentRole): string {
|
|
|
178
178
|
.trim();
|
|
179
179
|
}
|
|
180
180
|
|
|
181
|
+
/**
 * Reports whether the section introduced by `marker` has a real body.
 *
 * The body runs from just after the first occurrence of `marker` up to the
 * nearest following occurrence of any other entry in `allMarkers`, or to the
 * end of `text` when none follows. Returns false when `marker` is absent or
 * when that body is empty or whitespace only, so a bare label such as
 * "Summary:" does not count as a filled-in section.
 */
function markerHasContent(text: string, marker: string, allMarkers: string[]): boolean {
  const markerAt = text.indexOf(marker);
  if (markerAt === -1) return false;

  const bodyStart = markerAt + marker.length;
  // Position of the closest other marker after the body start; defaults to end of text.
  const bodyEnd = allMarkers
    .filter(candidate => candidate !== marker)
    .map(candidate => text.indexOf(candidate, bodyStart))
    .filter(position => position >= 0)
    .reduce((nearest, position) => Math.min(nearest, position), text.length);

  return text.slice(bodyStart, bodyEnd).trim() !== "";
}
|
|
196
|
+
|
|
181
197
|
/**
 * Checks a subagent's final `done.reason` report against its role's requirements.
 *
 * - An empty or whitespace-only report fails with `missing: ["done.reason"]`.
 * - Every entry in `role.requiredDoneMarkers` must be present with non-empty
 *   content after it; entries that are absent or label-only are listed in `missing`.
 * - For the `critic` role the report must additionally start with one of
 *   `[OKAY]`, `[ITERATE]` or `[REJECT]`; otherwise the combined verdict label
 *   is listed first in `missing`.
 *
 * @returns `ok: true` exactly when `missing` is empty.
 */
export function validateSubagentDoneReason(role: SubagentRole, reason: string | undefined): { ok: boolean; missing?: string[] } {
  const report = (reason ?? "").trim();
  if (report.length === 0) {
    return { ok: false, missing: ["done.reason"] };
  }

  const required = role.requiredDoneMarkers ?? [];
  const emptySections = required.filter(section => !markerHasContent(report, section, required));

  if (role.id !== "critic") {
    return { ok: emptySections.length === 0, missing: emptySections };
  }

  // Critic reports must lead with a verdict tag in addition to the required sections.
  const opensWithVerdict = ["[OKAY]", "[ITERATE]", "[REJECT]"].some(tag => report.startsWith(tag));
  const missing = opensWithVerdict
    ? [...emptySections]
    : ["[OKAY]|[ITERATE]|[REJECT]", ...emptySections];
  return { ok: missing.length === 0, missing };
}
|
|
196
215
|
|
|
197
216
|
/** Build a role-specific system prompt from its dedicated template. */
|