jeo-code 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ja.md +2 -2
- package/README.ko.md +2 -2
- package/README.md +2 -2
- package/README.zh.md +2 -2
- package/package.json +1 -1
- package/src/agent/dev/evolution-bridge.ts +36 -3
- package/src/agent/dev/self-analysis.ts +6 -1
- package/src/agent/engine.ts +76 -71
- package/src/agent/loop.ts +2 -0
- package/src/agent/step-budget.ts +10 -0
- package/src/agent/subagent-registry.ts +131 -0
- package/src/agent/subagent-tool.ts +89 -0
- package/src/agent/subagents.ts +22 -3
- package/src/agent/task-tool.ts +123 -19
- package/src/agent/tool-output.ts +115 -0
- package/src/agent/tools.ts +42 -8
- package/src/ai/model-manager.ts +9 -14
- package/src/ai/model-registry.ts +8 -3
- package/src/ai/providers/antigravity.ts +11 -2
- package/src/ai/providers/gemini.ts +12 -2
- package/src/ai/register-providers.ts +21 -0
- package/src/ai/types.ts +4 -0
- package/src/cli/runner.ts +0 -9
- package/src/commands/launch.ts +157 -52
- package/src/commands/team.ts +13 -6
- package/src/skills/catalog.ts +0 -2
- package/src/tui/app.ts +131 -20
- package/src/tui/components/forge.ts +25 -7
- package/src/tui/components/input-box.ts +8 -3
- package/src/tui/components/markdown-text.ts +10 -1
- package/src/tui/components/themes.ts +57 -1
- package/src/tui/components/todo-card.ts +44 -13
- package/src/tui/monitoring/hud-view.ts +53 -30
- package/src/util/update-check.ts +53 -0
- package/src/commands/gjc.ts +0 -52
- package/src/prompts/skills/gjc/AGENTS.md +0 -31
- package/src/prompts/skills/gjc/SKILL.md +0 -15
package/README.ja.md
CHANGED
|
@@ -150,11 +150,11 @@ CI は `.github/workflows/npm-publish.yml` で公開します — GitHub リリ
|
|
|
150
150
|
## 変更履歴 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
|
|
154
|
+
- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
|
|
153
155
|
- **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
|
|
154
156
|
- **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
|
|
155
157
|
- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
|
|
156
|
-
- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
|
|
157
|
-
- **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.ko.md
CHANGED
|
@@ -150,11 +150,11 @@ CI는 `.github/workflows/npm-publish.yml`로 배포합니다 — GitHub 릴리
|
|
|
150
150
|
## 변경 이력 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
|
|
154
|
+
- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
|
|
153
155
|
- **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
|
|
154
156
|
- **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
|
|
155
157
|
- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
|
|
156
|
-
- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
|
|
157
|
-
- **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.md
CHANGED
|
@@ -150,11 +150,11 @@ Required npm token permissions (repository secret `NPM_TOKEN`):
|
|
|
150
150
|
## Changelog
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
|
|
154
|
+
- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
|
|
153
155
|
- **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
|
|
154
156
|
- **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
|
|
155
157
|
- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
|
|
156
|
-
- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
|
|
157
|
-
- **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
package/README.zh.md
CHANGED
|
@@ -150,11 +150,11 @@ CI 通过 `.github/workflows/npm-publish.yml` 发布 — GitHub 发布 release
|
|
|
150
150
|
## 更新日志 (Changelog)
|
|
151
151
|
|
|
152
152
|
<!-- CHANGELOG:START (auto-generated from CHANGELOG.md — run `bun run changelog:sync`) -->
|
|
153
|
+
- **[0.4.7]** (2026-06-14) — Detached subagents + `subagent` control tool, live shaded in-flight output, registry-driven providers, fuller `read` budget, styled italics in the final report, and `gjc` retired.
|
|
154
|
+
- **[0.4.6]** (2026-06-14) — Width-correct forge cards for CJK/emoji, red borders on failed tool cards, aligned `ooo ralph` monitor HUD, and a per-theme user-card palette.
|
|
153
155
|
- **[0.4.5]** (2026-06-14) — First-class filesystem make/remove tools.
|
|
154
156
|
- **[0.4.4]** (2026-06-13) — Live subagent status mirroring, always-useful Ctrl+O activity tail, read lineRange crash guard.
|
|
155
157
|
- **[0.4.3]** (2026-06-13) — Readability pass for autopilot, subagent activity, and worked-history review.
|
|
156
|
-
- **[0.4.2]** (2026-06-13) — Thinking-loop termination guarantees (cycle guard + turn wall-clock budget), unboxed live status without step counters, self-contained `.jeo` namespace, live next-prompt input card, role-targeted model/thinking picker.
|
|
157
|
-
- **[0.4.1]** (2026-06-12) — TUI card parity polish + done-time todo reconciliation.
|
|
158
158
|
|
|
159
159
|
See [CHANGELOG.md](CHANGELOG.md) for the full history.
|
|
160
160
|
<!-- CHANGELOG:END -->
|
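package/package.json
CHANGED
|
[package.json hunk not captured in this extract; the file summary lists it as +1 -1, consistent with the version bump 0.4.5 → 0.4.7]
|
package/src/agent/dev/evolution-bridge.ts
CHANGED
|
@@ -1,7 +1,40 @@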
|
|
|
1
|
-
import {
|
|
1
|
+
import { runAgentLoop, executorSystemPrompt, DEFAULT_TOOLS } from "../engine";
|
|
2
|
+
import { readGlobalConfig } from "../state";
|
|
3
|
+
import { runPostImplementationHooks } from "../hooks";
|
|
2
4
|
import { runAdvancedAnalysis } from "./advanced-analyzer";
|
|
3
5
|
import { logEvolution } from "./evolution-logger";
|
|
4
6
|
|
|
7
|
+
async function runEvolutionLoop(intent: string, cwd: string): Promise<void> {
|
|
8
|
+
const config = await readGlobalConfig();
|
|
9
|
+
const model = config.defaultModel || "fast";
|
|
10
|
+
const systemPrompt = executorSystemPrompt();
|
|
11
|
+
|
|
12
|
+
await runAgentLoop([{ role: "user", content: intent }], {
|
|
13
|
+
cwd,
|
|
14
|
+
systemPrompt,
|
|
15
|
+
model,
|
|
16
|
+
tools: DEFAULT_TOOLS,
|
|
17
|
+
maxSteps: 50,
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
console.log("\n[jeo] Verifying implementation...");
|
|
21
|
+
const verify = await runPostImplementationHooks(cwd, intent);
|
|
22
|
+
|
|
23
|
+
if (!verify.success) {
|
|
24
|
+
console.error("\n[jeo] Verification FAILED. Auto-repairing...");
|
|
25
|
+
const repairTask = `Previous implementation failed verification.\nErrors:\n${verify.output}\n\nPlease fix.`;
|
|
26
|
+
await runAgentLoop([{ role: "user", content: repairTask }], {
|
|
27
|
+
cwd,
|
|
28
|
+
systemPrompt,
|
|
29
|
+
model,
|
|
30
|
+
tools: DEFAULT_TOOLS,
|
|
31
|
+
maxSteps: 30,
|
|
32
|
+
});
|
|
33
|
+
} else {
|
|
34
|
+
console.log("\n[jeo] Verification SUCCESSFUL.");
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
5
38
|
export async function consultGjcForAdvancedEvolution(cwd: string) {
|
|
6
39
|
const report = await runAdvancedAnalysis(cwd);
|
|
7
40
|
const timestamp = new Date().toISOString();
|
|
@@ -27,7 +60,7 @@ As my implementation guide (gjc), please:
|
|
|
27
60
|
`;
|
|
28
61
|
|
|
29
62
|
try {
|
|
30
|
-
await
|
|
63
|
+
await runEvolutionLoop(request, cwd);
|
|
31
64
|
|
|
32
65
|
await logEvolution({
|
|
33
66
|
timestamp: new Date().toISOString(),
|
|
@@ -62,7 +95,7 @@ export async function consultGjcForEvolution(cwd: string) {
|
|
|
62
95
|
|
|
63
96
|
console.log();
|
|
64
97
|
try {
|
|
65
|
-
await
|
|
98
|
+
await runEvolutionLoop(report, cwd);
|
|
66
99
|
await logEvolution({
|
|
67
100
|
timestamp: new Date().toISOString(),
|
|
68
101
|
target: "src/agent/engine.ts",
|
|
package/src/agent/dev/self-analysis.ts
CHANGED
|
@@ -22,7 +22,12 @@ export async function runSelfAnalysis(cwd: string): Promise<string> {
|
|
|
22
22
|
const content = await fs.readFile(targetPath, "utf-8");
|
|
23
23
|
|
|
24
24
|
const lineCount = content.split("\n").length;
|
|
25
|
-
|
|
25
|
+
// Ownership-accurate SRP check: the loop drives steps, while output shaping
|
|
26
|
+
// (truncate/spill) lives in tool-output.ts. Flag only when those are DEFINED
|
|
27
|
+
// here again, not merely imported or re-exported for backward compatibility.
|
|
28
|
+
const definesOutputShaping =
|
|
29
|
+
/\bfunction\s+truncateToolOutput\b/.test(content) && /\bfunction\s+spillToolResult\b/.test(content);
|
|
30
|
+
const hasTooManyResponsibilities = content.includes("runAgentLoop") && definesOutputShaping;
|
|
26
31
|
|
|
27
32
|
let report = "Analysis of src/agent/engine.ts:\n";
|
|
28
33
|
report += "- File length: " + lineCount + " lines.\n";
|
package/src/agent/engine.ts
CHANGED
|
@@ -16,7 +16,8 @@ import { webSearchTool, setWebSearchActiveModel } from "./web-search";
|
|
|
16
16
|
import { friendlyProviderError, isContextOverflowError, isRefusalError } from "../util/provider-error";
|
|
17
17
|
import { isRateLimitError } from "../util/retry";
|
|
18
18
|
import { runPreToolHooks, runPostTurnHooks } from "./hooks";
|
|
19
|
-
import {
|
|
19
|
+
import { truncateToolOutput, formatToolResultBody } from "./tool-output";
|
|
20
|
+
export { TOOL_OUTPUT_MAX, READ_OUTPUT_MAX, TOOL_SPILL_THRESHOLD, MAX_TOOL_ARTIFACTS, truncateToolOutput, spillToolResult } from "./tool-output";
|
|
20
21
|
import { StepBudget, dynamicStepBudgetConfig, resolveStepBudgetConfig, hashSignature, type StepBudgetConfig } from "./step-budget";
|
|
21
22
|
import { historyTokens, trimToolResultsInPlace } from "./compaction";
|
|
22
23
|
import { jeoEnv } from "../util/env";
|
|
@@ -30,6 +31,7 @@ async function invokeCallLlm(history: Message[], options: {
|
|
|
30
31
|
onUsage?: (u: { inputTokens?: number; outputTokens?: number }) => void;
|
|
31
32
|
onRetry?: (attempt: number, err: unknown, delayMs: number) => void;
|
|
32
33
|
onToken?: (delta: string) => void;
|
|
34
|
+
onReasoning?: (delta: string) => void;
|
|
33
35
|
}): Promise<string> {
|
|
34
36
|
const mod = await import("./loop");
|
|
35
37
|
return mod.callLlm(history, options);
|
|
@@ -39,14 +41,14 @@ export interface ToolInvocation {
|
|
|
39
41
|
arguments?: Record<string, any>;
|
|
40
42
|
}
|
|
41
43
|
|
|
42
|
-
export type ToolHandler = (args: Record<string, any>, cwd: string) => Promise<ToolResult>;
|
|
44
|
+
export type ToolHandler = (args: Record<string, any>, cwd: string, onProgress?: (partialOutput: string) => void) => Promise<ToolResult>;
|
|
43
45
|
|
|
44
|
-
/** The default executor toolset (read / write / edit / bash / find / search). */
|
|
46
|
+
/** The default executor toolset (read / write / edit / bash / find / search / ls / mkdir / delete / web_search). */
|
|
45
47
|
export const DEFAULT_TOOLS: Record<string, ToolHandler> = {
|
|
46
48
|
read: (a, cwd) => readTool(a.filePath ?? a.path, a.lineRange ?? a.range, cwd, !!a.raw),
|
|
47
49
|
write: (a, cwd) => writeTool(a.filePath ?? a.path, a.content ?? "", cwd),
|
|
48
50
|
edit: (a, cwd) => editTool(a.filePath ?? a.path, a.editBlock ?? a.edit ?? "", cwd),
|
|
49
|
-
bash: (a, cwd) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined),
|
|
51
|
+
bash: (a, cwd, onProgress) => bashTool(a.command ?? a.cmd, cwd, typeof a.timeoutMs === "number" ? a.timeoutMs : undefined, typeof a.cwd === "string" ? a.cwd : (typeof a.subdir === "string" ? a.subdir : undefined), a.env && typeof a.env === "object" ? a.env : undefined, onProgress),
|
|
50
52
|
find: (a, cwd) => findTool(a.globPattern ?? a.pattern, cwd),
|
|
51
53
|
search: (a, cwd) => searchTool(a.pattern, a.globPattern ?? "*", cwd, !!(a.ignoreCase ?? a.i), { before: a.before, after: a.after, context: a.context, maxMatches: a.maxMatches }),
|
|
52
54
|
ls: (a, cwd) => lsTool(a.dirPath ?? a.path ?? a.dir ?? ".", cwd),
|
|
@@ -134,6 +136,10 @@ export interface AgentLoopEvents {
|
|
|
134
136
|
onStep?(step: number): void | Promise<void>;
|
|
135
137
|
onAssistant?(raw: string, invocation: ToolInvocation | null): void;
|
|
136
138
|
onToolResult?(tool: string, success: boolean, output: string): void;
|
|
139
|
+
/** Streaming partial output of the currently-running tool (e.g. bash stdout as it
|
|
140
|
+
* arrives) — drives a live DIMMED output view that the final formatted result
|
|
141
|
+
* replaces on onToolResult. Only bash emits today; other tools are unaffected. */
|
|
142
|
+
onToolProgress?(tool: string, partial: string): void;
|
|
137
143
|
/** Transient progress notice (e.g. "rate limited — retrying in Ns"); NOT a terminal error. */
|
|
138
144
|
onNotice?(message: string): void;
|
|
139
145
|
/** Cumulative token usage after each LLM call — drives live usage meters. */
|
|
@@ -141,6 +147,9 @@ export interface AgentLoopEvents {
|
|
|
141
147
|
/** Accumulated streamed model response so far — drives the live reasoning view. Only
|
|
142
148
|
* requested when a consumer sets it (the engine streams solely for the TUI). */
|
|
143
149
|
onModelStream?(textSoFar: string): void;
|
|
150
|
+
/** Accumulated native reasoning/thinking text so far — drives a transient dimmed
|
|
151
|
+
* "thinking" view. Only requested when a consumer (TUI) attaches. */
|
|
152
|
+
onReasoningStream?(textSoFar: string): void;
|
|
144
153
|
/** Step-budget change (gjc-style retry flow): the limit was extended because the
|
|
145
154
|
* turn is making progress. `limit` is the new max; `reason` is display-ready. */
|
|
146
155
|
onBudget?(limit: number, reason: string): void;
|
|
@@ -149,6 +158,10 @@ export interface AgentLoopEvents {
|
|
|
149
158
|
* first"); return null to let the turn finish. The engine guarantees at most
|
|
150
159
|
* one bounce per turn, so a stubborn model can never loop here. */
|
|
151
160
|
onBeforeDone?(reason: string): string | null;
|
|
161
|
+
/** Fired when a mid-turn steering message (an additional user query typed while
|
|
162
|
+
* the turn is running) is injected into the live history. `text` is the raw
|
|
163
|
+
* user line — drives a TUI notice so the user sees their input was picked up. */
|
|
164
|
+
onSteer?(text: string): void;
|
|
152
165
|
}
|
|
153
166
|
|
|
154
167
|
export interface AgentLoopOptions {
|
|
@@ -173,6 +186,11 @@ export interface AgentLoopOptions {
|
|
|
173
186
|
/** Step-budget overrides (gjc-style retry flow). `{ maxExtensions: 0 }` restores the
|
|
174
187
|
* legacy fixed counter — used by bounded subagent delegation. */
|
|
175
188
|
budget?: Partial<StepBudgetConfig>;
|
|
189
|
+
/** Mid-turn steering drain (gjc parity): called at each step boundary. Any strings
|
|
190
|
+
* returned are appended to `history` as user messages BEFORE the next model call,
|
|
191
|
+
* so an additional query typed while the turn runs steers the live turn instead of
|
|
192
|
+
* waiting for the next prompt. Return [] when nothing is pending. */
|
|
193
|
+
steer?: () => string[];
|
|
176
194
|
}
|
|
177
195
|
|
|
178
196
|
export interface AgentLoopResult {
|
|
@@ -183,14 +201,6 @@ export interface AgentLoopResult {
|
|
|
183
201
|
usage?: { inputTokens: number; outputTokens: number };
|
|
184
202
|
}
|
|
185
203
|
|
|
186
|
-
/** Env-tunable output budget (plan/gjc-inheritance.md B10, gjc settings-driven
|
|
187
|
-
* output handling 계승): JEO_TOOL_OUTPUT_MAX caps the model-visible tool result;
|
|
188
|
-
* the spill threshold tracks it so anything truncated stays artifact-recoverable. */
|
|
189
|
-
function envOutputMax(): number {
|
|
190
|
-
const raw = Number(jeoEnv("TOOL_OUTPUT_MAX") ?? "");
|
|
191
|
-
return Number.isFinite(raw) && raw >= 500 && raw <= 200_000 ? Math.trunc(raw) : 4_000;
|
|
192
|
-
}
|
|
193
|
-
export const TOOL_OUTPUT_MAX = envOutputMax();
|
|
194
204
|
|
|
195
205
|
/** Wall-clock budget for ONE agent turn (ms). JEO_TURN_MAX_MS overrides; 0 disables.
|
|
196
206
|
* Default 30 minutes: long autonomous runs stay alive, while a turn that spins in
|
|
@@ -205,55 +215,6 @@ export function turnMaxMs(env: Record<string, string | undefined> = process.env)
|
|
|
205
215
|
return 30 * 60 * 1000;
|
|
206
216
|
}
|
|
207
217
|
|
|
208
|
-
/**
|
|
209
|
-
* Cap a tool result fed back to the model, keeping both ends: the head holds the
|
|
210
|
-
* start (e.g. a file's top / a command's invocation) and the tail holds what's
|
|
211
|
-
* usually decisive (test summaries, the final error). A pure head-cut loses that.
|
|
212
|
-
*/
|
|
213
|
-
export function truncateToolOutput(s: string, max = TOOL_OUTPUT_MAX): string {
|
|
214
|
-
if (s.length <= max) return s;
|
|
215
|
-
const head = Math.floor(max * 0.6);
|
|
216
|
-
const tail = max - head;
|
|
217
|
-
return `${s.slice(0, head)}\n…(${s.length - max} chars truncated)…\n${s.slice(s.length - tail)}`;
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
/** Tool output larger than this is spilled to a recoverable artifact file. Aligned
|
|
221
|
-
* with `truncateToolOutput`'s cap so that whenever the model-visible result drops
|
|
222
|
-
* content, the full output is recoverable via the artifact. */
|
|
223
|
-
export const TOOL_SPILL_THRESHOLD = TOOL_OUTPUT_MAX;
|
|
224
|
-
|
|
225
|
-
/**
|
|
226
|
-
* Write an oversized tool result verbatim under `.jeo/artifacts/tool-results/` and
|
|
227
|
-
* return the workspace-relative path (for the model to `read`). Best-effort: throws
|
|
228
|
-
* are caught by the caller, which simply omits the artifact note.
|
|
229
|
-
*/
|
|
230
|
-
/** Most recent tool-result artifacts to keep; older ones are pruned on each spill. */
|
|
231
|
-
export const MAX_TOOL_ARTIFACTS = 50;
|
|
232
|
-
|
|
233
|
-
/** Best-effort retention: keep the newest `MAX_TOOL_ARTIFACTS` files in `dir`, delete the rest. */
|
|
234
|
-
async function pruneToolArtifacts(dir: string): Promise<void> {
|
|
235
|
-
const files = await fs.readdir(dir).catch(() => [] as string[]);
|
|
236
|
-
if (files.length <= MAX_TOOL_ARTIFACTS) return;
|
|
237
|
-
const stamped = await Promise.all(
|
|
238
|
-
files.map(async f => ({ f, m: (await fs.stat(path.join(dir, f)).catch(() => null))?.mtimeMs ?? 0 })),
|
|
239
|
-
);
|
|
240
|
-
stamped.sort((a, b) => b.m - a.m); // newest first
|
|
241
|
-
for (const { f } of stamped.slice(MAX_TOOL_ARTIFACTS)) {
|
|
242
|
-
await fs.rm(path.join(dir, f), { force: true }).catch(() => {});
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
export async function spillToolResult(tool: string, output: string, cwd: string): Promise<string> {
|
|
247
|
-
const dir = path.join(cwd, ".jeo", "artifacts", "tool-results");
|
|
248
|
-
await fs.mkdir(dir, { recursive: true });
|
|
249
|
-
const safeTool = tool.replace(/[^a-zA-Z0-9_-]/g, "_").slice(0, 32) || "tool";
|
|
250
|
-
const stamp = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
251
|
-
const rel = path.join(".jeo", "artifacts", "tool-results", `${stamp}-${safeTool}.txt`);
|
|
252
|
-
await fs.writeFile(path.join(cwd, rel), output, "utf-8");
|
|
253
|
-
// Retention so a long session can't grow the artifact dir without bound.
|
|
254
|
-
await pruneToolArtifacts(dir);
|
|
255
|
-
return rel;
|
|
256
|
-
}
|
|
257
218
|
|
|
258
219
|
/** Levenshtein distance (small inputs: tool/command names). */
|
|
259
220
|
function editDistance(a: string, b: string): number {
|
|
@@ -400,6 +361,29 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
400
361
|
}
|
|
401
362
|
await ev.onStep?.(step);
|
|
402
363
|
|
|
364
|
+
// MID-TURN steering (gjc parity): drain any additional user queries typed while
|
|
365
|
+
// the turn is running and inject them as user messages BEFORE this step's model
|
|
366
|
+
// call, so the live turn adapts immediately instead of deferring to the next
|
|
367
|
+
// prompt. A genuine new instruction resets the stall/failure guards (it is fresh
|
|
368
|
+
// progress, not a repeat) and earns a budget extension so the loop has room to act.
|
|
369
|
+
if (opts.steer) {
|
|
370
|
+
const pending = opts.steer();
|
|
371
|
+
for (const raw of pending) {
|
|
372
|
+
const text = (raw ?? "").trim();
|
|
373
|
+
if (!text) continue;
|
|
374
|
+
history.push({
|
|
375
|
+
role: "user",
|
|
376
|
+
content: `[mid-turn steering — additional instruction from the user; incorporate it now]\n${text}`,
|
|
377
|
+
});
|
|
378
|
+
ev.onSteer?.(text);
|
|
379
|
+
repeatCount = 0;
|
|
380
|
+
lastSig = "";
|
|
381
|
+
consecutiveFailures = 0;
|
|
382
|
+
recentStepSigs.length = 0;
|
|
383
|
+
budget.noteSteer?.();
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
|
|
403
387
|
// MID-TURN context guard: a single long turn (60+ steps) otherwise grows the
|
|
404
388
|
// history without bound — turn-boundary compaction never runs inside a turn,
|
|
405
389
|
// and field evidence shows multi-million-token prompts degrading the model
|
|
@@ -421,6 +405,10 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
421
405
|
const onToken = ev.onModelStream
|
|
422
406
|
? (delta: string) => { streamBuf += delta; ev.onModelStream!(streamBuf); }
|
|
423
407
|
: undefined;
|
|
408
|
+
let reasonBuf = "";
|
|
409
|
+
const onReasoning = ev.onReasoningStream
|
|
410
|
+
? (delta: string) => { reasonBuf += delta; ev.onReasoningStream!(reasonBuf); }
|
|
411
|
+
: undefined;
|
|
424
412
|
let responseText: string;
|
|
425
413
|
try {
|
|
426
414
|
responseText = await invokeCallLlm(history, {
|
|
@@ -430,6 +418,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
430
418
|
signal: opts.signal,
|
|
431
419
|
onUsage: u => { acc.inputTokens += u.inputTokens ?? 0; acc.outputTokens += u.outputTokens ?? 0; sawUsage = true; },
|
|
432
420
|
onToken,
|
|
421
|
+
onReasoning,
|
|
433
422
|
// Make provider auto-retry visible: previously a rate-limited call sat in a
|
|
434
423
|
// silent backoff wait, then surfaced "auto-retry was exhausted" with no trace
|
|
435
424
|
// of the retries that DID happen.
|
|
@@ -623,6 +612,29 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
623
612
|
continue;
|
|
624
613
|
}
|
|
625
614
|
}
|
|
615
|
+
// Steering that arrived DURING this final step (after the top-of-loop drain,
|
|
616
|
+
// while the model was generating its `done`): reopen the turn and handle it now
|
|
617
|
+
// instead of letting it bounce to the next prompt. Bounded by the step/time budget.
|
|
618
|
+
if (opts.steer) {
|
|
619
|
+
const pending = opts.steer().map(s => (s ?? "").trim()).filter(Boolean);
|
|
620
|
+
if (pending.length) {
|
|
621
|
+
history.push({ role: "assistant", content: responseText });
|
|
622
|
+
for (const text of pending) {
|
|
623
|
+
history.push({
|
|
624
|
+
role: "user",
|
|
625
|
+
content: `[mid-turn steering — additional instruction from the user; incorporate it now before finishing]\n${text}`,
|
|
626
|
+
});
|
|
627
|
+
ev.onSteer?.(text);
|
|
628
|
+
}
|
|
629
|
+
repeatCount = 0;
|
|
630
|
+
lastSig = "";
|
|
631
|
+
consecutiveFailures = 0;
|
|
632
|
+
recentStepSigs.length = 0;
|
|
633
|
+
budget.noteSteer();
|
|
634
|
+
step++;
|
|
635
|
+
continue;
|
|
636
|
+
}
|
|
637
|
+
}
|
|
626
638
|
return finish({ done: true, steps: step, doneReason: (toolCalls[0].arguments?.reason as string) ?? "" });
|
|
627
639
|
}
|
|
628
640
|
|
|
@@ -728,7 +740,8 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
728
740
|
output = preHookResult.error + (preHookResult.output ? `\n${preHookResult.output}` : "");
|
|
729
741
|
} else {
|
|
730
742
|
try {
|
|
731
|
-
const
|
|
743
|
+
const onProgress = ev.onToolProgress ? (partial: string) => ev.onToolProgress!(tool, partial) : undefined;
|
|
744
|
+
const res = await handler(args ?? {}, cwd, onProgress);
|
|
732
745
|
success = res.success;
|
|
733
746
|
output = res.success ? res.output : (res.error ? (res.output ? `${res.error}\n${res.output}` : res.error) : res.output);
|
|
734
747
|
} catch (err: any) {
|
|
@@ -820,15 +833,7 @@ export async function runAgentLoop(history: Message[], opts: AgentLoopOptions):
|
|
|
820
833
|
|
|
821
834
|
ev.onToolResult?.(call.tool, res.success, res.output);
|
|
822
835
|
|
|
823
|
-
const
|
|
824
|
-
const visible = minimized.text;
|
|
825
|
-
let resultBody = truncateToolOutput(visible);
|
|
826
|
-
if (res.output.length > TOOL_SPILL_THRESHOLD) {
|
|
827
|
-
const artifact = await spillToolResult(call.tool, res.output, cwd).catch(() => null);
|
|
828
|
-
if (artifact) {
|
|
829
|
-
resultBody += `\n[full output (${res.output.length} chars) saved to ${artifact} — read it for the elided middle]`;
|
|
830
|
-
}
|
|
831
|
-
}
|
|
836
|
+
const resultBody = await formatToolResultBody(call.tool, res.output, cwd);
|
|
832
837
|
|
|
833
838
|
const { diags: hookDiags, ran: hooksRan } = await runPostTurnHooks(
|
|
834
839
|
cwd,
|
package/src/agent/loop.ts
CHANGED
|
@@ -19,6 +19,8 @@ export interface ChatOptions {
|
|
|
19
19
|
* delivered here (concatenation equals the returned string). Absent ⇒ a single
|
|
20
20
|
* non-streaming `call()` (unchanged behavior for non-interactive/test callers). */
|
|
21
21
|
onToken?: (delta: string) => void;
|
|
22
|
+
/** Streaming sink for native reasoning/thinking deltas (drives the dimmed live view). */
|
|
23
|
+
onReasoning?: (delta: string) => void;
|
|
22
24
|
}
|
|
23
25
|
|
|
24
26
|
const manager = createModelManager();
|
package/src/agent/step-budget.ts
CHANGED
|
@@ -180,6 +180,16 @@ export class StepBudget {
|
|
|
180
180
|
if (this.window.length > this.cfg.windowSize) this.window.shift();
|
|
181
181
|
}
|
|
182
182
|
|
|
183
|
+
/** A mid-turn steering message arrived — fresh, user-driven work. Grant headroom
|
|
184
|
+
* (capped at the hard cap, without consuming the extension budget) and clear the
|
|
185
|
+
* scoring window so the new instruction is never declined by the previous
|
|
186
|
+
* sub-task's stall/failure signals. */
|
|
187
|
+
noteSteer(): void {
|
|
188
|
+
this.window.length = 0;
|
|
189
|
+
this.novelSinceExtension = 0;
|
|
190
|
+
this.currentLimit = Math.min(this.currentLimit + this.cfg.extensionSteps, this.cfg.hardCap);
|
|
191
|
+
}
|
|
192
|
+
|
|
183
193
|
/** Progress over the recent window: ok count, total, distinct signatures. */
|
|
184
194
|
progress(): { ok: number; total: number; distinct: number } {
|
|
185
195
|
const ok = this.window.filter(r => r.success).length;
|
|
package/src/agent/subagent-registry.ts
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process detached-subagent registry (gjc `subagent`/`job` parity, scoped down
|
|
3
|
+
* to one runtime). The synchronous `task` tool blocks the parent until a subagent
|
|
4
|
+
* finishes; a DETACHED launch registers the run here and returns immediately, so
|
|
5
|
+
* the parent can keep working and later list / inspect / await / cancel it via the
|
|
6
|
+
* `subagent` control tool. Concurrency is real (JS event loop): a detached run's
|
|
7
|
+
* awaits interleave with the parent's between steps.
|
|
8
|
+
*
|
|
9
|
+
* Lifecycle is bounded to the turn that created the registry — `cancelAll()` on
|
|
10
|
+
* turn teardown guarantees no background promise leaks into the next turn.
|
|
11
|
+
*/
|
|
12
|
+
import type { ToolResult } from "./tools";
|
|
13
|
+
|
|
14
|
+
export type SubagentStatus = "running" | "completed" | "failed" | "cancelled";
|
|
15
|
+
|
|
16
|
+
export interface SubagentRecord {
|
|
17
|
+
/** Stable id, e.g. "executor-1". */
|
|
18
|
+
id: string;
|
|
19
|
+
role: string;
|
|
20
|
+
/** The assignment text (trimmed for display). */
|
|
21
|
+
task: string;
|
|
22
|
+
status: SubagentStatus;
|
|
23
|
+
startedAt: number;
|
|
24
|
+
finishedAt?: number;
|
|
25
|
+
/** Whether the finished run reported success (contract satisfied). */
|
|
26
|
+
success?: boolean;
|
|
27
|
+
/** Final subagent report/output, set once the run settles. */
|
|
28
|
+
result?: string;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
interface Entry {
|
|
32
|
+
record: SubagentRecord;
|
|
33
|
+
promise: Promise<void>;
|
|
34
|
+
abort: AbortController;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/** A detached run: receives its own AbortSignal and resolves to the subagent's
|
|
38
|
+
* final ToolResult. The runner is responsible for streaming live events itself. */
|
|
39
|
+
export type DetachedRunner = (signal: AbortSignal) => Promise<ToolResult>;
|
|
40
|
+
|
|
41
|
+
/**
 * In-process registry of detached subagent runs.
 *
 * `launch` starts a run immediately and returns a live `SubagentRecord`. The registry
 * mutates that same object in place when the run settles or is cancelled, so the
 * records returned by `list`, `get`, `awaitIds` and `cancel` are shared references,
 * not copies.
 */
export class SubagentRegistry {
  /** Largest delay a Node timer honours; anything above it is silently treated as 1ms. */
  private static readonly MAX_TIMER_MS = 2147483647;

  /** Every run launched through this registry, keyed by id, in launch order. */
  private readonly entries = new Map<string, Entry>();
  /** Per-role launch counter used to mint ids of the form `${role}-${n}`. */
  private readonly seq = new Map<string, number>();

  /**
   * Register and START a detached run; returns the (running) record immediately.
   *
   * @param role   Role name; also the prefix of the generated id.
   * @param task   Assignment text; stored truncated to 200 characters for display.
   * @param runner Run body; receives this run's AbortSignal.
   * @returns The live record, updated in place once the run settles.
   */
  launch(role: string, task: string, runner: DetachedRunner): SubagentRecord {
    const n = (this.seq.get(role) ?? 0) + 1;
    this.seq.set(role, n);
    const id = `${role}-${n}`;
    const abort = new AbortController();
    const record: SubagentRecord = {
      id,
      role,
      task: task.length > 200 ? task.slice(0, 197) + "…" : task,
      status: "running",
      startedAt: Date.now(),
    };
    // The wrapper never rejects: every outcome is folded into the record, so
    // awaiting `promise` is always safe.
    const promise = (async () => {
      try {
        const res = await runner(abort.signal);
        // A cancel that already fired wins — don't overwrite the terminal state.
        if (record.status === "cancelled") return;
        record.status = res.success ? "completed" : "failed";
        record.success = res.success;
        record.result = res.output || res.error || "";
      } catch (err) {
        if (record.status === "cancelled") return;
        record.status = "failed";
        // A thrown runner is an unsuccessful run; keep `success` consistent with `status`.
        record.success = false;
        record.result = err instanceof Error ? err.message : String(err);
      } finally {
        // cancel() stamps finishedAt itself; only fill it in for natural completion.
        if (record.finishedAt === undefined) record.finishedAt = Date.now();
      }
    })();
    this.entries.set(id, { record, promise, abort });
    return record;
  }

  /** All known records, in launch order. */
  list(): SubagentRecord[] {
    return [...this.entries.values()].map(e => e.record);
  }

  /** The record for `id`, or undefined when no such run was launched. */
  get(id: string): SubagentRecord | undefined {
    return this.entries.get(id)?.record;
  }

  /** Records whose status is still "running". */
  running(): SubagentRecord[] {
    return this.list().filter(r => r.status === "running");
  }

  /**
   * Map ids to entries. An empty list means "every run that is still running";
   * unknown ids are skipped.
   */
  private targetsFor(ids: string[]): Entry[] {
    if (ids.length === 0) {
      return [...this.entries.values()].filter(e => e.record.status === "running");
    }
    const found: Entry[] = [];
    for (const id of ids) {
      const entry = this.entries.get(id);
      if (entry !== undefined) found.push(entry);
    }
    return found;
  }

  /**
   * Wait for the given ids (or all running, when empty). With `timeoutMs` the wait
   * is bounded — unfinished runs simply stay "running" in the returned snapshot.
   *
   * @param ids       Ids to wait for; unknown ids are ignored.
   * @param timeoutMs Optional upper bound in milliseconds; ignored unless > 0.
   * @returns The records of the resolved targets, in the order requested.
   */
  async awaitIds(ids: string[], timeoutMs?: number): Promise<SubagentRecord[]> {
    const targets = this.targetsFor(ids);
    const settled = Promise.all(targets.map(e => e.promise));
    if (timeoutMs !== undefined && timeoutMs > 0) {
      // Clamp: a delay above the timer maximum would fire after 1ms instead.
      const delay = Math.min(timeoutMs, SubagentRegistry.MAX_TIMER_MS);
      let handle: ReturnType<typeof setTimeout> | undefined;
      const timer = new Promise<void>(resolve => {
        handle = setTimeout(resolve, delay);
      });
      try {
        await Promise.race([settled, timer]);
      } finally {
        // Always release the timer so it cannot keep the process alive.
        if (handle !== undefined) clearTimeout(handle);
      }
    } else {
      await settled;
    }
    return targets.map(e => e.record);
  }

  /**
   * Cancel the given ids (or all running, when empty): aborts the run and marks the
   * record cancelled. Already-terminal records are returned unchanged.
   *
   * @param ids Ids to cancel; unknown ids are ignored.
   * @returns The records of the resolved targets.
   */
  cancel(ids: string[]): SubagentRecord[] {
    const out: SubagentRecord[] = [];
    for (const e of this.targetsFor(ids)) {
      if (e.record.status === "running") {
        // Mark the record first so the run's own completion handler sees "cancelled".
        e.record.status = "cancelled";
        e.record.finishedAt = Date.now();
        e.abort.abort();
      }
      out.push(e.record);
    }
    return out;
  }

  /** Abort every still-running subagent (turn teardown / Ctrl-C). */
  cancelAll(): SubagentRecord[] {
    return this.cancel(this.running().map(r => r.id));
  }
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `subagent` control tool (#9) — the parent's handle on DETACHED subagents launched
|
|
3
|
+
* via `task {detached:true}`. Mirrors gjc's `subagent`/`job` control surface, scoped
|
|
4
|
+
* to an in-process registry: list, inspect, await (optionally bounded), and cancel.
|
|
5
|
+
*
|
|
6
|
+
* Out of scope here (separate subsystems, not stubbed): live peer messaging (IRC)
|
|
7
|
+
* and pause/resume — a step-budget loop has no safe mid-step checkpoint to resume
|
|
8
|
+
* from, so those are intentionally absent rather than faked.
|
|
9
|
+
*/
|
|
10
|
+
import type { ToolHandler } from "./engine";
|
|
11
|
+
import type { ToolResult } from "./tools";
|
|
12
|
+
import type { SubagentRegistry, SubagentRecord } from "./subagent-registry";
|
|
13
|
+
|
|
14
|
+
/** Single-line description of the `subagent` tool protocol, appended to the launch system prompt. */
export const SUBAGENT_TOOL_PROTOCOL_LINE = [
  `subagent {action:"list"|"inspect"|"await"|"cancel", ids?, timeoutMs?} — control DETACHED `,
  `subagents started with task{detached:true}. 'await' blocks (optionally up to timeoutMs ms) and `,
  `returns their reports; 'inspect' shows status + result; 'cancel' aborts them. Omit ids to target all running.`,
].join("");
|
|
19
|
+
|
|
20
|
+
/**
 * Whole-second duration of a run, formatted like "12s". Measures up to `finishedAt`
 * when the run has one, otherwise up to now; never reports a negative duration.
 */
function elapsed(rec: SubagentRecord): string {
  const stop = rec.finishedAt != null ? rec.finishedAt : Date.now();
  const seconds = Math.round((stop - rec.startedAt) / 1000);
  return `${Math.max(0, seconds)}s`;
}
|
|
24
|
+
|
|
25
|
+
/** Summary row for one subagent: id, upper-cased status, elapsed time and task text. */
function rowLine(rec: SubagentRecord): string {
  const badge = rec.status.toUpperCase();
  const duration = elapsed(rec);
  return `- ${rec.id} [${badge}] ${duration} · ${rec.task}`;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Summary row for a subagent, followed on a new line by its stored result when the
 * run is no longer running and the result is non-empty.
 */
function detailBlock(rec: SubagentRecord): string {
  const summary = rowLine(rec);
  const hasReport = rec.status !== "running" && Boolean(rec.result);
  return hasReport ? `${summary}\n${rec.result}` : summary;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Extract the target ids from raw tool arguments.
 *
 * Accepted shapes, in priority order:
 * - `ids` as an array: each element is stringified.
 * - `ids` as a string: a single id, or several separated by commas. Without this,
 *   a string `ids` was silently ignored and the caller fell back to "all running",
 *   which for `cancel` meant aborting every subagent instead of the one named.
 * - `id`: a single value, stringified.
 *
 * @returns The ids, or an empty array when none were supplied.
 */
function idsOf(args: Record<string, any>): string[] {
  const raw: unknown = args.ids;
  if (Array.isArray(raw)) return raw.map((x: unknown) => String(x));
  if (typeof raw === "string") {
    // Split on commas only: ids embed the role name, which may itself contain spaces.
    const parts = raw
      .split(",")
      .map(part => part.trim())
      .filter(part => part.length > 0);
    if (parts.length > 0) return parts;
  }
  if (args.id !== undefined) return [String(args.id)];
  return [];
}
|
|
40
|
+
|
|
41
|
+
/**
 * Build the `subagent` tool handler bound to `registry`.
 *
 * `args.action` is case-insensitive and defaults to "list":
 * - "list":    one row per known subagent.
 * - "inspect": rows plus the stored result of settled runs; no ids means every known subagent.
 * - "await":   waits for the targets, optionally bounded by `args.timeoutMs`
 *              (a positive number or numeric string); no ids means every running subagent.
 * - "cancel":  aborts the targets; no ids means every running subagent.
 *
 * Explicit ids that match no subagent produce an error result for "inspect",
 * "await" and "cancel" alike, instead of a "0 subagent(s)" success.
 *
 * @param registry Registry holding the detached runs to control.
 * @returns A handler resolving to a ToolResult; unknown actions resolve to an error result.
 */
export function createSubagentTool(registry: SubagentRegistry): ToolHandler {
  return async (args: Record<string, any>, _cwd: string): Promise<ToolResult> => {
    const action = String(args.action ?? "list").trim().toLowerCase();
    const ids = idsOf(args);
    // Shared error for explicit ids that resolve to nothing.
    const noMatch = (): ToolResult => ({ success: false, output: "", error: `No subagent matches ${ids.join(", ")}.` });
    const known = (id: string): boolean => registry.get(id) !== undefined;

    if (action === "list") {
      const rows = registry.list();
      if (rows.length === 0) {
        return { success: true, output: "No detached subagents this turn. Launch one with task {detached:true}." };
      }
      const running = rows.filter(r => r.status === "running").length;
      return { success: true, output: `${rows.length} subagent(s), ${running} running:\n${rows.map(rowLine).join("\n")}` };
    }

    if (action === "inspect") {
      const targets = (ids.length ? ids.map(id => registry.get(id)) : registry.list())
        .filter((r): r is SubagentRecord => r !== undefined);
      if (targets.length === 0) {
        return ids.length ? noMatch() : { success: false, output: "", error: "No detached subagents this turn." };
      }
      return { success: true, output: targets.map(detailBlock).join("\n\n") };
    }

    if (action === "await") {
      const targets = ids.length ? ids : registry.running().map(r => r.id);
      if (targets.length === 0) {
        return { success: true, output: "No running subagents to await." };
      }
      if (!targets.some(known)) return noMatch();
      // Tool arguments may carry the timeout as a numeric string; an unparsable or
      // non-positive value means "no bound", as before.
      const rawTimeout: unknown = typeof args.timeoutMs === "string" ? Number(args.timeoutMs) : args.timeoutMs;
      const timeoutMs = typeof rawTimeout === "number" && Number.isFinite(rawTimeout) && rawTimeout > 0
        ? rawTimeout
        : undefined;
      const recs = await registry.awaitIds(targets, timeoutMs);
      const stillRunning = recs.filter(r => r.status === "running").length;
      // Runs can only still be running here when the bounded wait timed out.
      const head = stillRunning > 0
        ? `Awaited ${recs.length} subagent(s); ${stillRunning} still running after the ${timeoutMs}ms timeout — await again or cancel.`
        : `Awaited ${recs.length} subagent(s); all settled.`;
      return { success: stillRunning === 0, output: `${head}\n\n${recs.map(detailBlock).join("\n\n")}` };
    }

    if (action === "cancel") {
      const targets = ids.length ? ids : registry.running().map(r => r.id);
      if (targets.length === 0) {
        return { success: true, output: "No running subagents to cancel." };
      }
      if (!targets.some(known)) return noMatch();
      const recs = registry.cancel(targets);
      return { success: true, output: `Cancelled ${recs.length} subagent(s):\n${recs.map(rowLine).join("\n")}` };
    }

    return { success: false, output: "", error: `Unknown subagent action '${action}'. Use list | inspect | await | cancel.` };
  };
}
|