little-coder 1.8.4 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/branding/branding.test.ts +42 -0
- package/.pi/extensions/branding/index.ts +56 -10
- package/.pi/extensions/extra-tools/glob.ts +3 -3
- package/.pi/extensions/extra-tools/index.ts +1 -1
- package/.pi/extensions/plan-mode/index.ts +377 -0
- package/.pi/extensions/plan-mode/plan-mode.test.ts +49 -0
- package/.pi/extensions/plan-mode/status.ts +79 -0
- package/.pi/extensions/prompt-history/index.ts +154 -0
- package/.pi/extensions/prompt-history/prompt-history.test.ts +72 -0
- package/.pi/extensions/read-guard-edit/index.ts +89 -0
- package/.pi/extensions/read-guard-edit/read-guard-edit.test.ts +100 -0
- package/.pi/extensions/skill-inject/index.ts +3 -0
- package/.pi/extensions/skill-inject/selector.test.ts +2 -2
- package/.pi/extensions/subagent/index.ts +201 -0
- package/.pi/extensions/subagent/live-spawn.test.ts +47 -0
- package/.pi/extensions/subagent/spawn.test.ts +97 -0
- package/.pi/extensions/subagent/spawn.ts +373 -0
- package/.pi/extensions/subagent/tracker.ts +139 -0
- package/AGENTS.md +5 -0
- package/CHANGELOG.md +23 -0
- package/README.md +17 -3
- package/bin/little-coder.mjs +56 -5
- package/package.json +2 -2
- package/skills/tools/dispatch.md +38 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
import { runSubCoder, runSubCodersConcurrent } from "./spawn.ts";
|
|
4
|
+
|
|
5
|
+
// Live end-to-end suite: every case launches real child little-coder sessions
// against a local model, so it only runs when LC_LIVE=1 is set (a model server
// must be reachable). Example invocation:
//   LC_LIVE=1 LLAMACPP_API_KEY=noop npx vitest run .pi/extensions/subagent/live-spawn.test.ts
const LIVE = process.env.LC_LIVE === "1";
const MODEL = process.env.LC_LIVE_MODEL || "llamacpp/qwen3.6-35b-a3b";
// This file lives three directories below the package root.
const repoRoot = resolve(__dirname, "..", "..", "..");

// Per-test timeouts in milliseconds; a local model can be slow to answer.
const SINGLE_TIMEOUT_MS = 240_000;
const PARALLEL_TIMEOUT_MS = 300_000;

// Registers the suite as skipped unless live mode is enabled.
const liveDescribe = LIVE ? describe : describe.skip;

liveDescribe("sub-coder live spawn", () => {
  it(
    "spawns a child that runs on the parent's model and returns a report",
    async () => {
      const outcome = await runSubCoder({
        id: "1",
        label: "ping",
        task: "Respond with exactly the single word ALIVE. Do not use any tools.",
        cwd: repoRoot,
        model: MODEL,
      });

      expect(outcome.exitCode).toBe(0);
      expect(outcome.report.toUpperCase()).toContain("ALIVE");
      expect(outcome.usage.turns).toBeGreaterThan(0);
    },
    SINGLE_TIMEOUT_MS,
  );

  it(
    "runs two sub-coders in parallel and reports per-item",
    async () => {
      const jobs = [
        { id: "1", label: "two", task: "What is 2+2? Reply with just the number.", cwd: repoRoot },
        { id: "2", label: "cap", task: "What is the capital of France? One word.", cwd: repoRoot },
      ];

      const results = await runSubCodersConcurrent(jobs, { model: MODEL });

      expect(results).toHaveLength(2);
      expect(results.every((r) => r.exitCode === 0)).toBe(true);
      const [sum, capital] = results;
      expect(sum.report).toContain("4");
      expect(capital.report.toLowerCase()).toContain("paris");
    },
    PARALLEL_TIMEOUT_MS,
  );
});
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
buildChildEnv,
|
|
4
|
+
defaultConcurrency,
|
|
5
|
+
getFinalText,
|
|
6
|
+
resolveLauncher,
|
|
7
|
+
summarizeActivity,
|
|
8
|
+
truncateReport,
|
|
9
|
+
SUBCODER_ALLOWED_TOOLS,
|
|
10
|
+
type SubCoderResult,
|
|
11
|
+
} from "./spawn.ts";
|
|
12
|
+
|
|
13
|
+
// Baseline fixture: a sub-coder that is still running (exitCode -1) with no
// transcript, no stderr and zeroed usage. Individual tests spread it and
// override only the fields they care about.
const base: SubCoderResult = {
  id: "1",
  label: "x",
  task: "t",
  exitCode: -1,
  report: "",
  messages: [],
  stderr: "",
  usage: {
    input: 0,
    output: 0,
    cost: 0,
    turns: 0,
    contextTokens: 0,
  },
};
|
|
23
|
+
|
|
24
|
+
// getFinalText: picks the report text out of a child transcript.
describe("getFinalText", () => {
  // Small builders keep the transcripts below readable.
  const assistantText = (text: string) => ({ role: "assistant", content: [{ type: "text", text }] });
  const assistantToolCall = (name: string) => ({
    role: "assistant",
    content: [{ type: "toolCall", name, arguments: {} }],
  });

  it("returns the last assistant text block", () => {
    const transcript = [assistantText("first"), assistantToolCall("read"), assistantText("final answer")];
    expect(getFinalText(transcript)).toBe("final answer");
  });

  it("returns empty string when there is no assistant text", () => {
    const userOnly = [{ role: "user", content: [{ type: "text", text: "hi" }] }];
    expect(getFinalText(userOnly)).toBe("");
    expect(getFinalText([])).toBe("");
  });
});
|
|
38
|
+
|
|
39
|
+
// truncateReport: short input passes through, long input is cut with a notice.
describe("truncateReport", () => {
  it("leaves short reports intact", () => {
    const input = "short";
    expect(truncateReport(input)).toBe(input);
  });

  it("truncates long reports with a notice", () => {
    const limit = 100;
    const longReport = "a".repeat(5000);
    const out = truncateReport(longReport, limit);
    expect(out.length).toBeLessThan(300);
    expect(out).toContain("truncated at 100 chars");
  });
});
|
|
49
|
+
|
|
50
|
+
// summarizeActivity: the one-line status shown per sub-coder in the tracker.
describe("summarizeActivity", () => {
  it("shows the report's first line when done", () => {
    const finished = { ...base, exitCode: 0, report: "Found 3 routes\nmore" };
    expect(summarizeActivity(finished)).toBe("Found 3 routes");
  });

  it("surfaces the latest tool call while running", () => {
    const grepCall = { type: "toolCall", name: "grep", arguments: { pattern: "login(" } };
    const running = { ...base, messages: [{ role: "assistant", content: [grepCall] }] };
    expect(summarizeActivity(running)).toBe("→ grep login(");
  });

  it("shows the error message on failure", () => {
    const failed = { ...base, exitCode: 1, errorMessage: "boom" };
    expect(summarizeActivity(failed)).toBe("boom");
  });

  it("falls back to working when running with no tool call", () => {
    expect(summarizeActivity(base)).toBe("working…");
  });
});
|
|
68
|
+
|
|
69
|
+
// buildChildEnv: the environment that constrains a child session.
describe("buildChildEnv", () => {
  it("constrains the child to read-only tools and the headless fast-path", () => {
    const env = buildChildEnv();
    const allowed = env.LITTLE_CODER_ALLOWED_TOOLS;

    expect(allowed).toBe(SUBCODER_ALLOWED_TOOLS);
    // Mutating tools and recursive dispatch must never be granted.
    for (const forbidden of ["edit", "write", "dispatch"]) {
      expect(allowed).not.toContain(forbidden);
    }
    expect(env.LITTLE_CODER_PERMISSION_MODE).toBe("auto");
    expect(env.LITTLE_CODER_SUBAGENT).toBe("1");
  });

  it("merges extra overrides", () => {
    const env = buildChildEnv({ FOO: "bar" });
    expect(env.FOO).toBe("bar");
  });
});
|
|
83
|
+
|
|
84
|
+
// resolveLauncher / defaultConcurrency: launcher path math and the env-driven
// concurrency default.
describe("resolveLauncher / defaultConcurrency", () => {
  it("points at bin/little-coder.mjs", () => {
    // Normalize Windows separators so the same pattern works on every platform.
    expect(resolveLauncher().replace(/\\/g, "/")).toMatch(/\/bin\/little-coder\.mjs$/);
  });

  it("defaults concurrency to 2", () => {
    const prev = process.env.LITTLE_CODER_SUBCODER_CONCURRENCY;
    try {
      delete process.env.LITTLE_CODER_SUBCODER_CONCURRENCY;
      expect(defaultConcurrency()).toBe(2);
      process.env.LITTLE_CODER_SUBCODER_CONCURRENCY = "3";
      expect(defaultConcurrency()).toBe(3);
    } finally {
      // Always restore the variable, even when an expectation above throws, so
      // a failure here cannot leak a mutated environment into other tests.
      if (prev === undefined) delete process.env.LITTLE_CODER_SUBCODER_CONCURRENCY;
      else process.env.LITTLE_CODER_SUBCODER_CONCURRENCY = prev;
    }
  });
});
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
// Sub-coder spawn engine.
|
|
2
|
+
//
|
|
3
|
+
// A "sub-coder" is a child little-coder session with an isolated context window,
|
|
4
|
+
// spawned to research a focused question (read the repo + browse online) and
|
|
5
|
+
// report back concisely. Both the `dispatch` tool (index.ts) and Plan Mode
|
|
6
|
+
// (../plan-mode) drive children through this module.
|
|
7
|
+
//
|
|
8
|
+
// Why spawn little-coder's OWN launcher and not bare `pi`: the child must use
|
|
9
|
+
// the same local-model provider, the same extensions, and the same AGENTS.md as
|
|
10
|
+
// the parent. The launcher (bin/little-coder.mjs) is what composes all of that —
|
|
11
|
+
// it registers the provider (llama-cpp-provider), wires every .pi/extension, and
|
|
12
|
+
// passes --system-prompt AGENTS.md. Spawning `pi` directly would yield a blank
|
|
13
|
+
// agent with none of it. We therefore re-invoke the launcher headless
|
|
14
|
+
// (--mode json -p --no-session) and parse pi's JSON event stream from stdout.
|
|
15
|
+
//
|
|
16
|
+
// The child is constrained to read + browse (no edit/write, no recursive
|
|
17
|
+
// dispatch) entirely through environment variables the existing gates already
|
|
18
|
+
// honor — see buildChildEnv().
|
|
19
|
+
|
|
20
|
+
import { spawn } from "node:child_process";
|
|
21
|
+
import { existsSync } from "node:fs";
|
|
22
|
+
import { dirname, resolve } from "node:path";
|
|
23
|
+
import { fileURLToPath } from "node:url";
|
|
24
|
+
|
|
25
|
+
// Comma-separated allow-list handed to the child via LITTLE_CODER_ALLOWED_TOOLS:
// read + search + browse online + read-only bash. Enforcement happens in the
// child's tool-gating extension. edit/write are left out on purpose (children
// never mutate the tree), and so is `dispatch` (no fan-out bombs).
export const SUBCODER_ALLOWED_TOOLS = [
  // Local inspection.
  "read", "grep", "glob", "find", "ls", "bash",
  // Online lookup.
  "webfetch", "websearch",
  // Interactive browser.
  "BrowserNavigate", "BrowserClick", "BrowserType", "BrowserScroll",
  "BrowserExtract", "BrowserBack", "BrowserHistory",
].join(",");
|
|
45
|
+
|
|
46
|
+
// Instruction tacked onto the end of every task prompt so the child replies
// with a short, parent-friendly report instead of a wall of pasted file
// contents.
export const REPORT_SUFFIX = [
  "\n\nWhen done, reply with a CONCISE report (≤ ~200 words): the key findings, ",
  "file:line citations where relevant, and a direct answer to the task. Do NOT ",
  "paste large file contents or long logs — summarize them.",
].join("");

// Default character budget used by truncateReport().
export const MAX_REPORT_CHARS = 2_000;
|
|
54
|
+
|
|
55
|
+
/** Usage counters accumulated from a child's assistant messages. */
export interface SubCoderUsage {
  /** Sum of the `usage.input` values reported by assistant messages. */
  input: number;
  /** Sum of the `usage.output` values reported by assistant messages. */
  output: number;
  /** Sum of the `usage.cost.total` values reported by assistant messages. */
  cost: number;
  /** Number of assistant messages seen so far. */
  turns: number;
  /** `usage.totalTokens` of the most recent assistant message that carried usage. */
  contextTokens: number;
}
|
|
62
|
+
|
|
63
|
+
/** Live or final state of one sub-coder run. */
export interface SubCoderResult {
  /** Caller-supplied identifier, copied from the run options. */
  id: string;
  /** Caller-supplied short display name. */
  label: string;
  /** The task text the child was given (without the report suffix). */
  task: string;
  /** -1 while the child is still running, 0 on success, >0 on failure. */
  exitCode: number;
  /** The child's final assistant text; this is the report shown to the parent model. */
  report: string;
  /** Full child transcript. UI-only (rendered in tool details); never sent to the parent model. */
  messages: any[];
  /** Everything the child wrote to stderr, plus spawn errors. */
  stderr: string;
  /** Accumulated usage counters. */
  usage: SubCoderUsage;
  /** `stopReason` of the latest assistant message that set one. */
  stopReason?: string;
  /** Error text: from the child's messages when present, otherwise derived from stderr or the exit code. */
  errorMessage?: string;
}
|
|
78
|
+
|
|
79
|
+
/** Returns a fresh usage record with every counter at zero. */
function emptyUsage(): SubCoderUsage {
  const zeroed: SubCoderUsage = {
    input: 0,
    output: 0,
    cost: 0,
    turns: 0,
    contextTokens: 0,
  };
  return zeroed;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Absolute path of the little-coder launcher script.
 *
 * This module sits at .pi/extensions/subagent/spawn.ts, so the package root is
 * three directories up and the launcher is bin/little-coder.mjs beneath it.
 * The original note says this is the same path math as branding/index.ts and
 * works in both the local checkout and the installed npm layout.
 */
export function resolveLauncher(): string {
  const thisFile = fileURLToPath(import.meta.url);
  const packageRoot = resolve(dirname(thisFile), "..", "..", "..");
  return resolve(packageRoot, "bin", "little-coder.mjs");
}
|
|
90
|
+
|
|
91
|
+
/**
 * Builds the environment for a child session: the parent's environment, then
 * the sub-coder constraints, then any caller-supplied `extra` entries (which
 * win on key collisions because they are applied last).
 */
export function buildChildEnv(extra?: Record<string, string>): NodeJS.ProcessEnv {
  const constraints: Record<string, string> = {
    // Constrain the child to read + browse, no mutation, no recursion.
    LITTLE_CODER_ALLOWED_TOOLS: SUBCODER_ALLOWED_TOOLS,
    // bash limited to permission-gate's read-only BUILTIN_SAFE_PREFIXES.
    LITTLE_CODER_PERMISSION_MODE: "auto",
    // Headless fast-path in the launcher (skip update-check + settings write).
    LITTLE_CODER_SUBAGENT: "1",
    // Belt and suspenders: never show pi's update banner in a child.
    PI_SKIP_VERSION_CHECK: "1",
  };
  return { ...process.env, ...constraints, ...extra };
}
|
|
105
|
+
|
|
106
|
+
/**
 * Default number of sub-coders to run at once.
 *
 * Reads LITTLE_CODER_SUBCODER_CONCURRENCY and rounds it down to a whole number.
 * Falls back to 2 when the variable is unset, empty, not numeric, not finite,
 * or rounds down to less than 1.
 *
 * @returns An integer >= 1.
 */
export function defaultConcurrency(): number {
  // Floor BEFORE validating: checking `> 0` first would let a value such as
  // "0.5" through and then floor it to a concurrency of 0.
  const parsed = Math.floor(Number(process.env.LITTLE_CODER_SUBCODER_CONCURRENCY));
  return Number.isFinite(parsed) && parsed >= 1 ? parsed : 2;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Extracts the child's report from a transcript.
 *
 * Walks the messages from newest to oldest and, for the first assistant message
 * that contains one, returns its first non-blank text part (untrimmed).
 * Returns "" when no assistant message carries non-blank text.
 */
export function getFinalText(messages: any[]): string {
  let idx = messages.length;
  while (idx-- > 0) {
    const msg = messages[idx];
    if (msg?.role !== "assistant" || !Array.isArray(msg.content)) continue;
    const textPart = msg.content.find(
      (p: any) => p?.type === "text" && typeof p.text === "string" && p.text.trim(),
    );
    if (textPart) return textPart.text;
  }
  return "";
}
|
|
125
|
+
|
|
126
|
+
/**
 * Trims a report and caps it at `max` characters.
 *
 * Reports within the budget come back trimmed and otherwise unchanged. Longer
 * ones are cut to the first `max` characters (trailing whitespace removed) and
 * followed by a notice saying where the cut happened.
 *
 * @param text Report text; null/undefined is treated as "".
 * @param max Character budget; defaults to MAX_REPORT_CHARS.
 */
export function truncateReport(text: string, max = MAX_REPORT_CHARS): string {
  const trimmed = (text ?? "").trim();
  if (trimmed.length > max) {
    const head = trimmed.slice(0, max).trimEnd();
    return head + "\n\n… (report truncated at " + max + " chars — full transcript in tool details)";
  }
  return trimmed;
}
|
|
131
|
+
|
|
132
|
+
/**
 * A one-line "what is this child doing right now" string for the tracker.
 *
 * - Finished (exitCode 0): the first non-blank line of the report, or "(done)".
 * - Failed (exitCode > 0): the first non-blank line of `errorMessage`, else of
 *   `stderr`, else "(failed)".
 * - Running: the most recent tool call as "→ name hint", else "working…".
 *
 * Finished and failed summaries are clipped to 56 characters, and the tool-call
 * hint is flattened to a single line of at most 40 characters, so the result
 * never contains a line break.
 */
export function summarizeActivity(r: SubCoderResult): string {
  const MAX_WIDTH = 56;
  const clip = (s: string): string => (s.length > MAX_WIDTH ? `${s.slice(0, MAX_WIDTH - 1)}…` : s);
  const firstNonBlankLine = (s: string | undefined): string | undefined =>
    (s ?? "").split(/\r?\n/).find((line) => line.trim());

  if (r.exitCode === 0) return clip(firstNonBlankLine(r.report) ?? "(done)");

  if (r.exitCode > 0) {
    // Error text can span several lines (e.g. a stack trace); keep only the
    // first meaningful line so the tracker row stays on one line.
    return clip(firstNonBlankLine(r.errorMessage) ?? firstNonBlankLine(r.stderr) ?? "(failed)");
  }

  // Still running: surface the most recent tool call, scanning newest first.
  for (let i = r.messages.length - 1; i >= 0; i--) {
    const m = r.messages[i];
    if (m?.role !== "assistant" || !Array.isArray(m.content)) continue;
    for (let j = m.content.length - 1; j >= 0; j--) {
      const part = m.content[j];
      if (part?.type !== "toolCall") continue;
      const a = part.arguments ?? {};
      const raw = a.pattern || a.query || a.url || a.path || a.file_path || a.command || "";
      // Multi-line arguments (typically bash commands) are joined with spaces.
      const hint = String(raw)
        .replace(/\s*[\r\n]+\s*/g, " ")
        .trim()
        .slice(0, 40);
      return hint ? `→ ${part.name} ${hint}` : `→ ${part.name}`;
    }
  }
  return "working…";
}
|
|
155
|
+
|
|
156
|
+
/** Inputs for a single sub-coder run. */
export interface RunSubCoderOptions {
  /** Identifier copied verbatim onto the result. */
  id: string;
  /** Short display name copied verbatim onto the result. */
  label: string;
  /** The question or task for the child; the report suffix is appended automatically. */
  task: string;
  /** Working directory the child process is started in. */
  cwd: string;
  /** "provider/id" of the parent's model, so the child uses the same one. */
  model?: string;
  /** Aborting this signal terminates the child process. */
  signal?: AbortSignal;
  /** Called whenever the child emits a new message, with the live result. */
  onUpdate?: (r: SubCoderResult) => void;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Run one sub-coder to completion.
 *
 * Starts the little-coder launcher headless (`--mode json -p --no-session`)
 * under the current Node executable, parses the newline-delimited JSON event
 * stream from the child's stdout, and folds it into a `SubCoderResult`.
 * `opts.onUpdate` is called with the live result after each recorded message.
 *
 * Spawn and process failures are not thrown: they are reported through
 * `exitCode`, `stderr` and `errorMessage`. A child that is terminated by a
 * signal (for example after `opts.signal` aborts) is reported as failed with
 * exit code 1.
 *
 * @param opts Task, working directory, optional model, abort signal and update callback.
 * @returns The final result; `exitCode` is 0 on success and > 0 on failure.
 */
export async function runSubCoder(opts: RunSubCoderOptions): Promise<SubCoderResult> {
  const result: SubCoderResult = {
    id: opts.id,
    label: opts.label,
    task: opts.task,
    exitCode: -1,
    report: "",
    messages: [],
    stderr: "",
    usage: emptyUsage(),
  };

  const launcher = resolveLauncher();
  if (!existsSync(launcher)) {
    result.exitCode = 1;
    result.stderr = `sub-coder launcher not found at ${launcher}`;
    result.errorMessage = result.stderr;
    opts.onUpdate?.(result);
    return result;
  }

  const args = [
    launcher,
    "--no-update-check",
    "--mode",
    "json",
    "-p",
    "--no-session",
    // Match the parent's model so children run on the same backend. Without
    // this the child would fall back to pi's default model.
    ...(opts.model ? ["--model", opts.model] : []),
    opts.task + REPORT_SUFFIX,
  ];

  // Refresh the report from the transcript and notify the listener.
  const emit = () => {
    result.report = getFinalText(result.messages);
    opts.onUpdate?.(result);
  };

  const exitCode = await new Promise<number>((resolveP) => {
    let spawned;
    try {
      spawned = spawn(process.execPath, args, {
        cwd: opts.cwd,
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
        env: buildChildEnv(),
      });
    } catch (e) {
      result.stderr += String((e as Error)?.message ?? e);
      resolveP(1);
      return;
    }
    // Bind to a const so the closures below see the precise process type.
    const proc = spawned;

    let killTimer: ReturnType<typeof setTimeout> | undefined;
    let onAbort: (() => void) | undefined;
    // Drop the pending SIGKILL escalation and the abort listener once the
    // child can no longer be signalled, so neither outlives this run.
    const cleanup = () => {
      if (killTimer !== undefined) clearTimeout(killTimer);
      if (onAbort) opts.signal?.removeEventListener("abort", onAbort);
    };

    let buffer = "";
    const processLine = (line: string) => {
      if (!line.trim()) return;
      let ev: any;
      try {
        ev = JSON.parse(line);
      } catch {
        return; // non-JSON noise (shouldn't happen in --mode json, but be safe)
      }
      if (ev.type === "message_end" && ev.message) {
        const msg = ev.message;
        result.messages.push(msg);
        if (msg.role === "assistant") {
          result.usage.turns++;
          const u = msg.usage;
          if (u) {
            result.usage.input += u.input || 0;
            result.usage.output += u.output || 0;
            result.usage.cost += u.cost?.total || 0;
            // Not summed: this tracks the latest reported total.
            result.usage.contextTokens = u.totalTokens || 0;
          }
          if (msg.stopReason) result.stopReason = msg.stopReason;
          if (msg.errorMessage) result.errorMessage = msg.errorMessage;
        }
        emit();
      } else if (ev.type === "tool_result_end" && ev.message) {
        result.messages.push(ev.message);
        emit();
      }
    };

    // Decode at the stream level so a multi-byte UTF-8 character that straddles
    // two chunks is not corrupted by per-chunk toString().
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    proc.stdout.on("data", (chunk: string) => {
      buffer += chunk;
      const lines = buffer.split("\n");
      // The last element is an incomplete line (or ""); keep it for the next chunk.
      buffer = lines.pop() ?? "";
      for (const l of lines) processLine(l);
    });
    proc.stderr.on("data", (chunk: string) => {
      result.stderr += chunk;
    });
    proc.on("close", (code, signal) => {
      cleanup();
      if (buffer.trim()) processLine(buffer);
      if (code === null) {
        // The child was terminated by a signal: that is a failure, not exit 0.
        if (!result.errorMessage) result.errorMessage = `sub-coder terminated by ${signal ?? "signal"}`;
        resolveP(1);
        return;
      }
      resolveP(code);
    });
    proc.on("error", (e) => {
      result.stderr += String(e?.message ?? e);
      // No pid means the process never started, so "close" may never fire.
      if (proc.pid === undefined) cleanup();
      resolveP(1);
    });

    if (opts.signal) {
      const abortSignal = opts.signal;
      onAbort = () => {
        try {
          proc.kill("SIGTERM");
        } catch {
          /* already gone */
        }
        killTimer = setTimeout(() => {
          try {
            // `proc.killed` only says a signal was sent, so test whether the
            // process has actually exited before escalating.
            if (proc.exitCode === null && proc.signalCode === null) proc.kill("SIGKILL");
          } catch {
            /* ignore */
          }
        }, 4000);
      };
      if (abortSignal.aborted) onAbort();
      else abortSignal.addEventListener("abort", onAbort, { once: true });
    }
  });

  result.exitCode = exitCode;
  result.report = getFinalText(result.messages);
  if (exitCode !== 0 && !result.errorMessage) {
    // Fall back to the last non-empty stderr line, then to the bare exit code.
    result.errorMessage = result.stderr.split(/\r?\n/).filter(Boolean).slice(-1)[0] || `exited ${exitCode}`;
  }
  return result;
}
|
|
299
|
+
|
|
300
|
+
/** One unit of work for runSubCodersConcurrent(). */
export interface SubCoderItem {
  /** Identifier copied onto the matching result. */
  id: string;
  /** Short display name copied onto the matching result. */
  label: string;
  /** The question or task handed to the child. */
  task: string;
  /** Working directory for the child process. */
  cwd: string;
}
|
|
306
|
+
|
|
307
|
+
/**
 * Maps `items` through the async `fn` with at most `concurrency` calls in
 * flight, returning the outputs in input order.
 *
 * The limit is rounded down to a whole number and clamped to
 * `[1, items.length]`; a NaN limit is treated as 1. If `fn` rejects, the
 * returned promise rejects with that error.
 *
 * @param items Inputs to process.
 * @param concurrency Maximum number of simultaneous `fn` calls.
 * @param fn Worker invoked with each item and its index.
 */
async function mapWithConcurrencyLimit<TIn, TOut>(
  items: TIn[],
  concurrency: number,
  fn: (item: TIn, index: number) => Promise<TOut>,
): Promise<TOut[]> {
  if (items.length === 0) return [];
  // Sanitize first: `new Array(n)` throws a RangeError for fractional or NaN n.
  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
  const limit = Math.max(1, Math.min(requested, items.length));
  const results: TOut[] = new Array(items.length);
  let next = 0;
  // Each worker repeatedly claims the next unprocessed index until none remain.
  const workers = Array.from({ length: limit }, async () => {
    while (true) {
      const cur = next++;
      if (cur >= items.length) return;
      results[cur] = await fn(items[cur], cur);
    }
  });
  await Promise.all(workers);
  return results;
}
|
|
326
|
+
|
|
327
|
+
/**
 * Runs a batch of sub-coders with a cap on how many execute at once. The cap
 * defaults to defaultConcurrency() (2 unless overridden) because a single
 * local backend is easily starved.
 *
 * `opts.onUpdate` is called with a fresh array of shallow result copies once
 * before any child starts and again every time a child reports progress or
 * finishes; this is what drives the live tracker.
 *
 * @returns One result per item, in the same order as `items`.
 */
export async function runSubCodersConcurrent(
  items: SubCoderItem[],
  opts: {
    signal?: AbortSignal;
    concurrency?: number;
    model?: string;
    onUpdate?: (all: SubCoderResult[]) => void;
  } = {},
): Promise<SubCoderResult[]> {
  // Seed every slot with a "still running" placeholder so the first snapshot
  // already lists all items.
  const all: SubCoderResult[] = [];
  for (const item of items) {
    all.push({
      id: item.id,
      label: item.label,
      task: item.task,
      exitCode: -1,
      report: "",
      messages: [],
      stderr: "",
      usage: emptyUsage(),
    });
  }

  // Listeners receive copies, never the internal result objects.
  const publish = () => {
    opts.onUpdate?.(all.map((entry) => ({ ...entry })));
  };
  publish();

  const limit = opts.concurrency ?? defaultConcurrency();
  await mapWithConcurrencyLimit(items, limit, async (item, slot) => {
    const record = (latest: SubCoderResult) => {
      all[slot] = latest;
      publish();
    };
    const finished = await runSubCoder({
      id: item.id,
      label: item.label,
      task: item.task,
      cwd: item.cwd,
      model: opts.model,
      signal: opts.signal,
      onUpdate: record,
    });
    record(finished);
    return finished;
  });

  return all;
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
// Live sub-coder tracker — a small animated panel above the input showing each
|
|
2
|
+
// running/finished sub-coder, its status, elapsed time and current activity.
|
|
3
|
+
//
|
|
4
|
+
// Driven by string[] content re-set on a timer (the spinner + clock need to
|
|
5
|
+
// tick, which event updates alone can't do). Colors are raw 24-bit/SGR escapes
|
|
6
|
+
// (same approach as branding's honey accent) so the panel doesn't depend on the
|
|
7
|
+
// active theme and the string[] form of setWidget can be used directly.
|
|
8
|
+
|
|
9
|
+
import { summarizeActivity, type SubCoderResult } from "./spawn.ts";
|
|
10
|
+
|
|
11
|
+
// Braille spinner frames, cycled by the tracker while a sub-coder is running.
const SPINNER = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];

// Builds a colorizer that wraps text in the given SGR foreground sequence and
// then resets the foreground to its default (SGR 39).
const sgrForeground = (code: string) => (s: string) => `\x1b[${code}m${s}\x1b[39m`;

// Brand honey (matches branding/index.ts) + plain SGR status colors.
const honey = sgrForeground("38;2;225;90;31");
const green = sgrForeground("32");
const red = sgrForeground("31");
const gray = sgrForeground("90");
|
|
18
|
+
|
|
19
|
+
/**
 * Formats a millisecond duration as "m:ss". Negative durations are shown as
 * 0:00, and minutes are not rolled over into hours.
 */
function fmtElapsed(ms: number): string {
  const wholeSeconds = Math.max(0, Math.floor(ms / 1000));
  const minutes = Math.floor(wholeSeconds / 60);
  const seconds = String(wholeSeconds % 60).padStart(2, "0");
  return `${minutes}:${seconds}`;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Right-pads `s` with spaces to a length of `n`. Strings that are already `n`
 * characters or longer are returned unchanged (never truncated).
 */
function padEnd(s: string, n: number): string {
  return s.padEnd(n);
}
|
|
29
|
+
|
|
30
|
+
/** The slice of the host context that the tracker relies on. */
export interface TrackerUI {
  /** When false, the tracker does not touch the widget at all. */
  hasUI: boolean;
  ui: {
    /**
     * Sets the widget registered under `key` to the given lines. The tracker
     * passes `undefined` as content to clear its panel.
     */
    setWidget: (
      key: string,
      content: string[] | undefined,
      options?: { placement?: "aboveEditor" | "belowEditor" },
    ) => void;
  };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Live panel listing each sub-coder with a status icon, elapsed time and
 * current activity.
 *
 * Lifecycle: `begin()` registers items and starts a repaint timer, `update()`
 * feeds in fresh results, and `end()` stops the timer and clears the panel.
 * Every method is a no-op for the UI when `ctx.hasUI` is false.
 */
export class SubCoderTracker {
  private readonly key: string;
  private readonly placement: "aboveEditor" | "belowEditor";
  // When set, the header shows a running total-elapsed timer (the overall
  // process time, not just per-sub-coder).
  private readonly totalSince?: number;
  private results = new Map<string, SubCoderResult>();
  // Ids in first-seen order; this is the row order of the panel.
  private order: string[] = [];
  private startedAt = new Map<string, number>();
  private finishedAt = new Map<string, number>();
  private timer: ReturnType<typeof setInterval> | null = null;
  // Last frame handed to setWidget, used to skip identical repaints.
  private lastFrame = "";

  /**
   * @param ctx Host UI access.
   * @param opts `key` is the widget key (default "subcoders"), `placement`
   *   defaults to "aboveEditor", and `totalSince` is an epoch-ms start time for
   *   the optional total timer in the header.
   */
  constructor(
    private ctx: TrackerUI,
    opts: { key?: string; placement?: "aboveEditor" | "belowEditor"; totalSince?: number } = {},
  ) {
    this.key = opts.key ?? "subcoders";
    this.placement = opts.placement ?? "aboveEditor";
    this.totalSince = opts.totalSince;
  }

  /** Register the items and start the animation timer. */
  begin(items: { id: string; label: string }[]): void {
    if (!this.ctx.hasUI || items.length === 0) return;
    const now = Date.now();
    for (const it of items) {
      if (!this.startedAt.has(it.id)) {
        this.order.push(it.id);
        this.startedAt.set(it.id, now);
        // Placeholder "running" result until the first real update arrives.
        this.results.set(it.id, {
          id: it.id,
          label: it.label,
          task: "",
          exitCode: -1,
          report: "",
          messages: [],
          stderr: "",
          usage: { input: 0, output: 0, cost: 0, turns: 0, contextTokens: 0 },
        });
      }
    }
    this.render();
    if (!this.timer) this.timer = setInterval(() => this.render(), 120);
  }

  /**
   * Feed a fresh snapshot of all results (from runSubCodersConcurrent).
   * Only records state; the next timer tick repaints.
   */
  update(results: SubCoderResult[]): void {
    if (!this.ctx.hasUI) return;
    const now = Date.now();
    for (const r of results) {
      if (!this.startedAt.has(r.id)) {
        this.order.push(r.id);
        this.startedAt.set(r.id, now);
      }
      this.results.set(r.id, r);
      // Freeze the elapsed clock the first time a result is seen as finished.
      if (r.exitCode !== -1 && !this.finishedAt.has(r.id)) this.finishedAt.set(r.id, now);
    }
  }

  /** Stop the timer, paint a final static frame, then clear the panel. */
  end(): void {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    if (!this.ctx.hasUI) return;
    this.render();
    this.ctx.ui.setWidget(this.key, undefined, { placement: this.placement });
    // The panel is now empty, so the cached frame no longer matches what is on
    // screen. Reset it so a later identical frame is painted rather than
    // suppressed by the diff-guard.
    this.lastFrame = "";
  }

  /** Builds the current frame and pushes it to the widget if it changed. */
  private render(): void {
    if (!this.ctx.hasUI || this.order.length === 0) return;
    const now = Date.now();
    const frame = SPINNER[Math.floor(now / 100) % SPINNER.length];

    const items = this.order.map((id) => this.results.get(id)!).filter(Boolean);
    const done = items.filter((r) => r.exitCode !== -1).length;
    // Label column: as wide as the longest label, but between 4 and 18 columns.
    const labelWidth = Math.min(18, Math.max(...items.map((r) => r.label.length), 4));
    // Labels wider than the column are clipped with an ellipsis so the time and
    // activity columns stay aligned across rows.
    const fitLabel = (label: string): string =>
      label.length > labelWidth ? `${label.slice(0, labelWidth - 1)}…` : padEnd(label, labelWidth);

    const total = this.totalSince !== undefined ? ` · ${fmtElapsed(now - this.totalSince)}` : "";
    const header = `${honey("sub-coders")} ${gray(`· ${done}/${items.length} done${total}`)}`;
    const rows = items.map((r) => {
      const running = r.exitCode === -1;
      const icon = running ? honey(frame) : r.exitCode === 0 ? green("✓") : red("✗");
      // Finished rows stop counting at their finish time.
      const end = this.finishedAt.get(r.id) ?? now;
      const elapsed = fmtElapsed(end - (this.startedAt.get(r.id) ?? now));
      const activity = summarizeActivity(r);
      return `  ${icon} ${fitLabel(r.label)} ${gray(padEnd(elapsed, 5))} ${gray(activity)}`;
    });

    const lines = [header, ...rows];
    const frameKey = lines.join("\n");
    if (frameKey === this.lastFrame) return; // diff-guard: skip identical repaints
    this.lastFrame = frameKey;
    this.ctx.ui.setWidget(this.key, lines, { placement: this.placement });
  }
}
|
package/AGENTS.md
CHANGED
|
@@ -11,6 +11,7 @@ Instead, proactively write the necessary background scripts (Python, Bash, etc.)
|
|
|
11
11
|
# Runtime invariants
|
|
12
12
|
|
|
13
13
|
- **Write refuses on existing files.** Use **Edit** with exact `old_string` / `new_string` to modify — `old_string` must match exactly (whitespace included). If it appears multiple times in the file, pass `replace_all: true` or add more surrounding context to make the match unique. Read with line numbers first when precision is in doubt. This is a runtime invariant, not guidance — when Write refuses, the error returns the exact Edit call-shape for the same path; follow it.
|
|
14
|
+
- **Edit refuses on unread files.** A file must be **Read** in the current session before you can Edit it — this is a runtime invariant. If an edit is blocked, Read the file first to get the exact current text (so `old_string` matches), then Edit. Files you just wrote count as read.
|
|
14
15
|
- **Bash / ShellSession default timeout is 30 s.** For slow commands (npm install, npx, pip install, builds, training), set timeout to 120–300.
|
|
15
16
|
- Per-benchmark tools (`BrowserNavigate` / `Click` / `Type` / `Scroll` / `Extract` / `Back` / `History` and `EvidenceAdd` / `Get` / `List`) appear when relevant; their schemas are passed to you directly when available.
|
|
16
17
|
|
|
@@ -27,6 +28,10 @@ Instead, proactively write the necessary background scripts (Python, Bash, etc.)
|
|
|
27
28
|
- **WebFetch**: Fetch and extract content from a URL
|
|
28
29
|
- **WebSearch**: Search the web via DuckDuckGo
|
|
29
30
|
|
|
31
|
+
## Delegation
|
|
32
|
+
|
|
33
|
+
- **Dispatch**: Spawn isolated sub-coders to research a focused question. Each child reads the repo and browses online (read-only — no edit/write) and returns a concise report; the full transcript stays out of your context. Single mode `{ task }`, or parallel `{ tasks: [{ label, task }] }` (up to 4). Use it to gather facts before implementing, then do the edits yourself.
|
|
34
|
+
|
|
30
35
|
Additional tools appear per benchmark: `BrowserNavigate`/`Click`/`Type`/`Scroll`/`Extract`/`Back`/`History` and `EvidenceAdd`/`Get`/`List` (GAIA). Their schemas are passed to you directly when available.
|
|
31
36
|
|
|
32
37
|
# Approaching complex tasks
|