@kky42/pi-subagents 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -2
- package/package.json +6 -2
- package/scripts/e2e/main-agent-comparison.mjs +663 -0
- package/src/pi-subagent.ts +47 -105
- package/src/prompts.ts +11 -3
- package/src/types.ts +0 -4
package/README.md
CHANGED
|
@@ -17,7 +17,7 @@ Fresh subagents start with their own conversation and the same working directory
|
|
|
17
17
|
|
|
18
18
|
Recent comparison runs:
|
|
19
19
|
|
|
20
|
-
| Case | Claude
|
|
20
|
+
| Case | Claude Code | pi deepseek-v4-flash |
|
|
21
21
|
| --- | --- | --- |
|
|
22
22
|
| explore this repo | 1 Agent(Explore) | 1 Agent(explorer) |
|
|
23
23
|
| auth multi-repo comparison | 1 Agent | 3 Agent calls |
|
|
@@ -58,7 +58,32 @@ The explorer returns a concise repo map, and the main agent relays the useful pa
|
|
|
58
58
|
|
|
59
59
|
## Notes
|
|
60
60
|
|
|
61
|
-
-
|
|
61
|
+
- Subagents cannot launch other subagents; the main agent coordinates follow-up delegation after each result returns.
|
|
62
|
+
- Root-level parallel delegation is supported and bounded by the extension.
|
|
62
63
|
- Subagents inherit the caller's current model and thinking level.
|
|
63
64
|
- Subagents do not inherit parent conversation messages or tool results, so prompts should be self-contained.
|
|
64
65
|
- `explorer` is prompted as read-only; pi permissions are still controlled by the active pi runtime.
|
|
66
|
+
|
|
67
|
+
## E2E
|
|
68
|
+
|
|
69
|
+
Run the main-agent behavior e2e matrix:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
npm run e2e
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
This downloads fresh GitHub fixtures (`sindresorhus/ky` and `sindresorhus/got` by default), runs fresh `pi -p` sessions with ambient skills, extensions, prompt templates, themes, and context files disabled, then records Claude Code-style routing scenarios. The default pi settings are `deepseek/deepseek-v4-flash` with `--thinking high`.
|
|
76
|
+
|
|
77
|
+
- codebase exploration
|
|
78
|
+
- review
|
|
79
|
+
- simple codebase QA
|
|
80
|
+
- small feature implementation
|
|
81
|
+
- two-codebase comparison
|
|
82
|
+
|
|
83
|
+
To compare the same scenarios against Claude Code:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
npm run e2e:compare-claude
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
The Claude comparison uses `--model haiku --effort high` by default. If `DEEPSEEK_API_KEY` is exported or present in `.env`, the runner configures Claude Code with DeepSeek's Anthropic-compatible endpoint and maps `haiku` to `deepseek-v4-flash[1m]`; it also creates a temporary pi auth file for the same key. It writes a report under the OS temp directory (for example `/tmp`, or the directory given with `-- --session-root <dir>`), shows `MATCH` or `DIFF` for each scenario, and treats timeouts or budget caps in observational scenarios as inconclusive by default. Add `-- --repeat 3` to repeat each task, `-- --strict-observed` when incomplete observational scenarios should fail the command, or `-- --strict-claude` when Claude-side failures should fail the command.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kky42/pi-subagents",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.2",
|
|
4
4
|
"description": "Claude Code-style subagents for pi.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./index.ts",
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"files": [
|
|
9
9
|
"index.ts",
|
|
10
10
|
"src",
|
|
11
|
+
"scripts",
|
|
11
12
|
"README.md",
|
|
12
13
|
"LICENSE",
|
|
13
14
|
"assets"
|
|
@@ -21,6 +22,8 @@
|
|
|
21
22
|
"scripts": {
|
|
22
23
|
"build": "tsc",
|
|
23
24
|
"check": "tsc --noEmit && vitest run",
|
|
25
|
+
"e2e": "node scripts/e2e/main-agent-comparison.mjs",
|
|
26
|
+
"e2e:compare-claude": "node scripts/e2e/main-agent-comparison.mjs --with-claude",
|
|
24
27
|
"test": "vitest run"
|
|
25
28
|
},
|
|
26
29
|
"peerDependencies": {
|
|
@@ -43,7 +46,8 @@
|
|
|
43
46
|
"pi": {
|
|
44
47
|
"extensions": [
|
|
45
48
|
"./index.ts"
|
|
46
|
-
]
|
|
49
|
+
],
|
|
50
|
+
"image": "https://raw.githubusercontent.com/kky42/pi-subagents/main/assets/subagents.png"
|
|
47
51
|
},
|
|
48
52
|
"engines": {
|
|
49
53
|
"node": ">=22.19.0"
|
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { spawn, spawnSync } from "node:child_process";
|
|
3
|
+
import {
|
|
4
|
+
cpSync,
|
|
5
|
+
createWriteStream,
|
|
6
|
+
existsSync,
|
|
7
|
+
mkdirSync,
|
|
8
|
+
readdirSync,
|
|
9
|
+
readFileSync,
|
|
10
|
+
rmSync,
|
|
11
|
+
writeFileSync,
|
|
12
|
+
} from "node:fs";
|
|
13
|
+
import { tmpdir } from "node:os";
|
|
14
|
+
import path from "node:path";
|
|
15
|
+
import process from "node:process";
|
|
16
|
+
import { fileURLToPath } from "node:url";
|
|
17
|
+
|
|
18
|
+
// Package root: two directory levels above the directory that holds this script.
const repoRoot = path.resolve(fileURLToPath(new URL("../..", import.meta.url)));
|
|
19
|
+
|
|
20
|
+
/**
 * Load KEY=VALUE pairs from a dotenv-style file into process.env.
 *
 * Blank lines, lines starting with "#", lines without "=", and entries with an
 * empty key are ignored. A value wrapped in one matching pair of single or double
 * quotes has the quotes removed. Variables that are already defined in the
 * environment are never overwritten.
 *
 * @param {string} filePath - File to read; a missing file is silently skipped.
 * @returns {void}
 */
function loadDotEnv(filePath) {
  if (!existsSync(filePath)) {
    return;
  }

  const isWrappedIn = (text, quote) => text.startsWith(quote) && text.endsWith(quote);

  readFileSync(filePath, "utf8")
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== "" && !entry.startsWith("#"))
    .forEach((entry) => {
      const equalsAt = entry.indexOf("=");
      if (equalsAt === -1) {
        return;
      }

      const name = entry.slice(0, equalsAt).trim();
      // The first definition wins: anything already in the environment is kept.
      if (!name || process.env[name] !== undefined) {
        return;
      }

      const rawValue = entry.slice(equalsAt + 1).trim();
      const quoted = isWrappedIn(rawValue, '"') || isWrappedIn(rawValue, "'");
      process.env[name] = quoted ? rawValue.slice(1, -1) : rawValue;
    });
}
|
|
40
|
+
|
|
41
|
+
loadDotEnv(path.join(repoRoot, ".env"));
|
|
42
|
+
|
|
43
|
+
// Scenario matrix. `fixture` selects which cloned repositories are copied into the
// working directory ("primary" or "both"). A scenario carrying `expectedBehavior`
// is a hard requirement; the remaining ones are only observed and reported.
const scenarios = [
  {
    id: "codebase-exploration",
    fixture: "primary",
    prompt:
      "I just opened this project and need a quick orientation. What is it for, where is the important code, and what should I run to check it? Please just report back; don't change files.",
  },
  {
    id: "review",
    fixture: "primary",
    prompt:
      "Can you review this codebase for a few concrete maintainability or testing risks? Please cite the files that led you there, and don't change anything.",
  },
  {
    // The only scenario with a pinned expectation: the main agent should answer itself.
    id: "qa-about-codebase",
    expectedBehavior: "direct",
    fixture: "primary",
    prompt: "What package name and license does this repo declare? Please answer from the repo files.",
  },
  {
    id: "implement-feature",
    fixture: "primary",
    prompt:
      'Please add a short README section called "Local checks" that tells contributors the main command to run before opening a pull request.',
  },
  {
    id: "compare-codebases",
    fixture: "both",
    prompt:
      "I'm choosing between ./ky and ./got for a small project. Can you compare their purpose, rough architecture, and test setup?",
  },
];
|
|
76
|
+
|
|
77
|
+
/**
 * Parse command-line arguments into the option object used by the runner.
 *
 * Boolean switches set a flag; every other option consumes the following argument
 * as its value. Path options are resolved to absolute paths. Numeric options are
 * validated, because an unchecked `Number(...)` lets a typo through as NaN: a NaN
 * repeat count makes the scenario loop run zero times (a silent "success"), and a
 * NaN timeout is treated by Node timers as a 1 ms delay.
 *
 * @param {string[]} argv - Arguments after the script name (`process.argv.slice(2)`).
 * @returns {object} Defaults overlaid with parsed values; `help` is only set when requested.
 * @throws {Error} On an unknown option, a missing value, or an invalid numeric value.
 */
function parseArgs(argv) {
  const options = {
    cwd: repoRoot,
    extension: path.join(repoRoot, "index.ts"),
    model: "deepseek/deepseek-v4-flash",
    thinking: "high",
    sessionRoot: path.join(tmpdir(), `pi-subagent-main-agent-e2e-${Date.now()}`),
    timeoutMs: 120_000,
    repeat: 1,
    primaryRepo: "https://github.com/sindresorhus/ky.git",
    primaryName: "ky",
    secondaryRepo: "https://github.com/sindresorhus/got.git",
    secondaryName: "got",
    deepseekApiKeyEnv: "DEEPSEEK_API_KEY",
    withClaude: false,
    strictClaude: false,
    strictObserved: false,
    claudeModel: "haiku",
    claudeEffort: "high",
    claudeTimeoutMs: 120_000,
    claudeMaxBudgetUsd: "0.80",
  };

  // Flags that take no value; each simply turns an option on.
  const switches = new Map([
    ["--help", "help"],
    ["-h", "help"],
    ["--with-claude", "withClaude"],
    ["--strict-claude", "strictClaude"],
    ["--strict-observed", "strictObserved"],
  ]);

  const asText = (_flag, text) => text;
  const asPath = (_flag, text) => path.resolve(text);
  const asPositiveNumber = (flag, text) => {
    const parsed = Number(text);
    if (!Number.isFinite(parsed) || parsed <= 0) {
      throw new Error(`${flag} must be a positive number, got "${text}"`);
    }
    return parsed;
  };
  const asPositiveInteger = (flag, text) => {
    const parsed = Number(text);
    if (!Number.isInteger(parsed) || parsed <= 0) {
      throw new Error(`${flag} must be a positive integer, got "${text}"`);
    }
    return parsed;
  };

  // Flags that consume the next argument, with the option key and value converter.
  const valued = new Map([
    ["--cwd", { key: "cwd", convert: asPath }],
    ["--extension", { key: "extension", convert: asPath }],
    ["--model", { key: "model", convert: asText }],
    ["--thinking", { key: "thinking", convert: asText }],
    ["--session-root", { key: "sessionRoot", convert: asPath }],
    ["--timeout-ms", { key: "timeoutMs", convert: asPositiveNumber }],
    ["--repeat", { key: "repeat", convert: asPositiveInteger }],
    ["--primary-repo", { key: "primaryRepo", convert: asText }],
    ["--primary-name", { key: "primaryName", convert: asText }],
    ["--secondary-repo", { key: "secondaryRepo", convert: asText }],
    ["--secondary-name", { key: "secondaryName", convert: asText }],
    ["--deepseek-api-key-env", { key: "deepseekApiKeyEnv", convert: asText }],
    ["--claude-model", { key: "claudeModel", convert: asText }],
    ["--claude-effort", { key: "claudeEffort", convert: asText }],
    ["--claude-timeout-ms", { key: "claudeTimeoutMs", convert: asPositiveNumber }],
    ["--claude-max-budget-usd", { key: "claudeMaxBudgetUsd", convert: asText }],
  ]);

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];

    const switchKey = switches.get(arg);
    if (switchKey !== undefined) {
      options[switchKey] = true;
      continue;
    }

    const spec = valued.get(arg);
    if (spec === undefined) {
      throw new Error(`Unknown option: ${arg}`);
    }

    const value = argv[index + 1];
    if (!value) {
      throw new Error(`${arg} requires a value`);
    }
    index += 1; // The value has been consumed; skip it on the next iteration.
    options[spec.key] = spec.convert(arg, value);
  }

  return options;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Print the command-line usage text to stdout.
 *
 * Lists the options accepted by parseArgs. `--cwd` is intentionally not listed:
 * it is parsed, but nothing in this script reads `options.cwd`.
 * NOTE(review): either wire `--cwd` up or remove it from parseArgs.
 *
 * @returns {void}
 */
function printHelp() {
  console.log(`Usage: node scripts/e2e/main-agent-comparison.mjs [options]

Runs main-agent behavior scenarios against pi. With --with-claude, runs the same
scenarios through Claude Code and compares whether each main agent delegated or
handled the task directly. Behavior differences are reported but do not fail the
run unless a scenario has an explicit expectedBehavior.

Options:
  -h, --help                      show this help and exit
  --model <id>                    pi model (default: deepseek/deepseek-v4-flash)
  --thinking <level>              pi thinking level (default: high)
  --extension <path>              pi extension entry file (default: index.ts in the package root)
  --session-root <dir>            artifact root (default: OS temp dir)
  --timeout-ms <ms>               per-pi-scenario timeout (default: 120000)
  --repeat <n>                    repetitions per scenario (default: 1)
  --primary-repo <url>            GitHub repo for single-codebase scenarios
  --primary-name <name>           directory name for the primary repo (default: ky)
  --secondary-repo <url>          GitHub repo for two-codebase comparison
  --secondary-name <name>         directory name for the secondary repo (default: got)
  --deepseek-api-key-env <name>   env var used for pi and Claude DeepSeek auth
  --with-claude                   also run Claude Code comparison
  --strict-claude                 fail if a Claude Code scenario is incomplete or unexpected
  --strict-observed               fail incomplete observed scenarios too
  --claude-model <id>             Claude Code model alias/id (default: haiku)
  --claude-effort <level>         Claude Code effort (default: high)
  --claude-timeout-ms <ms>        per-Claude-scenario timeout (default: 120000)
  --claude-max-budget-usd <usd>   Claude Code budget cap (default: 0.80)
`);
}
|
|
172
|
+
|
|
173
|
+
/**
 * Create a directory together with any missing parent directories.
 * A directory that already exists is left as it is.
 *
 * @param {string} dir - Directory path to create.
 * @returns {void}
 */
function ensureDirectory(dir) {
  const creation = { recursive: true };
  mkdirSync(dir, creation);
}
|
|
176
|
+
|
|
177
|
+
/**
 * Run a command synchronously and return its trimmed standard output.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {string} cwd - Working directory for the child process.
 * @returns {string|undefined} Trimmed stdout when the exit status is 0, otherwise undefined
 *   (this includes a command that could not be started, whose status is not 0).
 */
function runText(command, args, cwd) {
  const { status, stdout } = spawnSync(command, args, { cwd, encoding: "utf8", env: process.env });
  return status === 0 ? stdout.trim() : undefined;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Look up the API key used for DeepSeek authentication.
 *
 * Checked in order: the variable named by `options.deepseekApiKeyEnv`, then
 * ANTHROPIC_AUTH_TOKEN, then DEEPSEEK_API_KEY. The first non-empty value wins.
 *
 * @param {{ deepseekApiKeyEnv: string }} options - Runner options.
 * @returns {string|undefined} The key, or the (empty/undefined) DEEPSEEK_API_KEY value when none is set.
 */
function getDeepseekApiKey(options) {
  const { env } = process;
  for (const variableName of [options.deepseekApiKeyEnv, "ANTHROPIC_AUTH_TOKEN"]) {
    const candidate = env[variableName];
    if (candidate) {
      return candidate;
    }
  }
  return env.DEEPSEEK_API_KEY;
}
|
|
192
|
+
|
|
193
|
+
/**
 * Build the environment for a pi child process.
 *
 * When an API key is available, a private agent directory is created under the
 * session directory, an `auth.json` holding the key is written there, and the
 * child is pointed at it through PI_CODING_AGENT_DIR. DEEPSEEK_API_KEY is also set.
 *
 * The auth file is created with mode 0o600: it contains a secret and the session
 * directory normally lives in the shared OS temp directory. The mode only applies
 * when the file is created; an already existing file keeps its permissions.
 *
 * @param {object} options - Runner options (used to locate the API key).
 * @param {string} sessionDir - Per-scenario session directory.
 * @returns {NodeJS.ProcessEnv} A copy of process.env, extended when a key was found.
 */
function buildPiEnv(options, sessionDir) {
  const env = { ...process.env };
  const key = getDeepseekApiKey(options);
  if (!key) {
    return env;
  }

  const agentDir = path.join(sessionDir, "agent");
  ensureDirectory(agentDir);
  env.PI_CODING_AGENT_DIR = agentDir;
  env.DEEPSEEK_API_KEY = key;

  const authDocument = { deepseek: { type: "api_key", key } };
  writeFileSync(path.join(agentDir, "auth.json"), `${JSON.stringify(authDocument, null, 2)}\n`, {
    mode: 0o600, // owner read/write only — the file holds the API key
  });
  return env;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Build the environment for a Claude Code child process.
 *
 * With an API key available, Claude Code is redirected to DeepSeek's
 * Anthropic-compatible endpoint and the opus/sonnet/haiku aliases are mapped to
 * DeepSeek models. Three CLAUDE_CODE_* switches are always set.
 *
 * @param {object} options - Runner options (used to locate the API key).
 * @returns {NodeJS.ProcessEnv} A copy of process.env with the overrides applied.
 */
function buildClaudeEnv(options) {
  const apiKey = getDeepseekApiKey(options);
  const deepseekRouting = apiKey
    ? {
        ANTHROPIC_BASE_URL: "https://api.deepseek.com/anthropic",
        ANTHROPIC_AUTH_TOKEN: apiKey,
        ANTHROPIC_MODEL: "deepseek-v4-pro[1m]",
        ANTHROPIC_DEFAULT_OPUS_MODEL: "deepseek-v4-pro[1m]",
        ANTHROPIC_DEFAULT_SONNET_MODEL: "deepseek-v4-pro[1m]",
        ANTHROPIC_DEFAULT_HAIKU_MODEL: "deepseek-v4-flash[1m]",
      }
    : {};

  return {
    ...process.env,
    ...deepseekRouting,
    CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: "1",
    CLAUDE_CODE_ATTRIBUTION_HEADER: "0",
    CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: "1",
  };
}
|
|
225
|
+
|
|
226
|
+
/**
 * Make sure a shallow clone of `url` exists at `<baseDir>/<name>`.
 *
 * An existing directory is reused without contacting the remote. Otherwise
 * `git clone --depth 1` runs with its output captured under `<baseDir>/_clone-logs`.
 *
 * @param {{ url: string, name: string, baseDir: string, timeoutMs: number }} spec
 * @returns {Promise<{ name: string, url: string, path: string, commit: (string|undefined) }>}
 *   Checkout description; `commit` is the HEAD hash, or undefined if git could not report it.
 * @throws {Error} When the clone exits non-zero or times out.
 */
async function cloneRepo({ url, name, baseDir, timeoutMs }) {
  const repoDir = path.join(baseDir, name);
  const describeCheckout = () => ({
    name,
    url,
    path: repoDir,
    commit: runText("git", ["rev-parse", "HEAD"], repoDir),
  });

  if (!existsSync(repoDir)) {
    ensureDirectory(baseDir);
    const logDir = path.join(baseDir, "_clone-logs");
    ensureDirectory(logDir);

    const { exitCode, timedOut } = await runProcess({
      command: "git",
      args: ["clone", "--depth", "1", url, repoDir],
      cwd: baseDir,
      stdoutPath: path.join(logDir, `${name}.stdout.txt`),
      stderrPath: path.join(logDir, `${name}.stderr.txt`),
      timeoutMs,
    });
    if (timedOut || exitCode !== 0) {
      throw new Error(`Failed to clone ${url}. See ${logDir}`);
    }
  }

  return describeCheckout();
}
|
|
259
|
+
|
|
260
|
+
/**
 * Clone the primary and secondary fixture repositories, one after the other,
 * into `<sessionRoot>/fixtures/base`.
 *
 * @param {object} options - Runner options (sessionRoot, repo URLs and names, timeoutMs).
 * @returns {Promise<{ baseDir: string, primary: object, secondary: object }>}
 *   The base directory plus the checkout description returned by cloneRepo for each repo.
 */
async function prepareFixtures(options) {
  const baseDir = path.join(options.sessionRoot, "fixtures", "base");
  const { timeoutMs } = options;
  const fetchFixture = (url, name) => cloneRepo({ url, name, baseDir, timeoutMs });

  // Kept sequential on purpose: the secondary clone starts only after the primary finished.
  const primary = await fetchFixture(options.primaryRepo, options.primaryName);
  const secondary = await fetchFixture(options.secondaryRepo, options.secondaryName);

  return { baseDir, primary, secondary };
}
|
|
276
|
+
|
|
277
|
+
/**
 * Create a fresh working copy of the fixtures for one scenario run.
 *
 * `<sessionDir>/work` is wiped and recreated, then the primary fixture (and, for
 * scenarios whose `fixture` is "both", the secondary one) is copied into it.
 *
 * @param {object} options - Runner options (not read by this function).
 * @param {{ primary: { name: string, path: string }, secondary: { name: string, path: string } }} fixtures
 * @param {string} sessionDir - Per-scenario session directory.
 * @param {{ fixture: string }} scenario - Scenario being prepared.
 * @returns {string} The directory the agent should run in: the work root when both
 *   fixtures are present, otherwise the copied primary fixture directory.
 */
function prepareScenarioWorkdir(options, fixtures, sessionDir, scenario) {
  const workRoot = path.join(sessionDir, "work");
  rmSync(workRoot, { recursive: true, force: true });
  ensureDirectory(workRoot);

  const wantsBoth = scenario.fixture === "both";
  const selected = wantsBoth ? [fixtures.primary, fixtures.secondary] : [fixtures.primary];
  for (const { name, path: sourceDir } of selected) {
    cpSync(sourceDir, path.join(workRoot, name), { recursive: true });
  }

  return wantsBoth ? workRoot : path.join(workRoot, fixtures.primary.name);
}
|
|
293
|
+
|
|
294
|
+
/**
 * Write the prompt for one scenario to `<sessionDir>/prompt.md`.
 *
 * The file has a heading naming the agent kind, the scenario id, an
 * "Expected behavior" line, a blank line, and then the scenario prompt.
 * NOTE(review): callers pass this whole file to the agent, so the "Expected
 * behavior" line is visible to the model under test — confirm that is intended.
 *
 * @param {string} sessionDir - Directory that receives prompt.md.
 * @param {{ id: string, prompt: string, expectedBehavior?: string }} scenario
 * @param {string} kind - Label used in the heading (for example "pi").
 * @returns {string} Path of the written prompt file.
 */
function writePromptFile(sessionDir, scenario, kind) {
  const promptPath = path.join(sessionDir, "prompt.md");

  let expectedLine = "Expected behavior: observe and report";
  if (scenario.expectedBehavior) {
    expectedLine = `Expected behavior: ${scenario.expectedBehavior}`;
  }

  const documentLines = [
    `# ${kind} Main-Agent Behavior E2E`,
    "",
    `Scenario: ${scenario.id}`,
    expectedLine,
    "",
    `${scenario.prompt}`,
    "", // produces the trailing newline
  ];
  writeFileSync(promptPath, documentLines.join("\n"));
  return promptPath;
}
|
|
311
|
+
|
|
312
|
+
/**
 * Spawn a child process, append its stdout/stderr to log files, and enforce a timeout.
 *
 * On timeout the child receives SIGTERM and, five seconds later, SIGKILL. The
 * returned promise never rejects; spawn and log-stream errors are reported through
 * `errorMessage`.
 *
 * The promise resolves only after both log files have been flushed. Callers read
 * these files synchronously right after awaiting; resolving on the child's "close"
 * event alone (as before) could expose a file that was still missing its tail.
 *
 * @param {object} spec
 * @param {string} spec.command - Executable to run.
 * @param {string[]} spec.args - Arguments for the executable.
 * @param {string} spec.cwd - Working directory of the child.
 * @param {string} spec.stdoutPath - File that receives stdout (opened in append mode).
 * @param {string} spec.stderrPath - File that receives stderr (opened in append mode).
 * @param {number} spec.timeoutMs - Milliseconds before the child is terminated.
 * @param {NodeJS.ProcessEnv} [spec.env=process.env] - Environment for the child.
 * @returns {Promise<{ command: string, args: string[], cwd: string, startedAt: string,
 *   endedAt: string, exitCode: (number|null), signal: (string|null), timedOut: boolean,
 *   errorMessage: (string|undefined) }>}
 */
function runProcess({ command, args, cwd, stdoutPath, stderrPath, timeoutMs, env = process.env }) {
  return new Promise((resolve) => {
    const startedAt = new Date().toISOString();
    let errorMessage;

    const stdout = createWriteStream(stdoutPath, { flags: "a" });
    const stderr = createWriteStream(stderrPath, { flags: "a" });
    // Without a listener a failing log stream would raise an uncaught exception.
    // A spawn error (recorded below) takes precedence over a log-stream error.
    const noteLogError = (error) => {
      errorMessage ??= error.message;
    };
    stdout.on("error", noteLogError);
    stderr.on("error", noteLogError);

    const child = spawn(command, args, {
      cwd,
      stdio: ["ignore", "pipe", "pipe"],
      env,
    });

    let timedOut = false;
    let settled = false;
    let killTimer;
    const timeout = setTimeout(() => {
      timedOut = true;
      child.kill("SIGTERM");
      // Escalate if the child ignores SIGTERM.
      killTimer = setTimeout(() => child.kill("SIGKILL"), 5_000);
      killTimer.unref();
    }, timeoutMs);
    timeout.unref();

    // end(callback) invokes the callback once the stream has finished, and also
    // when it already finished or failed, so this promise always settles.
    const flushLog = (stream) =>
      new Promise((done) => {
        stream.end(() => done());
      });

    child.stdout.pipe(stdout);
    child.stderr.pipe(stderr);
    child.on("error", (error) => {
      errorMessage = error.message;
    });
    child.on("close", (exitCode, signal) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      if (killTimer) clearTimeout(killTimer);

      const endedAt = new Date().toISOString();
      Promise.all([flushLog(stdout), flushLog(stderr)]).then(() => {
        resolve({
          command,
          args,
          cwd,
          startedAt,
          endedAt,
          exitCode,
          signal,
          timedOut,
          errorMessage,
        });
      });
    });
  });
}
|
|
361
|
+
|
|
362
|
+
/**
 * Read a JSON Lines file and return the parsed records.
 *
 * Empty lines and lines that fail to parse are skipped, and so are lines whose
 * parsed value is falsy (null, 0, false, ""). A missing path or file yields [].
 *
 * @param {string|undefined} filePath - File to read.
 * @returns {Array<*>} Parsed truthy values in file order.
 */
function readJsonlRecords(filePath) {
  const records = [];
  if (!filePath || !existsSync(filePath)) {
    return records;
  }

  for (const line of readFileSync(filePath, "utf8").split("\n")) {
    if (!line) continue;

    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      // A partially written or corrupt line must not discard the rest of the trace.
      continue;
    }
    if (parsed) records.push(parsed);
  }
  return records;
}
|
|
377
|
+
|
|
378
|
+
/**
 * Pick the `.jsonl` file in `dir` whose path sorts last.
 *
 * Only direct children are considered. "Newest" relies on the default string sort,
 * NOTE(review): presumably session file names start with a sortable timestamp —
 * confirm against the files pi actually writes.
 *
 * @param {string} dir - Directory to scan; it must exist.
 * @returns {string|undefined} Full path of the last-sorting file, or undefined when there is none.
 */
function findNewestJsonl(dir) {
  const candidates = [];
  for (const entry of readdirSync(dir)) {
    if (entry.endsWith(".jsonl")) {
      candidates.push(path.join(dir, entry));
    }
  }
  candidates.sort();
  return candidates[candidates.length - 1];
}
|
|
385
|
+
|
|
386
|
+
/**
 * Increment the counter stored under `name` in a plain-object tally.
 *
 * Only own properties count as existing counters. Reading `map[name]` directly
 * would pick up inherited members such as `toString` or `constructor` for tools
 * with those names and turn the count into a string. The counter is written with
 * defineProperty so that a name like "__proto__" becomes an ordinary own key
 * instead of touching the prototype.
 *
 * @param {Record<string, number>} map - Tally object, mutated in place.
 * @param {string} name - Tool name to count.
 * @returns {void}
 */
function countTool(map, name) {
  const previous = Object.hasOwn(map, name) ? map[name] : 0;
  Object.defineProperty(map, name, {
    value: previous + 1,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}
|
|
389
|
+
|
|
390
|
+
/**
 * Summarize a pi session trace (JSON Lines).
 *
 * Counts `toolCall` content items by tool name, counts `toolResult` messages by
 * `toolName`, and remembers the last text item emitted by an assistant message.
 * The run is classified as "delegate" when at least one `Agent` tool call was
 * made, otherwise as "direct".
 *
 * @param {string|undefined} filePath - Trace file; a missing file yields an empty summary.
 * @returns {{ filePath: (string|undefined), toolCalls: object, toolResults: object,
 *   agentCalls: number, readCalls: number, behavior: string, finalText: string }}
 */
function analyzePiTrace(filePath) {
  const toolCalls = {};
  const toolResults = {};
  let finalText = "";

  for (const { message } of readJsonlRecords(filePath)) {
    const items = Array.isArray(message?.content) ? message.content : [];
    const fromAssistant = message?.role === "assistant";

    for (const item of items) {
      if (item?.type === "toolCall" && typeof item.name === "string") {
        countTool(toolCalls, item.name);
      }
      if (fromAssistant && item?.type === "text" && typeof item.text === "string") {
        finalText = item.text; // the last assistant text wins
      }
    }

    if (message?.role === "toolResult" && typeof message.toolName === "string") {
      countTool(toolResults, message.toolName);
    }
  }

  const agentCalls = toolCalls.Agent ?? 0;
  const readCalls = toolCalls.read ?? 0;
  const behavior = agentCalls > 0 ? "delegate" : "direct";
  return { filePath, toolCalls, toolResults, agentCalls, readCalls, behavior, finalText };
}
|
|
422
|
+
|
|
423
|
+
/**
 * Summarize a Claude Code `stream-json` trace (JSON Lines).
 *
 * Counts `system`/`task_started` records, collects a label for every `result`
 * record flagged `is_error`, and — for records without `parent_tool_use_id` only —
 * counts `tool_use` items by name and remembers the last assistant text. The run
 * is classified as "delegate" when an `Agent` tool call or a started task was
 * seen, otherwise as "direct".
 *
 * @param {string|undefined} filePath - Trace file; a missing file yields an empty summary.
 * @returns {{ filePath: (string|undefined), toolCalls: object, taskStarts: number,
 *   resultErrors: string[], agentCalls: number, readCalls: number, behavior: string,
 *   finalText: string }}
 */
function analyzeClaudeTrace(filePath) {
  const toolCalls = {};
  const resultErrors = [];
  let taskStarts = 0;
  let finalText = "";

  for (const record of readJsonlRecords(filePath)) {
    if (record.type === "system" && record.subtype === "task_started") {
      taskStarts += 1;
    }
    if (record.type === "result" && record.is_error) {
      resultErrors.push(record.subtype ?? record.stop_reason ?? "error");
    }

    // Records carrying parent_tool_use_id are excluded from the per-item counts.
    if (record.parent_tool_use_id) continue;

    const { message } = record;
    if (!Array.isArray(message?.content)) continue;

    for (const item of message.content) {
      if (item?.type === "tool_use" && typeof item.name === "string") {
        countTool(toolCalls, item.name);
      }
      if (item?.type === "text" && typeof item.text === "string" && message?.role === "assistant") {
        finalText = item.text; // the last root assistant text wins
      }
    }
  }

  const agentCalls = toolCalls.Agent ?? 0;
  const delegated = agentCalls > 0 || taskStarts > 0;
  return {
    filePath,
    toolCalls,
    taskStarts,
    resultErrors,
    agentCalls,
    readCalls: toolCalls.Read ?? 0,
    behavior: delegated ? "delegate" : "direct",
    finalText,
  };
}
|
|
462
|
+
|
|
463
|
+
/**
 * Run one scenario through `pi -p` and record the outcome.
 *
 * Artifacts go to `<sessionRoot>/pi/<scenario id>/r<repeat>`: a fresh working copy
 * of the fixtures, the prompt file, stdout/stderr logs, the session trace written
 * by pi, and a `result.json` summary. pi runs with prompt templates, themes,
 * context files, skills and ambient extensions disabled; only the extension under
 * test is loaded.
 *
 * @param {object} options - Runner options.
 * @param {object} fixtures - Fixture descriptions from prepareFixtures.
 * @param {object} scenario - Scenario to run.
 * @param {number} repeatIndex - 1-based repetition number.
 * @returns {Promise<object>} The result record that was also written to result.json.
 */
async function runPiScenario(options, fixtures, scenario, repeatIndex) {
  const sessionDir = path.join(options.sessionRoot, "pi", scenario.id, `r${repeatIndex}`);
  ensureDirectory(sessionDir);

  const workCwd = prepareScenarioWorkdir(options, fixtures, sessionDir, scenario);
  const promptPath = writePromptFile(sessionDir, scenario, "pi");
  const stdoutPath = path.join(sessionDir, "stdout.txt");
  const stderrPath = path.join(sessionDir, "stderr.txt");

  const isolationFlags = [
    "--no-prompt-templates",
    "--no-themes",
    "--no-context-files",
    "--no-skills",
    "--no-extensions",
  ];
  const args = [
    "-p",
    "--model",
    options.model,
    "--thinking",
    options.thinking,
    "--session-dir",
    sessionDir,
    ...isolationFlags,
    "--extension",
    options.extension,
    `@${promptPath}`,
  ];

  const command = await runProcess({
    command: "pi",
    args,
    cwd: workCwd,
    stdoutPath,
    stderrPath,
    timeoutMs: options.timeoutMs,
    env: buildPiEnv(options, sessionDir),
  });
  const trace = analyzePiTrace(findNewestJsonl(sessionDir));

  // A run passes when pi exited cleanly in time and every declared expectation holds.
  const finishedCleanly = command.exitCode === 0 && !command.timedOut;
  const behaviorOk = !scenario.expectedBehavior || trace.behavior === scenario.expectedBehavior;
  const readOk = !scenario.requirePiRead || trace.readCalls > 0;
  const pass = finishedCleanly && behaviorOk && readOk;

  const result = {
    kind: "pi",
    scenario: scenario.id,
    repeat: repeatIndex,
    expectedBehavior: scenario.expectedBehavior,
    required: Boolean(scenario.expectedBehavior),
    pass,
    command,
    sessionDir,
    workCwd,
    stdoutPath,
    stderrPath,
    trace,
  };
  writeFileSync(path.join(sessionDir, "result.json"), `${JSON.stringify(result, null, 2)}\n`);
  return result;
}
|
|
520
|
+
|
|
521
|
+
/**
 * Run one scenario through `claude -p` and record the outcome.
 *
 * Artifacts go to `<sessionRoot>/claude/<scenario id>/r<repeat>`: a fresh working
 * copy of the fixtures, the prompt file, the `stream-json` output (stream.jsonl),
 * stderr, and a `result.json` summary. The prompt file's content is passed as the
 * final positional argument.
 *
 * @param {object} options - Runner options.
 * @param {object} fixtures - Fixture descriptions from prepareFixtures.
 * @param {object} scenario - Scenario to run.
 * @param {number} repeatIndex - 1-based repetition number.
 * @returns {Promise<object>} The result record that was also written to result.json.
 */
async function runClaudeScenario(options, fixtures, scenario, repeatIndex) {
  const sessionDir = path.join(options.sessionRoot, "claude", scenario.id, `r${repeatIndex}`);
  ensureDirectory(sessionDir);

  const workCwd = prepareScenarioWorkdir(options, fixtures, sessionDir, scenario);
  const promptPath = writePromptFile(sessionDir, scenario, "Claude Code");
  const stdoutPath = path.join(sessionDir, "stream.jsonl");
  const stderrPath = path.join(sessionDir, "stderr.txt");

  const modelArgs = options.claudeModel ? ["--model", options.claudeModel] : [];
  const args = [
    "-p",
    "--output-format",
    "stream-json",
    "--max-budget-usd",
    options.claudeMaxBudgetUsd,
    "--effort",
    options.claudeEffort,
    "--permission-mode",
    "bypassPermissions",
    "--disable-slash-commands",
    "--exclude-dynamic-system-prompt-sections",
    "--no-session-persistence",
    ...modelArgs,
    readFileSync(promptPath, "utf8"),
  ];

  const command = await runProcess({
    command: "claude",
    args,
    cwd: workCwd,
    stdoutPath,
    stderrPath,
    timeoutMs: options.claudeTimeoutMs,
    env: buildClaudeEnv(options),
  });
  const trace = analyzeClaudeTrace(stdoutPath);

  // "completed" means a clean exit, no timeout, and no error result in the stream.
  const completed = command.exitCode === 0 && !command.timedOut && trace.resultErrors.length === 0;
  const behaviorOk = !scenario.expectedBehavior || trace.behavior === scenario.expectedBehavior;
  const pass = completed && behaviorOk;

  const result = {
    kind: "claude",
    scenario: scenario.id,
    repeat: repeatIndex,
    expectedBehavior: scenario.expectedBehavior,
    required: Boolean(scenario.expectedBehavior),
    pass,
    completed,
    command,
    sessionDir,
    workCwd,
    stdoutPath,
    stderrPath,
    trace,
  };
  writeFileSync(path.join(sessionDir, "result.json"), `${JSON.stringify(result, null, 2)}\n`);
  return result;
}
|
|
576
|
+
|
|
577
|
+
/**
 * Format one pi or Claude result as a single status line.
 *
 * Status is PASS for a passing result. A failing result is FAIL when the scenario
 * is required, or when the strict flags make it count towards the exit code
 * (`strictObserved` for pi; `strictClaude` together with `strictObserved` for
 * Claude — the same rule main() applies). Every other failing pi/Claude result is
 * INCONCLUSIVE. Previously the strict flags never influenced the label, so a run
 * could exit non-zero while printing only INCONCLUSIVE lines.
 *
 * @param {object} result - Result record from runPiScenario or runClaudeScenario.
 * @param {{ strictClaude?: boolean, strictObserved?: boolean }} [options={}] - Runner options.
 * @returns {string} Space-separated summary line.
 */
function formatResult(result, options = {}) {
  const countsAsFailure = () => {
    if (result.required) return true;
    if (result.kind === "pi") return Boolean(options.strictObserved);
    if (result.kind === "claude") return Boolean(options.strictClaude && options.strictObserved);
    return true; // other result kinds have no "inconclusive" state
  };

  let status = "PASS";
  if (!result.pass) {
    status = countsAsFailure() ? "FAIL" : "INCONCLUSIVE";
  }

  const expected = result.expectedBehavior ?? "observe";
  const parts = [
    status,
    result.kind,
    `${result.scenario}#${result.repeat ?? 1}`,
    `expected=${expected}`,
    `observed=${result.trace.behavior}`,
    `agentCalls=${result.trace.agentCalls}`,
  ];
  if (result.kind === "pi") {
    parts.push(`readCalls=${result.trace.readCalls}`);
  }
  if (result.kind === "claude") {
    parts.push(`completed=${result.completed}`);
    if (result.command.timedOut) parts.push("timeout=true");
    if (result.trace.resultErrors.length) parts.push(`errors=${result.trace.resultErrors.join(",")}`);
  }
  return parts.join(" ");
}
|
|
602
|
+
|
|
603
|
+
/**
 * Entry point: run every scenario (optionally also through Claude Code), print
 * one line per result, write `report.json`, and set the process exit code.
 *
 * The exit code becomes 1 when a pi result fails and is required (or
 * `--strict-observed` is set), or — only with `--strict-claude` — when a Claude
 * result fails under the same condition. Comparison entries never affect the
 * exit code.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const options = parseArgs(process.argv.slice(2));
  if (options.help) {
    printHelp();
    return;
  }

  ensureDirectory(options.sessionRoot);
  const fixtures = await prepareFixtures(options);

  const results = [];
  const record = (result) => {
    results.push(result);
    console.log(formatResult(result, options));
  };

  for (let repeatIndex = 1; repeatIndex <= options.repeat; repeatIndex += 1) {
    for (const scenario of scenarios) {
      const piResult = await runPiScenario(options, fixtures, scenario, repeatIndex);
      record(piResult);

      if (!options.withClaude) continue;

      const claudeResult = await runClaudeScenario(options, fixtures, scenario, repeatIndex);
      record(claudeResult);

      const piBehavior = piResult.trace.behavior;
      const claudeBehavior = claudeResult.trace.behavior;
      const match = piBehavior === claudeBehavior;
      const required = Boolean(scenario.expectedBehavior);
      results.push({
        kind: "comparison",
        scenario: scenario.id,
        repeat: repeatIndex,
        pass: !required || match,
        required,
        match,
        piBehavior,
        claudeBehavior,
      });
      console.log(
        `${match ? "MATCH" : "DIFF"} comparison ${scenario.id}#${repeatIndex} pi=${piBehavior} claude=${claudeBehavior}`,
      );
    }
  }

  const reportPath = path.join(options.sessionRoot, "report.json");
  writeFileSync(reportPath, `${JSON.stringify({ options, fixtures, scenarios, results }, null, 2)}\n`);
  console.log(`report=${reportPath}`);

  const failsTheRun = (result) => {
    const countedFailure = !result.pass && (result.required || options.strictObserved);
    switch (result.kind) {
      case "pi":
        return countedFailure;
      case "claude":
        return options.strictClaude && countedFailure;
      default:
        return false;
    }
  };
  if (results.some(failsTheRun)) {
    process.exitCode = 1;
  }
}
|
|
659
|
+
|
|
660
|
+
// Start the runner; any unexpected failure is printed to stderr and marks the run as failed.
main().catch((failure) => {
  const report = failure?.stack || failure?.message || String(failure);
  console.error(report);
  process.exitCode = 1;
});
|
package/src/pi-subagent.ts
CHANGED
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
type Theme,
|
|
13
13
|
type ToolDefinition,
|
|
14
14
|
} from "@earendil-works/pi-coding-agent";
|
|
15
|
-
import { Container, Text } from "@earendil-works/pi-tui";
|
|
15
|
+
import { Container, Text, TruncatedText } from "@earendil-works/pi-tui";
|
|
16
16
|
import { Type, type Static } from "typebox";
|
|
17
17
|
import {
|
|
18
18
|
AGENT_PROMPT_GUIDELINES,
|
|
@@ -22,7 +22,6 @@ import {
|
|
|
22
22
|
} from "./prompts.ts";
|
|
23
23
|
import type { SubagentExtensionOptions, SubagentProgressNode, SubagentToolDetails, SubagentType } from "./types.ts";
|
|
24
24
|
|
|
25
|
-
const DEFAULT_MAX_DEPTH = 2;
|
|
26
25
|
const DEFAULT_MAX_WIDTH = 4;
|
|
27
26
|
const ALLOWED_SUBAGENTS: SubagentType[] = ["general-purpose", "explorer"];
|
|
28
27
|
|
|
@@ -43,8 +42,6 @@ const agentToolParameters = Type.Object({
|
|
|
43
42
|
type AgentToolParams = Static<typeof agentToolParameters>;
|
|
44
43
|
|
|
45
44
|
interface DelegationState {
|
|
46
|
-
depth: number;
|
|
47
|
-
maxDepth: number;
|
|
48
45
|
maxWidth: number;
|
|
49
46
|
childCount: number;
|
|
50
47
|
progressEnabled: boolean;
|
|
@@ -57,25 +54,21 @@ interface CreateAgentToolOptions {
|
|
|
57
54
|
type AgentToolResult = ReturnType<typeof textResult>;
|
|
58
55
|
|
|
59
56
|
const MAX_ACTIVITY_LINES = 3;
|
|
57
|
+
const ACTIVITY_DISPLAY_PREVIEW_CHARS = 120;
|
|
60
58
|
const PROGRESS_UPDATE_INTERVAL_MS = 250;
|
|
61
59
|
const PROGRESS_STATUSES: SubagentProgressNode["status"][] = ["running", "completed", "rejected", "error"];
|
|
62
60
|
|
|
63
|
-
function getCliMode(argv = process.argv): string | undefined {
|
|
64
|
-
for (let i = 0; i < argv.length; i++) {
|
|
65
|
-
const arg = argv[i];
|
|
66
|
-
if (arg === "--mode") {
|
|
67
|
-
return argv[i + 1];
|
|
68
|
-
}
|
|
69
|
-
if (arg.startsWith("--mode=")) {
|
|
70
|
-
return arg.slice("--mode=".length);
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
return undefined;
|
|
74
|
-
}
|
|
75
|
-
|
|
76
61
|
/**
 * Decide whether compact progress updates should be produced for this context.
 *
 * @param ctx Extension context of the current tool call.
 * @returns `true` only when the context reports a UI and that UI exposes at
 *   least one theme; `false` otherwise, including when the theme lookup throws.
 */
function shouldEnableProgress(ctx: ExtensionContext): boolean {
  if (ctx.hasUI) {
    try {
      // RPC exposes ExtensionUIContext but has no TUI theme surface, so the
      // presence of themes is used to restrict progress updates to the
      // interactive TUI renderer.
      const themes = ctx.ui.getAllThemes();
      return themes.length > 0;
    } catch {
      // Best effort: a UI without a usable theme surface means no progress.
      return false;
    }
  }
  return false;
}
|
|
80
73
|
|
|
81
74
|
function normalizeLimit(value: number | undefined, fallback: number, label: string): number {
|
|
@@ -119,40 +112,28 @@ function isSubagentProgressNode(value: unknown): value is SubagentProgressNode {
|
|
|
119
112
|
typeof value.id === "string" &&
|
|
120
113
|
typeof value.description === "string" &&
|
|
121
114
|
(subagentType === "unknown" || ALLOWED_SUBAGENTS.includes(subagentType as SubagentType)) &&
|
|
122
|
-
Number.isFinite(value.depth) &&
|
|
123
115
|
isProgressStatus(value.status) &&
|
|
124
116
|
Number.isFinite(value.startedAt) &&
|
|
125
117
|
Array.isArray(value.activity) &&
|
|
126
118
|
value.activity.every((line) => typeof line === "string") &&
|
|
127
|
-
Number.isFinite(value.activityCount)
|
|
128
|
-
Array.isArray(value.children)
|
|
119
|
+
Number.isFinite(value.activityCount)
|
|
129
120
|
);
|
|
130
121
|
}
|
|
131
122
|
|
|
132
|
-
function getProgressFromToolResult(result: unknown): SubagentProgressNode | undefined {
|
|
133
|
-
if (!isRecord(result) || !isRecord(result.details) || !("subagentType" in result.details)) {
|
|
134
|
-
return undefined;
|
|
135
|
-
}
|
|
136
|
-
return isSubagentProgressNode(result.details.progress) ? result.details.progress : undefined;
|
|
137
|
-
}
|
|
138
|
-
|
|
139
123
|
/**
 * Build the initial progress record for one subagent launch.
 *
 * @param id Identifier stored on the node (callers pass the tool call id).
 * @param params Agent tool parameters; only `description` is read here.
 * @param subagentType Subagent preset the node is reported under.
 * @param status Initial status; defaults to "running".
 * @returns A node stamped with the current time, with an empty activity log
 *   and an activity count of zero.
 */
function createProgressNode(
  id: string,
  params: AgentToolParams,
  subagentType: SubagentType,
  status: SubagentProgressNode["status"] = "running",
): SubagentProgressNode {
  const { description } = params;
  const node: SubagentProgressNode = {
    id,
    description,
    subagentType,
    status,
    startedAt: Date.now(),
    activity: [],
    activityCount: 0,
  };
  return node;
}
|
|
158
139
|
|
|
@@ -233,18 +214,10 @@ function updateProgressFromEvent(progress: SubagentProgressNode, event: AgentSes
|
|
|
233
214
|
}
|
|
234
215
|
|
|
235
216
|
if (event.type === "tool_execution_update") {
|
|
236
|
-
const childProgress = event.toolName === "Agent" ? getProgressFromToolResult(event.partialResult) : undefined;
|
|
237
|
-
if (childProgress) {
|
|
238
|
-
progress.children = mergeChildProgress(progress.children, childProgress);
|
|
239
|
-
}
|
|
240
217
|
return;
|
|
241
218
|
}
|
|
242
219
|
|
|
243
220
|
if (event.type === "tool_execution_end") {
|
|
244
|
-
const childProgress = event.toolName === "Agent" ? getProgressFromToolResult(event.result) : undefined;
|
|
245
|
-
if (childProgress) {
|
|
246
|
-
progress.children = mergeChildProgress(progress.children, childProgress);
|
|
247
|
-
}
|
|
248
221
|
return;
|
|
249
222
|
}
|
|
250
223
|
|
|
@@ -270,19 +243,6 @@ function updateProgressFromEvent(progress: SubagentProgressNode, event: AgentSes
|
|
|
270
243
|
}
|
|
271
244
|
}
|
|
272
245
|
|
|
273
|
-
function mergeChildProgress(
|
|
274
|
-
children: SubagentProgressNode[],
|
|
275
|
-
child: SubagentProgressNode,
|
|
276
|
-
): SubagentProgressNode[] {
|
|
277
|
-
const index = children.findIndex((candidate) => candidate.id === child.id);
|
|
278
|
-
if (index === -1) {
|
|
279
|
-
return [...children, child];
|
|
280
|
-
}
|
|
281
|
-
const next = children.slice();
|
|
282
|
-
next[index] = child;
|
|
283
|
-
return next;
|
|
284
|
-
}
|
|
285
|
-
|
|
286
246
|
/**
 * Label shown for a subagent type in progress output.
 *
 * Currently the type name itself is the label, so the value is passed through
 * unchanged.
 *
 * @param subagentType A known subagent type, or "unknown".
 * @returns The label text.
 */
function getDisplayLabel(subagentType: SubagentType | "unknown"): string {
  const label: string = subagentType;
  return label;
}
|
|
@@ -308,18 +268,21 @@ function formatDuration(ms: number): string {
|
|
|
308
268
|
return `${seconds}s`;
|
|
309
269
|
}
|
|
310
270
|
|
|
311
|
-
function
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
271
|
+
/**
 * Shorten one activity line for the compact progress view.
 *
 * Lines of at most ACTIVITY_DISPLAY_PREVIEW_CHARS UTF-16 code units are
 * returned unchanged. Longer lines are cut to that length, right-trimmed, and
 * suffixed with " ... (+N chars)", where N is the number of code units that
 * were cut off.
 *
 * @param line Activity text to display.
 * @returns The original line, or a truncated preview with a hidden-length suffix.
 */
function formatActivityLineForDisplay(line: string): string {
  if (line.length <= ACTIVITY_DISPLAY_PREVIEW_CHARS) {
    return line;
  }

  let cut = ACTIVITY_DISPLAY_PREVIEW_CHARS;
  // Do not cut between the two halves of a surrogate pair: a lone high
  // surrogate at the end of the preview would render as a broken character.
  const lastKept = line.charCodeAt(cut - 1);
  const firstDropped = line.charCodeAt(cut);
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  if (splitsPair) {
    cut -= 1;
  }

  const hiddenChars = line.length - cut;
  return `${line.slice(0, cut).trimEnd()} ... (+${hiddenChars} chars)`;
}
|
|
278
|
+
|
|
279
|
+
function renderProgressNode(node: SubagentProgressNode, theme: Theme): Container {
|
|
316
280
|
const container = new Container();
|
|
317
|
-
const indent = " ".repeat(depth);
|
|
318
281
|
const status = node.status === "completed" ? "done" : node.status;
|
|
319
282
|
const elapsed = formatDuration((node.endedAt ?? Date.now()) - node.startedAt);
|
|
320
283
|
container.addChild(
|
|
321
284
|
new Text(
|
|
322
|
-
`${
|
|
285
|
+
`${theme.bold(formatProgressTitle(node))} ${theme.fg("dim", `${status} ${elapsed}`)}`,
|
|
323
286
|
0,
|
|
324
287
|
0,
|
|
325
288
|
),
|
|
@@ -327,17 +290,14 @@ function renderProgressNode(
|
|
|
327
290
|
|
|
328
291
|
const skipped = node.activityCount - node.activity.length;
|
|
329
292
|
if (skipped > 0) {
|
|
330
|
-
container.addChild(new Text(
|
|
293
|
+
container.addChild(new Text(` ${theme.fg("muted", `... +${skipped} earlier events`)}`, 0, 0));
|
|
331
294
|
}
|
|
332
295
|
for (const line of node.activity) {
|
|
333
|
-
container.addChild(new
|
|
296
|
+
container.addChild(new TruncatedText(` ${theme.fg("muted", formatActivityLineForDisplay(line))}`, 0, 0));
|
|
334
297
|
}
|
|
335
298
|
|
|
336
|
-
for (const child of node.children) {
|
|
337
|
-
container.addChild(renderProgressNode(child, theme, depth + 1));
|
|
338
|
-
}
|
|
339
299
|
if (node.error) {
|
|
340
|
-
container.addChild(new Text(
|
|
300
|
+
container.addChild(new Text(` ${theme.fg("error", node.error)}`, 0, 0));
|
|
341
301
|
}
|
|
342
302
|
|
|
343
303
|
return container;
|
|
@@ -372,15 +332,13 @@ async function runSubagent(
|
|
|
372
332
|
ctx: ExtensionContext,
|
|
373
333
|
onProgress: ((result: AgentToolResult) => void) | undefined,
|
|
374
334
|
): Promise<ReturnType<typeof textResult>> {
|
|
375
|
-
const progressDepth = state.depth + 1;
|
|
376
335
|
const progress =
|
|
377
|
-
state.progressEnabled ? createProgressNode(toolCallId, params, subagentType
|
|
336
|
+
state.progressEnabled ? createProgressNode(toolCallId, params, subagentType) : undefined;
|
|
378
337
|
|
|
379
338
|
if (!ctx.model) {
|
|
380
339
|
return textResult("Cannot launch subagent: no model is selected.", {
|
|
381
340
|
description: params.description,
|
|
382
341
|
subagentType,
|
|
383
|
-
depth: progressDepth,
|
|
384
342
|
status: "rejected",
|
|
385
343
|
error: "No model is selected",
|
|
386
344
|
...(progress ? { progress: { ...progress, status: "rejected", error: "No model is selected" } } : {}),
|
|
@@ -392,25 +350,19 @@ async function runSubagent(
|
|
|
392
350
|
const settingsManager = SettingsManager.create(cwd, agentDir);
|
|
393
351
|
const appendPrompts = [
|
|
394
352
|
getPresetAppendPrompt(subagentType),
|
|
395
|
-
buildCoordinatorPrompt(state.maxDepth, state.maxWidth),
|
|
396
353
|
].filter((prompt): prompt is string => Boolean(prompt));
|
|
397
354
|
const resourceLoader = new DefaultResourceLoader({
|
|
398
355
|
cwd,
|
|
399
356
|
agentDir,
|
|
400
357
|
settingsManager,
|
|
401
|
-
|
|
358
|
+
extensionsOverride: (base) => ({
|
|
359
|
+
...base,
|
|
360
|
+
extensions: base.extensions.filter((extension) => !extension.tools.has("Agent")),
|
|
361
|
+
}),
|
|
362
|
+
appendSystemPromptOverride: (base) => [...base, ...appendPrompts],
|
|
402
363
|
});
|
|
403
364
|
await resourceLoader.reload();
|
|
404
365
|
|
|
405
|
-
const childState: DelegationState = {
|
|
406
|
-
depth: progressDepth,
|
|
407
|
-
maxDepth: state.maxDepth,
|
|
408
|
-
maxWidth: state.maxWidth,
|
|
409
|
-
childCount: 0,
|
|
410
|
-
progressEnabled: state.progressEnabled,
|
|
411
|
-
};
|
|
412
|
-
|
|
413
|
-
const nestedAgentTool = createAgentTool(childState, options);
|
|
414
366
|
const { session } = await createAgentSession({
|
|
415
367
|
cwd,
|
|
416
368
|
agentDir,
|
|
@@ -420,7 +372,7 @@ async function runSubagent(
|
|
|
420
372
|
settingsManager,
|
|
421
373
|
sessionManager: SessionManager.inMemory(cwd),
|
|
422
374
|
resourceLoader,
|
|
423
|
-
|
|
375
|
+
excludeTools: ["Agent"],
|
|
424
376
|
});
|
|
425
377
|
|
|
426
378
|
let abortHandler: (() => void) | undefined;
|
|
@@ -428,7 +380,9 @@ async function runSubagent(
|
|
|
428
380
|
abortHandler = () => {
|
|
429
381
|
void session.abort();
|
|
430
382
|
};
|
|
431
|
-
signal.
|
|
383
|
+
if (!signal.aborted) {
|
|
384
|
+
signal.addEventListener("abort", abortHandler, { once: true });
|
|
385
|
+
}
|
|
432
386
|
}
|
|
433
387
|
|
|
434
388
|
let lastProgressEmit = 0;
|
|
@@ -445,7 +399,6 @@ async function runSubagent(
|
|
|
445
399
|
onProgress(textResult(`Subagent "${params.description}" (${subagentType}) is running.`, {
|
|
446
400
|
description: params.description,
|
|
447
401
|
subagentType,
|
|
448
|
-
depth: progressDepth,
|
|
449
402
|
status: progress.status,
|
|
450
403
|
result: progress.result,
|
|
451
404
|
error: progress.error,
|
|
@@ -474,7 +427,13 @@ async function runSubagent(
|
|
|
474
427
|
: undefined;
|
|
475
428
|
|
|
476
429
|
try {
|
|
430
|
+
if (signal?.aborted) {
|
|
431
|
+
throw new Error("Subagent aborted before prompt start");
|
|
432
|
+
}
|
|
477
433
|
await session.bindExtensions({});
|
|
434
|
+
if (signal?.aborted) {
|
|
435
|
+
throw new Error("Subagent aborted before prompt start");
|
|
436
|
+
}
|
|
478
437
|
emitProgress();
|
|
479
438
|
await session.prompt(params.prompt, { source: "extension" });
|
|
480
439
|
const result = extractFinalAssistantText(session.messages) || "(no final text output)";
|
|
@@ -486,7 +445,6 @@ async function runSubagent(
|
|
|
486
445
|
return textResult(`Subagent "${params.description}" (${subagentType}) completed:\n\n${result}`, {
|
|
487
446
|
description: params.description,
|
|
488
447
|
subagentType,
|
|
489
|
-
depth: childState.depth,
|
|
490
448
|
status: "completed",
|
|
491
449
|
result,
|
|
492
450
|
...(progress ? { progress } : {}),
|
|
@@ -501,7 +459,6 @@ async function runSubagent(
|
|
|
501
459
|
return textResult(`Subagent "${params.description}" (${subagentType}) failed: ${message}`, {
|
|
502
460
|
description: params.description,
|
|
503
461
|
subagentType,
|
|
504
|
-
depth: childState.depth,
|
|
505
462
|
status: "error",
|
|
506
463
|
error: message,
|
|
507
464
|
...(progress ? { progress } : {}),
|
|
@@ -542,33 +499,18 @@ function createAgentTool(
|
|
|
542
499
|
{
|
|
543
500
|
description: params.description,
|
|
544
501
|
subagentType: "unknown",
|
|
545
|
-
depth: effectiveState.depth + 1,
|
|
546
502
|
status: "rejected",
|
|
547
503
|
error: "Unknown subagent_type",
|
|
548
504
|
},
|
|
549
505
|
);
|
|
550
506
|
}
|
|
551
507
|
|
|
552
|
-
if (effectiveState.depth >= effectiveState.maxDepth) {
|
|
553
|
-
return textResult(
|
|
554
|
-
`Maximum subagent depth reached. Current depth: ${effectiveState.depth}; maxDepth: ${effectiveState.maxDepth}.`,
|
|
555
|
-
{
|
|
556
|
-
description: params.description,
|
|
557
|
-
subagentType,
|
|
558
|
-
depth: effectiveState.depth + 1,
|
|
559
|
-
status: "rejected",
|
|
560
|
-
error: "Maximum subagent depth reached",
|
|
561
|
-
},
|
|
562
|
-
);
|
|
563
|
-
}
|
|
564
|
-
|
|
565
508
|
if (state.childCount >= effectiveState.maxWidth) {
|
|
566
509
|
return textResult(
|
|
567
510
|
`Maximum subagent width reached for this agent run. maxWidth: ${effectiveState.maxWidth}.`,
|
|
568
511
|
{
|
|
569
512
|
description: params.description,
|
|
570
513
|
subagentType,
|
|
571
|
-
depth: effectiveState.depth + 1,
|
|
572
514
|
status: "rejected",
|
|
573
515
|
error: "Maximum subagent width reached",
|
|
574
516
|
},
|
|
@@ -600,7 +542,7 @@ function createAgentTool(
|
|
|
600
542
|
);
|
|
601
543
|
},
|
|
602
544
|
renderResult(result, _options, theme) {
|
|
603
|
-
const details = result.details;
|
|
545
|
+
const details = result.details as SubagentToolDetails;
|
|
604
546
|
if (details.progress) {
|
|
605
547
|
return renderProgressNode(details.progress, theme);
|
|
606
548
|
}
|
|
@@ -614,13 +556,10 @@ function createAgentTool(
|
|
|
614
556
|
}
|
|
615
557
|
|
|
616
558
|
export function createSubagentExtension(options: SubagentExtensionOptions = {}): ExtensionFactory {
|
|
617
|
-
const maxDepth = normalizeLimit(options.maxDepth, DEFAULT_MAX_DEPTH, "maxDepth");
|
|
618
559
|
const maxWidth = normalizeLimit(options.maxWidth, DEFAULT_MAX_WIDTH, "maxWidth");
|
|
619
560
|
|
|
620
561
|
return function subagentExtension(pi: ExtensionAPI) {
|
|
621
562
|
const rootState: DelegationState = {
|
|
622
|
-
depth: 0,
|
|
623
|
-
maxDepth,
|
|
624
563
|
maxWidth,
|
|
625
564
|
childCount: 0,
|
|
626
565
|
progressEnabled: false,
|
|
@@ -632,9 +571,12 @@ export function createSubagentExtension(options: SubagentExtensionOptions = {}):
|
|
|
632
571
|
pi.registerTool(createAgentTool(rootState, toolOptions));
|
|
633
572
|
|
|
634
573
|
pi.on("before_agent_start", (event) => {
|
|
574
|
+
if (!pi.getAllTools().some((tool) => tool.name === "Agent")) {
|
|
575
|
+
return;
|
|
576
|
+
}
|
|
635
577
|
rootState.childCount = 0;
|
|
636
578
|
return {
|
|
637
|
-
systemPrompt: `${event.systemPrompt}\n\n${buildCoordinatorPrompt(
|
|
579
|
+
systemPrompt: `${event.systemPrompt}\n\n${buildCoordinatorPrompt()}`,
|
|
638
580
|
};
|
|
639
581
|
});
|
|
640
582
|
};
|
package/src/prompts.ts
CHANGED
|
@@ -19,6 +19,7 @@ export const AGENT_PROMPT_GUIDELINES = [
|
|
|
19
19
|
"If the user asks to explore or survey a repo, use explorer to produce a concise map before doing detailed follow-up yourself.",
|
|
20
20
|
"If the user asks for parallel work, launch multiple Agent calls in the same assistant response.",
|
|
21
21
|
"Write self-contained subagent prompts: fresh subagents do not inherit parent conversation, tool results, or reasoning.",
|
|
22
|
+
"Subagents cannot launch Agent themselves; coordinate any follow-up delegation from the main conversation after a result returns.",
|
|
22
23
|
"Clearly tell the subagent whether you expect read-only research or code changes.",
|
|
23
24
|
"The Agent final message is returned to you as the tool result and is not shown to the user; relay what matters.",
|
|
24
25
|
];
|
|
@@ -47,17 +48,24 @@ Adapt your search breadth to the caller's prompt. For targeted lookups, be fast
|
|
|
47
48
|
Return a concise final report with the relevant files, symbols, and caveats. Do not create documentation files.`;
|
|
48
49
|
}
|
|
49
50
|
|
|
50
|
-
export function buildCoordinatorPrompt(
|
|
51
|
+
export function buildCoordinatorPrompt(): string {
|
|
51
52
|
return `# Subagent Delegation
|
|
52
53
|
|
|
53
54
|
Available agents:
|
|
54
55
|
${formatAvailableAgents(PRESET_DESCRIPTIONS)}
|
|
55
56
|
|
|
56
|
-
Use Agent
|
|
57
|
+
Use Agent when a specialized agent matches the task, the work can run independently, or delegating would keep large search/read output out of the main context.
|
|
58
|
+
|
|
59
|
+
Guidelines:
|
|
60
|
+
- Do not use subagents excessively; direct lookup is better when the target file, symbol, or value is already known.
|
|
61
|
+
- If the user asks for parallel work, launch independent Agent calls in the same assistant response.
|
|
62
|
+
- Subagents start fresh and do not inherit parent messages, tool results, or reasoning. Brief them with all needed context.
|
|
63
|
+
- Subagents cannot launch other subagents. Coordinate follow-up delegation from the main conversation after each result returns.
|
|
64
|
+
- The Agent final message is returned to you as the tool result. Relay what matters to the user.
|
|
57
65
|
|
|
58
66
|
Example usage:
|
|
59
67
|
- User asks "explore this repo": use Agent with subagent_type "explorer" and ask it to map the project purpose, key directories, important files, scripts, tests, and caveats without editing files.
|
|
60
68
|
- User asks for a second opinion on a risky change: use Agent with subagent_type "general-purpose" and give it enough context to review independently.
|
|
61
69
|
|
|
62
|
-
|
|
70
|
+
Root-level parallel delegation is bounded by the extension. If the limit is reached, the Agent tool will reject the call.`;
|
|
63
71
|
}
|
package/src/types.ts
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
export type SubagentType = "general-purpose" | "explorer";
|
|
2
2
|
|
|
3
3
|
export interface SubagentExtensionOptions {
|
|
4
|
-
maxDepth?: number;
|
|
5
4
|
maxWidth?: number;
|
|
6
5
|
}
|
|
7
6
|
|
|
@@ -9,13 +8,11 @@ export interface SubagentProgressNode {
|
|
|
9
8
|
id: string;
|
|
10
9
|
description: string;
|
|
11
10
|
subagentType: SubagentType | "unknown";
|
|
12
|
-
depth: number;
|
|
13
11
|
status: "running" | "completed" | "rejected" | "error";
|
|
14
12
|
startedAt: number;
|
|
15
13
|
endedAt?: number;
|
|
16
14
|
activity: string[];
|
|
17
15
|
activityCount: number;
|
|
18
|
-
children: SubagentProgressNode[];
|
|
19
16
|
result?: string;
|
|
20
17
|
error?: string;
|
|
21
18
|
}
|
|
@@ -23,7 +20,6 @@ export interface SubagentProgressNode {
|
|
|
23
20
|
export interface SubagentToolDetails {
|
|
24
21
|
description: string;
|
|
25
22
|
subagentType: SubagentType | "unknown";
|
|
26
|
-
depth: number;
|
|
27
23
|
status: "running" | "completed" | "rejected" | "error";
|
|
28
24
|
result?: string;
|
|
29
25
|
error?: string;
|