@phi-code-admin/phi-code 0.72.0 → 0.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/test.md +3 -0
- package/extensions/phi/memory.ts +2 -1
- package/extensions/phi/orchestrator.ts +80 -470
- package/package.json +1 -1
package/agents/test.md
CHANGED
|
@@ -33,6 +33,9 @@ Use implementation results to know which files were created/modified and what be
|
|
|
33
33
|
- **Realistic assertions**: Test what matters, not trivial details
|
|
34
34
|
- **Match conventions**: Use the project's test framework, directory structure, and naming patterns
|
|
35
35
|
- **Clean test code**: Tests are documentation — use descriptive names that explain expected behavior
|
|
36
|
+
- Prefer targeted `edit` calls over full file rewrites. When a test fails, fix ONLY the failing test function, not the entire file
|
|
37
|
+
- Maximum 1 full file rewrite per test file. After that, use `edit` for surgical fixes
|
|
38
|
+
- When debugging test failures: read the error → locate the exact failing assertion → fix that specific line
|
|
36
39
|
|
|
37
40
|
## Test Writing
|
|
38
41
|
|
package/extensions/phi/memory.ts
CHANGED
|
@@ -43,9 +43,10 @@ export default function memoryExtension(pi: ExtensionAPI) {
|
|
|
43
43
|
description: "Search for content in memory using unified search (notes + ontology + vector search)",
|
|
44
44
|
promptSnippet: "Search project memory (notes, ontology, vector search). ALWAYS call before answering questions about prior work, decisions, or project context.",
|
|
45
45
|
promptGuidelines: [
|
|
46
|
-
"Before
|
|
46
|
+
"MANDATORY: Before starting ANY task, call memory_search with relevant keywords. This is not optional.",
|
|
47
47
|
"When starting work on a topic, search memory for existing notes and learnings.",
|
|
48
48
|
"After completing important work or learning something new, use memory_write to save it.",
|
|
49
|
+
"MANDATORY: After completing any significant work, call memory_write to save what you did and what you learned.",
|
|
49
50
|
"When a command fails or produces an unexpected error, document the error and fix in memory_write (self-improvement).",
|
|
50
51
|
"When the user corrects you, save the correction in memory_write so you never repeat the mistake.",
|
|
51
52
|
"After a significant debugging session, write a summary of root cause and solution to memory.",
|
|
@@ -16,41 +16,12 @@
|
|
|
16
16
|
* /plans — List plans and their execution status
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
-
import { Type } from "@sinclair/typebox";
|
|
20
19
|
import type { ExtensionAPI } from "phi-code";
|
|
21
20
|
import { writeFile, mkdir, readdir, readFile } from "node:fs/promises";
|
|
22
21
|
import { join } from "node:path";
|
|
23
22
|
import { existsSync, readFileSync } from "node:fs";
|
|
24
|
-
// execFile removed — tasks now execute in-session, no subprocess
|
|
25
23
|
import { homedir } from "node:os";
|
|
26
24
|
|
|
27
|
-
// ─── Types ───────────────────────────────────────────────────────────────
|
|
28
|
-
|
|
29
|
-
interface TaskDef {
|
|
30
|
-
title: string;
|
|
31
|
-
description: string;
|
|
32
|
-
agent?: string;
|
|
33
|
-
priority?: string;
|
|
34
|
-
dependencies?: number[];
|
|
35
|
-
subtasks?: string[];
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
interface TaskResult {
|
|
39
|
-
taskIndex: number;
|
|
40
|
-
title: string;
|
|
41
|
-
agent: string;
|
|
42
|
-
status: "success" | "error" | "skipped";
|
|
43
|
-
output: string;
|
|
44
|
-
durationMs: number;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
interface AgentDef {
|
|
48
|
-
name: string;
|
|
49
|
-
description: string;
|
|
50
|
-
tools: string;
|
|
51
|
-
systemPrompt: string;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
25
|
// ─── Extension ───────────────────────────────────────────────────────────
|
|
55
26
|
|
|
56
27
|
export default function orchestratorExtension(pi: ExtensionAPI) {
|
|
@@ -64,390 +35,6 @@ export default function orchestratorExtension(pi: ExtensionAPI) {
|
|
|
64
35
|
return new Date().toISOString().replace(/[:.]/g, "-").replace("T", "_").slice(0, 19);
|
|
65
36
|
}
|
|
66
37
|
|
|
67
|
-
// ─── Agent Discovery ─────────────────────────────────────────────
|
|
68
|
-
|
|
69
|
-
function loadAgentDefs(): Map<string, AgentDef> {
|
|
70
|
-
const agents = new Map<string, AgentDef>();
|
|
71
|
-
const dirs = [
|
|
72
|
-
join(process.cwd(), ".phi", "agents"),
|
|
73
|
-
join(homedir(), ".phi", "agent", "agents"),
|
|
74
|
-
join(__dirname, "..", "..", "..", "agents"),
|
|
75
|
-
];
|
|
76
|
-
|
|
77
|
-
for (const dir of dirs) {
|
|
78
|
-
if (!existsSync(dir)) continue;
|
|
79
|
-
try {
|
|
80
|
-
const files = require("fs").readdirSync(dir) as string[];
|
|
81
|
-
for (const file of files) {
|
|
82
|
-
if (!file.endsWith(".md")) continue;
|
|
83
|
-
const name = file.replace(".md", "");
|
|
84
|
-
if (agents.has(name)) continue;
|
|
85
|
-
|
|
86
|
-
try {
|
|
87
|
-
const content = readFileSync(join(dir, file), "utf-8");
|
|
88
|
-
const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/);
|
|
89
|
-
if (!fmMatch) continue;
|
|
90
|
-
|
|
91
|
-
const frontmatter = fmMatch[1];
|
|
92
|
-
const body = fmMatch[2].trim();
|
|
93
|
-
const desc = frontmatter.match(/description:\s*(.+)/)?.[1] || "";
|
|
94
|
-
const tools = frontmatter.match(/tools:\s*(.+)/)?.[1] || "";
|
|
95
|
-
|
|
96
|
-
agents.set(name, { name, description: desc, tools, systemPrompt: body });
|
|
97
|
-
} catch { /* skip */ }
|
|
98
|
-
}
|
|
99
|
-
} catch { /* skip */ }
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
return agents;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
function resolveAgentModel(agentType: string): string | null {
|
|
106
|
-
const routingPath = join(homedir(), ".phi", "agent", "routing.json");
|
|
107
|
-
try {
|
|
108
|
-
const config = JSON.parse(readFileSync(routingPath, "utf-8"));
|
|
109
|
-
for (const [_cat, route] of Object.entries(config.routes || {})) {
|
|
110
|
-
const r = route as any;
|
|
111
|
-
if (r.agent === agentType) return r.preferredModel || null;
|
|
112
|
-
}
|
|
113
|
-
// Map agent type to route category
|
|
114
|
-
const categoryMap: Record<string, string> = {
|
|
115
|
-
code: "code", explore: "explore", plan: "plan",
|
|
116
|
-
test: "test", review: "review", debug: "debug",
|
|
117
|
-
};
|
|
118
|
-
const category = categoryMap[agentType];
|
|
119
|
-
if (category && config.routes?.[category]) {
|
|
120
|
-
return config.routes[category].preferredModel || null;
|
|
121
|
-
}
|
|
122
|
-
return config.default?.model || null;
|
|
123
|
-
} catch {
|
|
124
|
-
return null;
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
function findPhiBinary(): string {
|
|
129
|
-
// Try the bundled CLI relative to extensions dir
|
|
130
|
-
const bundledCli = join(__dirname, "..", "..", "..", "dist", "cli.js");
|
|
131
|
-
if (existsSync(bundledCli)) return bundledCli;
|
|
132
|
-
|
|
133
|
-
// Try npm global install paths
|
|
134
|
-
const npmGlobalPaths = [
|
|
135
|
-
join(homedir(), "AppData", "Roaming", "npm", "node_modules", "@phi-code-admin", "phi-code", "dist", "cli.js"), // Windows
|
|
136
|
-
join(homedir(), ".npm-global", "lib", "node_modules", "@phi-code-admin", "phi-code", "dist", "cli.js"), // Linux custom
|
|
137
|
-
"/usr/local/lib/node_modules/@phi-code-admin/phi-code/dist/cli.js", // Linux/Mac default
|
|
138
|
-
"/usr/lib/node_modules/@phi-code-admin/phi-code/dist/cli.js", // Some Linux
|
|
139
|
-
];
|
|
140
|
-
for (const p of npmGlobalPaths) {
|
|
141
|
-
if (existsSync(p)) return p;
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
// Try `which phi` (Linux/Mac) or `where phi` (Windows)
|
|
145
|
-
try {
|
|
146
|
-
const isWin = process.platform === "win32";
|
|
147
|
-
const cmd = isWin ? "where" : "which";
|
|
148
|
-
const result = require("child_process").execSync(`${cmd} phi 2>${isWin ? "NUL" : "/dev/null"}`, { encoding: "utf-8" }).trim();
|
|
149
|
-
if (result) {
|
|
150
|
-
const firstLine = result.split("\n")[0].trim();
|
|
151
|
-
// On Windows, `where phi` returns the .cmd shim; we need the actual JS
|
|
152
|
-
if (isWin && firstLine.endsWith(".cmd")) {
|
|
153
|
-
const npmPrefix = require("child_process").execSync("npm prefix -g", { encoding: "utf-8" }).trim();
|
|
154
|
-
const jsPath = join(npmPrefix, "node_modules", "@phi-code-admin", "phi-code", "dist", "cli.js");
|
|
155
|
-
if (existsSync(jsPath)) return jsPath;
|
|
156
|
-
}
|
|
157
|
-
return firstLine;
|
|
158
|
-
}
|
|
159
|
-
} catch { /* not in PATH */ }
|
|
160
|
-
|
|
161
|
-
// Last resort: assume phi is in PATH (works with shell:true on Windows)
|
|
162
|
-
return "phi";
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
// ─── Task Execution (in-session, no subprocess) ─────────────────
|
|
166
|
-
|
|
167
|
-
/**
|
|
168
|
-
* Execute a task by sending it as a user message to the current session.
|
|
169
|
-
* The LLM handles it directly — no subprocess spawning, no cold boot.
|
|
170
|
-
* Much faster and more reliable than spawning phi --print processes.
|
|
171
|
-
*/
|
|
172
|
-
function executeTaskInSession(
|
|
173
|
-
task: TaskDef,
|
|
174
|
-
sharedContext: {
|
|
175
|
-
projectTitle: string;
|
|
176
|
-
projectDescription: string;
|
|
177
|
-
specSummary: string;
|
|
178
|
-
completedTasks: Array<{ index: number; title: string; agent: string; output: string }>;
|
|
179
|
-
},
|
|
180
|
-
): { taskPrompt: string } {
|
|
181
|
-
const agentType = task.agent || "code";
|
|
182
|
-
|
|
183
|
-
// Build prompt with shared context
|
|
184
|
-
let taskPrompt = `## 🔧 Task: ${task.title} [${agentType}]\n\n`;
|
|
185
|
-
|
|
186
|
-
taskPrompt += `**Project:** ${sharedContext.projectTitle}\n\n`;
|
|
187
|
-
|
|
188
|
-
if (sharedContext.specSummary) {
|
|
189
|
-
taskPrompt += `**Spec:** ${sharedContext.specSummary}\n\n`;
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
// Inject results from dependency tasks
|
|
193
|
-
const deps = task.dependencies || [];
|
|
194
|
-
if (deps.length > 0) {
|
|
195
|
-
const depResults = sharedContext.completedTasks.filter(ct => deps.includes(ct.index));
|
|
196
|
-
if (depResults.length > 0) {
|
|
197
|
-
taskPrompt += `**Previous results:**\n`;
|
|
198
|
-
for (const dep of depResults) {
|
|
199
|
-
const truncated = dep.output.length > 500 ? dep.output.slice(0, 500) + "..." : dep.output;
|
|
200
|
-
taskPrompt += `- Task ${dep.index} (${dep.title}): ${truncated}\n`;
|
|
201
|
-
}
|
|
202
|
-
taskPrompt += "\n";
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
// The actual task
|
|
207
|
-
taskPrompt += `### What to do\n\n${task.description}\n`;
|
|
208
|
-
if (task.subtasks && task.subtasks.length > 0) {
|
|
209
|
-
taskPrompt += "\n**Sub-tasks:**\n" + task.subtasks.map((st, i) => `${i + 1}. ${st}`).join("\n") + "\n";
|
|
210
|
-
}
|
|
211
|
-
taskPrompt += `\n**Instructions:** Execute this task completely. Create/edit all necessary files. Report what you did.\n`;
|
|
212
|
-
|
|
213
|
-
return { taskPrompt };
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
// ─── Execute All Tasks (parallel with dependency resolution) ─────
|
|
217
|
-
|
|
218
|
-
async function executePlan(
|
|
219
|
-
tasks: TaskDef[],
|
|
220
|
-
todoFile: string,
|
|
221
|
-
notify: (msg: string, type: "info" | "error" | "warning") => void,
|
|
222
|
-
projectContext?: { title: string; description: string; specSummary: string },
|
|
223
|
-
): Promise<{ results: TaskResult[]; progressFile: string }> {
|
|
224
|
-
const progressFile = todoFile.replace("todo-", "progress-");
|
|
225
|
-
const progressPath = join(plansDir, progressFile);
|
|
226
|
-
const totalTasks = tasks.length;
|
|
227
|
-
|
|
228
|
-
const sharedContext = {
|
|
229
|
-
projectTitle: projectContext?.title || "Project",
|
|
230
|
-
projectDescription: projectContext?.description || "",
|
|
231
|
-
specSummary: projectContext?.specSummary || "",
|
|
232
|
-
completedTasks: [] as Array<{ index: number; title: string; agent: string; output: string }>,
|
|
233
|
-
};
|
|
234
|
-
|
|
235
|
-
notify(`🚀 Executing ${totalTasks} tasks in-session...`, "info");
|
|
236
|
-
|
|
237
|
-
// Build a single comprehensive prompt with ALL tasks
|
|
238
|
-
let megaPrompt = `# 📋 Project: ${sharedContext.projectTitle}\n\n`;
|
|
239
|
-
megaPrompt += `${sharedContext.projectDescription}\n\n`;
|
|
240
|
-
if (sharedContext.specSummary) {
|
|
241
|
-
megaPrompt += `## Spec\n${sharedContext.specSummary}\n\n`;
|
|
242
|
-
}
|
|
243
|
-
megaPrompt += `## Tasks (execute ALL in order)\n\n`;
|
|
244
|
-
|
|
245
|
-
const results: TaskResult[] = [];
|
|
246
|
-
|
|
247
|
-
for (let i = 0; i < tasks.length; i++) {
|
|
248
|
-
const task = tasks[i];
|
|
249
|
-
const { taskPrompt } = executeTaskInSession(task, sharedContext);
|
|
250
|
-
megaPrompt += `---\n\n${taskPrompt}\n\n`;
|
|
251
|
-
results.push({
|
|
252
|
-
taskIndex: i + 1, title: task.title,
|
|
253
|
-
agent: task.agent || "code", status: "success",
|
|
254
|
-
output: "(in-session)", durationMs: 0,
|
|
255
|
-
});
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
megaPrompt += `---\n\n## ⚠️ Instructions\n\n`;
|
|
259
|
-
megaPrompt += `Execute ALL ${totalTasks} tasks above **sequentially**. For each task:\n`;
|
|
260
|
-
megaPrompt += `1. Create/edit the required files using your tools\n`;
|
|
261
|
-
megaPrompt += `2. Report what you did briefly\n`;
|
|
262
|
-
megaPrompt += `3. Move to the next task\n\n`;
|
|
263
|
-
megaPrompt += `Do NOT skip any task. Complete the entire project.\n`;
|
|
264
|
-
|
|
265
|
-
// Write progress file
|
|
266
|
-
let progress = `# Progress: ${todoFile}\n\n`;
|
|
267
|
-
progress += `**Started:** ${new Date().toLocaleString()}\n`;
|
|
268
|
-
progress += `**Tasks:** ${totalTasks} | **Mode:** in-session\n\n`;
|
|
269
|
-
for (const r of results) {
|
|
270
|
-
progress += `- Task ${r.taskIndex}: ${r.title} [${r.agent}]\n`;
|
|
271
|
-
}
|
|
272
|
-
await writeFile(progressPath, progress, "utf-8");
|
|
273
|
-
|
|
274
|
-
// Return the mega-prompt as tool result — LLM sees it and executes
|
|
275
|
-
return { results, progressFile, megaPrompt };
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
// ─── Generate Plan Files ─────────────────────────────────────────
|
|
279
|
-
|
|
280
|
-
function generateSpec(p: {
|
|
281
|
-
title: string; description: string; goals: string[]; requirements: string[];
|
|
282
|
-
architecture?: string[]; constraints?: string[]; successCriteria?: string[]; tasks: TaskDef[];
|
|
283
|
-
}): string {
|
|
284
|
-
let spec = `# ${p.title}\n\n`;
|
|
285
|
-
spec += `**Created:** ${new Date().toLocaleString()}\n\n`;
|
|
286
|
-
spec += `## Description\n\n${p.description}\n\n`;
|
|
287
|
-
spec += `## Goals\n\n`;
|
|
288
|
-
p.goals.forEach((g, i) => { spec += `${i + 1}. ${g}\n`; });
|
|
289
|
-
spec += "\n## Requirements\n\n";
|
|
290
|
-
p.requirements.forEach(r => { spec += `- ${r}\n`; });
|
|
291
|
-
spec += "\n";
|
|
292
|
-
if (p.architecture?.length) {
|
|
293
|
-
spec += `## Architecture\n\n`;
|
|
294
|
-
p.architecture.forEach(a => { spec += `- ${a}\n`; });
|
|
295
|
-
spec += "\n";
|
|
296
|
-
}
|
|
297
|
-
if (p.constraints?.length) {
|
|
298
|
-
spec += `## Constraints\n\n`;
|
|
299
|
-
p.constraints.forEach(c => { spec += `- ${c}\n`; });
|
|
300
|
-
spec += "\n";
|
|
301
|
-
}
|
|
302
|
-
if (p.successCriteria?.length) {
|
|
303
|
-
spec += `## Success Criteria\n\n`;
|
|
304
|
-
p.successCriteria.forEach(s => { spec += `- [ ] ${s}\n`; });
|
|
305
|
-
spec += "\n";
|
|
306
|
-
}
|
|
307
|
-
spec += `## Task Overview\n\n| # | Task | Agent | Priority | Dependencies |\n|---|------|-------|----------|-------------|\n`;
|
|
308
|
-
p.tasks.forEach((t, i) => {
|
|
309
|
-
const deps = t.dependencies?.map(d => `#${d}`).join(", ") || "—";
|
|
310
|
-
spec += `| ${i + 1} | ${t.title} | ${t.agent || "code"} | ${t.priority || "medium"} | ${deps} |\n`;
|
|
311
|
-
});
|
|
312
|
-
spec += `\n---\n*Generated by Phi Code Orchestrator*\n`;
|
|
313
|
-
return spec;
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
function generateTodo(title: string, tasks: TaskDef[]): string {
|
|
317
|
-
let todo = `# TODO: ${title}\n\n`;
|
|
318
|
-
todo += `**Created:** ${new Date().toLocaleString()}\n`;
|
|
319
|
-
todo += `**Tasks:** ${tasks.length}\n**Status:** executing\n\n`;
|
|
320
|
-
tasks.forEach((t, i) => {
|
|
321
|
-
const agentTag = t.agent ? ` [${t.agent}]` : "";
|
|
322
|
-
const prioTag = t.priority === "high" ? " 🔴" : t.priority === "low" ? " 🟢" : " 🟡";
|
|
323
|
-
const depsTag = t.dependencies?.length ? ` (after #${t.dependencies.join(", #")})` : "";
|
|
324
|
-
todo += `## Task ${i + 1}: ${t.title}${prioTag}${agentTag}${depsTag}\n\n- [ ] ${t.description}\n`;
|
|
325
|
-
if (t.subtasks) t.subtasks.forEach(st => { todo += ` - [ ] ${st}\n`; });
|
|
326
|
-
todo += "\n";
|
|
327
|
-
});
|
|
328
|
-
todo += `---\n\n## Progress\n\n- Total: ${tasks.length} tasks\n`;
|
|
329
|
-
todo += `- High priority: ${tasks.filter(t => t.priority === "high").length}\n`;
|
|
330
|
-
todo += `- Agents: ${[...new Set(tasks.map(t => t.agent || "code"))].join(", ")}\n`;
|
|
331
|
-
return todo;
|
|
332
|
-
}
|
|
333
|
-
|
|
334
|
-
// ─── Orchestrate Tool (plan + auto-execute) ──────────────────────
|
|
335
|
-
|
|
336
|
-
pi.registerTool({
|
|
337
|
-
name: "orchestrate",
|
|
338
|
-
label: "Project Orchestrator",
|
|
339
|
-
description: "Create a project plan AND automatically execute all tasks with sub-agents in parallel. Each agent gets its own isolated context, model, and system prompt. Tasks without dependencies run simultaneously.",
|
|
340
|
-
promptSnippet: "Plan + execute projects in parallel waves. Each sub-agent gets isolated context + model. Use prompt-architect patterns for structured task descriptions.",
|
|
341
|
-
promptGuidelines: [
|
|
342
|
-
"When asked to plan or build a project: analyze the request thoroughly, then call the orchestrate tool. It plans AND executes automatically.",
|
|
343
|
-
"CRITICAL: Each task description must be SELF-CONTAINED. The sub-agent has NO access to this conversation. It receives: (1) project context (title, description, spec summary) automatically, (2) outputs from its dependency tasks automatically, (3) your task description. So include specific details: file paths, expected behavior, code patterns, success criteria. Don't repeat the project description — that's injected automatically.",
|
|
344
|
-
"Structure each task description using the prompt-architect pattern: [CONTEXT] what exists and why → [TASK] what to do specifically → [FORMAT] expected output → [CONSTRAINTS] rules and limitations.",
|
|
345
|
-
"Assign agent types strategically: 'explore' (read-only analysis, codebase understanding), 'plan' (architecture, design decisions), 'code' (implementation, file creation/modification), 'test' (write + run tests, validate behavior), 'review' (security audit, quality check, read-only).",
|
|
346
|
-
"Set dependencies to maximize parallelism: tasks without dependencies run simultaneously in the same wave. Only add dependencies when a task truly needs another task's output.",
|
|
347
|
-
"Order tasks logically: explore → plan → code → test → review. But allow independent tasks at each stage to run in parallel.",
|
|
348
|
-
"Set priority=high for critical-path tasks, medium for standard work, low for nice-to-haves.",
|
|
349
|
-
],
|
|
350
|
-
parameters: Type.Object({
|
|
351
|
-
title: Type.String({ description: "Concise project title" }),
|
|
352
|
-
description: Type.String({ description: "Full project description: what to build, why, and any relevant context" }),
|
|
353
|
-
goals: Type.Union([Type.Array(Type.String()), Type.String()], { description: "Measurable project goals (what success looks like)" }),
|
|
354
|
-
requirements: Type.Union([Type.Array(Type.String()), Type.String()], { description: "Technical and functional requirements" }),
|
|
355
|
-
architecture: Type.Optional(Type.Union([Type.Array(Type.String()), Type.String()], { description: "Architecture decisions, tech stack choices, trade-offs" })),
|
|
356
|
-
tasks: Type.Array(
|
|
357
|
-
Type.Object({
|
|
358
|
-
title: Type.String({ description: "Clear, action-oriented task title" }),
|
|
359
|
-
description: Type.String({ description: "SELF-CONTAINED task description. Include ALL context the sub-agent needs: file paths, expected behavior, code patterns, conventions. The agent has NO shared history." }),
|
|
360
|
-
agent: Type.Optional(Type.String({ description: "Agent type: explore (read-only analysis), plan (architecture), code (implementation), test (write+run tests), review (quality audit)" })),
|
|
361
|
-
priority: Type.Optional(Type.String({ description: "high (critical path), medium (standard), low (nice-to-have)" })),
|
|
362
|
-
dependencies: Type.Optional(Type.Array(Type.Number(), { description: "Task numbers this depends on (1-indexed). Only add when truly needed — fewer dependencies = more parallelism" })),
|
|
363
|
-
subtasks: Type.Optional(Type.Array(Type.String(), { description: "Specific sub-steps within this task" })),
|
|
364
|
-
}),
|
|
365
|
-
{ description: "Ordered list of tasks. Independent tasks run in parallel. Dependent tasks wait for prerequisites." }
|
|
366
|
-
),
|
|
367
|
-
constraints: Type.Optional(Type.Union([Type.Array(Type.String()), Type.String()], { description: "Hard constraints: frameworks, patterns, rules, things to avoid" })),
|
|
368
|
-
successCriteria: Type.Optional(Type.Union([Type.Array(Type.String()), Type.String()], { description: "How to verify the project is complete and correct" })),
|
|
369
|
-
}),
|
|
370
|
-
|
|
371
|
-
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
|
|
372
|
-
const raw = params as any;
|
|
373
|
-
|
|
374
|
-
// Normalize string fields to arrays (some models send strings instead of arrays)
|
|
375
|
-
const toArray = (v: any): string[] => {
|
|
376
|
-
if (!v) return [];
|
|
377
|
-
if (Array.isArray(v)) return v;
|
|
378
|
-
if (typeof v === "string") return v.split("\n").map((s: string) => s.replace(/^[-•*]\s*/, "").trim()).filter(Boolean);
|
|
379
|
-
return [];
|
|
380
|
-
};
|
|
381
|
-
|
|
382
|
-
const p = {
|
|
383
|
-
title: raw.title as string,
|
|
384
|
-
description: raw.description as string,
|
|
385
|
-
goals: toArray(raw.goals),
|
|
386
|
-
requirements: toArray(raw.requirements),
|
|
387
|
-
architecture: raw.architecture ? toArray(raw.architecture) : undefined,
|
|
388
|
-
tasks: raw.tasks as TaskDef[],
|
|
389
|
-
constraints: raw.constraints ? toArray(raw.constraints) : undefined,
|
|
390
|
-
successCriteria: raw.successCriteria ? toArray(raw.successCriteria) : undefined,
|
|
391
|
-
};
|
|
392
|
-
|
|
393
|
-
try {
|
|
394
|
-
await ensurePlansDir();
|
|
395
|
-
const ts = timestamp();
|
|
396
|
-
const specFile = `spec-${ts}.md`;
|
|
397
|
-
const todoFile = `todo-${ts}.md`;
|
|
398
|
-
|
|
399
|
-
// Generate and write plan files
|
|
400
|
-
const spec = generateSpec(p);
|
|
401
|
-
const todo = generateTodo(p.title, p.tasks);
|
|
402
|
-
await writeFile(join(plansDir, specFile), spec, "utf-8");
|
|
403
|
-
await writeFile(join(plansDir, todoFile), todo, "utf-8");
|
|
404
|
-
|
|
405
|
-
// Notify plan created
|
|
406
|
-
const notify = (msg: string, type: "info" | "error" | "warning") => {
|
|
407
|
-
// Use onUpdate for streaming progress to the user
|
|
408
|
-
if (_onUpdate) {
|
|
409
|
-
_onUpdate({ content: [{ type: "text", text: msg }] });
|
|
410
|
-
}
|
|
411
|
-
};
|
|
412
|
-
|
|
413
|
-
notify(`📋 Plan created: **${p.title}** (${p.tasks.length} tasks)\nNow executing with sub-agents...`, "info");
|
|
414
|
-
|
|
415
|
-
// Auto-execute all tasks
|
|
416
|
-
// Build spec summary for shared context
|
|
417
|
-
const specSummary = [
|
|
418
|
-
`Goals: ${p.goals.join("; ")}`,
|
|
419
|
-
`Requirements: ${p.requirements.join("; ")}`,
|
|
420
|
-
p.architecture?.length ? `Architecture: ${p.architecture.join("; ")}` : "",
|
|
421
|
-
p.constraints?.length ? `Constraints: ${p.constraints.join("; ")}` : "",
|
|
422
|
-
].filter(Boolean).join("\n");
|
|
423
|
-
|
|
424
|
-
const { results, progressFile, megaPrompt } = await executePlan(
|
|
425
|
-
p.tasks, todoFile, notify,
|
|
426
|
-
{ title: p.title, description: p.description, specSummary },
|
|
427
|
-
);
|
|
428
|
-
|
|
429
|
-
const header = `**📋 Project "${p.title}" — ${p.tasks.length} tasks planned!**\n` +
|
|
430
|
-
`Plan: \`${specFile}\`, \`${todoFile}\` | Progress: \`${progressFile}\`\n\n` +
|
|
431
|
-
`---\n\n`;
|
|
432
|
-
|
|
433
|
-
// Return the mega-prompt as tool result
|
|
434
|
-
// The LLM sees this and executes all tasks in its current turn
|
|
435
|
-
return {
|
|
436
|
-
content: [{ type: "text", text: header + megaPrompt }],
|
|
437
|
-
details: {
|
|
438
|
-
specFile, todoFile, progressFile,
|
|
439
|
-
taskCount: p.tasks.length,
|
|
440
|
-
},
|
|
441
|
-
};
|
|
442
|
-
} catch (error) {
|
|
443
|
-
return {
|
|
444
|
-
content: [{ type: "text", text: `Orchestration failed: ${error}` }],
|
|
445
|
-
details: { error: String(error) },
|
|
446
|
-
};
|
|
447
|
-
}
|
|
448
|
-
},
|
|
449
|
-
});
|
|
450
|
-
|
|
451
38
|
// ─── Orchestration State ─────────────────────────────────────────
|
|
452
39
|
|
|
453
40
|
interface AgentDef {
|
|
@@ -471,6 +58,10 @@ export default function orchestratorExtension(pi: ExtensionAPI) {
|
|
|
471
58
|
let activeAgentTools: string[] | null = null;
|
|
472
59
|
let savedTools: string[] | null = null;
|
|
473
60
|
let phasePending = false; // true while waiting for a phase to complete
|
|
61
|
+
let phaseTimeoutId: ReturnType<typeof setTimeout> | null = null;
|
|
62
|
+
const MAX_PHASE_DURATION_MS = 10 * 60 * 1000; // 10 minutes per phase
|
|
63
|
+
const MAX_TOOL_CALLS_PER_PHASE = 60; // Safety limit
|
|
64
|
+
let phaseStartTime: number | null = null;
|
|
474
65
|
|
|
475
66
|
/**
|
|
476
67
|
* Parse agent .md file with YAML frontmatter
|
|
@@ -541,6 +132,9 @@ export default function orchestratorExtension(pi: ExtensionAPI) {
|
|
|
541
132
|
**Project Request:** ${description}
|
|
542
133
|
|
|
543
134
|
**Your tasks:**
|
|
135
|
+
|
|
136
|
+
**Parallelization:** When making multiple tool calls that don't depend on each other (e.g., memory_search + ontology_query, or reading 2+ files), call them IN PARALLEL in the same response. This is faster.
|
|
137
|
+
|
|
544
138
|
1. Call \`memory_search\` with project-relevant keywords (MANDATORY)
|
|
545
139
|
2. List all existing files and read key ones
|
|
546
140
|
3. Identify tech stack, patterns, and constraints
|
|
@@ -555,6 +149,7 @@ export default function orchestratorExtension(pi: ExtensionAPI) {
|
|
|
555
149
|
**LAST ACTION (MANDATORY):** Call \`memory_write\` to save your exploration findings for downstream agents.
|
|
556
150
|
|
|
557
151
|
**Knowledge Graph:**
|
|
152
|
+
// TODO: ontology_batch_add for reducing API calls (currently single-item only)
|
|
558
153
|
After your analysis, use \`ontology_add\` to save key project entities AND their relations:
|
|
559
154
|
- Add entities for: the project, each major library, each module/directory
|
|
560
155
|
- Add relations between them: "uses", "contains", "depends_on", "implements"
|
|
@@ -721,6 +316,8 @@ After implementation, use \`memory_write\` to save a summary of what was built,
|
|
|
721
316
|
- On Linux/Mac fallback: \`lsof -ti:PORT | xargs kill -9\`
|
|
722
317
|
- Always clean up after tests: kill background processes, remove temp files
|
|
723
318
|
|
|
319
|
+
**Anti-loop rule:** If the SAME test fails 3 times in a row with the same error after your fixes, STOP trying to fix it. Write the failure in your test report as "UNRESOLVED" and move on. Do not waste more than 3 iterations on the same issue.
|
|
320
|
+
|
|
724
321
|
After testing, use \`memory_write\` to save test results, bugs found, and lessons learned.` + runtimeInfo,
|
|
725
322
|
},
|
|
726
323
|
{
|
|
@@ -780,7 +377,12 @@ After your review, use \`memory_write\` ONCE to save:
|
|
|
780
377
|
- Common mistakes to avoid in future projects
|
|
781
378
|
Tag the note with relevant keywords for vector search.
|
|
782
379
|
|
|
783
|
-
**Important:** Write lessons-learned ONCE. Do not call memory_write twice with the same filename or duplicate content
|
|
380
|
+
**Important:** Write lessons-learned ONCE. Do not call memory_write twice with the same filename or duplicate content.
|
|
381
|
+
|
|
382
|
+
**Ontology enrichment:** After your review, use \`ontology_add\` to save your key findings:
|
|
383
|
+
- Add a "review-report" entity with type "Document"
|
|
384
|
+
- Add relations to the project: "reviews" → project, quality score as entity property
|
|
385
|
+
- Save any new architectural decisions or patterns discovered` + runtimeInfo,
|
|
784
386
|
},
|
|
785
387
|
];
|
|
786
388
|
}
|
|
@@ -852,6 +454,16 @@ Tag the note with relevant keywords for vector search.
|
|
|
852
454
|
setOrchestrationActive(false);
|
|
853
455
|
phasePending = false;
|
|
854
456
|
deactivateAgent();
|
|
457
|
+
if (phaseTimeoutId) { clearTimeout(phaseTimeoutId); phaseTimeoutId = null; }
|
|
458
|
+
// Generate global final summary
|
|
459
|
+
const totalPhases = 5; // always 5
|
|
460
|
+
const elapsed = phaseStartTime ? Math.round((Date.now() - phaseStartTime) / 1000) : 0;
|
|
461
|
+
const minutes = Math.floor(elapsed / 60);
|
|
462
|
+
const seconds = elapsed % 60;
|
|
463
|
+
ctx.ui.notify(`\n📊 **Orchestration Summary**\n` +
|
|
464
|
+
` Phases: ${totalPhases}/5 completed\n` +
|
|
465
|
+
` Duration: ${minutes}m ${seconds}s\n` +
|
|
466
|
+
` Check \`.phi/plans/\` for all reports`, "info");
|
|
855
467
|
try {
|
|
856
468
|
ctx.ui.notify(`\n✅ **All 5 phases complete!**`, "info");
|
|
857
469
|
} catch {
|
|
@@ -870,6 +482,15 @@ Tag the note with relevant keywords for vector search.
|
|
|
870
482
|
ctx.ui.notify(`\n${phase.label} → \`${modelId}\` (agent: ${agentName})`, "info");
|
|
871
483
|
// Small delay to let the model switch settle, then send instruction
|
|
872
484
|
setTimeout(() => pi.sendUserMessage(phase.instruction), 500);
|
|
485
|
+
// Set phase timeout — abort if phase takes too long
|
|
486
|
+
if (phaseTimeoutId) clearTimeout(phaseTimeoutId);
|
|
487
|
+
phaseTimeoutId = setTimeout(() => {
|
|
488
|
+
if (orchestrationActive && phasePending) {
|
|
489
|
+
ctx.ui.notify(`\n⏰ **Phase timed out** (${MAX_PHASE_DURATION_MS / 60000} min limit). Skipping to next phase.`, "warning");
|
|
490
|
+
phasePending = false;
|
|
491
|
+
sendNextPhase(ctx);
|
|
492
|
+
}
|
|
493
|
+
}, MAX_PHASE_DURATION_MS);
|
|
873
494
|
});
|
|
874
495
|
}
|
|
875
496
|
|
|
@@ -902,6 +523,9 @@ Tag the note with relevant keywords for vector search.
|
|
|
902
523
|
return;
|
|
903
524
|
}
|
|
904
525
|
|
|
526
|
+
// Clear phase timeout on normal completion
|
|
527
|
+
if (phaseTimeoutId) { clearTimeout(phaseTimeoutId); phaseTimeoutId = null; }
|
|
528
|
+
|
|
905
529
|
// Build a structured summary of what happened in this phase
|
|
906
530
|
// Instead of raw LLM text, extract concrete actions: files created/modified,
|
|
907
531
|
// errors encountered, test results. This gives the next phase actionable context.
|
|
@@ -930,8 +554,12 @@ Tag the note with relevant keywords for vector search.
|
|
|
930
554
|
const match = content.match(/edited (.+)/) || content.match(/in (.+)/);
|
|
931
555
|
if (match) filesEdited.push(match[1]);
|
|
932
556
|
}
|
|
933
|
-
// Track errors
|
|
934
|
-
if (content.includes('ERR:') || content.includes('Error:') || content.includes('FAIL'))
|
|
557
|
+
// Track errors — but filter out edit retries (old_text mismatch = normal retry, not error)
|
|
558
|
+
if ((content.includes('ERR:') || content.includes('Error:') || content.includes('FAIL'))
|
|
559
|
+
&& !content.includes('old text must match')
|
|
560
|
+
&& !content.includes('The old text')
|
|
561
|
+
&& !content.includes('oldText not found')
|
|
562
|
+
&& !content.includes('old_text not found')) {
|
|
935
563
|
const preview = content.slice(0, 150).replace(/\n/g, ' ');
|
|
936
564
|
errorsHit.push(`${name}: ${preview}`);
|
|
937
565
|
}
|
|
@@ -943,13 +571,42 @@ Tag the note with relevant keywords for vector search.
|
|
|
943
571
|
}
|
|
944
572
|
}
|
|
945
573
|
|
|
574
|
+
// Detect API errors (401, auth failures) — abort workflow if found
|
|
575
|
+
const hasAuthError = messages.some((msg: any) => {
|
|
576
|
+
const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content || '');
|
|
577
|
+
return content.includes('401') && (content.includes('invalid access token') || content.includes('token expired') || content.includes('Unauthorized'));
|
|
578
|
+
});
|
|
579
|
+
if (hasAuthError || (toolCallCount === 0 && messages.length > 0)) {
|
|
580
|
+
const errorMsg = hasAuthError ? 'API authentication error (401)' : 'Phase produced 0 tool calls — possible API or model error';
|
|
581
|
+
ctx.ui.notify(`\n❌ **Orchestrator aborted:** ${errorMsg}\nCheck your API key and model configuration.`, "error");
|
|
582
|
+
setOrchestrationActive(false);
|
|
583
|
+
phasePending = false;
|
|
584
|
+
deactivateAgent();
|
|
585
|
+
if (phaseTimeoutId) { clearTimeout(phaseTimeoutId); phaseTimeoutId = null; }
|
|
586
|
+
return;
|
|
587
|
+
}
|
|
588
|
+
|
|
946
589
|
// Build the summary
|
|
947
590
|
const summaryParts: string[] = [];
|
|
948
591
|
summaryParts.push(`Tool calls: ${toolCallCount}`);
|
|
592
|
+
// Anti-loop guard: warn if tool calls are excessive
|
|
593
|
+
if (toolCallCount > MAX_TOOL_CALLS_PER_PHASE) {
|
|
594
|
+
summaryParts.push(`⚠️ WARNING: Phase used ${toolCallCount} tool calls (limit: ${MAX_TOOL_CALLS_PER_PHASE}). Possible loop detected.`);
|
|
595
|
+
}
|
|
949
596
|
if (filesWritten.length > 0) summaryParts.push(`Files created/written: ${filesWritten.join(', ')}`);
|
|
950
597
|
if (filesEdited.length > 0) summaryParts.push(`Files edited: ${filesEdited.join(', ')}`);
|
|
951
598
|
if (testResults.length > 0) summaryParts.push(`Test results:\n${testResults.join('\n')}`);
|
|
952
599
|
if (errorsHit.length > 0) summaryParts.push(`Errors encountered: ${errorsHit.length}\n${errorsHit.slice(0, 5).join('\n')}`);
|
|
600
|
+
|
|
601
|
+
// Verify mandatory tool usage
|
|
602
|
+
const toolNames = messages
|
|
603
|
+
.filter((m: any) => m.role === 'tool' || m.role === 'function' || m.role === 'toolResult')
|
|
604
|
+
.map((m: any) => (m as any).name || (m as any).toolName || '');
|
|
605
|
+
const hasMemorySearch = toolNames.includes('memory_search');
|
|
606
|
+
const hasMemoryWrite = toolNames.includes('memory_write');
|
|
607
|
+
if (!hasMemorySearch) summaryParts.push(`⚠️ Phase did NOT call memory_search (mandatory)`);
|
|
608
|
+
if (!hasMemoryWrite) summaryParts.push(`⚠️ Phase did NOT call memory_write (mandatory)`);
|
|
609
|
+
|
|
953
610
|
const phaseSummary = summaryParts.join('\n');
|
|
954
611
|
|
|
955
612
|
// Inject structured summary into next phase
|
|
@@ -1005,6 +662,8 @@ Tag the note with relevant keywords for vector search.
|
|
|
1005
662
|
}
|
|
1006
663
|
ctx.ui.notify("", "info");
|
|
1007
664
|
|
|
665
|
+
// Record orchestration start time for final summary
|
|
666
|
+
phaseStartTime = Date.now();
|
|
1008
667
|
// Switch model and activate agent for first phase
|
|
1009
668
|
const modelId = await switchModelForPhase(firstPhase, ctx);
|
|
1010
669
|
activateAgent(firstPhase, ctx);
|
|
@@ -1017,61 +676,12 @@ Tag the note with relevant keywords for vector search.
|
|
|
1017
676
|
// ─── /run Command — Re-execute existing plan ─────────────────────
|
|
1018
677
|
|
|
1019
678
|
pi.registerCommand("run", {
|
|
1020
|
-
description: "Re-execute an existing plan
|
|
1021
|
-
handler: async (
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
}
|
|
1026
|
-
|
|
1027
|
-
const files = (await readdir(plansDir)).sort().reverse();
|
|
1028
|
-
const todoFiles = files.filter(f => f.startsWith("todo-") && f.endsWith(".md"));
|
|
1029
|
-
|
|
1030
|
-
if (todoFiles.length === 0) {
|
|
1031
|
-
ctx.ui.notify("No todo files found. Use `/plan <description>` first.", "warning");
|
|
1032
|
-
return;
|
|
1033
|
-
}
|
|
1034
|
-
|
|
1035
|
-
const todoFile = todoFiles[0];
|
|
1036
|
-
const todoContent = await readFile(join(plansDir, todoFile), "utf-8");
|
|
1037
|
-
|
|
1038
|
-
// Parse tasks
|
|
1039
|
-
const tasks: TaskDef[] = [];
|
|
1040
|
-
const sections = todoContent.split(/## Task \d+:/);
|
|
1041
|
-
for (let i = 1; i < sections.length; i++) {
|
|
1042
|
-
const section = sections[i];
|
|
1043
|
-
const titleMatch = section.match(/^(.+?)(?:\s*🔴|\s*🟡|\s*🟢)/);
|
|
1044
|
-
const agentMatch = section.match(/\[(\w+)\]/);
|
|
1045
|
-
const descMatch = section.match(/- \[ \] (.+)/);
|
|
1046
|
-
const subtasks: string[] = [];
|
|
1047
|
-
const stMatches = section.matchAll(/ - \[ \] (.+)/g);
|
|
1048
|
-
for (const m of stMatches) subtasks.push(m[1]);
|
|
1049
|
-
|
|
1050
|
-
if (titleMatch && descMatch) {
|
|
1051
|
-
tasks.push({
|
|
1052
|
-
title: titleMatch[1].trim(),
|
|
1053
|
-
agent: agentMatch?.[1] || "code",
|
|
1054
|
-
description: descMatch[1].trim(),
|
|
1055
|
-
subtasks: subtasks.length > 0 ? subtasks : undefined,
|
|
1056
|
-
});
|
|
1057
|
-
}
|
|
1058
|
-
}
|
|
1059
|
-
|
|
1060
|
-
if (tasks.length === 0) {
|
|
1061
|
-
ctx.ui.notify("Could not parse tasks from todo file.", "error");
|
|
1062
|
-
return;
|
|
1063
|
-
}
|
|
1064
|
-
|
|
1065
|
-
const confirmed = await ctx.ui.confirm(
|
|
1066
|
-
"Re-execute Plan",
|
|
1067
|
-
`${tasks.length} tasks found in \`${todoFile}\`.\nEach will spawn an isolated sub-agent.\n\nProceed?`
|
|
679
|
+
description: "Re-execute an existing plan (deprecated — use /plan instead)",
|
|
680
|
+
handler: async (_args, ctx) => {
|
|
681
|
+
ctx.ui.notify(
|
|
682
|
+
"⚠️ `/run` is deprecated. Use `/plan <description>` to create and execute a new plan with the 5-phase orchestrator.",
|
|
683
|
+
"warning",
|
|
1068
684
|
);
|
|
1069
|
-
if (!confirmed) {
|
|
1070
|
-
ctx.ui.notify("Cancelled.", "info");
|
|
1071
|
-
return;
|
|
1072
|
-
}
|
|
1073
|
-
|
|
1074
|
-
await executePlan(tasks, todoFile, (msg, type) => ctx.ui.notify(msg, type));
|
|
1075
685
|
},
|
|
1076
686
|
});
|
|
1077
687
|
|