@phi-code-admin/phi-code 0.72.0 → 0.74.0

This diff shows the contents of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/agents/test.md CHANGED
@@ -33,6 +33,9 @@ Use implementation results to know which files were created/modified and what be
33
33
  - **Realistic assertions**: Test what matters, not trivial details
34
34
  - **Match conventions**: Use the project's test framework, directory structure, and naming patterns
35
35
  - **Clean test code**: Tests are documentation — use descriptive names that explain expected behavior
36
+ - Prefer targeted `edit` calls over full file rewrites. When a test fails, fix ONLY the failing test function, not the entire file
37
+ - Maximum 1 full file rewrite per test file. After that, use `edit` for surgical fixes
38
+ - When debugging test failures: read the error → locate the exact failing assertion → fix that specific line
36
39
 
37
40
  ## Test Writing
38
41
 
@@ -43,9 +43,10 @@ export default function memoryExtension(pi: ExtensionAPI) {
43
43
  description: "Search for content in memory using unified search (notes + ontology + vector search)",
44
44
  promptSnippet: "Search project memory (notes, ontology, vector search). ALWAYS call before answering questions about prior work, decisions, or project context.",
45
45
  promptGuidelines: [
46
- "Before answering questions about prior work, architecture, decisions, or project context: call memory_search first.",
46
+ "MANDATORY: Before starting ANY task, call memory_search with relevant keywords. This is not optional.",
47
47
  "When starting work on a topic, search memory for existing notes and learnings.",
48
48
  "After completing important work or learning something new, use memory_write to save it.",
49
+ "MANDATORY: After completing any significant work, call memory_write to save what you did and what you learned.",
49
50
  "When a command fails or produces an unexpected error, document the error and fix in memory_write (self-improvement).",
50
51
  "When the user corrects you, save the correction in memory_write so you never repeat the mistake.",
51
52
  "After a significant debugging session, write a summary of root cause and solution to memory.",
@@ -16,41 +16,12 @@
16
16
  * /plans — List plans and their execution status
17
17
  */
18
18
 
19
- import { Type } from "@sinclair/typebox";
20
19
  import type { ExtensionAPI } from "phi-code";
21
20
  import { writeFile, mkdir, readdir, readFile } from "node:fs/promises";
22
21
  import { join } from "node:path";
23
22
  import { existsSync, readFileSync } from "node:fs";
24
- // execFile removed — tasks now execute in-session, no subprocess
25
23
  import { homedir } from "node:os";
26
24
 
27
- // ─── Types ───────────────────────────────────────────────────────────────
28
-
29
- interface TaskDef {
30
- title: string;
31
- description: string;
32
- agent?: string;
33
- priority?: string;
34
- dependencies?: number[];
35
- subtasks?: string[];
36
- }
37
-
38
- interface TaskResult {
39
- taskIndex: number;
40
- title: string;
41
- agent: string;
42
- status: "success" | "error" | "skipped";
43
- output: string;
44
- durationMs: number;
45
- }
46
-
47
- interface AgentDef {
48
- name: string;
49
- description: string;
50
- tools: string;
51
- systemPrompt: string;
52
- }
53
-
54
25
  // ─── Extension ───────────────────────────────────────────────────────────
55
26
 
56
27
  export default function orchestratorExtension(pi: ExtensionAPI) {
@@ -64,390 +35,6 @@ export default function orchestratorExtension(pi: ExtensionAPI) {
64
35
  return new Date().toISOString().replace(/[:.]/g, "-").replace("T", "_").slice(0, 19);
65
36
  }
66
37
 
67
- // ─── Agent Discovery ─────────────────────────────────────────────
68
-
69
- function loadAgentDefs(): Map<string, AgentDef> {
70
- const agents = new Map<string, AgentDef>();
71
- const dirs = [
72
- join(process.cwd(), ".phi", "agents"),
73
- join(homedir(), ".phi", "agent", "agents"),
74
- join(__dirname, "..", "..", "..", "agents"),
75
- ];
76
-
77
- for (const dir of dirs) {
78
- if (!existsSync(dir)) continue;
79
- try {
80
- const files = require("fs").readdirSync(dir) as string[];
81
- for (const file of files) {
82
- if (!file.endsWith(".md")) continue;
83
- const name = file.replace(".md", "");
84
- if (agents.has(name)) continue;
85
-
86
- try {
87
- const content = readFileSync(join(dir, file), "utf-8");
88
- const fmMatch = content.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/);
89
- if (!fmMatch) continue;
90
-
91
- const frontmatter = fmMatch[1];
92
- const body = fmMatch[2].trim();
93
- const desc = frontmatter.match(/description:\s*(.+)/)?.[1] || "";
94
- const tools = frontmatter.match(/tools:\s*(.+)/)?.[1] || "";
95
-
96
- agents.set(name, { name, description: desc, tools, systemPrompt: body });
97
- } catch { /* skip */ }
98
- }
99
- } catch { /* skip */ }
100
- }
101
-
102
- return agents;
103
- }
104
-
105
- function resolveAgentModel(agentType: string): string | null {
106
- const routingPath = join(homedir(), ".phi", "agent", "routing.json");
107
- try {
108
- const config = JSON.parse(readFileSync(routingPath, "utf-8"));
109
- for (const [_cat, route] of Object.entries(config.routes || {})) {
110
- const r = route as any;
111
- if (r.agent === agentType) return r.preferredModel || null;
112
- }
113
- // Map agent type to route category
114
- const categoryMap: Record<string, string> = {
115
- code: "code", explore: "explore", plan: "plan",
116
- test: "test", review: "review", debug: "debug",
117
- };
118
- const category = categoryMap[agentType];
119
- if (category && config.routes?.[category]) {
120
- return config.routes[category].preferredModel || null;
121
- }
122
- return config.default?.model || null;
123
- } catch {
124
- return null;
125
- }
126
- }
127
-
128
- function findPhiBinary(): string {
129
- // Try the bundled CLI relative to extensions dir
130
- const bundledCli = join(__dirname, "..", "..", "..", "dist", "cli.js");
131
- if (existsSync(bundledCli)) return bundledCli;
132
-
133
- // Try npm global install paths
134
- const npmGlobalPaths = [
135
- join(homedir(), "AppData", "Roaming", "npm", "node_modules", "@phi-code-admin", "phi-code", "dist", "cli.js"), // Windows
136
- join(homedir(), ".npm-global", "lib", "node_modules", "@phi-code-admin", "phi-code", "dist", "cli.js"), // Linux custom
137
- "/usr/local/lib/node_modules/@phi-code-admin/phi-code/dist/cli.js", // Linux/Mac default
138
- "/usr/lib/node_modules/@phi-code-admin/phi-code/dist/cli.js", // Some Linux
139
- ];
140
- for (const p of npmGlobalPaths) {
141
- if (existsSync(p)) return p;
142
- }
143
-
144
- // Try `which phi` (Linux/Mac) or `where phi` (Windows)
145
- try {
146
- const isWin = process.platform === "win32";
147
- const cmd = isWin ? "where" : "which";
148
- const result = require("child_process").execSync(`${cmd} phi 2>${isWin ? "NUL" : "/dev/null"}`, { encoding: "utf-8" }).trim();
149
- if (result) {
150
- const firstLine = result.split("\n")[0].trim();
151
- // On Windows, `where phi` returns the .cmd shim; we need the actual JS
152
- if (isWin && firstLine.endsWith(".cmd")) {
153
- const npmPrefix = require("child_process").execSync("npm prefix -g", { encoding: "utf-8" }).trim();
154
- const jsPath = join(npmPrefix, "node_modules", "@phi-code-admin", "phi-code", "dist", "cli.js");
155
- if (existsSync(jsPath)) return jsPath;
156
- }
157
- return firstLine;
158
- }
159
- } catch { /* not in PATH */ }
160
-
161
- // Last resort: assume phi is in PATH (works with shell:true on Windows)
162
- return "phi";
163
- }
164
-
165
- // ─── Task Execution (in-session, no subprocess) ─────────────────
166
-
167
- /**
168
- * Execute a task by sending it as a user message to the current session.
169
- * The LLM handles it directly — no subprocess spawning, no cold boot.
170
- * Much faster and more reliable than spawning phi --print processes.
171
- */
172
- function executeTaskInSession(
173
- task: TaskDef,
174
- sharedContext: {
175
- projectTitle: string;
176
- projectDescription: string;
177
- specSummary: string;
178
- completedTasks: Array<{ index: number; title: string; agent: string; output: string }>;
179
- },
180
- ): { taskPrompt: string } {
181
- const agentType = task.agent || "code";
182
-
183
- // Build prompt with shared context
184
- let taskPrompt = `## 🔧 Task: ${task.title} [${agentType}]\n\n`;
185
-
186
- taskPrompt += `**Project:** ${sharedContext.projectTitle}\n\n`;
187
-
188
- if (sharedContext.specSummary) {
189
- taskPrompt += `**Spec:** ${sharedContext.specSummary}\n\n`;
190
- }
191
-
192
- // Inject results from dependency tasks
193
- const deps = task.dependencies || [];
194
- if (deps.length > 0) {
195
- const depResults = sharedContext.completedTasks.filter(ct => deps.includes(ct.index));
196
- if (depResults.length > 0) {
197
- taskPrompt += `**Previous results:**\n`;
198
- for (const dep of depResults) {
199
- const truncated = dep.output.length > 500 ? dep.output.slice(0, 500) + "..." : dep.output;
200
- taskPrompt += `- Task ${dep.index} (${dep.title}): ${truncated}\n`;
201
- }
202
- taskPrompt += "\n";
203
- }
204
- }
205
-
206
- // The actual task
207
- taskPrompt += `### What to do\n\n${task.description}\n`;
208
- if (task.subtasks && task.subtasks.length > 0) {
209
- taskPrompt += "\n**Sub-tasks:**\n" + task.subtasks.map((st, i) => `${i + 1}. ${st}`).join("\n") + "\n";
210
- }
211
- taskPrompt += `\n**Instructions:** Execute this task completely. Create/edit all necessary files. Report what you did.\n`;
212
-
213
- return { taskPrompt };
214
- }
215
-
216
- // ─── Execute All Tasks (parallel with dependency resolution) ─────
217
-
218
- async function executePlan(
219
- tasks: TaskDef[],
220
- todoFile: string,
221
- notify: (msg: string, type: "info" | "error" | "warning") => void,
222
- projectContext?: { title: string; description: string; specSummary: string },
223
- ): Promise<{ results: TaskResult[]; progressFile: string }> {
224
- const progressFile = todoFile.replace("todo-", "progress-");
225
- const progressPath = join(plansDir, progressFile);
226
- const totalTasks = tasks.length;
227
-
228
- const sharedContext = {
229
- projectTitle: projectContext?.title || "Project",
230
- projectDescription: projectContext?.description || "",
231
- specSummary: projectContext?.specSummary || "",
232
- completedTasks: [] as Array<{ index: number; title: string; agent: string; output: string }>,
233
- };
234
-
235
- notify(`🚀 Executing ${totalTasks} tasks in-session...`, "info");
236
-
237
- // Build a single comprehensive prompt with ALL tasks
238
- let megaPrompt = `# 📋 Project: ${sharedContext.projectTitle}\n\n`;
239
- megaPrompt += `${sharedContext.projectDescription}\n\n`;
240
- if (sharedContext.specSummary) {
241
- megaPrompt += `## Spec\n${sharedContext.specSummary}\n\n`;
242
- }
243
- megaPrompt += `## Tasks (execute ALL in order)\n\n`;
244
-
245
- const results: TaskResult[] = [];
246
-
247
- for (let i = 0; i < tasks.length; i++) {
248
- const task = tasks[i];
249
- const { taskPrompt } = executeTaskInSession(task, sharedContext);
250
- megaPrompt += `---\n\n${taskPrompt}\n\n`;
251
- results.push({
252
- taskIndex: i + 1, title: task.title,
253
- agent: task.agent || "code", status: "success",
254
- output: "(in-session)", durationMs: 0,
255
- });
256
- }
257
-
258
- megaPrompt += `---\n\n## ⚠️ Instructions\n\n`;
259
- megaPrompt += `Execute ALL ${totalTasks} tasks above **sequentially**. For each task:\n`;
260
- megaPrompt += `1. Create/edit the required files using your tools\n`;
261
- megaPrompt += `2. Report what you did briefly\n`;
262
- megaPrompt += `3. Move to the next task\n\n`;
263
- megaPrompt += `Do NOT skip any task. Complete the entire project.\n`;
264
-
265
- // Write progress file
266
- let progress = `# Progress: ${todoFile}\n\n`;
267
- progress += `**Started:** ${new Date().toLocaleString()}\n`;
268
- progress += `**Tasks:** ${totalTasks} | **Mode:** in-session\n\n`;
269
- for (const r of results) {
270
- progress += `- Task ${r.taskIndex}: ${r.title} [${r.agent}]\n`;
271
- }
272
- await writeFile(progressPath, progress, "utf-8");
273
-
274
- // Return the mega-prompt as tool result — LLM sees it and executes
275
- return { results, progressFile, megaPrompt };
276
- }
277
-
278
- // ─── Generate Plan Files ─────────────────────────────────────────
279
-
280
- function generateSpec(p: {
281
- title: string; description: string; goals: string[]; requirements: string[];
282
- architecture?: string[]; constraints?: string[]; successCriteria?: string[]; tasks: TaskDef[];
283
- }): string {
284
- let spec = `# ${p.title}\n\n`;
285
- spec += `**Created:** ${new Date().toLocaleString()}\n\n`;
286
- spec += `## Description\n\n${p.description}\n\n`;
287
- spec += `## Goals\n\n`;
288
- p.goals.forEach((g, i) => { spec += `${i + 1}. ${g}\n`; });
289
- spec += "\n## Requirements\n\n";
290
- p.requirements.forEach(r => { spec += `- ${r}\n`; });
291
- spec += "\n";
292
- if (p.architecture?.length) {
293
- spec += `## Architecture\n\n`;
294
- p.architecture.forEach(a => { spec += `- ${a}\n`; });
295
- spec += "\n";
296
- }
297
- if (p.constraints?.length) {
298
- spec += `## Constraints\n\n`;
299
- p.constraints.forEach(c => { spec += `- ${c}\n`; });
300
- spec += "\n";
301
- }
302
- if (p.successCriteria?.length) {
303
- spec += `## Success Criteria\n\n`;
304
- p.successCriteria.forEach(s => { spec += `- [ ] ${s}\n`; });
305
- spec += "\n";
306
- }
307
- spec += `## Task Overview\n\n| # | Task | Agent | Priority | Dependencies |\n|---|------|-------|----------|-------------|\n`;
308
- p.tasks.forEach((t, i) => {
309
- const deps = t.dependencies?.map(d => `#${d}`).join(", ") || "—";
310
- spec += `| ${i + 1} | ${t.title} | ${t.agent || "code"} | ${t.priority || "medium"} | ${deps} |\n`;
311
- });
312
- spec += `\n---\n*Generated by Phi Code Orchestrator*\n`;
313
- return spec;
314
- }
315
-
316
- function generateTodo(title: string, tasks: TaskDef[]): string {
317
- let todo = `# TODO: ${title}\n\n`;
318
- todo += `**Created:** ${new Date().toLocaleString()}\n`;
319
- todo += `**Tasks:** ${tasks.length}\n**Status:** executing\n\n`;
320
- tasks.forEach((t, i) => {
321
- const agentTag = t.agent ? ` [${t.agent}]` : "";
322
- const prioTag = t.priority === "high" ? " 🔴" : t.priority === "low" ? " 🟢" : " 🟡";
323
- const depsTag = t.dependencies?.length ? ` (after #${t.dependencies.join(", #")})` : "";
324
- todo += `## Task ${i + 1}: ${t.title}${prioTag}${agentTag}${depsTag}\n\n- [ ] ${t.description}\n`;
325
- if (t.subtasks) t.subtasks.forEach(st => { todo += ` - [ ] ${st}\n`; });
326
- todo += "\n";
327
- });
328
- todo += `---\n\n## Progress\n\n- Total: ${tasks.length} tasks\n`;
329
- todo += `- High priority: ${tasks.filter(t => t.priority === "high").length}\n`;
330
- todo += `- Agents: ${[...new Set(tasks.map(t => t.agent || "code"))].join(", ")}\n`;
331
- return todo;
332
- }
333
-
334
- // ─── Orchestrate Tool (plan + auto-execute) ──────────────────────
335
-
336
- pi.registerTool({
337
- name: "orchestrate",
338
- label: "Project Orchestrator",
339
- description: "Create a project plan AND automatically execute all tasks with sub-agents in parallel. Each agent gets its own isolated context, model, and system prompt. Tasks without dependencies run simultaneously.",
340
- promptSnippet: "Plan + execute projects in parallel waves. Each sub-agent gets isolated context + model. Use prompt-architect patterns for structured task descriptions.",
341
- promptGuidelines: [
342
- "When asked to plan or build a project: analyze the request thoroughly, then call the orchestrate tool. It plans AND executes automatically.",
343
- "CRITICAL: Each task description must be SELF-CONTAINED. The sub-agent has NO access to this conversation. It receives: (1) project context (title, description, spec summary) automatically, (2) outputs from its dependency tasks automatically, (3) your task description. So include specific details: file paths, expected behavior, code patterns, success criteria. Don't repeat the project description — that's injected automatically.",
344
- "Structure each task description using the prompt-architect pattern: [CONTEXT] what exists and why → [TASK] what to do specifically → [FORMAT] expected output → [CONSTRAINTS] rules and limitations.",
345
- "Assign agent types strategically: 'explore' (read-only analysis, codebase understanding), 'plan' (architecture, design decisions), 'code' (implementation, file creation/modification), 'test' (write + run tests, validate behavior), 'review' (security audit, quality check, read-only).",
346
- "Set dependencies to maximize parallelism: tasks without dependencies run simultaneously in the same wave. Only add dependencies when a task truly needs another task's output.",
347
- "Order tasks logically: explore → plan → code → test → review. But allow independent tasks at each stage to run in parallel.",
348
- "Set priority=high for critical-path tasks, medium for standard work, low for nice-to-haves.",
349
- ],
350
- parameters: Type.Object({
351
- title: Type.String({ description: "Concise project title" }),
352
- description: Type.String({ description: "Full project description: what to build, why, and any relevant context" }),
353
- goals: Type.Union([Type.Array(Type.String()), Type.String()], { description: "Measurable project goals (what success looks like)" }),
354
- requirements: Type.Union([Type.Array(Type.String()), Type.String()], { description: "Technical and functional requirements" }),
355
- architecture: Type.Optional(Type.Union([Type.Array(Type.String()), Type.String()], { description: "Architecture decisions, tech stack choices, trade-offs" })),
356
- tasks: Type.Array(
357
- Type.Object({
358
- title: Type.String({ description: "Clear, action-oriented task title" }),
359
- description: Type.String({ description: "SELF-CONTAINED task description. Include ALL context the sub-agent needs: file paths, expected behavior, code patterns, conventions. The agent has NO shared history." }),
360
- agent: Type.Optional(Type.String({ description: "Agent type: explore (read-only analysis), plan (architecture), code (implementation), test (write+run tests), review (quality audit)" })),
361
- priority: Type.Optional(Type.String({ description: "high (critical path), medium (standard), low (nice-to-have)" })),
362
- dependencies: Type.Optional(Type.Array(Type.Number(), { description: "Task numbers this depends on (1-indexed). Only add when truly needed — fewer dependencies = more parallelism" })),
363
- subtasks: Type.Optional(Type.Array(Type.String(), { description: "Specific sub-steps within this task" })),
364
- }),
365
- { description: "Ordered list of tasks. Independent tasks run in parallel. Dependent tasks wait for prerequisites." }
366
- ),
367
- constraints: Type.Optional(Type.Union([Type.Array(Type.String()), Type.String()], { description: "Hard constraints: frameworks, patterns, rules, things to avoid" })),
368
- successCriteria: Type.Optional(Type.Union([Type.Array(Type.String()), Type.String()], { description: "How to verify the project is complete and correct" })),
369
- }),
370
-
371
- async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
372
- const raw = params as any;
373
-
374
- // Normalize string fields to arrays (some models send strings instead of arrays)
375
- const toArray = (v: any): string[] => {
376
- if (!v) return [];
377
- if (Array.isArray(v)) return v;
378
- if (typeof v === "string") return v.split("\n").map((s: string) => s.replace(/^[-•*]\s*/, "").trim()).filter(Boolean);
379
- return [];
380
- };
381
-
382
- const p = {
383
- title: raw.title as string,
384
- description: raw.description as string,
385
- goals: toArray(raw.goals),
386
- requirements: toArray(raw.requirements),
387
- architecture: raw.architecture ? toArray(raw.architecture) : undefined,
388
- tasks: raw.tasks as TaskDef[],
389
- constraints: raw.constraints ? toArray(raw.constraints) : undefined,
390
- successCriteria: raw.successCriteria ? toArray(raw.successCriteria) : undefined,
391
- };
392
-
393
- try {
394
- await ensurePlansDir();
395
- const ts = timestamp();
396
- const specFile = `spec-${ts}.md`;
397
- const todoFile = `todo-${ts}.md`;
398
-
399
- // Generate and write plan files
400
- const spec = generateSpec(p);
401
- const todo = generateTodo(p.title, p.tasks);
402
- await writeFile(join(plansDir, specFile), spec, "utf-8");
403
- await writeFile(join(plansDir, todoFile), todo, "utf-8");
404
-
405
- // Notify plan created
406
- const notify = (msg: string, type: "info" | "error" | "warning") => {
407
- // Use onUpdate for streaming progress to the user
408
- if (_onUpdate) {
409
- _onUpdate({ content: [{ type: "text", text: msg }] });
410
- }
411
- };
412
-
413
- notify(`📋 Plan created: **${p.title}** (${p.tasks.length} tasks)\nNow executing with sub-agents...`, "info");
414
-
415
- // Auto-execute all tasks
416
- // Build spec summary for shared context
417
- const specSummary = [
418
- `Goals: ${p.goals.join("; ")}`,
419
- `Requirements: ${p.requirements.join("; ")}`,
420
- p.architecture?.length ? `Architecture: ${p.architecture.join("; ")}` : "",
421
- p.constraints?.length ? `Constraints: ${p.constraints.join("; ")}` : "",
422
- ].filter(Boolean).join("\n");
423
-
424
- const { results, progressFile, megaPrompt } = await executePlan(
425
- p.tasks, todoFile, notify,
426
- { title: p.title, description: p.description, specSummary },
427
- );
428
-
429
- const header = `**📋 Project "${p.title}" — ${p.tasks.length} tasks planned!**\n` +
430
- `Plan: \`${specFile}\`, \`${todoFile}\` | Progress: \`${progressFile}\`\n\n` +
431
- `---\n\n`;
432
-
433
- // Return the mega-prompt as tool result
434
- // The LLM sees this and executes all tasks in its current turn
435
- return {
436
- content: [{ type: "text", text: header + megaPrompt }],
437
- details: {
438
- specFile, todoFile, progressFile,
439
- taskCount: p.tasks.length,
440
- },
441
- };
442
- } catch (error) {
443
- return {
444
- content: [{ type: "text", text: `Orchestration failed: ${error}` }],
445
- details: { error: String(error) },
446
- };
447
- }
448
- },
449
- });
450
-
451
38
  // ─── Orchestration State ─────────────────────────────────────────
452
39
 
453
40
  interface AgentDef {
@@ -471,6 +58,10 @@ export default function orchestratorExtension(pi: ExtensionAPI) {
471
58
  let activeAgentTools: string[] | null = null;
472
59
  let savedTools: string[] | null = null;
473
60
  let phasePending = false; // true while waiting for a phase to complete
61
+ let phaseTimeoutId: ReturnType<typeof setTimeout> | null = null;
62
+ const MAX_PHASE_DURATION_MS = 10 * 60 * 1000; // 10 minutes per phase
63
+ const MAX_TOOL_CALLS_PER_PHASE = 60; // Safety limit
64
+ let phaseStartTime: number | null = null;
474
65
 
475
66
  /**
476
67
  * Parse agent .md file with YAML frontmatter
@@ -541,6 +132,9 @@ export default function orchestratorExtension(pi: ExtensionAPI) {
541
132
  **Project Request:** ${description}
542
133
 
543
134
  **Your tasks:**
135
+
136
+ **Parallelization:** When making multiple tool calls that don't depend on each other (e.g., memory_search + ontology_query, or reading 2+ files), call them IN PARALLEL in the same response. This is faster.
137
+
544
138
  1. Call \`memory_search\` with project-relevant keywords (MANDATORY)
545
139
  2. List all existing files and read key ones
546
140
  3. Identify tech stack, patterns, and constraints
@@ -555,6 +149,7 @@ export default function orchestratorExtension(pi: ExtensionAPI) {
555
149
  **LAST ACTION (MANDATORY):** Call \`memory_write\` to save your exploration findings for downstream agents.
556
150
 
557
151
  **Knowledge Graph:**
152
+ // TODO: ontology_batch_add for reducing API calls (currently single-item only)
558
153
  After your analysis, use \`ontology_add\` to save key project entities AND their relations:
559
154
  - Add entities for: the project, each major library, each module/directory
560
155
  - Add relations between them: "uses", "contains", "depends_on", "implements"
@@ -721,6 +316,8 @@ After implementation, use \`memory_write\` to save a summary of what was built,
721
316
  - On Linux/Mac fallback: \`lsof -ti:PORT | xargs kill -9\`
722
317
  - Always clean up after tests: kill background processes, remove temp files
723
318
 
319
+ **Anti-loop rule:** If the SAME test fails 3 times in a row with the same error after your fixes, STOP trying to fix it. Write the failure in your test report as "UNRESOLVED" and move on. Do not waste more than 3 iterations on the same issue.
320
+
724
321
  After testing, use \`memory_write\` to save test results, bugs found, and lessons learned.` + runtimeInfo,
725
322
  },
726
323
  {
@@ -780,7 +377,12 @@ After your review, use \`memory_write\` ONCE to save:
780
377
  - Common mistakes to avoid in future projects
781
378
  Tag the note with relevant keywords for vector search.
782
379
 
783
- **Important:** Write lessons-learned ONCE. Do not call memory_write twice with the same filename or duplicate content.` + runtimeInfo,
380
+ **Important:** Write lessons-learned ONCE. Do not call memory_write twice with the same filename or duplicate content.
381
+
382
+ **Ontology enrichment:** After your review, use \`ontology_add\` to save your key findings:
383
+ - Add a "review-report" entity with type "Document"
384
+ - Add relations to the project: "reviews" → project, quality score as entity property
385
+ - Save any new architectural decisions or patterns discovered` + runtimeInfo,
784
386
  },
785
387
  ];
786
388
  }
@@ -852,6 +454,16 @@ Tag the note with relevant keywords for vector search.
852
454
  setOrchestrationActive(false);
853
455
  phasePending = false;
854
456
  deactivateAgent();
457
+ if (phaseTimeoutId) { clearTimeout(phaseTimeoutId); phaseTimeoutId = null; }
458
+ // Generate global final summary
459
+ const totalPhases = 5; // always 5
460
+ const elapsed = phaseStartTime ? Math.round((Date.now() - phaseStartTime) / 1000) : 0;
461
+ const minutes = Math.floor(elapsed / 60);
462
+ const seconds = elapsed % 60;
463
+ ctx.ui.notify(`\n📊 **Orchestration Summary**\n` +
464
+ ` Phases: ${totalPhases}/5 completed\n` +
465
+ ` Duration: ${minutes}m ${seconds}s\n` +
466
+ ` Check \`.phi/plans/\` for all reports`, "info");
855
467
  try {
856
468
  ctx.ui.notify(`\n✅ **All 5 phases complete!**`, "info");
857
469
  } catch {
@@ -870,6 +482,15 @@ Tag the note with relevant keywords for vector search.
870
482
  ctx.ui.notify(`\n${phase.label} → \`${modelId}\` (agent: ${agentName})`, "info");
871
483
  // Small delay to let the model switch settle, then send instruction
872
484
  setTimeout(() => pi.sendUserMessage(phase.instruction), 500);
485
+ // Set phase timeout — abort if phase takes too long
486
+ if (phaseTimeoutId) clearTimeout(phaseTimeoutId);
487
+ phaseTimeoutId = setTimeout(() => {
488
+ if (orchestrationActive && phasePending) {
489
+ ctx.ui.notify(`\n⏰ **Phase timed out** (${MAX_PHASE_DURATION_MS / 60000} min limit). Skipping to next phase.`, "warning");
490
+ phasePending = false;
491
+ sendNextPhase(ctx);
492
+ }
493
+ }, MAX_PHASE_DURATION_MS);
873
494
  });
874
495
  }
875
496
 
@@ -902,6 +523,9 @@ Tag the note with relevant keywords for vector search.
902
523
  return;
903
524
  }
904
525
 
526
+ // Clear phase timeout on normal completion
527
+ if (phaseTimeoutId) { clearTimeout(phaseTimeoutId); phaseTimeoutId = null; }
528
+
905
529
  // Build a structured summary of what happened in this phase
906
530
  // Instead of raw LLM text, extract concrete actions: files created/modified,
907
531
  // errors encountered, test results. This gives the next phase actionable context.
@@ -930,8 +554,12 @@ Tag the note with relevant keywords for vector search.
930
554
  const match = content.match(/edited (.+)/) || content.match(/in (.+)/);
931
555
  if (match) filesEdited.push(match[1]);
932
556
  }
933
- // Track errors
934
- if (content.includes('ERR:') || content.includes('Error:') || content.includes('FAIL')) {
557
+ // Track errors — but filter out edit retries (old_text mismatch = normal retry, not error)
558
+ if ((content.includes('ERR:') || content.includes('Error:') || content.includes('FAIL'))
559
+ && !content.includes('old text must match')
560
+ && !content.includes('The old text')
561
+ && !content.includes('oldText not found')
562
+ && !content.includes('old_text not found')) {
935
563
  const preview = content.slice(0, 150).replace(/\n/g, ' ');
936
564
  errorsHit.push(`${name}: ${preview}`);
937
565
  }
@@ -943,13 +571,42 @@ Tag the note with relevant keywords for vector search.
943
571
  }
944
572
  }
945
573
 
574
+ // Detect API errors (401, auth failures) — abort workflow if found
575
+ const hasAuthError = messages.some((msg: any) => {
576
+ const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content || '');
577
+ return content.includes('401') && (content.includes('invalid access token') || content.includes('token expired') || content.includes('Unauthorized'));
578
+ });
579
+ if (hasAuthError || (toolCallCount === 0 && messages.length > 0)) {
580
+ const errorMsg = hasAuthError ? 'API authentication error (401)' : 'Phase produced 0 tool calls — possible API or model error';
581
+ ctx.ui.notify(`\n❌ **Orchestrator aborted:** ${errorMsg}\nCheck your API key and model configuration.`, "error");
582
+ setOrchestrationActive(false);
583
+ phasePending = false;
584
+ deactivateAgent();
585
+ if (phaseTimeoutId) { clearTimeout(phaseTimeoutId); phaseTimeoutId = null; }
586
+ return;
587
+ }
588
+
946
589
  // Build the summary
947
590
  const summaryParts: string[] = [];
948
591
  summaryParts.push(`Tool calls: ${toolCallCount}`);
592
+ // Anti-loop guard: warn if tool calls are excessive
593
+ if (toolCallCount > MAX_TOOL_CALLS_PER_PHASE) {
594
+ summaryParts.push(`⚠️ WARNING: Phase used ${toolCallCount} tool calls (limit: ${MAX_TOOL_CALLS_PER_PHASE}). Possible loop detected.`);
595
+ }
949
596
  if (filesWritten.length > 0) summaryParts.push(`Files created/written: ${filesWritten.join(', ')}`);
950
597
  if (filesEdited.length > 0) summaryParts.push(`Files edited: ${filesEdited.join(', ')}`);
951
598
  if (testResults.length > 0) summaryParts.push(`Test results:\n${testResults.join('\n')}`);
952
599
  if (errorsHit.length > 0) summaryParts.push(`Errors encountered: ${errorsHit.length}\n${errorsHit.slice(0, 5).join('\n')}`);
600
+
601
+ // Verify mandatory tool usage
602
+ const toolNames = messages
603
+ .filter((m: any) => m.role === 'tool' || m.role === 'function' || m.role === 'toolResult')
604
+ .map((m: any) => (m as any).name || (m as any).toolName || '');
605
+ const hasMemorySearch = toolNames.includes('memory_search');
606
+ const hasMemoryWrite = toolNames.includes('memory_write');
607
+ if (!hasMemorySearch) summaryParts.push(`⚠️ Phase did NOT call memory_search (mandatory)`);
608
+ if (!hasMemoryWrite) summaryParts.push(`⚠️ Phase did NOT call memory_write (mandatory)`);
609
+
953
610
  const phaseSummary = summaryParts.join('\n');
954
611
 
955
612
  // Inject structured summary into next phase
@@ -1005,6 +662,8 @@ Tag the note with relevant keywords for vector search.
1005
662
  }
1006
663
  ctx.ui.notify("", "info");
1007
664
 
665
+ // Record orchestration start time for final summary
666
+ phaseStartTime = Date.now();
1008
667
  // Switch model and activate agent for first phase
1009
668
  const modelId = await switchModelForPhase(firstPhase, ctx);
1010
669
  activateAgent(firstPhase, ctx);
@@ -1017,61 +676,12 @@ Tag the note with relevant keywords for vector search.
1017
676
  // ─── /run Command — Re-execute existing plan ─────────────────────
1018
677
 
1019
678
  pi.registerCommand("run", {
1020
- description: "Re-execute an existing plan's tasks with sub-agents",
1021
- handler: async (args, ctx) => {
1022
- if (!existsSync(plansDir)) {
1023
- ctx.ui.notify("No plans found. Use `/plan <description>` to create and execute one.", "warning");
1024
- return;
1025
- }
1026
-
1027
- const files = (await readdir(plansDir)).sort().reverse();
1028
- const todoFiles = files.filter(f => f.startsWith("todo-") && f.endsWith(".md"));
1029
-
1030
- if (todoFiles.length === 0) {
1031
- ctx.ui.notify("No todo files found. Use `/plan <description>` first.", "warning");
1032
- return;
1033
- }
1034
-
1035
- const todoFile = todoFiles[0];
1036
- const todoContent = await readFile(join(plansDir, todoFile), "utf-8");
1037
-
1038
- // Parse tasks
1039
- const tasks: TaskDef[] = [];
1040
- const sections = todoContent.split(/## Task \d+:/);
1041
- for (let i = 1; i < sections.length; i++) {
1042
- const section = sections[i];
1043
- const titleMatch = section.match(/^(.+?)(?:\s*🔴|\s*🟡|\s*🟢)/);
1044
- const agentMatch = section.match(/\[(\w+)\]/);
1045
- const descMatch = section.match(/- \[ \] (.+)/);
1046
- const subtasks: string[] = [];
1047
- const stMatches = section.matchAll(/ - \[ \] (.+)/g);
1048
- for (const m of stMatches) subtasks.push(m[1]);
1049
-
1050
- if (titleMatch && descMatch) {
1051
- tasks.push({
1052
- title: titleMatch[1].trim(),
1053
- agent: agentMatch?.[1] || "code",
1054
- description: descMatch[1].trim(),
1055
- subtasks: subtasks.length > 0 ? subtasks : undefined,
1056
- });
1057
- }
1058
- }
1059
-
1060
- if (tasks.length === 0) {
1061
- ctx.ui.notify("Could not parse tasks from todo file.", "error");
1062
- return;
1063
- }
1064
-
1065
- const confirmed = await ctx.ui.confirm(
1066
- "Re-execute Plan",
1067
- `${tasks.length} tasks found in \`${todoFile}\`.\nEach will spawn an isolated sub-agent.\n\nProceed?`
679
+ description: "Re-execute an existing plan (deprecated use /plan instead)",
680
+ handler: async (_args, ctx) => {
681
+ ctx.ui.notify(
682
+ "⚠️ `/run` is deprecated. Use `/plan <description>` to create and execute a new plan with the 5-phase orchestrator.",
683
+ "warning",
1068
684
  );
1069
- if (!confirmed) {
1070
- ctx.ui.notify("Cancelled.", "info");
1071
- return;
1072
- }
1073
-
1074
- await executePlan(tasks, todoFile, (msg, type) => ctx.ui.notify(msg, type));
1075
685
  },
1076
686
  });
1077
687
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@phi-code-admin/phi-code",
3
- "version": "0.72.0",
3
+ "version": "0.74.0",
4
4
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
5
5
  "type": "module",
6
6
  "piConfig": {