npm - @aexol/spectral - Versions diffs - 0.6.4 → 0.6.9 - Mend

@aexol/spectral 0.6.4 → 0.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/dist/cli.js +8 -1
package/dist/commands/serve.js +1 -0
package/dist/extensions/aexol-mcp.js +16 -1
package/dist/mcp/tool-registrar.js +18 -2
package/dist/relay/auto-research.js +631 -445
package/dist/relay/dispatcher.js +5 -7
package/dist/relay/models-fetch.js +5 -1
package/dist/server/pi-bridge.js +35 -11
package/dist/server/session-stream.js +10 -2
package/package.json +1 -1

package/dist/relay/auto-research.js — CHANGED

@@ -1,530 +1,716 @@
 /**
- * Auto-research handler — spawns an isolated pi subprocess to analyze a
- * project and generate custom extensions.
+ * Auto-research handler — sends an auto-research task through the existing
+ * PiBridge (backend proxy) instead of spawning a separate pi process.
  *
- * Flow:
- *   1. Resolve project path from the SQLite store
- *   2. Load the auto-research agent definition (system prompt + model)
- *   3. Write system prompt to a temp file (--append-system-prompt)
- *   4. Spawn pi with --mode json -p --no-session --model <model> --append-system-prompt <tmp>
- *   5. Pass the user task as a positional argument ("Task: ...")
- *   6. Parse pi's JSON-line output: watch for message_end events on assistant
- *      messages, extract the text, and interpret it as auto-research events
- *      (progress / extension_generated / done / error)
- *   7. Stream progress via the relay to the browser
- *   8. On completion, emit `auto_research_complete` with generated extensions
+ * This ensures auto-research uses the same model and API keys as the active
+ * session — no separate subprocess, no missing API key errors.
  *
- * This mirrors the subagent extension's spawn pattern (agent/index.ts) so
- * pi receives the task and system prompt in the same format it expects.
+ * Flow:
+ *   1. Build the auto-research task prompt
+ *   2. Ensure session subscriber exists (borrows handleClientMessage pattern)
+ *   3. Emit auto_research_start via relay
+ *   4. Send task via manager.prompt() — goes through backend proxy with session's model
+ *   5. Attach a watcher subscriber to detect turn completion
+ *   6. On agent_end, scan for generated extensions and emit auto_research_complete
+ *   7. On error, emit auto_research_error
  */
-import { spawn } from "node:child_process";
 import * as fs from "node:fs";
-import * as os from "node:os";
 import * as path from "node:path";
-import { parseFrontmatter } from "@mariozechner/pi-coding-agent";
+import { execSync } from "node:child_process";
+// ---------------------------------------------------------------------------
+// State
+// ---------------------------------------------------------------------------
+/** Tracks sessions with an active auto-research turn (prevents double-runs). */
+const activeAutoResearchSessions = new Set();
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
 /**
- * Locate the auto-research agent definition markdown file.
- * Searches project-level first, then user-level.
+ * Send a ServerEvent to the browser via the relay on the auto-research session.
  */
-function findAgentDef(projectPath) {
-    const candidates = [
-        path.join(projectPath, ".pi", "agents", "auto-research.md"),
-        path.join(os.homedir(), ".pi", "agent", "agents", "auto-research.md"),
-    ];
-    for (const filePath of candidates) {
-        try {
-            const content = fs.readFileSync(filePath, "utf-8");
-            const { frontmatter, body } = parseFrontmatter(content);
-            if (body.trim().length === 0)
-                continue;
-            return {
-                model: frontmatter.model ?? "claude-sonnet-4-5",
-                systemPrompt: body,
-            };
-        }
-        catch {
-            continue;
-        }
-    }
-    // Hardcoded fallback: system prompt for when the agent definition file
-    // is not found in the project or user agent directories. This ensures
-    // auto-research works out of the box on first use.
-    return getDefaultAgentDef();
+function sendEvent(relay, sessionId, event) {
+    relay.send({ kind: "ws_event", sessionId, event });
 }
-/** Default agent definition used as a fallback when no agent .md file exists. */
-function getDefaultAgentDef() {
+/**
+ * Build a Subscriber that wraps each ServerEvent in a WsEventFrame
+ * and pushes it through the relay. Mirrors `makeRelaySubscriber` in
+ * dispatcher.ts but defined here to keep auto-research self-contained.
+ */
+function makeRelaySubscriber(sessionId, relay) {
     return {
-        model: "claude-sonnet-4-5",
-        systemPrompt: [
-            "You are an auto-research agent. Analyze the project and generate custom",
-            "pi coding agent extensions. Output ONLY JSON lines (one per line).",
-            "",
-            "## Process",
-            '1. Context: emit {"type":"progress","phase":"context_collecting","message":"..."}',
-            '2. Analysis: emit {"type":"progress","phase":"context_analyzing","message":"..."}',
-            '3. Generation: emit {"type":"progress","phase":"extension_generating","message":"..."}',
-            '4. Validation: emit {"type":"progress","phase":"extension_validating","message":"..."}',
-            "",
-            "## Extension categories",
-            "A. Workflow automation  B. Code gen  C. Project-specific tools",
-            "D. Quality/review  E. Documentation  F. LLM-powered  G. Stateful",
-            "",
-            'When you generate an extension, emit:',
-            '{"type":"extension_generated","name":"...","path":"...","description":"...","usesLLM":bool,"fileCount":n}',
-            "Extensions go under .pi/extensions/auto-research/",
-            "",
-            'When done, emit: {"type":"done","extensions":[...]}',
-            "",
-            "IMPORTANT: Output ONLY JSON lines. No markdown, no code blocks.",
-            "Each line must be a single valid JSON object.",
-        ].join("\n"),
+        send(event) {
+            relay.send({ kind: "ws_event", sessionId, event });
+        },
+        isOpen() {
+            return true;
+        },
     };
 }
-// ---------------------------------------------------------------------------
-// Helpers
-// ---------------------------------------------------------------------------
 /**
- * Detect the pi/spectral binary for spawning subprocesses.
- * Mirrors the logic in `agent/index.ts#getPiInvocation`.
+ * Scan the project's .pi/extensions/auto-research/ directory for generated
+ * extension directories. Each subdirectory with .ts files counts as one
+ * extension.
  */
-function getPiInvocation(subagentArgs) {
-    const currentScript = process.argv[1];
-    // Case 1: running via tsx with an existing script file
-    if (currentScript && fs.existsSync(currentScript)) {
-        return { command: process.execPath, args: [currentScript, ...subagentArgs] };
-    }
-    // Case 2: check if this is the spectral/aexol wrapper binary
-    const execName = path.basename(process.execPath).toLowerCase();
-    if (execName === "spectral" || execName === "aexol") {
-        return { command: process.execPath, args: subagentArgs };
-    }
-    // Case 3: generic node — try spectral first, then pi
-    function hasBin(name) {
-        const PATH = process.env.PATH || "";
-        const envPathSep = process.platform === "win32" ? ";" : ":";
-        for (const dir of PATH.split(envPathSep)) {
-            const candidate = path.join(dir, name);
+function scanGeneratedExtensions(projectPath) {
+    const arDir = path.join(projectPath, ".pi", "extensions", "auto-research");
+    const extensions = [];
+    try {
+        if (!fs.existsSync(arDir))
+            return extensions;
+        for (const entry of fs.readdirSync(arDir, { withFileTypes: true })) {
+            if (!entry.isDirectory())
+                continue;
+            const extPath = path.join(arDir, entry.name);
+            let fileCount = 0;
+            let usesLLM = false;
             try {
-                if (fs.existsSync(candidate))
-                    return true;
+                const files = fs.readdirSync(extPath);
+                for (const f of files) {
+                    if (f.endsWith(".ts") || f.endsWith(".js")) {
+                        fileCount++;
+                        // Quick heuristic: if any file references modelRegistry or setModel,
+                        // flag the extension as LLM-powered.
+                        if (!usesLLM) {
+                            try {
+                                const content = fs.readFileSync(path.join(extPath, f), "utf-8");
+                                if (content.includes("modelRegistry") ||
+                                    content.includes("setModel") ||
+                                    content.includes("registerProvider")) {
+                                    usesLLM = true;
+                                }
+                            }
+                            catch {
+                                /* skip */
+                            }
+                        }
+                    }
+                }
+            }
+            catch {
+                /* skip */
+            }
+            // Extract description from index.ts if present
+            let description = `${entry.name} extension`;
+            try {
+                const indexPath = path.join(extPath, "index.ts");
+                if (fs.existsSync(indexPath)) {
+                    const content = fs.readFileSync(indexPath, "utf-8");
+                    const descMatch = content.match(/description:\s*["']([^"']+)["']/);
+                    if (descMatch)
+                        description = descMatch[1];
+                }
             }
             catch {
                 /* skip */
             }
+            extensions.push({
+                name: entry.name,
+                path: `.pi/extensions/auto-research/${entry.name}`,
+                description,
+                usesLLM,
+                fileCount,
+            });
         }
-        return false;
     }
-    if (hasBin("spectral")) {
-        return { command: "spectral", args: subagentArgs };
+    catch {
+        /* skip */
     }
-    if (hasBin("pi")) {
-        return { command: "pi", args: subagentArgs };
+    return extensions;
+}
+/** Check if AGENTS.md contains the auto-research marker section. */
+function hasAgentsMdUpdate(projectPath) {
+    try {
+        const agentsPath = path.join(projectPath, "AGENTS.md");
+        if (!fs.existsSync(agentsPath))
+            return false;
+        const content = fs.readFileSync(agentsPath, "utf-8");
+        return content.includes("<!-- AUTO-RESEARCH:START -->") &&
+            content.includes("<!-- AUTO-RESEARCH:END -->");
+    }
+    catch {
+        return false;
     }
-    return { command: "pi", args: subagentArgs };
 }
 /**
- * Send a ServerEvent to the browser via the relay on the auto-research session.
+ * Read the auto-research manifest from disk. Returns null if it doesn't exist
+ * or can't be parsed.
  */
-function sendEvent(deps, sessionId, event) {
-    deps.relay.send({
-        kind: "ws_event",
-        sessionId,
-        event,
-    });
+function readManifest(projectPath) {
+    try {
+        const mPath = path.join(projectPath, ".pi", "extensions", "auto-research", "manifest.json");
+        if (!fs.existsSync(mPath))
+            return null;
+        const raw = fs.readFileSync(mPath, "utf-8");
+        const parsed = JSON.parse(raw);
+        if (typeof parsed.lastRun !== "string" || typeof parsed.lastCommit !== "string" || typeof parsed.runCount !== "number") {
+            return null;
+        }
+        return {
+            lastRun: parsed.lastRun,
+            lastCommit: parsed.lastCommit,
+            runCount: parsed.runCount,
+            extensions: Array.isArray(parsed.extensions) ? parsed.extensions : [],
+        };
+    }
+    catch {
+        return null;
+    }
 }
 /**
- * Safely kill a child process. Best-effort — errors are silently swallowed.
+ * Gather pre-run context for incremental auto-research.
  */
-function killProcess(child) {
+function gatherPreRunContext(projectPath) {
+    const manifest = readManifest(projectPath);
+    const isIncremental = manifest !== null;
+    const existingExtensions = [];
     try {
-        if (!child.killed && child.exitCode === null) {
-            child.kill("SIGTERM");
-            setTimeout(() => {
-                try {
-                    if (!child.killed && child.exitCode === null) {
-                        child.kill("SIGKILL");
-                    }
-                }
-                catch {
-                    // ignore
+        const arDir = path.join(projectPath, ".pi", "extensions", "auto-research");
+        if (fs.existsSync(arDir)) {
+            for (const entry of fs.readdirSync(arDir, { withFileTypes: true })) {
+                if (entry.isDirectory() && entry.name !== "node_modules") {
+                    existingExtensions.push(entry.name);
                 }
-            }, 2000);
+            }
         }
     }
-    catch {
-        // ignore
+    catch { /* skip */ }
+    let changesSinceLastRun = null;
+    if (manifest?.lastCommit) {
+        try {
+            const diffOutput = execSync(`git diff --stat ${manifest.lastCommit}..HEAD`, {
+                cwd: projectPath, encoding: "utf-8", timeout: 5000, maxBuffer: 64 * 1024,
+            }).trim();
+            if (diffOutput)
+                changesSinceLastRun = diffOutput;
+        }
+        catch { /* git not available */ }
     }
+    return { isIncremental, manifest, changesSinceLastRun, existingExtensions };
 }
-// ---------------------------------------------------------------------------
-// Phase mapping
-// ---------------------------------------------------------------------------
-const VALID_PHASES = new Set([
-    "context_collecting",
-    "context_analyzing",
-    "extension_generating",
-    "extension_validating",
-]);
-function mapPhase(agentPhase) {
-    if (VALID_PHASES.has(agentPhase)) {
-        return agentPhase;
+/**
+ * Build the incremental mode section for the task prompt.
+ */
+function buildIncrementalSection(ctx) {
+    const lines = [];
+    lines.push("## Incremental Run — Context from Previous Run", "");
+    if (ctx.manifest) {
+        const prevExts = ctx.manifest.extensions
+            .map((e) => `  - **${e.name}** (${e.path})`)
+            .join("\n");
+        lines.push(`Previously generated extensions (run #${ctx.manifest.runCount} at ${ctx.manifest.lastRun}):`);
+        lines.push(prevExts || "  (none)", "");
+    }
+    if (ctx.changesSinceLastRun) {
+        lines.push("### What changed since last run:", "```", ctx.changesSinceLastRun, "```", "");
+    }
+    else {
+        lines.push("> Note: Could not determine git diff since last run.", "");
+    }
+    if (ctx.existingExtensions.length > 0) {
+        lines.push("### Existing extensions to review:");
+        for (const name of ctx.existingExtensions) {
+            lines.push(`  - \`.pi/extensions/auto-research/${name}/\``);
+        }
+        lines.push("");
+    }
+    lines.push("### Incremental Instructions", "");
+    lines.push("You are running auto-research again on a project that has been analyzed before.");
+    lines.push("DO NOT start from scratch. Instead:", "");
+    lines.push("1. **Review existing extensions** — Read each existing extension's index.ts.");
+    lines.push("   KEEP extensions that are still relevant. UPDATE or DELETE obsolete ones.", "");
+    lines.push("2. **Focus on changes** — Only create NEW extensions for project areas that");
+    lines.push("   changed since last run (see git diff above).", "");
+    lines.push("3. **Clean up stale extensions** — If an extension targets a tool/library that");
+    lines.push("   was removed from the project, delete the extension directory entirely.", "");
+    lines.push("4. **Update AGENTS.md** — The AUTO-RESEARCH section should reflect the CURRENT");
+    lines.push("   set of extensions (remove stale entries, add new ones).", "");
+    lines.push("5. **Save manifest.json** — Write/update .pi/extensions/auto-research/manifest.json");
+    lines.push(`   with: lastRun (ISO), lastCommit (git HEAD), runCount (${ctx.manifest ? ctx.manifest.runCount + 1 : 1}),`);
+    lines.push("   and extensions array with name/path/category for each generated extension.", "");
+    return lines.join("\n");
+}
+/**
+ * Write/update the auto-research manifest after completion.
+ */
+function writeManifest(projectPath, extensions) {
+    try {
+        const arDir = path.join(projectPath, ".pi", "extensions", "auto-research");
+        if (!fs.existsSync(arDir))
+            fs.mkdirSync(arDir, { recursive: true });
+        let currentCommit = "unknown";
+        try {
+            currentCommit = execSync("git rev-parse HEAD", {
+                cwd: projectPath, encoding: "utf-8", timeout: 3000,
+            }).trim();
+        }
+        catch { /* git not available */ }
+        const prev = readManifest(projectPath);
+        const manifest = {
+            lastRun: new Date().toISOString(),
+            lastCommit: currentCommit,
+            runCount: (prev?.runCount ?? 0) + 1,
+            extensions: extensions.map((e) => ({ name: e.name, path: e.path, category: e.usesLLM ? "F" : undefined })),
+        };
+        fs.writeFileSync(path.join(arDir, "manifest.json"), JSON.stringify(manifest, null, 2) + "\n", "utf-8");
     }
-    return "context_analyzing";
+    catch { /* best-effort */ }
+}
+/**
+ * Build the auto-research task prompt. This is sent as a user message
+ * through the existing PiBridge, so the agent uses the session's model
+ * and backend proxy.
+ *
+ * @param projectPath Absolute path to the project root
+ * @param projectName Human-readable project name
+ * @param preRunContext Incremental context — if null/!incremental, runs in full-scan mode
+ */
+function buildAutoResearchTask(projectPath, projectName, preRunContext = null) {
+    const isIncremental = preRunContext?.isIncremental ?? false;
+    const runInfo = isIncremental && preRunContext?.manifest
+        ? `This is run #${preRunContext.manifest.runCount + 1}. Prior run was at ${preRunContext.manifest.lastRun}.`
+        : "This is the first auto-research run for this project.";
+    const incrementalHeader = isIncremental ? buildIncrementalSection(preRunContext) : "";
+    return [
+        "## Auto-Research Task",
+        "",
+        `Project: **${projectPath}** — **${projectName}**`,
+        "",
+        `> ${runInfo}`,
+        "",
+        incrementalHeader,
+        "---",
+        "",
+        "## Pi Extension API Reference (HARD-CODED — DO NOT RESEARCH)",
+        "",
+        "You are running inside **pi**, a coding agent harness. You do NOT need to research",
+        "what pi is or how it works — this knowledge is provided here. Jump straight to",
+        "analyzing the project and generating extensions.",
+        "",
+        "### Extension Entry Point",
+        "",
+        "Every extension is a TypeScript file exporting a single default `activate` function:",
+        "",
+        "```typescript",
+        "import type { ExtensionAPI } from \"@mariozechner/pi-coding-agent\";",
+        "export default function activate(pi: ExtensionAPI): void {",
+        "  // Register tools, commands, event handlers here",
+        "}",
+        "```",
+        "",
+        "Extensions live in: `.pi/extensions/` (project-local) or `~/.pi/agent/extensions/` (user-global).",
+        "Auto-research generates extensions into `.pi/extensions/auto-research/<name>/`.",
+        "",
+        "### Tools (pi.registerTool)",
+        "",
+        "The primary extension mechanism. Tools become available to the agent during conversations.",
+        "",
+        "```typescript",
+        "import { Type } from \"typebox\";  // Built-in, zero-dependency schema validation",
+        "",
+        "pi.registerTool({",
+        "  name: \"tool_name\",                // snake_case, unique",
+        "  description: \"What this tool does\", // Used by the agent to decide when to call",
+        "  parameters: Type.Object({           // TypeBox schema for typed params",
+        "    input: Type.String({ description: \"The input to process\" }),",
+        "    // Optional params: Type.Optional(Type.String())",
+        "  }),",
+        "  handler: async (params, ctx) => {",
+        "    // ctx.session?.cwd — current working directory",
+        "    // ctx.bash(cmd) — run shell commands",
+        "    // ctx.read(path) — read files",
+        "    // ctx.write(path, content) — write files",
+        "    return {",
+        "      content: [{ type: \"text\", text: \"Result string\" }],",
+        "      // isError: true  // set this to signal failure",
+        "    };",
+        "  },",
+        "});",
+        "```",
+        "",
+        "### Commands (pi.registerCommand)",
+        "",
+        "Custom slash commands the user can type:",
+        "",
+        "```typescript",
+        "pi.registerCommand({",
+        "  name: \"my-command\",",
+        "  description: \"What /my-command does\",",
+        "  execute: async (args: string[], ctx) => {",
+        "    return { content: [{ type: \"text\", text: \"Done\" }] };",
+        "  },",
+        "});",
+        "```",
+        "",
+        "### Event Interception (pi.on)",
+        "",
+        "React to agent lifecycle events. Available events: `tool_call` (before a tool runs),",
+        "`session_start`, `session_shutdown`, `context` (prompt assembly), `before_agent_start`.",
+        "",
+        "```typescript",
+        "pi.on(\"tool_call\", (event) => {",
+        "  // event.name — tool being called",
+        "  // event.params — tool arguments",
+        "  // Can modify or observe tool calls",
+        "});",
+        "```",
+        "",
+        "### Model Access (LLM-powered extensions)",
+        "",
+        "Extensions can call LLMs to build AI-powered features:",
+        "",
+        "```typescript",
+        "// Discover available models",
+        "const models = pi.modelRegistry.list();",
+        "const model = pi.modelRegistry.find(\"claude-sonnet-4-5\");",
+        "",
+        "// Switch the active model for the current turn",
+        "const switched = pi.setModel(model.id);  // returns false if no API key",
+        "",
+        "// For complex multi-step AI pipelines, spawn a subagent:",
+        "// (see subagent extension pattern below)",
+        "```",
+        "",
+        "### Available Packages (npm dependencies)",
+        "",
+        "Extensions run as Node.js TypeScript modules. These packages are available:",
+        "- `@mariozechner/pi-coding-agent` — ExtensionAPI type",
+        "- `@mariozechner/pi-ai` — AI types and utilities",
+        "- `typebox` — Zero-dependency runtime type validation",
+        "- All `node:*` built-ins: `fs`, `path`, `child_process`, `os`, `crypto`, etc.",
+        "- Any npm package already in the project's node_modules",
+        "",
+        "```typescript",
+        "import * as fs from \"node:fs\";",
+        "import * as path from \"node:path\";",
+        "import { execSync } from \"node:child_process\";",
+        "```",
+        "",
+        "### Pi's Built-in Tools (what the agent already has)",
+        "",
+        "The agent already has these tools — do NOT reimplement them:",
+        "- `read` — Read file contents",
+        "- `write` — Create/overwrite files",
+        "- `edit` — Precise text replacement in files",
+        "- `bash` — Execute shell commands",
+        "- `grep` — Search file contents with regex",
+        "- `find` — Search files by name/pattern",
+        "- `ls` — List directory contents",
+        "- `recall` — Recall compressed memory observations",
+        "- `mcp` — MCP server gateway (if MCP extension is active)",
+        "",
+        "### Extension File Structure",
+        "",
+        "Each extension is a directory under `.pi/extensions/auto-research/`:",
+        "",
+        "```",
+        ".pi/extensions/auto-research/",
+        "  <extension-name>/",
+        "    index.ts          # Entry point — default export activate(pi)",
+        "    utils.ts          # [optional] Helper functions",
+        "    state.json        # [optional] Persisted state (for stateful extensions)",
+        "```",
+        "",
+        "### Handler Return Types",
+        "",
+        "All handlers (tools, commands) return content blocks:",
+        "",
+        "```typescript",
+        "// Success:",
+        "return { content: [{ type: \"text\", text: \"Result\" }] };",
+        "",
+        "// Error:",
+        "return {",
+        "  content: [{ type: \"text\", text: \"Error message\" }],",
+        "  isError: true,",
+        "};",
+        "```",
+        "",
+        "### Common Extension Patterns",
+        "",
+        "1. **Shell wrapper** — Use `ctx.bash()` inside a tool handler to run commands",
+        "2. **File processor** — Use `ctx.read()`/`ctx.write()` to process project files",
+        "3. **LLM-powered** — Use `pi.modelRegistry` to call AI models for smart processing",
+        "4. **Event listener** — Use `pi.on()` to react to agent lifecycle events",
+        "5. **Stateful** — Use `node:fs` to persist state as JSON between calls",
+        "6. **Subagent** — Spawn pi subprocesses for isolated analysis tasks",
+        "",
+        "---",
+        "",
+        "## Process",
+        "",
+        "1. **Context Collection** — Explore the project structure:",
+        "   - Read package.json, tsconfig.json, deno.json (if present)",
+        "   - Check existing extensions under .pi/extensions/",
+        "   - Review key source files to understand architecture",
+        "   - Check git log for recent changes and patterns",
+        "",
+        "2. **Analysis** — Identify automation opportunities:",
+        "   - What repetitive tasks do developers perform?",
+        "   - What project-specific tools or commands would help?",
+        "   - Are there code patterns that could be automated?",
+        "   - Could LLM-powered extensions improve developer workflows?",
+        "",
+        "3. **Extension Generation** — Create extension .ts files:",
+        "   - Use `pi.registerTool()` for simple tools with TypeBox validation",
+        "   - Use `pi.registerCommand()` for custom slash commands",
+        "   - For LLM-powered extensions, use `ctx.modelRegistry` to call models",
+        "   - Create files under `.pi/extensions/auto-research/<name>/`",
+        "   - Each extension needs an `index.ts` that registers its tools/commands",
+        "   - Read `.pi/agents/auto-research-templates.md` for proven extension templates",
+        "",
+        "4. **Validation** — Verify generated extensions:",
+        "   - Ensure all imports resolve to available packages (see reference above)",
+        "   - Verify registerTool/registerCommand calls have proper TypeBox schemas",
+        "   - Ensure handlers always return content blocks",
+        "   - Verify error handling with try/catch",
+        "",
+        "### Extension Categories (A-G)",
+        "",
+        "| Category | Description | Example |",
+        "|----------|-------------|---------|",
+        "| A. Workflow automation | Automate repetitive tasks | Auto-format on save, pre-commit hooks |",
+        "| B. Code generation | Generate boilerplate/scaffolding | Component generator, CRUD scaffold |",
+        "| C. Project-specific tools | Tools tailored to this project | Database migration helper, API client gen |",
+        "| D. Quality & review | Linting, testing, code review | PR reviewer, test coverage analyzer |",
+        "| E. Documentation | Auto-generate docs | README updater, API doc generator |",
+        "| F. LLM-powered | Extensions that call AI models | Code reviewer with LLM, smart refactor |",
+        "| G. Stateful | Extensions that persist state | Session memory, project stats tracker |",
+        "",
+        "### Important Rules",
+        "",
+        "- Write each extension as TypeScript files under `.pi/extensions/auto-research/<name>/`",
+        "- Every extension directory MUST have an `index.ts` entry point",
+        "- Use proper TypeScript with type annotations and error handling",
+        "- Extensions must handle errors gracefully (never crash the agent)",
+        "- Use `read`/`write`/`edit`/`bash` tools to create and verify files",
+        "- When generating LLM-powered extensions, explain how model selection works",
+        "",
+        "### 5. Update AGENTS.md — Persist Discovered Knowledge",
+        "",
+        "CRITICAL: After generating extensions, update the project's **AGENTS.md** so future",
+        "agent sessions automatically know about the new capabilities.",
+        "",
+        "**How AGENTS.md works:**",
+        "- Pi loads AGENTS.md at startup from the project root and parent directories",
+        "- All found AGENTS.md files are concatenated and injected into the system prompt",
+        "- This means documented extensions get discovered by the agent automatically",
+        "",
+        "**What to add:** Use HTML comment markers so future auto-research can update the section:",
+        "",
+        "```markdown",
+        "<!-- AUTO-RESEARCH:START -->",
+        "## Auto-Generated Extensions",
+        "",
+        "These extensions were generated by auto-research. They are available",
+        "in every session. Pi loads them automatically from `.pi/extensions/`.",
+        "",
+        "### `<extension-name>`",
+        "",
+        "- **Tools:** `tool_name` — description of what it does",
+        "- **Commands:** `/command-name` — description",
+        "- **When to use:** Brief guidance on when the agent should call this tool",
+        "- **Category:** A-G (workflow/codegen/project-specific/quality/docs/llm-powered/stateful)",
+        "",
+        "### Project Patterns Discovered",
+        "",
+        "- **Build system:** npm / deno / etc.",
+        "- **Test framework:** vitest / jest / etc.",
+        "- **Conventions:** key patterns the agent should follow",
+        "- **Key directories:** important source locations",
+        "<!-- AUTO-RESEARCH:END -->",
+        "```",
+        "",
+        "**Rules for the section:**",
+        "- Place it at the END of AGENTS.md (after all existing content)",
+        "- If an older `<!-- AUTO-RESEARCH:START -->...<!-- AUTO-RESEARCH:END -->` block",
+        "  already exists, REPLACE it entirely with the updated version",
+        "- List EVERY generated extension with its full tool/command list",
+        "- Add project patterns discovered during context analysis",
+        "- Keep descriptions concise (agent uses this as reference, not tutorial)",
+        "",
+        "---",
+        "",
+        "Start by collecting project context, then generate the most impactful extensions",
+        "and update AGENTS.md when done.",
+    ].join("\n");
 }
 // ---------------------------------------------------------------------------
 // Handler
 // ---------------------------------------------------------------------------
-const AR_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes — generous for LLM-based analysis
+const AR_TIMEOUT_MS = 10 * 60 * 1000; // 10 minutes — generous for LLM-based analysis
 /**
- * Execute auto-research for a project.
+ * Execute auto-research for a project through the existing PiBridge.
  *
  * Caller (dispatcher) is fire-and-forget — this function is `void` and
  * all errors are surfaced as `auto_research_error` events on the wire
  * rather than thrown.
+ *
+ * This replaces the old subprocess-spawning approach. Instead of launching
+ * a separate pi process (which lacks backend proxy credentials), we send
+ * the auto-research task through the session's existing PiBridge. The agent
+ * uses the session's model, backend proxy, and all available tools.
+ *
+ * Once `auto_research_start` has been emitted, a single terminal event
+ * follows: `auto_research_complete` when the watcher sees `agent_end`, or
+ * `auto_research_error` on a session `error` event, a rejected prompt, a
+ * failed post-run scan, a failed attach, or the timeout — whichever comes
+ * first. Later triggers are ignored.
+ *
+ * @param {object} input - Carries `projectId` and `sessionId`.
+ * @param {object} deps - `store`, `manager`, `relay`, `subscribers` and an
+ *   optional `logger` (falls back to `console`).
+ * @returns {void}
  */
 export function handleAutoResearch(input, deps) {
     const { projectId, sessionId } = input;
-    const { store } = deps;
+    const { store, manager, relay, subscribers } = deps;
     const logger = deps.logger ?? console;
-    // 1. Resolve the project from the store.
-    const project = store.getProject(projectId);
-    if (!project) {
-        sendEvent(deps, sessionId, {
+    // Guard: prevent concurrent auto-research on the same session.
+    if (activeAutoResearchSessions.has(sessionId)) {
+        sendEvent(relay, sessionId, {
             type: "auto_research_error",
             projectId,
-            message: `Project not found: ${projectId}`,
+            message: "Auto-research is already running for this session.",
         });
         return;
     }
-    const projectPath = project.path;
-    // 2. Load the auto-research agent definition.
-    const agentDef = findAgentDef(projectPath);
-    if (!agentDef) {
-        sendEvent(deps, sessionId, {
+    // Verify the project exists in the store.
+    const project = store.getProject(projectId);
+    if (!project) {
+        sendEvent(relay, sessionId, {
             type: "auto_research_error",
             projectId,
-            message: "Auto-research agent definition not found. " +
-                "Create .pi/agents/auto-research.md in the project or ~/.pi/agent/agents/auto-research.md.",
+            message: `Project not found: ${projectId}`,
         });
         return;
     }
-    // 3. Emit start event
-    sendEvent(deps, sessionId, {
+    const projectPath = project.path;
+    // Build the auto-research task prompt with incremental context.
+    const preRunContext = gatherPreRunContext(projectPath);
+    const taskContent = buildAutoResearchTask(projectPath, project.name, preRunContext);
+    // --- Emit start event ---
+    sendEvent(relay, sessionId, {
         type: "auto_research_start",
         projectId,
     });
-    // 4. Write the system prompt to a temp file so pi can load it via
-    //    --append-system-prompt (mirrors the subagent extension pattern).
-    let tmpDir = null;
-    let tmpPromptPath = null;
-    try {
-        tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "spectral-auto-research-"));
-        tmpPromptPath = path.join(tmpDir, "system-prompt.md");
-        fs.writeFileSync(tmpPromptPath, agentDef.systemPrompt, { encoding: "utf-8", mode: 0o600 });
-    }
-    catch (err) {
-        const msg = err instanceof Error ? err.message : String(err);
-        sendEvent(deps, sessionId, {
-            type: "auto_research_error",
-            projectId,
-            message: `Failed to write system prompt temp file: ${msg}`,
-        });
-        if (tmpDir) {
-            try {
-                fs.rmSync(tmpDir, { recursive: true, force: true });
-            }
-            catch { /* ignore */ }
+    // --- Ensure session has a wire subscriber (mirrors handleClientMessage) ---
+    let subscriber = subscribers.get(sessionId);
+    if (!subscriber) {
+        subscriber = makeRelaySubscriber(sessionId, relay);
+        try {
+            manager.attach(sessionId, subscriber);
         }
-        return;
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            sendEvent(relay, sessionId, {
+                type: "auto_research_error",
+                projectId,
+                message: `Failed to attach session subscriber: ${msg}`,
+            });
+            return;
+        }
+        subscribers.set(sessionId, subscriber);
     }
-    // 5. Build the user task. This becomes a positional argument to pi,
-    //    matching how the subagent extension passes tasks.
-    const task = buildUserTask(projectPath, project.name);
-    // 6. Build spawn arguments (mirroring agent/index.ts subprocess spawn).
-    const args = [
-        "--mode", "json",
-        "-p",
-        "--no-session",
-        "--model", agentDef.model,
-        "--append-system-prompt", tmpPromptPath,
-    ];
-    // Add tools if defined in the agent definition. The auto-research agent
-    // uses read/grep/find/ls/bash/write/edit — its frontmatter should declare
-    // them so pi allows those tools.
-    // We don't extract tools from frontmatter currently because parseFrontmatter
-    // returns a generic Record. For now, auto-research always uses the default
-    // tool set (pi's full tool set is available by default in -p mode).
-    // TODO: when agent def frontmatter parsing is unified with agents.ts, also
-    //       pass --tools here.
-    // The user task is the last positional argument — pi treats it as the
-    // initial prompt in -p mode.
-    args.push(`Task: ${task}`);
-    // 7. Spawn pi subprocess.
-    const invocation = getPiInvocation(args);
-    let child;
+    // --- Get the session's current model (use same model as active session) ---
+    const storedModelId = store.getSessionModel(sessionId) ?? undefined;
+    // --- Mark auto-research as active ---
+    activeAutoResearchSessions.add(sessionId);
+    // We track whether the run has already been finalized (completion, error
+    // or timeout) so we don't emit duplicate events if agent_end, a prompt
+    // rejection and the timer race.
+    let watcherFired = false;
+    // Timeout handle. Declared before finalize() so that every exit path can
+    // cancel the timer; it stays null until the timer is actually armed.
+    let timeout = null;
+    /**
+     * Safely finalize auto-research: cancel the timeout, mark inactive,
+     * detach watcher, emit event. Only the first call has any effect.
+     */
+    const finalize = (event) => {
+        if (watcherFired)
+            return;
+        watcherFired = true;
+        if (timeout !== null)
+            clearTimeout(timeout);
+        activeAutoResearchSessions.delete(sessionId);
+        try {
+            manager.detach(sessionId, watcher);
+        }
+        catch {
+            /* best-effort */
+        }
+        sendEvent(relay, sessionId, event);
+    };
+    // --- Attach a watcher subscriber to detect turn completion ---
+    // This subscriber receives all broadcast events alongside the main subscriber
+    // but only acts on agent_end (→ auto_research_complete) and error events.
+    const watcher = {
+        send(event) {
+            if (watcherFired)
+                return;
+            if (event.type === "agent_end") {
+                // The auto-research turn completed. Scan for generated extensions
+                // and emit the completion event. A failure while scanning or
+                // writing the manifest is reported as auto_research_error rather
+                // than thrown back into the manager's broadcast.
+                let completion;
+                try {
+                    const extensions = scanGeneratedExtensions(projectPath);
+                    const agentsMdUpdated = hasAgentsMdUpdate(projectPath);
+                    writeManifest(projectPath, extensions);
+                    completion = {
+                        type: "auto_research_complete",
+                        projectId,
+                        extensions,
+                        agentsMdUpdated,
+                    };
+                }
+                catch (err) {
+                    const msg = err instanceof Error ? err.message : String(err);
+                    logger.error?.(`[auto-research] post-run scan failed for ${sessionId}:`, msg);
+                    finalize({
+                        type: "auto_research_error",
+                        projectId,
+                        message: `Auto-research post-processing failed: ${msg}`,
+                    });
+                    return;
+                }
+                finalize(completion);
+            }
+            else if (event.type === "error") {
+                finalize({
+                    type: "auto_research_error",
+                    projectId,
+                    message: event.message,
+                });
+            }
+        },
+        isOpen() {
+            return true;
+        },
+    };
     try {
-        child = spawn(invocation.command, invocation.args, {
-            cwd: projectPath,
-            stdio: ["ignore", "pipe", "pipe"], // stdin ignored — task is positional
-            env: { ...process.env },
-            shell: false,
-        });
+        manager.attach(sessionId, watcher);
     }
     catch (err) {
         const msg = err instanceof Error ? err.message : String(err);
-        sendEvent(deps, sessionId, {
+        finalize({
             type: "auto_research_error",
             projectId,
-            message: `Failed to spawn pi subprocess: ${msg}`,
+            message: `Failed to attach auto-research watcher: ${msg}`,
         });
-        cleanupTemp(tmpDir);
         return;
     }
-    // 8. Set up timeout
-    const timeout = setTimeout(() => {
-        killProcess(child);
-        sendEvent(deps, sessionId, {
+    // --- Set up timeout ---
+    // finalize() cancels this timer, so it only fires if nothing else has
+    // finished the run first. The message is derived from AR_TIMEOUT_MS so the
+    // two cannot drift apart.
+    timeout = setTimeout(() => {
+        finalize({
             type: "auto_research_error",
             projectId,
-            message: "Auto-research timed out after 5 minutes",
+            message: `Auto-research timed out after ${AR_TIMEOUT_MS / (60 * 1000)} minutes.`,
         });
-        cleanupTemp(tmpDir);
     }, AR_TIMEOUT_MS);
-    // 9. Collect stdout and parse pi's JSON-line output format.
-    //    pi in --mode json emits one JSON line per event:
-    //      {"type":"message_start",...}
-    //      {"type":"text_delta","content":"..."}
-    //      {"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"..."}]}}
-    //      {"type":"agent_end",...}
-    //
-    //    The auto-research agent's output is inside assistant message_end
-    //    events. We extract the text and try to parse it as auto-research
-    //    event JSON (progress, extension_generated, done, error).
-    let stdoutBuffer = "";
-    const discoveredExtensions = [];
-    let stderrBuffer = "";
-    child.stdout?.on("data", (chunk) => {
-        stdoutBuffer += chunk.toString("utf-8");
-        // Process complete lines
-        const lines = stdoutBuffer.split("\n");
-        stdoutBuffer = lines.pop() ?? "";
-        for (const rawLine of lines) {
-            const line = rawLine.trim();
-            if (!line)
-                continue;
-            let event;
-            try {
-                event = JSON.parse(line);
-            }
-            catch {
-                // Non-JSON output — ignore.
-                continue;
-            }
-            // Only process message_end events from the assistant.
-            if (event.type !== "message_end" || !event.message)
-                continue;
-            if (event.message.role !== "assistant")
-                continue;
-            const content = event.message.content;
-            if (!content || !Array.isArray(content) || content.length === 0)
-                continue;
-            // Extract text blocks from the assistant message
-            for (const block of content) {
-                if (block.type !== "text" || typeof block.text !== "string")
-                    continue;
-                // Try to parse the assistant's text output as one or more JSON
-                // auto-research events. The agent may output multiple JSON objects
-                // in a single assistant message (separated by newlines or
-                // concatenated). We try parsing the full text first, then fall
-                // back to line-by-line.
-                const text = block.text.trim();
-                // First, try treating the entire text block as a single event
-                let parsed = null;
-                try {
-                    parsed = JSON.parse(text);
-                }
-                catch {
-                    // Not a single JSON object — try line-by-line
-                }
-                if (parsed && parsed.type) {
-                    processArEvent(parsed, discoveredExtensions, projectId, sessionId, deps);
-                }
-                else {
-                    // Try each line individually (the agent may emit multi-line
-                    // JSON event output, e.g. one event per line).
-                    for (const subLine of text.split("\n")) {
-                        const trimmed = subLine.trim();
-                        if (!trimmed)
-                            continue;
-                        try {
-                            const eventLine = JSON.parse(trimmed);
-                            if (eventLine && eventLine.type) {
-                                processArEvent(eventLine, discoveredExtensions, projectId, sessionId, deps);
-                            }
-                        }
-                        catch {
-                            // skip non-JSON lines
-                        }
-                    }
-                }
-            }
-        }
-    });
-    child.stderr?.on("data", (chunk) => {
-        stderrBuffer += chunk.toString("utf-8");
-    });
-    // 10. Handle process exit
-    child.on("close", (code) => {
-        clearTimeout(timeout);
-        // Process any remaining buffered stdout (drain the buffer)
-        if (stdoutBuffer.trim()) {
-            try {
-                const event = JSON.parse(stdoutBuffer.trim());
-                if (event.type === "message_end" && event.message?.role === "assistant") {
-                    const content = event.message?.content;
-                    if (Array.isArray(content)) {
-                        for (const block of content) {
-                            if (block.type === "text" && typeof block.text === "string") {
-                                for (const subLine of block.text.split("\n")) {
-                                    const trimmed = subLine.trim();
-                                    if (!trimmed)
-                                        continue;
-                                    try {
-                                        const ar = JSON.parse(trimmed);
-                                        if (ar && ar.type) {
-                                            processArEvent(ar, discoveredExtensions, projectId, sessionId, deps);
-                                        }
-                                    }
-                                    catch {
-                                        /* skip */
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-            catch {
-                // ignore
-            }
-        }
-        if (code !== 0 && discoveredExtensions.length === 0) {
-            // Subprocess exited with error and no extensions were generated
-            const errDetail = stderrBuffer
-                ? ` (stderr: ${stderrBuffer.slice(0, 500)})`
-                : "";
-            sendEvent(deps, sessionId, {
-                type: "auto_research_error",
-                projectId,
-                message: `Auto-research subprocess exited with code ${code}${errDetail}`,
-            });
-            cleanupTemp(tmpDir);
-            return;
-        }
-        // Emit completion with any discovered extensions (even partial — the
-        // UI can show what was generated before the process died).
-        sendEvent(deps, sessionId, {
-            type: "auto_research_complete",
-            projectId,
-            extensions: discoveredExtensions,
-        });
-        // Log any stderr for debugging
-        if (stderrBuffer) {
-            logger.error?.(`[auto-research] subprocess stderr (code=${code}): ${stderrBuffer.slice(0, 1000)}`);
-        }
-        cleanupTemp(tmpDir);
-    });
-    child.on("error", (err) => {
-        clearTimeout(timeout);
-        sendEvent(deps, sessionId, {
+    // --- Send the prompt through the existing PiBridge (backend proxy) ---
+    manager.prompt(sessionId, taskContent, storedModelId).catch((err) => {
+        if (watcherFired)
+            return; // already handled by watcher or timeout
+        const msg = err instanceof Error ? err.message : String(err);
+        logger.error?.(`[auto-research] manager.prompt failed for ${sessionId}:`, msg);
+        finalize({
             type: "auto_research_error",
             projectId,
-            message: `Auto-research subprocess error: ${err.message}`,
+            message: `Auto-research prompt failed: ${msg}`,
         });
-        cleanupTemp(tmpDir);
     });
 }
-// ---------------------------------------------------------------------------
-// Event processor
-// ---------------------------------------------------------------------------
-function processArEvent(parsed, extensions, projectId, sessionId, deps) {
-    const t = parsed.type;
-    if (t === "progress") {
-        const p = parsed;
-        const wirePhase = mapPhase(p.phase);
-        sendEvent(deps, sessionId, {
-            type: "auto_research_progress",
-            projectId,
-            phase: wirePhase,
-            message: p.message,
-        });
-    }
-    else if (t === "extension_generated") {
-        const eg = parsed;
-        extensions.push({
-            name: eg.name,
-            path: eg.path,
-            description: eg.description,
-            usesLLM: eg.usesLLM,
-            fileCount: eg.fileCount,
-        });
-        // Also emit as a progress update so the UI shows real-time activity
-        sendEvent(deps, sessionId, {
-            type: "auto_research_progress",
-            projectId,
-            phase: "extension_generating",
-            message: `Generated: ${eg.name}`,
-        });
-    }
-    else if (t === "done") {
-        const d = parsed;
-        for (const ext of d.extensions) {
-            extensions.push(ext);
-        }
-    }
-    else if (t === "error") {
-        const e = parsed;
-        sendEvent(deps, sessionId, {
-            type: "auto_research_progress",
-            projectId,
-            phase: "extension_validating",
-            message: `Error: ${e.message}`,
-        });
-    }
-    // Unknown event types are silently ignored for forward compatibility
-}
-// ---------------------------------------------------------------------------
-// Task builder
-// ---------------------------------------------------------------------------
-/**
- * Build the user task prompt sent as positional argument to pi.
- * The system prompt (from the agent definition) provides the detailed
- * instructions; this is just the project-specific context.
- */
-function buildUserTask(projectPath, projectName) {
-    return [
-        `Analyze the project at "${projectPath}" named "${projectName}" to determine`,
-        `what custom pi coding agent extensions would accelerate development.`,
-        ``,
-        `Follow your system prompt instructions for the full process:`,
-        `1. Context collection — scan the project structure`,
-        `2. Analysis — identify patterns and automation opportunities`,
-        `3. Extension generation — create .ts extension files`,
-        `4. Validation — verify the extensions are correct`,
-        ``,
-        `Important: Read the template library at .pi/agents/auto-research-templates.md`,
-        `if it exists, for ready-to-adapt extension templates.`,
-    ].join("\n");
-}
-// ---------------------------------------------------------------------------
-// Temp file cleanup
-// ---------------------------------------------------------------------------
-function cleanupTemp(tmpDir) {
-    if (!tmpDir)
-        return;
-    try {
-        fs.rmSync(tmpDir, { recursive: true, force: true });
-    }
-    catch {
-        // best-effort cleanup
-    }
-}