npm - @davidorex/pi-behavior-monitors - Versions diffs - 0.12.0 → 0.14.1 - Mend

@davidorex/pi-behavior-monitors 0.12.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/agents/commit-hygiene-classifier.agent.yaml +11 -0
package/agents/fragility-classifier.agent.yaml +11 -0
package/agents/hedge-classifier.agent.yaml +11 -0
package/agents/unauthorized-action-classifier.agent.yaml +11 -0
package/agents/work-quality-classifier.agent.yaml +11 -0
package/dist/index.d.ts +28 -6
package/dist/index.d.ts.map +1 -1
package/dist/index.js +343 -132
package/dist/index.js.map +1 -1
package/examples/commit-hygiene/classify.md +4 -3
package/examples/commit-hygiene.monitor.json +1 -3
package/examples/fragility/classify.md +4 -6
package/examples/fragility.monitor.json +1 -3
package/examples/hedge/classify.md +17 -8
package/examples/hedge.monitor.json +2 -4
package/examples/unauthorized-action/classify.md +4 -3
package/examples/unauthorized-action.monitor.json +1 -3
package/examples/work-quality/classify.md +4 -5
package/examples/work-quality.monitor.json +1 -3
package/package.json +4 -2
package/schemas/monitor.schema.json +3 -11
package/schemas/verdict.schema.json +14 -0
package/skills/pi-behavior-monitors/SKILL.md +5 -4
package/skills/pi-behavior-monitors/references/bundled-resources.md +10 -1

package/dist/index.js CHANGED Viewed

@@ -13,6 +13,9 @@ import * as os from "node:os";
 import * as path from "node:path";
 import { fileURLToPath } from "node:url";
 import { readBlock } from "@davidorex/pi-project/block-api";
+import { validateFromFile } from "@davidorex/pi-project/schema-validator";
+import { createAgentLoader } from "@davidorex/pi-workflows/agent-spec";
+import { compileAgentSpec } from "@davidorex/pi-workflows/step-shared";
 import { complete, StringEnum } from "@mariozechner/pi-ai";
 import { getAgentDir } from "@mariozechner/pi-coding-agent";
 import { Box, Text } from "@mariozechner/pi-tui";
@@ -20,6 +23,18 @@ import { Type } from "@sinclair/typebox";
 import nunjucks from "nunjucks";
 const EXTENSION_DIR = path.dirname(fileURLToPath(import.meta.url));
 const EXAMPLES_DIR = path.join(EXTENSION_DIR, "..", "examples");
+const AGENTS_DIR = path.join(EXTENSION_DIR, "..", "agents");
+/** Tool definition for forcing structured verdict output from the classify LLM call. */
+const VERDICT_TOOL = {
+    name: "classify_verdict",
+    description: "Output the monitor classification verdict",
+    parameters: Type.Object({
+        verdict: Type.String({ description: "Classification result: CLEAN, FLAG, or NEW" }),
+        description: Type.Optional(Type.String({ description: "One-sentence explanation (required for FLAG/NEW)" })),
+        newPattern: Type.Optional(Type.String({ description: "Pattern to learn (required for NEW)" })),
+        severity: Type.Optional(Type.String({ description: "Issue severity: info, warning, or critical" })),
+    }),
+};
 export const COLLECTOR_DESCRIPTORS = [
     { name: "user_text", description: "Most recent user message text" },
     { name: "assistant_text", description: "Most recent assistant message text" },
@@ -33,6 +48,11 @@ export const COLLECTOR_DESCRIPTORS = [
     { name: "project_vision", description: ".project/project.json vision, core_value, name" },
     { name: "project_conventions", description: ".project/conformance-reference.json principle names" },
     { name: "git_status", description: "Output of git status --porcelain", limits: "5s timeout" },
+    {
+        name: "conversation_history",
+        description: "Prior turn summaries (user request + actions + assistant response)",
+        limits: "1-3 turns adaptive, 2000 char max",
+    },
 ];
 export const WHEN_CONDITIONS = [
     { name: "always", description: "Fire every time the event occurs", parameterized: false },
@@ -58,7 +78,7 @@ export const WHEN_CONDITIONS = [
         parameterized: true,
     },
 ];
-export const VERDICT_TYPES = ["clean", "flag", "new"];
+export const VERDICT_TYPES = ["clean", "flag", "new", "error"];
 export const SCOPE_TARGETS = ["main", "subagent", "all", "workflow"];
 export const VALID_EVENTS = new Set(["message_end", "turn_end", "agent_end", "command", "tool_call"]);
 function isValidEvent(event) {
@@ -67,7 +87,7 @@ function isValidEvent(event) {
 // =============================================================================
 // Discovery
 // =============================================================================
-function discoverMonitors() {
+export function discoverMonitors() {
     const dirs = [];
     // project-local
     let cwd = process.cwd();
@@ -77,6 +97,9 @@ function discoverMonitors() {
             dirs.push(candidate);
             break;
         }
+        // Stop at project root (.git boundary) — don't traverse into user home config
+        if (isDir(path.join(cwd, ".git")))
+            break;
         const parent = path.dirname(cwd);
         if (parent === cwd)
             break;
@@ -138,8 +161,8 @@ function parseMonitorJson(filePath, dir) {
         return null;
     }
     const classify = spec.classify;
-    if (!classify?.prompt && !classify?.promptTemplate) {
-        console.error(`[${name}] Missing classify.prompt or classify.promptTemplate`);
+    if (!classify?.agent || typeof classify.agent !== "string") {
+        console.error(`[${name}] Missing classify.agent — all monitors require an agent spec`);
         return null;
     }
     const patternsSpec = spec.patterns;
@@ -157,11 +180,9 @@ function parseMonitorJson(filePath, dir) {
         when: String(spec.when ?? "always"),
         scope: scope ?? { target: "main" },
         classify: {
-            model: classify.model ?? "claude-sonnet-4-20250514",
             context: Array.isArray(classify.context) ? classify.context : ["tool_results", "assistant_text"],
             excludes: Array.isArray(classify.excludes) ? classify.excludes : [],
-            prompt: classify.prompt ?? "",
-            promptTemplate: typeof classify.promptTemplate === "string" ? classify.promptTemplate : undefined,
+            agent: classify.agent,
         },
         patterns: {
             path: patternsSpec.path,
@@ -190,12 +211,15 @@ function parseMonitorJson(filePath, dir) {
 // =============================================================================
 // Example seeding
 // =============================================================================
-function resolveProjectMonitorsDir() {
+export function resolveProjectMonitorsDir() {
     let cwd = process.cwd();
     while (true) {
         const piDir = path.join(cwd, ".pi");
         if (isDir(piDir))
             return path.join(piDir, "monitors");
+        // Stop at project root (.git boundary) — don't traverse into user home config
+        if (isDir(path.join(cwd, ".git")))
+            break;
         const parent = path.dirname(cwd);
         if (parent === cwd)
             break;
@@ -351,6 +375,153 @@ function collectCustomMessages(branch) {
     }
     return msgs.join("\n");
 }
+// -- conversation_history collector ------------------------------------------
+const BACKREFERENCE_PATTERNS = [
+    /\bas\s+(i|we)\s+(said|mentioned|described|asked|requested|specified)/i,
+    /\b(earlier|previously|before|original|initial|first)\b/i,
+    /\bgo\s+back\s+to\b/i,
+    /\bsame\s+(thing|as|way)\b/i,
+    /\blike\s+(you|i)\s+(did|said|asked)\b/i,
+    /\b(continue|keep\s+going|proceed|carry\s+on)\b/i,
+    /\b(do|run|try)\s+(that|it|this)\s+(again|once\s+more)\b/i,
+    /\bre-?(output|generate|create|do|run|build|make)\b/i,
+];
+const AFFIRMATION_PATTERN = /^\s*(yes|yeah|yep|correct|exactly|right|ok|okay|sure|please|go|do it|proceed)\s*[.!]?\s*$/i;
+const ACTION_VERBS = /\b(create|write|build|implement|add|fix|update|delete|remove|refactor|test|deploy|install|configure|set up|generate)\b/i;
+/**
+ * Decide whether a user message leans on earlier conversation context.
+ * A message counts as referential when any of these hold:
+ *   - it matches one of BACKREFERENCE_PATTERNS,
+ *   - it matches AFFIRMATION_PATTERN in full, or
+ *   - it is shorter than 80 characters and matches none of ACTION_VERBS.
+ * Exported for testing.
+ *
+ * @param {string} text - Current user message text.
+ * @returns {boolean} true when the message is considered referential.
+ */
+export function isReferentialMessage(text) {
+    for (const pattern of BACKREFERENCE_PATTERNS) {
+        if (pattern.test(text))
+            return true;
+    }
+    if (AFFIRMATION_PATTERN.test(text))
+        return true;
+    // Only short messages can qualify through the "no action verb" rule.
+    if (!(text.length < 80))
+        return false;
+    return !ACTION_VERBS.test(text);
+}
+/**
+ * Build a compact "name(count[, N errors])" summary of the tools used in a turn.
+ * Tool calls are counted from assistant message parts of type "toolCall";
+ * errors are counted from toolResult messages flagged isError, but only for
+ * tools that already have a recorded call. Returns "[no tools]" when no tool
+ * call was seen.
+ *
+ * @param {Array} turnEntries - Branch entries belonging to a single turn.
+ * @returns {string} Comma-separated per-tool summary, in first-call order.
+ */
+function summarizeTurnTools(turnEntries) {
+    const statsByTool = new Map();
+    for (const entry of turnEntries) {
+        if (!isMessageEntry(entry))
+            continue;
+        const { message } = entry;
+        if (message.role === "assistant") {
+            for (const part of message.content) {
+                if (part.type !== "toolCall")
+                    continue;
+                const stats = statsByTool.get(part.name);
+                if (stats)
+                    stats.count++;
+                else
+                    statsByTool.set(part.name, { count: 1, errors: 0 });
+            }
+        }
+        if (message.role === "toolResult" && message.isError) {
+            // Errors for tools with no recorded call are ignored.
+            const stats = statsByTool.get(message.toolName);
+            if (stats)
+                stats.errors++;
+        }
+    }
+    if (statsByTool.size === 0)
+        return "[no tools]";
+    return Array.from(statsByTool, ([name, { count, errors }]) => errors > 0
+        ? `${name}(${count}, ${errors} error${errors > 1 ? "s" : ""})`
+        : `${name}(${count})`).join(", ");
+}
+/**
+ * Cap text at max characters, appending an ellipsis character when it is cut.
+ *
+ * @param {string} text - Text to shorten.
+ * @param {number} max - Maximum number of characters kept before the ellipsis.
+ * @returns {string} The original text when it fits, otherwise the cut text plus "…".
+ */
+function truncShort(text, max) {
+    if (text.length <= max)
+        return text;
+    const head = text.slice(0, max);
+    return `${head}…`;
+}
+/**
+ * Render one prior turn as a "--- Prior turn ---" summary block.
+ * The turn spans branch[userIndices[turnIdx]] up to (not including) the next
+ * user message. User and assistant text are each capped at maxLen characters.
+ */
+function formatPriorTurnSummary(branch, userIndices, turnIdx, maxLen) {
+    const turnEntries = branch.slice(userIndices[turnIdx], userIndices[turnIdx + 1]);
+    // User text comes from the first entry of the turn
+    const firstEntry = turnEntries[0];
+    const userText = isMessageEntry(firstEntry) && firstEntry.message.role === "user"
+        ? extractUserText(firstEntry.message.content)
+        : "";
+    const actions = summarizeTurnTools(turnEntries);
+    // Assistant conclusion: last assistant message in the turn with text content
+    let assistantConclusion = "[tool actions only]";
+    for (let i = turnEntries.length - 1; i >= 0; i--) {
+        const e = turnEntries[i];
+        if (isMessageEntry(e) && e.message.role === "assistant") {
+            const text = extractText(e.message.content);
+            if (text.trim()) {
+                assistantConclusion = truncShort(text.trim(), maxLen);
+                break;
+            }
+        }
+    }
+    return `--- Prior turn ---\nUser: "${truncShort(userText, maxLen)}"\nActions: ${actions}\nAssistant: "${assistantConclusion}"`;
+}
+/**
+ * conversation_history collector: summarize the turns preceding the current
+ * user message (user request + tool actions + assistant conclusion).
+ *
+ * The window is 3 prior turns when the current user text is referential
+ * (see isReferentialMessage) and 1 otherwise. If the joined summaries exceed
+ * TRUNCATE characters, the oldest summaries are dropped first. If the single
+ * remaining turn is still over budget, that same turn is re-rendered with
+ * tighter (100 char) text caps. The result can still exceed TRUNCATE when the
+ * actions line alone is long, because that line is never truncated.
+ *
+ * @param {Array} branch - Session branch entries, oldest first.
+ * @returns {string} Summaries joined by blank lines, or "" when there are
+ *   fewer than two user messages (no prior turn).
+ */
+export function collectConversationHistory(branch) {
+    // Step A — Segment turns by finding user message indices
+    const userIndices = [];
+    for (let i = 0; i < branch.length; i++) {
+        const entry = branch[i];
+        if (isMessageEntry(entry) && entry.message.role === "user") {
+            userIndices.push(i);
+        }
+    }
+    // Need at least 2 user messages (current + 1 prior) for history
+    if (userIndices.length < 2)
+        return "";
+    // Step B — Determine window size from current user text
+    const maxTurns = isReferentialMessage(collectUserText(branch)) ? 3 : 1;
+    // Prior turns are all user-message-initiated segments except the last one
+    const priorTurnCount = userIndices.length - 1;
+    const startTurnIdx = priorTurnCount - Math.min(maxTurns, priorTurnCount);
+    // Step C — Summarize the last N prior turns
+    const turnSummaries = [];
+    for (let t = startTurnIdx; t < priorTurnCount; t++) {
+        turnSummaries.push(formatPriorTurnSummary(branch, userIndices, t, 200));
+    }
+    // Step D & E — Format and enforce budget, dropping the oldest turn first
+    let result = turnSummaries.join("\n\n");
+    while (result.length > TRUNCATE && turnSummaries.length > 1) {
+        turnSummaries.shift();
+        result = turnSummaries.join("\n\n");
+    }
+    // Only one summary can remain here, and after the shifts above it is the
+    // most recent prior turn (priorTurnCount - 1), not startTurnIdx. Re-render
+    // that turn with tighter caps.
+    if (result.length > TRUNCATE) {
+        result = formatPriorTurnSummary(branch, userIndices, priorTurnCount - 1, 100);
+    }
+    return result;
+}
 function collectProjectVision(_branch) {
     try {
         const raw = readBlock(process.cwd(), "project");
@@ -404,6 +575,7 @@ const collectors = {
     project_vision: collectProjectVision,
     project_conventions: collectProjectConventions,
     git_status: collectGitStatus,
+    conversation_history: collectConversationHistory,
 };
 /** Collector names derived from the runtime registry — used for consistency testing. */
 export const COLLECTOR_NAMES = Object.keys(collectors);
@@ -691,32 +863,120 @@ function formatInstructionsForPrompt(instructions) {
     const lines = instructions.map((i) => `- ${i.text}`).join("\n");
     return `\nOperating instructions from the user (follow these strictly):\n${lines}\n`;
 }
+// =============================================================================
+// Classification
+// =============================================================================
+/**
+ * Parse a plain-text verdict line into a classify result.
+ * Recognized forms (after trimming): text starting with "CLEAN",
+ * "NEW:<pattern>|<description>" (without a pipe the remainder is used for
+ * both fields), and "FLAG:<description>". Anything else is logged and
+ * reported as an "error" verdict carrying the first 80 characters.
+ *
+ * @param {string} raw - Raw model output.
+ * @returns {object} Result object with a `verdict` field.
+ */
+export function parseVerdict(raw) {
+    const trimmed = raw.trim();
+    if (trimmed.startsWith("CLEAN")) {
+        return { verdict: "clean" };
+    }
+    if (trimmed.startsWith("NEW:")) {
+        const payload = trimmed.slice(4);
+        const sep = payload.indexOf("|");
+        const hasSep = sep !== -1;
+        const newPattern = (hasSep ? payload.slice(0, sep) : payload).trim();
+        const description = (hasSep ? payload.slice(sep + 1) : payload).trim();
+        return { verdict: "new", newPattern, description };
+    }
+    if (trimmed.startsWith("FLAG:")) {
+        return { verdict: "flag", description: trimmed.slice(5).trim() };
+    }
+    const preview = trimmed.slice(0, 80);
+    console.error(`[monitors] unrecognized verdict format: "${preview}"`);
+    return { verdict: "error", error: `Unrecognized verdict format: "${preview}"` };
+}
+/**
+ * Split a "provider/modelId" spec at its first slash.
+ * A spec without a slash is treated as a model id of the "anthropic" provider.
+ *
+ * @param {string} spec - Model spec string.
+ * @returns {{provider: string, modelId: string}} Parsed provider and model id.
+ */
+export function parseModelSpec(spec) {
+    const cut = spec.indexOf("/");
+    return cut === -1
+        ? { provider: "anthropic", modelId: spec }
+        : { provider: spec.slice(0, cut), modelId: spec.slice(cut + 1) };
+}
+/**
+ * Pull the usable reply text out of an LLM response's content parts.
+ * All "text" parts are concatenated; when that is blank, the content of the
+ * first part typed "thinking" that carries a `thinking` property is returned
+ * instead. This covers issue-024, where models with thinking enabled put the
+ * whole verdict inside the thinking block and leave the text content empty.
+ *
+ * @param {Array} parts - Response content parts.
+ * @returns {string} Joined text, the thinking fallback, or "" when neither exists.
+ */
+export function extractResponseText(parts) {
+    const textChunks = parts.reduce((chunks, part) => {
+        if (part.type === "text")
+            chunks.push(part.text);
+        return chunks;
+    }, []);
+    const joined = textChunks.join("");
+    if (joined.trim())
+        return joined;
+    const thinkingPart = parts.find((part) => part.type === "thinking" && "thinking" in part);
+    return thinkingPart ? thinkingPart.thinking : "";
+}
+/**
+ * Convert a parsed verdict object into a classify result.
+ * The verdict string is compared case-insensitively against CLEAN, FLAG and
+ * NEW. `description` defaults to "", `newPattern` falls back to the
+ * description, and `severity` is passed through untouched. Any other verdict
+ * value produces an "error" result.
+ *
+ * @param {object} parsed - Object with `verdict` and optional `description`,
+ *   `newPattern`, `severity`.
+ * @returns {object} Result object with a lower-case `verdict` field.
+ */
+export function mapVerdictToClassifyResult(parsed) {
+    const verdict = String(parsed.verdict).toUpperCase();
+    switch (verdict) {
+        case "CLEAN":
+            return { verdict: "clean" };
+        case "FLAG":
+            return {
+                verdict: "flag",
+                description: String(parsed.description ?? ""),
+                severity: parsed.severity,
+            };
+        case "NEW":
+            return {
+                verdict: "new",
+                description: String(parsed.description ?? ""),
+                newPattern: String(parsed.newPattern ?? parsed.description ?? ""),
+                severity: parsed.severity,
+            };
+        default:
+            return { verdict: "error", error: `Unknown verdict: ${verdict}` };
+    }
+}
 /**
- * Create a Nunjucks environment for monitor prompt templates.
- * Three-tier search: project monitors dir > user monitors dir > package examples.
+ * Create a merged Nunjucks template environment combining monitor search paths
+ * (for classify templates) with agent template search paths (for shared macros).
+ * Monitor paths take precedence.
  */
-function createMonitorTemplateEnv() {
-    const projectDir = resolveProjectMonitorsDir();
-    const userDir = path.join(os.homedir(), ".pi", "agent", "monitors");
+function createMonitorAgentTemplateEnv(cwd) {
+    const projectMonitorsDir = resolveProjectMonitorsDir();
+    const userMonitorsDir = path.join(os.homedir(), ".pi", "agent", "monitors");
+    const projectTemplatesDir = path.join(cwd, ".pi", "templates");
+    const userTemplatesDir = path.join(os.homedir(), ".pi", "agent", "templates");
     const searchPaths = [];
-    if (isDir(projectDir))
-        searchPaths.push(projectDir);
-    if (isDir(userDir))
-        searchPaths.push(userDir);
+    // Monitor paths first — monitor templates take precedence
+    if (isDir(projectMonitorsDir))
+        searchPaths.push(projectMonitorsDir);
+    if (isDir(userMonitorsDir))
+        searchPaths.push(userMonitorsDir);
     if (isDir(EXAMPLES_DIR))
         searchPaths.push(EXAMPLES_DIR);
+    // Agent template paths — for shared macros and fallback
+    if (isDir(projectTemplatesDir))
+        searchPaths.push(projectTemplatesDir);
+    if (isDir(userTemplatesDir))
+        searchPaths.push(userTemplatesDir);
     const loader = searchPaths.length > 0 ? new nunjucks.FileSystemLoader(searchPaths) : undefined;
     return new nunjucks.Environment(loader, {
         autoescape: false,
         throwOnUndefined: false,
     });
 }
-/** Module-level template environment, initialized in extension entry point. */
-let monitorTemplateEnv;
-function renderClassifyPrompt(monitor, branch, extraContext) {
+/** Module-level cached agent loader, populated at session_start. */
+let cachedAgentLoader = null;
+/** Module-level cached template environment for classify agent specs, populated at session_start. */
+let cachedMonitorAgentEnv = null;
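+/**
+ * Classify via agent spec — the sole classify path.
+ * Loads the agent YAML, builds context from collectors, compiles via
+ * compileAgentSpec, and calls complete() in-process with a forced
+ * classify_verdict tool call. The tool-call arguments are validated against
+ * outputSchema when the agent spec declares one, then mapped with
+ * mapVerdictToClassifyResult(). A response without a tool call yields an
+ * "error" verdict; there is no parseVerdict() text fallback on this path.
+ */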
+async function classifyViaAgent(ctx, monitor, branch, extraContext, signal) {
+    const agentName = monitor.classify.agent;
+    // Load agent spec (use session cache if available)
+    const loadAgent = cachedAgentLoader ?? createAgentLoader(process.cwd(), AGENTS_DIR);
+    const agentSpec = loadAgent(agentName);
+    // Build context: collectors + patterns + instructions + json_output
     const patterns = loadPatterns(monitor);
-    if (patterns.length === 0)
-        return null;
     const instructions = loadInstructions(monitor);
     const collected = {};
     for (const key of monitor.classify.context) {
@@ -724,71 +984,61 @@ function renderClassifyPrompt(monitor, branch, extraContext) {
         if (fn)
             collected[key] = fn(branch);
         else
-            collected[key] = ""; // unknown collectors produce empty string (graceful degradation)
+            collected[key] = "";
     }
-    const context = {
+    const templateContext = {
         patterns: formatPatternsForPrompt(patterns),
         instructions: formatInstructionsForPrompt(instructions),
         iteration: monitor.whileCount,
+        json_output: true,
         ...collected,
         ...(extraContext ?? {}),
     };
-    if (monitor.classify.promptTemplate && monitorTemplateEnv) {
-        // Nunjucks template file
-        try {
-            return monitorTemplateEnv.render(monitor.classify.promptTemplate, context);
-        }
-        catch (err) {
-            const msg = err instanceof Error ? err.message : String(err);
-            console.error(`[${monitor.name}] Template render failed (${monitor.classify.promptTemplate}): ${msg}`);
-            // Fall through to inline prompt if available
-            if (!monitor.classify.prompt)
-                return null;
-        }
-    }
-    // Fallback: inline string with {placeholder} replacement
-    if (!monitor.classify.prompt)
-        return null;
-    return monitor.classify.prompt.replace(/\{(\w+)\}/g, (match, key) => {
-        return String(context[key] ?? match);
-    });
-}
-// =============================================================================
-// Classification
-// =============================================================================
-export function parseVerdict(raw) {
-    const text = raw.trim();
-    if (text.startsWith("CLEAN"))
-        return { verdict: "clean" };
-    if (text.startsWith("NEW:")) {
-        const rest = text.slice(4);
-        const pipe = rest.indexOf("|");
-        if (pipe !== -1)
-            return { verdict: "new", newPattern: rest.slice(0, pipe).trim(), description: rest.slice(pipe + 1).trim() };
-        return { verdict: "new", newPattern: rest.trim(), description: rest.trim() };
-    }
-    if (text.startsWith("FLAG:"))
-        return { verdict: "flag", description: text.slice(5).trim() };
-    console.error(`[monitors] unrecognized verdict format, defaulting to CLEAN: "${text.slice(0, 80)}"`);
-    return { verdict: "clean" };
-}
-export function parseModelSpec(spec) {
-    const slashIndex = spec.indexOf("/");
-    if (slashIndex !== -1) {
-        return { provider: spec.slice(0, slashIndex), modelId: spec.slice(slashIndex + 1) };
-    }
-    return { provider: "anthropic", modelId: spec };
-}
-async function classifyPrompt(ctx, monitor, prompt, signal) {
-    const { provider, modelId } = parseModelSpec(monitor.classify.model);
+    // Use session-cached template environment or create one
+    const mergedEnv = cachedMonitorAgentEnv ?? createMonitorAgentTemplateEnv(process.cwd());
+    const compiled = compileAgentSpec(agentSpec, templateContext, mergedEnv, process.cwd());
+    // The task template is the compiled classify prompt
+    const prompt = compiled.taskTemplate;
+    if (!prompt)
+        throw new Error(`Agent ${agentName}: compiled task template is empty`);
+    // Resolve model from agent spec
+    const modelSpec = compiled.model;
+    if (!modelSpec)
+        throw new Error(`Agent ${agentName}: no model specified`);
+    const { provider, modelId } = parseModelSpec(modelSpec);
     const model = ctx.modelRegistry.find(provider, modelId);
     if (!model)
-        throw new Error(`Model ${monitor.classify.model} not found`);
+        throw new Error(`Model ${modelSpec} not found`);
     const auth = await ctx.modelRegistry.getApiKeyAndHeaders(model);
     if (!auth.ok)
         throw new Error(auth.error);
-    const response = await complete(model, { messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }] }, { apiKey: auth.apiKey, headers: auth.headers, maxTokens: 150, signal });
-    return parseVerdict(extractText(response.content));
+    // Determine thinking from agent spec
+    const thinkingEnabled = compiled.thinking === "on" || compiled.thinking === "true";
+    const response = await complete(model, {
+        messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
+        tools: [VERDICT_TOOL],
+    }, {
+        apiKey: auth.apiKey,
+        headers: auth.headers,
+        maxTokens: 300,
+        signal,
+        thinkingEnabled,
+        effort: "low",
+        toolChoice: { type: "tool", name: "classify_verdict" },
+    });
+    const toolCall = response.content.find((c) => c.type === "toolCall");
+    if (!toolCall) {
+        return { verdict: "error", error: "Model did not produce a tool call response" };
+    }
+    const parsed = toolCall.arguments;
+    // Validate against verdict schema if the agent spec declares one
+    if (compiled.outputSchema) {
+        const schemaPath = path.isAbsolute(compiled.outputSchema)
+            ? compiled.outputSchema
+            : path.resolve(AGENTS_DIR, compiled.outputSchema);
+        validateFromFile(schemaPath, parsed, `verdict for monitor '${monitor.name}'`);
+    }
+    return mapVerdictToClassifyResult(parsed);
 }
 // =============================================================================
 // Pattern learning (JSON)
@@ -921,49 +1171,8 @@ export async function invokeMonitor(name, context) {
     const patterns = loadPatterns(monitor);
     if (patterns.length === 0)
         return { verdict: "clean" };
-    const instructions = loadInstructions(monitor);
-    // Build context: collectors + caller-supplied overrides
-    const collected = {};
     const branch = invokeCtx.sessionManager.getBranch();
-    for (const key of monitor.classify.context) {
-        const fn = collectors[key];
-        if (fn)
-            collected[key] = fn(branch);
-        else
-            collected[key] = "";
-    }
-    if (context) {
-        for (const [key, value] of Object.entries(context)) {
-            collected[key] = value;
-        }
-    }
-    const templateContext = {
-        patterns: formatPatternsForPrompt(patterns),
-        instructions: formatInstructionsForPrompt(instructions),
-        iteration: 0,
-        ...collected,
-    };
-    // Render prompt (same logic as renderClassifyPrompt but with injected context)
-    let prompt = null;
-    if (monitor.classify.promptTemplate && monitorTemplateEnv) {
-        try {
-            prompt = monitorTemplateEnv.render(monitor.classify.promptTemplate, templateContext);
-        }
-        catch (err) {
-            const msg = err instanceof Error ? err.message : String(err);
-            console.error(`[${monitor.name}] Template render failed (${monitor.classify.promptTemplate}): ${msg}`);
-            if (!monitor.classify.prompt)
-                throw new Error(`Template render failed and no inline prompt fallback: ${msg}`);
-        }
-    }
-    if (!prompt && monitor.classify.prompt) {
-        prompt = monitor.classify.prompt.replace(/\{(\w+)\}/g, (match, key) => {
-            return String(templateContext[key] ?? match);
-        });
-    }
-    if (!prompt)
-        return { verdict: "clean" };
-    const result = await classifyPrompt(invokeCtx, monitor, prompt);
+    const result = await classifyViaAgent(invokeCtx, monitor, branch, context);
     // Execute write actions (findings files) based on verdict
     if (result.verdict === "clean") {
         const cleanAction = monitor.actions.on_clean;
@@ -1006,9 +1215,6 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
         updateStatus();
         return;
     }
-    const prompt = renderClassifyPrompt(monitor, branch);
-    if (!prompt)
-        return;
     // Backoff: skip classification if this monitor has failed repeatedly
     if (monitor.classifySkipRemaining > 0) {
         monitor.classifySkipRemaining--;
@@ -1016,7 +1222,7 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
     }
     let result;
     try {
-        result = await classifyPrompt(ctx, monitor, prompt);
+        result = await classifyViaAgent(ctx, monitor, branch, undefined, undefined);
     }
     catch (e) {
         const message = e instanceof Error ? e.message : String(e);
@@ -1053,6 +1259,16 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
         updateStatus();
         return;
     }
+    if (result.verdict === "error") {
+        if (ctx.hasUI) {
+            ctx.ui.notify(`[${monitor.name}] classify failed: ${result.error}`, "warning");
+        }
+        else {
+            console.error(`[${monitor.name}] classify failed: ${result.error}`);
+        }
+        updateStatus();
+        return;
+    }
     // Determine which action to execute
     const action = result.verdict === "new" ? monitor.actions.on_new : monitor.actions.on_flag;
     if (!action)
@@ -1075,7 +1291,7 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
             severity: result.severity ?? "warning",
             monitor_name: monitor.name,
         };
-        const renderedSteer = monitorTemplateEnv ? nunjucks.renderString(action.steer, steerContext) : action.steer;
+        const renderedSteer = nunjucks.renderString(action.steer, steerContext);
         const details = {
             monitorName: monitor.name,
             verdict: result.verdict,
@@ -1141,8 +1357,6 @@ export default function (pi) {
     loadedMonitors = monitors;
     if (monitors.length === 0)
         return;
-    // Initialize Nunjucks template environment for monitor prompt templates
-    monitorTemplateEnv = createMonitorTemplateEnv();
     let statusCtx;
     function updateStatus() {
         if (!statusCtx?.hasUI)
@@ -1193,6 +1407,9 @@ export default function (pi) {
             monitorsEnabled = true;
             pendingAgentEndSteers = [];
             projectDirMissingLogged = false;
+            // Cache agent loader and template environment for classify calls
+            cachedAgentLoader = createAgentLoader(process.cwd(), AGENTS_DIR);
+            cachedMonitorAgentEnv = createMonitorAgentTemplateEnv(process.cwd());
             updateStatus();
         }
         catch {
@@ -1250,7 +1467,7 @@ export default function (pi) {
                 when: monitor.when,
                 scope: monitor.scope,
                 classify: {
-                    model: monitor.classify.model,
+                    agent: monitor.classify.agent,
                     context: monitor.classify.context,
                     excludes: monitor.classify.excludes,
                 },
@@ -1567,15 +1784,9 @@ export default function (pi) {
                         continue;
                     }
                     // Build pending tool call context for template injection.
-                    // Branch-based collectors (user_text, tool_calls, etc.) are still
-                    // collected inside renderClassifyPrompt from the branch parameter.
                     const toolContext = `Pending tool call:\nTool: ${ev.toolName}\nArguments: ${JSON.stringify(ev.input, null, 2).slice(0, 2000)}`;
-                    // Render classify prompt with tool context injected as extra template variable
-                    const prompt = renderClassifyPrompt(m, branch, { tool_call_context: toolContext });
-                    if (!prompt)
-                        continue;
                     try {
-                        const result = await classifyPrompt(ctx, m, prompt);
+                        const result = await classifyViaAgent(ctx, m, branch, { tool_call_context: toolContext });
                         // Reset failure counter on success
                         m.classifyFailures = 0;
                         if (result.verdict === "flag" || result.verdict === "new") {