@davidorex/pi-behavior-monitors 0.12.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -13,6 +13,9 @@ import * as os from "node:os";
13
13
  import * as path from "node:path";
14
14
  import { fileURLToPath } from "node:url";
15
15
  import { readBlock } from "@davidorex/pi-project/block-api";
16
+ import { validateFromFile } from "@davidorex/pi-project/schema-validator";
17
+ import { createAgentLoader } from "@davidorex/pi-workflows/agent-spec";
18
+ import { compileAgentSpec } from "@davidorex/pi-workflows/step-shared";
16
19
  import { complete, StringEnum } from "@mariozechner/pi-ai";
17
20
  import { getAgentDir } from "@mariozechner/pi-coding-agent";
18
21
  import { Box, Text } from "@mariozechner/pi-tui";
@@ -20,6 +23,18 @@ import { Type } from "@sinclair/typebox";
20
23
  import nunjucks from "nunjucks";
21
24
  const EXTENSION_DIR = path.dirname(fileURLToPath(import.meta.url));
22
25
  const EXAMPLES_DIR = path.join(EXTENSION_DIR, "..", "examples");
26
+ const AGENTS_DIR = path.join(EXTENSION_DIR, "..", "agents");
27
+ /** Tool definition for forcing structured verdict output from the classify LLM call. */
28
+ const VERDICT_TOOL = {
29
+ name: "classify_verdict",
30
+ description: "Output the monitor classification verdict",
31
+ parameters: Type.Object({
32
+ verdict: Type.String({ description: "Classification result: CLEAN, FLAG, or NEW" }),
33
+ description: Type.Optional(Type.String({ description: "One-sentence explanation (required for FLAG/NEW)" })),
34
+ newPattern: Type.Optional(Type.String({ description: "Pattern to learn (required for NEW)" })),
35
+ severity: Type.Optional(Type.String({ description: "Issue severity: info, warning, or critical" })),
36
+ }),
37
+ };
23
38
  export const COLLECTOR_DESCRIPTORS = [
24
39
  { name: "user_text", description: "Most recent user message text" },
25
40
  { name: "assistant_text", description: "Most recent assistant message text" },
@@ -33,6 +48,11 @@ export const COLLECTOR_DESCRIPTORS = [
33
48
  { name: "project_vision", description: ".project/project.json vision, core_value, name" },
34
49
  { name: "project_conventions", description: ".project/conformance-reference.json principle names" },
35
50
  { name: "git_status", description: "Output of git status --porcelain", limits: "5s timeout" },
51
+ {
52
+ name: "conversation_history",
53
+ description: "Prior turn summaries (user request + actions + assistant response)",
54
+ limits: "1-3 turns adaptive, 2000 char max",
55
+ },
36
56
  ];
37
57
  export const WHEN_CONDITIONS = [
38
58
  { name: "always", description: "Fire every time the event occurs", parameterized: false },
@@ -58,7 +78,7 @@ export const WHEN_CONDITIONS = [
58
78
  parameterized: true,
59
79
  },
60
80
  ];
61
- export const VERDICT_TYPES = ["clean", "flag", "new"];
81
+ export const VERDICT_TYPES = ["clean", "flag", "new", "error"];
62
82
  export const SCOPE_TARGETS = ["main", "subagent", "all", "workflow"];
63
83
  export const VALID_EVENTS = new Set(["message_end", "turn_end", "agent_end", "command", "tool_call"]);
64
84
  function isValidEvent(event) {
@@ -67,7 +87,7 @@ function isValidEvent(event) {
67
87
  // =============================================================================
68
88
  // Discovery
69
89
  // =============================================================================
70
- function discoverMonitors() {
90
+ export function discoverMonitors() {
71
91
  const dirs = [];
72
92
  // project-local
73
93
  let cwd = process.cwd();
@@ -77,6 +97,9 @@ function discoverMonitors() {
77
97
  dirs.push(candidate);
78
98
  break;
79
99
  }
100
+ // Stop at project root (.git boundary) — don't traverse into user home config
101
+ if (isDir(path.join(cwd, ".git")))
102
+ break;
80
103
  const parent = path.dirname(cwd);
81
104
  if (parent === cwd)
82
105
  break;
@@ -138,8 +161,8 @@ function parseMonitorJson(filePath, dir) {
138
161
  return null;
139
162
  }
140
163
  const classify = spec.classify;
141
- if (!classify?.prompt && !classify?.promptTemplate) {
142
- console.error(`[${name}] Missing classify.prompt or classify.promptTemplate`);
164
+ if (!classify?.agent || typeof classify.agent !== "string") {
165
+ console.error(`[${name}] Missing classify.agent all monitors require an agent spec`);
143
166
  return null;
144
167
  }
145
168
  const patternsSpec = spec.patterns;
@@ -157,11 +180,9 @@ function parseMonitorJson(filePath, dir) {
157
180
  when: String(spec.when ?? "always"),
158
181
  scope: scope ?? { target: "main" },
159
182
  classify: {
160
- model: classify.model ?? "claude-sonnet-4-20250514",
161
183
  context: Array.isArray(classify.context) ? classify.context : ["tool_results", "assistant_text"],
162
184
  excludes: Array.isArray(classify.excludes) ? classify.excludes : [],
163
- prompt: classify.prompt ?? "",
164
- promptTemplate: typeof classify.promptTemplate === "string" ? classify.promptTemplate : undefined,
185
+ agent: classify.agent,
165
186
  },
166
187
  patterns: {
167
188
  path: patternsSpec.path,
@@ -190,12 +211,15 @@ function parseMonitorJson(filePath, dir) {
190
211
  // =============================================================================
191
212
  // Example seeding
192
213
  // =============================================================================
193
- function resolveProjectMonitorsDir() {
214
+ export function resolveProjectMonitorsDir() {
194
215
  let cwd = process.cwd();
195
216
  while (true) {
196
217
  const piDir = path.join(cwd, ".pi");
197
218
  if (isDir(piDir))
198
219
  return path.join(piDir, "monitors");
220
+ // Stop at project root (.git boundary) — don't traverse into user home config
221
+ if (isDir(path.join(cwd, ".git")))
222
+ break;
199
223
  const parent = path.dirname(cwd);
200
224
  if (parent === cwd)
201
225
  break;
@@ -351,6 +375,153 @@ function collectCustomMessages(branch) {
351
375
  }
352
376
  return msgs.join("\n");
353
377
  }
378
// -- conversation_history collector ------------------------------------------
/** Phrases that point back at something said or done earlier in the session. */
const BACKREFERENCE_PATTERNS = [
    /\bas\s+(i|we)\s+(said|mentioned|described|asked|requested|specified)/i,
    /\b(earlier|previously|before|original|initial|first)\b/i,
    /\bgo\s+back\s+to\b/i,
    /\bsame\s+(thing|as|way)\b/i,
    /\blike\s+(you|i)\s+(did|said|asked)\b/i,
    /\b(continue|keep\s+going|proceed|carry\s+on)\b/i,
    /\b(do|run|try)\s+(that|it|this)\s+(again|once\s+more)\b/i,
    /\bre-?(output|generate|create|do|run|build|make)\b/i,
];
/** A message that consists of nothing but a short confirmation. */
const AFFIRMATION_PATTERN = /^\s*(yes|yeah|yep|correct|exactly|right|ok|okay|sure|please|go|do it|proceed)\s*[.!]?\s*$/i;
/** Verbs that signal the message carries its own, self-contained request. */
const ACTION_VERBS = /\b(create|write|build|implement|add|fix|update|delete|remove|refactor|test|deploy|install|configure|set up|generate)\b/i;
/**
 * Decide whether a user message leans on earlier conversation context.
 *
 * A message is referential when any of the following holds:
 *  - it matches one of BACKREFERENCE_PATTERNS,
 *  - it is a bare affirmation (AFFIRMATION_PATTERN),
 *  - it is shorter than 80 characters and contains no ACTION_VERBS match.
 *
 * Exported for testing.
 *
 * @param {string} text - The user message text.
 * @returns {boolean} true when the message is referential.
 */
export function isReferentialMessage(text) {
    for (const pattern of BACKREFERENCE_PATTERNS) {
        if (pattern.test(text)) {
            return true;
        }
    }
    if (AFFIRMATION_PATTERN.test(text)) {
        return true;
    }
    // Short messages without an action verb rarely stand on their own.
    if (ACTION_VERBS.test(text)) {
        return false;
    }
    return text.length < 80;
}
402
/**
 * Produce a one-line summary of the tools used within a single turn.
 *
 * Tool calls are counted per tool name from assistant messages; error tool
 * results increment the error count of a tool that was already seen as a call
 * (error results for tools with no recorded call are ignored).
 *
 * @param {Array} turnEntries - Branch entries belonging to one turn.
 * @returns {string} e.g. `read(2), bash(1, 1 error)`, or `[no tools]` when no
 *   tool call was recorded.
 */
function summarizeTurnTools(turnEntries) {
    const statsByTool = new Map();
    for (const entry of turnEntries) {
        if (!isMessageEntry(entry)) {
            continue;
        }
        const message = entry.message;
        if (message.role === "assistant") {
            for (const part of message.content) {
                if (part.type !== "toolCall") {
                    continue;
                }
                const stats = statsByTool.get(part.name);
                if (stats) {
                    stats.count++;
                }
                else {
                    statsByTool.set(part.name, { count: 1, errors: 0 });
                }
            }
        }
        if (message.role === "toolResult" && message.isError) {
            const stats = statsByTool.get(message.toolName);
            if (stats) {
                stats.errors++;
            }
        }
    }
    if (statsByTool.size === 0) {
        return "[no tools]";
    }
    const describe = ([name, { count, errors }]) => errors > 0
        ? `${name}(${count}, ${errors} error${errors > 1 ? "s" : ""})`
        : `${name}(${count})`;
    return Array.from(statsByTool, describe).join(", ");
}
441
/**
 * Clip `text` to at most `max` characters, appending an ellipsis when clipped.
 *
 * @param {string} text - Text to shorten.
 * @param {number} max - Maximum number of characters kept from `text`.
 * @returns {string} `text` unchanged when it fits, otherwise its first `max`
 *   characters followed by "…".
 */
function truncShort(text, max) {
    if (text.length > max) {
        return text.slice(0, max) + "…";
    }
    return text;
}
444
/**
 * Collector: summarize the turns that preceded the current user message.
 *
 * A turn starts at a user message and runs up to (not including) the next user
 * message. The current turn (started by the last user message) is never
 * included. One prior turn is summarized by default; up to three when the
 * current user text is referential (see isReferentialMessage).
 *
 * Budget handling, against the module-level TRUNCATE character budget:
 *  1. oldest summaries are dropped while more than one remains,
 *  2. the single remaining (most recent) turn is re-summarized with tighter
 *     100-character text limits,
 *  3. as a last resort the result is hard-cut to TRUNCATE characters.
 *
 * @param {Array} branch - Session branch entries, oldest first.
 * @returns {string} Formatted prior-turn summaries, or "" when there is no
 *   prior turn.
 */
export function collectConversationHistory(branch) {
    // Segment turns: every user message opens a new turn.
    const userIndices = [];
    for (let i = 0; i < branch.length; i++) {
        const entry = branch[i];
        if (isMessageEntry(entry) && entry.message.role === "user") {
            userIndices.push(i);
        }
    }
    // Need at least 2 user messages (current + 1 prior) for history.
    if (userIndices.length < 2) {
        return "";
    }
    // Window size depends on whether the current message refers back.
    const maxTurns = isReferentialMessage(collectUserText(branch)) ? 3 : 1;
    const priorTurnCount = userIndices.length - 1;
    const startTurnIdx = priorTurnCount - Math.min(maxTurns, priorTurnCount);
    /** Summarize prior turn `t`, clipping user/assistant text to `textLimit`. */
    const summarizePriorTurn = (t, textLimit) => {
        const turnEntries = branch.slice(userIndices[t], userIndices[t + 1]);
        const firstEntry = turnEntries[0];
        const userText = isMessageEntry(firstEntry) && firstEntry.message.role === "user"
            ? extractUserText(firstEntry.message.content)
            : "";
        const actions = summarizeTurnTools(turnEntries);
        // Conclusion = last assistant message of the turn that carries text.
        let assistantConclusion = "[tool actions only]";
        for (let i = turnEntries.length - 1; i >= 0; i--) {
            const e = turnEntries[i];
            if (isMessageEntry(e) && e.message.role === "assistant") {
                const text = extractText(e.message.content).trim();
                if (text) {
                    assistantConclusion = truncShort(text, textLimit);
                    break;
                }
            }
        }
        return `--- Prior turn ---\nUser: "${truncShort(userText, textLimit)}"\nActions: ${actions}\nAssistant: "${assistantConclusion}"`;
    };
    const turnSummaries = [];
    for (let t = startTurnIdx; t < priorTurnCount; t++) {
        turnSummaries.push(summarizePriorTurn(t, 200));
    }
    // Enforce budget: drop oldest turns first.
    let result = turnSummaries.join("\n\n");
    while (result.length > TRUNCATE && turnSummaries.length > 1) {
        turnSummaries.shift();
        result = turnSummaries.join("\n\n");
    }
    if (result.length > TRUNCATE) {
        // The survivor of the drop-oldest loop is always the MOST RECENT prior
        // turn (index priorTurnCount - 1), not the first selected one, so the
        // tighter re-summary must be built from that turn.
        result = summarizePriorTurn(priorTurnCount - 1, 100);
    }
    if (result.length > TRUNCATE) {
        // The actions list is unbounded; never exceed the budget.
        result = result.slice(0, TRUNCATE);
    }
    return result;
}
354
525
  function collectProjectVision(_branch) {
355
526
  try {
356
527
  const raw = readBlock(process.cwd(), "project");
@@ -404,6 +575,7 @@ const collectors = {
404
575
  project_vision: collectProjectVision,
405
576
  project_conventions: collectProjectConventions,
406
577
  git_status: collectGitStatus,
578
+ conversation_history: collectConversationHistory,
407
579
  };
408
580
  /** Collector names derived from the runtime registry — used for consistency testing. */
409
581
  export const COLLECTOR_NAMES = Object.keys(collectors);
@@ -691,32 +863,120 @@ function formatInstructionsForPrompt(instructions) {
691
863
  const lines = instructions.map((i) => `- ${i.text}`).join("\n");
692
864
  return `\nOperating instructions from the user (follow these strictly):\n${lines}\n`;
693
865
  }
866
+ // =============================================================================
867
+ // Classification
868
+ // =============================================================================
869
/**
 * Parse a plain-text classifier verdict.
 *
 * Recognized forms (after trimming):
 *  - `CLEAN...`                    -> { verdict: "clean" }
 *  - `NEW:<pattern>|<description>` -> { verdict: "new", newPattern, description }
 *  - `NEW:<text>`                  -> pattern and description both set to <text>
 *  - `FLAG:<description>`          -> { verdict: "flag", description }
 * Anything else is logged via console.error and reported as an error verdict.
 *
 * @param {string} raw - Raw model output.
 * @returns {{verdict: string, description?: string, newPattern?: string, error?: string}}
 */
export function parseVerdict(raw) {
    const text = raw.trim();
    if (text.startsWith("CLEAN")) {
        return { verdict: "clean" };
    }
    if (text.startsWith("NEW:")) {
        const body = text.slice("NEW:".length);
        const separator = body.indexOf("|");
        if (separator === -1) {
            const whole = body.trim();
            return { verdict: "new", newPattern: whole, description: whole };
        }
        const newPattern = body.slice(0, separator).trim();
        const description = body.slice(separator + 1).trim();
        return { verdict: "new", newPattern, description };
    }
    if (text.startsWith("FLAG:")) {
        return { verdict: "flag", description: text.slice("FLAG:".length).trim() };
    }
    const preview = text.slice(0, 80);
    console.error(`[monitors] unrecognized verdict format: "${preview}"`);
    return { verdict: "error", error: `Unrecognized verdict format: "${preview}"` };
}
885
/**
 * Split a model spec of the form `provider/modelId` at its first slash.
 * A spec without a slash is treated as a model id of the "anthropic" provider.
 *
 * @param {string} spec - Model spec, e.g. "openai/gpt-4o" or "claude-x".
 * @returns {{provider: string, modelId: string}}
 */
export function parseModelSpec(spec) {
    const [provider, ...remainder] = spec.split("/");
    if (remainder.length === 0) {
        return { provider: "anthropic", modelId: spec };
    }
    // Only the first slash separates; later slashes belong to the model id.
    return { provider, modelId: remainder.join("/") };
}
892
/**
 * Extract response text from LLM response parts, falling back to thinking
 * block content when the text parts are absent or blank. Fixes issue-024 where
 * models with thinking enabled place the entire verdict inside the thinking
 * block, leaving text content empty.
 *
 * The fallback picks the first thinking part whose `thinking` field is a
 * non-blank string, so an empty or malformed leading thinking part no longer
 * hides a later one and the function always returns a string.
 *
 * @param {Array<{type: string, text?: string, thinking?: string}>} parts -
 *   Response content parts.
 * @returns {string} Concatenated text parts, else the first non-blank thinking
 *   content, else "".
 */
export function extractResponseText(parts) {
    const text = parts
        .filter((part) => part.type === "text")
        .map((part) => part.text)
        .join("");
    if (text.trim()) {
        return text;
    }
    const fallback = parts.find((part) => part.type === "thinking" &&
        typeof part.thinking === "string" &&
        part.thinking.trim() !== "");
    return fallback ? fallback.thinking : "";
}
911
/**
 * Map a parsed JSON verdict object to a ClassifyResult.
 *
 * The verdict string is matched case-insensitively and ignoring surrounding
 * whitespace. `description` defaults to ""; for NEW, `newPattern` falls back
 * to the description; `severity` is passed through unchanged.
 *
 * A payload that is not an object (null, undefined, a primitive) yields an
 * error verdict instead of throwing, so callers can treat every malformed
 * model output uniformly.
 *
 * @param {{verdict?: unknown, description?: unknown, newPattern?: unknown, severity?: unknown}} parsed -
 *   Verdict payload (e.g. tool-call arguments).
 * @returns {{verdict: string, description?: string, newPattern?: string, severity?: unknown, error?: string}}
 */
export function mapVerdictToClassifyResult(parsed) {
    if (parsed === null || typeof parsed !== "object") {
        return { verdict: "error", error: "Verdict payload is not an object" };
    }
    const verdict = String(parsed.verdict).trim().toUpperCase();
    switch (verdict) {
        case "CLEAN":
            return { verdict: "clean" };
        case "FLAG":
            return {
                verdict: "flag",
                description: String(parsed.description ?? ""),
                severity: parsed.severity,
            };
        case "NEW":
            return {
                verdict: "new",
                description: String(parsed.description ?? ""),
                newPattern: String(parsed.newPattern ?? parsed.description ?? ""),
                severity: parsed.severity,
            };
        default:
            return { verdict: "error", error: `Unknown verdict: ${verdict}` };
    }
}
694
934
/**
 * Build the Nunjucks environment used when compiling classify agent specs.
 *
 * Search paths are added only when the directory exists, in this order (earlier
 * entries win on lookup, so monitor templates shadow agent templates):
 *  1. project monitors dir (resolveProjectMonitorsDir)
 *  2. user monitors dir    (~/.pi/agent/monitors)
 *  3. packaged examples    (EXAMPLES_DIR)
 *  4. project templates    (<cwd>/.pi/templates)
 *  5. user templates       (~/.pi/agent/templates)
 *
 * When no directory exists the environment is created without a loader.
 *
 * @param {string} cwd - Directory whose `.pi/templates` is used as the project
 *   templates dir.
 * @returns {nunjucks.Environment} Environment with autoescape and
 *   throwOnUndefined both disabled.
 */
function createMonitorAgentTemplateEnv(cwd) {
    const home = os.homedir();
    const candidateDirs = [
        // Monitor paths first — monitor templates take precedence
        resolveProjectMonitorsDir(),
        path.join(home, ".pi", "agent", "monitors"),
        EXAMPLES_DIR,
        // Agent template paths — for shared macros and fallback
        path.join(cwd, ".pi", "templates"),
        path.join(home, ".pi", "agent", "templates"),
    ];
    const searchPaths = candidateDirs.filter((dir) => isDir(dir));
    const loader = searchPaths.length === 0 ? undefined : new nunjucks.FileSystemLoader(searchPaths);
    return new nunjucks.Environment(loader, {
        autoescape: false,
        throwOnUndefined: false,
    });
}
714
- /** Module-level template environment, initialized in extension entry point. */
715
- let monitorTemplateEnv;
716
- function renderClassifyPrompt(monitor, branch, extraContext) {
963
+ /** Module-level cached agent loader, populated at session_start. */
964
+ let cachedAgentLoader = null;
965
+ /** Module-level cached template environment for classify agent specs, populated at session_start. */
966
+ let cachedMonitorAgentEnv = null;
967
+ /**
968
+ * Classify via agent spec — the sole classify path.
969
+ * Loads the agent YAML, builds context from collectors, compiles via
970
+ * compileAgentSpec, calls complete() in-process with a forced classify_verdict
971
+ * tool call, and validates the tool-call arguments against outputSchema when set.
972
+ */
973
+ async function classifyViaAgent(ctx, monitor, branch, extraContext, signal) {
974
+ const agentName = monitor.classify.agent;
975
+ // Load agent spec (use session cache if available)
976
+ const loadAgent = cachedAgentLoader ?? createAgentLoader(process.cwd(), AGENTS_DIR);
977
+ const agentSpec = loadAgent(agentName);
978
+ // Build context: collectors + patterns + instructions + json_output
717
979
  const patterns = loadPatterns(monitor);
718
- if (patterns.length === 0)
719
- return null;
720
980
  const instructions = loadInstructions(monitor);
721
981
  const collected = {};
722
982
  for (const key of monitor.classify.context) {
@@ -724,71 +984,61 @@ function renderClassifyPrompt(monitor, branch, extraContext) {
724
984
  if (fn)
725
985
  collected[key] = fn(branch);
726
986
  else
727
- collected[key] = ""; // unknown collectors produce empty string (graceful degradation)
987
+ collected[key] = "";
728
988
  }
729
- const context = {
989
+ const templateContext = {
730
990
  patterns: formatPatternsForPrompt(patterns),
731
991
  instructions: formatInstructionsForPrompt(instructions),
732
992
  iteration: monitor.whileCount,
993
+ json_output: true,
733
994
  ...collected,
734
995
  ...(extraContext ?? {}),
735
996
  };
736
- if (monitor.classify.promptTemplate && monitorTemplateEnv) {
737
- // Nunjucks template file
738
- try {
739
- return monitorTemplateEnv.render(monitor.classify.promptTemplate, context);
740
- }
741
- catch (err) {
742
- const msg = err instanceof Error ? err.message : String(err);
743
- console.error(`[${monitor.name}] Template render failed (${monitor.classify.promptTemplate}): ${msg}`);
744
- // Fall through to inline prompt if available
745
- if (!monitor.classify.prompt)
746
- return null;
747
- }
748
- }
749
- // Fallback: inline string with {placeholder} replacement
750
- if (!monitor.classify.prompt)
751
- return null;
752
- return monitor.classify.prompt.replace(/\{(\w+)\}/g, (match, key) => {
753
- return String(context[key] ?? match);
754
- });
755
- }
756
- // =============================================================================
757
- // Classification
758
- // =============================================================================
759
- export function parseVerdict(raw) {
760
- const text = raw.trim();
761
- if (text.startsWith("CLEAN"))
762
- return { verdict: "clean" };
763
- if (text.startsWith("NEW:")) {
764
- const rest = text.slice(4);
765
- const pipe = rest.indexOf("|");
766
- if (pipe !== -1)
767
- return { verdict: "new", newPattern: rest.slice(0, pipe).trim(), description: rest.slice(pipe + 1).trim() };
768
- return { verdict: "new", newPattern: rest.trim(), description: rest.trim() };
769
- }
770
- if (text.startsWith("FLAG:"))
771
- return { verdict: "flag", description: text.slice(5).trim() };
772
- console.error(`[monitors] unrecognized verdict format, defaulting to CLEAN: "${text.slice(0, 80)}"`);
773
- return { verdict: "clean" };
774
- }
775
- export function parseModelSpec(spec) {
776
- const slashIndex = spec.indexOf("/");
777
- if (slashIndex !== -1) {
778
- return { provider: spec.slice(0, slashIndex), modelId: spec.slice(slashIndex + 1) };
779
- }
780
- return { provider: "anthropic", modelId: spec };
781
- }
782
- async function classifyPrompt(ctx, monitor, prompt, signal) {
783
- const { provider, modelId } = parseModelSpec(monitor.classify.model);
997
+ // Use session-cached template environment or create one
998
+ const mergedEnv = cachedMonitorAgentEnv ?? createMonitorAgentTemplateEnv(process.cwd());
999
+ const compiled = compileAgentSpec(agentSpec, templateContext, mergedEnv, process.cwd());
1000
+ // The task template is the compiled classify prompt
1001
+ const prompt = compiled.taskTemplate;
1002
+ if (!prompt)
1003
+ throw new Error(`Agent ${agentName}: compiled task template is empty`);
1004
+ // Resolve model from agent spec
1005
+ const modelSpec = compiled.model;
1006
+ if (!modelSpec)
1007
+ throw new Error(`Agent ${agentName}: no model specified`);
1008
+ const { provider, modelId } = parseModelSpec(modelSpec);
784
1009
  const model = ctx.modelRegistry.find(provider, modelId);
785
1010
  if (!model)
786
- throw new Error(`Model ${monitor.classify.model} not found`);
1011
+ throw new Error(`Model ${modelSpec} not found`);
787
1012
  const auth = await ctx.modelRegistry.getApiKeyAndHeaders(model);
788
1013
  if (!auth.ok)
789
1014
  throw new Error(auth.error);
790
- const response = await complete(model, { messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }] }, { apiKey: auth.apiKey, headers: auth.headers, maxTokens: 150, signal });
791
- return parseVerdict(extractText(response.content));
1015
+ // Determine thinking from agent spec
1016
+ const thinkingEnabled = compiled.thinking === "on" || compiled.thinking === "true";
1017
+ const response = await complete(model, {
1018
+ messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
1019
+ tools: [VERDICT_TOOL],
1020
+ }, {
1021
+ apiKey: auth.apiKey,
1022
+ headers: auth.headers,
1023
+ maxTokens: 300,
1024
+ signal,
1025
+ thinkingEnabled,
1026
+ effort: "low",
1027
+ toolChoice: { type: "tool", name: "classify_verdict" },
1028
+ });
1029
+ const toolCall = response.content.find((c) => c.type === "toolCall");
1030
+ if (!toolCall) {
1031
+ return { verdict: "error", error: "Model did not produce a tool call response" };
1032
+ }
1033
+ const parsed = toolCall.arguments;
1034
+ // Validate against verdict schema if the agent spec declares one
1035
+ if (compiled.outputSchema) {
1036
+ const schemaPath = path.isAbsolute(compiled.outputSchema)
1037
+ ? compiled.outputSchema
1038
+ : path.resolve(AGENTS_DIR, compiled.outputSchema);
1039
+ validateFromFile(schemaPath, parsed, `verdict for monitor '${monitor.name}'`);
1040
+ }
1041
+ return mapVerdictToClassifyResult(parsed);
792
1042
  }
793
1043
  // =============================================================================
794
1044
  // Pattern learning (JSON)
@@ -921,49 +1171,8 @@ export async function invokeMonitor(name, context) {
921
1171
  const patterns = loadPatterns(monitor);
922
1172
  if (patterns.length === 0)
923
1173
  return { verdict: "clean" };
924
- const instructions = loadInstructions(monitor);
925
- // Build context: collectors + caller-supplied overrides
926
- const collected = {};
927
1174
  const branch = invokeCtx.sessionManager.getBranch();
928
- for (const key of monitor.classify.context) {
929
- const fn = collectors[key];
930
- if (fn)
931
- collected[key] = fn(branch);
932
- else
933
- collected[key] = "";
934
- }
935
- if (context) {
936
- for (const [key, value] of Object.entries(context)) {
937
- collected[key] = value;
938
- }
939
- }
940
- const templateContext = {
941
- patterns: formatPatternsForPrompt(patterns),
942
- instructions: formatInstructionsForPrompt(instructions),
943
- iteration: 0,
944
- ...collected,
945
- };
946
- // Render prompt (same logic as renderClassifyPrompt but with injected context)
947
- let prompt = null;
948
- if (monitor.classify.promptTemplate && monitorTemplateEnv) {
949
- try {
950
- prompt = monitorTemplateEnv.render(monitor.classify.promptTemplate, templateContext);
951
- }
952
- catch (err) {
953
- const msg = err instanceof Error ? err.message : String(err);
954
- console.error(`[${monitor.name}] Template render failed (${monitor.classify.promptTemplate}): ${msg}`);
955
- if (!monitor.classify.prompt)
956
- throw new Error(`Template render failed and no inline prompt fallback: ${msg}`);
957
- }
958
- }
959
- if (!prompt && monitor.classify.prompt) {
960
- prompt = monitor.classify.prompt.replace(/\{(\w+)\}/g, (match, key) => {
961
- return String(templateContext[key] ?? match);
962
- });
963
- }
964
- if (!prompt)
965
- return { verdict: "clean" };
966
- const result = await classifyPrompt(invokeCtx, monitor, prompt);
1175
+ const result = await classifyViaAgent(invokeCtx, monitor, branch, context);
967
1176
  // Execute write actions (findings files) based on verdict
968
1177
  if (result.verdict === "clean") {
969
1178
  const cleanAction = monitor.actions.on_clean;
@@ -1006,9 +1215,6 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
1006
1215
  updateStatus();
1007
1216
  return;
1008
1217
  }
1009
- const prompt = renderClassifyPrompt(monitor, branch);
1010
- if (!prompt)
1011
- return;
1012
1218
  // Backoff: skip classification if this monitor has failed repeatedly
1013
1219
  if (monitor.classifySkipRemaining > 0) {
1014
1220
  monitor.classifySkipRemaining--;
@@ -1016,7 +1222,7 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
1016
1222
  }
1017
1223
  let result;
1018
1224
  try {
1019
- result = await classifyPrompt(ctx, monitor, prompt);
1225
+ result = await classifyViaAgent(ctx, monitor, branch, undefined, undefined);
1020
1226
  }
1021
1227
  catch (e) {
1022
1228
  const message = e instanceof Error ? e.message : String(e);
@@ -1053,6 +1259,16 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
1053
1259
  updateStatus();
1054
1260
  return;
1055
1261
  }
1262
+ if (result.verdict === "error") {
1263
+ if (ctx.hasUI) {
1264
+ ctx.ui.notify(`[${monitor.name}] classify failed: ${result.error}`, "warning");
1265
+ }
1266
+ else {
1267
+ console.error(`[${monitor.name}] classify failed: ${result.error}`);
1268
+ }
1269
+ updateStatus();
1270
+ return;
1271
+ }
1056
1272
  // Determine which action to execute
1057
1273
  const action = result.verdict === "new" ? monitor.actions.on_new : monitor.actions.on_flag;
1058
1274
  if (!action)
@@ -1075,7 +1291,7 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
1075
1291
  severity: result.severity ?? "warning",
1076
1292
  monitor_name: monitor.name,
1077
1293
  };
1078
- const renderedSteer = monitorTemplateEnv ? nunjucks.renderString(action.steer, steerContext) : action.steer;
1294
+ const renderedSteer = nunjucks.renderString(action.steer, steerContext);
1079
1295
  const details = {
1080
1296
  monitorName: monitor.name,
1081
1297
  verdict: result.verdict,
@@ -1141,8 +1357,6 @@ export default function (pi) {
1141
1357
  loadedMonitors = monitors;
1142
1358
  if (monitors.length === 0)
1143
1359
  return;
1144
- // Initialize Nunjucks template environment for monitor prompt templates
1145
- monitorTemplateEnv = createMonitorTemplateEnv();
1146
1360
  let statusCtx;
1147
1361
  function updateStatus() {
1148
1362
  if (!statusCtx?.hasUI)
@@ -1193,6 +1407,9 @@ export default function (pi) {
1193
1407
  monitorsEnabled = true;
1194
1408
  pendingAgentEndSteers = [];
1195
1409
  projectDirMissingLogged = false;
1410
+ // Cache agent loader and template environment for classify calls
1411
+ cachedAgentLoader = createAgentLoader(process.cwd(), AGENTS_DIR);
1412
+ cachedMonitorAgentEnv = createMonitorAgentTemplateEnv(process.cwd());
1196
1413
  updateStatus();
1197
1414
  }
1198
1415
  catch {
@@ -1250,7 +1467,7 @@ export default function (pi) {
1250
1467
  when: monitor.when,
1251
1468
  scope: monitor.scope,
1252
1469
  classify: {
1253
- model: monitor.classify.model,
1470
+ agent: monitor.classify.agent,
1254
1471
  context: monitor.classify.context,
1255
1472
  excludes: monitor.classify.excludes,
1256
1473
  },
@@ -1567,15 +1784,9 @@ export default function (pi) {
1567
1784
  continue;
1568
1785
  }
1569
1786
  // Build pending tool call context for template injection.
1570
- // Branch-based collectors (user_text, tool_calls, etc.) are still
1571
- // collected inside renderClassifyPrompt from the branch parameter.
1572
1787
  const toolContext = `Pending tool call:\nTool: ${ev.toolName}\nArguments: ${JSON.stringify(ev.input, null, 2).slice(0, 2000)}`;
1573
- // Render classify prompt with tool context injected as extra template variable
1574
- const prompt = renderClassifyPrompt(m, branch, { tool_call_context: toolContext });
1575
- if (!prompt)
1576
- continue;
1577
1788
  try {
1578
- const result = await classifyPrompt(ctx, m, prompt);
1789
+ const result = await classifyViaAgent(ctx, m, branch, { tool_call_context: toolContext });
1579
1790
  // Reset failure counter on success
1580
1791
  m.classifyFailures = 0;
1581
1792
  if (result.verdict === "flag" || result.verdict === "new") {