@davidorex/pi-behavior-monitors 0.12.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/commit-hygiene-classifier.agent.yaml +11 -0
- package/agents/fragility-classifier.agent.yaml +11 -0
- package/agents/hedge-classifier.agent.yaml +11 -0
- package/agents/unauthorized-action-classifier.agent.yaml +11 -0
- package/agents/work-quality-classifier.agent.yaml +11 -0
- package/dist/index.d.ts +28 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +343 -132
- package/dist/index.js.map +1 -1
- package/examples/commit-hygiene/classify.md +4 -3
- package/examples/commit-hygiene.monitor.json +1 -3
- package/examples/fragility/classify.md +4 -6
- package/examples/fragility.monitor.json +1 -3
- package/examples/hedge/classify.md +17 -8
- package/examples/hedge.monitor.json +2 -4
- package/examples/unauthorized-action/classify.md +4 -3
- package/examples/unauthorized-action.monitor.json +1 -3
- package/examples/work-quality/classify.md +4 -5
- package/examples/work-quality.monitor.json +1 -3
- package/package.json +4 -2
- package/schemas/monitor.schema.json +3 -11
- package/schemas/verdict.schema.json +14 -0
- package/skills/pi-behavior-monitors/SKILL.md +5 -4
- package/skills/pi-behavior-monitors/references/bundled-resources.md +10 -1
package/dist/index.js
CHANGED
|
@@ -13,6 +13,9 @@ import * as os from "node:os";
|
|
|
13
13
|
import * as path from "node:path";
|
|
14
14
|
import { fileURLToPath } from "node:url";
|
|
15
15
|
import { readBlock } from "@davidorex/pi-project/block-api";
|
|
16
|
+
import { validateFromFile } from "@davidorex/pi-project/schema-validator";
|
|
17
|
+
import { createAgentLoader } from "@davidorex/pi-workflows/agent-spec";
|
|
18
|
+
import { compileAgentSpec } from "@davidorex/pi-workflows/step-shared";
|
|
16
19
|
import { complete, StringEnum } from "@mariozechner/pi-ai";
|
|
17
20
|
import { getAgentDir } from "@mariozechner/pi-coding-agent";
|
|
18
21
|
import { Box, Text } from "@mariozechner/pi-tui";
|
|
@@ -20,6 +23,18 @@ import { Type } from "@sinclair/typebox";
|
|
|
20
23
|
import nunjucks from "nunjucks";
|
|
21
24
|
const EXTENSION_DIR = path.dirname(fileURLToPath(import.meta.url));
|
|
22
25
|
const EXAMPLES_DIR = path.join(EXTENSION_DIR, "..", "examples");
|
|
26
|
+
const AGENTS_DIR = path.join(EXTENSION_DIR, "..", "agents");
|
|
27
|
+
/** Tool definition for forcing structured verdict output from the classify LLM call. */
|
|
28
|
+
const VERDICT_TOOL = {
|
|
29
|
+
name: "classify_verdict",
|
|
30
|
+
description: "Output the monitor classification verdict",
|
|
31
|
+
parameters: Type.Object({
|
|
32
|
+
verdict: Type.String({ description: "Classification result: CLEAN, FLAG, or NEW" }),
|
|
33
|
+
description: Type.Optional(Type.String({ description: "One-sentence explanation (required for FLAG/NEW)" })),
|
|
34
|
+
newPattern: Type.Optional(Type.String({ description: "Pattern to learn (required for NEW)" })),
|
|
35
|
+
severity: Type.Optional(Type.String({ description: "Issue severity: info, warning, or critical" })),
|
|
36
|
+
}),
|
|
37
|
+
};
|
|
23
38
|
export const COLLECTOR_DESCRIPTORS = [
|
|
24
39
|
{ name: "user_text", description: "Most recent user message text" },
|
|
25
40
|
{ name: "assistant_text", description: "Most recent assistant message text" },
|
|
@@ -33,6 +48,11 @@ export const COLLECTOR_DESCRIPTORS = [
|
|
|
33
48
|
{ name: "project_vision", description: ".project/project.json vision, core_value, name" },
|
|
34
49
|
{ name: "project_conventions", description: ".project/conformance-reference.json principle names" },
|
|
35
50
|
{ name: "git_status", description: "Output of git status --porcelain", limits: "5s timeout" },
|
|
51
|
+
{
|
|
52
|
+
name: "conversation_history",
|
|
53
|
+
description: "Prior turn summaries (user request + actions + assistant response)",
|
|
54
|
+
limits: "1-3 turns adaptive, 2000 char max",
|
|
55
|
+
},
|
|
36
56
|
];
|
|
37
57
|
export const WHEN_CONDITIONS = [
|
|
38
58
|
{ name: "always", description: "Fire every time the event occurs", parameterized: false },
|
|
@@ -58,7 +78,7 @@ export const WHEN_CONDITIONS = [
|
|
|
58
78
|
parameterized: true,
|
|
59
79
|
},
|
|
60
80
|
];
|
|
61
|
-
export const VERDICT_TYPES = ["clean", "flag", "new"];
|
|
81
|
+
export const VERDICT_TYPES = ["clean", "flag", "new", "error"];
|
|
62
82
|
export const SCOPE_TARGETS = ["main", "subagent", "all", "workflow"];
|
|
63
83
|
export const VALID_EVENTS = new Set(["message_end", "turn_end", "agent_end", "command", "tool_call"]);
|
|
64
84
|
function isValidEvent(event) {
|
|
@@ -67,7 +87,7 @@ function isValidEvent(event) {
|
|
|
67
87
|
// =============================================================================
|
|
68
88
|
// Discovery
|
|
69
89
|
// =============================================================================
|
|
70
|
-
function discoverMonitors() {
|
|
90
|
+
export function discoverMonitors() {
|
|
71
91
|
const dirs = [];
|
|
72
92
|
// project-local
|
|
73
93
|
let cwd = process.cwd();
|
|
@@ -77,6 +97,9 @@ function discoverMonitors() {
|
|
|
77
97
|
dirs.push(candidate);
|
|
78
98
|
break;
|
|
79
99
|
}
|
|
100
|
+
// Stop at project root (.git boundary) — don't traverse into user home config
|
|
101
|
+
if (isDir(path.join(cwd, ".git")))
|
|
102
|
+
break;
|
|
80
103
|
const parent = path.dirname(cwd);
|
|
81
104
|
if (parent === cwd)
|
|
82
105
|
break;
|
|
@@ -138,8 +161,8 @@ function parseMonitorJson(filePath, dir) {
|
|
|
138
161
|
return null;
|
|
139
162
|
}
|
|
140
163
|
const classify = spec.classify;
|
|
141
|
-
if (!classify?.
|
|
142
|
-
console.error(`[${name}] Missing classify.
|
|
164
|
+
if (!classify?.agent || typeof classify.agent !== "string") {
|
|
165
|
+
console.error(`[${name}] Missing classify.agent — all monitors require an agent spec`);
|
|
143
166
|
return null;
|
|
144
167
|
}
|
|
145
168
|
const patternsSpec = spec.patterns;
|
|
@@ -157,11 +180,9 @@ function parseMonitorJson(filePath, dir) {
|
|
|
157
180
|
when: String(spec.when ?? "always"),
|
|
158
181
|
scope: scope ?? { target: "main" },
|
|
159
182
|
classify: {
|
|
160
|
-
model: classify.model ?? "claude-sonnet-4-20250514",
|
|
161
183
|
context: Array.isArray(classify.context) ? classify.context : ["tool_results", "assistant_text"],
|
|
162
184
|
excludes: Array.isArray(classify.excludes) ? classify.excludes : [],
|
|
163
|
-
|
|
164
|
-
promptTemplate: typeof classify.promptTemplate === "string" ? classify.promptTemplate : undefined,
|
|
185
|
+
agent: classify.agent,
|
|
165
186
|
},
|
|
166
187
|
patterns: {
|
|
167
188
|
path: patternsSpec.path,
|
|
@@ -190,12 +211,15 @@ function parseMonitorJson(filePath, dir) {
|
|
|
190
211
|
// =============================================================================
|
|
191
212
|
// Example seeding
|
|
192
213
|
// =============================================================================
|
|
193
|
-
function resolveProjectMonitorsDir() {
|
|
214
|
+
export function resolveProjectMonitorsDir() {
|
|
194
215
|
let cwd = process.cwd();
|
|
195
216
|
while (true) {
|
|
196
217
|
const piDir = path.join(cwd, ".pi");
|
|
197
218
|
if (isDir(piDir))
|
|
198
219
|
return path.join(piDir, "monitors");
|
|
220
|
+
// Stop at project root (.git boundary) — don't traverse into user home config
|
|
221
|
+
if (isDir(path.join(cwd, ".git")))
|
|
222
|
+
break;
|
|
199
223
|
const parent = path.dirname(cwd);
|
|
200
224
|
if (parent === cwd)
|
|
201
225
|
break;
|
|
@@ -351,6 +375,153 @@ function collectCustomMessages(branch) {
|
|
|
351
375
|
}
|
|
352
376
|
return msgs.join("\n");
|
|
353
377
|
}
|
|
378
|
+
// -- conversation_history collector ------------------------------------------
|
|
379
|
+
// Phrases that point back at something said or done earlier in the conversation.
const BACKREFERENCE_PATTERNS = [
    /\bas\s+(i|we)\s+(said|mentioned|described|asked|requested|specified)/i,
    /\b(earlier|previously|before|original|initial|first)\b/i,
    /\bgo\s+back\s+to\b/i,
    /\bsame\s+(thing|as|way)\b/i,
    /\blike\s+(you|i)\s+(did|said|asked)\b/i,
    /\b(continue|keep\s+going|proceed|carry\s+on)\b/i,
    /\b(do|run|try)\s+(that|it|this)\s+(again|once\s+more)\b/i,
    /\bre-?(output|generate|create|do|run|build|make)\b/i,
];
// A message that consists solely of a short confirmation word or phrase.
const AFFIRMATION_PATTERN = /^\s*(yes|yeah|yep|correct|exactly|right|ok|okay|sure|please|go|do it|proceed)\s*[.!]?\s*$/i;
// Verbs that signal a self-contained request rather than a follow-up.
const ACTION_VERBS = /\b(create|write|build|implement|add|fix|update|delete|remove|refactor|test|deploy|install|configure|set up|generate)\b/i;
/**
 * Detect whether the current user message references prior conversation context
 * via backreferences, affirmations, or short messages without action verbs.
 * Exported for testing.
 *
 * @param {string} text - Current user message. Any non-string value is treated
 *   as an empty message instead of throwing.
 * @returns {boolean} True when the message matches a backreference pattern, is
 *   a bare affirmation, or is shorter than 80 characters with no action verb.
 *   An empty message therefore counts as referential.
 */
export function isReferentialMessage(text) {
    // Guard: the checks below read .length and run regexes, which would throw
    // (or stringify "undefined") on a missing message.
    const message = typeof text === "string" ? text : "";
    if (BACKREFERENCE_PATTERNS.some((re) => re.test(message))) {
        return true;
    }
    if (AFFIRMATION_PATTERN.test(message)) {
        return true;
    }
    return message.length < 80 && !ACTION_VERBS.test(message);
}
|
|
402
|
+
/**
 * Build a one-line summary of the tools invoked during a turn.
 *
 * Counts every `toolCall` part found in assistant messages, keyed by tool
 * name, and counts error results against tools already seen in the same turn.
 * Error results for a tool with no recorded call in this turn are ignored.
 *
 * @param {Array} turnEntries - Branch entries belonging to a single turn.
 * @returns {string} e.g. `read(2), bash(1, 1 error)`, or `[no tools]` when the
 *   turn contains no tool calls.
 */
function summarizeTurnTools(turnEntries) {
    const statsByTool = new Map();
    for (const entry of turnEntries) {
        if (!isMessageEntry(entry))
            continue;
        const message = entry.message;
        switch (message.role) {
            case "assistant": {
                for (const part of message.content) {
                    if (part.type !== "toolCall")
                        continue;
                    const stats = statsByTool.get(part.name);
                    if (stats)
                        stats.count++;
                    else
                        statsByTool.set(part.name, { count: 1, errors: 0 });
                }
                break;
            }
            case "toolResult": {
                if (!message.isError)
                    break;
                const stats = statsByTool.get(message.toolName);
                if (stats)
                    stats.errors++;
                break;
            }
            default:
                break;
        }
    }
    if (statsByTool.size === 0)
        return "[no tools]";
    // Map iteration preserves first-seen order of tool names.
    return Array.from(statsByTool, ([name, stats]) => {
        if (stats.errors > 0) {
            const plural = stats.errors > 1 ? "s" : "";
            return `${name}(${stats.count}, ${stats.errors} error${plural})`;
        }
        return `${name}(${stats.count})`;
    }).join(", ");
}
|
|
441
|
+
/**
 * Shorten `text` to at most `max` UTF-16 code units, appending an ellipsis
 * when anything was cut off.
 *
 * The cut never lands between the two halves of a surrogate pair: if the last
 * kept unit would be a lone high surrogate, it is dropped as well so the
 * result stays well-formed.
 *
 * @param {string} text - Text to shorten.
 * @param {number} max - Maximum number of code units to keep before the ellipsis.
 * @returns {string} `text` unchanged when it fits, otherwise the prefix plus `…`.
 */
function truncShort(text, max) {
    if (text.length <= max)
        return text;
    let cut = max;
    const lastKept = text.charCodeAt(cut - 1);
    // 0xD800–0xDBFF is the high-surrogate range; its partner would be cut off.
    if (lastKept >= 0xd800 && lastKept <= 0xdbff)
        cut -= 1;
    return `${text.slice(0, cut)}…`;
}
|
|
444
|
+
/**
 * Collector for `conversation_history`: summarizes the turns that precede the
 * current user message.
 *
 * A turn is the slice of `branch` from one user message up to (not including)
 * the next. The current turn (started by the last user message) is excluded.
 * Up to 3 prior turns are included when the current user text looks
 * referential (see `isReferentialMessage`), otherwise 1. Oldest turns are
 * dropped while the joined summary exceeds the `TRUNCATE` budget; if the one
 * remaining turn is still over budget it is re-summarized with tighter
 * per-field limits.
 *
 * @param {Array} branch - Session branch entries, oldest first.
 * @returns {string} Formatted prior-turn summaries, or "" when there is no
 *   prior turn.
 */
export function collectConversationHistory(branch) {
    // Step A — Segment turns by finding user message indices
    const userIndices = [];
    branch.forEach((entry, index) => {
        if (isMessageEntry(entry) && entry.message.role === "user") {
            userIndices.push(index);
        }
    });
    // Need at least 2 user messages (current + 1 prior) for history
    if (userIndices.length < 2)
        return "";
    /** Summarize prior turn `t`, truncating user/assistant text to `textLimit`. */
    const summarizeTurn = (t, textLimit) => {
        // The next user message starts the next turn.
        const turnEntries = branch.slice(userIndices[t], userIndices[t + 1]);
        const firstEntry = turnEntries[0];
        const userText = isMessageEntry(firstEntry) && firstEntry.message.role === "user"
            ? extractUserText(firstEntry.message.content)
            : "";
        const actions = summarizeTurnTools(turnEntries);
        // Assistant conclusion: last assistant message in the turn with text content
        let assistantConclusion = "[tool actions only]";
        for (let i = turnEntries.length - 1; i >= 0; i--) {
            const e = turnEntries[i];
            if (!isMessageEntry(e) || e.message.role !== "assistant")
                continue;
            const text = extractText(e.message.content).trim();
            if (text) {
                assistantConclusion = truncShort(text, textLimit);
                break;
            }
        }
        return `--- Prior turn ---\nUser: "${truncShort(userText, textLimit)}"\nActions: ${actions}\nAssistant: "${assistantConclusion}"`;
    };
    // Step B — Determine window size from current user text
    const maxTurns = isReferentialMessage(collectUserText(branch)) ? 3 : 1;
    // Prior turns are all user-message-initiated segments except the last one
    const priorTurnCount = userIndices.length - 1;
    const turnsToInclude = Math.min(maxTurns, priorTurnCount);
    // Step C — Summarize the last N prior turns, remembering which turn each
    // summary came from so the fallback below targets the right one.
    const includedTurns = [];
    for (let t = priorTurnCount - turnsToInclude; t < priorTurnCount; t++) {
        includedTurns.push(t);
    }
    const turnSummaries = includedTurns.map((t) => summarizeTurn(t, 200));
    if (turnSummaries.length === 0)
        return "";
    // Step D & E — Format and enforce budget
    let result = turnSummaries.join("\n\n");
    while (result.length > TRUNCATE && turnSummaries.length > 1) {
        // drop oldest
        turnSummaries.shift();
        includedTurns.shift();
        result = turnSummaries.join("\n\n");
    }
    // If the single remaining turn still exceeds the budget, re-summarize THAT
    // turn (the most recent surviving one, not the oldest candidate) with
    // shorter user and assistant text.
    if (result.length > TRUNCATE) {
        result = summarizeTurn(includedTurns[0], 100);
    }
    return result;
}
|
|
354
525
|
function collectProjectVision(_branch) {
|
|
355
526
|
try {
|
|
356
527
|
const raw = readBlock(process.cwd(), "project");
|
|
@@ -404,6 +575,7 @@ const collectors = {
|
|
|
404
575
|
project_vision: collectProjectVision,
|
|
405
576
|
project_conventions: collectProjectConventions,
|
|
406
577
|
git_status: collectGitStatus,
|
|
578
|
+
conversation_history: collectConversationHistory,
|
|
407
579
|
};
|
|
408
580
|
/** Collector names derived from the runtime registry — used for consistency testing. */
|
|
409
581
|
export const COLLECTOR_NAMES = Object.keys(collectors);
|
|
@@ -691,32 +863,120 @@ function formatInstructionsForPrompt(instructions) {
|
|
|
691
863
|
const lines = instructions.map((i) => `- ${i.text}`).join("\n");
|
|
692
864
|
return `\nOperating instructions from the user (follow these strictly):\n${lines}\n`;
|
|
693
865
|
}
|
|
866
|
+
// =============================================================================
|
|
867
|
+
// Classification
|
|
868
|
+
// =============================================================================
|
|
869
|
+
/**
 * Parse a plain-text classifier verdict.
 *
 * Recognized forms (after trimming surrounding whitespace):
 *   - `CLEAN…`                      → `{ verdict: "clean" }`
 *   - `NEW:<pattern>|<description>` → `{ verdict: "new", newPattern, description }`
 *     (without a `|`, the same text is used for both fields)
 *   - `FLAG:<description>`          → `{ verdict: "flag", description }`
 * Anything else is logged and reported as an error verdict rather than being
 * treated as clean.
 *
 * @param {string} raw - Raw model output. Non-string values are treated as
 *   empty output and therefore yield an error verdict instead of throwing.
 * @returns {{verdict: string, description?: string, newPattern?: string, error?: string}}
 */
export function parseVerdict(raw) {
    const text = typeof raw === "string" ? raw.trim() : "";
    if (text.startsWith("CLEAN"))
        return { verdict: "clean" };
    if (text.startsWith("NEW:")) {
        const rest = text.slice(4);
        const pipe = rest.indexOf("|");
        if (pipe !== -1)
            return { verdict: "new", newPattern: rest.slice(0, pipe).trim(), description: rest.slice(pipe + 1).trim() };
        return { verdict: "new", newPattern: rest.trim(), description: rest.trim() };
    }
    if (text.startsWith("FLAG:"))
        return { verdict: "flag", description: text.slice(5).trim() };
    console.error(`[monitors] unrecognized verdict format: "${text.slice(0, 80)}"`);
    return { verdict: "error", error: `Unrecognized verdict format: "${text.slice(0, 80)}"` };
}
|
|
885
|
+
/**
 * Split a model spec of the form `provider/modelId` at its first slash.
 *
 * @param {string} spec - Either `provider/modelId` or a bare model id.
 * @returns {{provider: string, modelId: string}} The two halves; a spec with
 *   no slash is attributed to the "anthropic" provider. Everything after the
 *   first slash (including further slashes) is kept as the model id.
 */
export function parseModelSpec(spec) {
    const [head, ...tail] = spec.split("/");
    if (tail.length === 0) {
        return { provider: "anthropic", modelId: spec };
    }
    return { provider: head, modelId: tail.join("/") };
}
|
|
892
|
+
/**
 * Extract response text from LLM response parts, falling back to thinking
 * block content when no text parts are present. Fixes issue-024 where
 * models with thinking enabled place the entire verdict inside the thinking
 * block, leaving text content empty.
 *
 * @param {Array<{type: string, text?: string, thinking?: string}>} parts -
 *   Response content parts. A non-array value yields "".
 * @returns {string} The concatenated text parts when they contain any
 *   non-whitespace; otherwise the first thinking part whose content is a
 *   non-blank string; otherwise "".
 */
export function extractResponseText(parts) {
    if (!Array.isArray(parts))
        return "";
    const text = parts
        .filter((b) => b?.type === "text")
        .map((b) => b.text)
        .join("");
    if (text.trim())
        return text;
    // Skip thinking parts that are empty or carry no string payload so a later
    // part that actually holds the verdict is not shadowed.
    for (const part of parts) {
        if (part?.type === "thinking" && typeof part.thinking === "string" && part.thinking.trim())
            return part.thinking;
    }
    return "";
}
|
|
911
|
+
/**
 * Map a parsed JSON verdict object to a ClassifyResult.
 * Handles case-insensitive verdict strings (surrounding whitespace is ignored)
 * and optional fields.
 *
 * @param {{verdict?: unknown, description?: unknown, newPattern?: unknown, severity?: unknown}} parsed -
 *   Verdict payload, e.g. the arguments of the classifier's tool call.
 * @returns {{verdict: string, description?: string, newPattern?: string, severity?: unknown, error?: string}}
 *   `clean`, `flag` or `new` for recognized verdicts. For `new`, `newPattern`
 *   falls back to `description` when absent. A missing/non-object payload or an
 *   unrecognized verdict yields `{ verdict: "error", error }` instead of throwing.
 */
export function mapVerdictToClassifyResult(parsed) {
    if (parsed === null || typeof parsed !== "object") {
        return { verdict: "error", error: "Verdict payload is not an object" };
    }
    const verdict = String(parsed.verdict).trim().toUpperCase();
    switch (verdict) {
        case "CLEAN":
            return { verdict: "clean" };
        case "FLAG":
            return {
                verdict: "flag",
                description: String(parsed.description ?? ""),
                severity: parsed.severity,
            };
        case "NEW":
            return {
                verdict: "new",
                description: String(parsed.description ?? ""),
                newPattern: String(parsed.newPattern ?? parsed.description ?? ""),
                severity: parsed.severity,
            };
        default:
            return { verdict: "error", error: `Unknown verdict: ${verdict}` };
    }
}
|
|
694
934
|
/**
 * Build the Nunjucks environment used to render classify agent specs.
 *
 * The loader's search path lists, in this order and only for directories that
 * exist: the project monitors dir, the user monitors dir
 * (~/.pi/agent/monitors), the bundled examples dir, the project templates dir
 * (<cwd>/.pi/templates) and the user templates dir (~/.pi/agent/templates).
 * Monitor directories are listed first so that monitor templates take
 * precedence over shared agent templates.
 *
 * @param {string} cwd - Directory whose `.pi/templates` folder is searched.
 * @returns {nunjucks.Environment} Environment with autoescape and
 *   throwOnUndefined disabled; created without a loader when none of the
 *   directories exist.
 */
function createMonitorAgentTemplateEnv(cwd) {
    const home = os.homedir();
    const candidateDirs = [
        // Monitor paths first — monitor templates take precedence
        resolveProjectMonitorsDir(),
        path.join(home, ".pi", "agent", "monitors"),
        EXAMPLES_DIR,
        // Agent template paths — for shared macros and fallback
        path.join(cwd, ".pi", "templates"),
        path.join(home, ".pi", "agent", "templates"),
    ];
    const searchPaths = candidateDirs.filter((dir) => isDir(dir));
    const loader = searchPaths.length === 0 ? undefined : new nunjucks.FileSystemLoader(searchPaths);
    const options = { autoescape: false, throwOnUndefined: false };
    return new nunjucks.Environment(loader, options);
}
|
|
714
|
-
/** Module-level
|
|
715
|
-
let
|
|
716
|
-
|
|
963
|
+
/** Module-level cached agent loader, populated at session_start. */
|
|
964
|
+
let cachedAgentLoader = null;
|
|
965
|
+
/** Module-level cached template environment for classify agent specs, populated at session_start. */
|
|
966
|
+
let cachedMonitorAgentEnv = null;
|
|
967
|
+
/**
|
|
968
|
+
* Classify via agent spec — the sole classify path.
|
|
969
|
+
* Loads the agent YAML, builds context from collectors, compiles via
|
|
970
|
+
* compileAgentSpec, calls complete() in-process, validates JSON verdict
|
|
971
|
+
* against outputSchema, falls back to parseVerdict() for robustness.
|
|
972
|
+
*/
|
|
973
|
+
async function classifyViaAgent(ctx, monitor, branch, extraContext, signal) {
|
|
974
|
+
const agentName = monitor.classify.agent;
|
|
975
|
+
// Load agent spec (use session cache if available)
|
|
976
|
+
const loadAgent = cachedAgentLoader ?? createAgentLoader(process.cwd(), AGENTS_DIR);
|
|
977
|
+
const agentSpec = loadAgent(agentName);
|
|
978
|
+
// Build context: collectors + patterns + instructions + json_output
|
|
717
979
|
const patterns = loadPatterns(monitor);
|
|
718
|
-
if (patterns.length === 0)
|
|
719
|
-
return null;
|
|
720
980
|
const instructions = loadInstructions(monitor);
|
|
721
981
|
const collected = {};
|
|
722
982
|
for (const key of monitor.classify.context) {
|
|
@@ -724,71 +984,61 @@ function renderClassifyPrompt(monitor, branch, extraContext) {
|
|
|
724
984
|
if (fn)
|
|
725
985
|
collected[key] = fn(branch);
|
|
726
986
|
else
|
|
727
|
-
collected[key] = "";
|
|
987
|
+
collected[key] = "";
|
|
728
988
|
}
|
|
729
|
-
const
|
|
989
|
+
const templateContext = {
|
|
730
990
|
patterns: formatPatternsForPrompt(patterns),
|
|
731
991
|
instructions: formatInstructionsForPrompt(instructions),
|
|
732
992
|
iteration: monitor.whileCount,
|
|
993
|
+
json_output: true,
|
|
733
994
|
...collected,
|
|
734
995
|
...(extraContext ?? {}),
|
|
735
996
|
};
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
}
|
|
749
|
-
// Fallback: inline string with {placeholder} replacement
|
|
750
|
-
if (!monitor.classify.prompt)
|
|
751
|
-
return null;
|
|
752
|
-
return monitor.classify.prompt.replace(/\{(\w+)\}/g, (match, key) => {
|
|
753
|
-
return String(context[key] ?? match);
|
|
754
|
-
});
|
|
755
|
-
}
|
|
756
|
-
// =============================================================================
|
|
757
|
-
// Classification
|
|
758
|
-
// =============================================================================
|
|
759
|
-
export function parseVerdict(raw) {
|
|
760
|
-
const text = raw.trim();
|
|
761
|
-
if (text.startsWith("CLEAN"))
|
|
762
|
-
return { verdict: "clean" };
|
|
763
|
-
if (text.startsWith("NEW:")) {
|
|
764
|
-
const rest = text.slice(4);
|
|
765
|
-
const pipe = rest.indexOf("|");
|
|
766
|
-
if (pipe !== -1)
|
|
767
|
-
return { verdict: "new", newPattern: rest.slice(0, pipe).trim(), description: rest.slice(pipe + 1).trim() };
|
|
768
|
-
return { verdict: "new", newPattern: rest.trim(), description: rest.trim() };
|
|
769
|
-
}
|
|
770
|
-
if (text.startsWith("FLAG:"))
|
|
771
|
-
return { verdict: "flag", description: text.slice(5).trim() };
|
|
772
|
-
console.error(`[monitors] unrecognized verdict format, defaulting to CLEAN: "${text.slice(0, 80)}"`);
|
|
773
|
-
return { verdict: "clean" };
|
|
774
|
-
}
|
|
775
|
-
export function parseModelSpec(spec) {
|
|
776
|
-
const slashIndex = spec.indexOf("/");
|
|
777
|
-
if (slashIndex !== -1) {
|
|
778
|
-
return { provider: spec.slice(0, slashIndex), modelId: spec.slice(slashIndex + 1) };
|
|
779
|
-
}
|
|
780
|
-
return { provider: "anthropic", modelId: spec };
|
|
781
|
-
}
|
|
782
|
-
async function classifyPrompt(ctx, monitor, prompt, signal) {
|
|
783
|
-
const { provider, modelId } = parseModelSpec(monitor.classify.model);
|
|
997
|
+
// Use session-cached template environment or create one
|
|
998
|
+
const mergedEnv = cachedMonitorAgentEnv ?? createMonitorAgentTemplateEnv(process.cwd());
|
|
999
|
+
const compiled = compileAgentSpec(agentSpec, templateContext, mergedEnv, process.cwd());
|
|
1000
|
+
// The task template is the compiled classify prompt
|
|
1001
|
+
const prompt = compiled.taskTemplate;
|
|
1002
|
+
if (!prompt)
|
|
1003
|
+
throw new Error(`Agent ${agentName}: compiled task template is empty`);
|
|
1004
|
+
// Resolve model from agent spec
|
|
1005
|
+
const modelSpec = compiled.model;
|
|
1006
|
+
if (!modelSpec)
|
|
1007
|
+
throw new Error(`Agent ${agentName}: no model specified`);
|
|
1008
|
+
const { provider, modelId } = parseModelSpec(modelSpec);
|
|
784
1009
|
const model = ctx.modelRegistry.find(provider, modelId);
|
|
785
1010
|
if (!model)
|
|
786
|
-
throw new Error(`Model ${
|
|
1011
|
+
throw new Error(`Model ${modelSpec} not found`);
|
|
787
1012
|
const auth = await ctx.modelRegistry.getApiKeyAndHeaders(model);
|
|
788
1013
|
if (!auth.ok)
|
|
789
1014
|
throw new Error(auth.error);
|
|
790
|
-
|
|
791
|
-
|
|
1015
|
+
// Determine thinking from agent spec
|
|
1016
|
+
const thinkingEnabled = compiled.thinking === "on" || compiled.thinking === "true";
|
|
1017
|
+
const response = await complete(model, {
|
|
1018
|
+
messages: [{ role: "user", content: [{ type: "text", text: prompt }], timestamp: Date.now() }],
|
|
1019
|
+
tools: [VERDICT_TOOL],
|
|
1020
|
+
}, {
|
|
1021
|
+
apiKey: auth.apiKey,
|
|
1022
|
+
headers: auth.headers,
|
|
1023
|
+
maxTokens: 300,
|
|
1024
|
+
signal,
|
|
1025
|
+
thinkingEnabled,
|
|
1026
|
+
effort: "low",
|
|
1027
|
+
toolChoice: { type: "tool", name: "classify_verdict" },
|
|
1028
|
+
});
|
|
1029
|
+
const toolCall = response.content.find((c) => c.type === "toolCall");
|
|
1030
|
+
if (!toolCall) {
|
|
1031
|
+
return { verdict: "error", error: "Model did not produce a tool call response" };
|
|
1032
|
+
}
|
|
1033
|
+
const parsed = toolCall.arguments;
|
|
1034
|
+
// Validate against verdict schema if the agent spec declares one
|
|
1035
|
+
if (compiled.outputSchema) {
|
|
1036
|
+
const schemaPath = path.isAbsolute(compiled.outputSchema)
|
|
1037
|
+
? compiled.outputSchema
|
|
1038
|
+
: path.resolve(AGENTS_DIR, compiled.outputSchema);
|
|
1039
|
+
validateFromFile(schemaPath, parsed, `verdict for monitor '${monitor.name}'`);
|
|
1040
|
+
}
|
|
1041
|
+
return mapVerdictToClassifyResult(parsed);
|
|
792
1042
|
}
|
|
793
1043
|
// =============================================================================
|
|
794
1044
|
// Pattern learning (JSON)
|
|
@@ -921,49 +1171,8 @@ export async function invokeMonitor(name, context) {
|
|
|
921
1171
|
const patterns = loadPatterns(monitor);
|
|
922
1172
|
if (patterns.length === 0)
|
|
923
1173
|
return { verdict: "clean" };
|
|
924
|
-
const instructions = loadInstructions(monitor);
|
|
925
|
-
// Build context: collectors + caller-supplied overrides
|
|
926
|
-
const collected = {};
|
|
927
1174
|
const branch = invokeCtx.sessionManager.getBranch();
|
|
928
|
-
|
|
929
|
-
const fn = collectors[key];
|
|
930
|
-
if (fn)
|
|
931
|
-
collected[key] = fn(branch);
|
|
932
|
-
else
|
|
933
|
-
collected[key] = "";
|
|
934
|
-
}
|
|
935
|
-
if (context) {
|
|
936
|
-
for (const [key, value] of Object.entries(context)) {
|
|
937
|
-
collected[key] = value;
|
|
938
|
-
}
|
|
939
|
-
}
|
|
940
|
-
const templateContext = {
|
|
941
|
-
patterns: formatPatternsForPrompt(patterns),
|
|
942
|
-
instructions: formatInstructionsForPrompt(instructions),
|
|
943
|
-
iteration: 0,
|
|
944
|
-
...collected,
|
|
945
|
-
};
|
|
946
|
-
// Render prompt (same logic as renderClassifyPrompt but with injected context)
|
|
947
|
-
let prompt = null;
|
|
948
|
-
if (monitor.classify.promptTemplate && monitorTemplateEnv) {
|
|
949
|
-
try {
|
|
950
|
-
prompt = monitorTemplateEnv.render(monitor.classify.promptTemplate, templateContext);
|
|
951
|
-
}
|
|
952
|
-
catch (err) {
|
|
953
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
954
|
-
console.error(`[${monitor.name}] Template render failed (${monitor.classify.promptTemplate}): ${msg}`);
|
|
955
|
-
if (!monitor.classify.prompt)
|
|
956
|
-
throw new Error(`Template render failed and no inline prompt fallback: ${msg}`);
|
|
957
|
-
}
|
|
958
|
-
}
|
|
959
|
-
if (!prompt && monitor.classify.prompt) {
|
|
960
|
-
prompt = monitor.classify.prompt.replace(/\{(\w+)\}/g, (match, key) => {
|
|
961
|
-
return String(templateContext[key] ?? match);
|
|
962
|
-
});
|
|
963
|
-
}
|
|
964
|
-
if (!prompt)
|
|
965
|
-
return { verdict: "clean" };
|
|
966
|
-
const result = await classifyPrompt(invokeCtx, monitor, prompt);
|
|
1175
|
+
const result = await classifyViaAgent(invokeCtx, monitor, branch, context);
|
|
967
1176
|
// Execute write actions (findings files) based on verdict
|
|
968
1177
|
if (result.verdict === "clean") {
|
|
969
1178
|
const cleanAction = monitor.actions.on_clean;
|
|
@@ -1006,9 +1215,6 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
|
|
|
1006
1215
|
updateStatus();
|
|
1007
1216
|
return;
|
|
1008
1217
|
}
|
|
1009
|
-
const prompt = renderClassifyPrompt(monitor, branch);
|
|
1010
|
-
if (!prompt)
|
|
1011
|
-
return;
|
|
1012
1218
|
// Backoff: skip classification if this monitor has failed repeatedly
|
|
1013
1219
|
if (monitor.classifySkipRemaining > 0) {
|
|
1014
1220
|
monitor.classifySkipRemaining--;
|
|
@@ -1016,7 +1222,7 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
|
|
|
1016
1222
|
}
|
|
1017
1223
|
let result;
|
|
1018
1224
|
try {
|
|
1019
|
-
result = await
|
|
1225
|
+
result = await classifyViaAgent(ctx, monitor, branch, undefined, undefined);
|
|
1020
1226
|
}
|
|
1021
1227
|
catch (e) {
|
|
1022
1228
|
const message = e instanceof Error ? e.message : String(e);
|
|
@@ -1053,6 +1259,16 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
|
|
|
1053
1259
|
updateStatus();
|
|
1054
1260
|
return;
|
|
1055
1261
|
}
|
|
1262
|
+
if (result.verdict === "error") {
|
|
1263
|
+
if (ctx.hasUI) {
|
|
1264
|
+
ctx.ui.notify(`[${monitor.name}] classify failed: ${result.error}`, "warning");
|
|
1265
|
+
}
|
|
1266
|
+
else {
|
|
1267
|
+
console.error(`[${monitor.name}] classify failed: ${result.error}`);
|
|
1268
|
+
}
|
|
1269
|
+
updateStatus();
|
|
1270
|
+
return;
|
|
1271
|
+
}
|
|
1056
1272
|
// Determine which action to execute
|
|
1057
1273
|
const action = result.verdict === "new" ? monitor.actions.on_new : monitor.actions.on_flag;
|
|
1058
1274
|
if (!action)
|
|
@@ -1075,7 +1291,7 @@ async function activate(monitor, pi, ctx, branch, steeredThisTurn, updateStatus,
|
|
|
1075
1291
|
severity: result.severity ?? "warning",
|
|
1076
1292
|
monitor_name: monitor.name,
|
|
1077
1293
|
};
|
|
1078
|
-
const renderedSteer =
|
|
1294
|
+
const renderedSteer = nunjucks.renderString(action.steer, steerContext);
|
|
1079
1295
|
const details = {
|
|
1080
1296
|
monitorName: monitor.name,
|
|
1081
1297
|
verdict: result.verdict,
|
|
@@ -1141,8 +1357,6 @@ export default function (pi) {
|
|
|
1141
1357
|
loadedMonitors = monitors;
|
|
1142
1358
|
if (monitors.length === 0)
|
|
1143
1359
|
return;
|
|
1144
|
-
// Initialize Nunjucks template environment for monitor prompt templates
|
|
1145
|
-
monitorTemplateEnv = createMonitorTemplateEnv();
|
|
1146
1360
|
let statusCtx;
|
|
1147
1361
|
function updateStatus() {
|
|
1148
1362
|
if (!statusCtx?.hasUI)
|
|
@@ -1193,6 +1407,9 @@ export default function (pi) {
|
|
|
1193
1407
|
monitorsEnabled = true;
|
|
1194
1408
|
pendingAgentEndSteers = [];
|
|
1195
1409
|
projectDirMissingLogged = false;
|
|
1410
|
+
// Cache agent loader and template environment for classify calls
|
|
1411
|
+
cachedAgentLoader = createAgentLoader(process.cwd(), AGENTS_DIR);
|
|
1412
|
+
cachedMonitorAgentEnv = createMonitorAgentTemplateEnv(process.cwd());
|
|
1196
1413
|
updateStatus();
|
|
1197
1414
|
}
|
|
1198
1415
|
catch {
|
|
@@ -1250,7 +1467,7 @@ export default function (pi) {
|
|
|
1250
1467
|
when: monitor.when,
|
|
1251
1468
|
scope: monitor.scope,
|
|
1252
1469
|
classify: {
|
|
1253
|
-
|
|
1470
|
+
agent: monitor.classify.agent,
|
|
1254
1471
|
context: monitor.classify.context,
|
|
1255
1472
|
excludes: monitor.classify.excludes,
|
|
1256
1473
|
},
|
|
@@ -1567,15 +1784,9 @@ export default function (pi) {
|
|
|
1567
1784
|
continue;
|
|
1568
1785
|
}
|
|
1569
1786
|
// Build pending tool call context for template injection.
|
|
1570
|
-
// Branch-based collectors (user_text, tool_calls, etc.) are still
|
|
1571
|
-
// collected inside renderClassifyPrompt from the branch parameter.
|
|
1572
1787
|
const toolContext = `Pending tool call:\nTool: ${ev.toolName}\nArguments: ${JSON.stringify(ev.input, null, 2).slice(0, 2000)}`;
|
|
1573
|
-
// Render classify prompt with tool context injected as extra template variable
|
|
1574
|
-
const prompt = renderClassifyPrompt(m, branch, { tool_call_context: toolContext });
|
|
1575
|
-
if (!prompt)
|
|
1576
|
-
continue;
|
|
1577
1788
|
try {
|
|
1578
|
-
const result = await
|
|
1789
|
+
const result = await classifyViaAgent(ctx, m, branch, { tool_call_context: toolContext });
|
|
1579
1790
|
// Reset failure counter on success
|
|
1580
1791
|
m.classifyFailures = 0;
|
|
1581
1792
|
if (result.verdict === "flag" || result.verdict === "new") {
|