@davidorex/pi-behavior-monitors 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +61 -0
- package/README.md +59 -0
- package/examples/fragility.instructions.json +1 -0
- package/examples/fragility.monitor.json +62 -0
- package/examples/fragility.patterns.json +86 -0
- package/examples/hedge.instructions.json +1 -0
- package/examples/hedge.monitor.json +34 -0
- package/examples/hedge.patterns.json +10 -0
- package/examples/work-quality.instructions.json +1 -0
- package/examples/work-quality.monitor.json +62 -0
- package/examples/work-quality.patterns.json +13 -0
- package/index.ts +1166 -0
- package/package.json +50 -0
- package/schemas/monitor-pattern.schema.json +38 -0
- package/schemas/monitor.schema.json +156 -0
- package/skills/pi-behavior-monitors/SKILL.md +404 -0
package/package.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@davidorex/pi-behavior-monitors",
|
|
3
|
+
"version": "0.1.2",
|
|
4
|
+
"description": "Behavior monitors for pi that watch agent activity and steer corrections",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"pi-package"
|
|
8
|
+
],
|
|
9
|
+
"license": "MIT",
|
|
10
|
+
"author": "David Ryan",
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "git+https://github.com/davidorex/pi-behavior-monitors.git"
|
|
14
|
+
},
|
|
15
|
+
"homepage": "https://github.com/davidorex/pi-behavior-monitors",
|
|
16
|
+
"files": [
|
|
17
|
+
"index.ts",
|
|
18
|
+
"examples",
|
|
19
|
+
"schemas",
|
|
20
|
+
"skills",
|
|
21
|
+
"README.md",
|
|
22
|
+
"CHANGELOG.md"
|
|
23
|
+
],
|
|
24
|
+
"pi": {
|
|
25
|
+
"extensions": [
|
|
26
|
+
"./index.ts"
|
|
27
|
+
],
|
|
28
|
+
"skills": [
|
|
29
|
+
"./skills"
|
|
30
|
+
]
|
|
31
|
+
},
|
|
32
|
+
"scripts": {
|
|
33
|
+
"test": "vitest run",
|
|
34
|
+
"test:watch": "vitest",
|
|
35
|
+
"release": "npx changelogen --bump --release",
|
|
36
|
+
"release:patch": "npx changelogen --bump --release --patch",
|
|
37
|
+
"release:minor": "npx changelogen --bump --release --minor",
|
|
38
|
+
"release:major": "npx changelogen --bump --release --major",
|
|
39
|
+
"release:push": "git push --follow-tags"
|
|
40
|
+
},
|
|
41
|
+
"peerDependencies": {
|
|
42
|
+
"@mariozechner/pi-ai": "*",
|
|
43
|
+
"@mariozechner/pi-coding-agent": "*",
|
|
44
|
+
"@mariozechner/pi-tui": "*",
|
|
45
|
+
"@sinclair/typebox": "*"
|
|
46
|
+
},
|
|
47
|
+
"devDependencies": {
|
|
48
|
+
"vitest": "^3.2.4"
|
|
49
|
+
}
|
|
50
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "object",
|
|
3
|
+
"required": ["id", "description"],
|
|
4
|
+
"properties": {
|
|
5
|
+
"id": {
|
|
6
|
+
"type": "string",
|
|
7
|
+
"description": "Stable identifier for dedup"
|
|
8
|
+
},
|
|
9
|
+
"description": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "What this pattern detects"
|
|
12
|
+
},
|
|
13
|
+
"severity": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"enum": ["error", "warning", "info"],
|
|
16
|
+
"default": "warning"
|
|
17
|
+
},
|
|
18
|
+
"category": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"description": "Grouping key for the pattern"
|
|
21
|
+
},
|
|
22
|
+
"examples": {
|
|
23
|
+
"type": "array",
|
|
24
|
+
"items": { "type": "string" },
|
|
25
|
+
"description": "Example manifestations of this pattern"
|
|
26
|
+
},
|
|
27
|
+
"learned_at": {
|
|
28
|
+
"type": "string",
|
|
29
|
+
"format": "date-time",
|
|
30
|
+
"description": "When this pattern was first detected"
|
|
31
|
+
},
|
|
32
|
+
"source": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"enum": ["bundled", "learned", "user"],
|
|
35
|
+
"description": "How this pattern was added"
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "object",
|
|
3
|
+
"required": ["name", "event", "classify", "patterns", "actions"],
|
|
4
|
+
"properties": {
|
|
5
|
+
"name": { "type": "string" },
|
|
6
|
+
"description": { "type": "string" },
|
|
7
|
+
"event": {
|
|
8
|
+
"type": "string",
|
|
9
|
+
"enum": ["message_end", "turn_end", "agent_end", "command"]
|
|
10
|
+
},
|
|
11
|
+
"when": {
|
|
12
|
+
"type": "string",
|
|
13
|
+
"description": "Activation condition: 'always', 'has_tool_results', 'has_file_writes', 'has_bash', 'every(N)', 'tool(name)'",
|
|
14
|
+
"default": "always"
|
|
15
|
+
},
|
|
16
|
+
"scope": {
|
|
17
|
+
"type": "object",
|
|
18
|
+
"properties": {
|
|
19
|
+
"target": {
|
|
20
|
+
"type": "string",
|
|
21
|
+
"enum": ["main", "subagent", "all", "workflow"],
|
|
22
|
+
"default": "main"
|
|
23
|
+
},
|
|
24
|
+
"filter": {
|
|
25
|
+
"type": "object",
|
|
26
|
+
"properties": {
|
|
27
|
+
"agent_type": {
|
|
28
|
+
"type": "array",
|
|
29
|
+
"items": { "type": "string" },
|
|
30
|
+
"description": "Only monitor agents with these names/types"
|
|
31
|
+
},
|
|
32
|
+
"step_name": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"description": "Glob pattern for workflow step names"
|
|
35
|
+
},
|
|
36
|
+
"workflow": {
|
|
37
|
+
"type": "string",
|
|
38
|
+
"description": "Glob pattern for workflow names"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
"classify": {
|
|
45
|
+
"type": "object",
|
|
46
|
+
"required": ["model", "context", "prompt"],
|
|
47
|
+
"properties": {
|
|
48
|
+
"model": {
|
|
49
|
+
"type": "string",
|
|
50
|
+
"description": "Model ID or provider/model for classification calls"
|
|
51
|
+
},
|
|
52
|
+
"context": {
|
|
53
|
+
"type": "array",
|
|
54
|
+
"items": {
|
|
55
|
+
"type": "string",
|
|
56
|
+
"enum": ["tool_results", "assistant_text", "user_text", "tool_calls", "custom_messages"]
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"excludes": {
|
|
60
|
+
"type": "array",
|
|
61
|
+
"items": { "type": "string" },
|
|
62
|
+
"description": "Skip activation if these monitors already steered this turn"
|
|
63
|
+
},
|
|
64
|
+
"prompt": {
|
|
65
|
+
"type": "string",
|
|
66
|
+
"description": "Classification prompt template with {tool_results}, {assistant_text}, {patterns}, {instructions} placeholders"
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
},
|
|
70
|
+
"patterns": {
|
|
71
|
+
"type": "object",
|
|
72
|
+
"required": ["path"],
|
|
73
|
+
"properties": {
|
|
74
|
+
"path": {
|
|
75
|
+
"type": "string",
|
|
76
|
+
"description": "Path to patterns JSON file (relative to monitor dir)"
|
|
77
|
+
},
|
|
78
|
+
"learn": {
|
|
79
|
+
"type": "boolean",
|
|
80
|
+
"default": true,
|
|
81
|
+
"description": "Auto-learn new patterns from 'new' verdicts"
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
"instructions": {
|
|
86
|
+
"type": "object",
|
|
87
|
+
"properties": {
|
|
88
|
+
"path": {
|
|
89
|
+
"type": "string",
|
|
90
|
+
"description": "Path to instructions JSON file (relative to monitor dir)"
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
},
|
|
94
|
+
"actions": {
|
|
95
|
+
"type": "object",
|
|
96
|
+
"properties": {
|
|
97
|
+
"on_flag": { "$ref": "#/$defs/action" },
|
|
98
|
+
"on_new": { "$ref": "#/$defs/action" },
|
|
99
|
+
"on_clean": { "$ref": "#/$defs/action" }
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
"ceiling": {
|
|
103
|
+
"type": "integer",
|
|
104
|
+
"default": 5,
|
|
105
|
+
"description": "Max consecutive steers before escalation"
|
|
106
|
+
},
|
|
107
|
+
"escalate": {
|
|
108
|
+
"type": "string",
|
|
109
|
+
"enum": ["ask", "dismiss"],
|
|
110
|
+
"default": "ask"
|
|
111
|
+
}
|
|
112
|
+
},
|
|
113
|
+
"$defs": {
|
|
114
|
+
"action": {
|
|
115
|
+
"type": "object",
|
|
116
|
+
"properties": {
|
|
117
|
+
"steer": {
|
|
118
|
+
"type": ["string", "null"],
|
|
119
|
+
"description": "Message to inject into conversation. Null = no steering."
|
|
120
|
+
},
|
|
121
|
+
"write": {
|
|
122
|
+
"type": "object",
|
|
123
|
+
"required": ["path", "merge"],
|
|
124
|
+
"properties": {
|
|
125
|
+
"path": {
|
|
126
|
+
"type": "string",
|
|
127
|
+
"description": "JSON file to write findings to"
|
|
128
|
+
},
|
|
129
|
+
"schema": {
|
|
130
|
+
"type": "string",
|
|
131
|
+
"description": "Schema the target file conforms to"
|
|
132
|
+
},
|
|
133
|
+
"merge": {
|
|
134
|
+
"type": "string",
|
|
135
|
+
"enum": ["append", "upsert"],
|
|
136
|
+
"description": "append = add to array, upsert = update by id"
|
|
137
|
+
},
|
|
138
|
+
"array_field": {
|
|
139
|
+
"type": "string",
|
|
140
|
+
"description": "Which field in the target JSON holds the array (e.g. 'gaps', 'findings')",
|
|
141
|
+
"default": "items"
|
|
142
|
+
},
|
|
143
|
+
"template": {
|
|
144
|
+
"type": "object",
|
|
145
|
+
"description": "Template for mapping classification output to target schema. Use {description}, {severity}, {finding_id}, {monitor_name}, {timestamp} placeholders."
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
"learn_pattern": {
|
|
150
|
+
"type": "boolean",
|
|
151
|
+
"description": "If true, add new pattern to patterns file on 'new' verdict"
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: pi-behavior-monitors
|
|
3
|
+
description: >
|
|
4
|
+
Behavior monitors that watch agent activity and steer corrections when issues are detected.
|
|
5
|
+
Monitors are JSON files (.monitor.json) in .pi/monitors/ with classify, patterns, actions,
|
|
6
|
+
and scope blocks. Patterns and instructions are JSON arrays. Use when creating, editing,
|
|
7
|
+
debugging, or understanding behavior monitors.
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
<objective>
|
|
11
|
+
Monitors are autonomous watchdogs that observe agent activity, classify it against a
|
|
12
|
+
JSON pattern library using a side-channel LLM call, and either steer corrections or
|
|
13
|
+
write structured findings to JSON files for downstream consumption.
|
|
14
|
+
</objective>
|
|
15
|
+
|
|
16
|
+
<monitor_locations>
|
|
17
|
+
Monitors are discovered from two locations, checked in order:
|
|
18
|
+
|
|
19
|
+
1. **Project**: `.pi/monitors/*.monitor.json` (walks up from cwd to find `.pi/`)
|
|
20
|
+
2. **Global**: `~/.pi/agent/monitors/*.monitor.json` (via `getAgentDir()`)
|
|
21
|
+
|
|
22
|
+
Project monitors take precedence — if a project monitor has the same `name` as a global
|
|
23
|
+
one, the global monitor is ignored. The extension silently exits if zero monitors are
|
|
24
|
+
discovered after checking both locations.
|
|
25
|
+
</monitor_locations>
|
|
26
|
+
|
|
27
|
+
<seeding>
|
|
28
|
+
On first run in a project, the extension seeds bundled example monitors into
|
|
29
|
+
`.pi/monitors/` if ALL of the following are true:
|
|
30
|
+
|
|
31
|
+
- `discoverMonitors()` finds zero monitors (neither project nor global)
|
|
32
|
+
- The `examples/` directory exists in the extension package
|
|
33
|
+
- The target `.pi/monitors/` directory contains no `.monitor.json` files
|
|
34
|
+
|
|
35
|
+
Seeding copies all `.json` files from `examples/` (monitor definitions, patterns, and
|
|
36
|
+
instructions files) into `.pi/monitors/`. It skips files that already exist at the
|
|
37
|
+
destination. The user is notified: "Edit or delete them to customize."
|
|
38
|
+
|
|
39
|
+
To customize seeded monitors, edit the copies in `.pi/monitors/` directly. To remove a
|
|
40
|
+
bundled monitor, delete its three files (`.monitor.json`, `.patterns.json`,
|
|
41
|
+
`.instructions.json`). Seeding never re-runs once any monitors exist.
|
|
42
|
+
</seeding>
|
|
43
|
+
|
|
44
|
+
<file_structure>
|
|
45
|
+
Each monitor is a triad of JSON files sharing a name prefix:
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
.pi/monitors/
|
|
49
|
+
├── fragility.monitor.json # Monitor definition (classify + patterns + actions + scope)
|
|
50
|
+
├── fragility.patterns.json # Known patterns (JSON array, grows automatically)
|
|
51
|
+
└── fragility.instructions.json # User corrections (JSON array, optional)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The instructions file is optional. If the `instructions` block is omitted from the monitor definition, the extension defaults the path to
|
|
55
|
+
`${name}.instructions.json` and treats a missing file as an empty array.
|
|
56
|
+
</file_structure>
|
|
57
|
+
|
|
58
|
+
<monitor_definition>
|
|
59
|
+
A `.monitor.json` file conforms to `schemas/monitor.schema.json`:
|
|
60
|
+
|
|
61
|
+
```json
|
|
62
|
+
{
|
|
63
|
+
"name": "my-monitor",
|
|
64
|
+
"description": "What this monitor watches for",
|
|
65
|
+
"event": "message_end",
|
|
66
|
+
"when": "has_tool_results",
|
|
67
|
+
"scope": {
|
|
68
|
+
"target": "main",
|
|
69
|
+
"filter": { "agent_type": ["audit-fixer"] }
|
|
70
|
+
},
|
|
71
|
+
"classify": {
|
|
72
|
+
"model": "claude-sonnet-4-20250514",
|
|
73
|
+
"context": ["tool_results", "assistant_text"],
|
|
74
|
+
"excludes": ["other-monitor"],
|
|
75
|
+
"prompt": "Classification prompt with {tool_results} {assistant_text} {patterns} {instructions} placeholders.\n\nReply CLEAN, FLAG:<desc>, or NEW:<pattern>|<desc>."
|
|
76
|
+
},
|
|
77
|
+
"patterns": {
|
|
78
|
+
"path": "my-monitor.patterns.json",
|
|
79
|
+
"learn": true
|
|
80
|
+
},
|
|
81
|
+
"instructions": {
|
|
82
|
+
"path": "my-monitor.instructions.json"
|
|
83
|
+
},
|
|
84
|
+
"actions": {
|
|
85
|
+
"on_flag": {
|
|
86
|
+
"steer": "Fix the issue.",
|
|
87
|
+
"write": {
|
|
88
|
+
"path": ".workflow/gaps.json",
|
|
89
|
+
"merge": "append",
|
|
90
|
+
"array_field": "gaps",
|
|
91
|
+
"template": {
|
|
92
|
+
"id": "monitor-{finding_id}",
|
|
93
|
+
"description": "{description}",
|
|
94
|
+
"status": "open",
|
|
95
|
+
"category": "monitor",
|
|
96
|
+
"source": "monitor"
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
"on_new": {
|
|
101
|
+
"steer": "Fix the issue.",
|
|
102
|
+
"learn_pattern": true,
|
|
103
|
+
"write": { "...": "same as on_flag" }
|
|
104
|
+
},
|
|
105
|
+
"on_clean": null
|
|
106
|
+
},
|
|
107
|
+
"ceiling": 5,
|
|
108
|
+
"escalate": "ask"
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
</monitor_definition>
|
|
112
|
+
|
|
113
|
+
<fields>
|
|
114
|
+
|
|
115
|
+
**Top-level fields:**
|
|
116
|
+
|
|
117
|
+
| Field | Default | Description |
|
|
118
|
+
|-------|---------|-------------|
|
|
119
|
+
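| `name` | (required) | Monitor identifier. Should be unique; if a project monitor and a global monitor share a name, the project monitor takes precedence and the global one is ignored. |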
|
|
120
|
+
| `description` | `""` | Human-readable description. Also used as command description for `event: command` monitors. |
|
|
121
|
+
| `event` | `message_end` | When to fire: `message_end`, `turn_end`, `agent_end`, or `command`. |
|
|
122
|
+
| `when` | `always` | Activation condition (see below). |
|
|
123
|
+
| `ceiling` | `5` | Max consecutive steers before escalation. |
|
|
124
|
+
| `escalate` | `ask` | At ceiling: `ask` (confirm with user) or `dismiss` (silence for session). |
|
|
125
|
+
|
|
126
|
+
**Scope block:**
|
|
127
|
+
|
|
128
|
+
| Field | Default | Description |
|
|
129
|
+
|-------|---------|-------------|
|
|
130
|
+
| `scope.target` | `main` | What to observe: `main`, `subagent`, `all`, `workflow`. |
|
|
131
|
+
| `scope.filter.agent_type` | — | Only monitor agents with these names. |
|
|
132
|
+
| `scope.filter.step_name` | — | Glob pattern for workflow step names. |
|
|
133
|
+
| `scope.filter.workflow` | — | Glob pattern for workflow names. |
|
|
134
|
+
|
|
135
|
+
Steering (injecting messages into the conversation) only fires for `main` scope.
|
|
136
|
+
Non-main scopes can still write findings to JSON files.
|
|
137
|
+
|
|
138
|
+
**Classify block:**
|
|
139
|
+
|
|
140
|
+
| Field | Default | Description |
|
|
141
|
+
|-------|---------|-------------|
|
|
142
|
+
| `classify.model` | `claude-sonnet-4-20250514` | Model for classification. Plain model ID uses `anthropic` provider. Use `provider/model` for other providers. |
|
|
143
|
+
| `classify.context` | `["tool_results", "assistant_text"]` | Conversation parts to collect. |
|
|
144
|
+
| `classify.excludes` | `[]` | Monitor names — skip activation if any of these already steered this turn. |
|
|
145
|
+
| `classify.prompt` | (required) | Classification prompt template with `{placeholders}`. |
|
|
146
|
+
|
|
147
|
+
**Actions block** — per verdict (`on_flag`, `on_new`, `on_clean`):
|
|
148
|
+
|
|
149
|
+
| Field | Description |
|
|
150
|
+
|-------|-------------|
|
|
151
|
+
| `steer` | Message to inject into conversation. `null` = no steering. Only effective for `scope.target: "main"`. |
|
|
152
|
+
| `write.path` | JSON file to write findings to. Relative paths resolve from `process.cwd()`, not from the monitor directory. |
|
|
153
|
+
| `write.merge` | `append` (add to array) or `upsert` (update by matching `id` field). |
|
|
154
|
+
| `write.array_field` | Which field in target JSON holds the array (e.g. `"gaps"`, `"findings"`). The schema declares a default of `"items"`. |
|
|
155
|
+
| `write.template` | Template mapping with `{finding_id}`, `{description}`, `{severity}`, `{monitor_name}`, `{timestamp}`. |
|
|
156
|
+
| `write.schema` | Optional schema path for documentation. Not enforced at runtime. |
|
|
157
|
+
| `learn_pattern` | If true, add new pattern to patterns file on `new` verdict. |
|
|
158
|
+
|
|
159
|
+
`on_clean` can be configured with a `write` action to log clean verdicts. Setting it to
|
|
160
|
+
`null` means no action on clean (the default behavior).
|
|
161
|
+
</fields>
|
|
162
|
+
|
|
163
|
+
<when_conditions>
|
|
164
|
+
- `always` — fire every time the event occurs
|
|
165
|
+
- `has_tool_results` — fire only if tool results are present since last user message
|
|
166
|
+
- `has_file_writes` — fire only if `write` or `edit` tool was called since last user message
|
|
167
|
+
- `has_bash` — fire only if `bash` tool was called since last user message
|
|
168
|
+
- `tool(name)` — fire only if a specific named tool was called since last user message
|
|
169
|
+
- `every(N)` — fire every Nth activation within the same user prompt (counter resets when user text changes)
|
|
170
|
+
</when_conditions>
|
|
171
|
+
|
|
172
|
+
<context_collectors>
|
|
173
|
+
| Collector | Placeholder | What it collects | Limits |
|
|
174
|
+
|-----------|-------------|------------------|--------|
|
|
175
|
+
| `user_text` | `{user_text}` | Most recent user message text (walks back past assistant to find preceding user message) | — |
|
|
176
|
+
| `assistant_text` | `{assistant_text}` | Most recent assistant message text | — |
|
|
177
|
+
| `tool_results` | `{tool_results}` | Tool results with tool name and error status | Last 5, each truncated to 2000 chars |
|
|
178
|
+
| `tool_calls` | `{tool_calls}` | Tool calls and their results interleaved | Last 20, each truncated to 2000 chars |
|
|
179
|
+
| `custom_messages` | `{custom_messages}` | Custom extension messages since last user message | — |
|
|
180
|
+
|
|
181
|
+
Built-in placeholders (always available, not listed in `classify.context`):
|
|
182
|
+
- `{patterns}` — formatted from patterns JSON as numbered list: `1. [severity] description`
|
|
183
|
+
- `{instructions}` — formatted from instructions JSON as bulleted list with preamble "Operating instructions from the user (follow these strictly):" — empty string if no instructions
|
|
184
|
+
- `{iteration}` — current consecutive steer count (0-indexed)
|
|
185
|
+
</context_collectors>
|
|
186
|
+
|
|
187
|
+
<patterns_file>
|
|
188
|
+
JSON array conforming to `schemas/monitor-pattern.schema.json`:
|
|
189
|
+
|
|
190
|
+
```json
|
|
191
|
+
[
|
|
192
|
+
{
|
|
193
|
+
"id": "empty-catch",
|
|
194
|
+
"description": "Silently catching exceptions with empty catch blocks",
|
|
195
|
+
"severity": "error",
|
|
196
|
+
"category": "error-handling",
|
|
197
|
+
"examples": ["try { ... } catch {}"],
|
|
198
|
+
"source": "bundled"
|
|
199
|
+
},
|
|
200
|
+
{
|
|
201
|
+
"id": "learned-pattern-abc",
|
|
202
|
+
"description": "Learned pattern from runtime detection",
|
|
203
|
+
"severity": "warning",
|
|
204
|
+
"source": "learned",
|
|
205
|
+
"learned_at": "2026-03-15T02:30:00.000Z"
|
|
206
|
+
}
|
|
207
|
+
]
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
| Field | Required | Description |
|
|
211
|
+
|-------|----------|-------------|
|
|
212
|
+
| `id` | yes | Stable identifier for dedup. Auto-generated for learned patterns: lowercased, non-alphanumeric replaced with hyphens, truncated to 60 chars. |
|
|
213
|
+
| `description` | yes | What this pattern detects. Used for dedup (exact match) when learning. |
|
|
214
|
+
| `severity` | no | `"error"`, `"warning"`, or `"info"`. Defaults to `"warning"` in prompt formatting. |
|
|
215
|
+
| `category` | no | Grouping key (e.g. `"error-handling"`, `"avoidance"`, `"deferral"`). |
|
|
216
|
+
| `examples` | no | Example manifestations. Stored but not surfaced in classification prompts. |
|
|
217
|
+
| `source` | no | `"bundled"`, `"learned"`, or `"user"`. Learned patterns are tagged `"learned"`. |
|
|
218
|
+
| `learned_at` | no | ISO timestamp for learned patterns. |
|
|
219
|
+
|
|
220
|
+
Patterns grow automatically when `learn_pattern: true` and a `NEW:` verdict is returned.
|
|
221
|
+
Dedup is by exact `description` match — duplicates are silently skipped.
|
|
222
|
+
|
|
223
|
+
**Critical**: If the patterns array is empty (file missing, empty array, or unparseable),
|
|
224
|
+
classification is skipped entirely for that activation. A monitor with no patterns does nothing.
|
|
225
|
+
</patterns_file>
|
|
226
|
+
|
|
227
|
+
<instructions_file>
|
|
228
|
+
JSON array of user rules (called "instructions" on disk, "rules" in the command surface):
|
|
229
|
+
|
|
230
|
+
```json
|
|
231
|
+
[
|
|
232
|
+
{ "text": "grep exit code 1 is not an error", "added_at": "2026-03-15T02:30:00.000Z" },
|
|
233
|
+
{ "text": "catch-and-log in event handlers is correct for non-critical extensions", "added_at": "2026-03-15T03:00:00.000Z" }
|
|
234
|
+
]
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Manage via `/monitors <name> rules` (list), `/monitors <name> rules add <text>` (add),
|
|
238
|
+
`/monitors <name> rules remove <n>` (remove by number), `/monitors <name> rules replace <n> <text>`
|
|
239
|
+
(replace by number). The LLM can also edit the `.instructions.json` file directly.
|
|
240
|
+
|
|
241
|
+
Rules are injected into the classification prompt under a preamble
|
|
242
|
+
"Operating instructions from the user (follow these strictly):" — only if the array is
|
|
243
|
+
non-empty. An empty array or missing file produces no rules block in the prompt.
|
|
244
|
+
</instructions_file>
|
|
245
|
+
|
|
246
|
+
<verdict_format>
|
|
247
|
+
The classification LLM must respond with one of:
|
|
248
|
+
|
|
249
|
+
- `CLEAN` — no issue detected. Resets consecutive steer counter to 0.
|
|
250
|
+
- `FLAG:<description>` — known pattern matched. Triggers `on_flag` action.
|
|
251
|
+
- `NEW:<pattern>|<description>` — novel issue. The text before `|` becomes the learned pattern description; the text after `|` becomes the finding description. If no `|` is present, the full text after `NEW:` is used for both. Triggers `on_new` action.
|
|
252
|
+
|
|
253
|
+
Any response that does not start with `CLEAN`, `FLAG:`, or `NEW:` is treated as `CLEAN`.
|
|
254
|
+
|
|
255
|
+
Classification calls use `maxTokens: 150`.
|
|
256
|
+
</verdict_format>
|
|
257
|
+
|
|
258
|
+
<runtime_behavior>
|
|
259
|
+
|
|
260
|
+
**Dedup**: A monitor will not re-classify the same user text. Once a user message has been
|
|
261
|
+
classified, the monitor skips until the user text changes. This prevents redundant
|
|
262
|
+
side-channel LLM calls within the same user turn.
|
|
263
|
+
|
|
264
|
+
**Ceiling and escalation**: After `ceiling` consecutive steers (flag/new verdicts without
|
|
265
|
+
an intervening clean), the monitor escalates. With `escalate: "ask"`, the user is prompted
|
|
266
|
+
to continue or dismiss. With `escalate: "dismiss"`, the monitor is silently dismissed for
|
|
267
|
+
the session. A `CLEAN` verdict resets the consecutive steer counter.
|
|
268
|
+
|
|
269
|
+
**Turn exclusion**: The `excludes` array prevents double-steering. If monitor A steers in
|
|
270
|
+
a turn, and monitor B has `"excludes": ["A"]`, monitor B skips that turn. Exclusion tracking
|
|
271
|
+
resets at `turn_start`.
|
|
272
|
+
|
|
273
|
+
**Abort**: Classification calls are aborted when the agent ends (via `agent_end` event).
|
|
274
|
+
Aborted classifications produce no verdict and no action.
|
|
275
|
+
|
|
276
|
+
**Write action**: Relative `write.path` values resolve from `process.cwd()`, not from the
|
|
277
|
+
monitor directory. Parent directories are created automatically. If the target file doesn't
|
|
278
|
+
exist or is unparseable, a fresh object is created. The `upsert` merge strategy matches on
|
|
279
|
+
the `id` field of array entries.
|
|
280
|
+
</runtime_behavior>
|
|
281
|
+
|
|
282
|
+
<commands>
|
|
283
|
+
All monitor management is through the `/monitors` command:
|
|
284
|
+
|
|
285
|
+
| Command | Description |
|
|
286
|
+
|---------|-------------|
|
|
287
|
+
| `/monitors` | List all monitors with global on/off state and per-monitor status |
|
|
288
|
+
| `/monitors on` | Enable all monitoring (session default) |
|
|
289
|
+
| `/monitors off` | Pause all monitoring for this session |
|
|
290
|
+
| `/monitors <name>` | Inspect a monitor: description, event, state, rule count, pattern count |
|
|
291
|
+
| `/monitors <name> rules` | List current rules (numbered) |
|
|
292
|
+
| `/monitors <name> rules add <text>` | Add a rule to calibrate the classifier |
|
|
293
|
+
| `/monitors <name> rules remove <n>` | Remove a rule by number |
|
|
294
|
+
| `/monitors <name> rules replace <n> <text>` | Replace a rule by number |
|
|
295
|
+
| `/monitors <name> patterns` | List current patterns (numbered, with severity and source) |
|
|
296
|
+
| `/monitors <name> dismiss` | Dismiss a monitor for this session |
|
|
297
|
+
| `/monitors <name> reset` | Reset a monitor's state and un-dismiss it |
|
|
298
|
+
|
|
299
|
+
Monitors with `event: "command"` also register `/<name>` as a programmatic trigger
|
|
300
|
+
for other extensions or workflows to invoke classification directly.
|
|
301
|
+
</commands>
|
|
302
|
+
|
|
303
|
+
<bundled_monitors>
|
|
304
|
+
Three example monitors ship in `examples/` and are seeded on first run:
|
|
305
|
+
|
|
306
|
+
**fragility** (`message_end`, `when: has_tool_results`)
|
|
307
|
+
Watches for unaddressed fragilities after tool use — errors, warnings, or broken state the
|
|
308
|
+
agent noticed but chose not to fix. Steers with "Fix the issue you left behind." Writes
|
|
309
|
+
findings to `.workflow/gaps.json` under `category: "fragility"`. Excludes: none. Ceiling: 5.
|
|
310
|
+
12 bundled patterns across categories: avoidance (dismiss-preexisting, not-my-change,
|
|
311
|
+
blame-environment, workaround-over-root-cause, elaborate-workaround-for-fixable),
|
|
312
|
+
error-handling (empty-catch, happy-path-only, early-return-on-unexpected,
|
|
313
|
+
undocumented-delegation, silent-fallback), deferral (todo-instead-of-fix,
|
|
314
|
+
prose-without-action).
|
|
315
|
+
|
|
316
|
+
**hedge** (`turn_end`, `when: always`)
|
|
317
|
+
Detects when the assistant deviates from what the user actually said — substituting
|
|
318
|
+
questions, projecting intent, or deflecting instead of answering. Steers with "Address
|
|
319
|
+
what the user actually said." Does not write to files (steer-only). Excludes: `["fragility"]`
|
|
320
|
+
(skips if fragility already steered this turn). Ceiling: 3.
|
|
321
|
+
8 bundled patterns across categories: substitution (rephrase-question, reinterpret-words),
|
|
322
|
+
projection (assume-intent, attribute-position), augmentation (add-questions),
|
|
323
|
+
deflection (ask-permission, qualify-yesno, counter-question).
|
|
324
|
+
|
|
325
|
+
**work-quality** (`command`, `when: always`)
|
|
326
|
+
On-demand work quality analysis invoked via `/work-quality`. Analyzes user request, tool
|
|
327
|
+
calls, and assistant response for quality issues. Writes findings to `.workflow/gaps.json`
|
|
328
|
+
under `category: "work-quality"`. Ceiling: 3.
|
|
329
|
+
11 bundled patterns across categories: methodology (trial-and-error, symptom-fix,
|
|
330
|
+
double-edit, edit-without-read, insanity-retry, no-plan), verification (no-verify),
|
|
331
|
+
scope (excessive-changes, wrong-problem), quality (copy-paste), cleanup (debug-artifacts).
|
|
332
|
+
</bundled_monitors>
|
|
333
|
+
|
|
334
|
+
<disabling_monitors>
|
|
335
|
+
**Session-level** (temporary):
|
|
336
|
+
- `/monitors off` — pauses all monitoring for the current session
|
|
337
|
+
- `/monitors <name> dismiss` — silences a single monitor for the session
|
|
338
|
+
- `/monitors <name> reset` — un-dismisses and resets a monitor's state
|
|
339
|
+
|
|
340
|
+
**Permanent**:
|
|
341
|
+
- To disable a single monitor: delete its `.monitor.json` file (and optionally its `.patterns.json` and `.instructions.json`)
|
|
342
|
+
- Or empty its patterns array — a monitor with zero patterns skips classification entirely
|
|
343
|
+
- To disable all monitoring: remove all `.monitor.json` files from `.pi/monitors/` and
|
|
344
|
+
`~/.pi/agent/monitors/`. The extension exits silently when zero monitors are discovered.
|
|
345
|
+
|
|
346
|
+
Monitors also auto-silence at their ceiling. With `escalate: "ask"`, the user is prompted
|
|
347
|
+
to continue or dismiss. With `escalate: "dismiss"`, the monitor silences automatically.
|
|
348
|
+
</disabling_monitors>
|
|
349
|
+
|
|
350
|
+
<example_creating>
|
|
351
|
+
1. Create `.pi/monitors/naming.monitor.json`:
|
|
352
|
+
|
|
353
|
+
```json
|
|
354
|
+
{
|
|
355
|
+
"name": "naming",
|
|
356
|
+
"description": "Detects poor naming choices in code changes",
|
|
357
|
+
"event": "turn_end",
|
|
358
|
+
"when": "has_file_writes",
|
|
359
|
+
"scope": { "target": "main" },
|
|
360
|
+
"classify": {
|
|
361
|
+
"model": "claude-sonnet-4-20250514",
|
|
362
|
+
"context": ["tool_calls"],
|
|
363
|
+
"excludes": [],
|
|
364
|
+
"prompt": "An agent made code changes. Check if any new identifiers have poor names.\n\nActions taken:\n{tool_calls}\n\n{instructions}\n\nNaming patterns to check:\n{patterns}\n\nReply CLEAN if all names are clear.\nReply FLAG:<description> if a known naming pattern matched.\nReply NEW:<pattern>|<description> if a naming issue not covered by existing patterns."
|
|
365
|
+
},
|
|
366
|
+
"patterns": { "path": "naming.patterns.json", "learn": true },
|
|
367
|
+
"instructions": { "path": "naming.instructions.json" },
|
|
368
|
+
"actions": {
|
|
369
|
+
"on_flag": { "steer": "Rename the poorly named identifier." },
|
|
370
|
+
"on_new": { "steer": "Rename the poorly named identifier.", "learn_pattern": true },
|
|
371
|
+
"on_clean": null
|
|
372
|
+
},
|
|
373
|
+
"ceiling": 3,
|
|
374
|
+
"escalate": "ask"
|
|
375
|
+
}
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
2. Create `.pi/monitors/naming.patterns.json`:
|
|
379
|
+
|
|
380
|
+
```json
|
|
381
|
+
[
|
|
382
|
+
{ "id": "single-letter", "description": "Single-letter variable names outside of loop counters", "severity": "warning", "source": "bundled" },
|
|
383
|
+
{ "id": "generic-names", "description": "Generic names like data, info, result, value, temp without context", "severity": "warning", "source": "bundled" },
|
|
384
|
+
{ "id": "bool-not-question", "description": "Boolean variables not phrased as questions (is, has, can, should)", "severity": "info", "source": "bundled" }
|
|
385
|
+
]
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
3. Create `.pi/monitors/naming.instructions.json`:
|
|
389
|
+
|
|
390
|
+
```json
|
|
391
|
+
[]
|
|
392
|
+
```
|
|
393
|
+
</example_creating>
|
|
394
|
+
|
|
395
|
+
<success_criteria>
|
|
396
|
+
- Monitor `.monitor.json` validates against `schemas/monitor.schema.json`
|
|
397
|
+
- Patterns `.patterns.json` validates against `schemas/monitor-pattern.schema.json`
|
|
398
|
+
- Patterns array is non-empty (empty patterns = monitor does nothing)
|
|
399
|
+
- Classification prompt includes `{patterns}` placeholder and verdict format instructions (CLEAN/FLAG/NEW)
|
|
400
|
+
- Actions specify `steer` for `scope.target: "main"` monitors, `write` for findings output
|
|
401
|
+
- `write.path` is set relative to project cwd, not monitor directory
|
|
402
|
+
- `excludes` lists monitors that should not double-steer in the same turn
|
|
403
|
+
- Instructions file exists (even if empty `[]`) to enable `/monitors <name> rules add <text>` calibration
|
|
404
|
+
</success_criteria>
|