@davidorex/pi-behavior-monitors 0.1.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -50
- package/README.md +9 -1
- package/dist/index.d.ts +149 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1456 -0
- package/dist/index.js.map +1 -0
- package/examples/commit-hygiene/classify.md +29 -0
- package/examples/commit-hygiene.instructions.json +1 -0
- package/examples/commit-hygiene.monitor.json +33 -0
- package/examples/commit-hygiene.patterns.json +44 -0
- package/examples/fragility/classify.md +33 -0
- package/examples/fragility.monitor.json +61 -60
- package/examples/fragility.patterns.json +84 -84
- package/examples/hedge/classify.md +38 -0
- package/examples/hedge.monitor.json +33 -32
- package/examples/hedge.patterns.json +56 -8
- package/examples/work-quality/classify.md +30 -0
- package/examples/work-quality.monitor.json +61 -60
- package/examples/work-quality.patterns.json +77 -11
- package/package.json +53 -48
- package/schemas/monitor-pattern.schema.json +36 -36
- package/schemas/monitor.schema.json +158 -154
- package/skills/pi-behavior-monitors/SKILL.md +369 -48
- package/index.ts +0 -1234
|
@@ -1,34 +1,35 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
2
|
+
"name": "hedge",
|
|
3
|
+
"description": "Detects when assistant deviates from what the user said",
|
|
4
|
+
"event": "turn_end",
|
|
5
|
+
"when": "always",
|
|
6
|
+
"scope": {
|
|
7
|
+
"target": "main"
|
|
8
|
+
},
|
|
9
|
+
"classify": {
|
|
10
|
+
"model": "claude-sonnet-4-20250514",
|
|
11
|
+
"context": ["user_text", "tool_calls", "custom_messages", "assistant_text"],
|
|
12
|
+
"excludes": ["fragility"],
|
|
13
|
+
"promptTemplate": "hedge/classify.md",
|
|
14
|
+
"prompt": "The user said:\n\"{user_text}\"\n\n{tool_calls}\n{custom_messages}\n\nThe assistant's latest response:\n\"{assistant_text}\"\n\n{instructions}\n\nGiven the full context of what the user asked and what the assistant did,\ndid the assistant deviate from what the user actually said in its latest\nresponse?\n\nIf the user's request has been addressed by the actions taken, the\nassistant summarizing that completed work is not a deviation.\n\nCheck against these patterns:\n{patterns}\n\nReply CLEAN if the assistant stuck to what the user actually said.\nReply FLAG:<one sentence, what was added or substituted> if a known\npattern was matched.\nReply NEW:<new pattern to add>|<one sentence, what was added or\nsubstituted> if the assistant deviated in a way not covered by\nexisting patterns."
|
|
15
|
+
},
|
|
16
|
+
"patterns": {
|
|
17
|
+
"path": "hedge.patterns.json",
|
|
18
|
+
"learn": true
|
|
19
|
+
},
|
|
20
|
+
"instructions": {
|
|
21
|
+
"path": "hedge.instructions.json"
|
|
22
|
+
},
|
|
23
|
+
"actions": {
|
|
24
|
+
"on_flag": {
|
|
25
|
+
"steer": "Address what the user actually said."
|
|
26
|
+
},
|
|
27
|
+
"on_new": {
|
|
28
|
+
"steer": "Address what the user actually said.",
|
|
29
|
+
"learn_pattern": true
|
|
30
|
+
},
|
|
31
|
+
"on_clean": null
|
|
32
|
+
},
|
|
33
|
+
"ceiling": 3,
|
|
34
|
+
"escalate": "ask"
|
|
34
35
|
}
|
|
@@ -1,10 +1,58 @@
|
|
|
1
1
|
[
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
2
|
+
{
|
|
3
|
+
"id": "rephrase-question",
|
|
4
|
+
"description": "Rephrasing the user's question into a different question and answering that instead",
|
|
5
|
+
"severity": "warning",
|
|
6
|
+
"category": "substitution",
|
|
7
|
+
"source": "bundled"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"id": "assume-intent",
|
|
11
|
+
"description": "Assuming intent the user did not express",
|
|
12
|
+
"severity": "warning",
|
|
13
|
+
"category": "projection",
|
|
14
|
+
"source": "bundled"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "add-questions",
|
|
18
|
+
"description": "Adding questions the user did not ask",
|
|
19
|
+
"severity": "warning",
|
|
20
|
+
"category": "augmentation",
|
|
21
|
+
"source": "bundled"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"id": "reinterpret-words",
|
|
25
|
+
"description": "Interpreting the user's words as meaning something other than what they said",
|
|
26
|
+
"severity": "warning",
|
|
27
|
+
"category": "substitution",
|
|
28
|
+
"source": "bundled"
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "attribute-position",
|
|
32
|
+
"description": "Attributing a position or preference the user did not state",
|
|
33
|
+
"severity": "warning",
|
|
34
|
+
"category": "projection",
|
|
35
|
+
"source": "bundled"
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"id": "ask-permission",
|
|
39
|
+
"description": "Asking permission to do something instead of doing it when the user asked a direct question",
|
|
40
|
+
"severity": "warning",
|
|
41
|
+
"category": "deflection",
|
|
42
|
+
"source": "bundled"
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"id": "qualify-yesno",
|
|
46
|
+
"description": "Answering a yes/no question with qualifiers instead of yes or no",
|
|
47
|
+
"severity": "info",
|
|
48
|
+
"category": "deflection",
|
|
49
|
+
"source": "bundled"
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"id": "counter-question",
|
|
53
|
+
"description": "Deflecting with a counter-question when the user expected an answer",
|
|
54
|
+
"severity": "warning",
|
|
55
|
+
"category": "deflection",
|
|
56
|
+
"source": "bundled"
|
|
57
|
+
}
|
|
10
58
|
]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
An agent was asked:
|
|
2
|
+
"{{ user_text }}"
|
|
3
|
+
|
|
4
|
+
It performed these actions:
|
|
5
|
+
{{ tool_calls }}
|
|
6
|
+
|
|
7
|
+
Then it said:
|
|
8
|
+
"{{ assistant_text }}"
|
|
9
|
+
|
|
10
|
+
{{ instructions }}
|
|
11
|
+
|
|
12
|
+
Analyze the quality of the work. Check against these patterns:
|
|
13
|
+
{{ patterns }}
|
|
14
|
+
|
|
15
|
+
{% if iteration > 0 %}
|
|
16
|
+
NOTE: You have steered {{ iteration }} time(s) already this session.
|
|
17
|
+
The agent's latest response is below. If the agent explicitly acknowledged
|
|
18
|
+
the quality issue and stated a concrete plan to address it, reply CLEAN to
|
|
19
|
+
allow the agent to follow through. Re-flag only if the agent ignored or
|
|
20
|
+
deflected the steer.
|
|
21
|
+
|
|
22
|
+
Agent response:
|
|
23
|
+
{{ assistant_text }}
|
|
24
|
+
{% endif %}
|
|
25
|
+
|
|
26
|
+
Reply CLEAN if the work was sound.
|
|
27
|
+
Reply FLAG:<one sentence describing the quality issue> if a known
|
|
28
|
+
pattern was matched.
|
|
29
|
+
Reply NEW:<new pattern to add>|<one sentence describing the quality
|
|
30
|
+
issue> if there's a work quality problem not covered by existing patterns.
|
|
@@ -1,62 +1,63 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
2
|
+
"name": "work-quality",
|
|
3
|
+
"description": "On-demand work quality analysis",
|
|
4
|
+
"event": "command",
|
|
5
|
+
"when": "always",
|
|
6
|
+
"scope": {
|
|
7
|
+
"target": "main"
|
|
8
|
+
},
|
|
9
|
+
"classify": {
|
|
10
|
+
"model": "claude-sonnet-4-20250514",
|
|
11
|
+
"context": ["user_text", "tool_calls", "assistant_text"],
|
|
12
|
+
"excludes": [],
|
|
13
|
+
"promptTemplate": "work-quality/classify.md",
|
|
14
|
+
"prompt": "An agent was asked:\n\"{user_text}\"\n\nIt performed these actions:\n{tool_calls}\n\nThen it said:\n\"{assistant_text}\"\n\n{instructions}\n\nAnalyze the quality of the work. Check against these patterns:\n{patterns}\n\nReply CLEAN if the work was sound.\nReply FLAG:<one sentence describing the quality issue> if a known\npattern was matched.\nReply NEW:<new pattern to add>|<one sentence describing the quality\nissue> if there's a work quality problem not covered by existing patterns."
|
|
15
|
+
},
|
|
16
|
+
"patterns": {
|
|
17
|
+
"path": "work-quality.patterns.json",
|
|
18
|
+
"learn": true
|
|
19
|
+
},
|
|
20
|
+
"instructions": {
|
|
21
|
+
"path": "work-quality.instructions.json"
|
|
22
|
+
},
|
|
23
|
+
"actions": {
|
|
24
|
+
"on_flag": {
|
|
25
|
+
"steer": "Fix the quality issue.",
|
|
26
|
+
"write": {
|
|
27
|
+
"path": ".workflow/gaps.json",
|
|
28
|
+
"schema": "schemas/gaps.schema.json",
|
|
29
|
+
"merge": "append",
|
|
30
|
+
"array_field": "gaps",
|
|
31
|
+
"template": {
|
|
32
|
+
"id": "quality-{finding_id}",
|
|
33
|
+
"description": "{description}",
|
|
34
|
+
"status": "open",
|
|
35
|
+
"category": "work-quality",
|
|
36
|
+
"priority": "{severity}",
|
|
37
|
+
"source": "monitor"
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
},
|
|
41
|
+
"on_new": {
|
|
42
|
+
"steer": "Fix the quality issue.",
|
|
43
|
+
"learn_pattern": true,
|
|
44
|
+
"write": {
|
|
45
|
+
"path": ".workflow/gaps.json",
|
|
46
|
+
"schema": "schemas/gaps.schema.json",
|
|
47
|
+
"merge": "append",
|
|
48
|
+
"array_field": "gaps",
|
|
49
|
+
"template": {
|
|
50
|
+
"id": "quality-{finding_id}",
|
|
51
|
+
"description": "{description}",
|
|
52
|
+
"status": "open",
|
|
53
|
+
"category": "work-quality",
|
|
54
|
+
"priority": "warning",
|
|
55
|
+
"source": "monitor"
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"on_clean": null
|
|
60
|
+
},
|
|
61
|
+
"ceiling": 3,
|
|
62
|
+
"escalate": "ask"
|
|
62
63
|
}
|
|
@@ -1,13 +1,79 @@
|
|
|
1
1
|
[
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
2
|
+
{
|
|
3
|
+
"id": "trial-and-error",
|
|
4
|
+
"description": "Trial-and-error instead of reading code to understand it first",
|
|
5
|
+
"severity": "warning",
|
|
6
|
+
"category": "methodology",
|
|
7
|
+
"source": "bundled"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"id": "no-verify",
|
|
11
|
+
"description": "Making changes without verifying them (no check/test run after edits)",
|
|
12
|
+
"severity": "error",
|
|
13
|
+
"category": "verification",
|
|
14
|
+
"source": "bundled"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "symptom-fix",
|
|
18
|
+
"description": "Fixing symptoms instead of root causes",
|
|
19
|
+
"severity": "warning",
|
|
20
|
+
"category": "methodology",
|
|
21
|
+
"source": "bundled"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"id": "excessive-changes",
|
|
25
|
+
"description": "Changing more files than necessary to solve the problem",
|
|
26
|
+
"severity": "warning",
|
|
27
|
+
"category": "scope",
|
|
28
|
+
"source": "bundled"
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "copy-paste",
|
|
32
|
+
"description": "Copy-pasting code instead of extracting shared logic",
|
|
33
|
+
"severity": "warning",
|
|
34
|
+
"category": "quality",
|
|
35
|
+
"source": "bundled"
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"id": "debug-artifacts",
|
|
39
|
+
"description": "Leaving debug artifacts (console.log, commented-out code, temporary files)",
|
|
40
|
+
"severity": "warning",
|
|
41
|
+
"category": "cleanup",
|
|
42
|
+
"source": "bundled"
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
"id": "double-edit",
|
|
46
|
+
"description": "Making an edit then immediately making another edit to the same file to fix the first edit",
|
|
47
|
+
"severity": "info",
|
|
48
|
+
"category": "methodology",
|
|
49
|
+
"source": "bundled"
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"id": "edit-without-read",
|
|
53
|
+
"description": "Not reading a file before editing it",
|
|
54
|
+
"severity": "error",
|
|
55
|
+
"category": "methodology",
|
|
56
|
+
"source": "bundled"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"id": "insanity-retry",
|
|
60
|
+
"description": "Running a command, getting an error, and running the same command again expecting different results",
|
|
61
|
+
"severity": "warning",
|
|
62
|
+
"category": "methodology",
|
|
63
|
+
"source": "bundled"
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
"id": "wrong-problem",
|
|
67
|
+
"description": "Solving a different problem than the one that was asked about",
|
|
68
|
+
"severity": "error",
|
|
69
|
+
"category": "scope",
|
|
70
|
+
"source": "bundled"
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"id": "no-plan",
|
|
74
|
+
"description": "Did not create a plan before starting work",
|
|
75
|
+
"severity": "info",
|
|
76
|
+
"category": "methodology",
|
|
77
|
+
"source": "bundled"
|
|
78
|
+
}
|
|
13
79
|
]
|
package/package.json
CHANGED
|
@@ -1,50 +1,55 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
2
|
+
"name": "@davidorex/pi-behavior-monitors",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "Behavior monitors for pi that watch agent activity and steer corrections",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"pi-package"
|
|
8
|
+
],
|
|
9
|
+
"license": "MIT",
|
|
10
|
+
"author": "David Ryan",
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "git+https://github.com/davidorex/pi-project-workflows.git",
|
|
14
|
+
"directory": "packages/pi-behavior-monitors"
|
|
15
|
+
},
|
|
16
|
+
"homepage": "https://github.com/davidorex/pi-project-workflows/tree/main/packages/pi-behavior-monitors",
|
|
17
|
+
"main": "./dist/index.js",
|
|
18
|
+
"types": "./dist/index.d.ts",
|
|
19
|
+
"files": [
|
|
20
|
+
"dist/",
|
|
21
|
+
"examples",
|
|
22
|
+
"schemas",
|
|
23
|
+
"skills",
|
|
24
|
+
"README.md",
|
|
25
|
+
"CHANGELOG.md"
|
|
26
|
+
],
|
|
27
|
+
"pi": {
|
|
28
|
+
"extensions": [
|
|
29
|
+
"./dist/index.js"
|
|
30
|
+
],
|
|
31
|
+
"skills": [
|
|
32
|
+
"./skills"
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
"scripts": {
|
|
36
|
+
"clean": "rm -rf dist",
|
|
37
|
+
"build": "tsc -p tsconfig.build.json",
|
|
38
|
+
"prepublishOnly": "npm run clean && npm run build",
|
|
39
|
+
"test": "vitest run",
|
|
40
|
+
"test:watch": "vitest"
|
|
41
|
+
},
|
|
42
|
+
"dependencies": {
|
|
43
|
+
"nunjucks": "^3.2.4"
|
|
44
|
+
},
|
|
45
|
+
"peerDependencies": {
|
|
46
|
+
"@mariozechner/pi-ai": "*",
|
|
47
|
+
"@mariozechner/pi-coding-agent": "*",
|
|
48
|
+
"@mariozechner/pi-tui": "*",
|
|
49
|
+
"@sinclair/typebox": "*"
|
|
50
|
+
},
|
|
51
|
+
"devDependencies": {
|
|
52
|
+
"@types/nunjucks": "^3.2.6",
|
|
53
|
+
"vitest": "^3.2.4"
|
|
54
|
+
}
|
|
50
55
|
}
|
|
@@ -1,38 +1,38 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
2
|
+
"type": "object",
|
|
3
|
+
"required": ["id", "description"],
|
|
4
|
+
"properties": {
|
|
5
|
+
"id": {
|
|
6
|
+
"type": "string",
|
|
7
|
+
"description": "Stable identifier for dedup"
|
|
8
|
+
},
|
|
9
|
+
"description": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "What this pattern detects"
|
|
12
|
+
},
|
|
13
|
+
"severity": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"enum": ["error", "warning", "info"],
|
|
16
|
+
"default": "warning"
|
|
17
|
+
},
|
|
18
|
+
"category": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"description": "Grouping key for the pattern"
|
|
21
|
+
},
|
|
22
|
+
"examples": {
|
|
23
|
+
"type": "array",
|
|
24
|
+
"items": { "type": "string" },
|
|
25
|
+
"description": "Example manifestations of this pattern"
|
|
26
|
+
},
|
|
27
|
+
"learned_at": {
|
|
28
|
+
"type": "string",
|
|
29
|
+
"format": "date-time",
|
|
30
|
+
"description": "When this pattern was first detected"
|
|
31
|
+
},
|
|
32
|
+
"source": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"enum": ["bundled", "learned", "user"],
|
|
35
|
+
"description": "How this pattern was added"
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
38
|
}
|