claude-recall 0.21.2 → 0.22.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.json +12 -1
- package/.claude/skills/auto-preferences/SKILL.md +4 -4
- package/.claude/skills/auto-preferences/manifest.json +7 -7
- package/.claude/skills/memory-management/SKILL.md +7 -5
- package/README.md +92 -37
- package/dist/cli/claude-recall-cli.js +16 -1
- package/dist/cli/commands/hook-commands.js +10 -0
- package/dist/hooks/memory-stop-hook.js +2 -2
- package/dist/hooks/rule-injection-resolver.js +43 -0
- package/dist/hooks/rule-injector.js +155 -0
- package/dist/memory/storage.js +23 -0
- package/dist/pi/extension.js +86 -7
- package/dist/services/outcome-storage.js +61 -1
- package/dist/services/rule-retrieval.js +221 -0
- package/package.json +1 -1
package/.claude/settings.json
CHANGED
|
@@ -108,7 +108,18 @@
|
|
|
108
108
|
}
|
|
109
109
|
]
|
|
110
110
|
}
|
|
111
|
+
],
|
|
112
|
+
"SessionEnd": [
|
|
113
|
+
{
|
|
114
|
+
"hooks": [
|
|
115
|
+
{
|
|
116
|
+
"type": "command",
|
|
117
|
+
"command": "node /home/ebiarao/.nvm/versions/node/v20.19.3/lib/node_modules/claude-recall/dist/cli/claude-recall-cli.js hook run session-end-checkpoint",
|
|
118
|
+
"timeout": 5
|
|
119
|
+
}
|
|
120
|
+
]
|
|
121
|
+
}
|
|
111
122
|
]
|
|
112
123
|
},
|
|
113
|
-
"hooksVersion": "
|
|
124
|
+
"hooksVersion": "13.0.0"
|
|
114
125
|
}
|
|
@@ -12,10 +12,10 @@ Auto-generated from 5 memories. Last updated: 2026-04-11.
|
|
|
12
12
|
|
|
13
13
|
## Rules
|
|
14
14
|
|
|
15
|
-
- Session test preference
|
|
16
|
-
- Test preference
|
|
17
|
-
- Test preference
|
|
18
|
-
- Test preference
|
|
15
|
+
- Session test preference 1775902182248
|
|
16
|
+
- Test preference 1775902182184-2
|
|
17
|
+
- Test preference 1775902182184-1
|
|
18
|
+
- Test preference 1775902182184-0
|
|
19
19
|
- Test memory content
|
|
20
20
|
|
|
21
21
|
---
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"topicId": "preferences",
|
|
3
|
-
"sourceHash": "
|
|
3
|
+
"sourceHash": "a383c0d6502023d06954eb49fcab8886dc5181d5e59666f6c74a381221e44f87",
|
|
4
4
|
"memoryCount": 5,
|
|
5
|
-
"generatedAt": "2026-04-
|
|
5
|
+
"generatedAt": "2026-04-11T10:09:42.271Z",
|
|
6
6
|
"memoryKeys": [
|
|
7
|
-
"
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
"
|
|
11
|
-
"
|
|
7
|
+
"memory_1775902182249_x5rzzep7s",
|
|
8
|
+
"memory_1775902182226_9uo2kaw57",
|
|
9
|
+
"memory_1775902182211_pl5fzrb85",
|
|
10
|
+
"memory_1775902182185_q6f9widp3",
|
|
11
|
+
"memory_1775902182147_olowsptz3"
|
|
12
12
|
]
|
|
13
13
|
}
|
|
@@ -137,14 +137,16 @@ a SKILL.md file that Claude Code loads automatically.
|
|
|
137
137
|
|
|
138
138
|
## Automatic Capture Hooks
|
|
139
139
|
|
|
140
|
-
Claude Recall registers hooks on
|
|
140
|
+
Claude Recall registers six hooks across seven Claude Code events for automatic capture, just-in-time rule injection, and outcome tracking — no MCP tool call needed:
|
|
141
141
|
|
|
142
|
-
| Hook | Event | What it
|
|
142
|
+
| Hook | Event | What it does |
|
|
143
143
|
|------|-------|-----------------|
|
|
144
|
-
| `correction-detector` | UserPromptSubmit |
|
|
145
|
-
| `memory-stop` | Stop |
|
|
144
|
+
| `correction-detector` | UserPromptSubmit | Captures user corrections, preferences, and project knowledge from natural language |
|
|
145
|
+
| `memory-stop` | Stop | Captures corrections, preferences, failures, and devops patterns from the last 6 transcript entries |
|
|
146
146
|
| `precompact-preserve` | PreCompact | Broader sweep of up to 50 transcript entries before context compression |
|
|
147
147
|
| `session-end-checkpoint` | SessionEnd | Auto-saves a `{completed, remaining, blockers}` task checkpoint when the session ends voluntarily (`clear`, `prompt_input_exit`, `logout`). Spawns a detached worker so it stays within Claude Code's 1.5s SessionEnd timeout. Pi has the equivalent via the `session_shutdown` event handler. |
|
|
148
|
+
| `rule-injector` | PreToolUse | **Just-in-time rule injection.** Before each tool call, searches active rules for matches against `tool_name + tool_input` and injects the top 3 (excluding raw failures) as a `<system-reminder>` block adjacent to the action. Closes the rule-loading gap: rules are surfaced at the moment of decision, not 50,000 tokens upstream from where attention has moved on. Each injection is logged to `rule_injection_events` for outcome correlation. Pi has the equivalent via per-turn injection in the `before_agent_start` handler. |
|
|
149
|
+
| `rule-injection-resolver` | PostToolUse / PostToolUseFailure | Resolves recorded `rule_injection_events` with the tool outcome (success/failure). Together with the injector, this becomes the new "is this rule actually helpful" signal — replacing the broken `(applied from memory: ...)` citation regex. |
|
|
148
150
|
|
|
149
151
|
**Key behaviors:**
|
|
150
152
|
- **LLM-first classification** via Claude Haiku — detects natural statements like "we use tabs here" or "tests go in \_\_tests\_\_/" that regex would miss
|
|
@@ -156,7 +158,7 @@ Claude Recall registers hooks on four Claude Code events to capture memories aut
|
|
|
156
158
|
- Auto-checkpoint quality gate: refuses to save when the LLM detects the task was already complete — manual checkpoints stay sticky
|
|
157
159
|
- Always exits 0 — hooks never block Claude
|
|
158
160
|
|
|
159
|
-
**Setup:** Run `npx claude-recall setup --install` to register hooks in `.claude/settings.json`. After
|
|
161
|
+
**Setup:** Run `npx claude-recall setup --install` to register hooks in `.claude/settings.json`. After any upgrade, re-run `setup --install` in each project so newly-added hook events get registered (claude-recall uses a `hooksVersion` field to signal when registration has changed).
|
|
160
162
|
|
|
161
163
|
## Example Workflows
|
|
162
164
|
|
package/README.md
CHANGED
|
@@ -33,20 +33,33 @@ Your preferences, project structure, workflows, corrections, and coding style ar
|
|
|
33
33
|
|
|
34
34
|
### Install for Claude Code
|
|
35
35
|
|
|
36
|
+
#### First-time install
|
|
37
|
+
|
|
38
|
+
Run this **once** on your machine:
|
|
39
|
+
|
|
36
40
|
```bash
|
|
37
|
-
# Install globally
|
|
38
41
|
npm install -g claude-recall
|
|
42
|
+
```
|
|
39
43
|
|
|
40
|
-
|
|
41
|
-
claude-recall setup --install
|
|
44
|
+
Then run these **in the project directory** where you want claude-recall active:
|
|
42
45
|
|
|
43
|
-
|
|
46
|
+
```bash
|
|
47
|
+
claude-recall setup --install
|
|
44
48
|
claude mcp add claude-recall -- claude-recall mcp start
|
|
45
49
|
```
|
|
46
50
|
|
|
47
|
-
|
|
51
|
+
Restart Claude Code. **Verify**: ask *"Load my rules"* — Claude should call `mcp__claude-recall__load_rules`.
|
|
52
|
+
|
|
53
|
+
#### Adding to another project
|
|
48
54
|
|
|
49
|
-
|
|
55
|
+
The global binary is already installed. Just `cd` into the new project and run the per-project commands:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
claude-recall setup --install
|
|
59
|
+
claude mcp add claude-recall -- claude-recall mcp start
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Restart Claude Code in that project.
|
|
50
63
|
|
|
51
64
|
### Install for Pi
|
|
52
65
|
|
|
@@ -64,35 +77,61 @@ Both agents use the same database (`~/.claude-recall/claude-recall.db`). Memorie
|
|
|
64
77
|
|
|
65
78
|
### Upgrading
|
|
66
79
|
|
|
80
|
+
#### If you use Claude Code
|
|
81
|
+
|
|
82
|
+
Run this **once** to update the global binary:
|
|
83
|
+
|
|
67
84
|
```bash
|
|
68
|
-
# Claude Code — update binary + re-install hooks in each project
|
|
69
85
|
npm install -g claude-recall
|
|
70
|
-
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Then run this **in each project directory** where you use claude-recall (the binary upgrade alone isn't enough — new releases sometimes add hook events that need to be registered in each project's `.claude/settings.json`):
|
|
71
89
|
|
|
72
|
-
|
|
73
|
-
|
|
90
|
+
```bash
|
|
91
|
+
claude-recall setup --install
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Restart Claude Code so the new MCP server starts (or run `claude-recall mcp restart` from the project directory to keep the current session running).
|
|
95
|
+
|
|
96
|
+
**Verify**: `claude-recall --version` shows the new version, and asking *"Load my rules"* in Claude Code triggers `mcp__claude-recall__load_rules`.
|
|
97
|
+
|
|
98
|
+
#### If you use Pi
|
|
99
|
+
|
|
100
|
+
Run this **once** — the `npm:` prefix is required (it matches the original install command):
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
pi update npm:claude-recall
|
|
74
104
|
```
|
|
75
105
|
|
|
76
|
-
|
|
106
|
+
Restart Pi to load the updated extension.
|
|
107
|
+
|
|
108
|
+
**Verify**: `pi list` shows the new `claude-recall` version, and asking *"Load my rules"* in Pi triggers `recall_load_rules`.
|
|
109
|
+
|
|
110
|
+
#### If you use both
|
|
111
|
+
|
|
112
|
+
Both upgrades are independent — run the Claude Code section AND the Pi section. Both agents share the same `~/.claude-recall/claude-recall.db`, so memories captured in either are visible to the other.
|
|
77
113
|
|
|
78
114
|
---
|
|
79
115
|
|
|
80
116
|
## What to Expect
|
|
81
117
|
|
|
82
|
-
Once installed, Claude Recall works automatically in the background
|
|
118
|
+
Once installed, Claude Recall works automatically in the background. Each row below is tagged with the runtime it applies to so you can skip what doesn't apply to you.
|
|
83
119
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
120
|
+
| When | What happens | CC | Pi |
|
|
121
|
+
|---|---|:-:|:-:|
|
|
122
|
+
| **Session start** | Active rules are loaded before the first action and injected into the agent's context | ✓ | ✓ |
|
|
123
|
+
| **As you work** | Every prompt is classified for corrections and preferences. Natural statements like *"we use tabs here"* are detected and stored | ✓ | ✓ |
|
|
124
|
+
| **Before each tool call / agent turn** | **Just-in-time rule injection** — relevant rules are surfaced as a `<system-reminder>` block adjacent to the action so the agent sees them at the moment of decision (not 50,000 tokens upstream). Per-tool-call in CC; per-turn in Pi | ✓ | ✓ |
|
|
125
|
+
| **Tool outcomes** | Tool results (Bash, Edit, Write, etc.) are captured. Failures are stored; Bash failures are paired with their successful fixes | ✓ | ✓ |
|
|
126
|
+
| **Reask detection** | Frustration signals (*"still broken"*, *"that didn't work"*) are recorded as outcome events | ✓ | ✓ |
|
|
127
|
+
| **Before context compression** | Aggressive memory sweep captures important context before the window shrinks | ✓ | ✓ |
|
|
128
|
+
| **After context compression** | Rules are automatically re-injected into the new context so they're not lost | ✓ | |
|
|
129
|
+
| **Sub-agent spawned** | Active rules are injected into the sub-agent's context. Sub-agent outcomes (completed/failed/killed) are captured | ✓ | |
|
|
130
|
+
| **Rules sync** | Top 30 rules are exported as typed `.md` files to Claude Code's native memory directory | ✓ | |
|
|
131
|
+
| **Session exit** | **Auto-checkpoint** — the most recent task is extracted into a `{completed, remaining, blockers}` snapshot and saved for the next session. Critical for Pi (no `--resume` flag); safety net for CC users who exit without resuming | ✓ | ✓ |
|
|
132
|
+
| **End of session** | Session episodes are created, candidate lessons are extracted from failures, and validated patterns are promoted into active rules | ✓ | ✓ |
|
|
94
133
|
|
|
95
|
-
Classification
|
|
134
|
+
Classification and checkpoint extraction use Claude Haiku (via `ANTHROPIC_API_KEY`) with silent regex fallback. No configuration needed.
|
|
96
135
|
|
|
97
136
|
**Next session:** `load_rules` returns everything captured previously — the agent applies your preferences without being told twice.
|
|
98
137
|
|
|
@@ -120,24 +159,35 @@ Claude Recall provides four memory tools backed by a local SQLite database with
|
|
|
120
159
|
|
|
121
160
|
### Skills
|
|
122
161
|
|
|
123
|
-
Claude Recall uses skill files to teach agents when and how to use memory tools
|
|
162
|
+
Claude Recall uses skill files to teach agents when and how to use memory tools.
|
|
163
|
+
|
|
164
|
+
**Claude Code** uses Anthropic's [Agent Skills](https://agentskills.io/) open standard:
|
|
124
165
|
|
|
125
|
-
-
|
|
126
|
-
-
|
|
166
|
+
- `.claude/skills/memory-management/SKILL.md` — core skill, guides memory behavior
|
|
167
|
+
- `.claude/skills/auto-*/` — auto-generated, crystallized from accumulated memories
|
|
168
|
+
|
|
169
|
+
See Anthropic's [Agent Skills blog post](https://claude.com/blog/equipping-agents-for-the-real-world-with-agent-skills) for the standard.
|
|
170
|
+
|
|
171
|
+
**Pi** ships a single `skills/memory-management.md` loaded via Pi's package manifest. No setup needed.
|
|
127
172
|
|
|
128
173
|
### Outcome-Aware Learning
|
|
129
174
|
|
|
130
|
-
Claude Recall tracks what happens *after* the agent acts — not just what was said. The
|
|
175
|
+
Claude Recall tracks what happens *after* the agent acts — not just what was said. The pipeline:
|
|
131
176
|
|
|
132
177
|
```
|
|
133
178
|
action → outcome event → episode → candidate lesson → promotion → active rule
|
|
179
|
+
↓
|
|
180
|
+
JIT-injected before next action
|
|
181
|
+
↓
|
|
182
|
+
PostToolUse resolves outcome per rule
|
|
134
183
|
```
|
|
135
184
|
|
|
136
185
|
- **Outcome events** capture results from all tool types (Bash, Edit, Write, MCP), test outcomes, user corrections, and reask signals
|
|
137
186
|
- **Episodes** summarize entire sessions with outcome type, severity, and confidence
|
|
138
187
|
- **Candidate lessons** are extracted from failure patterns — deduplicated by Jaccard similarity
|
|
139
|
-
- **Promotion engine** graduates lessons into active rules after 2+ observations (or immediately for high-severity failures)
|
|
140
|
-
- **
|
|
188
|
+
- **Promotion engine** graduates lessons into active rules after 2+ observations (or immediately for high-severity failures)
|
|
189
|
+
- **Just-in-time rule injection (v0.22.0+)** — active rules are surfaced as a `<system-reminder>` block adjacent to each tool call (Claude Code) or each agent turn (Pi). Each injection is recorded in `rule_injection_events` and resolved with the tool's success/failure outcome by the PostToolUse hook. **This is the meter that measures rule effectiveness in practice.** It replaces the older citation-detection regex (which empirically returned 0 citations across thousands of opportunities — agents don't reliably write `(applied from memory: …)` markers, so the meter never had data to work with).
|
|
190
|
+
- **Per-rule effectiveness data** accumulates over time in `rule_injection_events`. Future releases will use it to deboost rules that are repeatedly injected without correlating to successful tool calls, and to auto-promote rules that are repeatedly injected before failures. As of v0.22.0 the data is being collected; ranking is not yet feeding back from it.
|
|
141
191
|
|
|
142
192
|
---
|
|
143
193
|
|
|
@@ -203,17 +253,22 @@ Agents can also save/load checkpoints via MCP tools (`mcp__claude-recall__save_c
|
|
|
203
253
|
|
|
204
254
|
Manual `checkpoint save` is the explicit path. **Auto-checkpoint** is the safety net: when a session ends, the most recent task is extracted into a checkpoint automatically so the next session can resume.
|
|
205
255
|
|
|
206
|
-
|
|
207
|
-
|
|
256
|
+
**When it fires:**
|
|
257
|
+
|
|
258
|
+
- **Pi** — every `session_shutdown` event. **This is the only way to recover context in Pi: there is no `pi --resume` equivalent.**
|
|
259
|
+
- **Claude Code** — voluntary `SessionEnd` reasons (`clear`, `prompt_input_exit`, `logout`). Skips `bypass_permissions_disabled` and `other` (system-driven exits, not user intent). Useful if you exit and start fresh instead of using `claude --resume`.
|
|
260
|
+
|
|
261
|
+
**Behavior (both runtimes):**
|
|
208
262
|
|
|
209
|
-
|
|
263
|
+
- Uses Haiku to extract `{completed, remaining, blockers}` from the most recent task in the transcript
|
|
264
|
+
- **Quality gate**: refuses to save if the LLM detects the task was already complete (e.g., agent said "Done.", user said "thanks"). **Manual checkpoints are never overwritten with garbage** — an empty checkpoint is far better than a fabricated one
|
|
265
|
+
- **Tagged**: auto-saved checkpoints include `[auto-saved on <pi|cc> session exit at <iso-timestamp>]` in their notes field
|
|
266
|
+
- **Requires `ANTHROPIC_API_KEY`**. Without it, no auto-checkpoint is saved; manual `checkpoint save` still works
|
|
210
267
|
|
|
211
|
-
|
|
212
|
-
- **Notes tag**: auto-saved checkpoints include `[auto-saved on <pi|cc> session exit at <iso-timestamp>]` in the notes field, so you can tell auto from manual via `checkpoint load`.
|
|
213
|
-
- **Requires `ANTHROPIC_API_KEY`**. Without it, `extractCheckpointWithLLM` returns `null` (graceful fallback) and no auto-checkpoint is saved. Manual `checkpoint save` still works.
|
|
214
|
-
- **Disable**: remove the `SessionEnd` block from `.claude/settings.json` (Claude Code) or, for Pi, no per-project disable flag exists yet — open an issue if you need one.
|
|
268
|
+
**Disable:**
|
|
215
269
|
|
|
216
|
-
|
|
270
|
+
- **Claude Code**: remove the `SessionEnd` block from `.claude/settings.json`
|
|
271
|
+
- **Pi**: no per-project disable flag yet — [open an issue](https://github.com/raoulbia-ai/claude-recall/issues) if you need one
|
|
217
272
|
|
|
218
273
|
### Troubleshooting
|
|
219
274
|
|
|
@@ -809,7 +809,7 @@ async function main() {
|
|
|
809
809
|
// This avoids registry lookups on every hook invocation.
|
|
810
810
|
const cliScript = path.join(packageDir, 'dist', 'cli', 'claude-recall-cli.js');
|
|
811
811
|
const hookCmd = `node ${cliScript} hook run`;
|
|
812
|
-
settings.hooksVersion = '
|
|
812
|
+
settings.hooksVersion = '14.0.0'; // v14 = add PreToolUse rule-injector + Post resolver for JITRI
|
|
813
813
|
settings.hooks = {
|
|
814
814
|
SubagentStart: [
|
|
815
815
|
{
|
|
@@ -852,6 +852,11 @@ async function main() {
|
|
|
852
852
|
type: "command",
|
|
853
853
|
command: `${hookCmd} tool-outcome-watcher`,
|
|
854
854
|
timeout: 3
|
|
855
|
+
},
|
|
856
|
+
{
|
|
857
|
+
type: "command",
|
|
858
|
+
command: `${hookCmd} rule-injection-resolver`,
|
|
859
|
+
timeout: 3
|
|
855
860
|
}
|
|
856
861
|
]
|
|
857
862
|
}
|
|
@@ -863,6 +868,11 @@ async function main() {
|
|
|
863
868
|
type: "command",
|
|
864
869
|
command: `${hookCmd} tool-failure`,
|
|
865
870
|
timeout: 3
|
|
871
|
+
},
|
|
872
|
+
{
|
|
873
|
+
type: "command",
|
|
874
|
+
command: `${hookCmd} rule-injection-resolver`,
|
|
875
|
+
timeout: 3
|
|
866
876
|
}
|
|
867
877
|
]
|
|
868
878
|
}
|
|
@@ -874,6 +884,11 @@ async function main() {
|
|
|
874
884
|
{
|
|
875
885
|
type: "command",
|
|
876
886
|
command: `python3 ${hookDest}`
|
|
887
|
+
},
|
|
888
|
+
{
|
|
889
|
+
type: "command",
|
|
890
|
+
command: `${hookCmd} rule-injector`,
|
|
891
|
+
timeout: 5
|
|
877
892
|
}
|
|
878
893
|
]
|
|
879
894
|
}
|
|
@@ -116,6 +116,16 @@ class HookCommands {
|
|
|
116
116
|
await handleSessionEndCheckpointWorker(input);
|
|
117
117
|
break;
|
|
118
118
|
}
|
|
119
|
+
case 'rule-injector': {
|
|
120
|
+
const { handleRuleInjector } = await Promise.resolve().then(() => __importStar(require('../../hooks/rule-injector')));
|
|
121
|
+
await handleRuleInjector(input);
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
124
|
+
case 'rule-injection-resolver': {
|
|
125
|
+
const { handleRuleInjectionResolver } = await Promise.resolve().then(() => __importStar(require('../../hooks/rule-injection-resolver')));
|
|
126
|
+
await handleRuleInjectionResolver(input);
|
|
127
|
+
break;
|
|
128
|
+
}
|
|
119
129
|
default:
|
|
120
130
|
console.error(`Unknown hook: ${name}`);
|
|
121
131
|
console.error('Available: correction-detector, memory-stop, precompact-preserve, memory-sync, tool-outcome-watcher, session-end-checkpoint');
|
|
@@ -174,9 +174,9 @@ async function handleMemoryStop(input) {
|
|
|
174
174
|
// Prune old outcome data to prevent unbounded table growth
|
|
175
175
|
try {
|
|
176
176
|
const pruned = outcomeStorage.pruneOldData();
|
|
177
|
-
const total = pruned.episodes + pruned.events + pruned.lessons + pruned.stats;
|
|
177
|
+
const total = pruned.episodes + pruned.events + pruned.lessons + pruned.stats + pruned.injections;
|
|
178
178
|
if (total > 0) {
|
|
179
|
-
(0, shared_1.hookLog)('memory-stop', `Pruned: ${pruned.episodes} episodes, ${pruned.events} events, ${pruned.lessons} lessons, ${pruned.stats} orphaned stats`);
|
|
179
|
+
(0, shared_1.hookLog)('memory-stop', `Pruned: ${pruned.episodes} episodes, ${pruned.events} events, ${pruned.lessons} lessons, ${pruned.stats} orphaned stats, ${pruned.injections} injections`);
|
|
180
180
|
}
|
|
181
181
|
}
|
|
182
182
|
catch (err) {
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* rule-injection-resolver hook — fires on PostToolUse and PostToolUseFailure.
|
|
4
|
+
*
|
|
5
|
+
* Counterpart to rule-injector.ts. After a tool call completes (successfully
|
|
6
|
+
* or with failure), this hook resolves any rule_injection_events that were
|
|
7
|
+
* recorded for that tool_use_id with the actual outcome.
|
|
8
|
+
*
|
|
9
|
+
* The pair gives us a direct measurement of rule effectiveness:
|
|
10
|
+
* - Rule X was injected before Bash call Y
|
|
11
|
+
* - Bash call Y succeeded → rule X co-occurs with success
|
|
12
|
+
* - Bash call Y failed → rule X was either ignored, wrong, or unrelated
|
|
13
|
+
*
|
|
14
|
+
* Aggregated over time, this becomes the new "is this rule helpful" signal,
|
|
15
|
+
* replacing the broken citation-detection regex (.research/rule-loading-gap.md).
|
|
16
|
+
*
|
|
17
|
+
* Always exits cleanly with no stdout — this hook only writes to the DB,
|
|
18
|
+
* it doesn't influence tool execution.
|
|
19
|
+
*/
|
|
20
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
|
+
exports.handleRuleInjectionResolver = handleRuleInjectionResolver;
|
|
22
|
+
const shared_1 = require("./shared");
|
|
23
|
+
const outcome_storage_1 = require("../services/outcome-storage");
|
|
24
|
+
/**
 * Hook entry point for PostToolUse / PostToolUseFailure.
 *
 * Resolves the rule-injection events recorded for `input.tool_use_id` with the
 * outcome of the tool call. Writes nothing to stdout; storage errors are
 * logged through hookLog and never propagate to the caller.
 *
 * @param {object} input - Hook payload; reads `tool_use_id` and `hook_event_name`.
 * @returns {Promise<void>}
 */
async function handleRuleInjectionResolver(input) {
    const toolUseId = input?.tool_use_id ?? '';
    const eventName = input?.hook_event_name ?? '';
    if (!toolUseId) {
        // Without an id there is nothing to correlate the outcome with.
        return;
    }
    // Only an explicit PostToolUseFailure counts as a failure; every other
    // event name (including a missing one) is recorded as a success.
    const outcome = eventName === 'PostToolUseFailure' ? 'failure' : 'success';
    try {
        const outcomeStorage = outcome_storage_1.OutcomeStorage.getInstance();
        const resolved = outcomeStorage.resolveRuleInjections(toolUseId, outcome);
        if (resolved > 0) {
            (0, shared_1.hookLog)('rule-injection-resolver', `Resolved ${resolved} rule injection(s) for ${toolUseId} as ${outcome}`);
        }
    }
    catch (err) {
        // A thrown value is not guaranteed to be an Error (it may even be
        // null), so read `.message` defensively instead of throwing from
        // inside the catch block.
        const reason = err?.message ?? String(err);
        (0, shared_1.hookLog)('rule-injection-resolver', `Error: ${reason}`);
    }
}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* rule-injector hook — fires on Claude Code's PreToolUse event.
|
|
4
|
+
*
|
|
5
|
+
* Just-in-time rule injection (JITRI). The core fix for the rule-loading gap
|
|
6
|
+
* documented in .research/rule-loading-gap.md: rules are loaded once at session
|
|
7
|
+
* start, then ignored when the agent acts because they're 50,000 tokens upstream
|
|
8
|
+
* by the time of the action. This hook closes that gap by searching active rules
|
|
9
|
+
* for matches against THIS specific tool call and injecting the top matches as
|
|
10
|
+
* a system-reminder block immediately adjacent to the tool action.
|
|
11
|
+
*
|
|
12
|
+
* Output mechanism (verified against cc-source-code/utils/hooks.ts:621 and
|
|
13
|
+
* services/tools/toolHooks.ts:565):
|
|
14
|
+
* - Hook prints JSON to stdout
|
|
15
|
+
* - JSON includes hookSpecificOutput.additionalContext
|
|
16
|
+
* - CC wraps that string in a <system-reminder> block via wrapInSystemReminder()
|
|
17
|
+
* and creates a meta user message at the moment of the tool call
|
|
18
|
+
* - The agent sees the rules adjacent to the action it's about to take
|
|
19
|
+
*
|
|
20
|
+
* No LLM call in the hot path — pure keyword-based ranking, ~10-30ms typical.
|
|
21
|
+
*
|
|
22
|
+
* Each injection is recorded as a rule_injection_event so we can later
|
|
23
|
+
* resolve it with the tool outcome (success/failure) and measure rule
|
|
24
|
+
* effectiveness directly. This is the meter that replaces the broken
|
|
25
|
+
* citation-detection regex.
|
|
26
|
+
*/
|
|
27
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
28
|
+
exports.handleRuleInjector = handleRuleInjector;
|
|
29
|
+
const shared_1 = require("./shared");
|
|
30
|
+
const memory_1 = require("../services/memory");
|
|
31
|
+
const config_1 = require("../services/config");
|
|
32
|
+
const outcome_storage_1 = require("../services/outcome-storage");
|
|
33
|
+
const rule_retrieval_1 = require("../services/rule-retrieval");
|
|
34
|
+
const memory_tools_1 = require("../mcp/tools/memory-tools");
|
|
35
|
+
// Maps a stored rule type to the short tag shown in the injected reminder.
// Frozen because it is a shared module-level lookup table that is only read.
const TYPE_LABELS = Object.freeze({
    correction: 'correction',
    devops: 'devops',
    preference: 'preference',
    failure: 'avoid',
    'project-knowledge': 'project',
});
|
|
42
|
+
/**
 * Render a rule's value for injection. Reuses the same formatRuleValue helper
 * that handleLoadRules uses (memory-tools.ts), so the rule-injector and
 * load_rules output stay consistent. Handles the historical value shapes,
 * including nested-content failures and stringified-JSON content.
 *
 * @param {*} value - The stored rule value, passed straight to formatRuleValue.
 * @returns {string} Human-readable text for the rule; always a string.
 */
function extractRuleSnippet(value) {
    const formatted = (0, memory_tools_1.formatRuleValue)(value);
    // Coerce defensively: the string methods below would throw on a
    // non-string result, and one malformed rule must not abort the injection.
    let snippet = typeof formatted === 'string' ? formatted : String(formatted ?? '');
    // formatRuleValue may return a stringified JSON for legacy shapes where
    // value.content is a JSON string. Try one parse-and-extract pass to pull
    // out a more readable summary.
    if (snippet.startsWith('{') && snippet.includes('what_failed')) {
        try {
            const parsed = JSON.parse(snippet);
            if (typeof parsed?.what_failed === 'string') {
                snippet = parsed.what_failed;
            }
        }
        catch { /* not valid JSON — fall through with the stringified text */ }
    }
    return snippet;
}
|
|
64
|
+
/**
 * Build the reminder text that is injected ahead of a tool call.
 *
 * @param {Array<{rule: {type: string, value: *}}>} matches - Ranked rule matches.
 * @param {string} toolName - Name of the tool about to run; interpolated into the header.
 * @returns {string} One header sentence followed by one bullet per rule, or
 *   '' when there are no matches.
 */
function formatInjection(matches, toolName) {
    if (matches.length === 0)
        return '';
    const lines = matches.map(({ rule }) => {
        const label = TYPE_LABELS[rule.type] ?? rule.type;
        // Collapse whitespace BEFORE truncating so the 200-character budget is
        // spent on content rather than on newlines and indentation.
        const snippet = extractRuleSnippet(rule.value)
            .replace(/\s+/g, ' ')
            .trim()
            .substring(0, 200)
            .trimEnd();
        return `• [${label}] ${snippet}`;
    });
    return (`Recall: ${matches.length} rule${matches.length === 1 ? '' : 's'} relevant to this ${toolName} call. ` +
        `Apply them or explicitly note why they don't fit:\n${lines.join('\n')}`);
}
|
|
75
|
+
/**
 * Hook entry point for PreToolUse: just-in-time rule injection.
 *
 * Ranks the project's active rules against the pending tool call. When any
 * match, it prints a JSON payload carrying
 * `hookSpecificOutput.additionalContext` on stdout and records one injection
 * event per matched rule. Every path prints exactly one JSON line on stdout
 * (`{}` when there is nothing to inject or when an error occurs), so the
 * caller always receives parseable output.
 *
 * @param {object} input - Hook payload; reads `tool_name`, `tool_input` and `tool_use_id`.
 * @returns {Promise<void>}
 */
async function handleRuleInjector(input) {
    // Treat a non-string tool_name as missing: startsWith() below would
    // otherwise throw before the try block is entered.
    const toolName = typeof input?.tool_name === 'string' ? input.tool_name : '';
    const toolInput = input?.tool_input ?? {};
    const toolUseId = input?.tool_use_id ?? '';
    if (!toolName) {
        // Nothing to do — print empty JSON so CC parses it cleanly
        process.stdout.write('{}\n');
        return;
    }
    // Skip the hook for our own tools so we don't recursively inject rules
    // about claude-recall into claude-recall calls.
    if (toolName.startsWith('mcp__claude-recall__') || toolName.startsWith('mcp__claude_recall')) {
        process.stdout.write('{}\n');
        return;
    }
    // Set once the real payload has been written, so the error path never
    // appends a second JSON document to stdout.
    let emitted = false;
    try {
        const projectId = config_1.ConfigService.getInstance().getProjectId();
        const memoryService = memory_1.MemoryService.getInstance();
        // Fetch all active rules for this project and hand the full set to the
        // ranker; selection happens there.
        const activeRules = memoryService.loadActiveRules(projectId);
        const allRules = [
            ...activeRules.preferences,
            ...activeRules.corrections,
            ...activeRules.failures,
            ...activeRules.devops,
        ].map(m => ({
            key: m.key,
            type: m.type,
            value: m.value,
            is_active: m.is_active !== false,
            timestamp: m.timestamp,
            project_id: m.project_id,
        }));
        if (allRules.length === 0) {
            (0, shared_1.hookLog)('rule-injector', `No active rules for project ${projectId} (tool=${toolName})`);
            process.stdout.write('{}\n');
            return;
        }
        const matches = (0, rule_retrieval_1.rankRulesForToolCall)(toolName, toolInput, allRules);
        if (matches.length === 0) {
            (0, shared_1.hookLog)('rule-injector', `No relevant rules for ${toolName} (scanned ${allRules.length})`);
            process.stdout.write('{}\n');
            return;
        }
        // Build the payload BEFORE recording anything: if formatting fails, no
        // injection events are stored for rules the agent never saw.
        const additionalContext = formatInjection(matches, toolName);
        const payload = JSON.stringify({
            hookSpecificOutput: {
                hookEventName: 'PreToolUse',
                additionalContext,
            },
        });
        // Record each injection so PostToolUse can resolve it with the outcome
        try {
            const outcomeStorage = outcome_storage_1.OutcomeStorage.getInstance();
            for (const m of matches) {
                outcomeStorage.recordRuleInjection({
                    rule_key: m.rule.key,
                    tool_name: toolName,
                    tool_use_id: toolUseId,
                    project_id: projectId,
                    match_score: m.score,
                    matched_tokens: m.matchedTokens,
                });
            }
        }
        catch (err) {
            // Non-critical — failure to record shouldn't block the injection itself
            (0, shared_1.hookLog)('rule-injector', `Failed to record injections: ${err?.message ?? String(err)}`);
        }
        process.stdout.write(payload + '\n');
        emitted = true;
        (0, shared_1.hookLog)('rule-injector', `Injected ${matches.length} rule(s) for ${toolName} (top score=${matches[0].score.toFixed(3)})`);
    }
    catch (err) {
        // Best-effort — never block the tool call. Write the fallback first so
        // stdout is valid even if logging itself fails.
        if (!emitted) {
            process.stdout.write('{}\n');
        }
        (0, shared_1.hookLog)('rule-injector', `Error: ${err?.message ?? String(err)}`);
    }
}
|
package/dist/memory/storage.js
CHANGED
|
@@ -206,6 +206,29 @@ class MemoryStorage {
|
|
|
206
206
|
last_retrieved_at TEXT
|
|
207
207
|
)`);
|
|
208
208
|
}
|
|
209
|
+
// v0.22.0: Just-in-time rule injection tracking. Replaces the broken
|
|
210
|
+
// citation-detection regex with direct measurement of "was the rule
|
|
211
|
+
// present at the moment of action." See .research/rule-loading-gap.md
|
|
212
|
+
// for the design motivation.
|
|
213
|
+
const injectionTable = this.db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name = 'rule_injection_events'").get();
|
|
214
|
+
if (!injectionTable) {
|
|
215
|
+
this.db.exec(`CREATE TABLE rule_injection_events (
|
|
216
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
217
|
+
rule_key TEXT NOT NULL,
|
|
218
|
+
tool_name TEXT NOT NULL,
|
|
219
|
+
tool_use_id TEXT,
|
|
220
|
+
project_id TEXT,
|
|
221
|
+
match_score REAL,
|
|
222
|
+
matched_tokens TEXT,
|
|
223
|
+
injected_at INTEGER NOT NULL,
|
|
224
|
+
tool_outcome TEXT,
|
|
225
|
+
resolved_at INTEGER
|
|
226
|
+
)`);
|
|
227
|
+
this.db.exec('CREATE INDEX idx_injection_rule ON rule_injection_events(rule_key)');
|
|
228
|
+
this.db.exec('CREATE INDEX idx_injection_project ON rule_injection_events(project_id)');
|
|
229
|
+
this.db.exec('CREATE INDEX idx_injection_tool_use ON rule_injection_events(tool_use_id)');
|
|
230
|
+
this.db.exec('CREATE INDEX idx_injection_unresolved ON rule_injection_events(resolved_at) WHERE resolved_at IS NULL');
|
|
231
|
+
}
|
|
209
232
|
}
|
|
210
233
|
catch (error) {
|
|
211
234
|
console.error('⚠️ Schema migration error:', error);
|
package/dist/pi/extension.js
CHANGED
|
@@ -14,6 +14,7 @@ const config_1 = require("../services/config");
|
|
|
14
14
|
const outcome_storage_1 = require("../services/outcome-storage");
|
|
15
15
|
const logging_1 = require("../services/logging");
|
|
16
16
|
const event_processors_1 = require("../shared/event-processors");
|
|
17
|
+
const rule_retrieval_1 = require("../services/rule-retrieval");
|
|
17
18
|
const LOAD_RULES_DIRECTIVE = 'Before your FIRST action, briefly state which rules below you will apply to this task.\n' +
|
|
18
19
|
'As you work, cite each rule at the point where it influences your action:\n' +
|
|
19
20
|
'(applied from memory: <short rule name>)\n' +
|
|
@@ -44,6 +45,38 @@ function extractVal(value) {
|
|
|
44
45
|
}
|
|
45
46
|
return String(value ?? '');
|
|
46
47
|
}
|
|
48
|
+
/**
 * Build the plain-text "relevant rules" reminder that is appended to the
 * system prompt for the current turn. The text is returned bare; no
 * system-reminder wrapper is added here.
 *
 * Each match becomes one bullet: `• [<label>] <snippet>`. The label is the
 * rule type mapped through a small display table (unknown types are shown
 * as-is). The snippet is the rule text with whitespace collapsed and capped
 * at 200 characters.
 *
 * @param {Array<{rule: {type: string, value: unknown}}>} matches Ranked rule matches.
 * @returns {string} '' when `matches` is empty, otherwise a header line
 *   followed by one bullet per match.
 */
function formatJitReminder(matches) {
    if (matches.length === 0)
        return '';
    const MAX_SNIPPET_LENGTH = 200;
    const TYPE_LABELS = {
        correction: 'correction',
        devops: 'devops',
        preference: 'preference',
        failure: 'avoid',
        'project-knowledge': 'project',
    };
    const lines = matches.map(m => {
        const label = TYPE_LABELS[m.rule.type] ?? m.rule.type;
        const v = m.rule.value;
        let snippet = '';
        if (typeof v === 'string') {
            snippet = v;
        }
        else if (typeof v === 'number' || typeof v === 'boolean') {
            // Primitive rule values would otherwise be rendered as an empty bullet.
            snippet = String(v);
        }
        else if (v && typeof v === 'object') {
            // Prefer the conventional text-bearing fields; fall back to raw JSON.
            snippet = typeof v.content === 'string' ? v.content
                : typeof v.value === 'string' ? v.value
                    : typeof v.title === 'string' ? v.title
                        : (JSON.stringify(v) ?? '');
        }
        // Collapse whitespace BEFORE truncating so newlines and indentation do
        // not eat into the character budget.
        const text = snippet
            .replace(/\s+/g, ' ')
            .trim()
            .substring(0, MAX_SNIPPET_LENGTH)
            .trimEnd();
        return `• [${label}] ${text}`;
    });
    return (`Recall: ${matches.length} rule${matches.length === 1 ? '' : 's'} relevant to this turn. ` +
        `Apply them or explicitly note why they don't fit:\n${lines.join('\n')}`);
}
|
|
47
80
|
/** Format active rules as markdown sections. */
|
|
48
81
|
function formatRules(rules) {
|
|
49
82
|
const sections = [];
|
|
@@ -97,17 +130,63 @@ function default_1(pi) {
|
|
|
97
130
|
// Non-critical
|
|
98
131
|
}
|
|
99
132
|
});
|
|
100
|
-
// --- Event: inject rules before
|
|
133
|
+
// --- Event: inject rules before each agent turn (full load on first turn,
|
|
134
|
+
// just-in-time relevant rules on subsequent turns based on the user's
|
|
135
|
+
// current prompt — Pi's analog of CC's PreToolUse rule injector) ---
|
|
101
136
|
pi.on('before_agent_start', (_event, _ctx) => {
|
|
102
|
-
if (rulesLoaded)
|
|
103
|
-
return;
|
|
104
|
-
rulesLoaded = true;
|
|
105
137
|
try {
|
|
106
138
|
const ms = memory_1.MemoryService.getInstance();
|
|
107
139
|
const rules = ms.loadActiveRules(projectId || undefined);
|
|
108
|
-
const
|
|
109
|
-
|
|
110
|
-
|
|
140
|
+
const allRulesFlat = [
|
|
141
|
+
...rules.preferences,
|
|
142
|
+
...rules.corrections,
|
|
143
|
+
...rules.failures,
|
|
144
|
+
...rules.devops,
|
|
145
|
+
].map(m => ({
|
|
146
|
+
key: m.key,
|
|
147
|
+
type: m.type,
|
|
148
|
+
value: m.value,
|
|
149
|
+
is_active: m.is_active !== false,
|
|
150
|
+
timestamp: m.timestamp,
|
|
151
|
+
project_id: m.project_id,
|
|
152
|
+
}));
|
|
153
|
+
// First turn: full ruleset to seed context, plus JIT injection for the
|
|
154
|
+
// very first prompt. Subsequent turns: JIT only — context already has
|
|
155
|
+
// the full set from turn 1.
|
|
156
|
+
let systemPromptOut;
|
|
157
|
+
if (!rulesLoaded) {
|
|
158
|
+
rulesLoaded = true;
|
|
159
|
+
const body = formatRules(rules);
|
|
160
|
+
if (body) {
|
|
161
|
+
systemPromptOut = _event.systemPrompt + '\n\n' + LOAD_RULES_DIRECTIVE + '\n\n---\n\n' + body;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
// JIT injection on every turn — match rules against the current user prompt
|
|
165
|
+
const userPrompt = _event?.prompt ?? '';
|
|
166
|
+
if (userPrompt && allRulesFlat.length > 0) {
|
|
167
|
+
const matches = (0, rule_retrieval_1.rankRulesForToolCall)('agent_turn', { command: userPrompt }, allRulesFlat);
|
|
168
|
+
if (matches.length > 0) {
|
|
169
|
+
const reminder = formatJitReminder(matches);
|
|
170
|
+
systemPromptOut = (systemPromptOut ?? _event.systemPrompt) + '\n\n' + reminder;
|
|
171
|
+
// Record each injection so we can correlate with success/failure later
|
|
172
|
+
try {
|
|
173
|
+
const outcomeStorage = outcome_storage_1.OutcomeStorage.getInstance();
|
|
174
|
+
for (const m of matches) {
|
|
175
|
+
outcomeStorage.recordRuleInjection({
|
|
176
|
+
rule_key: m.rule.key,
|
|
177
|
+
tool_name: 'pi:agent_turn',
|
|
178
|
+
tool_use_id: `pi_turn_${Date.now()}`,
|
|
179
|
+
project_id: projectId,
|
|
180
|
+
match_score: m.score,
|
|
181
|
+
matched_tokens: m.matchedTokens,
|
|
182
|
+
});
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
catch { /* non-critical */ }
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
if (systemPromptOut) {
|
|
189
|
+
return { systemPrompt: systemPromptOut };
|
|
111
190
|
}
|
|
112
191
|
}
|
|
113
192
|
catch {
|
|
@@ -201,21 +201,81 @@ class OutcomeStorage {
|
|
|
201
201
|
times_unhelpful = times_unhelpful + 1
|
|
202
202
|
`).run(key);
|
|
203
203
|
}
|
|
204
|
+
// --- Rule injection events (just-in-time rule injection meter) ---
|
|
205
|
+
//
|
|
206
|
+
// Replaces the broken citation-detection regex. Every time the JITRI hook
|
|
207
|
+
// injects a rule into a tool call's context, we record an event here.
|
|
208
|
+
// PostToolUse later resolves the event with the tool outcome (success or
|
|
209
|
+
// failure), giving us direct evidence of whether rules-at-the-moment-of-action
|
|
210
|
+
// are correlated with successful tool calls — without depending on the model
|
|
211
|
+
// remembering to write "(applied from memory: ...)" markers.
|
|
212
|
+
recordRuleInjection(input) {
|
|
213
|
+
const now = Date.now();
|
|
214
|
+
this.db.prepare(`
|
|
215
|
+
INSERT INTO rule_injection_events
|
|
216
|
+
(rule_key, tool_name, tool_use_id, project_id, match_score, matched_tokens, injected_at)
|
|
217
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
218
|
+
`).run(input.rule_key, input.tool_name, input.tool_use_id ?? null, input.project_id ?? null, input.match_score, JSON.stringify(input.matched_tokens), now);
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Resolve all unresolved injection events for a given tool_use_id with
|
|
222
|
+
* the tool's outcome. Called from PostToolUse / PostToolUseFailure.
|
|
223
|
+
*/
|
|
224
|
+
resolveRuleInjections(toolUseId, outcome) {
|
|
225
|
+
const now = Date.now();
|
|
226
|
+
const result = this.db.prepare(`
|
|
227
|
+
UPDATE rule_injection_events
|
|
228
|
+
SET tool_outcome = ?, resolved_at = ?
|
|
229
|
+
WHERE tool_use_id = ? AND resolved_at IS NULL
|
|
230
|
+
`).run(outcome, now, toolUseId);
|
|
231
|
+
return result.changes;
|
|
232
|
+
}
|
|
233
|
+
/**
|
|
234
|
+
* Per-rule injection summary for the outcomes CLI.
|
|
235
|
+
* Returns: rule_key, total injections, success/failure counts, helpfulness rate.
|
|
236
|
+
*/
|
|
237
|
+
getInjectionStats(opts) {
|
|
238
|
+
const limit = opts?.limit ?? 50;
|
|
239
|
+
const where = opts?.project_id ? 'WHERE project_id = ?' : '';
|
|
240
|
+
const params = opts?.project_id ? [opts.project_id, limit] : [limit];
|
|
241
|
+
const rows = this.db.prepare(`
|
|
242
|
+
SELECT
|
|
243
|
+
rule_key,
|
|
244
|
+
COUNT(*) as total_injections,
|
|
245
|
+
SUM(CASE WHEN tool_outcome = 'success' THEN 1 ELSE 0 END) as successes,
|
|
246
|
+
SUM(CASE WHEN tool_outcome = 'failure' THEN 1 ELSE 0 END) as failures,
|
|
247
|
+
SUM(CASE WHEN resolved_at IS NULL THEN 1 ELSE 0 END) as unresolved
|
|
248
|
+
FROM rule_injection_events
|
|
249
|
+
${where}
|
|
250
|
+
GROUP BY rule_key
|
|
251
|
+
ORDER BY total_injections DESC
|
|
252
|
+
LIMIT ?
|
|
253
|
+
`).all(...params);
|
|
254
|
+
return rows.map(r => ({
|
|
255
|
+
...r,
|
|
256
|
+
success_rate: (r.successes + r.failures) > 0
|
|
257
|
+
? r.successes / (r.successes + r.failures)
|
|
258
|
+
: 0,
|
|
259
|
+
}));
|
|
260
|
+
}
|
|
204
261
|
/**
|
|
205
262
|
* Prune old data from outcome tables to prevent unbounded growth.
|
|
206
263
|
* - Episodes older than 90 days
|
|
207
264
|
* - Outcome events older than 90 days
|
|
208
265
|
* - Rejected/archived candidate lessons older than 14 days
|
|
209
266
|
* - Orphaned memory_stats entries (key no longer in memories table)
|
|
267
|
+
* - Rule injection events older than 90 days
|
|
210
268
|
*/
|
|
211
269
|
pruneOldData() {
|
|
212
270
|
const cutoff90 = new Date(Date.now() - 90 * 24 * 60 * 60 * 1000).toISOString();
|
|
213
271
|
const cutoff14 = new Date(Date.now() - 14 * 24 * 60 * 60 * 1000).toISOString();
|
|
272
|
+
const cutoff90Ms = Date.now() - 90 * 24 * 60 * 60 * 1000;
|
|
214
273
|
const episodes = this.db.prepare('DELETE FROM episodes WHERE created_at < ?').run(cutoff90).changes;
|
|
215
274
|
const events = this.db.prepare('DELETE FROM outcome_events WHERE created_at < ?').run(cutoff90).changes;
|
|
216
275
|
const lessons = this.db.prepare("DELETE FROM candidate_lessons WHERE status IN ('rejected', 'archived') AND updated_at < ?").run(cutoff14).changes;
|
|
217
276
|
const stats = this.db.prepare('DELETE FROM memory_stats WHERE memory_key NOT IN (SELECT key FROM memories)').run().changes;
|
|
218
|
-
|
|
277
|
+
const injections = this.db.prepare('DELETE FROM rule_injection_events WHERE injected_at < ?').run(cutoff90Ms).changes;
|
|
278
|
+
return { episodes, events, lessons, stats, injections };
|
|
219
279
|
}
|
|
220
280
|
}
|
|
221
281
|
exports.OutcomeStorage = OutcomeStorage;
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Rule retrieval & ranking — the core of just-in-time rule injection (JITRI).
|
|
4
|
+
*
|
|
5
|
+
* This module is the meter that replaces the broken citation-detection regex.
|
|
6
|
+
* Instead of trying to detect "(applied from memory: ...)" markers in agent
|
|
7
|
+
* output (which empirically doesn't work — see .research/rule-loading-gap.md),
|
|
8
|
+
* we measure "was the relevant rule present at the moment of action" by
|
|
9
|
+
* injecting matched rules into the agent's context immediately adjacent to
|
|
10
|
+
* each tool call via a PreToolUse hook.
|
|
11
|
+
*
|
|
12
|
+
* This file is intentionally pure — it takes pre-fetched rules as input and
|
|
13
|
+
* has no DB access. The DB-fetching wrapper lives in RuleRetrievalService.
|
|
14
|
+
* Keeping the ranking pure makes it dead-simple to test and lets the same
|
|
15
|
+
* function serve both the CC PreToolUse hook path and the Pi
|
|
16
|
+
* `before_agent_start` path.
|
|
17
|
+
*
|
|
18
|
+
* Ranking ingredients:
|
|
19
|
+
* 1. Token overlap (fraction of query tokens that also appear in the rule; asymmetric, not a true Jaccard index) — main signal
|
|
20
|
+
* 2. Sticky boost (+0.5) — sticky rules always bubble to the top
|
|
21
|
+
* 3. Type priority — corrections > devops > preferences > failures
|
|
22
|
+
* 4. Recency boost — rules updated within 7 days get a small lift
|
|
23
|
+
*
|
|
24
|
+
* Filter: only rules with combined score >= MIN_SCORE are returned. Caps at
|
|
25
|
+
* TOP_N (3) so the additionalContext payload stays small enough to fit
|
|
26
|
+
* comfortably in the agent's attention budget.
|
|
27
|
+
*/
|
|
28
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
+
exports.buildToolCallQuery = buildToolCallQuery;
|
|
30
|
+
exports.rankRulesForToolCall = rankRulesForToolCall;
|
|
31
|
+
// Common English function words that carry no matching signal. Kept as
// space-separated rows purely for readability; the Set holds single words.
const STOP_WORDS = new Set([
    'the a an and or but is are was were be been',
    'being have has had do does did will would should',
    'could may might must shall can this that these those',
    'i you he she it we they them their what which',
    'who when where why how all each every both few',
    'more most other some such no nor not only own',
    'same so than too very just as in on at to',
    'for of with by from up down into over under',
].join(' ').split(' '));
// Tokens shorter than this are discarded by the tokenizer.
const MIN_TOKEN_LENGTH = 3;
// A rule must reach this combined score to be returned at all.
const MIN_SCORE = 0.15;
// Maximum number of rules returned per query.
const TOP_N = 3;
// Seven days, in milliseconds: the window in which a rule counts as recent.
const RECENT_WINDOW_MS = 1000 * 60 * 60 * 24 * 7;
// Added to the score of sticky rules.
const STICKY_BOOST = 0.5;
// Added to the score of recent rules.
const RECENCY_BOOST = 0.1;
// Per-type score adjustment. Corrections and devops rules are actionable and
// get the largest lift. Failures are auto-captured after the fact and tend
// to pile up as noise, so they are pushed down: a generic failure entry needs
// substantial token overlap to surface, while a real anti-pattern with high
// overlap still gets through. See .research/rule-loading-gap.md.
const TYPE_BOOSTS = {
    correction: 0.25,
    devops: 0.20,
    preference: 0.10,
    'project-knowledge': 0.05,
    failure: -0.10,
};
|
|
59
|
+
/**
 * Break free text into comparison tokens.
 *
 * The text is lower-cased and every character outside `a-z` / `0-9` acts as
 * a separator. Tokens shorter than MIN_TOKEN_LENGTH and tokens listed in
 * STOP_WORDS are dropped. Duplicates and original order are preserved.
 *
 * @param {unknown} text Input text; anything that is not a non-empty string
 *   yields an empty result.
 * @returns {string[]} The surviving tokens.
 */
function tokenize(text) {
    if (typeof text !== 'string' || text === '')
        return [];
    // Maximal alphanumeric runs are exactly the pieces left after treating
    // everything else as a separator.
    const words = text.toLowerCase().match(/[a-z0-9]+/g) ?? [];
    return words.filter(word => !(word.length < MIN_TOKEN_LENGTH || STOP_WORDS.has(word)));
}
|
|
72
|
+
/**
 * Turn a tool call into the list of query tokens used for rule matching.
 *
 * The query text is the tool name followed by whichever of these
 * `toolInput` fields hold strings:
 *   - taken whole: command, file_path, pattern, path, description, prompt,
 *     query, url
 *   - first 200 characters only: old_string, new_string, content
 * Other fields are ignored. The combined text is then tokenized.
 *
 * @param {string} toolName Name of the tool being invoked.
 * @param {object} [toolInput] The tool's input payload; non-objects are ignored.
 * @returns {string[]} Query tokens (may contain repeats).
 */
function buildToolCallQuery(toolName, toolInput) {
    const fragments = [toolName];
    if (!toolInput || typeof toolInput !== 'object') {
        return tokenize(fragments.join(' '));
    }
    const WHOLE_FIELDS = ['command', 'file_path', 'pattern', 'path', 'description', 'prompt', 'query', 'url'];
    // Potentially large diff/body fields: keep a capped prefix only.
    const CAPPED_FIELDS = ['old_string', 'new_string', 'content'];
    const CAP = 200;
    for (const name of WHOLE_FIELDS) {
        const fieldValue = toolInput[name];
        if (typeof fieldValue === 'string') {
            fragments.push(fieldValue);
        }
    }
    for (const name of CAPPED_FIELDS) {
        const fieldValue = toolInput[name];
        if (typeof fieldValue === 'string') {
            fragments.push(fieldValue.substring(0, CAP));
        }
    }
    return tokenize(fragments.join(' '));
}
|
|
106
|
+
/**
 * Flatten a rule value into the text used to build the rule's token
 * vocabulary. Only leaf values contribute; object keys and JSON punctuation
 * never appear in the result.
 *
 * - null / undefined → ''
 * - string → returned unchanged
 * - number / boolean → String(value)
 * - array → non-empty texts of its elements, joined by single spaces
 * - object → `content` if it is a string, else `value` if it is a string,
 *   else the non-empty texts of all property values, joined by single spaces
 * - anything else → ''
 *
 * @param {unknown} value Rule value of any shape.
 * @returns {string} Extracted text; '' when there is nothing textual.
 */
function extractRuleText(value) {
    if (value == null)
        return '';
    if (typeof value === 'string')
        return value;
    if (typeof value === 'number' || typeof value === 'boolean')
        return String(value);
    if (typeof value !== 'object')
        return '';
    if (!Array.isArray(value)) {
        // Prefer the conventional text-bearing fields when present.
        if (typeof value.content === 'string')
            return value.content;
        if (typeof value.value === 'string')
            return value.value;
    }
    const children = Array.isArray(value) ? value : Object.values(value);
    // Empty leaves are dropped for arrays as well as objects, so a container
    // with no text yields '' rather than a whitespace-only string that a
    // caller's emptiness check would let through.
    return children
        .map(child => extractRuleText(child))
        .filter(text => text !== '')
        .join(' ');
}
|
|
137
|
+
/**
 * Report whether a rule carries the sticky flag.
 *
 * The flag is honoured in either location: directly on the rule
 * (`rule.sticky === true`) or inside an object-valued `rule.value`
 * (`rule.value.sticky === true`). Only the boolean `true` counts.
 *
 * @param {{sticky?: unknown, value?: unknown}} rule Rule to inspect.
 * @returns {boolean} true when the rule is sticky.
 */
function isSticky(rule) {
    if (!rule)
        return false;
    if (rule.sticky === true)
        return true;
    const payload = rule.value;
    return payload != null && typeof payload === 'object' && payload.sticky === true;
}
|
|
145
|
+
/**
 * Measure how much of the query is covered by a rule:
 * |distinct query tokens found in the rule| / |distinct query tokens|.
 *
 * The measure is deliberately asymmetric: it asks what fraction of the QUERY
 * appears in the rule, not the reverse, so the size of the rule's own
 * vocabulary does not affect the score. Query tokens are de-duplicated
 * first, so a word repeated in the query is counted once and is reported
 * once in `matched`.
 *
 * @param {string[]} queryTokens Tokens derived from the tool call; repeats allowed.
 * @param {Set<string>} ruleTokens Token vocabulary of the rule.
 * @returns {{score: number, matched: string[]}} `score` in [0, 1] and the
 *   distinct matched tokens in first-occurrence order. An empty query gives
 *   `{ score: 0, matched: [] }`.
 */
function tokenOverlap(queryTokens, ruleTokens) {
    const distinctQuery = new Set(queryTokens);
    if (distinctQuery.size === 0)
        return { score: 0, matched: [] };
    const matched = [...distinctQuery].filter(token => ruleTokens.has(token));
    return { score: matched.length / distinctQuery.size, matched };
}
|
|
162
|
+
/**
 * Report whether a rule is a "promoted lesson": a failure-type memory that
 * the promotion engine has graduated into an actionable rule. Unlike raw
 * failure logs, these are worth surfacing in just-in-time injection.
 *
 * A rule qualifies when its key is a string starting with `promoted_`, or
 * when its object-valued `value` has `source === 'promotion-engine'`. A key
 * that is not a string is simply ignored, so the `value.source` check still
 * gets its turn.
 *
 * @param {{key?: unknown, value?: unknown}} rule Rule to inspect.
 * @returns {boolean} true when the rule is a promoted lesson.
 */
function isPromotedLesson(rule) {
    if (typeof rule.key === 'string' && rule.key.startsWith('promoted_'))
        return true;
    const payload = rule.value;
    return payload != null && typeof payload === 'object' && payload.source === 'promotion-engine';
}
|
|
175
|
+
/**
 * Score every candidate rule against a tool call and return the best few.
 *
 * Scoring, per rule:
 *   - base: token overlap between the tool-call query and the rule text
 *   - + STICKY_BOOST when the rule is sticky (this alone clears MIN_SCORE)
 *   - + the rule type's TYPE_BOOSTS entry, only when there is some overlap
 *   - + RECENCY_BOOST when the rule's timestamp lies within
 *     RECENT_WINDOW_MS of now, again only when there is some overlap
 *
 * Skipped outright: rules with `is_active === false`, rules with no
 * extractable text, and raw failures. Raw failures are reference material
 * produced in bulk by the auto-capture pipeline ("Avoid: Test command
 * reported failures: npm test ..."); they share tokens with everyday dev
 * commands but are not decision-time guidance. Their actionable
 * counterparts — promoted lessons, corrections and devops rules — stay
 * eligible. See .research/rule-loading-gap.md for the reasoning.
 *
 * @param {string} toolName Name of the tool being invoked.
 * @param {object} toolInput The tool's input payload.
 * @param {Array<object>} rules Pre-fetched candidate rules.
 * @returns {Array<{rule: object, score: number, matchedTokens: string[]}>}
 *   At most TOP_N entries with score >= MIN_SCORE, highest score first;
 *   empty when the tool call yields no query tokens.
 */
function rankRulesForToolCall(toolName, toolInput, rules) {
    const queryTokens = buildToolCallQuery(toolName, toolInput);
    if (queryTokens.length === 0) {
        return [];
    }
    const candidates = [];
    for (const rule of rules) {
        // Inactive rules and un-promoted failures never take part.
        if (rule.is_active === false || (rule.type === 'failure' && !isPromotedLesson(rule))) {
            continue;
        }
        const text = extractRuleText(rule.value);
        if (!text) {
            continue;
        }
        const vocabulary = new Set(tokenize(text));
        const { score: overlap, matched: matchedTokens } = tokenOverlap(queryTokens, vocabulary);
        // Type and recency boosts are gated on having at least some overlap;
        // the sticky boost is not.
        const gate = overlap > 0 ? 1 : 0;
        let score = overlap;
        if (isSticky(rule)) {
            score += STICKY_BOOST;
        }
        score += gate * (TYPE_BOOSTS[rule.type] ?? 0);
        if (rule.timestamp && Date.now() - rule.timestamp < RECENT_WINDOW_MS) {
            score += gate * RECENCY_BOOST;
        }
        if (score >= MIN_SCORE) {
            candidates.push({ rule, score, matchedTokens });
        }
    }
    return candidates
        .sort((left, right) => right.score - left.score)
        .slice(0, TOP_N);
}
|
package/package.json
CHANGED