pi-canary 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +4 -0
- package/README.md +83 -0
- package/extensions/canary.json +6 -0
- package/extensions/canary.ts +323 -0
- package/package.json +10 -0
package/.gitattributes
ADDED
package/README.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# pi-canary
|
|
2
|
+
|
|
3
|
+
A [pi](https://pi.dev) extension that silently verifies the agent's context awareness on every turn using hidden canary tokens.
|
|
4
|
+
|
|
5
|
+
Before answering your message, the agent must locate and return N canary tokens distributed across the conversation history. The entire verification exchange is invisible: no tokens in the thinking block, no tokens in the visible response, nothing in the history the agent uses to answer you.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pi install git:github.com/sebaxzero/pi-canary.git
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or install project-locally (adds to `.pi/settings.json` only):
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pi install git:github.com/sebaxzero/pi-canary.git -l
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## How it works
|
|
20
|
+
|
|
21
|
+
Every time you send a message, the extension runs a hidden two-phase exchange before your question is answered:
|
|
22
|
+
|
|
23
|
+
**Phase 1 — Verify**
|
|
24
|
+
|
|
25
|
+
- N random 24-character canary tokens are generated (or reused if `VARIANT=fixed`).
|
|
26
|
+
- They are injected at the configured positions across the conversation history. The last token also carries a verification instruction.
|
|
27
|
+
- Your original question is temporarily suppressed.
|
|
28
|
+
- The agent is asked to return only the N tokens, each labelled by name (`TOKEN_1: <value>`, `TOKEN_2: <value>`, …).
|
|
29
|
+
- The response is captured and checked. The exchange is hidden from the TUI.
|
|
30
|
+
|
|
31
|
+
**Phase 2 — Respond**
|
|
32
|
+
|
|
33
|
+
- The tokens and the verification exchange are stripped from context entirely.
|
|
34
|
+
- Your original question is restored.
|
|
35
|
+
- The agent answers normally, with no canary tokens anywhere in its view.
|
|
36
|
+
|
|
37
|
+
If verification fails, a warning notification appears in the TUI.
|
|
38
|
+
|
|
39
|
+
## Configuration
|
|
40
|
+
|
|
41
|
+
Persistent configuration lives in `extensions/canary.json`. You can ask the agent to edit it directly:
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"COUNT": 3,
|
|
46
|
+
"POSITION": "end",
|
|
47
|
+
"VARIANT": "fixed",
|
|
48
|
+
"FAIL_COMPACT": 0
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
| Key | Default | Description |
|
|
53
|
+
|-----|---------|-------------|
|
|
54
|
+
| `COUNT` | `3` | Number of canary tokens injected per turn |
|
|
55
|
+
| `POSITION` | `end` | Where tokens are injected: `start`, `equidistant`, or `end` |
|
|
56
|
+
| `VARIANT` | `fixed` | `fixed` = same tokens every turn (preserves KV cache); `variant` = new tokens each turn |
|
|
57
|
+
| `FAIL_COMPACT` | `0` | Compact context after N consecutive failures (`0` = disabled) |
|
|
58
|
+
|
|
59
|
+
**`POSITION=end` + `VARIANT=fixed`** (the defaults) is the cache-friendly mode for local model servers: the message prefix never changes and the injected suffix is always the same tokens, so the KV cache stays warm after the first turn. Use `POSITION=equidistant` + `VARIANT=variant` for maximum coverage at the cost of cache invalidation every turn.
|
|
60
|
+
|
|
61
|
+
Changes to the JSON take effect on the next session. For live tuning within a session, use the command below.
|
|
62
|
+
|
|
63
|
+
## Command
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
/canary — show current phase, failure count, and config
|
|
67
|
+
/canary set KEY=VAL — override config for the current session only
|
|
68
|
+
/canary set KEY=VAL KEY=VAL ...
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Example: `/canary set COUNT=5 POSITION=equidistant VARIANT=variant`
|
|
72
|
+
|
|
73
|
+
## Compatibility
|
|
74
|
+
|
|
75
|
+
Works alongside [pi-loop-police](https://github.com/sebaxzero/pi-loop-police). When loop-police aborts a turn, the canary check yields gracefully and does not fire its own recovery.
|
|
76
|
+
|
|
77
|
+
## License
|
|
78
|
+
|
|
79
|
+
MIT
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
Built with [Claude](https://claude.ai).
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canary Extension
|
|
3
|
+
*
|
|
4
|
+
* Runs a hidden pre-turn where the agent must return N canary tokens distributed
|
|
5
|
+
* across the conversation history. The verification is invisible to the user:
|
|
6
|
+
* tokens never appear in the thinking block, visible responses, or final answer.
|
|
7
|
+
*
|
|
8
|
+
* Flow per user turn:
|
|
9
|
+
* 1. [VERIFY] Hidden LLM call — tokens visible, original question suppressed.
|
|
10
|
+
* Agent returns tokens only. Response hidden.
|
|
11
|
+
* 2. [RESPOND] Real LLM call — clean context, no tokens anywhere, agent answers normally.
|
|
12
|
+
*
|
|
13
|
+
* POSITION=end + VARIANT=fixed (defaults) preserves KV cache on local model servers:
|
|
14
|
+
* the prefix never changes and the injected suffix is always the same tokens.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";

import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
18
|
+
|
|
19
|
+
// Loaded from the sibling canary.json at startup; `/canary set` overrides apply
// to the current session only.
const cfg = (() => {
  const defaults = {
    COUNT: 3,
    POSITION: "end" as "start" | "equidistant" | "end",
    VARIANT: "fixed" as "fixed" | "variant",
    FAIL_COMPACT: 0,
  };
  const merged = { ...defaults };
  try {
    // Read through node:fs instead of the Deno global: under Node that global is
    // undefined, so the read threw and the catch below silently discarded the
    // file on every load. fileURLToPath (rather than URL.pathname) decodes
    // percent-escapes and produces a usable path on Windows.
    const path = fileURLToPath(new URL("canary.json", import.meta.url));
    const raw: unknown = JSON.parse(readFileSync(path, "utf8"));
    if (raw !== null && typeof raw === "object") {
      const { COUNT, POSITION, VARIANT, FAIL_COMPACT } = raw as Record<string, unknown>;
      // Accept each key only when it is well-formed; anything else keeps its default.
      if (typeof COUNT === "number" && Number.isInteger(COUNT) && COUNT > 0) {
        merged.COUNT = COUNT;
      }
      if (POSITION === "start" || POSITION === "equidistant" || POSITION === "end") {
        merged.POSITION = POSITION;
      }
      if (VARIANT === "fixed" || VARIANT === "variant") {
        merged.VARIANT = VARIANT;
      }
      if (typeof FAIL_COMPACT === "number" && Number.isInteger(FAIL_COMPACT) && FAIL_COMPACT >= 0) {
        merged.FAIL_COMPACT = FAIL_COMPACT;
      }
    }
  } catch {
    // Best effort: a missing or unparsable canary.json leaves the defaults in place.
  }
  return merged;
})();
|
|
34
|
+
|
|
35
|
+
// --- Token generation ---
|
|
36
|
+
|
|
37
|
+
const TOKEN_LENGTH = 24;
const TOKEN_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/**
 * Generate one canary token.
 *
 * @returns A string of TOKEN_LENGTH characters, each drawn uniformly from
 *          TOKEN_CHARSET using crypto.getRandomValues.
 */
function generateToken(): string {
  // Largest multiple of the charset size that fits in a byte. Bytes at or above
  // it are discarded: reducing them with `%` would favour the first
  // (256 % charset size) characters of the charset.
  const limit = 256 - (256 % TOKEN_CHARSET.length);
  const chars: string[] = [];
  // Over-allocate so that a single fill is normally enough despite rejections.
  const buf = new Uint8Array(TOKEN_LENGTH * 2);
  while (chars.length < TOKEN_LENGTH) {
    crypto.getRandomValues(buf);
    for (let i = 0; i < buf.length && chars.length < TOKEN_LENGTH; i++) {
      if (buf[i] < limit) chars.push(TOKEN_CHARSET[buf[i] % TOKEN_CHARSET.length]);
    }
  }
  return chars.join("");
}
|
|
45
|
+
|
|
46
|
+
// --- Message builders ---
|
|
47
|
+
|
|
48
|
+
/**
 * Describe where a canary token sits in the context, for use in its anchor header.
 *
 * @param index Zero-based index of the token.
 * @param total Number of tokens being injected.
 * @returns "beginning of context", "end of context", or a "middle of context (~NN%)" label.
 */
function positionLabel(index: number, total: number): string {
  const atStart = "beginning of context";
  const atEnd = "end of context";

  switch (cfg.POSITION) {
    case "start":
      return atStart;
    case "end":
      return atEnd;
  }

  // equidistant: a lone token is labelled as the end; otherwise the label
  // follows the token's relative position among its siblings.
  if (total === 1) {
    return atEnd;
  }
  const ratio = index / (total - 1);
  if (ratio === 0) {
    return atStart;
  }
  if (ratio === 1) {
    return atEnd;
  }
  const percent = Math.round(ratio * 100);
  return `middle of context (~${percent}%)`;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Build the text of one injected canary message: a bracketed header naming the
 * token (1-based) and its position label, followed by the token on its own line.
 *
 * @param token The canary token value.
 * @param index Zero-based index of the token.
 * @param total Number of tokens being injected.
 */
function buildAnchorContent(token: string, index: number, total: number): string {
  const ordinal = index + 1;
  const where = positionLabel(index, total);
  const header = `[CANARY — TOKEN_${ordinal} of ${total} — ${where}]`;
  return [header, token].join("\n");
}
|
|
62
|
+
|
|
63
|
+
/**
 * Build the instruction block that asks the agent to echo the canary tokens.
 *
 * @param count Number of tokens the agent must return; one `TOKEN_n: <value>`
 *              template line is emitted per token.
 * @returns The banner, the instruction sentence, the template lines and a
 *          closing rule, joined with newlines.
 */
function buildVerificationInstruction(count: number): string {
  const rule = "═══════════════════════════════════════════════════════════";
  const plural = count === 1 ? "" : "s";

  const lines: string[] = [
    rule,
    "⚠️ CANARY CHECK ⚠️",
    rule,
    "",
    `Return ONLY the ${count} canary token${plural} below. Do not say anything else.`,
  ];
  for (let n = 1; n <= count; n++) {
    lines.push(`TOKEN_${n}: <value>`);
  }
  lines.push(rule);

  return lines.join("\n");
}
|
|
75
|
+
|
|
76
|
+
// --- Extension ---
|
|
77
|
+
|
|
78
|
+
type Phase = "idle" | "verifying" | "responding";
|
|
79
|
+
|
|
80
|
+
export default function (pi: ExtensionAPI) {
|
|
81
|
+
let phase: Phase = "idle";
|
|
82
|
+
let currentTokens: string[] | null = null;
|
|
83
|
+
// Reused across turns in fixed mode; null forces regeneration
|
|
84
|
+
let fixedTokens: string[] | null = null;
|
|
85
|
+
let consecutiveFailures = 0;
|
|
86
|
+
// Guard against double-injection if context fires twice before message_end (retries)
|
|
87
|
+
let verifyContextSent = false;
|
|
88
|
+
// Timestamp of the hidden verification assistant message — used to filter it in Phase 2
|
|
89
|
+
let verifyResponseTimestamp: number | null = null;
|
|
90
|
+
|
|
91
|
+
pi.on("before_agent_start", (_event, _ctx) => {
  // Pick the token set for this turn, then arm the verification phase.
  const freshTokens = (): string[] => Array.from({ length: cfg.COUNT }, generateToken);

  if (cfg.VARIANT !== "fixed") {
    // variant mode: a new set for every turn
    currentTokens = freshTokens();
  } else {
    // fixed mode: keep the cached set unless it is absent or its size no
    // longer matches cfg.COUNT
    const reusable = fixedTokens !== null && fixedTokens.length === cfg.COUNT;
    if (!reusable) {
      fixedTokens = freshTokens();
    }
    currentTokens = fixedTokens;
  }

  verifyContextSent = false;
  phase = "verifying";
});
|
|
103
|
+
|
|
104
|
+
pi.on("context", (event, _ctx) => {
  const tokens = currentTokens;

  // --- Phase 1: build the verification-only context (at most once per turn) ---
  if (phase === "verifying" && tokens && !verifyContextSent) {
    verifyContextSent = true;

    // Work on a copy so the messages array on the event is not mutated.
    const history = event.messages.slice();

    // Drop the trailing user message so the agent only sees the canary check.
    const tail = history[history.length - 1] as any;
    if (history.length > 0 && tail.role === "user") {
      history.pop();
    }

    const baseLen = history.length;
    const total = tokens.length;

    // Insertion index for token i, always relative to the pre-injection history.
    const slotFor = (i: number): number => {
      switch (cfg.POSITION) {
        case "start":
          return 0;
        case "end":
          return baseLen;
        default: {
          // equidistant
          const ratio = total === 1 ? 1 : i / (total - 1);
          return Math.round(ratio * baseLen);
        }
      }
    };

    // Insert from the last token to the first so earlier splices never shift
    // the indices still to be used; tokens sharing an index end up in order.
    for (let i = total - 1; i >= 0; i--) {
      const anchor = buildAnchorContent(tokens[i], i, total);
      // Only the final token carries the verification instruction.
      const content = i === total - 1
        ? anchor + "\n\n" + buildVerificationInstruction(total)
        : anchor;

      history.splice(slotFor(i), 0, {
        role: "custom",
        customType: "canary",
        content,
        display: false,
        timestamp: Date.now(),
      } as any);
    }

    return { messages: history };
  }

  // --- Phase 2: strip the hidden verification exchange ---
  if (phase === "responding") {
    const visible = event.messages.filter(
      (m: any) => !(m.customType === "canary" || m.timestamp === verifyResponseTimestamp)
    );
    // Only override the context when something was actually removed.
    if (visible.length !== event.messages.length) return { messages: visible };
  }
});
|
|
159
|
+
|
|
160
|
+
pi.on("message_end", (event, ctx) => {
  const { message } = event;
  if (message.role !== "assistant") return;

  // --- Phase 2 completion: reset and let the response through ---
  if (phase === "responding") {
    phase = "idle";
    return;
  }

  if (phase !== "verifying") return;

  // Take the expected tokens and clear the shared slot before any early exit.
  const expected = currentTokens;
  currentTokens = null;

  // Skip aborted or errored turns
  const stopReason = (message as any).stopReason;
  if (stopReason === "aborted" || stopReason === "error") {
    phase = "idle";
    return;
  }

  const body = message.content;
  const parts: any[] = Array.isArray(body) ? body : [];

  // Yield to loop-police: a thinking part carrying one of its markers means
  // the turn is not checked here.
  const loopPoliceFired = parts.some(
    (p) =>
      p && typeof p === "object" &&
      p.type === "thinking" &&
      typeof p.thinking === "string" &&
      (p.thinking.includes("[THINKING LOOP") || p.thinking.includes("[SEMANTIC LOOP"))
  );
  if (loopPoliceFired) {
    phase = "idle";
    return;
  }

  if (!expected) {
    phase = "idle";
    return;
  }

  // Extract the plain text of the verification response.
  let fullText: string;
  if (typeof body === "string") {
    fullText = body;
  } else {
    fullText = parts
      .filter((p) => p && typeof p === "object" && p.type === "text" && typeof p.text === "string")
      .map((p) => p.text)
      .join("\n");
  }

  const allFound = expected.every((token) => fullText.includes(token));

  // Replace the verification response with an empty assistant message.
  // The role stays "assistant"; the timestamp is remembered so the Phase 2
  // context filter can drop this message.
  verifyResponseTimestamp = (message as any).timestamp ?? null;
  const blanked = { ...message, content: [] } as any;

  // Switch to Phase 2 and queue a hidden message that triggers the real turn.
  const beginRespondPhase = (marker: string): void => {
    phase = "responding";
    pi.sendMessage(
      { customType: "canary", content: marker, display: false },
      { triggerTurn: true, deliverAs: "steer" }
    );
  };

  if (allFound) {
    // Verification passed — proceed to Phase 2
    consecutiveFailures = 0;
    beginRespondPhase("✓");
    return { message: blanked };
  }

  // Verification failed
  consecutiveFailures++;

  const threshold = cfg.FAIL_COMPACT;
  if (threshold > 0 && consecutiveFailures >= threshold) {
    consecutiveFailures = 0;
    phase = "idle";
    ctx.ui.notify(
      `Canary: ${threshold} consecutive failure(s) — triggering compaction`,
      "warning"
    );
    ctx.compact({
      customInstructions: `The agent failed to recall canary tokens ${threshold} time(s) in a row. After compaction, return all canary tokens before doing anything else.`,
    });
    return { message: blanked };
  }

  // Proceed to Phase 2 despite failure (user still gets a response)
  ctx.ui.notify("⚠️ Canary check failed — context may be degraded", "warning");
  beginRespondPhase("⚠️");
  return { message: blanked };
});
|
|
258
|
+
|
|
259
|
+
pi.registerCommand("canary", {
  description: "Show status; /canary set KEY=VAL [KEY=VAL ...]",
  /**
   * `/canary` shows the current status; `/canary set KEY=VAL ...` overrides
   * configuration for the running session. Every pair is answered in a single
   * notification, including the ones that were rejected.
   */
  handler: (args, ctx) => {
    const trimmed = args?.trim() ?? "";

    // Strict non-negative integer parse. parseInt would accept "5x" or "2.5"
    // by ignoring the trailing characters.
    const parseCount = (text: string): number | null => {
      if (!/^\d+$/.test(text)) return null;
      const n = Number(text);
      return Number.isSafeInteger(n) ? n : null;
    };

    if (trimmed === "set" || /^set\s/.test(trimmed)) {
      const pairs = trimmed.slice(3).trim().split(/\s+/).filter((p) => p !== "");
      if (pairs.length === 0) {
        // A bare "set" used to fall through to the status display.
        ctx.ui.notify("Canary: usage: /canary set KEY=VAL [KEY=VAL ...]", "info");
        return;
      }

      const results: string[] = [];
      for (const pair of pairs) {
        const eq = pair.indexOf("=");
        const key = eq > 0 ? pair.slice(0, eq).toUpperCase() : "";
        const val = eq > 0 ? pair.slice(eq + 1) : "";
        if (key === "" || val === "") {
          // Report malformed pairs instead of dropping them silently.
          results.push(`invalid pair: ${pair} (expected KEY=VAL)`);
          continue;
        }

        if (key === "COUNT") {
          const n = parseCount(val);
          if (n !== null && n > 0) {
            cfg.COUNT = n;
            // Discard the cached fixed tokens so they are regenerated.
            fixedTokens = null;
            results.push(`COUNT=${cfg.COUNT}`);
          } else {
            results.push(`invalid COUNT: ${val}`);
          }
        } else if (key === "POSITION") {
          if (val === "start" || val === "equidistant" || val === "end") {
            cfg.POSITION = val;
            results.push(`POSITION=${cfg.POSITION}`);
          } else {
            results.push(`invalid POSITION: ${val} (start|equidistant|end)`);
          }
        } else if (key === "VARIANT") {
          if (val === "fixed" || val === "variant") {
            cfg.VARIANT = val;
            if (val === "variant") fixedTokens = null;
            results.push(`VARIANT=${cfg.VARIANT}`);
          } else {
            results.push(`invalid VARIANT: ${val} (fixed|variant)`);
          }
        } else if (key === "FAIL_COMPACT") {
          const n = parseCount(val);
          if (n !== null) {
            cfg.FAIL_COMPACT = n;
            results.push(`FAIL_COMPACT=${cfg.FAIL_COMPACT}`);
          } else {
            results.push(`invalid FAIL_COMPACT: ${val}`);
          }
        } else {
          results.push(`unknown: ${key}`);
        }
      }

      ctx.ui.notify(`Canary: ${results.join(", ")}`, "info");
      return;
    }

    ctx.ui.notify(
      [
        "Canary status",
        ` phase: ${phase}`,
        ` tokens per turn: ${cfg.COUNT}`,
        ` position: ${cfg.POSITION}`,
        ` variant: ${cfg.VARIANT}`,
        ` compact after N fails: ${cfg.FAIL_COMPACT === 0 ? "disabled" : cfg.FAIL_COMPACT}`,
        ` consecutive failures: ${consecutiveFailures}`,
        "",
        " config (/canary set = session only; edit canary.json for persistence):",
        ` COUNT=${cfg.COUNT}`,
        ` POSITION=${cfg.POSITION}`,
        ` VARIANT=${cfg.VARIANT}`,
        ` FAIL_COMPACT=${cfg.FAIL_COMPACT}`,
      ].join("\n"),
      "info"
    );
  },
});
|
|
317
|
+
|
|
318
|
+
pi.on("session_start", (_event, ctx) => {
  // Announce the active configuration on session start; nothing to do without a UI.
  if (!ctx.hasUI) return;

  const { COUNT, POSITION, VARIANT } = cfg;
  ctx.ui.notify(
    `Canary loaded — ${COUNT} token(s), position=${POSITION}, variant=${VARIANT}`,
    "info"
  );
});
|
|
323
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-canary",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Pi extension: silently verifies agent context awareness every turn using hidden canary tokens. KV-cache friendly.",
|
|
5
|
+
"keywords": ["pi-package", "pi", "pi-coding-agent", "extension", "context-awareness", "canary", "safety", "verification", "local-llm"],
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"pi": {
|
|
8
|
+
"extensions": ["./extensions"]
|
|
9
|
+
}
|
|
10
|
+
}
|