pi-canary 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.gitattributes ADDED
@@ -0,0 +1,4 @@
1
+ * text=auto eol=lf
2
+ *.ts text eol=lf
3
+ *.json text eol=lf
4
+ *.md text eol=lf
package/README.md ADDED
@@ -0,0 +1,83 @@
1
+ # pi-canary
2
+
3
+ A [pi](https://pi.dev) extension that silently verifies the agent's context awareness on every turn using hidden canary tokens.
4
+
5
+ Before answering your message, the agent must locate and return N canary tokens distributed across the conversation history. The entire verification exchange is invisible: no tokens in the thinking block, no tokens in the visible response, nothing in the history the agent uses to answer you.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pi install git:github.com/sebaxzero/pi-canary.git
11
+ ```
12
+
13
+ Or install project-locally (adds to `.pi/settings.json` only):
14
+
15
+ ```bash
16
+ pi install git:github.com/sebaxzero/pi-canary.git -l
17
+ ```
18
+
19
+ ## How it works
20
+
21
+ Every time you send a message, the extension runs a hidden two-phase exchange before your question is answered:
22
+
23
+ **Phase 1 — Verify**
24
+
25
+ - N random 24-character canary tokens are generated (or reused if `VARIANT=fixed`).
26
+ - They are injected at the configured positions across the conversation history. The last token also carries a verification instruction.
27
+ - Your original question is temporarily suppressed.
28
+ - The agent is asked only to return the N tokens by name.
29
+ - The response is captured and checked. The exchange is hidden from the TUI.
30
+
31
+ **Phase 2 — Respond**
32
+
33
+ - The tokens and the verification exchange are stripped from context entirely.
34
+ - Your original question is restored.
35
+ - The agent answers normally, with no canary tokens anywhere in its view.
36
+
37
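+ If verification fails, a warning notification appears in the TUI and the agent still answers your message — unless the `FAIL_COMPACT` threshold is reached, in which case the context is compacted instead.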
38
+
39
+ ## Configuration
40
+
41
+ Persistent configuration lives in `extensions/canary.json`. You can ask the agent to edit it directly:
42
+
43
+ ```json
44
+ {
45
+ "COUNT": 3,
46
+ "POSITION": "end",
47
+ "VARIANT": "fixed",
48
+ "FAIL_COMPACT": 0
49
+ }
50
+ ```
51
+
52
+ | Key | Default | Description |
53
+ |-----|---------|-------------|
54
+ | `COUNT` | `3` | Number of canary tokens injected per turn |
55
+ | `POSITION` | `end` | Where tokens are injected: `start`, `equidistant`, or `end` |
56
+ | `VARIANT` | `fixed` | `fixed` = same tokens every turn (preserves KV cache); `variant` = new tokens each turn |
57
+ | `FAIL_COMPACT` | `0` | Compact context after N consecutive failures (`0` = disabled) |
58
+
59
+ **`POSITION=end` + `VARIANT=fixed`** (the defaults) is the cache-friendly mode for local model servers: the message prefix never changes and the injected suffix is always the same tokens, so the KV cache stays warm after the first turn. Use `POSITION=equidistant` + `VARIANT=variant` for maximum coverage at the cost of cache invalidation every turn.
60
+
61
+ Changes to the JSON take effect on the next session. For live tuning within a session, use the command below.
62
+
63
+ ## Command
64
+
65
+ ```
66
+ /canary — show current phase, failure count, and config
67
+ /canary set KEY=VAL — override config for the current session only
68
+ /canary set KEY=VAL KEY=VAL ...
69
+ ```
70
+
71
+ Example: `/canary set COUNT=5 POSITION=equidistant VARIANT=variant`
72
+
73
+ ## Compatibility
74
+
75
+ Works alongside [pi-loop-police](https://github.com/sebaxzero/pi-loop-police). When loop-police aborts a turn, the canary check yields gracefully and does not fire its own recovery.
76
+
77
+ ## License
78
+
79
+ MIT
80
+
81
+ ---
82
+
83
+ Built with [Claude](https://claude.ai).
@@ -0,0 +1,6 @@
1
+ {
2
+ "COUNT": 3,
3
+ "POSITION": "end",
4
+ "VARIANT": "fixed",
5
+ "FAIL_COMPACT": 0
6
+ }
@@ -0,0 +1,323 @@
1
+ /**
2
+ * Canary Extension
3
+ *
4
+ * Runs a hidden pre-turn where the agent must return N canary tokens distributed
5
+ * across the conversation history. The verification is invisible to the user:
6
+ * tokens never appear in the thinking block, visible responses, or final answer.
7
+ *
8
+ * Flow per user turn:
9
+ * 1. [VERIFY] Hidden LLM call — tokens visible, original question suppressed.
10
+ * Agent returns tokens only. Response hidden.
11
+ * 2. [RESPOND] Real LLM call — clean context, no tokens anywhere, agent answers normally.
12
+ *
13
+ * POSITION=end + VARIANT=fixed (defaults) preserves KV cache on local model servers:
14
+ * the prefix never changes and the injected suffix is always the same tokens.
15
+ */
16
+
17
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";

import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
18
+
19
/** Shape of the canary configuration persisted in the sibling `canary.json`. */
type CanaryConfig = {
  COUNT: number;
  POSITION: "start" | "equidistant" | "end";
  VARIANT: "fixed" | "variant";
  FAIL_COMPACT: number;
};

/**
 * Overlays an untrusted, already-parsed JSON value onto `defaults`.
 *
 * Only the four known keys are read, and each one is applied only when its
 * value is valid (COUNT: integer > 0, FAIL_COMPACT: integer >= 0, POSITION and
 * VARIANT: one of their literal options). Anything else keeps the default, so
 * a hand-edited file cannot put the extension into an inconsistent state
 * (e.g. a string COUNT that never equals an array length).
 *
 * @param raw - Result of `JSON.parse`; may be any JSON value.
 * @param defaults - Fallback values; not mutated.
 * @returns A new config object.
 */
function sanitizeCanaryConfig(raw: unknown, defaults: CanaryConfig): CanaryConfig {
  const merged: CanaryConfig = { ...defaults };
  if (typeof raw !== "object" || raw === null) return merged;
  const src = raw as Record<string, unknown>;

  const count = src.COUNT;
  if (typeof count === "number" && Number.isInteger(count) && count > 0) {
    merged.COUNT = count;
  }

  const position = src.POSITION;
  if (position === "start" || position === "equidistant" || position === "end") {
    merged.POSITION = position;
  }

  const variant = src.VARIANT;
  if (variant === "fixed" || variant === "variant") {
    merged.VARIANT = variant;
  }

  const failCompact = src.FAIL_COMPACT;
  if (typeof failCompact === "number" && Number.isInteger(failCompact) && failCompact >= 0) {
    merged.FAIL_COMPACT = failCompact;
  }

  return merged;
}

// Loaded from sibling JSON at startup; `/canary set` overrides for the current session only
const cfg: CanaryConfig = (() => {
  const defaults: CanaryConfig = {
    COUNT: 3,
    POSITION: "end",
    VARIANT: "fixed",
    FAIL_COMPACT: 0,
  };
  try {
    // fileURLToPath (not URL.pathname) so Windows drive letters and
    // percent-encoded characters resolve to a real filesystem path.
    const path = fileURLToPath(new URL("canary.json", import.meta.url));
    return sanitizeCanaryConfig(JSON.parse(readFileSync(path, "utf8")), defaults);
  } catch {
    // Best effort: a missing or unparsable file falls back to the defaults.
    return defaults;
  }
})();
34
+
35
+ // --- Token generation ---
36
+
37
const TOKEN_LENGTH = 24;
const TOKEN_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/**
 * Generates one random canary token.
 *
 * Characters are drawn uniformly from TOKEN_CHARSET using rejection sampling:
 * a plain `byte % 62` would favour the first 8 characters of the charset
 * because 256 is not a multiple of 62.
 *
 * @returns A string of exactly TOKEN_LENGTH characters from TOKEN_CHARSET.
 */
function generateToken(): string {
  const size = TOKEN_CHARSET.length;
  // Largest multiple of `size` that fits in a byte; bytes at or above it are discarded.
  const limit = 256 - (256 % size);
  const chars: string[] = [];
  // Over-allocate so a single fill is almost always enough despite rejections.
  const buf = new Uint8Array(TOKEN_LENGTH * 2);

  while (chars.length < TOKEN_LENGTH) {
    crypto.getRandomValues(buf);
    for (const byte of buf) {
      if (byte >= limit) continue;
      chars.push(TOKEN_CHARSET.charAt(byte % size));
      if (chars.length === TOKEN_LENGTH) break;
    }
  }
  return chars.join("");
}
45
+
46
+ // --- Message builders ---
47
+
48
/**
 * Human-readable description of where a canary token sits in the context.
 *
 * For POSITION "start" and "end" the label is fixed; otherwise (equidistant)
 * it is derived from the token's relative position `index / (total - 1)`.
 *
 * @param index - Zero-based index of the token.
 * @param total - Number of tokens injected this turn.
 * @returns The label embedded in the anchor header.
 */
function positionLabel(index: number, total: number): string {
  const atStart = "beginning of context";
  const atEnd = "end of context";

  switch (cfg.POSITION) {
    case "start":
      return atStart;
    case "end":
      return atEnd;
  }

  // equidistant — a single token is placed at the end
  if (total === 1) return atEnd;

  const ratio = index / (total - 1);
  if (ratio === 0) return atStart;
  if (ratio === 1) return atEnd;

  const percent = Math.round(ratio * 100);
  return `middle of context (~${percent}%)`;
}
58
+
59
/**
 * Builds the text of one injected canary message: a bracketed header naming
 * the token (1-based), the total, and its position label, followed by the
 * token itself on the next line.
 *
 * @param token - The canary token value.
 * @param index - Zero-based index of the token.
 * @param total - Number of tokens injected this turn.
 */
function buildAnchorContent(token: string, index: number, total: number): string {
  const ordinal = index + 1;
  const where = positionLabel(index, total);
  const header = `[CANARY — TOKEN_${ordinal} of ${total} — ${where}]`;
  return [header, token].join("\n");
}
62
+
63
/**
 * Builds the instruction block appended to the last canary message, telling
 * the agent to reply with nothing but the tokens in `TOKEN_n: <value>` form.
 *
 * @param count - Number of tokens the agent must return.
 * @returns The banner, the instruction sentence, one template line per token,
 *          and a closing rule, joined with newlines.
 */
function buildVerificationInstruction(count: number): string {
  const rule = "═══════════════════════════════════════════════════════════";
  const plural = count === 1 ? "" : "s";

  const lines: string[] = [
    rule,
    "⚠️ CANARY CHECK ⚠️",
    rule,
    "",
    `Return ONLY the ${count} canary token${plural} below. Do not say anything else.`,
  ];
  for (let n = 1; n <= count; n++) {
    lines.push(`TOKEN_${n}: <value>`);
  }
  lines.push(rule);

  return lines.join("\n");
}
75
+
76
+ // --- Extension ---
77
+
78
/**
 * State of the per-turn canary cycle:
 * "verifying" while the hidden token check is in flight, "responding" while
 * the real answer is being produced, "idle" otherwise.
 */
type Phase = "idle" | "verifying" | "responding";

/**
 * Parses a strictly decimal, unsigned integer.
 *
 * @param text - Raw value from a `KEY=VAL` pair.
 * @returns The integer, or null when `text` contains anything but digits
 *          (so "3abc", "2.5", "-1" and "" are all rejected).
 */
function parseConfigInt(text: string): number | null {
  return /^\d+$/.test(text) ? Number.parseInt(text, 10) : null;
}

/**
 * Canary extension entry point.
 *
 * Registers handlers on the given extension API that, for each agent start:
 *   1. generate (or reuse) canary tokens and enter the "verifying" phase;
 *   2. on the first `context` event, drop the trailing user message and inject
 *      the tokens as hidden custom messages;
 *   3. on the assistant `message_end`, check that every token appears in the
 *      reply text, blank the reply, and send a hidden message to trigger the
 *      real turn (or request compaction after FAIL_COMPACT consecutive misses);
 *   4. during "responding", filter canary messages and the verification reply
 *      out of the context.
 * Also registers the `/canary` command (status and `set KEY=VAL` overrides).
 *
 * @param pi - Extension API supplied by the host.
 */
export default function canaryExtension(pi: ExtensionAPI) {
  let phase: Phase = "idle";
  // Tokens for the verification currently in flight; cleared when its reply is examined
  let currentTokens: string[] | null = null;
  // Reused across turns in fixed mode; null forces regeneration
  let fixedTokens: string[] | null = null;
  let consecutiveFailures = 0;
  // Guard against double-injection if context fires twice before message_end (retries)
  let verifyContextSent = false;
  // Timestamp of the hidden verification assistant message — used to filter it in Phase 2.
  // Only the most recent verification reply is tracked.
  let verifyResponseTimestamp: number | null = null;

  pi.on("before_agent_start", (_event, _ctx) => {
    if (cfg.VARIANT === "fixed") {
      // Regenerate only when there is no cached set or COUNT changed since it was built.
      if (!fixedTokens || fixedTokens.length !== cfg.COUNT) {
        fixedTokens = Array.from({ length: cfg.COUNT }, generateToken);
      }
      currentTokens = fixedTokens;
    } else {
      currentTokens = Array.from({ length: cfg.COUNT }, generateToken);
    }
    phase = "verifying";
    verifyContextSent = false;
  });

  pi.on("context", (event, _ctx) => {
    // --- Phase 1: build verification-only context ---
    if (phase === "verifying" && currentTokens && !verifyContextSent) {
      verifyContextSent = true;
      const messages = [...event.messages];

      // Suppress the original user question so the agent focuses only on the canary check.
      // The question remains in session history and reappears in Phase 2.
      if (messages.length > 0 && (messages[messages.length - 1] as any).role === "user") {
        messages.pop();
      }

      const histLen = messages.length;
      const count = currentTokens.length;

      const injections = currentTokens
        .map((token, i) => {
          let insertAt: number;
          if (cfg.POSITION === "start") insertAt = 0;
          else if (cfg.POSITION === "end") insertAt = histLen;
          else {
            // equidistant
            const fraction = count === 1 ? 1 : i / (count - 1);
            insertAt = Math.round(fraction * histLen);
          }
          return { insertAt, token, i, isLast: i === count - 1 };
        })
        .reverse(); // reverse so splices don't shift earlier indices

      for (const { insertAt, token, i, isLast } of injections) {
        // The last token additionally carries the "return the tokens" instruction.
        const content = isLast
          ? buildAnchorContent(token, i, count) + "\n\n" + buildVerificationInstruction(count)
          : buildAnchorContent(token, i, count);

        messages.splice(insertAt, 0, {
          role: "custom",
          customType: "canary",
          content,
          display: false,
          timestamp: Date.now(),
        } as any);
      }

      // NOTE(review): returning { messages } presumably substitutes the context
      // sent to the model — confirm against the pi extension API.
      return { messages };
    }

    // --- Phase 2: strip the hidden verification exchange ---
    if (phase === "responding") {
      const messages = event.messages.filter(
        (m: any) => m.customType !== "canary" &&
          m.timestamp !== verifyResponseTimestamp
      );
      if (messages.length !== event.messages.length) return { messages };
    }
  });

  pi.on("message_end", (event, ctx) => {
    if (event.message.role !== "assistant") return;

    // --- Phase 2 completion: reset and let the response through ---
    if (phase === "responding") {
      phase = "idle";
      return;
    }

    if (phase !== "verifying") return;

    const tokens = currentTokens;
    currentTokens = null;

    // Skip aborted or errored turns
    const stopReason = (event.message as any).stopReason;
    if (stopReason === "aborted" || stopReason === "error") {
      phase = "idle";
      return;
    }

    // Yield to loop-police: a thinking part carrying one of its loop markers
    // means the turn is being handled elsewhere, so skip the canary check.
    const rawContent = event.message.content;
    if (Array.isArray(rawContent)) {
      for (const part of rawContent) {
        if (
          part && typeof part === "object" && "type" in part &&
          part.type === "thinking" && "thinking" in part &&
          typeof part.thinking === "string" &&
          (part.thinking.includes("[THINKING LOOP") || part.thinking.includes("[SEMANTIC LOOP"))
        ) {
          phase = "idle";
          return;
        }
      }
    }

    if (!tokens) { phase = "idle"; return; }

    // Extract text from the verification response
    const textParts: string[] = [];
    if (typeof rawContent === "string") {
      textParts.push(rawContent);
    } else if (Array.isArray(rawContent)) {
      for (const part of rawContent) {
        if (part && typeof part === "object" && "type" in part && part.type === "text" && "text" in part && typeof part.text === "string") {
          textParts.push(part.text);
        }
      }
    }
    const fullText = textParts.join("\n");

    // A token counts as returned when it appears anywhere in the reply text.
    const missing = tokens
      .map((t, i) => ({ label: `TOKEN_${i + 1}`, value: t, found: fullText.includes(t) }))
      .filter((x) => !x.found);

    // Replace the verification response with an empty assistant message.
    // AssistantMessage.role must stay "assistant" — no display flag exists on this type,
    // but content: [] renders as nothing in TUI. Filtered from Phase 2 context by timestamp.
    verifyResponseTimestamp = (event.message as any).timestamp ?? null;
    const emptyResponse = { ...event.message, content: [] } as any;

    if (missing.length === 0) {
      // Verification passed — proceed to Phase 2
      consecutiveFailures = 0;
      phase = "responding";
      pi.sendMessage(
        { customType: "canary", content: "✓", display: false },
        { triggerTurn: true, deliverAs: "steer" }
      );
      return { message: emptyResponse };
    }

    // Verification failed
    consecutiveFailures++;

    if (cfg.FAIL_COMPACT > 0 && consecutiveFailures >= cfg.FAIL_COMPACT) {
      consecutiveFailures = 0;
      phase = "idle";
      ctx.ui.notify(
        `Canary: ${cfg.FAIL_COMPACT} consecutive failure(s) — triggering compaction`,
        "warning"
      );
      ctx.compact({
        customInstructions: `The agent failed to recall canary tokens ${cfg.FAIL_COMPACT} time(s) in a row. After compaction, return all canary tokens before doing anything else.`,
      });
      return { message: emptyResponse };
    }

    // Proceed to Phase 2 despite failure (user still gets a response)
    ctx.ui.notify("⚠️ Canary check failed — context may be degraded", "warning");
    phase = "responding";
    pi.sendMessage(
      { customType: "canary", content: "⚠️", display: false },
      { triggerTurn: true, deliverAs: "steer" }
    );
    return { message: emptyResponse };
  });

  pi.registerCommand("canary", {
    description: "Show status; /canary set KEY=VAL [KEY=VAL ...]",
    handler: (args, ctx) => {
      const trimmed = args?.trim() ?? "";

      if (trimmed.startsWith("set ")) {
        const results: string[] = [];
        for (const pair of trimmed.slice(4).trim().split(/\s+/)) {
          const eq = pair.indexOf("=");
          const val = pair.slice(eq + 1);

          // Report malformed pairs instead of dropping them silently.
          if (eq <= 0 || val === "") {
            results.push(`invalid pair: ${pair} (expected KEY=VAL)`);
            continue;
          }

          const key = pair.slice(0, eq).toUpperCase();
          if (key === "COUNT") {
            const n = parseConfigInt(val);
            // A new COUNT invalidates the cached fixed-mode tokens.
            if (n !== null && n > 0) { cfg.COUNT = n; fixedTokens = null; results.push(`COUNT=${cfg.COUNT}`); }
            else results.push(`invalid COUNT: ${val}`);
          } else if (key === "POSITION") {
            if (val === "start" || val === "equidistant" || val === "end") {
              cfg.POSITION = val; results.push(`POSITION=${cfg.POSITION}`);
            } else results.push(`invalid POSITION: ${val} (start|equidistant|end)`);
          } else if (key === "VARIANT") {
            if (val === "fixed" || val === "variant") {
              cfg.VARIANT = val; if (val === "variant") fixedTokens = null;
              results.push(`VARIANT=${cfg.VARIANT}`);
            } else results.push(`invalid VARIANT: ${val} (fixed|variant)`);
          } else if (key === "FAIL_COMPACT") {
            const n = parseConfigInt(val);
            if (n !== null) { cfg.FAIL_COMPACT = n; results.push(`FAIL_COMPACT=${cfg.FAIL_COMPACT}`); }
            else results.push(`invalid FAIL_COMPACT: ${val}`);
          } else {
            results.push(`unknown: ${key}`);
          }
        }
        ctx.ui.notify(`Canary: ${results.join(", ")}`, "info");
        return;
      }

      ctx.ui.notify(
        [
          "Canary status",
          ` phase: ${phase}`,
          ` tokens per turn: ${cfg.COUNT}`,
          ` position: ${cfg.POSITION}`,
          ` variant: ${cfg.VARIANT}`,
          ` compact after N fails: ${cfg.FAIL_COMPACT === 0 ? "disabled" : cfg.FAIL_COMPACT}`,
          ` consecutive failures: ${consecutiveFailures}`,
          "",
          " config (/canary set = session only; edit canary.json for persistence):",
          ` COUNT=${cfg.COUNT}`,
          ` POSITION=${cfg.POSITION}`,
          ` VARIANT=${cfg.VARIANT}`,
          ` FAIL_COMPACT=${cfg.FAIL_COMPACT}`,
        ].join("\n"),
        "info"
      );
    },
  });

  pi.on("session_start", (_event, ctx) => {
    if (ctx.hasUI) {
      ctx.ui.notify(`Canary loaded — ${cfg.COUNT} token(s), position=${cfg.POSITION}, variant=${cfg.VARIANT}`, "info");
    }
  });
}
package/package.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "name": "pi-canary",
3
+ "version": "1.0.0",
4
+ "description": "Pi extension: silently verifies agent context awareness every turn using hidden canary tokens. KV-cache friendly.",
5
+ "keywords": ["pi-package", "pi", "pi-coding-agent", "extension", "context-awareness", "canary", "safety", "verification", "local-llm"],
6
+ "license": "MIT",
7
+ "pi": {
8
+ "extensions": ["./extensions"]
9
+ }
10
+ }