@exfil/canary 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +387 -0
  3. package/SECURITY.md +50 -0
  4. package/dist/entities.d.ts +43 -0
  5. package/dist/entities.d.ts.map +1 -0
  6. package/dist/entities.js +218 -0
  7. package/dist/entities.js.map +1 -0
  8. package/dist/index.d.ts +14 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +183 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/logger.d.ts +29 -0
  13. package/dist/logger.d.ts.map +1 -0
  14. package/dist/logger.js +50 -0
  15. package/dist/logger.js.map +1 -0
  16. package/dist/persistence.d.ts +48 -0
  17. package/dist/persistence.d.ts.map +1 -0
  18. package/dist/persistence.js +296 -0
  19. package/dist/persistence.js.map +1 -0
  20. package/dist/proxy/DownstreamManager.d.ts +55 -0
  21. package/dist/proxy/DownstreamManager.d.ts.map +1 -0
  22. package/dist/proxy/DownstreamManager.js +110 -0
  23. package/dist/proxy/DownstreamManager.js.map +1 -0
  24. package/dist/proxy/ProxyServer.d.ts +60 -0
  25. package/dist/proxy/ProxyServer.d.ts.map +1 -0
  26. package/dist/proxy/ProxyServer.js +480 -0
  27. package/dist/proxy/ProxyServer.js.map +1 -0
  28. package/dist/proxy/auditor/DualAuditor.d.ts +27 -0
  29. package/dist/proxy/auditor/DualAuditor.d.ts.map +1 -0
  30. package/dist/proxy/auditor/DualAuditor.js +44 -0
  31. package/dist/proxy/auditor/DualAuditor.js.map +1 -0
  32. package/dist/proxy/auditor/LLMAuditor.d.ts +16 -0
  33. package/dist/proxy/auditor/LLMAuditor.d.ts.map +1 -0
  34. package/dist/proxy/auditor/LLMAuditor.js +221 -0
  35. package/dist/proxy/auditor/LLMAuditor.js.map +1 -0
  36. package/dist/proxy/auditor/types.d.ts +54 -0
  37. package/dist/proxy/auditor/types.d.ts.map +1 -0
  38. package/dist/proxy/auditor/types.js +11 -0
  39. package/dist/proxy/auditor/types.js.map +1 -0
  40. package/dist/proxy/types.d.ts +71 -0
  41. package/dist/proxy/types.d.ts.map +1 -0
  42. package/dist/proxy/types.js +8 -0
  43. package/dist/proxy/types.js.map +1 -0
  44. package/dist/scanner.d.ts +37 -0
  45. package/dist/scanner.d.ts.map +1 -0
  46. package/dist/scanner.js +57 -0
  47. package/dist/scanner.js.map +1 -0
  48. package/dist/server.d.ts +59 -0
  49. package/dist/server.d.ts.map +1 -0
  50. package/dist/server.js +711 -0
  51. package/dist/server.js.map +1 -0
  52. package/dist/simhash.d.ts +65 -0
  53. package/dist/simhash.d.ts.map +1 -0
  54. package/dist/simhash.js +151 -0
  55. package/dist/simhash.js.map +1 -0
  56. package/dist/state.d.ts +86 -0
  57. package/dist/state.d.ts.map +1 -0
  58. package/dist/state.js +136 -0
  59. package/dist/state.js.map +1 -0
  60. package/dist/token.d.ts +70 -0
  61. package/dist/token.d.ts.map +1 -0
  62. package/dist/token.js +146 -0
  63. package/dist/token.js.map +1 -0
  64. package/dist/types.d.ts +190 -0
  65. package/dist/types.d.ts.map +1 -0
  66. package/dist/types.js +12 -0
  67. package/dist/types.js.map +1 -0
  68. package/package.json +52 -0
  69. package/proxy.example.json +53 -0
package/dist/token.js ADDED
@@ -0,0 +1,146 @@
1
+ /**
2
+ * Token generation, embedding, and detection utilities.
3
+ *
4
+ * Tokens are composed of invisible Unicode characters:
5
+ * U+2060 WORD JOINER (anchor) +
6
+ * 40 × Variation Selector characters (U+FE00–U+FE0F, 4 bits each) +
7
+ * U+2060 WORD JOINER (anchor)
8
+ *
9
+ * This gives 160 bits of entropy per token (40 nibbles × 4 bits).
10
+ * The token sequence is NEVER returned in tool outputs.
11
+ */
12
+ import { randomBytes, randomInt } from 'crypto';
/** U+2060 WORD JOINER: invisible, zero-width, not a space. Brackets every token. */
const ANCHOR = '\u2060';
/** First Variation Selector code point (U+FE00); nibble value n maps to VS_BASE + n. */
const VS_BASE = 0xfe00;
/**
 * Produces a new invisible Unicode canary sequence.
 *
 * Twenty crypto-random bytes are rendered as 40 hex digits (high nibble of
 * each byte first). Every digit becomes the Variation Selector at
 * VS_BASE + digit value, i.e. one of U+FE00–U+FE0F, and the resulting run
 * is wrapped in a leading and a trailing ANCHOR.
 *
 * @returns 42-character string (anchor + 40 VS chars + anchor). NEVER expose.
 */
export function generateTokenSequence() {
    const hexDigits = randomBytes(20).toString('hex');
    const selectors = Array.from(hexDigits, (digit) => String.fromCodePoint(VS_BASE + Number.parseInt(digit, 16)));
    return `${ANCHOR}${selectors.join('')}${ANCHOR}`;
}
/**
 * Creates an opaque token identifier from 16 crypto-random bytes.
 *
 * @returns 32-char lowercase hex string.
 */
export function generateTokenId() {
    const raw = randomBytes(16);
    return raw.toString('hex');
}
/**
 * Embeds a canary sequence into `content` at the specified position.
 *
 * - 'prefix': sequence prepended before all content.
 * - 'suffix': sequence appended after all content.
 * - 'both': sequence prepended AND appended.
 * - 'random_word_boundary': sequence inserted immediately after a randomly
 *                           chosen whitespace character (space, newline or
 *                           tab); falls back to prefix when the content
 *                           contains no such whitespace.
 *
 * @param content  The original content string.
 * @param sequence The generated canary sequence (invisible Unicode).
 * @param position Where to embed.
 * @returns Content with the canary sequence embedded.
 * @throws {TypeError} If `position` is not one of the four supported values.
 *   Previously such a call fell out of the switch and returned `undefined`,
 *   silently discarding the content.
 */
export function embedToken(content, sequence, position) {
    switch (position) {
        case 'prefix':
            return sequence + content;
        case 'suffix':
            return content + sequence;
        case 'both':
            return sequence + content + sequence;
        case 'random_word_boundary': {
            // Candidate insertion offsets: start of content, the offset just
            // AFTER each whitespace character, and end of content.
            const boundaries = [0];
            for (let i = 0; i < content.length; i++) {
                const ch = content[i];
                if (ch === ' ' || ch === '\n' || ch === '\t') {
                    boundaries.push(i + 1);
                }
            }
            boundaries.push(content.length);
            if (boundaries.length <= 2) {
                // Only the start/end entries exist (no whitespace) — fall back to prefix.
                return sequence + content;
            }
            // randomInt's upper bound is exclusive, so this selects an index in
            // [1, boundaries.length - 2]: never the leading 0 entry and never
            // the trailing content.length entry.
            const pick = randomInt(1, boundaries.length - 1);
            const insertAt = boundaries[pick];
            return content.slice(0, insertAt) + sequence + content.slice(insertAt);
        }
        default:
            // The message deliberately contains only the position, never the
            // sequence, which must not leak into errors or logs.
            throw new TypeError(`embedToken: unsupported embed position: ${String(position)}`);
    }
}
/**
 * Compiles a RegExp that finds `sequence` as a literal anywhere in a string.
 * The sequence is walked one code point at a time and each code point is
 * passed through escapeRegexChar before being added to the pattern.
 *
 * @param sequence The canary sequence to search for.
 * @returns A global RegExp for the sequence.
 */
export function buildTokenRegex(sequence) {
    let pattern = '';
    for (const codePoint of sequence) {
        pattern += escapeRegexChar(codePoint);
    }
    return new RegExp(pattern, 'g');
}
/**
 * Fast check: returns true if `content` contains `sequence`.
 *
 * Implemented with String.prototype.includes only; no regex is involved.
 *
 * A missing, non-string or empty `sequence` never matches. Without this
 * guard `includes('')` is true for every string, and `includes(undefined)`
 * searches for the literal text "undefined", so a token without a usable
 * sequence would flag unrelated content as leaked.
 *
 * @param content  The string to search within.
 * @param sequence The canary sequence to find.
 * @returns true when `sequence` is a non-empty string found in `content`.
 */
export function containsSequence(content, sequence) {
    if (typeof sequence !== 'string' || sequence.length === 0) {
        return false;
    }
    return content.includes(sequence);
}
/**
 * Counts the number of non-overlapping occurrences of `sequence` in `content`.
 *
 * Matching is a plain left-to-right substring search; after each hit the
 * search resumes just past the end of that hit, which is what makes the
 * occurrences non-overlapping.
 *
 * A missing, non-string or empty `sequence` counts as zero occurrences. An
 * empty pattern would otherwise match at every position in `content`.
 *
 * @param content  The string to search within.
 * @param sequence The canary sequence to count.
 * @returns The number of non-overlapping matches (0 when there are none).
 */
export function countOccurrences(content, sequence) {
    if (typeof sequence !== 'string' || sequence.length === 0) {
        return 0;
    }
    let count = 0;
    let from = content.indexOf(sequence);
    while (from !== -1) {
        count += 1;
        from = content.indexOf(sequence, from + sequence.length);
    }
    return count;
}
126
+ // ---------------------------------------------------------------------------
127
+ // Internal helpers
128
+ // ---------------------------------------------------------------------------
/**
 * Escapes a single character for use inside a RegExp pattern.
 *
 * - Regex syntax characters are prefixed with a backslash.
 * - Code points above U+FFFF are emitted as a `\uXXXX\uXXXX` surrogate-pair
 *   escape. That form matches the character whether or not the pattern is
 *   compiled with the `u` flag. The `\u{...}` form is only understood with
 *   the `u` flag; without it the pattern matches a literal "u" instead.
 * - Everything else is returned unchanged.
 *
 * @param ch A string holding one character (one code point).
 * @returns A pattern fragment that matches `ch` literally.
 */
function escapeRegexChar(ch) {
    // Characters that have special meaning in regex syntax.
    const SPECIAL = /[.*+?^${}()|[\]\\]/;
    if (SPECIAL.test(ch)) {
        return `\\${ch}`;
    }
    const cp = ch.codePointAt(0);
    if (cp > 0xffff) {
        // cp > 0xffff means ch starts with a valid high/low surrogate pair,
        // so code units 0 and 1 are exactly that pair.
        const high = ch.charCodeAt(0).toString(16).padStart(4, '0');
        const low = ch.charCodeAt(1).toString(16).padStart(4, '0');
        return `\\u${high}\\u${low}`;
    }
    return ch;
}
146
+ //# sourceMappingURL=token.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"token.js","sourceRoot":"","sources":["../src/token.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAGhD,+DAA+D;AAC/D,MAAM,MAAM,GAAG,QAAQ,CAAC;AAExB,wDAAwD;AACxD,MAAM,OAAO,GAAG,MAAM,CAAC;AAEvB;;;;;;;GAOG;AACH,MAAM,UAAU,qBAAqB;IACnC,MAAM,KAAK,GAAG,WAAW,CAAC,EAAE,CAAC,CAAC;IAC9B,IAAI,GAAG,GAAG,MAAM,CAAC;IACjB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,EAAE,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC;QAC9B,MAAM,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC;QACvB,GAAG,IAAI,MAAM,CAAC,aAAa,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;QAC1C,GAAG,IAAI,MAAM,CAAC,aAAa,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;IAC5C,CAAC;IACD,GAAG,IAAI,MAAM,CAAC;IACd,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,eAAe;IAC7B,OAAO,WAAW,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;AACzC,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,UAAU,CACxB,OAAe,EACf,QAAgB,EAChB,QAAuB;IAEvB,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,QAAQ;YACX,OAAO,QAAQ,GAAG,OAAO,CAAC;QAE5B,KAAK,QAAQ;YACX,OAAO,OAAO,GAAG,QAAQ,CAAC;QAE5B,KAAK,MAAM;YACT,OAAO,QAAQ,GAAG,OAAO,GAAG,QAAQ,CAAC;QAEvC,KAAK,sBAAsB,CAAC,CAAC,CAAC;YAC5B,4EAA4E;YAC5E,MAAM,UAAU,GAAa,CAAC,CAAC,CAAC,CAAC;YACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;oBACrE,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBACzB,CAAC;YACH,CAAC;YACD,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YAEhC,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC3B,yDAAyD;gBACzD,OAAO,QAAQ,GAAG,OAAO,CAAC;YAC5B,CAAC;YAED,oEAAoE;YACpE,oEAAoE;YACpE,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACjD,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;YAClC,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,GAAG,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACzE,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC9C,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,CAAC;SAC1B,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,eAAe,CAAC
,EAAE,CAAC,CAAC;SAChC,IAAI,CAAC,EAAE,CAAC,CAAC;IACZ,OAAO,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AAClC,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,QAAgB;IAChE,OAAO,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;AACpC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,QAAgB;IAChE,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrC,OAAO,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AACtC,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E;;;GAGG;AACH,SAAS,eAAe,CAAC,EAAU;IACjC,wDAAwD;IACxD,MAAM,OAAO,GAAG,oBAAoB,CAAC;IACrC,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;QACrB,OAAO,KAAK,EAAE,EAAE,CAAC;IACnB,CAAC;IACD,MAAM,EAAE,GAAG,EAAE,CAAC,WAAW,CAAC,CAAC,CAAE,CAAC;IAC9B,IAAI,EAAE,GAAG,MAAM,EAAE,CAAC;QAChB,sDAAsD;QACtD,OAAO,OAAO,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC;IACnC,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC"}
@@ -0,0 +1,190 @@
1
+ /**
2
+ * Core type definitions for exfil/canary.
3
+ *
4
+ * SECURITY NOTE: The `sequence` field of CanaryToken contains raw Unicode
5
+ * variation-selector characters and MUST NEVER appear in any tool output,
6
+ * log line, or persisted plaintext. Apart from extracted entity values (see the v1.1 note below), every other field is safe to expose.
7
+ *
8
+ * SECURITY NOTE (v1.1): The `value` field of EntityCanary contains the
9
+ * actual extracted secret/entity. Treat with the same sensitivity as `sequence`.
10
+ */
/** Origin of the content a token was embedded into. */
export type SourceType =
    | 'tool_result'
    | 'file_read'
    | 'api_response'
    | 'database_row'
    | 'user_message'
    | 'other';
/** Placement of the token sequence within the content. */
export type EmbedPosition =
    | 'prefix'
    | 'suffix'
    | 'both'
    | 'random_word_boundary';
/** Lifecycle status of a canary token. */
export type TokenStatus =
    | 'active'
    | 'expired'
    | 'unknown';
/** The server's configured reaction when leakage is detected. */
export type ResponseMode =
    | 'log'
    | 'halt'
    | 'alert';
/** The action that was actually carried out when a leakage event was recorded. */
export type ActionTaken =
    | 'logged'
    | 'halted'
    | 'alerted'
    | 'none';
/** The tool surface that detected the token. */
export type DetectionMethod =
    | 'check_leakage'
    | 'scan_outbound';
/** Minimum severity for log output. */
export type LogLevel =
    | 'debug'
    | 'info'
    | 'warn'
    | 'error';
/** Classification of an extracted named entity. */
export type EntityType =
    | 'api_key'
    | 'bearer_token'
    | 'credential_pair'
    | 'email'
    | 'url'
    | 'ip_address'
    | 'uuid'
    | 'high_entropy_string';
/**
 * Named entity pulled out of content at wrap time.
 *
 * SECURITY: `value` holds the extracted string itself, which may be a real
 * secret. Handle it exactly like `sequence`: never log it, never return it
 * in tool output, never persist it in plaintext.
 */
export interface EntityCanary {
    entity_type: EntityType;
    /**
     * SENSITIVE: the extracted entity value. NEVER expose in tool outputs or logs.
     * Becomes null after a server restart (RC-1 — not persisted; cannot re-detect).
     */
    value: string | null;
    /**
     * The surrounding context, with the value itself redacted to `[ENTITY]`.
     * Safe to include in reports and logs.
     */
    context_hint: string;
}
/**
 * On-disk form of an EntityCanary; `value` is deliberately left out.
 * Kept for reporting/audit after a restart, but cannot be used to re-detect.
 */
export interface PersistedEntityCanary {
    entity_type: EntityType;
    context_hint: string;
}
/** Internal outcome of scanning outbound data for one entity canary. */
export interface EntityScanResult {
    token_id: string;
    entity_type: EntityType;
    /** The matched entity's context_hint — safe to include in leakage events. */
    context_hint: string;
}
/**
 * Canary token embedded into content processed by an agent.
 *
 * SECURITY: `sequence` carries the real Unicode payload and MUST NOT be
 * serialised into tool responses or log output.
 */
export interface CanaryToken {
    /** 32-char lowercase hex string; safe to expose in operator tooling. */
    token_id: string;
    /**
     * Raw Unicode string: WORD JOINER + 40 Variation Selectors + WORD JOINER.
     * NEVER expose in tool outputs or logs.
     */
    sequence: string;
    source_type: SourceType;
    source_server?: string;
    source_tool?: string;
    source_call_id?: string;
    embed_position: EmbedPosition;
    /** Unix epoch milliseconds. */
    created_at: number;
    /** Unix epoch milliseconds. */
    expires_at: number;
    leaked: boolean;
    leakage_event_ids: Array<string>;
    /**
     * v1.1: Named entities extracted from the wrapped content.
     * Their values are sensitive — never expose. The array is empty when
     * entity extraction is disabled or no entities were found.
     */
    entity_canaries: Array<EntityCanary>;
    /**
     * v1.6: SimHash fingerprint of the wrapped content (64-bit, in-memory only).
     * null when the content was too short for reliable fingerprinting (< 5 bigrams).
     * BigInt is not JSON-serialisable — intentionally excluded from persistence (RC-1).
     */
    simhash: bigint | null;
}
/** One detected leakage incident. */
export interface LeakageEvent {
    /** UUID-style hex identifier. */
    event_id: string;
    token_id: string;
    /** Unix epoch milliseconds. */
    detected_at: number;
    detection_method: DetectionMethod;
    target_server?: string;
    target_tool?: string;
    target_call_id?: string;
    turn_number?: number;
    action_taken: ActionTaken;
    webhook_attempted: boolean;
    webhook_delivered: boolean | null;
}
/** Session state held in memory. Never serialised in full (sequence omitted). */
export interface SessionState {
    session_id: string;
    /** Unix epoch milliseconds. */
    created_at: number;
    tokens: Map<string, CanaryToken>;
    leakage_events: Map<string, LeakageEvent>;
    /** Monotonically increasing counter used to derive token IDs. */
    token_counter: number;
}
/** Validated configuration, derived from environment variables at startup. */
export interface CanaryConfig {
    persist_path: string | null;
    /** Seconds; range 60–86400. */
    token_ttl_seconds: number;
    /** Validated HTTPS URL, or null. */
    alert_webhook: string | null;
    /** Optional HMAC secret for webhook signing. */
    webhook_secret: string | null;
    response_mode: ResponseMode;
    log_level: LogLevel;
}
/**
 * Outcome of scanning for a single token.
 * Internal use only — NOT returned to agents.
 */
export interface ScanResult {
    token_id: string;
    found: boolean;
    match_count: number;
}
/**
 * Safe result shape that scan_outbound returns to the agent (RC-3).
 * Carries NO token identifiers, sequences, or entity values.
 */
export interface ScanOutboundAgentResult {
    clean: boolean;
    tokens_scanned: number;
    scan_duration_ms: number;
    /** Count of unicode-marker leakage detections (v1.0). */
    leakage_count: number;
    /** Count of named-entity leakage detections (v1.1). */
    entity_leakage_count: number;
    /** Count of SimHash semantic-similarity leakage detections (v1.6). */
    semantic_leakage_count: number;
}
/**
 * On-disk form of a CanaryToken; `sequence` is deliberately left out (RC-1).
 * After a restart, existing tokens are still tracked by ID/metadata but the
 * embedded sequence cannot be re-detected. This is a documented limitation.
 */
export interface PersistedToken {
    token_id: string;
    source_type: SourceType;
    source_server?: string;
    source_tool?: string;
    source_call_id?: string;
    embed_position: EmbedPosition;
    created_at: number;
    expires_at: number;
    leaked: boolean;
    leakage_event_ids: Array<string>;
    /** v1.1: Entity metadata (values omitted — same RC-1 treatment as sequence). */
    entity_canaries: Array<PersistedEntityCanary>;
}
/** Complete on-disk representation of session state. */
export interface PersistedState {
    version: number;
    session_id: string;
    created_at: number;
    token_counter: number;
    tokens: Array<PersistedToken>;
    leakage_events: Array<LeakageEvent>;
}
190
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,4CAA4C;AAC5C,MAAM,MAAM,UAAU,GAClB,aAAa,GACb,WAAW,GACX,cAAc,GACd,cAAc,GACd,cAAc,GACd,OAAO,CAAC;AAEZ,+DAA+D;AAC/D,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,GAAG,sBAAsB,CAAC;AAElF,0CAA0C;AAC1C,MAAM,MAAM,WAAW,GAAG,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;AAE3D,qDAAqD;AACrD,MAAM,MAAM,YAAY,GAAG,KAAK,GAAG,MAAM,GAAG,OAAO,CAAC;AAEpD,wEAAwE;AACxE,MAAM,MAAM,WAAW,GAAG,QAAQ,GAAG,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC;AAEnE,6CAA6C;AAC7C,MAAM,MAAM,eAAe,GAAG,eAAe,GAAG,eAAe,CAAC;AAEhE,uCAAuC;AACvC,MAAM,MAAM,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;AAI3D,mDAAmD;AACnD,MAAM,MAAM,UAAU,GAClB,SAAS,GACT,cAAc,GACd,iBAAiB,GACjB,OAAO,GACP,KAAK,GACL,YAAY,GACZ,MAAM,GACN,qBAAqB,CAAC;AAE1B;;;;;;GAMG;AACH,MAAM,WAAW,YAAY;IAC3B,WAAW,EAAE,UAAU,CAAC;IACxB;;;OAGG;IACH,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB;;;OAGG;IACH,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,WAAW,EAAE,UAAU,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,8EAA8E;AAC9E,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,UAAU,CAAC;IACxB,gFAAgF;IAChF,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;;;;GAKG;AACH,MAAM,WAAW,WAAW;IAC1B,wEAAwE;IACxE,QAAQ,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,UAAU,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,aAAa,CAAC;IAC9B,+BAA+B;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,OAAO,CAAC;IAChB,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B;;;;OAIG;IACH,eAAe,EAAE,YAAY,EAAE,CAAC;IAChC;;;;OAIG;IACH,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB;AAED,0CAA0C;AAC1C,MAAM,WAAW,YAAY;IAC3B,iCAAiC;IACjC,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,WAAW,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,eAAe,CAAC;IAClC,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,WAAW,CAAC;IAC1B,iBAAiB,EAAE,OAAO,CAAC;IAC3B,
iBAAiB,EAAE,OAAO,GAAG,IAAI,CAAC;CACnC;AAED,4EAA4E;AAC5E,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;IACjC,cAAc,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IAC1C,iEAAiE;IACjE,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,6EAA6E;AAC7E,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,+BAA+B;IAC/B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,oCAAoC;IACpC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gDAAgD;IAChD,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,aAAa,EAAE,YAAY,CAAC;IAC5B,SAAS,EAAE,QAAQ,CAAC;CACrB;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,OAAO,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;;GAGG;AACH,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,OAAO,CAAC;IACf,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,yDAAyD;IACzD,aAAa,EAAE,MAAM,CAAC;IACtB,uDAAuD;IACvD,oBAAoB,EAAE,MAAM,CAAC;IAC7B,sEAAsE;IACtE,sBAAsB,EAAE,MAAM,CAAC;CAChC;AAED;;;;GAIG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,UAAU,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,aAAa,CAAC;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,OAAO,CAAC;IAChB,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,gFAAgF;IAChF,eAAe,EAAE,qBAAqB,EAAE,CAAC;CAC1C;AAED,oDAAoD;AACpD,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,MAAM,EAAE,cAAc,EAAE,CAAC;IACzB,cAAc,EAAE,YAAY,EAAE,CAAC;CAChC"}
package/dist/types.js ADDED
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Core type definitions for exfil/canary.
3
+ *
4
+ * SECURITY NOTE: The `sequence` field of CanaryToken contains raw Unicode
5
+ * variation-selector characters and MUST NEVER appear in any tool output,
6
+ * log line, or persisted plaintext. Apart from extracted entity values (see the v1.1 note below), every other field is safe to expose.
7
+ *
8
+ * SECURITY NOTE (v1.1): The `value` field of EntityCanary contains the
9
+ * actual extracted secret/entity. Treat with the same sensitivity as `sequence`.
10
+ */
11
+ export {};
12
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG"}
package/package.json ADDED
@@ -0,0 +1,52 @@
1
+ {
2
+ "name": "@exfil/canary",
3
+ "version": "1.0.0",
4
+ "description": "Transparent MCP proxy that watermarks agent tool responses and blocks data exfiltration caused by prompt injection.",
5
+ "license": "MIT",
6
+ "publishConfig": {
7
+ "access": "public"
8
+ },
9
+ "type": "module",
10
+ "main": "dist/index.js",
11
+ "bin": {
12
+ "exfil-canary": "dist/index.js"
13
+ },
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "https://github.com/exfil-hq/canary.git"
17
+ },
18
+ "homepage": "https://github.com/exfil-hq/canary#readme",
19
+ "files": [
20
+ "dist",
21
+ "README.md",
22
+ "SECURITY.md",
23
+ "LICENSE",
24
+ "proxy.example.json"
25
+ ],
26
+ "scripts": {
27
+ "build": "tsc",
28
+ "start": "node dist/index.js",
29
+ "dev": "tsc --watch",
30
+ "test": "vitest run",
31
+ "test:watch": "vitest"
32
+ },
33
+ "dependencies": {
34
+ "@modelcontextprotocol/sdk": "^1.0.0"
35
+ },
36
+ "devDependencies": {
37
+ "@types/node": "^20.19.37",
38
+ "typescript": "^5.4.0",
39
+ "vitest": "^2.1.9"
40
+ },
41
+ "engines": {
42
+ "node": ">=18.0.0"
43
+ },
44
+ "keywords": [
45
+ "mcp",
46
+ "model-context-protocol",
47
+ "security",
48
+ "prompt-injection",
49
+ "canary-token",
50
+ "data-exfiltration"
51
+ ]
52
+ }
@@ -0,0 +1,53 @@
1
+ {
2
+ "servers": [
3
+ {
4
+ "id": "filesystem",
5
+ "command": "npx",
6
+ "args": ["-y", "@modelcontextprotocol/server-filesystem", "/path/to/your/working/dir"]
7
+ },
8
+ {
9
+ "id": "web",
10
+ "command": "npx",
11
+ "args": ["-y", "@modelcontextprotocol/server-fetch"]
12
+ },
13
+ {
14
+ "id": "github",
15
+ "command": "npx",
16
+ "args": ["-y", "@modelcontextprotocol/server-github"],
17
+ "env": {
18
+ "GITHUB_PERSONAL_ACCESS_TOKEN": "your-token-here"
19
+ }
20
+ }
21
+ ],
22
+ "_comment_auditors": "Optional dual-LLM auditor. Remove this block to disable. Requires two entries with different providers.",
23
+ "auditors": [
24
+ {
25
+ "provider": "anthropic",
26
+ "model": "claude-haiku-4-5-20251001",
27
+ "api_key_env": "ANTHROPIC_API_KEY",
28
+ "timeout_ms": 5000
29
+ },
30
+ {
31
+ "provider": "openai",
32
+ "model": "gpt-4o-mini",
33
+ "api_key_env": "OPENAI_API_KEY",
34
+ "timeout_ms": 5000
35
+ }
36
+ ],
37
+ "audit_timeout_action": "block",
38
+
39
+ "_comment_allowed_domains": "Fail-closed. Absent or [] = all outbound URLs blocked. List every domain your agent legitimately calls.",
40
+ "allowed_domains": [
41
+ "api.github.com",
42
+ "*.githubusercontent.com",
43
+ "registry.npmjs.org",
44
+ "api.openai.com"
45
+ ],
46
+
47
+ "_comment_allowed_tools": "Optional tool allowlist. Absent or [] = all tools allowed. Non-empty = only listed tools callable. Entries ending with * are prefix wildcards (e.g. filesystem__* allows all filesystem tools).",
48
+ "allowed_tools": [
49
+ "filesystem__*",
50
+ "web__fetch",
51
+ "github__create_issue"
52
+ ]
53
+ }