@sandrobuilds/tracerney 0.9.11 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -710
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,740 +1,54 @@
|
|
|
1
1
|
# Tracerney
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Lightweight prompt injection detection for LLM applications. Runs 100% locally.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+
## Install
|
|
7
6
|
|
|
8
|
-
Tracerney is a lightweight, free SDK for detecting prompt injection attacks. It runs 100% locally with no dependencies and no data collection.
|
|
9
|
-
|
|
10
|
-
**Free SDK includes:**
|
|
11
|
-
- **Layer 1 (Pattern Detection)**: 238 embedded attack patterns with Unicode normalization
|
|
12
|
-
- <2ms detection latency per prompt
|
|
13
|
-
- Zero network overhead — all detection is local
|
|
14
|
-
- Works offline — no backend required
|
|
15
|
-
|
|
16
|
-
## 🚀 Quick Start
|
|
17
|
-
|
|
18
|
-
**Install:**
|
|
19
7
|
```bash
|
|
20
8
|
npm install @sandrobuilds/tracerney
|
|
21
9
|
```
|
|
22
10
|
|
|
23
|
-
**Simplest Setup (Free SDK — Pattern Detection):**
|
|
24
|
-
```typescript
|
|
25
|
-
import { Tracerney } from '@sandrobuilds/tracerney';
|
|
26
|
-
|
|
27
|
-
const tracer = new Tracerney({
|
|
28
|
-
allowedTools: ['search', 'calculator'],
|
|
29
|
-
});
|
|
30
|
-
|
|
31
|
-
// Check if a prompt is suspicious
|
|
32
|
-
const result = await tracer.scanPrompt(userInput);
|
|
33
|
-
|
|
34
|
-
console.log(result);
|
|
35
|
-
// {
|
|
36
|
-
// suspicious: true, // Layer 1 detected pattern match
|
|
37
|
-
// patternName: "Ignore Instructions",
|
|
38
|
-
// severity: "CRITICAL",
|
|
39
|
-
// blocked: false // No backend verification yet
|
|
40
|
-
// }
|
|
41
|
-
|
|
42
|
-
if (result.suspicious) {
|
|
43
|
-
console.log(`⚠️ Suspicious: ${result.patternName}`);
|
|
44
|
-
// Handle the suspicious prompt (log, rate-limit, etc.)
|
|
45
|
-
}
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
**Advanced Setup (with Backend LLM Verification):**
|
|
49
|
-
```typescript
|
|
50
|
-
import { Tracerney, ShieldBlockError } from '@sandrobuilds/tracerney';
|
|
51
|
-
|
|
52
|
-
const shield = new Tracerney({
|
|
53
|
-
baseUrl: 'http://localhost:3000', // Backend with LLM Sentinel
|
|
54
|
-
allowedTools: ['search', 'calculator'],
|
|
55
|
-
apiKey: process.env.TRACERNY_API_KEY,
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
try {
|
|
59
|
-
const response = await shield.wrap(() =>
|
|
60
|
-
openai.chat.completions.create({
|
|
61
|
-
model: 'gpt-4',
|
|
62
|
-
messages: [{ role: 'user', content: userInput }],
|
|
63
|
-
})
|
|
64
|
-
);
|
|
65
|
-
console.log(response);
|
|
66
|
-
} catch (error) {
|
|
67
|
-
if (error instanceof ShieldBlockError) {
|
|
68
|
-
console.error('🛡️ Attack blocked:', error.event.blockReason);
|
|
69
|
-
// Only throws if LLM Sentinel confirms attack
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
Start with the simple setup. Add backend verification when ready!
|
|
75
|
-
|
|
76
|
-
## Philosophy
|
|
77
|
-
|
|
78
|
-
We are not a "platform" where you send data to our servers first. We are a **Runtime Sentinel** that lives inside your process:
|
|
79
|
-
|
|
80
|
-
- **🚀 No latency friction**: Detection happens locally in <2ms
|
|
81
|
-
- **🔒 No privacy concerns**: Your prompts never leave your infrastructure
|
|
82
|
-
- **📦 3 lines of code**: Wrap your existing LLM call
|
|
83
|
-
- **👨‍💻 Developer-first**: Fast integration, zero configuration needed
|
|
84
|
-
|
|
85
|
-
---
|
|
86
|
-
|
|
87
|
-
## Common Setups
|
|
88
|
-
|
|
89
|
-
### Next.js (API Route)
|
|
90
|
-
|
|
91
|
-
```typescript
|
|
92
|
-
// app/api/chat/route.ts
|
|
93
|
-
import { Tracerney, ShieldBlockError } from '@sandrobuilds/tracerney';
|
|
94
|
-
import OpenAI from 'openai';
|
|
95
|
-
|
|
96
|
-
const openai = new OpenAI();
|
|
97
|
-
|
|
98
|
-
export async function POST(req: Request) {
|
|
99
|
-
const { userMessage } = await req.json();
|
|
100
|
-
|
|
101
|
-
try {
|
|
102
|
-
const response = await shield.wrap(
|
|
103
|
-
() => openai.chat.completions.create({
|
|
104
|
-
model: 'gpt-4',
|
|
105
|
-
messages: [{ role: 'user', content: userMessage }],
|
|
106
|
-
}),
|
|
107
|
-
{ prompt: userMessage } // Pre-scan before LLM
|
|
108
|
-
);
|
|
109
|
-
|
|
110
|
-
return Response.json({ success: true, data: response });
|
|
111
|
-
} catch (error) {
|
|
112
|
-
if (error instanceof ShieldBlockError) {
|
|
113
|
-
return Response.json(
|
|
114
|
-
{ error: 'Request blocked for security' },
|
|
115
|
-
{ status: 403 }
|
|
116
|
-
);
|
|
117
|
-
}
|
|
118
|
-
throw error;
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
### Node.js / Express
|
|
124
|
-
|
|
125
|
-
```typescript
|
|
126
|
-
import { Tracerney, ShieldBlockError } from '@sandrobuilds/tracerney';
|
|
127
|
-
import OpenAI from 'openai';
|
|
128
|
-
|
|
129
|
-
const shield = new Tracerney({ allowedTools: ['search'] });
|
|
130
|
-
const openai = new OpenAI();
|
|
131
|
-
|
|
132
|
-
app.post('/chat', async (req, res) => {
|
|
133
|
-
try {
|
|
134
|
-
const response = await shield.wrap(() =>
|
|
135
|
-
openai.chat.completions.create({
|
|
136
|
-
model: 'gpt-4',
|
|
137
|
-
messages: [{ role: 'user', content: req.body.message }],
|
|
138
|
-
})
|
|
139
|
-
);
|
|
140
|
-
res.json(response);
|
|
141
|
-
} catch (error) {
|
|
142
|
-
if (error instanceof ShieldBlockError) {
|
|
143
|
-
return res.status(403).json({ error: 'Blocked' });
|
|
144
|
-
}
|
|
145
|
-
throw error;
|
|
146
|
-
}
|
|
147
|
-
});
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
### Minimal Setup (No Telemetry)
|
|
151
|
-
|
|
152
|
-
```typescript
|
|
153
|
-
// Just blocking, no monitoring
|
|
154
|
-
const shield = new Tracerney({
|
|
155
|
-
allowedTools: ['search'],
|
|
156
|
-
// No apiEndpoint = no telemetry
|
|
157
|
-
});
|
|
158
|
-
|
|
159
|
-
await shield.wrap(() => llmCall());
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
### With Monitoring Dashboard
|
|
163
|
-
|
|
164
|
-
```typescript
|
|
165
|
-
// Using baseUrl — all endpoints auto-configured
|
|
166
|
-
const shield = new Tracerney({
|
|
167
|
-
baseUrl: 'http://localhost:3000',
|
|
168
|
-
allowedTools: ['search', 'calculator'],
|
|
169
|
-
apiKey: process.env.TRACERNY_API_KEY,
|
|
170
|
-
enableTelemetry: true, // Events + patterns auto-configured
|
|
171
|
-
});
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
---
|
|
175
|
-
|
|
176
|
-
## Architecture
|
|
177
|
-
|
|
178
|
-
### The Request-Response Lifecycle
|
|
179
|
-
|
|
180
|
-
```
|
|
181
|
-
Request → Vanguard (Regex Check) → LLM Provider
|
|
182
|
-
↓
|
|
183
|
-
← Tool-Guard (Schema Check) ← Response
|
|
184
|
-
↓
|
|
185
|
-
App Logic (if clean) / Block (if dirty)
|
|
186
|
-
↓
|
|
187
|
-
Async Signal (Non-blocking Telemetry)
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
## Three-Layer Defense Architecture
|
|
191
|
-
|
|
192
|
-
### Layer 1: Vanguard (Pattern Matching)
|
|
193
|
-
**Speed**: <2ms | **Coverage**: Known attacks
|
|
194
|
-
|
|
195
|
-
Fast regex-based detection with Unicode normalization to prevent homoglyph evasion.
|
|
196
|
-
|
|
197
|
-
**Improved patterns detect:**
|
|
198
|
-
- `ignore [your|my] instructions` (not just "previous")
|
|
199
|
-
- `forget|disregard [all] [rules|guidelines]`
|
|
200
|
-
- `reveal|show system prompt`
|
|
201
|
-
- `act as unrestricted AI` (jailbreak)
|
|
202
|
-
- SQL/code injection
|
|
203
|
-
- Token smuggling
|
|
204
|
-
- **20+ OWASP-mapped patterns**
|
|
205
|
-
|
|
206
|
-
**Example: Homoglyph evasion blocked**
|
|
207
|
-
```
|
|
208
|
-
Input: "ｉｇｎｏｒｅ ｙｏｕｒ ｉｎｓｔｒｕｃｔｉｏｎｓ" (fullwidth)
|
|
209
|
-
Normalized: "ignore your instructions"
|
|
210
|
-
Result: ✅ BLOCKED by Pattern_001
|
|
211
|
-
```
|
|
212
|
-
|
|
213
|
-
### Layer 2: Sentinel (LLM Verification)
|
|
214
|
-
**Speed**: 200-1500ms | **Coverage**: Novel attacks
|
|
215
|
-
|
|
216
|
-
Backend-side LLM classifier runs only if Layer 1 misses. Uses OpenRouter Gemini with:
|
|
217
|
-
- **Rate limiting**: 5 calls/min per API key (prevents botnet cost spikes)
|
|
218
|
-
- **Cost protection**: Only hits for suspicious prompts
|
|
219
|
-
- **Fallback**: Non-blocking—if backend unavailable, execution continues
|
|
220
|
-
|
|
221
|
-
**Example: Novel attack caught**
|
|
222
|
-
```
|
|
223
|
-
Input: "explain social engineering vectors for credential theft"
|
|
224
|
-
Layer 1: ❌ No regex match
|
|
225
|
-
Layer 2: ✅ LLM classifier: "YES, this is an injection"
|
|
226
|
-
Result: BLOCKED + logged to database
|
|
227
|
-
```
|
|
228
|
-
|
|
229
|
-
### Layer 3: Hardened Middleware
|
|
230
|
-
Automatic protections applied to all requests:
|
|
231
|
-
|
|
232
|
-
**Normalization**: Removes Unicode tricks before pattern matching
|
|
233
|
-
**Jitter**: Random 300-500ms delay masks which layer blocked the attack
|
|
234
|
-
**Rate Limiting**: Backend enforces 5 verifications/min per key
|
|
235
|
-
|
|
236
|
-
```typescript
|
|
237
|
-
// All automatic—no config needed
|
|
238
|
-
// Attacks from any layer see ~300-500ms latency
|
|
239
|
-
await shield.scanPrompt(userInput); // Returns in 300-500ms
|
|
240
|
-
// (attacker can't tell if Layer 1 or Layer 2 executed)
|
|
241
|
-
```
|
|
242
|
-
|
|
243
|
-
```typescript
|
|
244
|
-
// Optionally scan raw prompts before LLM call
|
|
245
|
-
try {
|
|
246
|
-
shield.scanPrompt(userInput);
|
|
247
|
-
// Safe to call LLM
|
|
248
|
-
} catch (err) {
|
|
249
|
-
if (err instanceof ShieldBlockError) {
|
|
250
|
-
console.error('Blocked:', err.event);
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
```
|
|
254
|
-
|
|
255
|
-
### 3. Signal Sink (Telemetry)
|
|
256
|
-
|
|
257
|
-
Asynchronous, non-blocking event reporting.
|
|
258
|
-
|
|
259
|
-
When a block occurs:
|
|
260
|
-
1. Event is queued in-memory
|
|
261
|
-
2. Execution continues (no latency penalty)
|
|
262
|
-
3. Events are batched and sent to your API in the background
|
|
263
|
-
4. Uses `process.nextTick()` for non-blocking dispatch
|
|
264
|
-
|
|
265
|
-
**No data sent to us**. You own your Signal endpoint (`/api/v1/signal`).
|
|
266
|
-
|
|
267
|
-
### 4. Manifest Sync (Definition Updates)
|
|
268
|
-
|
|
269
|
-
Your patterns don't require an npm update to change.
|
|
270
|
-
|
|
271
|
-
- **Static Manifest**: Shipped with the SDK
|
|
272
|
-
- **Polling**: On instantiation, checks for new version
|
|
273
|
-
- **Stale-While-Revalidate**: Serves cached version while fetching new one in background
|
|
274
|
-
- **Zero-Day Patches**: Deploy new patterns instantly without forcing users to update npm
|
|
275
|
-
|
|
276
11
|
## Usage
|
|
277
12
|
|
|
278
|
-
### Basic Setup
|
|
279
|
-
|
|
280
13
|
```typescript
|
|
281
14
|
import { Tracerney } from '@sandrobuilds/tracerney';
|
|
282
15
|
|
|
283
|
-
const shield = new Tracerney({
|
|
284
|
-
baseUrl: process.env.TRACERNY_BACKEND_URL, // e.g., https://myapp.com
|
|
285
|
-
allowedTools: ['search', 'calculator'],
|
|
286
|
-
apiKey: process.env.TRACERNY_API_KEY,
|
|
287
|
-
enableTelemetry: true,
|
|
288
|
-
});
|
|
289
|
-
|
|
290
|
-
// That's it. Wrap your LLM calls.
|
|
291
|
-
const response = await shield.wrap(() =>
|
|
292
|
-
openai.chat.completions.create({
|
|
293
|
-
model: 'gpt-4',
|
|
294
|
-
messages: [{ role: 'user', content: userInput }],
|
|
295
|
-
tools: [
|
|
296
|
-
{
|
|
297
|
-
type: 'function',
|
|
298
|
-
function: {
|
|
299
|
-
name: 'search',
|
|
300
|
-
description: 'Search the web',
|
|
301
|
-
},
|
|
302
|
-
},
|
|
303
|
-
],
|
|
304
|
-
})
|
|
305
|
-
);
|
|
306
|
-
```
|
|
307
|
-
|
|
308
|
-
### Error Handling
|
|
309
|
-
|
|
310
|
-
```typescript
|
|
311
|
-
import { Tracerney, ShieldBlockError } from '@sandrobuilds/tracerney';
|
|
312
|
-
|
|
313
|
-
try {
|
|
314
|
-
const response = await shield.wrap(() => llmCall());
|
|
315
|
-
} catch (err) {
|
|
316
|
-
if (err instanceof ShieldBlockError) {
|
|
317
|
-
// Security block - log and respond to user
|
|
318
|
-
console.error('Attack blocked:', err.event.blockReason);
|
|
319
|
-
res.status(403).json({ error: 'Request blocked' });
|
|
320
|
-
} else {
|
|
321
|
-
// Actual error from LLM or network
|
|
322
|
-
throw err;
|
|
323
|
-
}
|
|
324
|
-
}
|
|
325
|
-
```
|
|
326
|
-
|
|
327
|
-
### Scanning Prompts Pre-LLM
|
|
16
|
+
const tracer = new Tracerney();
|
|
328
17
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
```typescript
|
|
332
|
-
const result = await shield.scanPrompt(userInput);
|
|
18
|
+
const result = await tracer.scanPrompt(userInput);
|
|
333
19
|
|
|
334
20
|
if (result.suspicious) {
|
|
335
|
-
console.log(
|
|
336
|
-
|
|
337
|
-
// Log, rate-limit, or notify security team
|
|
338
|
-
|
|
339
|
-
if (result.blocked) {
|
|
340
|
-
// Only true if LLM Sentinel confirmed (requires backend)
|
|
341
|
-
return res.status(403).json({ error: 'Request blocked' });
|
|
342
|
-
}
|
|
21
|
+
console.log('⚠️ Suspicious:', result.patternName);
|
|
22
|
+
// Handle flagged prompt (log, block, rate-limit, etc.)
|
|
343
23
|
}
|
|
344
|
-
|
|
345
|
-
// Safe to call LLM
|
|
346
|
-
const response = await openai.chat.completions.create({...});
|
|
347
24
|
```
|
|
348
25
|
|
|
349
|
-
|
|
350
|
-
```typescript
|
|
351
|
-
interface ScanResult {
|
|
352
|
-
suspicious: boolean; // Layer 1 detected pattern match
|
|
353
|
-
patternName?: string; // e.g., "Ignore Instructions"
|
|
354
|
-
severity?: string; // "low" | "medium" | "high" | "critical"
|
|
355
|
-
blocked: boolean; // true only if LLM Sentinel confirmed (requires backend)
|
|
356
|
-
}
|
|
357
|
-
```
|
|
358
|
-
|
|
359
|
-
### Updating Allowed Tools
|
|
360
|
-
|
|
361
|
-
```typescript
|
|
362
|
-
shield.setAllowedTools(['search', 'email', 'calendar']);
|
|
363
|
-
```
|
|
364
|
-
|
|
365
|
-
### Getting Shield Status
|
|
366
|
-
|
|
367
|
-
```typescript
|
|
368
|
-
const status = shield.getStatus();
|
|
369
|
-
console.log(status);
|
|
370
|
-
// {
|
|
371
|
-
// patternMatcher: { ready: true, stats: {...} },
|
|
372
|
-
// toolGuard: { allowedTools: [...] },
|
|
373
|
-
// telemetry: { enabled: true, status: {...} }
|
|
374
|
-
// }
|
|
375
|
-
```
|
|
26
|
+
## What's Included
|
|
376
27
|
|
|
377
|
-
|
|
28
|
+
- **238 embedded attack patterns** — covers known injection techniques
|
|
29
|
+
- **Local detection** — <5ms latency, zero network calls
|
|
30
|
+
- **Zero dependencies** — single npm package
|
|
31
|
+
- **No data collection** — all detection happens in your process
|
|
378
32
|
|
|
379
|
-
|
|
33
|
+
## Result Object
|
|
380
34
|
|
|
381
35
|
```typescript
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
//
|
|
385
|
-
|
|
386
|
-
//
|
|
387
|
-
|
|
388
|
-
// Add latency jitter (300-500ms default)
|
|
389
|
-
await jitter(); // Waits random 300-500ms
|
|
390
|
-
await jitter(100, 200); // Custom range: 100-200ms
|
|
391
|
-
```
|
|
392
|
-
|
|
393
|
-
## Configuration
|
|
394
|
-
|
|
395
|
-
### TracernyOptions
|
|
396
|
-
|
|
397
|
-
```typescript
|
|
398
|
-
interface TracernyOptions {
|
|
399
|
-
// === RECOMMENDED: Single Domain URL ===
|
|
400
|
-
// Automatically constructs all backend endpoints:
|
|
401
|
-
// - {baseUrl}/api/v1/signal (events)
|
|
402
|
-
// - {baseUrl}/api/v1/verify-prompt (Layer 2 verification)
|
|
403
|
-
// - {baseUrl}/api/v1/shadow-log (potential attacks)
|
|
404
|
-
// - {baseUrl}/api/v1/definitions (pattern updates)
|
|
405
|
-
baseUrl?: string;
|
|
406
|
-
|
|
407
|
-
// === LAYER 1: Vanguard ===
|
|
408
|
-
// List of tool names the LLM is allowed to call
|
|
409
|
-
allowedTools?: string[];
|
|
410
|
-
|
|
411
|
-
// === Authentication ===
|
|
412
|
-
// API key for authentication to backend endpoints
|
|
413
|
-
apiKey?: string;
|
|
414
|
-
|
|
415
|
-
// === LAYER 2: Sentinel (Advanced - use baseUrl instead) ===
|
|
416
|
-
// Only needed if NOT using baseUrl
|
|
417
|
-
sentinelEndpoint?: string;
|
|
418
|
-
sentinelEnabled?: boolean;
|
|
419
|
-
|
|
420
|
-
// === LAYER 3: Telemetry & Logging (Advanced - use baseUrl instead) ===
|
|
421
|
-
// Only needed if NOT using baseUrl
|
|
422
|
-
apiEndpoint?: string;
|
|
423
|
-
shadowLogEndpoint?: string;
|
|
424
|
-
manifestUrl?: string;
|
|
425
|
-
|
|
426
|
-
// Enable telemetry (default: true)
|
|
427
|
-
enableTelemetry?: boolean;
|
|
428
|
-
|
|
429
|
-
// Path to local manifest cache (for serverless, use /tmp)
|
|
430
|
-
localManifestPath?: string;
|
|
431
|
-
}
|
|
432
|
-
```
|
|
433
|
-
|
|
434
|
-
**Minimal setup (Layer 1 only):**
|
|
435
|
-
```typescript
|
|
436
|
-
const shield = new Tracerney({ allowedTools: ['search'] });
|
|
437
|
-
```
|
|
438
|
-
|
|
439
|
-
**Recommended setup with backend (all 3 layers):**
|
|
440
|
-
```typescript
|
|
441
|
-
const shield = new Tracerney({
|
|
442
|
-
baseUrl: 'http://localhost:3000', // Single domain — paths auto-constructed
|
|
443
|
-
allowedTools: ['search', 'calculator'],
|
|
444
|
-
apiKey: process.env.TRACERNY_API_KEY,
|
|
445
|
-
enableTelemetry: true,
|
|
446
|
-
});
|
|
447
|
-
```
|
|
448
|
-
|
|
449
|
-
**Advanced setup (individual endpoints):**
|
|
450
|
-
```typescript
|
|
451
|
-
const shield = new Tracerney({
|
|
452
|
-
allowedTools: ['search', 'calculator'],
|
|
453
|
-
sentinelEndpoint: 'http://localhost:3000/api/v1/verify-prompt',
|
|
454
|
-
shadowLogEndpoint: 'http://localhost:3000/api/v1/shadow-log',
|
|
455
|
-
apiEndpoint: 'http://localhost:3000/api/v1/signal',
|
|
456
|
-
apiKey: process.env.TRACERNY_API_KEY,
|
|
457
|
-
enableTelemetry: true,
|
|
458
|
-
});
|
|
459
|
-
```
|
|
460
|
-
|
|
461
|
-
## Events
|
|
462
|
-
|
|
463
|
-
Security events have this structure:
|
|
464
|
-
|
|
465
|
-
```typescript
|
|
466
|
-
interface SecurityEvent {
|
|
467
|
-
type: 'INJECTION_DETECTED' | 'UNAUTHORIZED_TOOL' | 'BLOCKED_PATTERN';
|
|
468
|
-
severity: 'low' | 'medium' | 'high' | 'critical';
|
|
469
|
-
timestamp: number; // Unix ms
|
|
470
|
-
blockReason: string;
|
|
471
|
-
metadata: {
|
|
472
|
-
toolName?: string;
|
|
473
|
-
patternName?: string;
|
|
474
|
-
requestSnippet?: string; // First 100 chars, anonymized
|
|
475
|
-
blockLatencyMs?: number;
|
|
476
|
-
};
|
|
477
|
-
anonymized: boolean;
|
|
36
|
+
{
|
|
37
|
+
suspicious: boolean; // true if pattern matched
|
|
38
|
+
patternName?: string; // e.g., "Ignore Instructions"
|
|
39
|
+
severity?: string; // "CRITICAL" | "HIGH" | "MEDIUM" | "LOW"
|
|
40
|
+
blocked: boolean; // false (Layer 1 only)
|
|
478
41
|
}
|
|
479
42
|
```
|
|
480
43
|
|
|
481
|
-
##
|
|
482
|
-
|
|
483
|
-
Your backend should implement these 3 endpoints for full 3-layer defense:
|
|
484
|
-
|
|
485
|
-
### 1. POST /api/v1/verify-prompt (Layer 2: LLM Sentinel)
|
|
486
|
-
|
|
487
|
-
**Called by**: SDK when Layer 1 misses
|
|
488
|
-
**Purpose**: Backend LLM verification with rate limiting
|
|
489
|
-
|
|
490
|
-
```typescript
|
|
491
|
-
// Input: { prompt, keywords?, requestId? }
|
|
492
|
-
// Output: { blocked, confidence, model, latencyMs, remaining }
|
|
493
|
-
// Status 429: Rate limit exceeded
|
|
494
|
-
```
|
|
495
|
-
|
|
496
|
-
Example (Next.js):
|
|
497
|
-
```typescript
|
|
498
|
-
export async function POST(req: NextRequest) {
|
|
499
|
-
const { prompt, keywords, requestId } = await req.json();
|
|
500
|
-
|
|
501
|
-
// Verify API key
|
|
502
|
-
const apiKey = req.headers.get('Authorization')?.replace('Bearer ', '');
|
|
503
|
-
const user = await validateApiKey(apiKey); // Your implementation
|
|
504
|
-
|
|
505
|
-
// Check rate limit (5 calls/min per API key)
|
|
506
|
-
const limiter = getRateLimiter();
|
|
507
|
-
const { allowed, remaining } = limiter.check(user.id, 5, 60000);
|
|
508
|
-
if (!allowed) {
|
|
509
|
-
return NextResponse.json(
|
|
510
|
-
{ blocked: true, reason: 'rate_limit', remaining: 0 },
|
|
511
|
-
{ status: 429 }
|
|
512
|
-
);
|
|
513
|
-
}
|
|
514
|
-
|
|
515
|
-
// Call OpenRouter LLM with your API key (kept secret on backend)
|
|
516
|
-
const llmVerdict = await callOpenRouter(prompt, process.env.OPENROUTER_API_KEY);
|
|
517
|
-
|
|
518
|
-
return NextResponse.json({
|
|
519
|
-
blocked: llmVerdict,
|
|
520
|
-
confidence: llmVerdict ? 0.95 : 0.15,
|
|
521
|
-
model: 'google/gemini-2.5-flash-lite',
|
|
522
|
-
latencyMs: Date.now() - startTime,
|
|
523
|
-
remaining,
|
|
524
|
-
});
|
|
525
|
-
}
|
|
526
|
-
```
|
|
527
|
-
|
|
528
|
-
### 2. POST /api/v1/shadow-log (Layer 3: Potential Attacks)
|
|
529
|
-
|
|
530
|
-
**Called by**: SDK for prompts that pass both Layer 1 + Layer 2
|
|
531
|
-
**Purpose**: Track suspicious-but-allowed input for feedback loop
|
|
532
|
-
|
|
533
|
-
```typescript
|
|
534
|
-
// Input: { prompt, keywords?, confidence, passed, requestId? }
|
|
535
|
-
// Used to identify novel attack patterns for next version
|
|
536
|
-
```
|
|
537
|
-
|
|
538
|
-
### 3. POST /api/v1/signal (Layer 3: Blocked Events)
|
|
539
|
-
|
|
540
|
-
**Called by**: SDK when any layer blocks
|
|
541
|
-
**Purpose**: Security event logging and analytics
|
|
542
|
-
|
|
543
|
-
```typescript
|
|
544
|
-
interface SignalPayload {
|
|
545
|
-
events: SecurityEvent[];
|
|
546
|
-
sdkVersion: string;
|
|
547
|
-
}
|
|
548
|
-
```
|
|
549
|
-
|
|
550
|
-
Example handler (Next.js):
|
|
551
|
-
```typescript
|
|
552
|
-
export async function POST(req: Request) {
|
|
553
|
-
const payload = await req.json();
|
|
554
|
-
|
|
555
|
-
// Log to your monitoring system
|
|
556
|
-
console.log(`[Tracerny] ${payload.events.length} security events`);
|
|
557
|
-
payload.events.forEach((event) => {
|
|
558
|
-
console.log(` - ${event.blockReason} (${event.severity})`);
|
|
559
|
-
});
|
|
560
|
-
|
|
561
|
-
// Store in database
|
|
562
|
-
await db.securityEvents.insertMany(payload.events);
|
|
563
|
-
|
|
564
|
-
// Optional: Send alerts for critical blocks
|
|
565
|
-
if (payload.events.some(e => e.severity === 'CRITICAL')) {
|
|
566
|
-
await notifySecurityTeam(payload.events);
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
return Response.json({ received: true });
|
|
570
|
-
}
|
|
571
|
-
```
|
|
572
|
-
|
|
573
|
-
## Latency Profile
|
|
574
|
-
|
|
575
|
-
**Layer 1 (Vanguard)**: <2ms — Regex pattern matching
|
|
576
|
-
**Layer 2 (Sentinel)**: 200-1500ms — Backend LLM verification (only if Layer 1 misses)
|
|
577
|
-
**Layer 3 (Jitter)**: +300-500ms — Random delay masks which layer executed
|
|
578
|
-
**Total**:
|
|
579
|
-
- Safe prompt Layer 1 only: ~300-500ms (jitter only)
|
|
580
|
-
- Attack Layer 1 hits: ~300-500ms (jitter masks instant block)
|
|
581
|
-
- Novel attack Layer 2 hits: ~500-2000ms (LLM + jitter)
|
|
582
|
-
- No overhead without `sentinelEndpoint`: <3ms (Layer 1 + jitter)
|
|
583
|
-
|
|
584
|
-
## Provider Support
|
|
585
|
-
|
|
586
|
-
Designed for provider-agnostic LLM interfaces. Tested with:
|
|
587
|
-
- OpenAI (GPT-4, GPT-3.5)
|
|
588
|
-
- Anthropic (Claude)
|
|
589
|
-
- Others (Azure OpenAI, local models via compatible APIs)
|
|
590
|
-
|
|
591
|
-
Response structure mapping is automatic for standard provider APIs.
|
|
592
|
-
|
|
593
|
-
## Graceful Shutdown
|
|
594
|
-
|
|
595
|
-
```typescript
|
|
596
|
-
process.on('SIGTERM', async () => {
|
|
597
|
-
shield.destroy();
|
|
598
|
-
// Ensures any queued events are flushed
|
|
599
|
-
});
|
|
600
|
-
```
|
|
601
|
-
|
|
602
|
-
## Troubleshooting
|
|
603
|
-
|
|
604
|
-
### "Cannot find module 'tracerney'"
|
|
605
|
-
Build the SDK first:
|
|
606
|
-
```bash
|
|
607
|
-
npm run build
|
|
608
|
-
```
|
|
609
|
-
|
|
610
|
-
### "API endpoint not responding"
|
|
611
|
-
Make sure your Signal backend is running:
|
|
612
|
-
```bash
|
|
613
|
-
# In backend directory
|
|
614
|
-
npm run dev
|
|
615
|
-
```
|
|
616
|
-
|
|
617
|
-
### "Events not appearing in dashboard"
|
|
618
|
-
1. Check `enableTelemetry: true` in config
|
|
619
|
-
2. Verify `apiEndpoint` is reachable: `curl http://localhost:3000/api/v1/stats`
|
|
620
|
-
3. Check browser console for CORS errors
|
|
621
|
-
|
|
622
|
-
### "Pattern not detecting attacks"
|
|
623
|
-
1. Wait for patterns to load: `shield.getStatus().patternMatcher.ready`
|
|
624
|
-
2. Check if attack matches bundled patterns (20+ vanguard patterns with homoglyph detection)
|
|
625
|
-
3. If Layer 1 misses, enable Layer 2: Set `sentinelEndpoint`
|
|
626
|
-
4. Test directly: `npm install @sandrobuilds/tracerney && node test-layer1.js`
|
|
627
|
-
|
|
628
|
-
### "Layer 2 verification failing (Sentinel not working)"
|
|
629
|
-
1. Check backend is running: `curl http://localhost:3000/api/v1/health`
|
|
630
|
-
2. Verify `OPENROUTER_API_KEY` is set in backend `.env` (not SDK, kept secure on backend)
|
|
631
|
-
3. Check OpenRouter API key is valid: https://openrouter.ai/keys
|
|
632
|
-
4. Look for 429 errors = rate limit hit (wait 60 seconds)
|
|
633
|
-
5. Check backend logs: `cd backend && npm run dev`
|
|
634
|
-
|
|
635
|
-
### "Getting 429 Rate Limit errors"
|
|
636
|
-
1. **Expected behavior** — 5 verifications per API key per minute
|
|
637
|
-
2. Wait 60+ seconds for window to reset
|
|
638
|
-
3. Or: Use different API key for testing
|
|
639
|
-
4. Or: Change limit in backend: `limiter.check(key, 10, 60000)` (10 calls/min)
|
|
640
|
-
|
|
641
|
-
### "High latency from shield.wrap()"
|
|
642
|
-
- Layer 1 only (no `sentinelEndpoint`): Should be <3ms + jitter (300-500ms)
|
|
643
|
-
- With Layer 2: 200-1500ms (LLM) + jitter (300-500ms)
|
|
644
|
-
- If higher, check:
|
|
645
|
-
- Is OpenRouter API slow? (check https://status.openrouter.ai)
|
|
646
|
-
- Is backend responding? `curl http://localhost:3000/api/v1/verify-prompt`
|
|
647
|
-
- Profile: `console.time('shield'); await shield.wrap(...); console.timeEnd('shield');`
|
|
648
|
-
|
|
649
|
-
---
|
|
650
|
-
|
|
651
|
-
## FAQ
|
|
652
|
-
|
|
653
|
-
**Q: Will Shield block my legitimate use cases?**
|
|
654
|
-
A: Shield uses regex patterns targeting known injection techniques. False positives are rare. If you see them, add to allowlist or adjust patterns in your Signal backend.
|
|
655
|
-
|
|
656
|
-
**Q: Does my data go to your servers?**
|
|
657
|
-
A: No. You own the `apiEndpoint` where events are sent. Shield is a runtime library that runs in your process. No data leaves your infrastructure unless you configure it.
|
|
658
|
-
|
|
659
|
-
**Q: Can I use Shield without telemetry?**
|
|
660
|
-
A: Yes! Set only `allowedTools`:
|
|
661
|
-
```typescript
|
|
662
|
-
const shield = new Tracerney({ allowedTools: ['search'] });
|
|
663
|
-
```
|
|
664
|
-
|
|
665
|
-
**Q: What if an attack isn't detected?**
|
|
666
|
-
A: Layer 1 catches known patterns. Novel attacks are caught by Layer 2 (backend LLM):
|
|
667
|
-
1. Set `sentinelEndpoint` to enable Layer 2
|
|
668
|
-
2. Provide `OPENROUTER_API_KEY` in backend `.env`
|
|
669
|
-
3. Rate limiting (5 calls/min) prevents botnet cost spikes
|
|
670
|
-
4. Monitor `shadow_log` for potential attacks that pass both layers
|
|
671
|
-
|
|
672
|
-
**Q: How does Layer 2 rate limiting work?**
|
|
673
|
-
A: Backend enforces 5 verifications per API key per minute:
|
|
674
|
-
- First 5 calls: Returns verification result + `remaining` count
|
|
675
|
-
- Call 6+: Returns HTTP 429 with `blocked: true, reason: 'rate_limit'`
|
|
676
|
-
- Resets every 60 seconds
|
|
677
|
-
|
|
678
|
-
Example:
|
|
679
|
-
```typescript
|
|
680
|
-
// SDK automatically handles 429
|
|
681
|
-
try {
|
|
682
|
-
await shield.scanPrompt(userInput);
|
|
683
|
-
} catch (err) {
|
|
684
|
-
if (err.event.blockReason === 'rate_limit') {
|
|
685
|
-
return res.status(429).json({ error: 'Too many verification requests' });
|
|
686
|
-
}
|
|
687
|
-
}
|
|
688
|
-
```
|
|
689
|
-
|
|
690
|
-
**Q: What's the difference between `apiEndpoint` and `sentinelEndpoint`?**
|
|
691
|
-
A:
|
|
692
|
-
- `sentinelEndpoint`: Layer 2 LLM verification (backend security check)
|
|
693
|
-
- `shadowLogEndpoint`: Layer 3 potential attacks (feedback loop)
|
|
694
|
-
- `apiEndpoint`: Layer 3 security events (blocked attacks)
|
|
695
|
-
|
|
696
|
-
Use all three for complete observability.
|
|
697
|
-
|
|
698
|
-
**Q: Does Shield work with streaming responses?**
|
|
699
|
-
A: Yes. `shield.wrap()` works with streaming LLM responses—detection happens after the stream completes.
|
|
700
|
-
|
|
701
|
-
**Q: Can I rate-limit based on Shield blocks?**
|
|
702
|
-
A: Yes. Use `ShieldBlockError` to track patterns:
|
|
703
|
-
```typescript
|
|
704
|
-
const blocks = new Map();
|
|
705
|
-
try {
|
|
706
|
-
await shield.wrap(() => llmCall());
|
|
707
|
-
} catch (err) {
|
|
708
|
-
if (err instanceof ShieldBlockError) {
|
|
709
|
-
const pattern = err.event.patternName;
|
|
710
|
-
blocks.set(pattern, (blocks.get(pattern) || 0) + 1);
|
|
711
|
-
if (blocks.get(pattern) > 5) banIP();
|
|
712
|
-
}
|
|
713
|
-
}
|
|
714
|
-
```
|
|
715
|
-
|
|
716
|
-
**Q: What's the difference between `scanPrompt()` and `wrap()`?**
|
|
717
|
-
- `scanPrompt()`: Scans input **before** LLM call (edge defense)
|
|
718
|
-
- `wrap()`: Scans input + validates tool output **after** LLM call (comprehensive defense)
|
|
719
|
-
|
|
720
|
-
Use both for defense-in-depth.
|
|
721
|
-
|
|
722
|
-
**Q: How do I update patterns without redeploying?**
|
|
723
|
-
A: Set `manifestUrl` to your backend. The SDK checks for new patterns every 24h and uses stale-while-revalidate caching. Push new patterns to your backend—all clients update automatically.
|
|
724
|
-
|
|
725
|
-
**Q: Can Shield work offline?**
|
|
726
|
-
A: Yes! Shield uses the 20 bundled Vanguard patterns offline. If remote patterns cannot be fetched, it fails gracefully and falls back to the cached version.
|
|
727
|
-
|
|
728
|
-
---
|
|
729
|
-
|
|
730
|
-
## Resources
|
|
731
|
-
|
|
732
|
-
- **GitHub**: https://github.com/sandrosaric/tracerney
|
|
733
|
-
- **Issues**: Report bugs or request features
|
|
734
|
-
- **Dashboard**: Built-in real-time threat monitoring at `http://localhost:3000`
|
|
735
|
-
- **Examples**: See `/examples` for Next.js, Express, serverless setups
|
|
44
|
+
## Detected Patterns
|
|
736
45
|
|
|
737
|
-
|
|
46
|
+
- Instruction overrides ("ignore all instructions")
|
|
47
|
+
- Role-play jailbreaks ("act as unrestricted AI")
|
|
48
|
+
- Context confusion attacks
|
|
49
|
+
- Data extraction attempts
|
|
50
|
+
- Code execution risks
|
|
51
|
+
- And 233 more...
|
|
738
52
|
|
|
739
53
|
## License
|
|
740
54
|
|