ai-sdk-guardrails 4.0.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +472 -735
- package/package.json +27 -23
package/README.md
CHANGED
|
@@ -1,895 +1,632 @@
|
|
|
1
1
|
# AI SDK Guardrails
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
**Safety and quality controls for Vercel AI SDK**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Add guardrails to your AI applications in one line of code. Block PII, prevent prompt injection, enforce output quality - while keeping your existing telemetry and observability stack intact.
|
|
6
6
|
|
|
7
|
-
](https://www.npmjs.com/package/ai-sdk-guardrails)
|
|
8
|
+
[npm package](https://www.npmjs.com/package/ai-sdk-guardrails)
|
|
9
|
+
[Bundle size](https://bundlephobia.com/package/ai-sdk-guardrails)
|
|
10
|
+
[License](./LICENSE)
|
|
11
|
+

|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
import { openai } from '@ai-sdk/openai';
|
|
15
|
-
import { generateText } from 'ai';
|
|
16
|
-
import {
|
|
17
|
-
wrapWithGuardrails,
|
|
18
|
-
defineInputGuardrail,
|
|
19
|
-
defineOutputGuardrail,
|
|
20
|
-
} from 'ai-sdk-guardrails';
|
|
13
|
+

|
|
21
14
|
|
|
22
|
-
|
|
23
|
-
const inputGuard = defineInputGuardrail({
|
|
24
|
-
name: 'length-check',
|
|
25
|
-
execute: async ({ prompt }) =>
|
|
26
|
-
prompt.length > 100
|
|
27
|
-
? { tripwireTriggered: true, message: 'Input too long' }
|
|
28
|
-
: { tripwireTriggered: false },
|
|
29
|
-
});
|
|
15
|
+
## Drop-in Guardrails for any AI model
|
|
30
16
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
result.text.length < 10
|
|
35
|
-
? { tripwireTriggered: true, message: 'Response too short' }
|
|
36
|
-
: { tripwireTriggered: false },
|
|
37
|
-
});
|
|
17
|
+
```ts
|
|
18
|
+
import { withGuardrails, piiDetector } from 'ai-sdk-guardrails';
|
|
19
|
+
const model = openai('gpt-4o'); // or any other AI model
|
|
38
20
|
|
|
39
|
-
//
|
|
40
|
-
const
|
|
41
|
-
inputGuardrails: [
|
|
42
|
-
outputGuardrails: [outputGuard],
|
|
21
|
+
// Everything else stays the same
|
|
22
|
+
const safeModel = withGuardrails(model, {
|
|
23
|
+
inputGuardrails: [piiDetector()],
|
|
43
24
|
});
|
|
44
25
|
|
|
45
|
-
//
|
|
46
|
-
|
|
47
|
-
model: guardedModel,
|
|
48
|
-
prompt: 'A prompt that is definitely not too long.',
|
|
49
|
-
});
|
|
26
|
+
// Your existing code, telemetry, and logging still works
|
|
27
|
+
await generateText({ model: safeModel, prompt: '...' });
|
|
50
28
|
```
|
|
51
29
|
|
|
52
|
-
|
|
30
|
+
**That's it.** Your AI now blocks PII automatically.
|
|
53
31
|
|
|
54
|
-
|
|
32
|
+
## Installation
|
|
55
33
|
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
A[User Input<br/>'hello'] --> B[AI Model] --> C[Response<br/>⚠️ Wastes resources<br/>😞 Often useless]
|
|
34
|
+
```bash
|
|
35
|
+
npm install ai-sdk-guardrails
|
|
59
36
|
```
|
|
60
37
|
|
|
61
|
-
|
|
38
|
+
## Why Guardrails Matter
|
|
62
39
|
|
|
63
|
-
|
|
64
|
-
flowchart LR
|
|
65
|
-
A[User Input<br/>'hello'] --> B[Input Guardrails] --> C[❌ STOPPED<br/>✅ No API call made]
|
|
66
|
-
```
|
|
40
|
+
Real problems that guardrails solve:
|
|
67
41
|
|
|
68
|
-
|
|
42
|
+
❌ **Without guardrails:**
|
|
69
43
|
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
|
|
44
|
+
```ts
|
|
45
|
+
// User: "My email is john@company.com, help me..."
|
|
46
|
+
// → Sends PII to model → Compliance violation → $$$
|
|
73
47
|
```
|
|
74
48
|
|
|
75
|
-
|
|
49
|
+
✅ **With guardrails:**
|
|
76
50
|
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
|
|
51
|
+
```ts
|
|
52
|
+
const model = withGuardrails(baseModel, {
|
|
53
|
+
inputGuardrails: [piiDetector()], // Blocks before API call
|
|
54
|
+
});
|
|
55
|
+
// → Request blocked → No PII leak → No cost → Compliant
|
|
80
56
|
```
|
|
81
57
|
|
|
82
|
-
|
|
58
|
+
Common use cases:
|
|
83
59
|
|
|
84
|
-
|
|
60
|
+
- 🛡️ **Compliance**: Block PII before it reaches your model
|
|
61
|
+
- 💰 **Cost control**: Stop bad requests before they cost money
|
|
62
|
+
- 🔒 **Security**: Prevent prompt injection and data exfiltration
|
|
63
|
+
- ✅ **Quality**: Enforce minimum response standards
|
|
64
|
+
- 🔧 **Production**: Works with your existing observability tools
|
|
85
65
|
|
|
86
|
-
|
|
87
|
-
npm install ai-sdk-guardrails
|
|
66
|
+
## Copy-Paste Examples
|
|
88
67
|
|
|
89
|
-
|
|
68
|
+
### Basic Protection (Most Common)
|
|
90
69
|
|
|
91
|
-
|
|
70
|
+
```ts
|
|
71
|
+
import { generateText } from 'ai';
|
|
72
|
+
import { openai } from '@ai-sdk/openai';
|
|
73
|
+
import {
|
|
74
|
+
withGuardrails,
|
|
75
|
+
piiDetector,
|
|
76
|
+
promptInjectionDetector,
|
|
77
|
+
} from 'ai-sdk-guardrails';
|
|
92
78
|
|
|
93
|
-
|
|
79
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
80
|
+
inputGuardrails: [piiDetector(), promptInjectionDetector()],
|
|
81
|
+
});
|
|
94
82
|
|
|
95
|
-
|
|
83
|
+
// Use exactly like before - nothing else changes
|
|
84
|
+
const { text } = await generateText({
|
|
85
|
+
model,
|
|
86
|
+
prompt: 'Write a friendly email',
|
|
87
|
+
});
|
|
96
88
|
```
|
|
97
89
|
|
|
98
|
-
|
|
90
|
+
### Input + Output Protection
|
|
99
91
|
|
|
100
|
-
|
|
92
|
+
```ts
|
|
93
|
+
import {
|
|
94
|
+
withGuardrails,
|
|
95
|
+
piiDetector,
|
|
96
|
+
sensitiveDataFilter,
|
|
97
|
+
minLengthRequirement,
|
|
98
|
+
} from 'ai-sdk-guardrails';
|
|
101
99
|
|
|
102
|
-
|
|
100
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
101
|
+
inputGuardrails: [piiDetector()], // Block PII in prompts
|
|
102
|
+
outputGuardrails: [
|
|
103
|
+
sensitiveDataFilter(), // Remove secrets from responses
|
|
104
|
+
minLengthRequirement(100), // Enforce quality standards
|
|
105
|
+
],
|
|
106
|
+
});
|
|
107
|
+
```
|
|
103
108
|
|
|
104
|
-
|
|
109
|
+
### Works With Streaming
|
|
105
110
|
|
|
106
|
-
|
|
111
|
+
```ts
|
|
112
|
+
import { streamText } from 'ai';
|
|
107
113
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
import { openai } from '@ai-sdk/openai';
|
|
111
|
-
import {
|
|
112
|
-
wrapWithInputGuardrails,
|
|
113
|
-
defineInputGuardrail,
|
|
114
|
-
} from 'ai-sdk-guardrails';
|
|
115
|
-
import { extractTextContent } from 'ai-sdk-guardrails/guardrails/input';
|
|
116
|
-
|
|
117
|
-
// Block inefficient requests before calling the AI model
|
|
118
|
-
const lengthGuard = defineInputGuardrail({
|
|
119
|
-
name: 'blocked-keywords',
|
|
120
|
-
execute: async (context) => {
|
|
121
|
-
const { prompt } = extractTextContent(context);
|
|
122
|
-
const blockedWords = ['spam', 'test', 'hello'];
|
|
123
|
-
|
|
124
|
-
const foundWord = blockedWords.find((word) =>
|
|
125
|
-
prompt.toLowerCase().includes(word.toLowerCase()),
|
|
126
|
-
);
|
|
127
|
-
|
|
128
|
-
if (foundWord) {
|
|
129
|
-
return {
|
|
130
|
-
tripwireTriggered: true,
|
|
131
|
-
message: `Blocked keyword detected: ${foundWord}`,
|
|
132
|
-
severity: 'medium',
|
|
133
|
-
};
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
return { tripwireTriggered: false };
|
|
137
|
-
},
|
|
114
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
115
|
+
outputGuardrails: [minLengthRequirement(100)],
|
|
138
116
|
});
|
|
139
117
|
|
|
140
|
-
|
|
141
|
-
|
|
118
|
+
// Streaming just works - guardrails run after stream completes
|
|
119
|
+
const { textStream } = await streamText({ model, prompt: '...' });
|
|
120
|
+
for await (const chunk of textStream) {
|
|
121
|
+
process.stdout.write(chunk);
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Production Setup (With Error Handling)
|
|
126
|
+
|
|
127
|
+
```ts
|
|
128
|
+
import { isGuardrailsError } from 'ai-sdk-guardrails';
|
|
129
|
+
|
|
130
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
131
|
+
inputGuardrails: [piiDetector(), promptInjectionDetector()],
|
|
132
|
+
outputGuardrails: [sensitiveDataFilter()],
|
|
133
|
+
throwOnBlocked: true, // Throw errors instead of silent blocking
|
|
142
134
|
});
|
|
143
135
|
|
|
144
|
-
// This would normally waste an API call for a useless response
|
|
145
136
|
try {
|
|
146
|
-
const
|
|
147
|
-
|
|
148
|
-
prompt: 'hello', // ❌ Blocked - prevents unnecessary API call
|
|
149
|
-
});
|
|
137
|
+
const { text } = await generateText({ model, prompt: '...' });
|
|
138
|
+
console.log(text);
|
|
150
139
|
} catch (error) {
|
|
151
|
-
|
|
140
|
+
if (isGuardrailsError(error)) {
|
|
141
|
+
console.error('Blocked by guardrail:', error.message);
|
|
142
|
+
// Show user-friendly message
|
|
143
|
+
}
|
|
152
144
|
}
|
|
153
|
-
|
|
154
|
-
// This generates valuable content
|
|
155
|
-
const goodResult = await generateText({
|
|
156
|
-
model: optimizedModel,
|
|
157
|
-
prompt: 'Write a product description for our new software', // ✅ This creates value
|
|
158
|
-
});
|
|
159
145
|
```
|
|
160
146
|
|
|
161
|
-
|
|
147
|
+
## How It Works
|
|
162
148
|
|
|
163
|
-
|
|
164
|
-
import {
|
|
165
|
-
wrapWithOutputGuardrails,
|
|
166
|
-
defineOutputGuardrail,
|
|
167
|
-
} from 'ai-sdk-guardrails';
|
|
168
|
-
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
149
|
+
Guardrails run as **middleware** around your AI calls — input guardrails before the model is called, output guardrails on the model's response:
|
|
169
150
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
/\b\d{3}-\d{3}-\d{4}\b/, // Phone
|
|
180
|
-
];
|
|
181
|
-
|
|
182
|
-
const foundPattern = sensitivePatterns.find((pattern) =>
|
|
183
|
-
pattern.test(text),
|
|
184
|
-
);
|
|
185
|
-
|
|
186
|
-
if (foundPattern) {
|
|
187
|
-
return {
|
|
188
|
-
tripwireTriggered: true,
|
|
189
|
-
message: 'Sensitive information detected in response',
|
|
190
|
-
severity: 'high',
|
|
191
|
-
};
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
return { tripwireTriggered: false };
|
|
195
|
-
},
|
|
196
|
-
});
|
|
151
|
+
```mermaid
|
|
152
|
+
flowchart LR
|
|
153
|
+
A[Input] --> B[Input Guardrails]
|
|
154
|
+
B -->|✅ Clean| C[AI Model]
|
|
155
|
+
B -->|❌ Blocked| X[No API Call]
|
|
156
|
+
C --> D[Output Guardrails]
|
|
157
|
+
D -->|✅ Clean| E[Response]
|
|
158
|
+
D -->|❌ Blocked| R[Retry/Replace/Block]
|
|
159
|
+
```
|
|
197
160
|
|
|
198
|
-
|
|
199
|
-
outputGuardrails: [qualityGuard],
|
|
200
|
-
onOutputBlocked: (executionSummary) => {
|
|
201
|
-
console.log(
|
|
202
|
-
'Prevented sensitive data leak:',
|
|
203
|
-
executionSummary.blockedResults[0]?.message,
|
|
204
|
-
);
|
|
205
|
-
|
|
206
|
-
// Access comprehensive analytics (New in v4.0.0)
|
|
207
|
-
console.log(
|
|
208
|
-
`Blocked ${executionSummary.stats.blocked} of ${executionSummary.guardrailsExecuted} guardrails`,
|
|
209
|
-
);
|
|
210
|
-
},
|
|
211
|
-
});
|
|
161
|
+
**Three-step workflow:**
|
|
212
162
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
});
|
|
217
|
-
// Automatically blocks responses containing emails, phone numbers, or SSNs
|
|
218
|
-
```
|
|
163
|
+
1. **Receive**: Input or output arrives
|
|
164
|
+
2. **Check**: Guardrails run (PII detection, validation, etc.)
|
|
165
|
+
3. **Decide**: Pass through, block, or retry
|
|
219
166
|
|
|
220
|
-
|
|
167
|
+
**Key benefit**: Non-invasive. Your existing telemetry, logging, and observability tools keep working because guardrails are just middleware.
|
|
221
168
|
|
|
222
|
-
|
|
223
|
-
const businessHoursGuard = defineInputGuardrail({
|
|
224
|
-
name: 'business-hours-only',
|
|
225
|
-
execute: async () => {
|
|
226
|
-
const hour = new Date().getUTCHours();
|
|
227
|
-
// Only allow requests between 9 AM and 5 PM UTC
|
|
228
|
-
if (hour < 9 || hour > 17) {
|
|
229
|
-
return {
|
|
230
|
-
tripwireTriggered: true,
|
|
231
|
-
message:
|
|
232
|
-
'Requests are only permitted during business hours (9:00-17:00 UTC).',
|
|
233
|
-
severity: 'low',
|
|
234
|
-
};
|
|
235
|
-
}
|
|
236
|
-
return { tripwireTriggered: false };
|
|
237
|
-
},
|
|
238
|
-
});
|
|
169
|
+
## Built-in Guardrails
|
|
239
170
|
|
|
240
|
-
|
|
241
|
-
inputGuardrails: [businessHoursGuard],
|
|
242
|
-
});
|
|
243
|
-
```
|
|
171
|
+
### Input Guardrails (Run Before Model)
|
|
244
172
|
|
|
245
|
-
|
|
173
|
+
| Guardrail | Purpose | Example |
|
|
174
|
+
| --------------------------- | -------------------------------- | ------------------- |
|
|
175
|
+
| `piiDetector()` | Block emails, phones, SSNs | Compliance, privacy |
|
|
176
|
+
| `promptInjectionDetector()` | Detect injection attempts | Security |
|
|
177
|
+
| `blockedKeywords()` | Block specific terms | Content policy |
|
|
178
|
+
| `inputLengthLimit()` | Enforce max input length | Cost control |
|
|
179
|
+
| `rateLimiting()` | Per-user rate limits | Abuse prevention |
|
|
180
|
+
| `profanityFilter()` | Block offensive language | Content moderation |
|
|
181
|
+
| `toxicityDetector()` | Detect toxic content | Safety |
|
|
182
|
+
| `allowedToolsGuardrail()` | Restrict which tools can be used | Tool security |
|
|
246
183
|
|
|
247
|
-
|
|
184
|
+
### Output Guardrails (Run After Model)
|
|
248
185
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
186
|
+
| Guardrail | Purpose | Example |
|
|
187
|
+
| ------------------------- | --------------------------- | ------------------------- |
|
|
188
|
+
| `sensitiveDataFilter()` | Remove secrets, API keys | Security |
|
|
189
|
+
| `minLengthRequirement()` | Enforce minimum length | Quality control |
|
|
190
|
+
| `outputLengthLimit()` | Enforce maximum length | Cost/UX control |
|
|
191
|
+
| `toxicityFilter()` | Block toxic responses | Safety |
|
|
192
|
+
| `jsonValidation()` | Validate JSON structure | Structured output |
|
|
193
|
+
| `schemaValidation()` | Validate against Zod schema | Type safety |
|
|
194
|
+
| `confidenceThreshold()` | Require minimum confidence | Quality |
|
|
195
|
+
| `hallucinationDetector()` | Detect uncertain claims | Accuracy |
|
|
196
|
+
| `secretRedaction()` | Redact secrets from output | Security |
|
|
197
|
+
| `mcpSecurityGuardrail()` | MCP tool security | Prevent data exfiltration |
|
|
255
198
|
|
|
256
|
-
|
|
257
|
-
const piiDetectionGuardrail = defineInputGuardrail({
|
|
258
|
-
name: 'pii-detection',
|
|
259
|
-
execute: async (context) => {
|
|
260
|
-
const { prompt } = extractTextContent(context);
|
|
261
|
-
|
|
262
|
-
const patterns = [
|
|
263
|
-
{
|
|
264
|
-
name: 'SSN',
|
|
265
|
-
regex: /\b\d{3}-\d{2}-\d{4}\b/,
|
|
266
|
-
description: 'Social Security Number',
|
|
267
|
-
},
|
|
268
|
-
{
|
|
269
|
-
name: 'Email',
|
|
270
|
-
regex: /\b[\w\.-]+@[\w\.-]+\.\w+\b/,
|
|
271
|
-
description: 'Email address',
|
|
272
|
-
},
|
|
273
|
-
];
|
|
274
|
-
|
|
275
|
-
const detected = patterns.filter((p) => p.regex.test(prompt));
|
|
276
|
-
|
|
277
|
-
if (detected.length > 0) {
|
|
278
|
-
// TypeScript knows this metadata matches PIIMetadata
|
|
279
|
-
const metadata: PIIMetadata = {
|
|
280
|
-
detectedTypes: detected.map((p) => ({
|
|
281
|
-
type: p.name,
|
|
282
|
-
description: p.description,
|
|
283
|
-
})),
|
|
284
|
-
count: detected.length,
|
|
285
|
-
};
|
|
286
|
-
|
|
287
|
-
return {
|
|
288
|
-
tripwireTriggered: true,
|
|
289
|
-
message: `PII detected: ${detected.map((p) => p.name).join(', ')}`,
|
|
290
|
-
severity: 'high',
|
|
291
|
-
metadata, // Type is automatically inferred!
|
|
292
|
-
};
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
return { tripwireTriggered: false };
|
|
296
|
-
},
|
|
297
|
-
});
|
|
199
|
+
### MCP Security Guardrails
|
|
298
200
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
201
|
+
Protect against prompt injection and data exfiltration when using Model Context Protocol (MCP) tools:
|
|
202
|
+
|
|
203
|
+
```ts
|
|
204
|
+
import { mcpSecurityGuardrail, mcpResponseSanitizer } from 'ai-sdk-guardrails';
|
|
205
|
+
|
|
206
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
207
|
+
outputGuardrails: [
|
|
208
|
+
mcpSecurityGuardrail({
|
|
209
|
+
detectExfiltration: true, // Detect data exfiltration attempts
|
|
210
|
+
scanEncodedContent: true, // Scan base64/hex encoded content
|
|
211
|
+
allowedDomains: ['api.company.com'], // Domain allowlist
|
|
212
|
+
maxContentSize: 51200, // 50KB limit
|
|
213
|
+
injectionThreshold: 0.7, // Sensitivity (lower = stricter)
|
|
214
|
+
}),
|
|
215
|
+
mcpResponseSanitizer(), // Sanitize malicious content instead of blocking the response
|
|
216
|
+
],
|
|
311
217
|
});
|
|
312
218
|
```
|
|
313
219
|
|
|
314
|
-
**
|
|
315
|
-
|
|
316
|
-
## ✨ Features
|
|
317
|
-
|
|
318
|
-
- 🛡️ **Input & Output Guardrails**: Enforce custom safety, compliance, and quality policies on both prompts and LLM responses.
|
|
319
|
-
- 💰 **Cost Control**: Block invalid or wasteful prompts before they are sent to your LLM provider, saving you money.
|
|
320
|
-
- 🎯 **Quality Improvement**: Automatically filter, flag, or retry low-quality or irrelevant model outputs.
|
|
321
|
-
- 🔒 **Security Protection**: Built-in defenses against prompt injection, jailbreak attempts, PII leakage, secret exposure, and tool call validation.
|
|
322
|
-
- 🏛️ **Compliance & Governance**: Enforce regulatory guidelines and business rules for enterprise applications with jurisdiction-specific compliance.
|
|
323
|
-
- 🔄 **Streaming Support**: Works seamlessly with both streaming (streamText) and standard (generateText) API responses with real-time content monitoring.
|
|
324
|
-
- 📊 **Observability Hooks**: Built-in callbacks (onInputBlocked, onOutputBlocked, etc.) for logging and monitoring with comprehensive execution analytics.
|
|
325
|
-
- ⚙️ **Configurable Execution**: Run guardrails in parallel or sequentially and set custom timeouts.
|
|
326
|
-
- 🚀 **AI SDK Native**: Designed from the ground up to integrate cleanly with AI SDK middleware patterns.
|
|
327
|
-
- 🧠 **AI-Powered Verification**: LLM-as-judge capabilities for hallucination detection and quality assessment.
|
|
328
|
-
- 🌍 **Global Compliance**: Support for multiple jurisdictions (US, EU, UK, CA, AU, JP, CN, IN) with region-specific policies.
|
|
329
|
-
- 📝 **Content Protection**: Copyright and IP protection with originality scoring and verbatim passage detection.
|
|
330
|
-
- 🔐 **Data Integrity**: Comprehensive table validation, SQL code safety, and schema enforcement.
|
|
331
|
-
- 🌐 **Network Security**: Domain allowlisting, URL sanitization, and external access controls.
|
|
332
|
-
- 🔒 **Privacy & Memory**: PII redaction, memory minimization, and secure logging practices.
|
|
333
|
-
- 🛡️ **Safety & Escalation**: Toxicity de-escalation, human review workflows, and streaming early termination.
|
|
334
|
-
|
|
335
|
-
## 📚 API Overview
|
|
336
|
-
|
|
337
|
-
| Function | Description |
|
|
338
|
-
| ---------------------------- | ----------------------------------------------------------------------------- |
|
|
339
|
-
| `defineInputGuardrail()` | Creates a guardrail to validate, inspect, or block prompts. |
|
|
340
|
-
| `defineOutputGuardrail()` | Creates a guardrail to validate, filter, or re-route LLM outputs. |
|
|
341
|
-
| `wrapWithGuardrails()` | ⭐ **Recommended** - The easiest way to add both input and output guardrails. |
|
|
342
|
-
| `wrapWithInputGuardrails()` | Attaches input-only guardrails to a model. |
|
|
343
|
-
| `wrapWithOutputGuardrails()` | Attaches output-only guardrails to a model. |
|
|
344
|
-
| `isGuardrailsError()`, etc. | Error handling utilities and structured error types. |
|
|
345
|
-
|
|
346
|
-
## 🧠 Design Philosophy
|
|
347
|
-
|
|
348
|
-
- ✅ **Helper-First**: Simple, chainable utility functions provide a great developer experience for fast adoption.
|
|
349
|
-
- 🧩 **Composable**: Multiple guardrails can be chained together and will run in your specified order (or in parallel).
|
|
350
|
-
- 🧾 **Type-Safe**: Full TypeScript support with automatic type inference for guardrail metadata - no manual type annotations needed!
|
|
351
|
-
- 🧪 **Sensible Defaults**: Get started quickly with zero-config default behaviors that can be easily overridden.
|
|
352
|
-
|
|
353
|
-
## Architecture Overview
|
|
354
|
-
|
|
355
|
-
The library leverages the Vercel AI SDK's middleware architecture to provide composable guardrails that integrate seamlessly with your existing AI applications:
|
|
220
|
+
**Attack vectors prevented:**
|
|
356
221
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
subgraph "AI SDK Guardrails Middleware"
|
|
365
|
-
InputMW[Input Guardrails Middleware]
|
|
366
|
-
OutputMW[Output Guardrails Middleware]
|
|
367
|
-
|
|
368
|
-
subgraph "Input Guardrails Layer"
|
|
369
|
-
Length[Length Validation]
|
|
370
|
-
Spam[Spam Detection]
|
|
371
|
-
PII[PII Detection]
|
|
372
|
-
Business[Business Rules]
|
|
373
|
-
Custom1[Custom Guards]
|
|
374
|
-
end
|
|
375
|
-
|
|
376
|
-
subgraph "Output Guardrails Layer"
|
|
377
|
-
Quality[Quality Assurance]
|
|
378
|
-
Sensitive[Sensitive Info Filter]
|
|
379
|
-
Professional[Professional Tone]
|
|
380
|
-
Factual[Factual Validation]
|
|
381
|
-
Custom2[Custom Guards]
|
|
382
|
-
end
|
|
383
|
-
end
|
|
384
|
-
|
|
385
|
-
subgraph "AI SDK Core"
|
|
386
|
-
Wrapper[wrapLanguageModel]
|
|
387
|
-
Generator[generateText/Object/Stream]
|
|
388
|
-
end
|
|
389
|
-
|
|
390
|
-
subgraph "External Services"
|
|
391
|
-
AI[AI Model Provider]
|
|
392
|
-
Log[Logging & Telemetry]
|
|
393
|
-
end
|
|
394
|
-
|
|
395
|
-
App --> Config
|
|
396
|
-
Config --> InputMW
|
|
397
|
-
InputMW --> Length
|
|
398
|
-
InputMW --> Spam
|
|
399
|
-
InputMW --> PII
|
|
400
|
-
InputMW --> Business
|
|
401
|
-
InputMW --> Custom1
|
|
402
|
-
|
|
403
|
-
InputMW -->|Valid Request| Wrapper
|
|
404
|
-
InputMW -->|Blocked Request| Log
|
|
405
|
-
|
|
406
|
-
Wrapper --> Generator
|
|
407
|
-
Generator --> AI
|
|
408
|
-
AI --> OutputMW
|
|
409
|
-
|
|
410
|
-
OutputMW --> Quality
|
|
411
|
-
OutputMW --> Sensitive
|
|
412
|
-
OutputMW --> Professional
|
|
413
|
-
OutputMW --> Factual
|
|
414
|
-
OutputMW --> Custom2
|
|
415
|
-
|
|
416
|
-
OutputMW -->|Clean Response| App
|
|
417
|
-
OutputMW -->|Quality Issues| Log
|
|
418
|
-
|
|
419
|
-
style InputMW fill:#e1f5fe
|
|
420
|
-
style OutputMW fill:#f3e5f5
|
|
421
|
-
style AI fill:#fff3e0
|
|
422
|
-
style App fill:#e8f5e8
|
|
423
|
-
```
|
|
222
|
+
- ✅ Direct prompt injection
|
|
223
|
+
- ✅ Tool response poisoning
|
|
224
|
+
- ✅ Data exfiltration via URLs
|
|
225
|
+
- ✅ Encoded attacks (base64/hex)
|
|
226
|
+
- ✅ Cascading exploits
|
|
227
|
+
- ✅ Context poisoning
|
|
424
228
|
|
|
425
|
-
|
|
229
|
+
See [MCP Security documentation](#mcp-security-guardrails-advanced) for full details.
|
|
426
230
|
|
|
427
|
-
|
|
231
|
+
## Advanced Features
|
|
428
232
|
|
|
429
|
-
###
|
|
233
|
+
### Custom Guardrails
|
|
430
234
|
|
|
431
|
-
|
|
235
|
+
Create domain-specific guardrails:
|
|
432
236
|
|
|
433
|
-
```
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
execute: async ({ metadata }) => {
|
|
437
|
-
const userId = metadata?.userId ?? 'anonymous';
|
|
438
|
-
const allowed = await checkRateLimit(userId); // Your rate-limiting logic
|
|
237
|
+
```ts
|
|
238
|
+
import { defineInputGuardrail, defineOutputGuardrail } from 'ai-sdk-guardrails';
|
|
239
|
+
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
439
240
|
|
|
440
|
-
|
|
241
|
+
// Custom input guardrail
|
|
242
|
+
const businessHours = defineInputGuardrail({
|
|
243
|
+
name: 'business-hours',
|
|
244
|
+
execute: async () => {
|
|
245
|
+
const hour = new Date().getHours();
|
|
246
|
+
return hour >= 9 && hour <= 17
|
|
441
247
|
? { tripwireTriggered: false }
|
|
442
|
-
: {
|
|
443
|
-
tripwireTriggered: true,
|
|
444
|
-
message: `Rate limit exceeded for user: ${userId}`,
|
|
445
|
-
};
|
|
248
|
+
: { tripwireTriggered: true, message: 'Outside business hours' };
|
|
446
249
|
},
|
|
447
250
|
});
|
|
448
|
-
```
|
|
449
|
-
|
|
450
|
-
### LLM-as-Judge for Quality Scoring
|
|
451
251
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
const qualityJudge = defineOutputGuardrail({
|
|
456
|
-
name: 'llm-quality-judge',
|
|
252
|
+
// Custom output guardrail
|
|
253
|
+
const minQuality = defineOutputGuardrail({
|
|
254
|
+
name: 'min-quality',
|
|
457
255
|
execute: async ({ result }) => {
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
model: openai('gpt-3.5-turbo'),
|
|
461
|
-
prompt: `Is the following response helpful and safe? Answer YES or NO. \n\nResponse: "${result.text}"`,
|
|
462
|
-
});
|
|
463
|
-
|
|
464
|
-
const isSafe = judgement.text.includes('YES');
|
|
465
|
-
return isSafe
|
|
256
|
+
const { text } = extractContent(result);
|
|
257
|
+
return text.length >= 100
|
|
466
258
|
? { tripwireTriggered: false }
|
|
467
|
-
: {
|
|
468
|
-
tripwireTriggered: true,
|
|
469
|
-
message: `Output failed LLM-as-judge quality check.`,
|
|
470
|
-
metadata: { originalText: result.text },
|
|
471
|
-
};
|
|
259
|
+
: { tripwireTriggered: true, message: 'Response too short' };
|
|
472
260
|
},
|
|
473
261
|
});
|
|
474
|
-
```
|
|
475
262
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
import { extractTextContent } from 'ai-sdk-guardrails/guardrails/input';
|
|
480
|
-
|
|
481
|
-
const comprehensiveInputGuard = defineInputGuardrail({
|
|
482
|
-
name: 'comprehensive-input-validation',
|
|
483
|
-
execute: async (context) => {
|
|
484
|
-
const { prompt } = extractTextContent(context);
|
|
485
|
-
|
|
486
|
-
// Length validation
|
|
487
|
-
if (prompt.length < 10) {
|
|
488
|
-
return {
|
|
489
|
-
tripwireTriggered: true,
|
|
490
|
-
message: 'Input too short - likely to produce low-value response',
|
|
491
|
-
severity: 'medium',
|
|
492
|
-
suggestion: 'Please provide more detailed input for better results',
|
|
493
|
-
};
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
if (prompt.length > 4000) {
|
|
497
|
-
return {
|
|
498
|
-
tripwireTriggered: true,
|
|
499
|
-
message: 'Input too long - may exceed token limits',
|
|
500
|
-
severity: 'high',
|
|
501
|
-
suggestion: 'Break your request into smaller, focused parts',
|
|
502
|
-
};
|
|
503
|
-
}
|
|
504
|
-
|
|
505
|
-
// Content quality checks
|
|
506
|
-
const spamPatterns = [
|
|
507
|
-
/^(.)\1{10,}$/, // Repeated characters
|
|
508
|
-
/^(test|hello|hi|hey)$/i, // Common spam words
|
|
509
|
-
];
|
|
510
|
-
|
|
511
|
-
const foundSpam = spamPatterns.find((pattern) => pattern.test(prompt));
|
|
512
|
-
if (foundSpam) {
|
|
513
|
-
return {
|
|
514
|
-
tripwireTriggered: true,
|
|
515
|
-
message: 'Low-quality input detected',
|
|
516
|
-
severity: 'high',
|
|
517
|
-
};
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
return { tripwireTriggered: false };
|
|
521
|
-
},
|
|
263
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
264
|
+
inputGuardrails: [businessHours],
|
|
265
|
+
outputGuardrails: [minQuality],
|
|
522
266
|
});
|
|
523
267
|
```
|
|
524
268
|
|
|
525
|
-
###
|
|
269
|
+
### Auto-Retry on Failures
|
|
526
270
|
|
|
527
|
-
|
|
528
|
-
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
271
|
+
Automatically retry when output doesn't meet requirements:
|
|
529
272
|
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
// Check for excessive repetition
|
|
558
|
-
const sentences = text.split(/[.!?]+/).filter((s) => s.trim());
|
|
559
|
-
const uniqueSentences = new Set(
|
|
560
|
-
sentences.map((s) => s.trim().toLowerCase()),
|
|
561
|
-
);
|
|
562
|
-
const repetitionRatio = uniqueSentences.size / sentences.length;
|
|
563
|
-
|
|
564
|
-
if (sentences.length > 3 && repetitionRatio < 0.6) {
|
|
565
|
-
qualityIssues.push('Excessive repetition detected');
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
if (qualityIssues.length > 0) {
|
|
569
|
-
return {
|
|
570
|
-
tripwireTriggered: true,
|
|
571
|
-
message: `Quality issues found: ${qualityIssues.join(', ')}`,
|
|
572
|
-
severity: 'medium',
|
|
573
|
-
suggestion: 'Request a more professional, complete response',
|
|
574
|
-
metadata: {
|
|
575
|
-
issues: qualityIssues,
|
|
576
|
-
quality_score: repetitionRatio,
|
|
577
|
-
},
|
|
578
|
-
};
|
|
579
|
-
}
|
|
580
|
-
|
|
581
|
-
return { tripwireTriggered: false };
|
|
273
|
+
```ts
|
|
274
|
+
import {
|
|
275
|
+
wrapWithOutputGuardrails,
|
|
276
|
+
minLengthRequirement,
|
|
277
|
+
} from 'ai-sdk-guardrails';
|
|
278
|
+
|
|
279
|
+
const model = wrapWithOutputGuardrails(
|
|
280
|
+
openai('gpt-4o'),
|
|
281
|
+
[minLengthRequirement(100)],
|
|
282
|
+
{
|
|
283
|
+
retry: {
|
|
284
|
+
maxRetries: 2,
|
|
285
|
+
buildRetryParams: ({ lastParams }) => ({
|
|
286
|
+
...lastParams,
|
|
287
|
+
// Increase max tokens on retry
|
|
288
|
+
maxOutputTokens: (lastParams.maxOutputTokens ?? 400) + 200,
|
|
289
|
+
// Add context about the failure
|
|
290
|
+
prompt: [
|
|
291
|
+
...lastParams.prompt,
|
|
292
|
+
{
|
|
293
|
+
role: 'user',
|
|
294
|
+
content: 'Please provide a more detailed response.',
|
|
295
|
+
},
|
|
296
|
+
],
|
|
297
|
+
}),
|
|
298
|
+
},
|
|
582
299
|
},
|
|
583
|
-
|
|
300
|
+
);
|
|
584
301
|
```
|
|
585
302
|
|
|
586
|
-
|
|
303
|
+
### Reusable Configurations
|
|
587
304
|
|
|
588
|
-
|
|
305
|
+
Create reusable guardrail sets:
|
|
589
306
|
|
|
590
|
-
```
|
|
591
|
-
import {
|
|
307
|
+
```ts
|
|
308
|
+
import {
|
|
309
|
+
createGuardrails,
|
|
310
|
+
piiDetector,
|
|
311
|
+
sensitiveDataFilter,
|
|
312
|
+
} from 'ai-sdk-guardrails';
|
|
592
313
|
|
|
593
|
-
|
|
594
|
-
|
|
314
|
+
// Define once
|
|
315
|
+
const productionGuards = createGuardrails({
|
|
316
|
+
inputGuardrails: [piiDetector()],
|
|
317
|
+
outputGuardrails: [sensitiveDataFilter()],
|
|
318
|
+
throwOnBlocked: true,
|
|
595
319
|
});
|
|
596
320
|
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
321
|
+
// Apply to multiple models
|
|
322
|
+
const gpt4 = productionGuards(openai('gpt-4o'));
|
|
323
|
+
const claude = productionGuards(anthropic('claude-3-sonnet'));
|
|
324
|
+
```
|
|
601
325
|
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
}
|
|
326
|
+
### Streaming Modes
|
|
327
|
+
|
|
328
|
+
Control when guardrails run during streaming:
|
|
606
329
|
|
|
607
|
-
|
|
330
|
+
```ts
|
|
331
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
332
|
+
outputGuardrails: [minLengthRequirement(100)],
|
|
333
|
+
streamMode: 'progressive', // Run guardrails as tokens arrive
|
|
334
|
+
replaceOnBlocked: true, // Replace blocked output with fallback
|
|
335
|
+
});
|
|
608
336
|
```
|
|
609
337
|
|
|
610
|
-
|
|
338
|
+
- `buffer` (default): Wait for the stream to complete, then run guardrails
|
|
339
|
+
- `progressive`: Run guardrails as tokens arrive, allowing early termination
|
|
611
340
|
|
|
612
|
-
|
|
341
|
+
### Agent Support
|
|
342
|
+
|
|
343
|
+
Guardrails work with AI SDK Agents:
|
|
613
344
|
|
|
614
345
|
```ts
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
}
|
|
346
|
+
import { withAgentGuardrails } from 'ai-sdk-guardrails';
|
|
347
|
+
import { tool } from 'ai';
|
|
348
|
+
|
|
349
|
+
const agent = withAgentGuardrails(
|
|
350
|
+
{
|
|
351
|
+
model: openai('gpt-4o'),
|
|
352
|
+
tools: { search: searchTool },
|
|
353
|
+
system: 'You are a helpful assistant.',
|
|
354
|
+
},
|
|
355
|
+
{
|
|
356
|
+
inputGuardrails: [piiDetector()],
|
|
357
|
+
outputGuardrails: [sensitiveDataFilter()],
|
|
358
|
+
toolGuardrails: [
|
|
359
|
+
toolEgressPolicy({
|
|
360
|
+
allowedHosts: ['api.company.com'],
|
|
361
|
+
scanForUrls: true,
|
|
362
|
+
}),
|
|
363
|
+
],
|
|
364
|
+
},
|
|
365
|
+
);
|
|
366
|
+
|
|
367
|
+
const result = await agent.generate({ prompt: '...' });
|
|
622
368
|
```
|
|
623
369
|
|
|
624
|
-
|
|
370
|
+
## MCP Security Guardrails (Advanced)
|
|
371
|
+
|
|
372
|
+
**Production-Ready**: Protect against the ["lethal trifecta" vulnerability](https://simonwillison.net/2025/Jun/16/the-lethal-trifecta/) when using Model Context Protocol (MCP) tools.
|
|
625
373
|
|
|
626
|
-
|
|
627
|
-
- with `replaceOnBlocked: true`, a placeholder message is streamed and the stream ends.
|
|
628
|
-
- otherwise, the original chunks continue (with a callback via `onOutputBlocked`).
|
|
374
|
+
### The Problem
|
|
629
375
|
|
|
630
|
-
|
|
376
|
+
AI agents with MCP tools are vulnerable when they have:
|
|
631
377
|
|
|
632
|
-
|
|
378
|
+
1. **Access to private data** (through tools)
|
|
379
|
+
2. **Exposure to untrusted content** (from tool responses)
|
|
380
|
+
3. **The ability to communicate externally** (make web requests)
|
|
633
381
|
|
|
634
|
-
|
|
635
|
-
- `executionOptions.logLevel`: defaults to `'warn'` (respects `'none' | 'error' | 'warn' | 'info' | 'debug'`).
|
|
636
|
-
- `onInputBlocked` / `onOutputBlocked`: receive a `GuardrailExecutionSummary` with analytics.
|
|
382
|
+
Malicious tool responses can contain hidden instructions that trick the AI into exfiltrating sensitive data.
|
|
637
383
|
|
|
638
|
-
###
|
|
384
|
+
### Production Configuration
|
|
639
385
|
|
|
640
|
-
|
|
386
|
+
Full configurability with sensible defaults:
|
|
641
387
|
|
|
642
388
|
```ts
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
389
|
+
import {
|
|
390
|
+
withGuardrails,
|
|
391
|
+
promptInjectionDetector,
|
|
392
|
+
mcpSecurityGuardrail,
|
|
393
|
+
mcpResponseSanitizer,
|
|
394
|
+
toolEgressPolicy,
|
|
395
|
+
} from 'ai-sdk-guardrails';
|
|
396
|
+
|
|
397
|
+
// Conservative production setup (high security)
|
|
398
|
+
const secureModel = withGuardrails(openai('gpt-4o'), {
|
|
399
|
+
inputGuardrails: [
|
|
400
|
+
promptInjectionDetector({ threshold: 0.6, includeExamples: true }),
|
|
401
|
+
],
|
|
402
|
+
outputGuardrails: [
|
|
403
|
+
mcpSecurityGuardrail({
|
|
404
|
+
injectionThreshold: 0.5, // Lower = more sensitive
|
|
405
|
+
maxSuspiciousUrls: 0, // Zero tolerance
|
|
406
|
+
maxContentSize: 25600, // 25KB limit
|
|
407
|
+
minEncodedLength: 15, // Detect shorter encoded attacks
|
|
408
|
+
encodedInjectionThreshold: 0.2, // Combined threshold
|
|
409
|
+
highRiskThreshold: 0.3, // High-risk cascade blocking
|
|
410
|
+
authorityThreshold: 0.5, // Authority manipulation detection
|
|
411
|
+
allowedDomains: ['api.company.com', 'trusted-partner.com'],
|
|
412
|
+
customSuspiciousDomains: ['evil.com'],
|
|
413
|
+
blockCascadingCalls: true,
|
|
414
|
+
scanEncodedContent: true,
|
|
415
|
+
detectExfiltration: true,
|
|
416
|
+
}),
|
|
417
|
+
mcpResponseSanitizer(), // Clean responses instead of blocking them
|
|
418
|
+
toolEgressPolicy({
|
|
419
|
+
allowedHosts: ['api.company.com'],
|
|
420
|
+
blockedHosts: ['webhook.site', 'requestcatcher.com'],
|
|
421
|
+
scanForUrls: true,
|
|
422
|
+
}),
|
|
423
|
+
],
|
|
649
424
|
});
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
### Environment-Based Configuration
|
|
650
428
|
|
|
651
|
-
|
|
429
|
+
```ts
|
|
430
|
+
function getSecurityConfig(env: 'production' | 'staging' | 'development') {
|
|
431
|
+
const configs = {
|
|
432
|
+
production: {
|
|
433
|
+
injectionThreshold: 0.5, // High security
|
|
434
|
+
maxContentSize: 25600, // 25KB
|
|
435
|
+
authorityThreshold: 0.5,
|
|
436
|
+
},
|
|
437
|
+
staging: {
|
|
438
|
+
injectionThreshold: 0.7, // Balanced
|
|
439
|
+
maxContentSize: 51200, // 50KB
|
|
440
|
+
authorityThreshold: 0.7,
|
|
441
|
+
},
|
|
442
|
+
development: {
|
|
443
|
+
injectionThreshold: 0.8, // Permissive
|
|
444
|
+
maxContentSize: 102400, // 100KB
|
|
445
|
+
authorityThreshold: 0.8,
|
|
446
|
+
},
|
|
447
|
+
};
|
|
448
|
+
return configs[env];
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
452
|
+
outputGuardrails: [mcpSecurityGuardrail(getSecurityConfig('production'))],
|
|
453
|
+
});
|
|
652
454
|
```
|
|
653
455
|
|
|
654
|
-
|
|
456
|
+
### Configuration Options
|
|
655
457
|
|
|
656
|
-
|
|
458
|
+
| Option | Default | Description |
|
|
459
|
+
| --------------------------- | ------- | ------------------------------------------------ |
|
|
460
|
+
| `injectionThreshold` | 0.7 | Prompt injection confidence threshold (0-1) |
|
|
461
|
+
| `maxSuspiciousUrls` | 0 | Max allowed suspicious URLs (0 = zero tolerance) |
|
|
462
|
+
| `maxContentSize` | 51200 | Max content size in bytes (50KB default) |
|
|
463
|
+
| `minEncodedLength` | 20 | Min encoded content length to analyze |
|
|
464
|
+
| `encodedInjectionThreshold` | 0.3 | Combined encoded + injection threshold |
|
|
465
|
+
| `authorityThreshold` | 0.7 | Authority manipulation detection sensitivity |
|
|
466
|
+
| `allowedDomains` | [] | Allowed domains for URL construction |
|
|
467
|
+
| `customSuspiciousDomains` | [] | Additional suspicious domain patterns |
|
|
657
468
|
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
469
|
+
See complete examples:
|
|
470
|
+
|
|
471
|
+
- [Production MCP Configuration](./examples/44-production-mcp-config.ts)
|
|
472
|
+
- [MCP Security Test Suite](./examples/41-mcp-security-test.ts)
|
|
473
|
+
- [Enhanced Security Testing](./examples/43-enhanced-mcp-security-test.ts)
|
|
474
|
+
|
|
475
|
+
## Error Handling
|
|
476
|
+
|
|
477
|
+
### Throw Errors on Block
|
|
478
|
+
|
|
479
|
+
```ts
|
|
480
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
481
|
+
inputGuardrails: [piiDetector()],
|
|
482
|
+
throwOnBlocked: true, // Throw an error instead of silently blocking
|
|
483
|
+
});
|
|
661
484
|
|
|
662
485
|
try {
|
|
663
|
-
const
|
|
664
|
-
model: guardedModel,
|
|
665
|
-
prompt: 'A prompt that might be blocked...',
|
|
666
|
-
});
|
|
486
|
+
const { text } = await generateText({ model, prompt: '...' });
|
|
667
487
|
} catch (error) {
|
|
668
488
|
if (isGuardrailsError(error)) {
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
console.error('Triggered Guards:', error.results);
|
|
672
|
-
} else {
|
|
673
|
-
// Some other error occurred
|
|
674
|
-
console.error('An unexpected error occurred:', error);
|
|
489
|
+
console.error('Blocked:', error.message);
|
|
490
|
+
// error.results gives details per guardrail
|
|
675
491
|
}
|
|
676
492
|
}
|
|
677
493
|
```
|
|
678
494
|
|
|
679
|
-
###
|
|
680
|
-
|
|
681
|
-
Transform technical guardrail messages into user-friendly guidance:
|
|
495
|
+
### Error Types
|
|
682
496
|
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
497
|
+
- `GuardrailsInputError` - Input guardrail blocked
|
|
498
|
+
- `GuardrailsOutputError` - Output guardrail blocked
|
|
499
|
+
- `GuardrailExecutionError` - Guardrail threw an error
|
|
500
|
+
- `GuardrailTimeoutError` - Guardrail exceeded timeout
|
|
501
|
+
- `GuardrailConfigurationError` - Invalid configuration
|
|
686
502
|
|
|
687
|
-
|
|
688
|
-
case 'content-length-limit':
|
|
689
|
-
return 'Your message is too long. Please keep it under 500 characters for the best response.';
|
|
503
|
+
## API Reference
|
|
690
504
|
|
|
691
|
-
|
|
692
|
-
return "I can't help with that topic. Try asking about something else I can assist with.";
|
|
505
|
+
### Primary Functions
|
|
693
506
|
|
|
694
|
-
|
|
695
|
-
|
|
507
|
+
| Function | Purpose |
|
|
508
|
+
| ------------------------- | ---------------------------------------- |
|
|
509
|
+
| `withGuardrails` | Wrap model with guardrails (main API) |
|
|
510
|
+
| `createGuardrails` | Create reusable guardrail configurations |
|
|
511
|
+
| `withAgentGuardrails` | Wrap AI SDK Agents with guardrails |
|
|
512
|
+
| `defineInputGuardrail` | Create custom input guardrail |
|
|
513
|
+
| `defineOutputGuardrail` | Create custom output guardrail |
|
|
514
|
+
| `executeInputGuardrails` | Run input guardrails programmatically |
|
|
515
|
+
| `executeOutputGuardrails` | Run output guardrails programmatically |
|
|
696
516
|
|
|
697
|
-
|
|
698
|
-
return (
|
|
699
|
-
guardrailResult.suggestion ||
|
|
700
|
-
'Please refine your request and try again.'
|
|
701
|
-
);
|
|
702
|
-
}
|
|
703
|
-
}
|
|
704
|
-
```
|
|
517
|
+
### Error Utilities
|
|
705
518
|
|
|
706
|
-
|
|
519
|
+
| Function | Purpose |
|
|
520
|
+
| ------------------- | ------------------------------------ |
|
|
521
|
+
| `isGuardrailsError` | Check if error is from guardrails |
|
|
522
|
+
| `extractErrorInfo` | Extract structured error information |
|
|
707
523
|
|
|
708
|
-
|
|
524
|
+
### Retry Utilities
|
|
709
525
|
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
console.log('Input blocked:', executionSummary.blockedResults[0]?.message);
|
|
526
|
+
| Function | Purpose |
|
|
527
|
+
| ---------------------------- | --------------------------------- |
|
|
528
|
+
| `retry` | Standalone retry utility |
|
|
529
|
+
| `exponentialBackoff` | Exponential backoff strategy |
|
|
530
|
+
| `linearBackoff` | Linear backoff strategy |
|
|
531
|
+
| `jitteredExponentialBackoff` | Jittered exponential backoff |
|
|
532
|
+
| `backoffPresets` | Pre-configured backoff strategies |
|
|
718
533
|
|
|
719
|
-
|
|
720
|
-
console.log(`Execution time: ${executionSummary.totalExecutionTime}ms`);
|
|
721
|
-
console.log(
|
|
722
|
-
`Guardrails: ${executionSummary.stats.blocked} blocked, ${executionSummary.stats.passed} passed`,
|
|
723
|
-
);
|
|
724
|
-
},
|
|
725
|
-
onOutputBlocked: (executionSummary) => {
|
|
726
|
-
console.log(
|
|
727
|
-
'Output filtered:',
|
|
728
|
-
executionSummary.blockedResults[0]?.message,
|
|
729
|
-
);
|
|
730
|
-
|
|
731
|
-
// Track comprehensive metrics
|
|
732
|
-
analytics.track('output_blocked', {
|
|
733
|
-
severity: executionSummary.blockedResults[0]?.severity,
|
|
734
|
-
totalGuardrails: executionSummary.guardrailsExecuted,
|
|
735
|
-
executionTime: executionSummary.totalExecutionTime,
|
|
736
|
-
});
|
|
737
|
-
},
|
|
738
|
-
});
|
|
534
|
+
See source for all built-in guardrails:
|
|
739
535
|
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
});
|
|
745
|
-
|
|
746
|
-
const objectResult = await generateObject({
|
|
747
|
-
model: productionModel,
|
|
748
|
-
prompt: 'Create a user profile',
|
|
749
|
-
schema: userProfileSchema,
|
|
750
|
-
});
|
|
751
|
-
|
|
752
|
-
const textStream = await streamText({
|
|
753
|
-
model: productionModel,
|
|
754
|
-
prompt: 'Explain our product features',
|
|
755
|
-
});
|
|
756
|
-
```
|
|
536
|
+
- Input helpers: [`./src/guardrails/input.ts`](./src/guardrails/input.ts)
|
|
537
|
+
- Output helpers: [`./src/guardrails/output.ts`](./src/guardrails/output.ts)
|
|
538
|
+
- Tool helpers: [`./src/guardrails/tools.ts`](./src/guardrails/tools.ts)
|
|
539
|
+
- MCP security: [`./src/guardrails/mcp-security.ts`](./src/guardrails/mcp-security.ts)
|
|
757
540
|
|
|
758
541
|
## Examples
|
|
759
542
|
|
|
760
|
-
|
|
543
|
+
Browse 48+ runnable examples: [examples/README.md](./examples/README.md)
|
|
761
544
|
|
|
762
|
-
###
|
|
545
|
+
### Quick Starts
|
|
763
546
|
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
-
|
|
769
|
-
-
|
|
770
|
-
- **[Blocking vs Warning](examples/08-blocking-vs-warning.ts)** - Compare blocking and warning modes with error handling
|
|
547
|
+
| Example | Description | File |
|
|
548
|
+
| -------------------------- | ------------------------------- | --------------------------------------------------------------------------------- |
|
|
549
|
+
| Simple combined protection | Minimal input and output setup | [07a-simple-combined-protection.ts](./examples/07a-simple-combined-protection.ts) |
|
|
550
|
+
| Auto retry on output | Retry until output meets a rule | [32-auto-retry-output.ts](./examples/32-auto-retry-output.ts) |
|
|
551
|
+
| LLM judge auto-retry | Judge feedback drives retry | [35-judge-auto-retry.ts](./examples/35-judge-auto-retry.ts) |
|
|
552
|
+
| Weather assistant | End-to-end input/output + retry | [33-blog-post-weather-assistant.ts](./examples/33-blog-post-weather-assistant.ts) |
|
|
771
553
|
|
|
772
|
-
###
|
|
554
|
+
### Input Safety
|
|
773
555
|
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
-
|
|
780
|
-
- **[Secret Leakage Scan](examples/18-secret-leakage-scan.ts)** - Secret leakage scanning with automatic redaction and entropy calculation
|
|
781
|
-
- **[Jailbreak Detection](examples/30-jailbreak-detection.ts)** - Jailbreak detection with safe response templates and pattern recognition
|
|
556
|
+
| Example | Description | File |
|
|
557
|
+
| ------------------ | ----------------------------------- | --------------------------------------------------------------- |
|
|
558
|
+
| Input length limit | Enforce max input length | [01-input-length-limit.ts](./examples/01-input-length-limit.ts) |
|
|
559
|
+
| Blocked keywords | Block specific terms | [02-blocked-keywords.ts](./examples/02-blocked-keywords.ts) |
|
|
560
|
+
| PII detection | Detect PII before calling the model | [03-pii-detection.ts](./examples/03-pii-detection.ts) |
|
|
561
|
+
| Rate limiting | Simple per-user rate limit | [13-rate-limiting.ts](./examples/13-rate-limiting.ts) |
|
|
782
562
|
|
|
783
|
-
###
|
|
563
|
+
### Output Safety
|
|
784
564
|
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
- **[Response Consistency](examples/22-response-consistency.ts)** - Response consistency validation and coherence checking
|
|
565
|
+
| Example | Description | File |
|
|
566
|
+
| ----------------------- | ----------------------------------- | ------------------------------------------------------------------------- |
|
|
567
|
+
| Output length check | Require min/max output length | [04-output-length-check.ts](./examples/04-output-length-check.ts) |
|
|
568
|
+
| Sensitive output filter | Filter secrets and PII in responses | [05-sensitive-output-filter.ts](./examples/05-sensitive-output-filter.ts) |
|
|
569
|
+
| Hallucination detection | Flag uncertain factual claims | [19-hallucination-detection.ts](./examples/19-hallucination-detection.ts) |
|
|
791
570
|
|
|
792
|
-
###
|
|
571
|
+
### Streaming
|
|
793
572
|
|
|
794
|
-
|
|
795
|
-
|
|
573
|
+
| Example | Description | File |
|
|
574
|
+
| ----------------- | ---------------------------------- | --------------------------------------------------------------------------------- |
|
|
575
|
+
| Streaming limits | Apply limits in buffered streaming | [11-streaming-limits.ts](./examples/11-streaming-limits.ts) |
|
|
576
|
+
| Streaming quality | Quality checks with streaming | [12-streaming-quality.ts](./examples/12-streaming-quality.ts) |
|
|
577
|
+
| Early termination | Stop streams early when blocked | [28-streaming-early-termination.ts](./examples/28-streaming-early-termination.ts) |
|
|
796
578
|
|
|
797
|
-
###
|
|
579
|
+
### Advanced
|
|
798
580
|
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
581
|
+
| Example | Description | File |
|
|
582
|
+
| -------------------------- | ----------------------------- | ------------------------------------------------------------------------------- |
|
|
583
|
+
| Simple quality judge | Cheaper model judges quality | [15a-simple-quality-judge.ts](./examples/15a-simple-quality-judge.ts) |
|
|
584
|
+
| Secret leakage scan | Scan responses for secrets | [18-secret-leakage-scan.ts](./examples/18-secret-leakage-scan.ts) |
|
|
585
|
+
| SQL code safety | Basic SQL safety checks | [24-sql-code-safety.ts](./examples/24-sql-code-safety.ts) |
|
|
586
|
+
| Role hierarchy enforcement | Enforce role rules in prompts | [23-role-hierarchy-enforcement.ts](./examples/23-role-hierarchy-enforcement.ts) |
|
|
802
587
|
|
|
803
|
-
|
|
588
|
+
## Migration from v3.x
|
|
804
589
|
|
|
805
|
-
|
|
590
|
+
API naming has been improved in v4.x (old names still work but are deprecated):
|
|
806
591
|
|
|
807
|
-
|
|
592
|
+
```ts
|
|
593
|
+
// Before (v3.x - still works but deprecated)
|
|
594
|
+
import { wrapWithGuardrails, InputBlockedError } from 'ai-sdk-guardrails';
|
|
595
|
+
const model = wrapWithGuardrails(openai('gpt-4o'), { ... });
|
|
808
596
|
|
|
809
|
-
|
|
810
|
-
|
|
597
|
+
// After (v4.x - recommended)
|
|
598
|
+
import { withGuardrails, GuardrailsInputError } from 'ai-sdk-guardrails';
|
|
599
|
+
const model = withGuardrails(openai('gpt-4o'), { ... });
|
|
600
|
+
```
|
|
811
601
|
|
|
812
|
-
|
|
602
|
+
Changes:
|
|
813
603
|
|
|
814
|
-
-
|
|
815
|
-
-
|
|
604
|
+
- `wrapWithGuardrails` → `withGuardrails`
|
|
605
|
+
- `wrapAgentWithGuardrails` → `withAgentGuardrails`
|
|
606
|
+
- `InputBlockedError` → `GuardrailsInputError`
|
|
607
|
+
- `OutputBlockedError` → `GuardrailsOutputError`
|
|
816
608
|
|
|
817
|
-
|
|
609
|
+
## Compatibility
|
|
818
610
|
|
|
819
|
-
- **
|
|
820
|
-
- **
|
|
821
|
-
- **
|
|
611
|
+
- **Runtime**: Node.js 18+ recommended
|
|
612
|
+
- **AI SDK**: Compatible with AI SDK 5.x (`ai@^5`)
|
|
613
|
+
- **TypeScript**: Full type safety with TypeScript 5+
|
|
614
|
+
- **Works with any model**: OpenAI, Anthropic, Mistral, Groq, etc.
|
|
822
615
|
|
|
823
|
-
|
|
616
|
+
## Why This Library?
|
|
824
617
|
|
|
825
|
-
-
|
|
618
|
+
**Non-invasive**: Guardrails are middleware. Your existing code, telemetry (Langfuse, Helicone), and logging stay intact.
|
|
826
619
|
|
|
827
|
-
|
|
620
|
+
**Production-ready**: Used in production by teams who need compliance, security, and cost control without rebuilding their infrastructure.
|
|
828
621
|
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
# Run core foundation examples
|
|
834
|
-
tsx examples/01-input-length-limit.ts # Basic input validation
|
|
835
|
-
tsx examples/02-blocked-keywords.ts # Keyword blocking
|
|
836
|
-
tsx examples/04-output-length-check.ts # Output length validation
|
|
837
|
-
tsx examples/06-quality-assessment.ts # Quality assessment
|
|
838
|
-
tsx examples/07-combined-protection.ts # Combined input/output protection
|
|
839
|
-
tsx examples/07a-simple-combined-protection.ts # Simplified combined protection
|
|
840
|
-
tsx examples/08-blocking-vs-warning.ts # Blocking vs warning modes
|
|
841
|
-
|
|
842
|
-
# Run security examples
|
|
843
|
-
tsx examples/03-pii-detection.ts # PII protection
|
|
844
|
-
tsx examples/05-sensitive-output-filter.ts # Sensitive output filtering
|
|
845
|
-
tsx examples/16-prompt-injection-detection.ts # Prompt injection protection
|
|
846
|
-
tsx examples/17-tool-call-validation.ts # Tool call validation
|
|
847
|
-
tsx examples/17a-basic-tool-allowlist.ts # Basic tool allowlisting
|
|
848
|
-
tsx examples/17b-tool-parameter-validation.ts # Tool parameter validation
|
|
849
|
-
tsx examples/18-secret-leakage-scan.ts # Secret leakage prevention
|
|
850
|
-
tsx examples/30-jailbreak-detection.ts # Jailbreak detection
|
|
851
|
-
|
|
852
|
-
# Run content quality examples
|
|
853
|
-
tsx examples/31-autoevals-guardrails.ts # AI-powered quality evaluation with Autoevals
|
|
854
|
-
tsx examples/14-business-logic.ts # Business-specific rules
|
|
855
|
-
tsx examples/15-llm-as-judge.ts # AI-powered quality control
|
|
856
|
-
tsx examples/15a-simple-quality-judge.ts # Simplified quality assessment
|
|
857
|
-
tsx examples/19-hallucination-detection.ts # Hallucination detection
|
|
858
|
-
tsx examples/22-response-consistency.ts # Response consistency
|
|
859
|
-
|
|
860
|
-
# Run compliance examples
|
|
861
|
-
tsx examples/21-regulated-advice-compliance.ts # Regulatory compliance
|
|
862
|
-
tsx examples/23-role-hierarchy-enforcement.ts # Role hierarchy enforcement
|
|
863
|
-
|
|
864
|
-
# Run data integrity examples
|
|
865
|
-
tsx examples/09-schema-validation.ts # Schema validation
|
|
866
|
-
tsx examples/10-object-content-filter.ts # Object content filtering
|
|
867
|
-
tsx examples/24-sql-code-safety.ts # SQL code safety
|
|
868
|
-
|
|
869
|
-
# Run network security examples
|
|
870
|
-
tsx examples/25-browsing-domain-allowlist.ts # Domain allowlisting
|
|
871
|
-
|
|
872
|
-
# Run privacy examples
|
|
873
|
-
tsx examples/26-memory-minimization.ts # Memory minimization
|
|
874
|
-
tsx examples/27-logging-redaction.ts # Logging redaction
|
|
875
|
-
|
|
876
|
-
# Run safety examples
|
|
877
|
-
tsx examples/20-human-review-escalation.ts # Human review escalation
|
|
878
|
-
tsx examples/29-toxicity-harassment-deescalation.ts # Toxicity de-escalation
|
|
879
|
-
|
|
880
|
-
# Run streaming examples
|
|
881
|
-
tsx examples/11-streaming-limits.ts # Streaming limits
|
|
882
|
-
tsx examples/12-streaming-quality.ts # Streaming quality monitoring
|
|
883
|
-
tsx examples/28-streaming-early-termination.ts # Streaming early termination
|
|
884
|
-
|
|
885
|
-
# Run resource management examples
|
|
886
|
-
tsx examples/13-rate-limiting.ts # Rate limiting
|
|
887
|
-
```
|
|
622
|
+
**Developer experience**: One line to add safety. Progressive complexity - start simple, add advanced features when needed.
|
|
623
|
+
|
|
624
|
+
**Type-safe**: Rich TypeScript types and inference throughout.
|
|
888
625
|
|
|
889
|
-
##
|
|
626
|
+
## Contributing
|
|
890
627
|
|
|
891
|
-
|
|
628
|
+
Issues and PRs are welcome.
|
|
892
629
|
|
|
893
|
-
##
|
|
630
|
+
## License
|
|
894
631
|
|
|
895
|
-
MIT ©
|
|
632
|
+
MIT © Jag Reehal. See [LICENSE](./LICENSE) for details.
|