ai-sdk-guardrails 5.0.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +345 -436
- package/package.json +14 -14
package/README.md
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
# AI SDK Guardrails
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**Safety and quality controls for Vercel AI SDK**
|
|
4
4
|
|
|
5
|
-
Add
|
|
6
|
-
|
|
7
|
-
**Now includes MCP (Model Context Protocol) security guardrails** to help protect against attacks when using AI tools.
|
|
5
|
+
Add guardrails to your AI applications in one line of code. Block PII, prevent prompt injection, and enforce output quality — all while keeping your existing telemetry and observability stack intact.
|
|
8
6
|
|
|
9
7
|
[](https://www.npmjs.com/package/ai-sdk-guardrails)
|
|
10
8
|
[](https://www.npmjs.com/package/ai-sdk-guardrails)
|
|
@@ -14,52 +12,60 @@ Add safety checks and quality controls to your AI applications. Guard against pr
|
|
|
14
12
|
|
|
15
13
|

|
|
16
14
|
|
|
17
|
-
##
|
|
15
|
+
## Drop-in Guardrails for any AI model
|
|
18
16
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
- **Save costs**: Block unnecessary requests before they hit your model
|
|
23
|
-
- **Improve safety**: Detect PII, block harmful content, prevent prompt injection
|
|
24
|
-
- **Better quality**: Enforce minimum response lengths, validate structure, auto-retry on failures
|
|
25
|
-
- **Easy integration**: Works as middleware with any AI SDK model
|
|
17
|
+
```ts
|
|
18
|
+
import { withGuardrails, piiDetector } from 'ai-sdk-guardrails';
|
|
19
|
+
const model = openai('gpt-4o'); // or any other AI model
|
|
26
20
|
|
|
27
|
-
|
|
21
|
+
// Everything else stays the same
|
|
22
|
+
const safeModel = withGuardrails(model, {
|
|
23
|
+
inputGuardrails: [piiDetector()],
|
|
24
|
+
});
|
|
28
25
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
- Prompt injection prevention
|
|
33
|
-
- Tool usage validation
|
|
34
|
-
- Auto-retry on low-quality responses
|
|
26
|
+
// Your existing code, telemetry, and logging still works
|
|
27
|
+
await generateText({ model: safeModel, prompt: '...' });
|
|
28
|
+
```
|
|
35
29
|
|
|
36
|
-
|
|
30
|
+
**That's it.** Your AI now blocks PII automatically.
|
|
37
31
|
|
|
38
|
-
|
|
32
|
+
## Installation
|
|
39
33
|
|
|
40
34
|
```bash
|
|
41
35
|
npm install ai-sdk-guardrails
|
|
42
36
|
```
|
|
43
37
|
|
|
44
|
-
|
|
38
|
+
## Why Guardrails Matter
|
|
39
|
+
|
|
40
|
+
Real problems that guardrails solve:
|
|
41
|
+
|
|
42
|
+
❌ **Without guardrails:**
|
|
45
43
|
|
|
46
44
|
```ts
|
|
47
|
-
|
|
45
|
+
// User: "My email is john@company.com, help me..."
|
|
46
|
+
// → Sends PII to model → Compliance violation → $$$
|
|
48
47
|
```
|
|
49
48
|
|
|
50
|
-
**
|
|
49
|
+
✅ **With guardrails:**
|
|
51
50
|
|
|
52
51
|
```ts
|
|
53
|
-
const
|
|
54
|
-
inputGuardrails: [piiDetector()],
|
|
52
|
+
const model = withGuardrails(baseModel, {
|
|
53
|
+
inputGuardrails: [piiDetector()], // Blocks before API call
|
|
55
54
|
});
|
|
55
|
+
// → Request blocked → No PII leak → No cost → Compliant
|
|
56
56
|
```
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
Common use cases:
|
|
59
|
+
|
|
60
|
+
- 🛡️ **Compliance**: Block PII before it reaches your model
|
|
61
|
+
- 💰 **Cost control**: Stop bad requests before they cost money
|
|
62
|
+
- 🔒 **Security**: Prevent prompt injection and data exfiltration
|
|
63
|
+
- ✅ **Quality**: Enforce minimum response standards
|
|
64
|
+
- 🔧 **Production**: Works with your existing observability tools
|
|
59
65
|
|
|
60
|
-
##
|
|
66
|
+
## Copy-Paste Examples
|
|
61
67
|
|
|
62
|
-
|
|
68
|
+
### Basic Protection (Most Common)
|
|
63
69
|
|
|
64
70
|
```ts
|
|
65
71
|
import { generateText } from 'ai';
|
|
@@ -68,142 +74,187 @@ import {
|
|
|
68
74
|
withGuardrails,
|
|
69
75
|
piiDetector,
|
|
70
76
|
promptInjectionDetector,
|
|
71
|
-
minLengthRequirement,
|
|
72
|
-
mcpSecurityGuardrail,
|
|
73
77
|
} from 'ai-sdk-guardrails';
|
|
74
78
|
|
|
75
79
|
const model = withGuardrails(openai('gpt-4o'), {
|
|
76
80
|
inputGuardrails: [piiDetector(), promptInjectionDetector()],
|
|
77
|
-
outputGuardrails: [
|
|
78
|
-
minLengthRequirement(160),
|
|
79
|
-
mcpSecurityGuardrail({
|
|
80
|
-
maxContentSize: 51200, // 50KB limit
|
|
81
|
-
injectionThreshold: 0.7, // Configurable sensitivity
|
|
82
|
-
allowedDomains: ['api.company.com'], // Domain allowlist
|
|
83
|
-
}),
|
|
84
|
-
],
|
|
85
81
|
});
|
|
86
82
|
|
|
83
|
+
// Use exactly like before - nothing else changes
|
|
87
84
|
const { text } = await generateText({
|
|
88
85
|
model,
|
|
89
|
-
prompt: 'Write a friendly
|
|
86
|
+
prompt: 'Write a friendly email',
|
|
90
87
|
});
|
|
91
88
|
```
|
|
92
89
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
## Quickstart (30 seconds)
|
|
90
|
+
### Input + Output Protection
|
|
96
91
|
|
|
97
|
-
|
|
92
|
+
```ts
|
|
93
|
+
import {
|
|
94
|
+
withGuardrails,
|
|
95
|
+
piiDetector,
|
|
96
|
+
sensitiveDataFilter,
|
|
97
|
+
minLengthRequirement,
|
|
98
|
+
} from 'ai-sdk-guardrails';
|
|
98
99
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
101
|
+
inputGuardrails: [piiDetector()], // Block PII in prompts
|
|
102
|
+
outputGuardrails: [
|
|
103
|
+
sensitiveDataFilter(), // Remove secrets from responses
|
|
104
|
+
minLengthRequirement(100), // Enforce quality standards
|
|
105
|
+
],
|
|
106
|
+
});
|
|
103
107
|
```
|
|
104
108
|
|
|
105
|
-
|
|
109
|
+
### Works With Streaming
|
|
106
110
|
|
|
107
111
|
```ts
|
|
108
|
-
import {
|
|
109
|
-
import { openai } from '@ai-sdk/openai';
|
|
110
|
-
import { withGuardrails, piiDetector } from 'ai-sdk-guardrails';
|
|
112
|
+
import { streamText } from 'ai';
|
|
111
113
|
|
|
112
114
|
const model = withGuardrails(openai('gpt-4o'), {
|
|
113
|
-
|
|
115
|
+
outputGuardrails: [minLengthRequirement(100)],
|
|
114
116
|
});
|
|
115
117
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
118
|
+
// Streaming just works - by default, guardrails run after the stream completes
|
|
119
|
+
const { textStream } = await streamText({ model, prompt: '...' });
|
|
120
|
+
for await (const chunk of textStream) {
|
|
121
|
+
process.stdout.write(chunk);
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Production Setup (With Error Handling)
|
|
126
|
+
|
|
127
|
+
```ts
|
|
128
|
+
import { isGuardrailsError } from 'ai-sdk-guardrails';
|
|
129
|
+
|
|
130
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
131
|
+
inputGuardrails: [piiDetector(), promptInjectionDetector()],
|
|
132
|
+
outputGuardrails: [sensitiveDataFilter()],
|
|
133
|
+
throwOnBlocked: true, // Throw errors instead of silent blocking
|
|
119
134
|
});
|
|
135
|
+
|
|
136
|
+
try {
|
|
137
|
+
const { text } = await generateText({ model, prompt: '...' });
|
|
138
|
+
console.log(text);
|
|
139
|
+
} catch (error) {
|
|
140
|
+
if (isGuardrailsError(error)) {
|
|
141
|
+
console.error('Blocked by guardrail:', error.message);
|
|
142
|
+
// Show user-friendly message
|
|
143
|
+
}
|
|
144
|
+
}
|
|
120
145
|
```
|
|
121
146
|
|
|
122
|
-
##
|
|
123
|
-
|
|
124
|
-
- Overview
|
|
125
|
-
- Concepts
|
|
126
|
-
- Installation
|
|
127
|
-
- Usage
|
|
128
|
-
- Define a guardrail
|
|
129
|
-
- Built-in helpers
|
|
130
|
-
- Streaming
|
|
131
|
-
- Auto Retry (utility and middleware)
|
|
132
|
-
- Error Handling
|
|
133
|
-
- API
|
|
134
|
-
- Examples
|
|
135
|
-
- Compatibility
|
|
136
|
-
- Architecture
|
|
137
|
-
- Contributing
|
|
138
|
-
|
|
139
|
-
## API Overview
|
|
147
|
+
## How It Works
|
|
140
148
|
|
|
141
|
-
|
|
149
|
+
Guardrails wrap your AI calls as middleware: input guardrails run before the model is called, and output guardrails run on the response:
|
|
142
150
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
151
|
+
```mermaid
|
|
152
|
+
flowchart LR
|
|
153
|
+
A[Input] --> B[Input Guardrails]
|
|
154
|
+
B -->|✅ Clean| C[AI Model]
|
|
155
|
+
B -->|❌ Blocked| X[No API Call]
|
|
156
|
+
C --> D[Output Guardrails]
|
|
157
|
+
D -->|✅ Clean| E[Response]
|
|
158
|
+
D -->|❌ Blocked| R[Retry/Replace/Block]
|
|
159
|
+
```
|
|
146
160
|
|
|
147
|
-
|
|
161
|
+
**Three-step workflow:**
|
|
148
162
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
163
|
+
1. **Receive**: Input or output arrives
|
|
164
|
+
2. **Check**: Guardrails run (PII detection, validation, etc.)
|
|
165
|
+
3. **Decide**: Pass through, block, or retry
|
|
152
166
|
|
|
153
|
-
|
|
154
|
-
// Before (v3.x - still works but deprecated)
|
|
155
|
-
import { wrapWithGuardrails, InputBlockedError } from 'ai-sdk-guardrails';
|
|
156
|
-
const model = wrapWithGuardrails(openai('gpt-4o'), { ... });
|
|
167
|
+
**Key benefit**: Non-invasive. Your existing telemetry, logging, and observability tools keep working because guardrails are just middleware.
|
|
157
168
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
169
|
+
## Built-in Guardrails
|
|
170
|
+
|
|
171
|
+
### Input Guardrails (Run Before Model)
|
|
172
|
+
|
|
173
|
+
| Guardrail | Purpose | Example |
|
|
174
|
+
| --------------------------- | -------------------------------- | ------------------- |
|
|
175
|
+
| `piiDetector()` | Block emails, phones, SSNs | Compliance, privacy |
|
|
176
|
+
| `promptInjectionDetector()` | Detect injection attempts | Security |
|
|
177
|
+
| `blockedKeywords()` | Block specific terms | Content policy |
|
|
178
|
+
| `inputLengthLimit()` | Enforce max input length | Cost control |
|
|
179
|
+
| `rateLimiting()` | Per-user rate limits | Abuse prevention |
|
|
180
|
+
| `profanityFilter()` | Block offensive language | Content moderation |
|
|
181
|
+
| `toxicityDetector()` | Detect toxic content | Safety |
|
|
182
|
+
| `allowedToolsGuardrail()` | Restrict which tools can be used | Tool security |
|
|
161
183
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
184
|
+
### Output Guardrails (Run After Model)
|
|
185
|
+
|
|
186
|
+
| Guardrail | Purpose | Example |
|
|
187
|
+
| ------------------------- | --------------------------- | ------------------------- |
|
|
188
|
+
| `sensitiveDataFilter()` | Remove secrets, API keys | Security |
|
|
189
|
+
| `minLengthRequirement()` | Enforce minimum length | Quality control |
|
|
190
|
+
| `outputLengthLimit()` | Enforce maximum length | Cost/UX control |
|
|
191
|
+
| `toxicityFilter()` | Block toxic responses | Safety |
|
|
192
|
+
| `jsonValidation()` | Validate JSON structure | Structured output |
|
|
193
|
+
| `schemaValidation()` | Validate against Zod schema | Type safety |
|
|
194
|
+
| `confidenceThreshold()` | Require minimum confidence | Quality |
|
|
195
|
+
| `hallucinationDetector()` | Detect uncertain claims | Accuracy |
|
|
196
|
+
| `secretRedaction()` | Redact secrets from output | Security |
|
|
197
|
+
| `mcpSecurityGuardrail()` | MCP tool security | Prevent data exfiltration |
|
|
198
|
+
|
|
199
|
+
### MCP Security Guardrails
|
|
200
|
+
|
|
201
|
+
Protect against prompt injection and data exfiltration when using Model Context Protocol (MCP) tools:
|
|
202
|
+
|
|
203
|
+
```ts
|
|
204
|
+
import { mcpSecurityGuardrail, mcpResponseSanitizer } from 'ai-sdk-guardrails';
|
|
205
|
+
|
|
206
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
207
|
+
outputGuardrails: [
|
|
208
|
+
mcpSecurityGuardrail({
|
|
209
|
+
detectExfiltration: true, // Detect data exfiltration attempts
|
|
210
|
+
scanEncodedContent: true, // Scan base64/hex encoded content
|
|
211
|
+
allowedDomains: ['api.company.com'], // Domain allowlist
|
|
212
|
+
maxContentSize: 51200, // 50KB limit
|
|
213
|
+
injectionThreshold: 0.7, // Sensitivity (lower = stricter)
|
|
214
|
+
}),
|
|
215
|
+
mcpResponseSanitizer(), // Sanitize malicious content instead of blocking the response
|
|
216
|
+
],
|
|
217
|
+
});
|
|
166
218
|
```
|
|
167
219
|
|
|
168
|
-
|
|
220
|
+
**Attack vectors prevented:**
|
|
169
221
|
|
|
170
|
-
-
|
|
171
|
-
-
|
|
172
|
-
-
|
|
222
|
+
- ✅ Direct prompt injection
|
|
223
|
+
- ✅ Tool response poisoning
|
|
224
|
+
- ✅ Data exfiltration via URLs
|
|
225
|
+
- ✅ Encoded attacks (base64/hex)
|
|
226
|
+
- ✅ Cascading exploits
|
|
227
|
+
- ✅ Context poisoning
|
|
173
228
|
|
|
174
|
-
|
|
229
|
+
See [MCP Security documentation](#mcp-security-guardrails-advanced) for full details.
|
|
175
230
|
|
|
176
|
-
|
|
231
|
+
## Advanced Features
|
|
177
232
|
|
|
178
|
-
|
|
233
|
+
### Custom Guardrails
|
|
179
234
|
|
|
180
|
-
|
|
235
|
+
Create domain-specific guardrails:
|
|
181
236
|
|
|
182
237
|
```ts
|
|
183
|
-
import {
|
|
184
|
-
import {
|
|
185
|
-
defineInputGuardrail,
|
|
186
|
-
defineOutputGuardrail,
|
|
187
|
-
withGuardrails,
|
|
188
|
-
} from 'ai-sdk-guardrails';
|
|
189
|
-
import { extractTextContent } from 'ai-sdk-guardrails/guardrails/input';
|
|
238
|
+
import { defineInputGuardrail, defineOutputGuardrail } from 'ai-sdk-guardrails';
|
|
190
239
|
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
191
240
|
|
|
241
|
+
// Custom input guardrail
|
|
192
242
|
const businessHours = defineInputGuardrail({
|
|
193
243
|
name: 'business-hours',
|
|
194
|
-
execute: async (
|
|
195
|
-
const
|
|
196
|
-
return
|
|
244
|
+
execute: async () => {
|
|
245
|
+
const hour = new Date().getHours();
|
|
246
|
+
return hour >= 9 && hour <= 17
|
|
197
247
|
? { tripwireTriggered: false }
|
|
198
248
|
: { tripwireTriggered: true, message: 'Outside business hours' };
|
|
199
249
|
},
|
|
200
250
|
});
|
|
201
251
|
|
|
252
|
+
// Custom output guardrail
|
|
202
253
|
const minQuality = defineOutputGuardrail({
|
|
203
254
|
name: 'min-quality',
|
|
204
255
|
execute: async ({ result }) => {
|
|
205
256
|
const { text } = extractContent(result);
|
|
206
|
-
return text.length >=
|
|
257
|
+
return text.length >= 100
|
|
207
258
|
? { tripwireTriggered: false }
|
|
208
259
|
: { tripwireTriggered: true, message: 'Response too short' };
|
|
209
260
|
},
|
|
@@ -215,213 +266,114 @@ const model = withGuardrails(openai('gpt-4o'), {
|
|
|
215
266
|
});
|
|
216
267
|
```
|
|
217
268
|
|
|
218
|
-
###
|
|
269
|
+
### Auto-Retry on Failures
|
|
270
|
+
|
|
271
|
+
Automatically retry when output doesn't meet requirements:
|
|
219
272
|
|
|
220
273
|
```ts
|
|
221
|
-
import { openai } from '@ai-sdk/openai';
|
|
222
274
|
import {
|
|
223
|
-
|
|
224
|
-
piiDetector,
|
|
225
|
-
blockedKeywords,
|
|
226
|
-
contentLengthLimit,
|
|
227
|
-
promptInjectionDetector,
|
|
228
|
-
sensitiveDataFilter,
|
|
275
|
+
wrapWithOutputGuardrails,
|
|
229
276
|
minLengthRequirement,
|
|
230
|
-
confidenceThreshold,
|
|
231
|
-
mcpSecurityGuardrail,
|
|
232
|
-
mcpResponseSanitizer,
|
|
233
277
|
} from 'ai-sdk-guardrails';
|
|
234
278
|
|
|
235
|
-
const model =
|
|
236
|
-
inputGuardrails: [
|
|
237
|
-
piiDetector(),
|
|
238
|
-
promptInjectionDetector({ threshold: 0.7 }),
|
|
239
|
-
blockedKeywords(['test', 'spam']),
|
|
240
|
-
contentLengthLimit(4000),
|
|
241
|
-
],
|
|
242
|
-
outputGuardrails: [
|
|
243
|
-
mcpSecurityGuardrail({
|
|
244
|
-
detectExfiltration: true,
|
|
245
|
-
scanEncodedContent: true,
|
|
246
|
-
allowedDomains: ['trusted-api.com'],
|
|
247
|
-
}),
|
|
248
|
-
mcpResponseSanitizer(),
|
|
249
|
-
sensitiveDataFilter(),
|
|
250
|
-
minLengthRequirement(160),
|
|
251
|
-
confidenceThreshold(0.6),
|
|
252
|
-
],
|
|
253
|
-
});
|
|
254
|
-
```
|
|
255
|
-
|
|
256
|
-
## Streaming
|
|
257
|
-
|
|
258
|
-
Works out of the box. By default, guardrails run after the stream ends (buffer mode). For early blocking, enable progressive mode.
|
|
259
|
-
|
|
260
|
-
```ts
|
|
261
|
-
import { streamText } from 'ai';
|
|
262
|
-
import { openai } from '@ai-sdk/openai';
|
|
263
|
-
import { withGuardrails, minLengthRequirement } from 'ai-sdk-guardrails';
|
|
264
|
-
|
|
265
|
-
const model = withGuardrails(openai('gpt-4o'), {
|
|
266
|
-
outputGuardrails: [minLengthRequirement(120)],
|
|
267
|
-
// Evaluate as tokens arrive; stop or replace early when blocked
|
|
268
|
-
streamMode: 'progressive',
|
|
269
|
-
replaceOnBlocked: true,
|
|
270
|
-
});
|
|
271
|
-
|
|
272
|
-
const { textStream } = await streamText({
|
|
273
|
-
model,
|
|
274
|
-
prompt: 'Tell me a short story about a robot.',
|
|
275
|
-
});
|
|
276
|
-
|
|
277
|
-
for await (const delta of textStream) process.stdout.write(delta);
|
|
278
|
-
```
|
|
279
|
-
|
|
280
|
-
## Auto Retry
|
|
281
|
-
|
|
282
|
-
Choose what fits your flow:
|
|
283
|
-
|
|
284
|
-
- Standalone utility: Use `retry()` to wrap any generation function with your own validator and backoff.
|
|
285
|
-
- Middleware option: Add `retry` to output guardrails so retries run automatically when a check fails.
|
|
286
|
-
|
|
287
|
-
### Utility
|
|
288
|
-
|
|
289
|
-
```ts
|
|
290
|
-
import { retry } from 'ai-sdk-guardrails';
|
|
291
|
-
import { generateText } from 'ai';
|
|
292
|
-
import { openai } from '@ai-sdk/openai';
|
|
293
|
-
|
|
294
|
-
const result = await retry({
|
|
295
|
-
generate: (params) => generateText({ model: openai('gpt-4o'), ...params }),
|
|
296
|
-
params: { prompt: 'Explain backpropagation in depth.' },
|
|
297
|
-
validate: (r) => ({
|
|
298
|
-
blocked: (r.text ?? '').length < 500,
|
|
299
|
-
message: 'Response too short',
|
|
300
|
-
}),
|
|
301
|
-
buildRetryParams: ({ lastParams }) => ({
|
|
302
|
-
...lastParams,
|
|
303
|
-
maxOutputTokens: Math.max(800, (lastParams.maxOutputTokens ?? 400) + 300),
|
|
304
|
-
}),
|
|
305
|
-
maxRetries: 2,
|
|
306
|
-
});
|
|
307
|
-
```
|
|
308
|
-
|
|
309
|
-
### Middleware
|
|
310
|
-
|
|
311
|
-
```ts
|
|
312
|
-
import { generateText } from 'ai';
|
|
313
|
-
import { openai } from '@ai-sdk/openai';
|
|
314
|
-
import { withGuardrails, defineOutputGuardrail } from 'ai-sdk-guardrails';
|
|
315
|
-
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
316
|
-
|
|
317
|
-
const minLengthGuardrail = defineOutputGuardrail<{ minChars: number }>({
|
|
318
|
-
name: 'min-output-length',
|
|
319
|
-
execute: async ({ result }) => {
|
|
320
|
-
const { text } = extractContent(result);
|
|
321
|
-
const minChars = text.length + 1;
|
|
322
|
-
return text.length < minChars
|
|
323
|
-
? {
|
|
324
|
-
tripwireTriggered: true,
|
|
325
|
-
severity: 'medium',
|
|
326
|
-
message: `Answer too short: ${text.length} < ${minChars}`,
|
|
327
|
-
metadata: { minChars },
|
|
328
|
-
}
|
|
329
|
-
: { tripwireTriggered: false };
|
|
330
|
-
},
|
|
331
|
-
});
|
|
332
|
-
|
|
333
|
-
const guarded = wrapWithOutputGuardrails(
|
|
279
|
+
const model = wrapWithOutputGuardrails(
|
|
334
280
|
openai('gpt-4o'),
|
|
335
|
-
[
|
|
281
|
+
[minLengthRequirement(100)],
|
|
336
282
|
{
|
|
337
|
-
replaceOnBlocked: false,
|
|
338
283
|
retry: {
|
|
339
|
-
maxRetries:
|
|
340
|
-
buildRetryParams: ({
|
|
284
|
+
maxRetries: 2,
|
|
285
|
+
buildRetryParams: ({ lastParams }) => ({
|
|
341
286
|
...lastParams,
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
),
|
|
287
|
+
// Increase max tokens on retry
|
|
288
|
+
maxOutputTokens: (lastParams.maxOutputTokens ?? 400) + 200,
|
|
289
|
+
// Add context about the failure
|
|
346
290
|
prompt: [
|
|
347
|
-
...
|
|
291
|
+
...lastParams.prompt,
|
|
348
292
|
{
|
|
349
|
-
role: 'user'
|
|
350
|
-
content:
|
|
351
|
-
{
|
|
352
|
-
type: 'text' as const,
|
|
353
|
-
text: `Note: The previous answer ${summary.blockedResults[0]?.message}. Provide a comprehensive, detailed answer with examples.`,
|
|
354
|
-
},
|
|
355
|
-
],
|
|
293
|
+
role: 'user',
|
|
294
|
+
content: 'Please provide a more detailed response.',
|
|
356
295
|
},
|
|
357
296
|
],
|
|
358
297
|
}),
|
|
359
298
|
},
|
|
360
299
|
},
|
|
361
300
|
);
|
|
362
|
-
|
|
363
|
-
const { text } = await generateText({
|
|
364
|
-
model: guarded,
|
|
365
|
-
prompt: 'Explain the significance of the Turing Test in AI history.',
|
|
366
|
-
});
|
|
367
301
|
```
|
|
368
302
|
|
|
369
|
-
|
|
303
|
+
### Reusable Configurations
|
|
370
304
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
Set `throwOnBlocked: true` to throw structured errors you can catch and turn into friendly messages.
|
|
305
|
+
Create reusable guardrail sets:
|
|
374
306
|
|
|
375
307
|
```ts
|
|
376
|
-
import {
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
}
|
|
381
|
-
if (isGuardrailsError(err)) {
|
|
382
|
-
console.error('Guardrail blocked:', err.message);
|
|
383
|
-
// err.results gives you details per guardrail
|
|
384
|
-
} else {
|
|
385
|
-
console.error('Unexpected error:', err);
|
|
386
|
-
}
|
|
387
|
-
}
|
|
388
|
-
```
|
|
389
|
-
|
|
390
|
-
## Reusable Guardrails Factory
|
|
391
|
-
|
|
392
|
-
Use `createGuardrails()` to create reusable guardrail configurations that can be applied to multiple models:
|
|
393
|
-
|
|
394
|
-
```ts
|
|
395
|
-
import { openai } from '@ai-sdk/openai';
|
|
396
|
-
import { anthropic } from '@ai-sdk/anthropic';
|
|
397
|
-
import { createGuardrails, defineInputGuardrail } from 'ai-sdk-guardrails';
|
|
308
|
+
import {
|
|
309
|
+
createGuardrails,
|
|
310
|
+
piiDetector,
|
|
311
|
+
sensitiveDataFilter,
|
|
312
|
+
} from 'ai-sdk-guardrails';
|
|
398
313
|
|
|
399
|
-
//
|
|
314
|
+
// Define once
|
|
400
315
|
const productionGuards = createGuardrails({
|
|
401
|
-
inputGuardrails: [piiDetector()
|
|
402
|
-
outputGuardrails: [
|
|
316
|
+
inputGuardrails: [piiDetector()],
|
|
317
|
+
outputGuardrails: [sensitiveDataFilter()],
|
|
403
318
|
throwOnBlocked: true,
|
|
404
319
|
});
|
|
405
320
|
|
|
406
321
|
// Apply to multiple models
|
|
407
322
|
const gpt4 = productionGuards(openai('gpt-4o'));
|
|
408
323
|
const claude = productionGuards(anthropic('claude-3-sonnet'));
|
|
324
|
+
```
|
|
409
325
|
|
|
410
|
-
|
|
411
|
-
const strictLimits = createGuardrails({ inputGuardrails: [maxLength(500)] });
|
|
412
|
-
const piiProtection = createGuardrails({ inputGuardrails: [piiDetector()] });
|
|
326
|
+
### Streaming Modes
|
|
413
327
|
|
|
414
|
-
|
|
415
|
-
|
|
328
|
+
Control when guardrails run during streaming:
|
|
329
|
+
|
|
330
|
+
```ts
|
|
331
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
332
|
+
outputGuardrails: [minLengthRequirement(100)],
|
|
333
|
+
streamMode: 'progressive', // Run guardrails as tokens arrive
|
|
334
|
+
replaceOnBlocked: true, // Replace blocked output with fallback
|
|
335
|
+
});
|
|
416
336
|
```
|
|
417
337
|
|
|
418
|
-
|
|
338
|
+
- `buffer` (default): Wait for stream to complete, then check
|
|
339
|
+
- `progressive`: Check guardrails as tokens arrive (early termination)
|
|
419
340
|
|
|
420
|
-
|
|
341
|
+
### Agent Support
|
|
342
|
+
|
|
343
|
+
Guardrails work with AI SDK Agents:
|
|
344
|
+
|
|
345
|
+
```ts
|
|
346
|
+
import { withAgentGuardrails } from 'ai-sdk-guardrails';
|
|
347
|
+
import { tool } from 'ai';
|
|
348
|
+
|
|
349
|
+
const agent = withAgentGuardrails(
|
|
350
|
+
{
|
|
351
|
+
model: openai('gpt-4o'),
|
|
352
|
+
tools: { search: searchTool },
|
|
353
|
+
system: 'You are a helpful assistant.',
|
|
354
|
+
},
|
|
355
|
+
{
|
|
356
|
+
inputGuardrails: [piiDetector()],
|
|
357
|
+
outputGuardrails: [sensitiveDataFilter()],
|
|
358
|
+
toolGuardrails: [
|
|
359
|
+
toolEgressPolicy({
|
|
360
|
+
allowedHosts: ['api.company.com'],
|
|
361
|
+
scanForUrls: true,
|
|
362
|
+
}),
|
|
363
|
+
],
|
|
364
|
+
},
|
|
365
|
+
);
|
|
366
|
+
|
|
367
|
+
const result = await agent.generate({ prompt: '...' });
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
## MCP Security Guardrails (Advanced)
|
|
371
|
+
|
|
372
|
+
**Production-Ready**: Protect against the ["lethal trifecta" vulnerability](https://simonwillison.net/2025/Jun/16/the-lethal-trifecta/) when using Model Context Protocol (MCP) tools.
|
|
421
373
|
|
|
422
374
|
### The Problem
|
|
423
375
|
|
|
424
|
-
AI agents with MCP tools
|
|
376
|
+
AI agents with MCP tools are vulnerable when they have:
|
|
425
377
|
|
|
426
378
|
1. **Access to private data** (through tools)
|
|
427
379
|
2. **Exposure to untrusted content** (from tool responses)
|
|
@@ -429,9 +381,9 @@ AI agents with MCP tools can be vulnerable when they have:
|
|
|
429
381
|
|
|
430
382
|
Malicious tool responses can contain hidden instructions that trick the AI into exfiltrating sensitive data.
|
|
431
383
|
|
|
432
|
-
### Production
|
|
384
|
+
### Production Configuration
|
|
433
385
|
|
|
434
|
-
Full configurability with sensible defaults
|
|
386
|
+
Full configurability with sensible defaults:
|
|
435
387
|
|
|
436
388
|
```ts
|
|
437
389
|
import {
|
|
@@ -451,100 +403,58 @@ const secureModel = withGuardrails(openai('gpt-4o'), {
|
|
|
451
403
|
mcpSecurityGuardrail({
|
|
452
404
|
injectionThreshold: 0.5, // Lower = more sensitive
|
|
453
405
|
maxSuspiciousUrls: 0, // Zero tolerance
|
|
454
|
-
maxContentSize: 25600, // 25KB limit
|
|
406
|
+
maxContentSize: 25600, // 25KB limit
|
|
455
407
|
minEncodedLength: 15, // Detect shorter encoded attacks
|
|
456
|
-
encodedInjectionThreshold: 0.2, // Combined
|
|
408
|
+
encodedInjectionThreshold: 0.2, // Combined threshold
|
|
457
409
|
highRiskThreshold: 0.3, // High-risk cascade blocking
|
|
458
410
|
authorityThreshold: 0.5, // Authority manipulation detection
|
|
459
411
|
allowedDomains: ['api.company.com', 'trusted-partner.com'],
|
|
460
|
-
customSuspiciousDomains: ['evil.com'
|
|
412
|
+
customSuspiciousDomains: ['evil.com'],
|
|
461
413
|
blockCascadingCalls: true,
|
|
462
414
|
scanEncodedContent: true,
|
|
463
415
|
detectExfiltration: true,
|
|
464
416
|
}),
|
|
465
|
-
mcpResponseSanitizer(), // Clean
|
|
417
|
+
mcpResponseSanitizer(), // Sanitize instead of blocking
|
|
466
418
|
toolEgressPolicy({
|
|
467
|
-
allowedHosts: ['api.company.com'
|
|
468
|
-
blockedHosts: ['webhook.site', 'requestcatcher.com'
|
|
419
|
+
allowedHosts: ['api.company.com'],
|
|
420
|
+
blockedHosts: ['webhook.site', 'requestcatcher.com'],
|
|
469
421
|
scanForUrls: true,
|
|
470
422
|
}),
|
|
471
423
|
],
|
|
472
424
|
});
|
|
473
425
|
```
|
|
474
426
|
|
|
475
|
-
### Environment
|
|
427
|
+
### Environment-Based Configuration
|
|
476
428
|
|
|
477
429
|
```ts
|
|
478
|
-
// Different security profiles for different environments
|
|
479
430
|
function getSecurityConfig(env: 'production' | 'staging' | 'development') {
|
|
480
431
|
const configs = {
|
|
481
432
|
production: {
|
|
482
433
|
injectionThreshold: 0.5, // High security
|
|
483
|
-
maxContentSize: 25600, // 25KB
|
|
484
|
-
authorityThreshold: 0.5,
|
|
434
|
+
maxContentSize: 25600, // 25KB
|
|
435
|
+
authorityThreshold: 0.5,
|
|
485
436
|
},
|
|
486
437
|
staging: {
|
|
487
|
-
injectionThreshold: 0.7, // Balanced
|
|
488
|
-
maxContentSize: 51200, // 50KB
|
|
489
|
-
authorityThreshold: 0.7,
|
|
438
|
+
injectionThreshold: 0.7, // Balanced
|
|
439
|
+
maxContentSize: 51200, // 50KB
|
|
440
|
+
authorityThreshold: 0.7,
|
|
490
441
|
},
|
|
491
442
|
development: {
|
|
492
|
-
injectionThreshold: 0.8, //
|
|
493
|
-
maxContentSize: 102400, // 100KB
|
|
494
|
-
authorityThreshold: 0.8,
|
|
443
|
+
injectionThreshold: 0.8, // Permissive
|
|
444
|
+
maxContentSize: 102400, // 100KB
|
|
445
|
+
authorityThreshold: 0.8,
|
|
495
446
|
},
|
|
496
447
|
};
|
|
497
448
|
return configs[env];
|
|
498
449
|
}
|
|
499
450
|
|
|
500
|
-
const
|
|
451
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
501
452
|
outputGuardrails: [mcpSecurityGuardrail(getSecurityConfig('production'))],
|
|
502
453
|
});
|
|
503
454
|
```
|
|
504
455
|
|
|
505
|
-
### Attack Vectors Prevented
|
|
506
|
-
|
|
507
|
-
✅ **Direct prompt injection** - "System: ignore all previous instructions"
|
|
508
|
-
✅ **Tool response poisoning** - Malicious content in MCP tool responses
|
|
509
|
-
✅ **Data exfiltration** - URLs constructed to steal sensitive data
|
|
510
|
-
✅ **Encoded attacks** - Base64/hex hidden malicious instructions
|
|
511
|
-
✅ **Cascading exploits** - Tool responses triggering additional dangerous calls
|
|
512
|
-
✅ **Context poisoning** - Attempts to modify AI behavior mid-conversation
|
|
513
|
-
|
|
514
|
-
### Secure MCP Agent Example
|
|
515
|
-
|
|
516
|
-
```ts
|
|
517
|
-
import { withAgentGuardrails } from 'ai-sdk-guardrails';
|
|
518
|
-
|
|
519
|
-
const secureAgent = withAgentGuardrails(
|
|
520
|
-
{
|
|
521
|
-
model: openai('gpt-4o'),
|
|
522
|
-
tools: { file_search, api_call, database_query },
|
|
523
|
-
system: 'You are a secure assistant. Always validate tool responses.',
|
|
524
|
-
},
|
|
525
|
-
{
|
|
526
|
-
inputGuardrails: [promptInjectionDetector()],
|
|
527
|
-
outputGuardrails: [
|
|
528
|
-
mcpSecurityGuardrail({
|
|
529
|
-
detectExfiltration: true,
|
|
530
|
-
allowedDomains: ['trusted-api.com'],
|
|
531
|
-
}),
|
|
532
|
-
mcpResponseSanitizer(),
|
|
533
|
-
],
|
|
534
|
-
toolGuardrails: [
|
|
535
|
-
toolEgressPolicy({
|
|
536
|
-
allowedHosts: ['trusted-api.com'],
|
|
537
|
-
scanForUrls: true,
|
|
538
|
-
}),
|
|
539
|
-
],
|
|
540
|
-
},
|
|
541
|
-
);
|
|
542
|
-
```
|
|
543
|
-
|
|
544
456
|
### Configuration Options
|
|
545
457
|
|
|
546
|
-
All security parameters are fully configurable with sensible defaults:
|
|
547
|
-
|
|
548
458
|
| Option | Default | Description |
|
|
549
459
|
| --------------------------- | ------- | ------------------------------------------------ |
|
|
550
460
|
| `injectionThreshold` | 0.7 | Prompt injection confidence threshold (0-1) |
|
|
@@ -556,106 +466,92 @@ All security parameters are fully configurable with sensible defaults:
|
|
|
556
466
|
| `allowedDomains` | [] | Allowed domains for URL construction |
|
|
557
467
|
| `customSuspiciousDomains` | [] | Additional suspicious domain patterns |
|
|
558
468
|
|
|
559
|
-
### Performance & Security Balance
|
|
560
|
-
|
|
561
|
-
- **High Security**: Lower thresholds, stricter limits, comprehensive scanning
|
|
562
|
-
- **Balanced**: Default settings, good for most production use cases
|
|
563
|
-
- **High Performance**: Higher thresholds, larger limits, selective scanning
|
|
564
|
-
|
|
565
469
|
See complete examples:
|
|
566
470
|
|
|
567
|
-
- [Production MCP Configuration](./examples/44-production-mcp-config.ts)
|
|
471
|
+
- [Production MCP Configuration](./examples/44-production-mcp-config.ts)
|
|
568
472
|
- [MCP Security Test Suite](./examples/41-mcp-security-test.ts)
|
|
569
473
|
- [Enhanced Security Testing](./examples/43-enhanced-mcp-security-test.ts)
|
|
570
|
-
- [Vulnerability Proof of Concept](./examples/42-mcp-vulnerability-proof.ts)
|
|
571
474
|
|
|
572
|
-
##
|
|
475
|
+
## Error Handling
|
|
573
476
|
|
|
574
|
-
|
|
477
|
+
### Throw Errors on Block
|
|
575
478
|
|
|
576
479
|
```ts
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
import { z } from 'zod';
|
|
581
|
-
|
|
582
|
-
// Define tools for the agent
|
|
583
|
-
const searchTool = tool({
|
|
584
|
-
description: 'Search for information',
|
|
585
|
-
inputSchema: z.object({ query: z.string() }),
|
|
586
|
-
execute: async ({ query }) => `Results for: ${query}`,
|
|
480
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
481
|
+
inputGuardrails: [piiDetector()],
|
|
482
|
+
throwOnBlocked: true, // Throw errors instead of silent blocking
|
|
587
483
|
});
|
|
588
484
|
|
|
589
|
-
|
|
590
|
-
const
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
}
|
|
596
|
-
|
|
597
|
-
outputGuardrails: [
|
|
598
|
-
defineOutputGuardrail({
|
|
599
|
-
name: 'tool-usage-required',
|
|
600
|
-
description: 'Ensures agent uses search tools',
|
|
601
|
-
execute: async (params) => {
|
|
602
|
-
const hasToolCall = params.result.steps?.some(
|
|
603
|
-
(step) => step.type === 'tool-call',
|
|
604
|
-
);
|
|
605
|
-
|
|
606
|
-
return {
|
|
607
|
-
tripwireTriggered: !hasToolCall,
|
|
608
|
-
message: hasToolCall
|
|
609
|
-
? 'Tool usage validated'
|
|
610
|
-
: 'Must use search tools for research',
|
|
611
|
-
severity: 'high',
|
|
612
|
-
};
|
|
613
|
-
},
|
|
614
|
-
}),
|
|
615
|
-
],
|
|
616
|
-
throwOnBlocked: true,
|
|
617
|
-
},
|
|
618
|
-
);
|
|
619
|
-
|
|
620
|
-
// Use the guarded agent
|
|
621
|
-
const result = await agent.generate({
|
|
622
|
-
prompt: 'Research the latest AI developments',
|
|
623
|
-
});
|
|
485
|
+
try {
|
|
486
|
+
const { text } = await generateText({ model, prompt: '...' });
|
|
487
|
+
} catch (error) {
|
|
488
|
+
if (isGuardrailsError(error)) {
|
|
489
|
+
console.error('Blocked:', error.message);
|
|
490
|
+
// error.results gives details per guardrail
|
|
491
|
+
}
|
|
492
|
+
}
|
|
624
493
|
```
|
|
625
494
|
|
|
626
|
-
|
|
495
|
+
### Error Types
|
|
627
496
|
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
| `retry`, `retryHelpers` | Standalone auto-retry utilities with validation and backoff. |
|
|
634
|
-
| `GuardrailsError`, `GuardrailsInputError`, `GuardrailsOutputError`, `isGuardrailsError`, `extractErrorInfo` | Structured errors and helpers for robust handling. |
|
|
635
|
-
| `exponentialBackoff`, `linearBackoff`, `fixedBackoff`, `jitteredExponentialBackoff`, `backoffPresets` | Backoff strategies to control retry pacing. |
|
|
497
|
+
- `GuardrailsInputError` - Input guardrail blocked
|
|
498
|
+
- `GuardrailsOutputError` - Output guardrail blocked
|
|
499
|
+
- `GuardrailExecutionError` - Guardrail threw an error
|
|
500
|
+
- `GuardrailTimeoutError` - Guardrail exceeded timeout
|
|
501
|
+
- `GuardrailConfigurationError` - Invalid configuration
|
|
636
502
|
|
|
637
|
-
|
|
503
|
+
## API Reference
|
|
638
504
|
|
|
639
|
-
|
|
640
|
-
- Output helpers: `./src/guardrails/output.ts`
|
|
505
|
+
### Primary Functions
|
|
641
506
|
|
|
642
|
-
|
|
507
|
+
| Function | Purpose |
|
|
508
|
+
| ------------------------- | ---------------------------------------- |
|
|
509
|
+
| `withGuardrails` | Wrap model with guardrails (main API) |
|
|
510
|
+
| `createGuardrails` | Create reusable guardrail configurations |
|
|
511
|
+
| `withAgentGuardrails` | Wrap AI SDK Agents with guardrails |
|
|
512
|
+
| `defineInputGuardrail` | Create custom input guardrail |
|
|
513
|
+
| `defineOutputGuardrail` | Create custom output guardrail |
|
|
514
|
+
| `executeInputGuardrails` | Run input guardrails programmatically |
|
|
515
|
+
| `executeOutputGuardrails` | Run output guardrails programmatically |
|
|
516
|
+
|
|
517
|
+
### Error Utilities
|
|
518
|
+
|
|
519
|
+
| Function | Purpose |
|
|
520
|
+
| ------------------- | ------------------------------------ |
|
|
521
|
+
| `isGuardrailsError` | Check if error is from guardrails |
|
|
522
|
+
| `extractErrorInfo` | Extract structured error information |
|
|
643
523
|
|
|
644
|
-
|
|
524
|
+
### Retry Utilities
|
|
645
525
|
|
|
646
|
-
|
|
526
|
+
| Function | Purpose |
|
|
527
|
+
| ---------------------------- | --------------------------------- |
|
|
528
|
+
| `retry` | Standalone retry utility |
|
|
529
|
+
| `exponentialBackoff` | Exponential backoff strategy |
|
|
530
|
+
| `linearBackoff` | Linear backoff strategy |
|
|
531
|
+
| `jitteredExponentialBackoff` | Jittered exponential backoff |
|
|
532
|
+
| `backoffPresets` | Pre-configured backoff strategies |
|
|
647
533
|
|
|
648
|
-
|
|
534
|
+
See source for all built-in guardrails:
|
|
535
|
+
|
|
536
|
+
- Input helpers: [`./src/guardrails/input.ts`](./src/guardrails/input.ts)
|
|
537
|
+
- Output helpers: [`./src/guardrails/output.ts`](./src/guardrails/output.ts)
|
|
538
|
+
- Tool helpers: [`./src/guardrails/tools.ts`](./src/guardrails/tools.ts)
|
|
539
|
+
- MCP security: [`./src/guardrails/mcp-security.ts`](./src/guardrails/mcp-security.ts)
|
|
540
|
+
|
|
541
|
+
## Examples
|
|
542
|
+
|
|
543
|
+
Browse 48+ runnable examples: [examples/README.md](./examples/README.md)
|
|
544
|
+
|
|
545
|
+
### Quick Starts
|
|
649
546
|
|
|
650
547
|
| Example | Description | File |
|
|
651
548
|
| -------------------------- | ------------------------------- | --------------------------------------------------------------------------------- |
|
|
652
549
|
| Simple combined protection | Minimal input and output setup | [07a-simple-combined-protection.ts](./examples/07a-simple-combined-protection.ts) |
|
|
653
550
|
| Auto retry on output | Retry until output meets a rule | [32-auto-retry-output.ts](./examples/32-auto-retry-output.ts) |
|
|
654
|
-
| LLM judge auto-retry | Judge feedback drives retry | [
|
|
655
|
-
| Expected tool use retry | Enforce/guide tool usage | [34-expected-tool-use-retry.ts](./examples/34-expected-tool-use-retry.ts) |
|
|
551
|
+
| LLM judge auto-retry | Judge feedback drives retry | [35-judge-auto-retry.ts](./examples/35-judge-auto-retry.ts) |
|
|
656
552
|
| Weather assistant | End-to-end input/output + retry | [33-blog-post-weather-assistant.ts](./examples/33-blog-post-weather-assistant.ts) |
|
|
657
553
|
|
|
658
|
-
Input safety
|
|
554
|
+
### Input Safety
|
|
659
555
|
|
|
660
556
|
| Example | Description | File |
|
|
661
557
|
| ------------------ | ----------------------------------- | --------------------------------------------------------------- |
|
|
@@ -664,7 +560,7 @@ Input safety
|
|
|
664
560
|
| PII detection | Detect PII before calling the model | [03-pii-detection.ts](./examples/03-pii-detection.ts) |
|
|
665
561
|
| Rate limiting | Simple per-user rate limit | [13-rate-limiting.ts](./examples/13-rate-limiting.ts) |
|
|
666
562
|
|
|
667
|
-
Output safety
|
|
563
|
+
### Output Safety
|
|
668
564
|
|
|
669
565
|
| Example | Description | File |
|
|
670
566
|
| ----------------------- | ----------------------------------- | ------------------------------------------------------------------------- |
|
|
@@ -672,7 +568,7 @@ Output safety
|
|
|
672
568
|
| Sensitive output filter | Filter secrets and PII in responses | [05-sensitive-output-filter.ts](./examples/05-sensitive-output-filter.ts) |
|
|
673
569
|
| Hallucination detection | Flag uncertain factual claims | [19-hallucination-detection.ts](./examples/19-hallucination-detection.ts) |
|
|
674
570
|
|
|
675
|
-
Streaming
|
|
571
|
+
### Streaming
|
|
676
572
|
|
|
677
573
|
| Example | Description | File |
|
|
678
574
|
| ----------------- | ---------------------------------- | --------------------------------------------------------------------------------- |
|
|
@@ -680,7 +576,7 @@ Streaming
|
|
|
680
576
|
| Streaming quality | Quality checks with streaming | [12-streaming-quality.ts](./examples/12-streaming-quality.ts) |
|
|
681
577
|
| Early termination | Stop streams early when blocked | [28-streaming-early-termination.ts](./examples/28-streaming-early-termination.ts) |
|
|
682
578
|
|
|
683
|
-
Advanced
|
|
579
|
+
### Advanced
|
|
684
580
|
|
|
685
581
|
| Example | Description | File |
|
|
686
582
|
| -------------------------- | ----------------------------- | ------------------------------------------------------------------------------- |
|
|
@@ -689,30 +585,43 @@ Advanced
|
|
|
689
585
|
| SQL code safety | Basic SQL safety checks | [24-sql-code-safety.ts](./examples/24-sql-code-safety.ts) |
|
|
690
586
|
| Role hierarchy enforcement | Enforce role rules in prompts | [23-role-hierarchy-enforcement.ts](./examples/23-role-hierarchy-enforcement.ts) |
|
|
691
587
|
|
|
692
|
-
##
|
|
588
|
+
## Migration from v3.x
|
|
693
589
|
|
|
694
|
-
|
|
695
|
-
- AI SDK: Compatible with AI SDK 5 (`ai@^5`); wraps any model
|
|
696
|
-
- For `generateObject`: for strict object validation, run `executeOutputGuardrails()` after generation
|
|
590
|
+
API naming has been improved in v4.x (old names still work but are deprecated):
|
|
697
591
|
|
|
698
|
-
|
|
592
|
+
```ts
|
|
593
|
+
// Before (v3.x - still works but deprecated)
|
|
594
|
+
import { wrapWithGuardrails, InputBlockedError } from 'ai-sdk-guardrails';
|
|
595
|
+
const model = wrapWithGuardrails(openai('gpt-4o'), { ... });
|
|
699
596
|
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
B -->|Valid| C[AI Model]
|
|
704
|
-
B -->|Blocked| X[No API Call]
|
|
705
|
-
C --> D[Output Guardrails]
|
|
706
|
-
D -->|Clean| E[Response]
|
|
707
|
-
D -->|Blocked| R[Retry/Replace/Throw]
|
|
597
|
+
// After (v4.x - recommended)
|
|
598
|
+
import { withGuardrails, GuardrailsInputError } from 'ai-sdk-guardrails';
|
|
599
|
+
const model = withGuardrails(openai('gpt-4o'), { ... });
|
|
708
600
|
```
|
|
709
601
|
|
|
710
|
-
|
|
602
|
+
Changes:
|
|
603
|
+
|
|
604
|
+
- `wrapWithGuardrails` → `withGuardrails`
|
|
605
|
+
- `wrapAgentWithGuardrails` → `withAgentGuardrails`
|
|
606
|
+
- `InputBlockedError` → `GuardrailsInputError`
|
|
607
|
+
- `OutputBlockedError` → `GuardrailsOutputError`
|
|
608
|
+
|
|
609
|
+
## Compatibility
|
|
610
|
+
|
|
611
|
+
- **Runtime**: Node.js 18+ recommended
|
|
612
|
+
- **AI SDK**: Compatible with AI SDK 5.x (`ai@^5`)
|
|
613
|
+
- **TypeScript**: Full type safety with TypeScript 5+
|
|
614
|
+
- **Works with any model**: OpenAI, Anthropic, Mistral, Groq, etc.
|
|
615
|
+
|
|
616
|
+
## Why This Library?
|
|
617
|
+
|
|
618
|
+
**Non-invasive**: Guardrails are middleware. Your existing code, telemetry (Langfuse, Helicone), and logging stay intact.
|
|
619
|
+
|
|
620
|
+
**Production-ready**: Used in production by teams who need compliance, security, and cost control without rebuilding their infrastructure.
|
|
621
|
+
|
|
622
|
+
**Developer experience**: One line to add safety. Progressive complexity - start simple, add advanced features when needed.
|
|
711
623
|
|
|
712
|
-
-
|
|
713
|
-
- Composable: run multiple guardrails in any order
|
|
714
|
-
- Type-safe: rich TypeScript types and inference
|
|
715
|
-
- Sensible defaults: zero-config to start, full control when you need it
|
|
624
|
+
**Type-safe**: Rich TypeScript types and inference throughout.
|
|
716
625
|
|
|
717
626
|
## Contributing
|
|
718
627
|
|
|
@@ -720,4 +629,4 @@ Issues and PRs are welcome.
|
|
|
720
629
|
|
|
721
630
|
## License
|
|
722
631
|
|
|
723
|
-
MIT © Jag Reehal. See LICENSE for details.
|
|
632
|
+
MIT © Jag Reehal. See [LICENSE](./LICENSE) for details.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-sdk-guardrails",
|
|
3
|
-
"version": "5.0.0",
|
|
3
|
+
"version": "5.0.1",
|
|
4
4
|
"description": "Input and output guardrails middleware for Vercel AI SDK.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -54,37 +54,37 @@
|
|
|
54
54
|
"type": "module",
|
|
55
55
|
"dependencies": {
|
|
56
56
|
"@ai-sdk/provider": "2.0.0",
|
|
57
|
-
"ai": "5.0.
|
|
57
|
+
"ai": "5.0.57",
|
|
58
58
|
"zod": "^4.1.11"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
|
-
"@ai-sdk/groq": "^2.0.
|
|
62
|
-
"@ai-sdk/mistral": "2.0.
|
|
63
|
-
"@ai-sdk/openai": "2.0.
|
|
61
|
+
"@ai-sdk/groq": "^2.0.22",
|
|
62
|
+
"@ai-sdk/mistral": "2.0.17",
|
|
63
|
+
"@ai-sdk/openai": "2.0.40",
|
|
64
64
|
"@arethetypeswrong/cli": "^0.18.2",
|
|
65
65
|
"@changesets/cli": "^2.29.7",
|
|
66
66
|
"@eslint/js": "^9.36.0",
|
|
67
67
|
"@total-typescript/ts-reset": "^0.6.1",
|
|
68
68
|
"@total-typescript/tsconfig": "^1.0.4",
|
|
69
69
|
"@types/eslint-config-prettier": "^6.11.3",
|
|
70
|
-
"@types/node": "^24.
|
|
71
|
-
"@typescript-eslint/eslint-plugin": "^8.
|
|
72
|
-
"@typescript-eslint/parser": "^8.
|
|
73
|
-
"ai-sdk-ollama": "^0.
|
|
70
|
+
"@types/node": "^24.6.0",
|
|
71
|
+
"@typescript-eslint/eslint-plugin": "^8.45.0",
|
|
72
|
+
"@typescript-eslint/parser": "^8.45.0",
|
|
73
|
+
"ai-sdk-ollama": "^0.10.0",
|
|
74
74
|
"autoevals": "^0.0.131",
|
|
75
75
|
"dotenv": "^17.2.2",
|
|
76
76
|
"eslint": "^9.36.0",
|
|
77
77
|
"eslint-config-prettier": "^10.1.8",
|
|
78
78
|
"eslint-plugin-unicorn": "^61.0.2",
|
|
79
79
|
"globals": "^16.4.0",
|
|
80
|
-
"mathjs": "^14.8.
|
|
81
|
-
"ollama": "^0.
|
|
82
|
-
"openai": "^5.23.
|
|
80
|
+
"mathjs": "^14.8.1",
|
|
81
|
+
"ollama": "^0.6.0",
|
|
82
|
+
"openai": "^5.23.1",
|
|
83
83
|
"prettier": "^3.6.2",
|
|
84
84
|
"tsup": "^8.5.0",
|
|
85
|
-
"tsx": "^4.20.
|
|
85
|
+
"tsx": "^4.20.6",
|
|
86
86
|
"typescript": "^5.9.2",
|
|
87
|
-
"typescript-eslint": "^8.
|
|
87
|
+
"typescript-eslint": "^8.45.0",
|
|
88
88
|
"vitest": "^3.2.4"
|
|
89
89
|
},
|
|
90
90
|
"scripts": {
|