ai-sdk-guardrails 3.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +585 -513
- package/package.json +34 -24
- package/dist/chunk-HHQ3CIFN.js +0 -12
- package/dist/chunk-LLCOPUS6.js +0 -159
- package/dist/errors-BTTWMQEI.js +0 -24
- package/dist/guardrails/input.cjs +0 -493
- package/dist/guardrails/input.d.cts +0 -36
- package/dist/guardrails/input.d.ts +0 -36
- package/dist/guardrails/input.js +0 -453
- package/dist/guardrails/output.cjs +0 -698
- package/dist/guardrails/output.d.cts +0 -46
- package/dist/guardrails/output.d.ts +0 -46
- package/dist/guardrails/output.js +0 -654
- package/dist/index.cjs +0 -815
- package/dist/index.d.cts +0 -272
- package/dist/index.d.ts +0 -272
- package/dist/index.js +0 -607
- package/dist/types-B9h_0Gyl.d.cts +0 -121
- package/dist/types-B9h_0Gyl.d.ts +0 -121
package/README.md
CHANGED
|
@@ -1,651 +1,723 @@
|
|
|
1
1
|
# AI SDK Guardrails
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
**Input and output validation for the Vercel AI SDK**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Add safety checks and quality controls to your AI applications. Guard against prompt injection, prevent sensitive data leaks, and improve output reliability - all while keeping your existing AI SDK code unchanged.
|
|
6
|
+
|
|
7
|
+
**Now includes MCP (Model Context Protocol) security guardrails** to help protect against attacks when using AI tools.
|
|
8
|
+
|
|
9
|
+
[npm version](https://www.npmjs.com/package/ai-sdk-guardrails)
|
|
10
|
+
[npm downloads](https://www.npmjs.com/package/ai-sdk-guardrails)
|
|
11
|
+
[bundle size](https://bundlephobia.com/package/ai-sdk-guardrails)
|
|
12
|
+
[License](./LICENSE)
|
|
13
|
+

|
|
6
14
|
|
|
7
15
|

|
|
8
16
|
|
|
9
|
-
##
|
|
17
|
+
## Why this matters
|
|
10
18
|
|
|
11
|
-
|
|
19
|
+
- **MCP**: Protect against prompt injection and data exfiltration when using MCP tools
|
|
20
|
+
- **Agent**: Have more reliable and secure agentic workflows
|
|
21
|
+
- **Tool security**: Protect against data exfiltration when using MCP tools
|
|
22
|
+
- **Save costs**: Block unnecessary requests before they hit your model
|
|
23
|
+
- **Improve safety**: Detect PII, block harmful content, prevent prompt injection
|
|
24
|
+
- **Better quality**: Enforce minimum response lengths, validate structure, auto-retry on failures
|
|
25
|
+
- **Easy integration**: Works as middleware with any AI SDK model
|
|
12
26
|
|
|
13
|
-
|
|
14
|
-
import { openai } from '@ai-sdk/openai';
|
|
15
|
-
import { generateText } from 'ai';
|
|
16
|
-
import {
|
|
17
|
-
wrapWithGuardrails,
|
|
18
|
-
defineInputGuardrail,
|
|
19
|
-
defineOutputGuardrail,
|
|
20
|
-
} from 'ai-sdk-guardrails';
|
|
27
|
+
## Common use cases
|
|
21
28
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
: { tripwireTriggered: false },
|
|
29
|
-
});
|
|
29
|
+
- Content moderation and safety filters
|
|
30
|
+
- PII detection for compliance
|
|
31
|
+
- Output quality requirements (length, format)
|
|
32
|
+
- Prompt injection prevention
|
|
33
|
+
- Tool usage validation
|
|
34
|
+
- Auto-retry on low-quality responses
|
|
30
35
|
|
|
31
|
-
|
|
32
|
-
name: 'quality-check',
|
|
33
|
-
execute: async ({ result }) =>
|
|
34
|
-
result.text.length < 10
|
|
35
|
-
? { tripwireTriggered: true, message: 'Response too short' }
|
|
36
|
-
: { tripwireTriggered: false },
|
|
37
|
-
});
|
|
36
|
+
## Secure AI in Under 60 Seconds
|
|
38
37
|
|
|
39
|
-
|
|
40
|
-
const guardedModel = wrapWithGuardrails(openai('gpt-4o'), {
|
|
41
|
-
inputGuardrails: [inputGuard],
|
|
42
|
-
outputGuardrails: [outputGuard],
|
|
43
|
-
});
|
|
38
|
+
**Step 1:** Install (10 seconds)
|
|
44
39
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
model: guardedModel,
|
|
48
|
-
prompt: 'A prompt that is definitely not too long.',
|
|
49
|
-
});
|
|
40
|
+
```bash
|
|
41
|
+
npm install ai-sdk-guardrails
|
|
50
42
|
```
|
|
51
43
|
|
|
52
|
-
|
|
44
|
+
**Step 2:** Import (15 seconds)
|
|
53
45
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
```mermaid
|
|
57
|
-
flowchart LR
|
|
58
|
-
A[User Input<br/>'hello'] --> B[AI Model] --> C[Response<br/>⚠️ Wastes resources<br/>😞 Often useless]
|
|
46
|
+
```ts
|
|
47
|
+
import { withGuardrails, piiDetector } from 'ai-sdk-guardrails';
|
|
59
48
|
```
|
|
60
49
|
|
|
61
|
-
|
|
50
|
+
**Step 3:** Wrap your model (30 seconds)
|
|
62
51
|
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
|
|
52
|
+
```ts
|
|
53
|
+
const safeModel = withGuardrails(yourModel, {
|
|
54
|
+
inputGuardrails: [piiDetector()],
|
|
55
|
+
});
|
|
66
56
|
```
|
|
67
57
|
|
|
68
|
-
|
|
58
|
+
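**Result:** Your model now blocks prompts that contain PII before the call is made. Add more guardrails (for example `promptInjectionDetector()` or output checks such as `minLengthRequirement()`) to the same config when you need them. That's it. No architecture changes, no security team required.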
|
|
69
59
|
|
|
70
|
-
|
|
71
|
-
flowchart LR
|
|
72
|
-
A[AI Response<br/>'Here's my SSN: 123-45-6789'] --> B[Output Guardrails] --> C[❌ BLOCKED<br/>🛡️ Privacy protected]
|
|
73
|
-
```
|
|
60
|
+
## TL;DR
|
|
74
61
|
|
|
75
|
-
|
|
62
|
+
Copy/paste minimal setup:
|
|
76
63
|
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
|
|
64
|
+
```ts
|
|
65
|
+
import { generateText } from 'ai';
|
|
66
|
+
import { openai } from '@ai-sdk/openai';
|
|
67
|
+
import {
|
|
68
|
+
withGuardrails,
|
|
69
|
+
piiDetector,
|
|
70
|
+
promptInjectionDetector,
|
|
71
|
+
minLengthRequirement,
|
|
72
|
+
mcpSecurityGuardrail,
|
|
73
|
+
} from 'ai-sdk-guardrails';
|
|
74
|
+
|
|
75
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
76
|
+
inputGuardrails: [piiDetector(), promptInjectionDetector()],
|
|
77
|
+
outputGuardrails: [
|
|
78
|
+
minLengthRequirement(160),
|
|
79
|
+
mcpSecurityGuardrail({
|
|
80
|
+
maxContentSize: 51200, // 50KB limit
|
|
81
|
+
injectionThreshold: 0.7, // Configurable sensitivity
|
|
82
|
+
allowedDomains: ['api.company.com'], // Domain allowlist
|
|
83
|
+
}),
|
|
84
|
+
],
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
const { text } = await generateText({
|
|
88
|
+
model,
|
|
89
|
+
prompt: 'Write a friendly intro email.',
|
|
90
|
+
});
|
|
80
91
|
```
|
|
81
92
|
|
|
82
|
-
|
|
93
|
+
See runnable examples: [examples/README.md](./examples/README.md)
|
|
83
94
|
|
|
84
|
-
##
|
|
95
|
+
## Quickstart (30 seconds)
|
|
96
|
+
|
|
97
|
+
Install with your provider (OpenAI shown):
|
|
85
98
|
|
|
86
99
|
```bash
|
|
87
|
-
|
|
100
|
+
pnpm add ai-sdk-guardrails ai @ai-sdk/openai
|
|
101
|
+
# or: npm i ai-sdk-guardrails ai @ai-sdk/openai
|
|
102
|
+
# or: yarn add ai-sdk-guardrails ai @ai-sdk/openai
|
|
103
|
+
```
|
|
88
104
|
|
|
89
|
-
|
|
105
|
+
Wrap your model and keep using `generateText` as usual:
|
|
106
|
+
|
|
107
|
+
```ts
|
|
108
|
+
import { generateText } from 'ai';
|
|
109
|
+
import { openai } from '@ai-sdk/openai';
|
|
110
|
+
import { withGuardrails, piiDetector } from 'ai-sdk-guardrails';
|
|
90
111
|
|
|
91
|
-
|
|
112
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
113
|
+
inputGuardrails: [piiDetector()],
|
|
114
|
+
});
|
|
92
115
|
|
|
93
|
-
|
|
116
|
+
const { text } = await generateText({
|
|
117
|
+
model,
|
|
118
|
+
prompt: 'Write a friendly intro email.',
|
|
119
|
+
});
|
|
120
|
+
```
|
|
94
121
|
|
|
95
|
-
|
|
122
|
+
## Contents
|
|
123
|
+
|
|
124
|
+
- Overview
|
|
125
|
+
- Concepts
|
|
126
|
+
- Installation
|
|
127
|
+
- Usage
|
|
128
|
+
- Define a guardrail
|
|
129
|
+
- Built-in helpers
|
|
130
|
+
- Streaming
|
|
131
|
+
- Auto Retry (utility and middleware)
|
|
132
|
+
- Error Handling
|
|
133
|
+
- API
|
|
134
|
+
- Examples
|
|
135
|
+
- Compatibility
|
|
136
|
+
- Architecture
|
|
137
|
+
- Contributing
|
|
138
|
+
|
|
139
|
+
## API Overview
|
|
140
|
+
|
|
141
|
+
### Primary Functions
|
|
142
|
+
|
|
143
|
+
- **`withGuardrails(model, config)`** - Main API for wrapping language models with guardrails
|
|
144
|
+
- **`createGuardrails(config)`** - Factory to create reusable guardrail configurations
|
|
145
|
+
- **`withAgentGuardrails(agentSettings, config)`** - Wrap AI SDK Agents with guardrails
|
|
146
|
+
|
|
147
|
+
### Migration from v3.x
|
|
148
|
+
|
|
149
|
+
- `wrapWithGuardrails` → `withGuardrails` (alias available, deprecated)
|
|
150
|
+
- `wrapAgentWithGuardrails` → `withAgentGuardrails` (alias available, deprecated)
|
|
151
|
+
- Error classes: `InputBlockedError` → `GuardrailsInputError`, `OutputBlockedError` → `GuardrailsOutputError`
|
|
152
|
+
|
|
153
|
+
```ts
|
|
154
|
+
// Before (v3.x - still works but deprecated)
|
|
155
|
+
import { wrapWithGuardrails, InputBlockedError } from 'ai-sdk-guardrails';
|
|
156
|
+
const model = wrapWithGuardrails(openai('gpt-4o'), { ... });
|
|
157
|
+
|
|
158
|
+
// After (v4.x - recommended)
|
|
159
|
+
import { withGuardrails, GuardrailsInputError } from 'ai-sdk-guardrails';
|
|
160
|
+
const model = withGuardrails(openai('gpt-4o'), { ... });
|
|
161
|
+
|
|
162
|
+
// Factory pattern (new in v4.x)
|
|
163
|
+
import { createGuardrails } from 'ai-sdk-guardrails';
|
|
164
|
+
const guards = createGuardrails({ ... });
|
|
165
|
+
const model = guards(openai('gpt-4o'));
|
|
96
166
|
```
|
|
97
167
|
|
|
98
|
-
##
|
|
168
|
+
## Concepts
|
|
99
169
|
|
|
100
|
-
|
|
170
|
+
- Input guardrails: Validate or block prompts to save cost and enforce rules before the call.
|
|
171
|
+
- Output guardrails: Check results for quality and safety. Block, replace, or retry as needed.
|
|
172
|
+
- Middleware: Guardrails wrap any model via AI SDK middleware. Your app code stays the same.
|
|
101
173
|
|
|
102
|
-
|
|
174
|
+
## Installation
|
|
103
175
|
|
|
104
|
-
|
|
105
|
-
|
|
176
|
+
See Quickstart for installation commands. Add providers you use as needed (e.g., `@ai-sdk/openai`, `@ai-sdk/mistral`).
|
|
177
|
+
|
|
178
|
+
## Usage
|
|
179
|
+
|
|
180
|
+
### Create custom guardrails
|
|
181
|
+
|
|
182
|
+
```ts
|
|
106
183
|
import { openai } from '@ai-sdk/openai';
|
|
107
184
|
import {
|
|
108
|
-
wrapWithInputGuardrails,
|
|
109
185
|
defineInputGuardrail,
|
|
186
|
+
defineOutputGuardrail,
|
|
187
|
+
withGuardrails,
|
|
110
188
|
} from 'ai-sdk-guardrails';
|
|
111
189
|
import { extractTextContent } from 'ai-sdk-guardrails/guardrails/input';
|
|
190
|
+
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
112
191
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
const foundWord = blockedWords.find((word) =>
|
|
121
|
-
prompt.toLowerCase().includes(word.toLowerCase()),
|
|
122
|
-
);
|
|
123
|
-
|
|
124
|
-
if (foundWord) {
|
|
125
|
-
return {
|
|
126
|
-
tripwireTriggered: true,
|
|
127
|
-
message: `Blocked keyword detected: ${foundWord}`,
|
|
128
|
-
severity: 'medium',
|
|
129
|
-
};
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
return { tripwireTriggered: false };
|
|
192
|
+
const businessHours = defineInputGuardrail({
|
|
193
|
+
name: 'business-hours',
|
|
194
|
+
execute: async (params) => {
|
|
195
|
+
const hr = new Date().getHours();
|
|
196
|
+
return hr >= 9 && hr <= 17
|
|
197
|
+
? { tripwireTriggered: false }
|
|
198
|
+
: { tripwireTriggered: true, message: 'Outside business hours' };
|
|
133
199
|
},
|
|
134
200
|
});
|
|
135
201
|
|
|
136
|
-
const
|
|
137
|
-
|
|
202
|
+
const minQuality = defineOutputGuardrail({
|
|
203
|
+
name: 'min-quality',
|
|
204
|
+
execute: async ({ result }) => {
|
|
205
|
+
const { text } = extractContent(result);
|
|
206
|
+
return text.length >= 80
|
|
207
|
+
? { tripwireTriggered: false }
|
|
208
|
+
: { tripwireTriggered: true, message: 'Response too short' };
|
|
209
|
+
},
|
|
138
210
|
});
|
|
139
211
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
model: optimizedModel,
|
|
144
|
-
prompt: 'hello', // ❌ Blocked - prevents unnecessary API call
|
|
145
|
-
});
|
|
146
|
-
} catch (error) {
|
|
147
|
-
console.log('Blocked request, saved money!');
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
// This generates valuable content
|
|
151
|
-
const goodResult = await generateText({
|
|
152
|
-
model: optimizedModel,
|
|
153
|
-
prompt: 'Write a product description for our new software', // ✅ This creates value
|
|
212
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
213
|
+
inputGuardrails: [businessHours],
|
|
214
|
+
outputGuardrails: [minQuality],
|
|
154
215
|
});
|
|
155
216
|
```
|
|
156
217
|
|
|
157
|
-
###
|
|
218
|
+
### Built-in helpers
|
|
158
219
|
|
|
159
|
-
```
|
|
220
|
+
```ts
|
|
221
|
+
import { openai } from '@ai-sdk/openai';
|
|
160
222
|
import {
|
|
161
|
-
|
|
162
|
-
|
|
223
|
+
withGuardrails,
|
|
224
|
+
piiDetector,
|
|
225
|
+
blockedKeywords,
|
|
226
|
+
contentLengthLimit,
|
|
227
|
+
promptInjectionDetector,
|
|
228
|
+
sensitiveDataFilter,
|
|
229
|
+
minLengthRequirement,
|
|
230
|
+
confidenceThreshold,
|
|
231
|
+
mcpSecurityGuardrail,
|
|
232
|
+
mcpResponseSanitizer,
|
|
163
233
|
} from 'ai-sdk-guardrails';
|
|
164
|
-
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
165
234
|
|
|
166
|
-
const
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
tripwireTriggered: true,
|
|
185
|
-
message: 'Sensitive information detected in response',
|
|
186
|
-
severity: 'high',
|
|
187
|
-
};
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
return { tripwireTriggered: false };
|
|
191
|
-
},
|
|
235
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
236
|
+
inputGuardrails: [
|
|
237
|
+
piiDetector(),
|
|
238
|
+
promptInjectionDetector({ threshold: 0.7 }),
|
|
239
|
+
blockedKeywords(['test', 'spam']),
|
|
240
|
+
contentLengthLimit(4000),
|
|
241
|
+
],
|
|
242
|
+
outputGuardrails: [
|
|
243
|
+
mcpSecurityGuardrail({
|
|
244
|
+
detectExfiltration: true,
|
|
245
|
+
scanEncodedContent: true,
|
|
246
|
+
allowedDomains: ['trusted-api.com'],
|
|
247
|
+
}),
|
|
248
|
+
mcpResponseSanitizer(),
|
|
249
|
+
sensitiveDataFilter(),
|
|
250
|
+
minLengthRequirement(160),
|
|
251
|
+
confidenceThreshold(0.6),
|
|
252
|
+
],
|
|
192
253
|
});
|
|
254
|
+
```
|
|
193
255
|
|
|
194
|
-
|
|
195
|
-
outputGuardrails: [qualityGuard],
|
|
196
|
-
onOutputBlocked: (results) => {
|
|
197
|
-
console.log('Prevented sensitive data leak:', results[0]?.message);
|
|
198
|
-
},
|
|
199
|
-
});
|
|
256
|
+
## Streaming
|
|
200
257
|
|
|
201
|
-
|
|
202
|
-
model: qualityModel,
|
|
203
|
-
prompt: 'Create a user profile example',
|
|
204
|
-
});
|
|
205
|
-
// Automatically blocks responses containing emails, phone numbers, or SSNs
|
|
206
|
-
```
|
|
258
|
+
Works out of the box. By default, guardrails run after the stream ends (buffer mode). For early blocking, enable progressive mode.
|
|
207
259
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
tripwireTriggered: true,
|
|
219
|
-
message:
|
|
220
|
-
'Requests are only permitted during business hours (9:00-17:00 UTC).',
|
|
221
|
-
severity: 'low',
|
|
222
|
-
};
|
|
223
|
-
}
|
|
224
|
-
return { tripwireTriggered: false };
|
|
225
|
-
},
|
|
260
|
+
```ts
|
|
261
|
+
import { streamText } from 'ai';
|
|
262
|
+
import { openai } from '@ai-sdk/openai';
|
|
263
|
+
import { withGuardrails, minLengthRequirement } from 'ai-sdk-guardrails';
|
|
264
|
+
|
|
265
|
+
const model = withGuardrails(openai('gpt-4o'), {
|
|
266
|
+
outputGuardrails: [minLengthRequirement(120)],
|
|
267
|
+
// Evaluate as tokens arrive; stop or replace early when blocked
|
|
268
|
+
streamMode: 'progressive',
|
|
269
|
+
replaceOnBlocked: true,
|
|
226
270
|
});
|
|
227
271
|
|
|
228
|
-
const
|
|
229
|
-
|
|
272
|
+
const { textStream } = await streamText({
|
|
273
|
+
model,
|
|
274
|
+
prompt: 'Tell me a short story about a robot.',
|
|
230
275
|
});
|
|
276
|
+
|
|
277
|
+
for await (const delta of textStream) process.stdout.write(delta);
|
|
231
278
|
```
|
|
232
279
|
|
|
233
|
-
|
|
280
|
+
## Auto Retry
|
|
234
281
|
|
|
235
|
-
|
|
282
|
+
Choose what fits your flow:
|
|
236
283
|
|
|
237
|
-
-
|
|
238
|
-
-
|
|
239
|
-
- 🎯 **Quality Improvement**: Automatically filter, flag, or retry low-quality or irrelevant model outputs.
|
|
240
|
-
- 🔄 **Streaming Support**: Works seamlessly with both streaming (streamText) and standard (generateText) API responses.
|
|
241
|
-
- 📊 **Observability Hooks**: Built-in callbacks (onInputBlocked, onOutputBlocked, etc.) for logging and monitoring.
|
|
242
|
-
- ⚙️ **Configurable Execution**: Run guardrails in parallel or sequentially and set custom timeouts.
|
|
243
|
-
- 🚀 **AI SDK Native**: Designed from the ground up to integrate cleanly with AI SDK middleware patterns.
|
|
284
|
+
- Standalone utility: Use `retry()` to wrap any generation function with your own validator and backoff.
|
|
285
|
+
- Middleware option: Add `retry` to output guardrails so retries run automatically when a check fails.
|
|
244
286
|
|
|
245
|
-
|
|
287
|
+
### Utility
|
|
246
288
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
| `wrapWithGuardrails()` | ⭐ **Recommended** - The easiest way to add both input and output guardrails. |
|
|
252
|
-
| `wrapWithInputGuardrails()` | Attaches input-only guardrails to a model. |
|
|
253
|
-
| `wrapWithOutputGuardrails()` | Attaches output-only guardrails to a model. |
|
|
254
|
-
| `InputBlockedError`, etc. | Custom, structured error types for easy try/catch handling. |
|
|
289
|
+
```ts
|
|
290
|
+
import { retry } from 'ai-sdk-guardrails';
|
|
291
|
+
import { generateText } from 'ai';
|
|
292
|
+
import { openai } from '@ai-sdk/openai';
|
|
255
293
|
|
|
256
|
-
|
|
294
|
+
const result = await retry({
|
|
295
|
+
generate: (params) => generateText({ model: openai('gpt-4o'), ...params }),
|
|
296
|
+
params: { prompt: 'Explain backpropagation in depth.' },
|
|
297
|
+
validate: (r) => ({
|
|
298
|
+
blocked: (r.text ?? '').length < 500,
|
|
299
|
+
message: 'Response too short',
|
|
300
|
+
}),
|
|
301
|
+
buildRetryParams: ({ lastParams }) => ({
|
|
302
|
+
...lastParams,
|
|
303
|
+
maxOutputTokens: Math.max(800, (lastParams.maxOutputTokens ?? 400) + 300),
|
|
304
|
+
}),
|
|
305
|
+
maxRetries: 2,
|
|
306
|
+
});
|
|
307
|
+
```
|
|
257
308
|
|
|
258
|
-
|
|
259
|
-
- 🧩 **Composable**: Multiple guardrails can be chained together and will run in your specified order (or in parallel).
|
|
260
|
-
- 🧾 **Type-Safe**: Full TypeScript support with contextual typing for guardrail inputs, outputs, and metadata.
|
|
261
|
-
- 🧪 **Sensible Defaults**: Get started quickly with zero-config default behaviors that can be easily overridden.
|
|
309
|
+
### Middleware
|
|
262
310
|
|
|
263
|
-
|
|
311
|
+
```ts
|
|
312
|
+
import { generateText } from 'ai';
|
|
313
|
+
import { openai } from '@ai-sdk/openai';
|
|
314
|
+
import { wrapWithOutputGuardrails, defineOutputGuardrail } from 'ai-sdk-guardrails';
|
|
315
|
+
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
264
316
|
|
|
265
|
-
|
|
317
|
+
const minLengthGuardrail = defineOutputGuardrail<{ minChars: number }>({
|
|
318
|
+
name: 'min-output-length',
|
|
319
|
+
execute: async ({ result }) => {
|
|
320
|
+
const { text } = extractContent(result);
|
|
321
|
+
const minChars = 600; // fixed threshold; responses shorter than this trigger a retry
|
|
322
|
+
return text.length < minChars
|
|
323
|
+
? {
|
|
324
|
+
tripwireTriggered: true,
|
|
325
|
+
severity: 'medium',
|
|
326
|
+
message: `Answer too short: ${text.length} < ${minChars}`,
|
|
327
|
+
metadata: { minChars },
|
|
328
|
+
}
|
|
329
|
+
: { tripwireTriggered: false };
|
|
330
|
+
},
|
|
331
|
+
});
|
|
266
332
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
Wrapper[wrapLanguageModel]
|
|
297
|
-
Generator[generateText/Object/Stream]
|
|
298
|
-
end
|
|
299
|
-
|
|
300
|
-
subgraph "External Services"
|
|
301
|
-
AI[AI Model Provider]
|
|
302
|
-
Log[Logging & Telemetry]
|
|
303
|
-
end
|
|
304
|
-
|
|
305
|
-
App --> Config
|
|
306
|
-
Config --> InputMW
|
|
307
|
-
InputMW --> Length
|
|
308
|
-
InputMW --> Spam
|
|
309
|
-
InputMW --> PII
|
|
310
|
-
InputMW --> Business
|
|
311
|
-
InputMW --> Custom1
|
|
312
|
-
|
|
313
|
-
InputMW -->|Valid Request| Wrapper
|
|
314
|
-
InputMW -->|Blocked Request| Log
|
|
315
|
-
|
|
316
|
-
Wrapper --> Generator
|
|
317
|
-
Generator --> AI
|
|
318
|
-
AI --> OutputMW
|
|
319
|
-
|
|
320
|
-
OutputMW --> Quality
|
|
321
|
-
OutputMW --> Sensitive
|
|
322
|
-
OutputMW --> Professional
|
|
323
|
-
OutputMW --> Factual
|
|
324
|
-
OutputMW --> Custom2
|
|
325
|
-
|
|
326
|
-
OutputMW -->|Clean Response| App
|
|
327
|
-
OutputMW -->|Quality Issues| Log
|
|
328
|
-
|
|
329
|
-
style InputMW fill:#e1f5fe
|
|
330
|
-
style OutputMW fill:#f3e5f5
|
|
331
|
-
style AI fill:#fff3e0
|
|
332
|
-
style App fill:#e8f5e8
|
|
333
|
-
```
|
|
333
|
+
const guarded = wrapWithOutputGuardrails(
|
|
334
|
+
openai('gpt-4o'),
|
|
335
|
+
[minLengthGuardrail],
|
|
336
|
+
{
|
|
337
|
+
replaceOnBlocked: false,
|
|
338
|
+
retry: {
|
|
339
|
+
maxRetries: 1,
|
|
340
|
+
buildRetryParams: ({ summary, lastParams }) => ({
|
|
341
|
+
...lastParams,
|
|
342
|
+
maxOutputTokens: Math.max(
|
|
343
|
+
800,
|
|
344
|
+
(lastParams.maxOutputTokens ?? 400) + 300,
|
|
345
|
+
),
|
|
346
|
+
prompt: [
|
|
347
|
+
...(Array.isArray(lastParams.prompt) ? lastParams.prompt : []),
|
|
348
|
+
{
|
|
349
|
+
role: 'user' as const,
|
|
350
|
+
content: [
|
|
351
|
+
{
|
|
352
|
+
type: 'text' as const,
|
|
353
|
+
text: `Note: The previous answer ${summary.blockedResults[0]?.message}. Provide a comprehensive, detailed answer with examples.`,
|
|
354
|
+
},
|
|
355
|
+
],
|
|
356
|
+
},
|
|
357
|
+
],
|
|
358
|
+
}),
|
|
359
|
+
},
|
|
360
|
+
},
|
|
361
|
+
);
|
|
334
362
|
|
|
335
|
-
|
|
363
|
+
const { text } = await generateText({
|
|
364
|
+
model: guarded,
|
|
365
|
+
prompt: 'Explain the significance of the Turing Test in AI history.',
|
|
366
|
+
});
|
|
367
|
+
```
|
|
336
368
|
|
|
337
|
-
|
|
369
|
+
Tip: Use backoff helpers if you need delays between retries: `exponentialBackoff`, `linearBackoff`, `fixedBackoff`, `jitteredExponentialBackoff`, or `backoffPresets`.
|
|
338
370
|
|
|
339
|
-
|
|
371
|
+
## Error Handling
|
|
340
372
|
|
|
341
|
-
|
|
373
|
+
Set `throwOnBlocked: true` to throw structured errors you can catch and turn into friendly messages.
|
|
342
374
|
|
|
343
|
-
```
|
|
344
|
-
|
|
345
|
-
name: 'user-rate-limit',
|
|
346
|
-
execute: async ({ metadata }) => {
|
|
347
|
-
const userId = metadata?.userId ?? 'anonymous';
|
|
348
|
-
const allowed = await checkRateLimit(userId); // Your rate-limiting logic
|
|
375
|
+
```ts
|
|
376
|
+
import { isGuardrailsError } from 'ai-sdk-guardrails';
|
|
349
377
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
}
|
|
357
|
-
|
|
378
|
+
try {
|
|
379
|
+
const { text } = await generateText({ model, prompt: '...' });
|
|
380
|
+
} catch (err) {
|
|
381
|
+
if (isGuardrailsError(err)) {
|
|
382
|
+
console.error('Guardrail blocked:', err.message);
|
|
383
|
+
// err.results gives you details per guardrail
|
|
384
|
+
} else {
|
|
385
|
+
console.error('Unexpected error:', err);
|
|
386
|
+
}
|
|
387
|
+
}
|
|
358
388
|
```
|
|
359
389
|
|
|
360
|
-
|
|
390
|
+
## Reusable Guardrails Factory
|
|
361
391
|
|
|
362
|
-
Use
|
|
392
|
+
Use `createGuardrails()` to create reusable guardrail configurations that can be applied to multiple models:
|
|
363
393
|
|
|
364
|
-
```
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
const isSafe = judgement.text.includes('YES');
|
|
375
|
-
return isSafe
|
|
376
|
-
? { tripwireTriggered: false }
|
|
377
|
-
: {
|
|
378
|
-
tripwireTriggered: true,
|
|
379
|
-
message: `Output failed LLM-as-judge quality check.`,
|
|
380
|
-
metadata: { originalText: result.text },
|
|
381
|
-
};
|
|
382
|
-
},
|
|
394
|
+
```ts
|
|
395
|
+
import { openai } from '@ai-sdk/openai';
|
|
396
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
397
|
+
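import { createGuardrails, defineInputGuardrail, piiDetector } from 'ai-sdk-guardrails'; // contentFilter, qualityCheck, minLength and maxLength below stand in for your own guardrails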
|
|
398
|
+
|
|
399
|
+
// Create reusable guardrails configuration
|
|
400
|
+
const productionGuards = createGuardrails({
|
|
401
|
+
inputGuardrails: [piiDetector(), contentFilter()],
|
|
402
|
+
outputGuardrails: [qualityCheck(), minLength(100)],
|
|
403
|
+
throwOnBlocked: true,
|
|
383
404
|
});
|
|
384
|
-
```
|
|
385
405
|
|
|
386
|
-
|
|
406
|
+
// Apply to multiple models
|
|
407
|
+
const gpt4 = productionGuards(openai('gpt-4o'));
|
|
408
|
+
const claude = productionGuards(anthropic('claude-3-sonnet'));
|
|
387
409
|
|
|
388
|
-
|
|
389
|
-
|
|
410
|
+
// Compose multiple guardrail sets
|
|
411
|
+
const strictLimits = createGuardrails({ inputGuardrails: [maxLength(500)] });
|
|
412
|
+
const piiProtection = createGuardrails({ inputGuardrails: [piiDetector()] });
|
|
390
413
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
execute: async (context) => {
|
|
394
|
-
const { prompt } = extractTextContent(context);
|
|
395
|
-
|
|
396
|
-
// Length validation
|
|
397
|
-
if (prompt.length < 10) {
|
|
398
|
-
return {
|
|
399
|
-
tripwireTriggered: true,
|
|
400
|
-
message: 'Input too short - likely to produce low-value response',
|
|
401
|
-
severity: 'medium',
|
|
402
|
-
suggestion: 'Please provide more detailed input for better results',
|
|
403
|
-
};
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
if (prompt.length > 4000) {
|
|
407
|
-
return {
|
|
408
|
-
tripwireTriggered: true,
|
|
409
|
-
message: 'Input too long - may exceed token limits',
|
|
410
|
-
severity: 'high',
|
|
411
|
-
suggestion: 'Break your request into smaller, focused parts',
|
|
412
|
-
};
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
// Content quality checks
|
|
416
|
-
const spamPatterns = [
|
|
417
|
-
/^(.)\1{10,}$/, // Repeated characters
|
|
418
|
-
/^(test|hello|hi|hey)$/i, // Common spam words
|
|
419
|
-
];
|
|
420
|
-
|
|
421
|
-
const foundSpam = spamPatterns.find((pattern) => pattern.test(prompt));
|
|
422
|
-
if (foundSpam) {
|
|
423
|
-
return {
|
|
424
|
-
tripwireTriggered: true,
|
|
425
|
-
message: 'Low-quality input detected',
|
|
426
|
-
severity: 'high',
|
|
427
|
-
};
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
return { tripwireTriggered: false };
|
|
431
|
-
},
|
|
432
|
-
});
|
|
414
|
+
// Chain them together
|
|
415
|
+
const model = piiProtection(strictLimits(openai('gpt-4o')));
|
|
433
416
|
```
|
|
434
417
|
|
|
435
|
-
|
|
418
|
+
## MCP Security Guardrails
|
|
436
419
|
|
|
437
|
-
|
|
438
|
-
import { extractContent } from 'ai-sdk-guardrails/guardrails/output';
|
|
420
|
+
**Production-Ready**: Protect against prompt injection and data exfiltration attacks when using Model Context Protocol (MCP) tools. Based on research into the ["lethal trifecta" vulnerability](https://simonwillison.net/2025/Jun/16/the-lethal-trifecta/) that has affected major AI platforms.
|
|
439
421
|
|
|
440
|
-
|
|
441
|
-
name: 'professional-quality-control',
|
|
442
|
-
execute: async (context) => {
|
|
443
|
-
const { text } = extractContent(context.result);
|
|
444
|
-
|
|
445
|
-
const qualityIssues = [];
|
|
446
|
-
|
|
447
|
-
// Check for unprofessional language
|
|
448
|
-
const unprofessionalTerms = ['lol', 'wtf', 'omg', 'ur', 'u r'];
|
|
449
|
-
const hasUnprofessional = unprofessionalTerms.some((term) =>
|
|
450
|
-
text.toLowerCase().includes(term),
|
|
451
|
-
);
|
|
452
|
-
|
|
453
|
-
if (hasUnprofessional) {
|
|
454
|
-
qualityIssues.push('Contains unprofessional language');
|
|
455
|
-
}
|
|
456
|
-
|
|
457
|
-
// Check for placeholder text
|
|
458
|
-
const placeholders = ['[insert', '[add', '[your', 'TODO:', 'FIXME:'];
|
|
459
|
-
const hasPlaceholders = placeholders.some((placeholder) =>
|
|
460
|
-
text.includes(placeholder),
|
|
461
|
-
);
|
|
462
|
-
|
|
463
|
-
if (hasPlaceholders) {
|
|
464
|
-
qualityIssues.push('Contains placeholder text - incomplete response');
|
|
465
|
-
}
|
|
466
|
-
|
|
467
|
-
// Check for excessive repetition
|
|
468
|
-
const sentences = text.split(/[.!?]+/).filter((s) => s.trim());
|
|
469
|
-
const uniqueSentences = new Set(
|
|
470
|
-
sentences.map((s) => s.trim().toLowerCase()),
|
|
471
|
-
);
|
|
472
|
-
const repetitionRatio = uniqueSentences.size / sentences.length;
|
|
473
|
-
|
|
474
|
-
if (sentences.length > 3 && repetitionRatio < 0.6) {
|
|
475
|
-
qualityIssues.push('Excessive repetition detected');
|
|
476
|
-
}
|
|
477
|
-
|
|
478
|
-
if (qualityIssues.length > 0) {
|
|
479
|
-
return {
|
|
480
|
-
tripwireTriggered: true,
|
|
481
|
-
message: `Quality issues found: ${qualityIssues.join(', ')}`,
|
|
482
|
-
severity: 'medium',
|
|
483
|
-
suggestion: 'Request a more professional, complete response',
|
|
484
|
-
metadata: {
|
|
485
|
-
issues: qualityIssues,
|
|
486
|
-
quality_score: repetitionRatio,
|
|
487
|
-
},
|
|
488
|
-
};
|
|
489
|
-
}
|
|
422
|
+
### The Problem
|
|
490
423
|
|
|
491
|
-
|
|
492
|
-
},
|
|
493
|
-
});
|
|
494
|
-
```
|
|
424
|
+
AI agents with MCP tools can be vulnerable when they have:
|
|
495
425
|
|
|
496
|
-
|
|
426
|
+
1. **Access to private data** (through tools)
|
|
427
|
+
2. **Exposure to untrusted content** (from tool responses)
|
|
428
|
+
3. **The ability to communicate externally** (make web requests)
|
|
497
429
|
|
|
498
|
-
|
|
430
|
+
Malicious tool responses can contain hidden instructions that trick the AI into exfiltrating sensitive data.
|
|
499
431
|
|
|
500
|
-
|
|
501
|
-
import { streamText } from 'ai';
|
|
432
|
+
### Production-Ready Solution
|
|
502
433
|
|
|
503
|
-
|
|
504
|
-
outputGuardrails: [qualityJudge],
|
|
505
|
-
});
|
|
434
|
+
Full configurability with sensible defaults for immediate deployment:
|
|
506
435
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
436
|
+
```ts
|
|
437
|
+
import {
|
|
438
|
+
withGuardrails,
|
|
439
|
+
promptInjectionDetector,
|
|
440
|
+
mcpSecurityGuardrail,
|
|
441
|
+
mcpResponseSanitizer,
|
|
442
|
+
toolEgressPolicy,
|
|
443
|
+
} from 'ai-sdk-guardrails';
|
|
444
|
+
|
|
445
|
+
// Conservative production setup (high security)
|
|
446
|
+
const secureModel = withGuardrails(openai('gpt-4o'), {
|
|
447
|
+
inputGuardrails: [
|
|
448
|
+
promptInjectionDetector({ threshold: 0.6, includeExamples: true }),
|
|
449
|
+
],
|
|
450
|
+
outputGuardrails: [
|
|
451
|
+
mcpSecurityGuardrail({
|
|
452
|
+
injectionThreshold: 0.5, // Lower = more sensitive
|
|
453
|
+
maxSuspiciousUrls: 0, // Zero tolerance
|
|
454
|
+
maxContentSize: 25600, // 25KB limit for performance
|
|
455
|
+
minEncodedLength: 15, // Detect shorter encoded attacks
|
|
456
|
+
encodedInjectionThreshold: 0.2, // Combined encoded + injection threshold
|
|
457
|
+
highRiskThreshold: 0.3, // High-risk cascade blocking
|
|
458
|
+
authorityThreshold: 0.5, // Authority manipulation detection
|
|
459
|
+
allowedDomains: ['api.company.com', 'trusted-partner.com'],
|
|
460
|
+
customSuspiciousDomains: ['evil.com', 'malicious.org'],
|
|
461
|
+
blockCascadingCalls: true,
|
|
462
|
+
scanEncodedContent: true,
|
|
463
|
+
detectExfiltration: true,
|
|
464
|
+
}),
|
|
465
|
+
mcpResponseSanitizer(), // Clean malicious content vs blocking
|
|
466
|
+
toolEgressPolicy({
|
|
467
|
+
allowedHosts: ['api.company.com', 'trusted-partner.com'],
|
|
468
|
+
blockedHosts: ['webhook.site', 'requestcatcher.com', 'ngrok.io'],
|
|
469
|
+
scanForUrls: true,
|
|
470
|
+
}),
|
|
471
|
+
],
|
|
510
472
|
});
|
|
473
|
+
```
|
|
511
474
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
475
|
+
### Environment & Role-Based Configuration
|
|
476
|
+
|
|
477
|
+
```ts
|
|
478
|
+
// Different security profiles for different environments
|
|
479
|
+
function getSecurityConfig(env: 'production' | 'staging' | 'development') {
|
|
480
|
+
const configs = {
|
|
481
|
+
production: {
|
|
482
|
+
injectionThreshold: 0.5, // High security
|
|
483
|
+
maxContentSize: 25600, // 25KB limit
|
|
484
|
+
authorityThreshold: 0.5, // Very sensitive
|
|
485
|
+
},
|
|
486
|
+
staging: {
|
|
487
|
+
injectionThreshold: 0.7, // Balanced security
|
|
488
|
+
maxContentSize: 51200, // 50KB default
|
|
489
|
+
authorityThreshold: 0.7, // Standard sensitivity
|
|
490
|
+
},
|
|
491
|
+
development: {
|
|
492
|
+
injectionThreshold: 0.8, // Lower security, better performance
|
|
493
|
+
maxContentSize: 102400, // 100KB for testing
|
|
494
|
+
authorityThreshold: 0.8, // Less restrictive
|
|
495
|
+
},
|
|
496
|
+
};
|
|
497
|
+
return configs[env];
|
|
515
498
|
}
|
|
516
499
|
|
|
517
|
-
|
|
500
|
+
const productionModel = withGuardrails(openai('gpt-4o'), {
|
|
501
|
+
outputGuardrails: [mcpSecurityGuardrail(getSecurityConfig('production'))],
|
|
502
|
+
});
|
|
518
503
|
```
|
|
519
504
|
|
|
520
|
-
|
|
505
|
+
### Attack Vectors Prevented
|
|
521
506
|
|
|
522
|
-
|
|
507
|
+
✅ **Direct prompt injection** - "System: ignore all previous instructions"
|
|
508
|
+
✅ **Tool response poisoning** - Malicious content in MCP tool responses
|
|
509
|
+
✅ **Data exfiltration** - URLs constructed to steal sensitive data
|
|
510
|
+
✅ **Encoded attacks** - Base64/hex hidden malicious instructions
|
|
511
|
+
✅ **Cascading exploits** - Tool responses triggering additional dangerous calls
|
|
512
|
+
✅ **Context poisoning** - Attempts to modify AI behavior mid-conversation
|
|
523
513
|
|
|
524
|
-
|
|
525
|
-
import { generateText } from 'ai';
|
|
526
|
-
import { isGuardrailsError } from 'ai-sdk-guardrails';
|
|
514
|
+
### Secure MCP Agent Example
|
|
527
515
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
516
|
+
```ts
|
|
517
|
+
import { withAgentGuardrails } from 'ai-sdk-guardrails';
|
|
518
|
+
|
|
519
|
+
const secureAgent = withAgentGuardrails(
|
|
520
|
+
{
|
|
521
|
+
model: openai('gpt-4o'),
|
|
522
|
+
tools: { file_search, api_call, database_query },
|
|
523
|
+
system: 'You are a secure assistant. Always validate tool responses.',
|
|
524
|
+
},
|
|
525
|
+
{
|
|
526
|
+
inputGuardrails: [promptInjectionDetector()],
|
|
527
|
+
outputGuardrails: [
|
|
528
|
+
mcpSecurityGuardrail({
|
|
529
|
+
detectExfiltration: true,
|
|
530
|
+
allowedDomains: ['trusted-api.com'],
|
|
531
|
+
}),
|
|
532
|
+
mcpResponseSanitizer(),
|
|
533
|
+
],
|
|
534
|
+
toolGuardrails: [
|
|
535
|
+
toolEgressPolicy({
|
|
536
|
+
allowedHosts: ['trusted-api.com'],
|
|
537
|
+
scanForUrls: true,
|
|
538
|
+
}),
|
|
539
|
+
],
|
|
540
|
+
},
|
|
541
|
+
);
|
|
543
542
|
```
|
|
544
543
|
|
|
545
|
-
###
|
|
544
|
+
### Configuration Options
|
|
546
545
|
|
|
547
|
-
|
|
546
|
+
All security parameters are fully configurable with sensible defaults:
|
|
548
547
|
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
548
|
+
| Option | Default | Description |
|
|
549
|
+
| --------------------------- | ------- | ------------------------------------------------ |
|
|
550
|
+
| `injectionThreshold` | 0.7 | Prompt injection confidence threshold (0-1) |
|
|
551
|
+
| `maxSuspiciousUrls` | 0 | Max allowed suspicious URLs (0 = zero tolerance) |
|
|
552
|
+
| `maxContentSize` | 51200 | Max content size in bytes (50KB default) |
|
|
553
|
+
| `minEncodedLength` | 20 | Min encoded content length to analyze |
|
|
554
|
+
| `encodedInjectionThreshold` | 0.3 | Combined encoded + injection threshold |
|
|
555
|
+
| `authorityThreshold` | 0.7 | Authority manipulation detection sensitivity |
|
|
556
|
+
| `allowedDomains` | [] | Allowed domains for URL construction |
|
|
557
|
+
| `customSuspiciousDomains` | [] | Additional suspicious domain patterns |
|
|
552
558
|
|
|
553
|
-
|
|
554
|
-
case 'content-length-limit':
|
|
555
|
-
return 'Your message is too long. Please keep it under 500 characters for the best response.';
|
|
559
|
+
### Performance & Security Balance
|
|
556
560
|
|
|
557
|
-
|
|
558
|
-
|
|
561
|
+
- **High Security**: Lower thresholds, stricter limits, comprehensive scanning
|
|
562
|
+
- **Balanced**: Default settings, good for most production use cases
|
|
563
|
+
- **High Performance**: Higher thresholds, larger limits, selective scanning
|
|
559
564
|
|
|
560
|
-
|
|
561
|
-
return "You're sending requests too quickly. Please wait a moment before trying again.";
|
|
565
|
+
See complete examples:
|
|
562
566
|
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
);
|
|
568
|
-
}
|
|
569
|
-
}
|
|
570
|
-
```
|
|
567
|
+
- [Production MCP Configuration](./examples/44-production-mcp-config.ts) - **New!**
|
|
568
|
+
- [MCP Security Test Suite](./examples/41-mcp-security-test.ts)
|
|
569
|
+
- [Enhanced Security Testing](./examples/43-enhanced-mcp-security-test.ts)
|
|
570
|
+
- [Vulnerability Proof of Concept](./examples/42-mcp-vulnerability-proof.ts)
|
|
571
571
|
|
|
572
|
-
##
|
|
572
|
+
## Agent Support
|
|
573
573
|
|
|
574
|
-
|
|
574
|
+
Guardrails work with AI SDK Agents for multi-step agentic workflows:
|
|
575
575
|
|
|
576
|
-
```
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
576
|
+
```ts
|
|
577
|
+
import { openai } from '@ai-sdk/openai';
|
|
578
|
+
import { withAgentGuardrails, defineOutputGuardrail } from 'ai-sdk-guardrails';
|
|
579
|
+
import { tool } from 'ai';
|
|
580
|
+
import { z } from 'zod';
|
|
581
|
+
|
|
582
|
+
// Define tools for the agent
|
|
583
|
+
const searchTool = tool({
|
|
584
|
+
description: 'Search for information',
|
|
585
|
+
inputSchema: z.object({ query: z.string() }),
|
|
586
|
+
execute: async ({ query }) => `Results for: ${query}`,
|
|
587
|
+
});
|
|
588
|
+
|
|
589
|
+
// Create agent with guardrails
|
|
590
|
+
const agent = withAgentGuardrails(
|
|
591
|
+
{
|
|
592
|
+
model: openai('gpt-4o'),
|
|
593
|
+
tools: { search: searchTool },
|
|
594
|
+
system: 'You are a helpful research assistant.',
|
|
584
595
|
},
|
|
585
|
-
|
|
586
|
-
|
|
596
|
+
{
|
|
597
|
+
outputGuardrails: [
|
|
598
|
+
defineOutputGuardrail({
|
|
599
|
+
name: 'tool-usage-required',
|
|
600
|
+
description: 'Ensures agent uses search tools',
|
|
601
|
+
execute: async (params) => {
|
|
602
|
+
const hasToolCall = params.result.steps?.some(
|
|
603
|
+
(step) => step.type === 'tool-call',
|
|
604
|
+
);
|
|
605
|
+
|
|
606
|
+
return {
|
|
607
|
+
tripwireTriggered: !hasToolCall,
|
|
608
|
+
message: hasToolCall
|
|
609
|
+
? 'Tool usage validated'
|
|
610
|
+
: 'Must use search tools for research',
|
|
611
|
+
severity: 'high',
|
|
612
|
+
};
|
|
613
|
+
},
|
|
614
|
+
}),
|
|
615
|
+
],
|
|
616
|
+
throwOnBlocked: true,
|
|
587
617
|
},
|
|
588
|
-
|
|
618
|
+
);
|
|
589
619
|
|
|
590
|
-
// Use
|
|
591
|
-
const
|
|
592
|
-
|
|
593
|
-
prompt: 'Write a professional email response',
|
|
620
|
+
// Use the guarded agent
|
|
621
|
+
const result = await agent.generate({
|
|
622
|
+
prompt: 'Research the latest AI developments',
|
|
594
623
|
});
|
|
624
|
+
```
|
|
595
625
|
|
|
596
|
-
|
|
597
|
-
model: productionModel,
|
|
598
|
-
prompt: 'Create a user profile',
|
|
599
|
-
schema: userProfileSchema,
|
|
600
|
-
});
|
|
626
|
+
## API
|
|
601
627
|
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
628
|
+
| Export | Description |
|
|
629
|
+
| ----------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- |
|
|
630
|
+
| `defineInputGuardrail`, `defineOutputGuardrail` | Create guardrails with clear messages, severity, and metadata. |
|
|
631
|
+
| `withGuardrails`, `createGuardrails`, `withAgentGuardrails` | Attach guardrails to AI SDK models and agents via middleware. |
|
|
632
|
+
| `executeInputGuardrails`, `executeOutputGuardrails` | Run guardrails programmatically (outside middleware) and get structured results. |
|
|
633
|
+
| `retry`, `retryHelpers` | Standalone auto-retry utilities with validation and backoff. |
|
|
634
|
+
| `GuardrailsError`, `GuardrailsInputError`, `GuardrailsOutputError`, `isGuardrailsError`, `extractErrorInfo` | Structured errors and helpers for robust handling. |
|
|
635
|
+
| `exponentialBackoff`, `linearBackoff`, `fixedBackoff`, `jitteredExponentialBackoff`, `backoffPresets` | Backoff strategies to control retry pacing. |
|
|
636
|
+
|
|
637
|
+
See source for built-in helpers:
|
|
638
|
+
|
|
639
|
+
- Input helpers: `./src/guardrails/input.ts`
|
|
640
|
+
- Output helpers: `./src/guardrails/output.ts`
|
|
607
641
|
|
|
608
642
|
## Examples
|
|
609
643
|
|
|
610
|
-
|
|
644
|
+
Browse runnable examples for streaming, compliance, safety, and more:
|
|
611
645
|
|
|
612
|
-
|
|
646
|
+
- Index and commands: [examples/README.md](./examples/README.md)
|
|
613
647
|
|
|
614
|
-
|
|
615
|
-
- **[Basic Guardrails](examples/basic-guardrails.ts)** - Foundation patterns for input/output validation
|
|
616
|
-
- **[Business Logic](examples/business-logic.ts)** - Custom business rules, work hours, and professional standards
|
|
617
|
-
- **[LLM-as-Judge](examples/llm-as-judge.ts)** - AI-powered quality evaluation and scoring
|
|
648
|
+
Quick starts
|
|
618
649
|
|
|
619
|
-
|
|
650
|
+
| Example | Description | File |
|
|
651
|
+
| -------------------------- | ------------------------------- | --------------------------------------------------------------------------------- |
|
|
652
|
+
| Simple combined protection | Minimal input and output setup | [07a-simple-combined-protection.ts](./examples/07a-simple-combined-protection.ts) |
|
|
653
|
+
| Auto retry on output | Retry until output meets a rule | [32-auto-retry-output.ts](./examples/32-auto-retry-output.ts) |
|
|
654
|
+
| LLM judge auto-retry | Judge feedback drives retry | [33-judge-auto-retry.ts](./examples/33-judge-auto-retry.ts) |
|
|
655
|
+
| Expected tool use retry | Enforce/guide tool usage | [34-expected-tool-use-retry.ts](./examples/34-expected-tool-use-retry.ts) |
|
|
656
|
+
| Weather assistant | End-to-end input/output + retry | [33-blog-post-weather-assistant.ts](./examples/33-blog-post-weather-assistant.ts) |
|
|
620
657
|
|
|
621
|
-
|
|
622
|
-
- **[Streaming Guardrails](examples/streaming-guardrails.ts)** - Real-time quality monitoring
|
|
623
|
-
- **[Rate Limiting](examples/rate-limit-guardrail.ts)** - Smart rate limiting that prevents resource overuse
|
|
624
|
-
- **[Autoevals Integration](examples/autoevals-guardrails.ts)** - Advanced AI-powered evaluation
|
|
658
|
+
Input safety
|
|
625
659
|
|
|
626
|
-
|
|
660
|
+
| Example | Description | File |
|
|
661
|
+
| ------------------ | ----------------------------------- | --------------------------------------------------------------- |
|
|
662
|
+
| Input length limit | Enforce max input length | [01-input-length-limit.ts](./examples/01-input-length-limit.ts) |
|
|
663
|
+
| Blocked keywords | Block specific terms | [02-blocked-keywords.ts](./examples/02-blocked-keywords.ts) |
|
|
664
|
+
| PII detection | Detect PII before calling the model | [03-pii-detection.ts](./examples/03-pii-detection.ts) |
|
|
665
|
+
| Rate limiting | Simple per-user rate limit | [13-rate-limiting.ts](./examples/13-rate-limiting.ts) |
|
|
627
666
|
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
667
|
+
Output safety
|
|
668
|
+
|
|
669
|
+
| Example | Description | File |
|
|
670
|
+
| ----------------------- | ----------------------------------- | ------------------------------------------------------------------------- |
|
|
671
|
+
| Output length check | Require min/max output length | [04-output-length-check.ts](./examples/04-output-length-check.ts) |
|
|
672
|
+
| Sensitive output filter | Filter secrets and PII in responses | [05-sensitive-output-filter.ts](./examples/05-sensitive-output-filter.ts) |
|
|
673
|
+
| Hallucination detection | Flag uncertain factual claims | [19-hallucination-detection.ts](./examples/19-hallucination-detection.ts) |
|
|
674
|
+
|
|
675
|
+
Streaming
|
|
676
|
+
|
|
677
|
+
| Example | Description | File |
|
|
678
|
+
| ----------------- | ---------------------------------- | --------------------------------------------------------------------------------- |
|
|
679
|
+
| Streaming limits | Apply limits in buffered streaming | [11-streaming-limits.ts](./examples/11-streaming-limits.ts) |
|
|
680
|
+
| Streaming quality | Quality checks with streaming | [12-streaming-quality.ts](./examples/12-streaming-quality.ts) |
|
|
681
|
+
| Early termination | Stop streams early when blocked | [28-streaming-early-termination.ts](./examples/28-streaming-early-termination.ts) |
|
|
682
|
+
|
|
683
|
+
Advanced
|
|
684
|
+
|
|
685
|
+
| Example | Description | File |
|
|
686
|
+
| -------------------------- | ----------------------------- | ------------------------------------------------------------------------------- |
|
|
687
|
+
| Simple quality judge | Cheaper model judges quality | [15a-simple-quality-judge.ts](./examples/15a-simple-quality-judge.ts) |
|
|
688
|
+
| Secret leakage scan | Scan responses for secrets | [18-secret-leakage-scan.ts](./examples/18-secret-leakage-scan.ts) |
|
|
689
|
+
| SQL code safety | Basic SQL safety checks | [24-sql-code-safety.ts](./examples/24-sql-code-safety.ts) |
|
|
690
|
+
| Role hierarchy enforcement | Enforce role rules in prompts | [23-role-hierarchy-enforcement.ts](./examples/23-role-hierarchy-enforcement.ts) |
|
|
691
|
+
|
|
692
|
+
## Compatibility
|
|
693
|
+
|
|
694
|
+
- Runtime: Node.js 18+ recommended
|
|
695
|
+
- AI SDK: Compatible with AI SDK 5 (`ai@^5`); wraps any model
|
|
696
|
+
- `generateObject`: for strict object validation, run `executeOutputGuardrails()` after generation
|
|
697
|
+
|
|
698
|
+
## Architecture
|
|
699
|
+
|
|
700
|
+
```mermaid
|
|
701
|
+
flowchart LR
|
|
702
|
+
A[Input] --> B[Input Guardrails]
|
|
703
|
+
B -->|Valid| C[AI Model]
|
|
704
|
+
B -->|Blocked| X[No API Call]
|
|
705
|
+
C --> D[Output Guardrails]
|
|
706
|
+
D -->|Clean| E[Response]
|
|
707
|
+
D -->|Blocked| R[Retry/Replace/Throw]
|
|
641
708
|
```
|
|
642
709
|
|
|
643
|
-
|
|
710
|
+
### Design principles
|
|
711
|
+
|
|
712
|
+
- Helper-first: simple, chainable APIs with a great developer experience
|
|
713
|
+
- Composable: run multiple guardrails in any order
|
|
714
|
+
- Type-safe: rich TypeScript types and inference
|
|
715
|
+
- Sensible defaults: zero-config to start, full control when you need it
|
|
644
716
|
|
|
645
|
-
##
|
|
717
|
+
## Contributing
|
|
646
718
|
|
|
647
|
-
|
|
719
|
+
Issues and PRs are welcome.
|
|
648
720
|
|
|
649
|
-
##
|
|
721
|
+
## License
|
|
650
722
|
|
|
651
|
-
MIT ©
|
|
723
|
+
MIT © Jag Reehal. See LICENSE for details.
|