llm_guardrail 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +225 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1 +1,225 @@
|
|
|
1
|
-
#
|
|
1
|
+
# 🛡️ LLM Guardrails
|
|
2
|
+
|
|
3
|
+
A lightweight, low-latency ML-powered guardrail to stop prompt injection attacks before they reach your LLM. Protect your AI applications with minimal performance overhead.
|
|
4
|
+
|
|
5
|
+
[](https://www.npmjs.com/package/llm_guardrail)
|
|
6
|
+
[](https://opensource.org/licenses/ISC)
|
|
7
|
+
|
|
8
|
+
## 🚀 Features
|
|
9
|
+
|
|
10
|
+
- **🔒 Security First**: Detects and blocks prompt injection attacks using machine learning
|
|
11
|
+
- **⚡ Low Latency**: Optimized for production use with minimal performance impact
|
|
12
|
+
- **🎯 High Accuracy**: ML-powered detection with configurable confidence thresholds
|
|
13
|
+
- **📦 Lightweight**: No external API calls - everything runs locally
|
|
14
|
+
- **🔧 Easy Integration**: Simple API that works with any LLM framework
|
|
15
|
+
- **🎛️ Flexible**: Returns detailed prediction data for custom handling
|
|
16
|
+
|
|
17
|
+
## 📥 Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npm install llm_guardrail
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## 🛠️ Quick Start
|
|
24
|
+
|
|
25
|
+
### ES Modules
|
|
26
|
+
|
|
27
|
+
```javascript
|
|
28
|
+
import { check } from "llm_guardrail";
|
|
29
|
+
|
|
30
|
+
// Check a prompt for injection attempts
|
|
31
|
+
const result = await check("Tell me about cats");
|
|
32
|
+
|
|
33
|
+
if (result.allowed) {
|
|
34
|
+
console.log("✅ Safe prompt - proceed to LLM");
|
|
35
|
+
// Send to your LLM
|
|
36
|
+
} else {
|
|
37
|
+
console.log("🚫 Potential injection detected!");
|
|
38
|
+
console.log(`Confidence: ${(result.confidence * 100).toFixed(2)}%`);
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### CommonJS
|
|
43
|
+
|
|
44
|
+
```javascript
|
|
45
|
+
const { check } = require("llm_guardrail");
|
|
46
|
+
|
|
47
|
+
async function validatePrompt(userInput) {
|
|
48
|
+
try {
|
|
49
|
+
const result = await check(userInput);
|
|
50
|
+
return result.allowed;
|
|
51
|
+
} catch (error) {
|
|
52
|
+
console.error("Guardrail check failed:", error);
|
|
53
|
+
return false; // Fail closed for security
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## 📊 API Reference
|
|
59
|
+
|
|
60
|
+
### `check(prompt)`
|
|
61
|
+
|
|
62
|
+
Analyzes a prompt for potential injection attacks.
|
|
63
|
+
|
|
64
|
+
**Parameters:**
|
|
65
|
+
|
|
66
|
+
- `prompt` (string): The user input to analyze
|
|
67
|
+
|
|
68
|
+
**Returns:** Promise resolving to an object with:
|
|
69
|
+
|
|
70
|
+
```javascript
|
|
71
|
+
{
|
|
72
|
+
allowed: boolean, // true if safe, false if potential injection
|
|
73
|
+
injective: number, // 0 = safe, 1 = injection (same as prediction)
|
|
74
|
+
prediction: number, // 0 = safe, 1 = injection
|
|
75
|
+
confidence: number, // Confidence score for injection (0-1)
|
|
76
|
+
probabilities: {
|
|
77
|
+
safe: number, // Probability of being safe (0-1)
|
|
78
|
+
injection: number // Probability of being injection (0-1)
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## 🎯 Usage Examples
|
|
84
|
+
|
|
85
|
+
### Basic Integration
|
|
86
|
+
|
|
87
|
+
```javascript
|
|
88
|
+
import { check } from "llm_guardrail";
|
|
89
|
+
import { openai } from "your-llm-client";
|
|
90
|
+
|
|
91
|
+
async function secureChat(userMessage) {
|
|
92
|
+
// Check for prompt injection
|
|
93
|
+
const guardResult = await check(userMessage);
|
|
94
|
+
|
|
95
|
+
if (!guardResult.allowed) {
|
|
96
|
+
return {
|
|
97
|
+
error: "Your message appears to contain potentially harmful content.",
|
|
98
|
+
confidence: guardResult.confidence,
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Safe to proceed
|
|
103
|
+
const response = await openai.chat.completions.create({
|
|
104
|
+
model: "gpt-4",
|
|
105
|
+
messages: [{ role: "user", content: userMessage }],
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
return response;
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Custom Confidence Threshold
|
|
113
|
+
|
|
114
|
+
```javascript
|
|
115
|
+
import { check } from "llm_guardrail";
|
|
116
|
+
|
|
117
|
+
async function smartFilter(prompt, strictMode = false) {
|
|
118
|
+
const result = await check(prompt);
|
|
119
|
+
|
|
120
|
+
// Adjust threshold based on your risk tolerance
|
|
121
|
+
const threshold = strictMode ? 0.3 : 0.7;
|
|
122
|
+
|
|
123
|
+
if (result.confidence > threshold) {
|
|
124
|
+
console.log(
|
|
125
|
+
`🚨 High-confidence injection detected: ${(result.confidence * 100).toFixed(1)}%`,
|
|
126
|
+
);
|
|
127
|
+
return false;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
return true;
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Detailed Response Handling
|
|
135
|
+
|
|
136
|
+
```javascript
|
|
137
|
+
import { check } from "llm_guardrail";
|
|
138
|
+
|
|
139
|
+
async function analyzePrompt(userInput) {
|
|
140
|
+
const result = await check(userInput);
|
|
141
|
+
|
|
142
|
+
console.log("📋 Guardrail Analysis:");
|
|
143
|
+
console.log(
|
|
144
|
+
` Safe Probability: ${(result.probabilities.safe * 100).toFixed(2)}%`,
|
|
145
|
+
);
|
|
146
|
+
console.log(
|
|
147
|
+
` Injection Probability: ${(result.probabilities.injection * 100).toFixed(2)}%`,
|
|
148
|
+
);
|
|
149
|
+
console.log(` Recommendation: ${result.allowed ? "✅ Allow" : "🚫 Block"}`);
|
|
150
|
+
|
|
151
|
+
return result;
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## 🔧 Technical Details
|
|
156
|
+
|
|
157
|
+
### Architecture
|
|
158
|
+
|
|
159
|
+
- **TF-IDF Vectorization**: Converts text to numerical features
|
|
160
|
+
- **Logistic Regression**: ML model trained on prompt injection datasets
|
|
161
|
+
- **Local Processing**: No external API calls or data transmission
|
|
162
|
+
- **ES Module Support**: Modern JavaScript module system
|
|
163
|
+
|
|
164
|
+
### Performance
|
|
165
|
+
|
|
166
|
+
- **Latency**: < 10ms typical response time
|
|
167
|
+
- **Memory**: ~5MB model footprint
|
|
168
|
+
- **CPU**: Minimal overhead suitable for production
|
|
169
|
+
|
|
170
|
+
### Security Model
|
|
171
|
+
|
|
172
|
+
The guardrail uses a machine learning approach trained to detect:
|
|
173
|
+
|
|
174
|
+
- Jailbreak attempts
|
|
175
|
+
- System prompt leaks
|
|
176
|
+
- Role confusion attacks
|
|
177
|
+
- Instruction injection
|
|
178
|
+
- Context manipulation
|
|
179
|
+
|
|
180
|
+
## 🛟 Error Handling
|
|
181
|
+
|
|
182
|
+
```javascript
|
|
183
|
+
import { check } from "llm_guardrail";
|
|
184
|
+
|
|
185
|
+
async function safeCheck(prompt) {
|
|
186
|
+
try {
|
|
187
|
+
return await check(prompt);
|
|
188
|
+
} catch (error) {
|
|
189
|
+
console.error("Guardrail error:", error.message);
|
|
190
|
+
|
|
191
|
+
// Fail securely - when in doubt, block
|
|
192
|
+
return {
|
|
193
|
+
allowed: false,
|
|
194
|
+
error: error.message,
|
|
195
|
+
fallback: true,
|
|
196
|
+
};
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## 🤝 Community & Support
|
|
202
|
+
|
|
203
|
+
- **Discord**: Join our community at [https://discord.gg/xV8e3TFrFU](https://discord.gg/xV8e3TFrFU)
|
|
204
|
+
- **GitHub Issues**: [Report bugs and request features](https://github.com/Frank2006x/llm_Guardrails/issues)
|
|
205
|
+
- **GitHub Repository**: [Source code and documentation](https://github.com/Frank2006x/llm_Guardrails)
|
|
206
|
+
|
|
207
|
+
## 📈 Roadmap
|
|
208
|
+
|
|
209
|
+
- [ ] Multi-language support
|
|
210
|
+
- [ ] Custom model training utilities
|
|
211
|
+
- [ ] Real-time model updates
|
|
212
|
+
- [ ] Performance analytics dashboard
|
|
213
|
+
- [ ] Integration examples for popular frameworks
|
|
214
|
+
|
|
215
|
+
## 📄 License
|
|
216
|
+
|
|
217
|
+
This project is licensed under the ISC License - see the package.json for details.
|
|
218
|
+
|
|
219
|
+
## 🙏 Contributing
|
|
220
|
+
|
|
221
|
+
We welcome contributions! Please feel free to submit pull requests, report bugs, or suggest features through our GitHub repository or Discord community.
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
**⚠️ Security Notice**: This guardrail provides an additional layer of security but should be part of a comprehensive security strategy. Always validate and sanitize inputs at multiple levels.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm_guardrail",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.1",
|
|
4
4
|
"description": "A lightweight, low-latency ML-powered guardrail to stop prompt injection attacks before they reach your LLM.",
|
|
5
5
|
"homepage": "https://github.com/Frank2006x/llm_Guardrails#readme",
|
|
6
6
|
"bugs": {
|