llm_guardrail 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1 +1,225 @@
1
- # llm_Guardrails
1
+ # 🛡️ LLM Guardrails
2
+
3
+ A lightweight, low-latency ML-powered guardrail to stop prompt injection attacks before they reach your LLM. Protect your AI applications with minimal performance overhead.
4
+
5
+ [![npm version](https://badge.fury.io/js/llm_guardrail.svg)](https://www.npmjs.com/package/llm_guardrail)
6
+ [![License: ISC](https://img.shields.io/badge/License-ISC-blue.svg)](https://opensource.org/licenses/ISC)
7
+
8
+ ## 🚀 Features
9
+
10
+ - **🔒 Security First**: Detects and blocks prompt injection attacks using machine learning
11
+ - **⚡ Low Latency**: Optimized for production use with minimal performance impact
12
+ - **🎯 High Accuracy**: ML-powered detection with configurable confidence thresholds
13
+ - **📦 Lightweight**: No external API calls - everything runs locally
14
+ - **🔧 Easy Integration**: Simple API that works with any LLM framework
15
+ - **🎛️ Flexible**: Returns detailed prediction data for custom handling
16
+
17
+ ## 📥 Installation
18
+
19
+ ```bash
20
+ npm install llm_guardrail
21
+ ```
22
+
23
+ ## 🛠️ Quick Start
24
+
25
+ ### ES Modules
26
+
27
+ ```javascript
28
+ import { check } from "llm_guardrail";
29
+
30
+ // Check a prompt for injection attempts
31
+ const result = await check("Tell me about cats");
32
+
33
+ if (result.allowed) {
34
+ console.log("✅ Safe prompt - proceed to LLM");
35
+ // Send to your LLM
36
+ } else {
37
+ console.log("🚫 Potential injection detected!");
38
+ console.log(`Confidence: ${(result.confidence * 100).toFixed(2)}%`);
39
+ }
40
+ ```
41
+
42
+ ### CommonJS
43
+
44
+ ```javascript
45
+ const { check } = require("llm_guardrail");
46
+
47
+ async function validatePrompt(userInput) {
48
+ try {
49
+ const result = await check(userInput);
50
+ return result.allowed;
51
+ } catch (error) {
52
+ console.error("Guardrail check failed:", error);
53
+ return false; // Fail closed for security
54
+ }
55
+ }
56
+ ```
57
+
58
+ ## 📊 API Reference
59
+
60
+ ### `check(prompt)`
61
+
62
+ Analyzes a prompt for potential injection attacks.
63
+
64
+ **Parameters:**
65
+
66
+ - `prompt` (string): The user input to analyze
67
+
68
+ **Returns:** Promise resolving to an object with:
69
+
70
+ ```javascript
71
+ {
72
+ allowed: boolean, // true if safe, false if potential injection
73
+ injective: number, // 0 = safe, 1 = injection (same as prediction)
74
+ prediction: number, // 0 = safe, 1 = injection
75
+ confidence: number, // Confidence score for injection (0-1)
76
+ probabilities: {
77
+ safe: number, // Probability of being safe (0-1)
78
+ injection: number // Probability of being injection (0-1)
79
+ }
80
+ }
81
+ ```
82
+
83
+ ## 🎯 Usage Examples
84
+
85
+ ### Basic Integration
86
+
87
+ ```javascript
88
+ import { check } from "llm_guardrail";
89
+ import { openai } from "your-llm-client";
90
+
91
+ async function secureChat(userMessage) {
92
+ // Check for prompt injection
93
+ const guardResult = await check(userMessage);
94
+
95
+ if (!guardResult.allowed) {
96
+ return {
97
+ error: "Your message appears to contain potentially harmful content.",
98
+ confidence: guardResult.confidence,
99
+ };
100
+ }
101
+
102
+ // Safe to proceed
103
+ const response = await openai.chat.completions.create({
104
+ model: "gpt-4",
105
+ messages: [{ role: "user", content: userMessage }],
106
+ });
107
+
108
+ return response;
109
+ }
110
+ ```
111
+
112
+ ### Custom Confidence Threshold
113
+
114
+ ```javascript
115
+ import { check } from "llm_guardrail";
116
+
117
+ async function smartFilter(prompt, strictMode = false) {
118
+ const result = await check(prompt);
119
+
120
+ // Adjust threshold based on your risk tolerance
121
+ const threshold = strictMode ? 0.3 : 0.7;
122
+
123
+ if (result.confidence > threshold) {
124
+ console.log(
125
+ `🚨 High-confidence injection detected: ${(result.confidence * 100).toFixed(1)}%`,
126
+ );
127
+ return false;
128
+ }
129
+
130
+ return true;
131
+ }
132
+ ```
133
+
134
+ ### Detailed Response Handling
135
+
136
+ ```javascript
137
+ import { check } from "llm_guardrail";
138
+
139
+ async function analyzePrompt(userInput) {
140
+ const result = await check(userInput);
141
+
142
+ console.log("📋 Guardrail Analysis:");
143
+ console.log(
144
+ ` Safe Probability: ${(result.probabilities.safe * 100).toFixed(2)}%`,
145
+ );
146
+ console.log(
147
+ ` Injection Probability: ${(result.probabilities.injection * 100).toFixed(2)}%`,
148
+ );
149
+ console.log(` Recommendation: ${result.allowed ? "✅ Allow" : "🚫 Block"}`);
150
+
151
+ return result;
152
+ }
153
+ ```
154
+
155
+ ## 🔧 Technical Details
156
+
157
+ ### Architecture
158
+
159
+ - **TF-IDF Vectorization**: Converts text to numerical features
160
+ - **Logistic Regression**: ML model trained on prompt injection datasets
161
+ - **Local Processing**: No external API calls or data transmission
162
+ - **ES Module Support**: Modern JavaScript module system
163
+
164
+ ### Performance
165
+
166
+ - **Latency**: < 10ms typical response time
167
+ - **Memory**: ~5MB model footprint
168
+ - **CPU**: Minimal overhead suitable for production
169
+
170
+ ### Security Model
171
+
172
+ The guardrail uses a machine learning approach trained to detect:
173
+
174
+ - Jailbreak attempts
175
+ - System prompt leaks
176
+ - Role confusion attacks
177
+ - Instruction injection
178
+ - Context manipulation
179
+
180
+ ## 🛟 Error Handling
181
+
182
+ ```javascript
183
+ import { check } from "llm_guardrail";
184
+
185
+ async function safeCheck(prompt) {
186
+ try {
187
+ return await check(prompt);
188
+ } catch (error) {
189
+ console.error("Guardrail error:", error.message);
190
+
191
+ // Fail securely - when in doubt, block
192
+ return {
193
+ allowed: false,
194
+ error: error.message,
195
+ fallback: true,
196
+ };
197
+ }
198
+ }
199
+ ```
200
+
201
+ ## 🤝 Community & Support
202
+
203
+ - **Discord**: Join our community at [https://discord.gg/xV8e3TFrFU](https://discord.gg/xV8e3TFrFU)
204
+ - **GitHub Issues**: [Report bugs and request features](https://github.com/Frank2006x/llm_Guardrails/issues)
205
+ - **GitHub Repository**: [Source code and documentation](https://github.com/Frank2006x/llm_Guardrails)
206
+
207
+ ## 📈 Roadmap
208
+
209
+ - [ ] Multi-language support
210
+ - [ ] Custom model training utilities
211
+ - [ ] Real-time model updates
212
+ - [ ] Performance analytics dashboard
213
+ - [ ] Integration examples for popular frameworks
214
+
215
+ ## 📄 License
216
+
217
+ This project is licensed under the ISC License - see the package.json for details.
218
+
219
+ ## 🙏 Contributing
220
+
221
+ We welcome contributions! Please feel free to submit pull requests, report bugs, or suggest features through our GitHub repository or Discord community.
222
+
223
+ ---
224
+
225
+ **⚠️ Security Notice**: This guardrail provides an additional layer of security but should be part of a comprehensive security strategy. Always validate and sanitize inputs at multiple levels.
package/index.js CHANGED
@@ -14,7 +14,7 @@ function loadModel() {
14
14
  if (model && vectorizer) return { model, vectorizer };
15
15
 
16
16
  try {
17
- const modelPath = path.join(__dirname, 'model', 'model_data.json');
17
+ const modelPath = path.join(__dirname, 'model', 'prompt_injection_model.json');
18
18
  const modelData = JSON.parse(fs.readFileSync(modelPath, 'utf8'));
19
19
 
20
20
  vectorizer = new TfidfVectorizer(modelData);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "llm_guardrail",
3
- "version": "2.0.0",
3
+ "version": "2.0.2",
4
4
  "description": "A lightweight, low-latency ML-powered guardrail to stop prompt injection attacks before they reach your LLM.",
5
5
  "homepage": "https://github.com/Frank2006x/llm_Guardrails#readme",
6
6
  "bugs": {
@@ -28,7 +28,7 @@
28
28
  "index.js",
29
29
  "model/tfidf.js",
30
30
  "model/logistic_regression.js",
31
- "model/model_data.json"
31
+ "model/prompt_injection_model.json"
32
32
  ],
33
33
  "license": "ISC",
34
34
  "author": "Frank2006x",