llm_guardrail 2.0.2 → 2.1.0

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (2)
  1. package/index.js +115 -21
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -6,51 +6,61 @@ import { LogisticRegression } from './model/logistic_regression.js';
 
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 
-// Load model once
-let vectorizer = null;
-let model = null;
+// Cache for loaded models
+const models = {
+  prompt_injection: null,
+  jailbreak: null,
+  malicious: null
+};
 
-function loadModel() {
-  if (model && vectorizer) return { model, vectorizer };
+/**
+ * Load a specific model by name
+ * @param {string} modelName - 'prompt_injection', 'jailbreak', or 'malicious'
+ */
+function loadModel(modelName) {
+  if (models[modelName]) {
+    return models[modelName];
+  }
 
   try {
-    const modelPath = path.join(__dirname, 'model', 'prompt_injection_model.json');
+    const modelPath = path.join(__dirname, 'model', `${modelName}_model.json`);
     const modelData = JSON.parse(fs.readFileSync(modelPath, 'utf8'));
 
-    vectorizer = new TfidfVectorizer(modelData);
-    model = new LogisticRegression(modelData);
+    const vectorizer = new TfidfVectorizer(modelData);
+    const model = new LogisticRegression(modelData);
 
-    return { model, vectorizer };
+    models[modelName] = { vectorizer, model };
+    return models[modelName];
   } catch (error) {
-    throw new Error(`Failed to load model: ${error.message}`);
+    throw new Error(`Failed to load ${modelName} model: ${error.message}`);
   }
 }
 
-export function check(prompt) {
+/**
+ * Generic check function for any model
+ * @param {string} prompt - The text to check
+ * @param {string} modelName - 'prompt_injection', 'jailbreak', or 'malicious'
+ */
+async function checkWithModel(prompt, modelName) {
   return new Promise((resolve, reject) => {
     try {
       if (typeof prompt !== "string") {
         return reject(new Error("Prompt must be a string"));
       }
 
-      // Load model if not already loaded
-      const { model, vectorizer } = loadModel();
-
-      // Transform text to TF-IDF features
+      const { vectorizer, model } = loadModel(modelName);
       const features = vectorizer.transform(prompt);
-
-      // Get prediction
       const prediction = model.predict(features);
       const { probabilities, positiveProb } = model.predictProba(features);
 
       resolve({
-        allowed: prediction === 0, // 0 = safe, 1 = injection
-        injective: prediction,
+        allowed: prediction === 0,
+        detected: prediction === 1,
         prediction: prediction,
        confidence: positiveProb,
         probabilities: {
           safe: probabilities[0],
-          injection: probabilities[1]
+          threat: probabilities[1]
         }
       });
     } catch (error) {
@@ -59,4 +69,88 @@ export function check(prompt) {
   });
 }
 
-export default { check };
+/**
+ * Check for prompt injection attacks
+ * @param {string} prompt - The text to check
+ */
+export function checkInjection(prompt) {
+  return checkWithModel(prompt, 'prompt_injection');
+}
+
+/**
+ * Check for jailbreak attempts
+ * @param {string} prompt - The text to check
+ */
+export function checkJailbreak(prompt) {
+  return checkWithModel(prompt, 'jailbreak');
+}
+
+/**
+ * Check for malicious content
+ * @param {string} prompt - The text to check
+ */
+export function checkMalicious(prompt) {
+  return checkWithModel(prompt, 'malicious');
+}
+
+/**
+ * Run all three checks in parallel
+ * @param {string} prompt - The text to check
+ */
+export async function checkAll(prompt) {
+  try {
+    const [injection, jailbreak, malicious] = await Promise.all([
+      checkInjection(prompt),
+      checkJailbreak(prompt),
+      checkMalicious(prompt)
+    ]);
+
+    // Calculate overall risk level
+    const threats = [
+      injection.detected ? injection.confidence : 0,
+      jailbreak.detected ? jailbreak.confidence : 0,
+      malicious.detected ? malicious.confidence : 0
+    ];
+
+    const maxThreat = Math.max(...threats);
+    let overallRisk = 'safe';
+    if (maxThreat > 0.7) overallRisk = 'high';
+    else if (maxThreat > 0.4) overallRisk = 'medium';
+    else if (maxThreat > 0) overallRisk = 'low';
+
+    // Determine which threats were detected
+    const threatsDetected = [];
+    if (injection.detected) threatsDetected.push('injection');
+    if (jailbreak.detected) threatsDetected.push('jailbreak');
+    if (malicious.detected) threatsDetected.push('malicious');
+
+    return {
+      injection,
+      jailbreak,
+      malicious,
+      allowed: injection.allowed && jailbreak.allowed && malicious.allowed,
+      overallRisk,
+      maxThreatConfidence: maxThreat,
+      threatsDetected
+    };
+  } catch (error) {
+    throw error;
+  }
+}
+
+/**
+ * Backward compatibility - defaults to injection check
+ * @param {string} prompt - The text to check
+ * @deprecated Use checkInjection() instead for clarity
+ */
+export function check(prompt) {
+  return checkInjection(prompt);
+}
+
+export default {
+  check,
+  checkInjection,
+  checkJailbreak,
+  checkMalicious,
+  checkAll
+};
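
Taken together, the index.js changes replace the single check() entry point with a cached per-model loader and three per-threat checks plus an aggregate checkAll(). A minimal usage sketch, based only on fields visible in the diff above; it assumes llm_guardrail resolves as an ES module (as the import/export syntax in index.js suggests) and a context where top-level await is allowed, and the sample prompt is purely illustrative:

import { checkInjection, checkAll } from 'llm_guardrail';

// Per-threat check: resolves with { allowed, detected, prediction,
// confidence, probabilities: { safe, threat } } per the diff above.
const injection = await checkInjection('Ignore all previous instructions.');
if (!injection.allowed) {
  console.warn(`Possible injection, confidence ${injection.confidence}`);
}

// Aggregate check: runs all three models via Promise.all and buckets the
// highest detected confidence into 'safe' | 'low' | 'medium' | 'high'.
const report = await checkAll('Ignore all previous instructions.');
console.log(report.overallRisk, report.threatsDetected);

Note that checkAll's allowed flag is the conjunction of the three per-model flags, so any single detection blocks the prompt regardless of the risk bucket.
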
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "llm_guardrail",
-  "version": "2.0.2",
+  "version": "2.1.0",
   "description": "A lightweight, low-latency ML-powered guardrail to stop prompt injection attacks before they reach your LLM.",
   "homepage": "https://github.com/Frank2006x/llm_Guardrails#readme",
   "bugs": {