llm_guardrail 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +62 -42
- package/model/logistic_regression.js +42 -0
- package/model/model_data.json +60024 -0
- package/model/tfidf.js +88 -0
- package/package.json +10 -1
- package/model/README.md +0 -0
- package/model/injective_model.joblib +0 -0
- package/model/jailbreak_model.joblib +0 -0
- package/model/malicious_model.joblib +0 -0
- package/model/predict.py +0 -32
- package/model/prompt_injection_detector.joblib +0 -0
package/index.js
CHANGED
|
@@ -1,42 +1,62 @@
|
|
|
1
|
-
import
|
|
2
|
-
import path from
|
|
3
|
-
import { fileURLToPath } from
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
const
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import { TfidfVectorizer } from './model/tfidf.js';
|
|
5
|
+
import { LogisticRegression } from './model/logistic_regression.js';
|
|
6
|
+
|
|
7
|
+
// ES modules do not define __dirname, so derive this module's directory from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
8
|
+
|
|
9
|
+
// Module-level cache: loadModel() fills these on first use and returns them on later calls.
|
|
10
|
+
let vectorizer = null;
|
|
11
|
+
let model = null;
|
|
12
|
+
|
|
13
|
+
/**
 * Lazily builds the TF-IDF vectorizer and the logistic-regression classifier
 * from `model/model_data.json` (resolved relative to this module's directory)
 * and caches both in the module-level `vectorizer` / `model` bindings. Once a
 * load has succeeded, later calls return the cached pair without touching the
 * file system again.
 *
 * @returns {{ model: LogisticRegression, vectorizer: TfidfVectorizer }}
 *   The cached classifier and vectorizer.
 * @throws {Error} With message "Failed to load model: <reason>" when the JSON
 *   file cannot be read or parsed, or when either constructor throws. The
 *   underlying error is attached as `cause`.
 */
function loadModel() {
  if (model && vectorizer) return { model, vectorizer };

  try {
    const modelPath = path.join(__dirname, 'model', 'model_data.json');
    const modelData = JSON.parse(fs.readFileSync(modelPath, 'utf8'));

    // Build both objects before publishing either one, so a failure in the
    // second constructor never leaves the cache half-populated.
    const loadedVectorizer = new TfidfVectorizer(modelData);
    const loadedModel = new LogisticRegression(modelData);

    vectorizer = loadedVectorizer;
    model = loadedModel;

    return { model, vectorizer };
  } catch (error) {
    // Keep the original error (and its stack) reachable through `cause`
    // instead of flattening it into the message only.
    throw new Error(`Failed to load model: ${error.message}`, { cause: error });
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Classifies a prompt with the bundled TF-IDF + logistic-regression model.
 *
 * The model is loaded on first use via `loadModel()`. All work is synchronous;
 * the function is `async` only so that callers keep receiving a Promise and
 * every failure surfaces as a rejection.
 *
 * @param {string} prompt - Text to classify.
 * @returns {Promise<{
 *   allowed: boolean,
 *   injective: number,
 *   prediction: number,
 *   confidence: number,
 *   probabilities: { safe: number, injection: number }
 * }>} Resolves with:
 *   - `allowed`: `true` when the predicted class is 0 (safe).
 *   - `injective` / `prediction`: the predicted class, 0 = safe, 1 = injection.
 *   - `confidence`: the positive-class (injection) probability reported by
 *     `model.predictProba` — not the confidence of the predicted label, so it
 *     is low for prompts classified as safe.
 *   - `probabilities`: the two entries of `predictProba`'s `probabilities`
 *     array, exposed as `safe` (index 0) and `injection` (index 1).
 * @throws {Error} Rejects with "Prompt must be a string" for non-string input,
 *   and with whatever `loadModel()`, the vectorizer or the model throws.
 */
export async function check(prompt) {
  if (typeof prompt !== "string") {
    throw new Error("Prompt must be a string");
  }

  // Load model if not already loaded
  const { model, vectorizer } = loadModel();

  // Transform text to TF-IDF features
  const features = vectorizer.transform(prompt);

  // Get prediction
  const prediction = model.predict(features);
  const { probabilities, positiveProb } = model.predictProba(features);

  return {
    allowed: prediction === 0, // 0 = safe, 1 = injection
    injective: prediction,
    prediction,
    confidence: positiveProb,
    probabilities: {
      safe: probabilities[0],
      injection: probabilities[1],
    },
  };
}
|
|
61
|
+
|
|
62
|
+
export default { check };
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
 * Minimal binary logistic-regression scorer driven by pre-trained parameters.
 */
export class LogisticRegression {
  /**
   * @param {object} modelData - Serialized model parameters.
   * @param {number[][]} modelData.coefficients - Weight rows; only the first
   *   row is used (binary classification).
   * @param {number[]} modelData.intercept - Bias terms; only the first is used.
   * @param {Array} modelData.classes - Class labels, stored as-is.
   * @param {number} [modelData.threshold=0.5] - Decision threshold applied to
   *   the positive-class probability in `predict`.
   */
  constructor(modelData) {
    this.coefficients = modelData.coefficients[0]; // For binary classification
    this.intercept = modelData.intercept[0];
    this.classes = modelData.classes;
    // `??` rather than `||`: an explicit threshold of 0 is a valid setting and
    // must not be silently replaced by the default.
    this.threshold = modelData.threshold ?? 0.5;
  }

  /**
   * Sigmoid function.
   *
   * @param {number} z - Linear score.
   * @returns {number} 1 / (1 + e^-z).
   */
  sigmoid(z) {
    return 1 / (1 + Math.exp(-z));
  }

  /**
   * Predict probability for a feature vector.
   *
   * @param {ArrayLike<number>} features - Indexable feature values; entry `i`
   *   is multiplied by coefficient `i`.
   * @returns {{ probabilities: number[], positiveProb: number }}
   *   `probabilities` is `[1 - p, p]` and `positiveProb` is `p`, where `p` is
   *   the sigmoid of the linear score.
   * @throws {Error} When `features` has more entries than there are
   *   coefficients.
   */
  predictProba(features) {
    // A missing coefficient would turn the score into NaN, and NaN compares
    // false against the threshold in `predict`, i.e. the input would silently
    // be classified as 0. Fail loudly instead.
    if (features.length > this.coefficients.length) {
      throw new Error(
        `Expected at most ${this.coefficients.length} features, got ${features.length}`
      );
    }

    // Calculate z = w·x + b
    let z = this.intercept;
    for (let i = 0; i < features.length; i++) {
      z += this.coefficients[i] * features[i];
    }

    // Apply sigmoid
    const probPositive = this.sigmoid(z);

    return {
      probabilities: [1 - probPositive, probPositive],
      positiveProb: probPositive,
    };
  }

  /**
   * Predict class (0 or 1).
   *
   * @param {ArrayLike<number>} features - Feature vector, as for `predictProba`.
   * @returns {0|1} 1 when the positive-class probability is at least
   *   `this.threshold`, otherwise 0.
   */
  predict(features) {
    const { positiveProb } = this.predictProba(features);
    return positiveProb >= this.threshold ? 1 : 0;
  }
}
|