llm_guardrail 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +62 -42
- package/model/logistic_regression.js +42 -0
- package/model/model_data.json +60024 -0
- package/model/tfidf.js +88 -0
- package/package.json +10 -1
- package/model/README.md +0 -0
- package/model/injective_model.joblib +0 -0
- package/model/jailbreak_model.joblib +0 -0
- package/model/malicious_model.joblib +0 -0
- package/model/predict.py +0 -32
- package/model/prompt_injection_detector.joblib +0 -0
package/model/tfidf.js
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
 * JavaScript re-implementation of a fitted scikit-learn TfidfVectorizer,
 * used for inference only. The fitted state (vocabulary, idf weights,
 * n-gram range, lowercase flag) is supplied via `modelData`, exported
 * from the original Python model.
 */
export class TfidfVectorizer {
  /**
   * @param {object} modelData - Exported vectorizer state.
   * @param {Object<string, number>} modelData.vocabulary - term -> column index.
   * @param {number[]} modelData.idf - idf weight per column index.
   * @param {[number, number]} modelData.ngram_range - inclusive [min_n, max_n].
   * @param {boolean} modelData.lowercase - whether to lowercase input text.
   */
  constructor(modelData) {
    this.vocabulary = modelData.vocabulary;
    this.idf = modelData.idf;
    this.ngramRange = modelData.ngram_range;
    this.lowercase = modelData.lowercase;

    // Reverse mapping (column index -> term), useful for debugging/inspection.
    this.indexToWord = {};
    for (const [word, index] of Object.entries(this.vocabulary)) {
      this.indexToWord[index] = word;
    }
  }

  /**
   * Tokenize text into words using sklearn's default token pattern
   * (runs of 2+ word characters; single-character tokens are dropped).
   * @param {string} text
   * @returns {string[]} tokens (empty array when nothing matches)
   */
  tokenize(text) {
    const source = this.lowercase ? text.toLowerCase() : text;
    return source.match(/\b\w\w+\b/g) ?? [];
  }

  /**
   * Generate contiguous n-grams (space-joined) from a token list.
   * @param {string[]} tokens
   * @param {number} n - gram size
   * @returns {string[]} n-grams; empty when tokens.length < n
   */
  generateNgrams(tokens, n) {
    const count = Math.max(0, tokens.length - n + 1);
    return Array.from({ length: count }, (_, i) =>
      tokens.slice(i, i + n).join(' ')
    );
  }

  /**
   * All n-grams for every size in the configured inclusive range.
   * @param {string[]} tokens
   * @returns {string[]}
   */
  getAllNgrams(tokens) {
    const [minN, maxN] = this.ngramRange;
    const allNgrams = [];
    for (let n = minN; n <= maxN; n++) {
      allNgrams.push(...this.generateNgrams(tokens, n));
    }
    return allNgrams;
  }

  /**
   * Transform text into an L2-normalized TF-IDF vector aligned with the
   * fitted vocabulary (dense array, one slot per vocabulary entry).
   * Out-of-vocabulary terms are ignored.
   * @param {string} text
   * @returns {number[]}
   */
  transform(text) {
    const ngrams = this.getAllNgrams(this.tokenize(text));

    // Raw term frequencies. A Map avoids collisions with Object.prototype
    // keys ("constructor", "toString", ...) that can appear in user text.
    const termFreq = new Map();
    for (const ngram of ngrams) {
      termFreq.set(ngram, (termFreq.get(ngram) ?? 0) + 1);
    }

    const vector = new Array(Object.keys(this.vocabulary).length).fill(0);

    // tf-idf = raw count * idf (sklearn default, sublinear_tf=False).
    // Object.hasOwn guards against prototype-chain lookups: the previous
    // `term in this.vocabulary` check let a token like "constructor"
    // resolve to an inherited function, index `idf` with it (undefined),
    // and write NaN onto a bogus property of the output vector.
    for (const [term, tf] of termFreq) {
      if (Object.hasOwn(this.vocabulary, term)) {
        const index = this.vocabulary[term];
        vector[index] = tf * this.idf[index];
      }
    }

    // L2 normalization (sklearn norm='l2'); zero vectors are left as-is.
    const norm = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
    if (norm > 0) {
      for (let i = 0; i < vector.length; i++) {
        vector[i] /= norm;
      }
    }

    return vector;
  }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "llm_guardrail",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "A lightweight, low-latency ML-powered guardrail to stop prompt injection attacks before they reach your LLM.",
|
|
5
5
|
"homepage": "https://github.com/Frank2006x/llm_Guardrails#readme",
|
|
6
6
|
"bugs": {
|
|
@@ -24,11 +24,20 @@
|
|
|
24
24
|
"type": "git",
|
|
25
25
|
"url": "git+https://github.com/Frank2006x/llm_Guardrails.git"
|
|
26
26
|
},
|
|
27
|
+
"files": [
|
|
28
|
+
"index.js",
|
|
29
|
+
"model/tfidf.js",
|
|
30
|
+
"model/logistic_regression.js",
|
|
31
|
+
"model/model_data.json"
|
|
32
|
+
],
|
|
27
33
|
"license": "ISC",
|
|
28
34
|
"author": "Frank2006x",
|
|
29
35
|
"type": "module",
|
|
30
36
|
"main": "index.js",
|
|
31
37
|
"scripts": {
|
|
32
38
|
"test": "echo \"Error: no test specified\" && exit 1"
|
|
39
|
+
},
|
|
40
|
+
"dependencies": {
|
|
41
|
+
"ml-logistic-regression": "^2.0.0"
|
|
33
42
|
}
|
|
34
43
|
}
|
package/model/README.md
DELETED
|
File without changes
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/model/predict.py
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
# CLI prediction script for the prompt-injection detector.
# Usage: python predict.py "<text to classify>"
# Prints one JSON object to stdout:
#   {"injective": 0|1, "probability": <float>, "threshold": <float>}
import sys
import json
import joblib
from pathlib import Path

# ---- safety check ----
# Require exactly one positional argument: the text to classify.
# On failure, emit a machine-readable error and a non-zero exit code.
if len(sys.argv) < 2:
    print(json.dumps({"error": "No input text provided"}))
    sys.exit(1)

# Resolve the artifact path relative to this file so the script works
# regardless of the caller's current working directory.
BASE_DIR = Path(__file__).resolve().parent
text = sys.argv[1]

# ---- load combined artifact ----
# The joblib artifact bundles the fitted classifier, its vectorizer, and an
# optional decision threshold (falls back to 0.5 when absent).
# NOTE(review): joblib.load unpickles arbitrary code — only load trusted files.
artifact = joblib.load(BASE_DIR / "prompt_injection_detector.joblib")

model = artifact["model"]
vectorizer = artifact["vectorizer"]
threshold = artifact.get("threshold", 0.5)

# ---- predict ----
X = vectorizer.transform([text])
prob = model.predict_proba(X)[0][1]  # probability of the positive (injective) class
flagged = prob >= threshold

result = {
    "injective": int(flagged),
    "probability": round(float(prob), 4),
    "threshold": threshold
}

print(json.dumps(result))
|
|
Binary file
|