llm-scanner 0.1.17 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/reporter.js +105 -72
- package/package.json +1 -1
package/dist/reporter.js
CHANGED
|
@@ -9,11 +9,6 @@ exports.scoreLabel = scoreLabel;
|
|
|
9
9
|
const chalk_1 = __importDefault(require("chalk"));
|
|
10
10
|
const ora_1 = __importDefault(require("ora"));
|
|
11
11
|
const BAR = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━";
|
|
12
|
-
function severityIcon(sev) {
|
|
13
|
-
if (sev === "CRITICAL")
|
|
14
|
-
return chalk_1.default.red("❌");
|
|
15
|
-
return chalk_1.default.yellow("⚠️ ");
|
|
16
|
-
}
|
|
17
12
|
function verdictEmoji(v) {
|
|
18
13
|
if (v === "PASS")
|
|
19
14
|
return chalk_1.default.green("✅ PASS");
|
|
@@ -83,66 +78,113 @@ function confidenceForFail(reason, rawResponse) {
|
|
|
83
78
|
];
|
|
84
79
|
return highSignals.some((s) => signal.includes(s)) ? "HIGH" : "MEDIUM";
|
|
85
80
|
}
|
|
81
|
+
/**
 * Collapse arbitrary text into a single-line preview of bounded length.
 *
 * Every run of whitespace (including newlines) becomes one space and the
 * result is trimmed. Text longer than `maxLen` UTF-16 code units is cut and
 * suffixed with "..." so that the returned string never exceeds `maxLen`.
 *
 * @param {*} text - Text to preview. `null`/`undefined` count as empty; any
 *   other non-string value is coerced with `String()`.
 * @param {number} [maxLen=120] - Maximum length of the returned preview.
 * @returns {string} The preview, or "(no text)" when nothing printable remains.
 */
function shortSnippet(text, maxLen = 120) {
    const ELLIPSIS = "...";
    // Coerce defensively: callers pass raw model output, which may be missing.
    const flat = (text == null ? "" : String(text)).replace(/\s+/g, " ").trim();
    if (!flat) {
        return "(no text)";
    }
    if (flat.length <= maxLen) {
        return flat;
    }
    // No room for the ellipsis: a negative slice end would otherwise return
    // more characters than requested, so hard-cut instead.
    if (maxLen < ELLIPSIS.length) {
        return flat.slice(0, Math.max(0, maxLen));
    }
    let head = flat.slice(0, maxLen - ELLIPSIS.length);
    // Never end on a lone high surrogate (half of an emoji / astral character).
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        head = head.slice(0, -1);
    }
    return `${head}${ELLIPSIS}`;
}
|
|
89
|
+
/**
 * Translate a judge's failure reason into a one-sentence, non-technical
 * explanation.
 *
 * Matching is case-insensitive substring matching, checked in order; the
 * first rule that matches wins.
 *
 * @param {*} reason - Failure reason text. `null`/`undefined` are treated as
 *   an empty reason (other non-strings are coerced with `String()`), which
 *   yields the generic fallback sentence instead of throwing.
 * @returns {string} A plain-English description of what went wrong.
 */
function plainEnglishLine(reason) {
    // A missing reason is an expected state for results, so do not throw on it.
    const text = (reason == null ? "" : String(reason)).toLowerCase();
    const mentions = (...needles) => needles.some((needle) => text.includes(needle));
    if (mentions("partially followed", "malicious instruction")) {
        return "The model followed a malicious instruction instead of refusing.";
    }
    if (mentions("structured output", "formatting")) {
        return "The model ignored its safety rules.";
    }
    if (mentions("reveal", "internal", "system prompt")) {
        return "The model exposed internal behavior.";
    }
    return "The model did something unsafe with the request.";
}
|
|
102
|
+
/**
 * Bucket failed results by their failure reason.
 *
 * Only entries whose `verdict` is "FAIL" are kept. Entries with a falsy
 * `reason` share the "unsafe behavior" bucket. Buckets appear in the order
 * their reason is first seen, and each bucket keeps input order.
 *
 * @param {Iterable<object>} results - Results exposing `verdict` and `reason`.
 * @returns {Map<string, object[]>} Failure reason mapped to its failed results.
 */
function groupFails(results) {
    const byReason = new Map();
    for (const result of results) {
        if (result.verdict === "FAIL") {
            const reasonKey = result.reason || "unsafe behavior";
            const bucket = byReason.get(reasonKey);
            if (bucket === undefined) {
                byReason.set(reasonKey, [result]);
            }
            else {
                bucket.push(result);
            }
        }
    }
    return byReason;
}
|
|
114
|
+
/**
 * Print one detailed section per failure group to stdout.
 *
 * Only the first result of each group is shown as the representative sample.
 * For that sample the output lists the attack prompt, the raw response (or
 * "(empty)"), an issue label derived from the reason text, a short
 * why-it-matters line, a curl command template and a confidence value.
 *
 * @param {Map<string, object[]>} grouped - Failure reason mapped to the
 *   results that failed for it; each result is read for `attack.category`,
 *   `attack.prompt` and `rawResponse`.
 * @returns {void}
 */
function printDetailedFailGroups(grouped) {
    const heading = (title) => console.log(` --- ${title} ---`);
    grouped.forEach((group, reason) => {
        const sample = group[0];
        const confidence = confidenceForFail(reason, sample.rawResponse);
        const reproBody = JSON.stringify({ message: sample.attack.prompt });
        // Classify by reason text; anything unrecognised falls into the last label.
        const loweredReason = reason.toLowerCase();
        let issue;
        if (loweredReason.includes("partially followed attacker instruction")) {
            issue = "Model executed attacker instruction";
        }
        else if (loweredReason.includes("structured output")) {
            issue = "Model followed malicious formatting";
        }
        else {
            issue = "Model revealed internal behavior";
        }
        let whyItMatters;
        switch (issue) {
            case "Model executed attacker instruction":
                whyItMatters = "May allow chaining attacks";
                break;
            case "Model followed malicious formatting":
                whyItMatters = "Weak instruction boundary";
                break;
            default:
                whyItMatters = "Potential prompt injection risk";
        }
        console.log(` [FAIL] ${sample.attack.category}`);
        console.log();
        heading("ATTACK");
        console.log(` ${sample.attack.prompt}`);
        console.log();
        heading("RESPONSE");
        console.log(` ${sample.rawResponse || "(empty)"}`);
        console.log();
        heading("ISSUE");
        console.log(` ${issue}`);
        console.log();
        heading("WHY IT MATTERS");
        console.log(` ${whyItMatters}`);
        console.log();
        heading("REPRODUCE");
        console.log(" curl -X POST <endpoint> \\");
        console.log(' -H "Content-Type: application/json" \\');
        console.log(` -d '${shellSingleQuote(reproBody)}'`);
        console.log();
        heading("CONFIDENCE");
        console.log(` ${confidence}`);
        console.log();
    });
}
|
|
86
153
|
function printFinalReport(results, verbose, debug = false) {
|
|
87
154
|
const fails = results.filter((r) => r.verdict === "FAIL");
|
|
88
155
|
const passes = results.filter((r) => r.verdict === "PASS").length;
|
|
89
156
|
const skipped = results.filter((r) => r.verdict === "SKIP").length;
|
|
90
157
|
const judged = results.filter((r) => r.verdict === "PASS" || r.verdict === "FAIL").length;
|
|
91
158
|
const score = judged > 0 ? Math.round((100 * passes) / judged) : null;
|
|
159
|
+
const simpleMode = !debug && !verbose;
|
|
160
|
+
const grouped = groupFails(results);
|
|
92
161
|
console.log();
|
|
93
|
-
|
|
94
|
-
console.log(chalk_1.default.bold(" AI Security Report"));
|
|
95
|
-
console.log(chalk_1.default.bold(BAR));
|
|
96
|
-
console.log();
|
|
97
|
-
if (!debug) {
|
|
98
|
-
const grouped = new Map();
|
|
99
|
-
for (const r of results) {
|
|
100
|
-
if (r.verdict !== "FAIL")
|
|
101
|
-
continue;
|
|
102
|
-
const key = r.reason || "Model behavior indicates a potential policy bypass.";
|
|
103
|
-
if (!grouped.has(key))
|
|
104
|
-
grouped.set(key, []);
|
|
105
|
-
grouped.get(key).push(r);
|
|
106
|
-
}
|
|
162
|
+
if (simpleMode) {
|
|
107
163
|
for (const [reason, group] of grouped.entries()) {
|
|
108
164
|
const sample = group[0];
|
|
109
|
-
const
|
|
110
|
-
const
|
|
111
|
-
|
|
112
|
-
? "Model executed attacker instruction"
|
|
113
|
-
: reason.toLowerCase().includes("structured output")
|
|
114
|
-
? "Model followed malicious formatting"
|
|
115
|
-
: "Model revealed internal behavior";
|
|
116
|
-
const whyItMatters = issue === "Model executed attacker instruction"
|
|
117
|
-
? "May allow chaining attacks"
|
|
118
|
-
: issue === "Model followed malicious formatting"
|
|
119
|
-
? "Weak instruction boundary"
|
|
120
|
-
: "Potential prompt injection risk";
|
|
121
|
-
console.log(` [FAIL] ${sample.attack.category}`);
|
|
122
|
-
console.log();
|
|
123
|
-
console.log(" --- ATTACK ---");
|
|
124
|
-
console.log(` ${sample.attack.prompt}`);
|
|
125
|
-
console.log();
|
|
126
|
-
console.log(" --- RESPONSE ---");
|
|
127
|
-
console.log(` ${sample.rawResponse || "(empty)"}`);
|
|
128
|
-
console.log();
|
|
129
|
-
console.log(" --- ISSUE ---");
|
|
130
|
-
console.log(` ${issue}`);
|
|
165
|
+
const snippet = shortSnippet(sample.rawResponse || sample.attack.prompt);
|
|
166
|
+
const explain = plainEnglishLine(reason);
|
|
167
|
+
console.log(chalk_1.default.red(" ❌ Found an issue"));
|
|
131
168
|
console.log();
|
|
132
|
-
console.log(
|
|
133
|
-
console.log(` ${
|
|
169
|
+
console.log(` The AI followed a malicious instruction:`);
|
|
170
|
+
console.log(` "${snippet}"`);
|
|
134
171
|
console.log();
|
|
135
|
-
console.log(
|
|
136
|
-
console.log(" curl -X POST <endpoint> \\");
|
|
137
|
-
console.log(' -H "Content-Type: application/json" \\');
|
|
138
|
-
console.log(` -d '${shellSingleQuote(reproBody)}'`);
|
|
139
|
-
console.log();
|
|
140
|
-
console.log(" --- CONFIDENCE ---");
|
|
141
|
-
console.log(` ${confidence}`);
|
|
172
|
+
console.log(` ${explain}`);
|
|
142
173
|
console.log();
|
|
143
174
|
}
|
|
144
175
|
}
|
|
176
|
+
if (verbose && !debug && grouped.size > 0) {
|
|
177
|
+
console.log(chalk_1.default.bold(BAR));
|
|
178
|
+
console.log(chalk_1.default.bold(" Details (verbose)"));
|
|
179
|
+
console.log(chalk_1.default.bold(BAR));
|
|
180
|
+
console.log();
|
|
181
|
+
printDetailedFailGroups(grouped);
|
|
182
|
+
}
|
|
145
183
|
if (debug) {
|
|
184
|
+
console.log(chalk_1.default.bold(BAR));
|
|
185
|
+
console.log(chalk_1.default.bold(" Debug"));
|
|
186
|
+
console.log(chalk_1.default.bold(BAR));
|
|
187
|
+
console.log();
|
|
146
188
|
for (const r of results) {
|
|
147
189
|
const verdictTag = `[${r.verdict}] ${r.attack.category}`;
|
|
148
190
|
console.log(` ${verdictTag}`);
|
|
@@ -176,27 +218,18 @@ function printFinalReport(results, verbose, debug = false) {
|
|
|
176
218
|
}
|
|
177
219
|
console.log();
|
|
178
220
|
}
|
|
179
|
-
const
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
221
|
+
const issueCount = fails.length;
|
|
222
|
+
console.log(` ${passes} tests passed`);
|
|
223
|
+
console.log(issueCount > 0 ? chalk_1.default.red(` ${issueCount} issues found`) : chalk_1.default.green(` ${issueCount} issues found`));
|
|
224
|
+
if (score === null) {
|
|
225
|
+
console.log(chalk_1.default.yellow(` Score: —`));
|
|
226
|
+
}
|
|
227
|
+
else {
|
|
228
|
+
const scoreColor = fails.length === 0 ? chalk_1.default.green : chalk_1.default.yellow;
|
|
229
|
+
console.log(scoreColor(` Score: ${score}/100`));
|
|
230
|
+
}
|
|
231
|
+
if (skipped > 0) {
|
|
232
|
+
console.log(chalk_1.default.gray(` (${skipped} skipped)`));
|
|
233
|
+
}
|
|
183
234
|
console.log();
|
|
184
|
-
console.log(chalk_1.default.bold(BAR));
|
|
185
|
-
const label = score === null ? "NO JUDGED TESTS" : scoreLabel(score);
|
|
186
|
-
const vulnLine = score === null
|
|
187
|
-
? chalk_1.default.yellow(` Score: — · ${label}`)
|
|
188
|
-
: fails.length === 0
|
|
189
|
-
? chalk_1.default.green(` Score: ${score}/100 · ${label}`)
|
|
190
|
-
: chalk_1.default.yellow(` Score: ${score}/100 · ${label}`);
|
|
191
|
-
console.log(vulnLine);
|
|
192
|
-
console.log(fails.length > 0
|
|
193
|
-
? (() => {
|
|
194
|
-
const uniqueIssues = new Set(fails.map((r) => r.reason || "Model behavior indicates a potential policy bypass.")).size;
|
|
195
|
-
const severityLabel = uniqueIssues === 1 ? "critical vulnerability" : "critical vulnerabilities";
|
|
196
|
-
return chalk_1.default.red(` ${uniqueIssues} ${severityLabel} found`);
|
|
197
|
-
})()
|
|
198
|
-
: judged === 0
|
|
199
|
-
? chalk_1.default.yellow(` All ${results.length} tests were skipped`)
|
|
200
|
-
: chalk_1.default.green(" No vulnerabilities found"));
|
|
201
|
-
console.log(chalk_1.default.bold(BAR));
|
|
202
235
|
}
|