prompt-injection-bench 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +174 -0
- package/dist/cli.cjs +799 -0
- package/dist/cli.d.cts +9 -0
- package/dist/cli.d.ts +9 -0
- package/dist/cli.js +779 -0
- package/dist/index.cjs +55 -0
- package/dist/index.d.cts +8 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +24 -0
- package/package.json +63 -0
package/dist/cli.cjs
ADDED
|
@@ -0,0 +1,799 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
// Cached references to the Object built-ins used by the interop helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Mirror the own properties of `from` onto `to` as live getters.
 *
 * Keys that `to` already owns, and the single key named by `except`, are
 * skipped. Each mirrored property keeps the enumerability of its source
 * property. Nothing is copied unless `from` is an object or a function.
 *
 * @param {object} to - Object that receives the getters.
 * @param {*} from - Source whose own property names are mirrored.
 * @param {string} [except] - One key to leave out.
 * @param {*} [desc] - Scratch slot for the current property descriptor.
 * @returns {object} `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const isObject = from !== null && typeof from === "object";
  if (!isObject && typeof from !== "function") {
    return to;
  }
  __getOwnPropNames(from).forEach((key) => {
    if (__hasOwnProp.call(to, key) || key === except) {
      return;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  });
  return to;
};

/**
 * Wrap a required module in a namespace-style object.
 *
 * The wrapper inherits from the module's prototype (or is a plain object when
 * the module is null/undefined) and exposes every own property of the module
 * through a live getter.
 *
 * @param {*} mod - The value returned by `require`.
 * @param {*} [isNodeMode] - Truthy to always set `default`.
 * @param {object} [target] - Scratch slot for the wrapper being built.
 * @returns {object} The wrapper object.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const wantsDefault = isNodeMode || !mod || !mod.__esModule;
  if (wantsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
|
|
25
|
+
|
|
26
|
+
// src/cli.ts
|
|
27
|
+
var import_node_module = require("module");
|
|
28
|
+
var import_commander7 = require("commander");
|
|
29
|
+
|
|
30
|
+
// src/cli/commands/attack.command.ts
|
|
31
|
+
var import_pi_bench_adapters = require("@reaatech/pi-bench-adapters");
|
|
32
|
+
var import_pi_bench_core = require("@reaatech/pi-bench-core");
|
|
33
|
+
var import_pi_bench_corpus = require("@reaatech/pi-bench-corpus");
|
|
34
|
+
var import_pi_bench_observability = require("@reaatech/pi-bench-observability");
|
|
35
|
+
var import_pi_bench_runner = require("@reaatech/pi-bench-runner");
|
|
36
|
+
var import_pi_bench_runner2 = require("@reaatech/pi-bench-runner");
|
|
37
|
+
var import_commander = require("commander");
|
|
38
|
+
var logger = (0, import_pi_bench_observability.createLogger)();
|
|
39
|
+
/**
 * Build the `attack` sub-command.
 *
 * The command takes the default corpus, keeps the samples of one attack
 * category (at most `--count` of them), executes each one against the chosen
 * defense adapter and prints a per-sample line plus a summary. With
 * `--output` the same data is also written to a JSON file.
 *
 * The process exits with code 1 on invalid options, an unknown category, an
 * empty sample set, or any failure while running.
 *
 * @returns The configured commander `Command` instance.
 */
function createAttackCommand() {
  const command = new import_commander.Command("attack");
  command
    .description("Run single attack category against a defense")
    .requiredOption("--category <category>", "Attack category to run")
    .requiredOption("-d, --defense <name>", "Defense adapter name")
    .option("-n, --count <number>", "Number of attacks to run", "50")
    .option("-o, --output <file>", "Output file for results")
    .option("-t, --timeout <ms>", "Timeout per attack in ms", "30000")
    .action(async (options) => {
      const category = options.category;
      const defenseName = options.defense;
      const count = Number.parseInt(options.count, 10);
      const timeoutMs = Number.parseInt(options.timeout, 10);
      // A NaN count makes slice(0, NaN) return nothing, which would surface as
      // a misleading "No samples found" error, so reject bad numbers up front.
      if (!Number.isInteger(count) || count <= 0) {
        console.error(`Invalid count: ${options.count} (expected a positive integer)`);
        process.exit(1);
      }
      if (!Number.isInteger(timeoutMs) || timeoutMs <= 0) {
        console.error(`Invalid timeout: ${options.timeout} (expected a positive integer)`);
        process.exit(1);
      }
      logger.info("Running attack", { category, defense: defenseName, count });
      const validCategories = (0, import_pi_bench_core.getCategoryIds)();
      if (!validCategories.includes(category)) {
        console.error(`Invalid category: ${category}`);
        console.error(`Valid categories: ${validCategories.join(", ")}`);
        process.exit(1);
      }
      // Declared outside the try block so the failure path can release it.
      let adapter;
      try {
        // Select the samples before acquiring the adapter, so that exiting on
        // an empty sample set cannot leave an initialized adapter behind.
        const corpus = (0, import_pi_bench_corpus.generateDefaultCorpus)();
        const categorySamples = corpus.filter((s) => s.category === category).slice(0, count);
        if (categorySamples.length === 0) {
          console.error(`No samples found for category: ${category}`);
          process.exit(1);
        }
        if (defenseName.toLowerCase() === "mock") {
          adapter = new import_pi_bench_adapters.MockAdapter(0.85, 0.05);
        } else {
          adapter = await loadAdapter(defenseName);
        }
        await adapter.initialize?.();
        const executor = (0, import_pi_bench_runner.createAttackExecutor)({ timeoutMs });
        const sandbox = (0, import_pi_bench_runner2.createSafeExecution)({ maxExecutionMs: timeoutMs });
        console.log(`\n=== Attack Results: ${category} ===`);
        console.log(`Defense: ${defenseName}`);
        console.log(`Samples: ${categorySamples.length}`);
        console.log("");
        let detected = 0;
        let bypassed = 0;
        let errors = 0;
        const results = [];
        for (const sample of categorySamples) {
          const context = sandbox.createContext(`run-${Date.now().toString(36)}`, sample.id);
          const result = await sandbox.wrapAsync(
            async () => executor.executeAttack(sample, adapter, defenseName),
            context
          );
          if (result.error) {
            errors++;
            console.log(`[ERROR] ${sample.id}: ${result.error}`);
          } else if (result.success && result.data) {
            const r = result.data;
            results.push(r);
            if (r.detected) {
              detected++;
              console.log(
                `[DETECTED] ${sample.id} (confidence: ${(r.confidence * 100).toFixed(0)}%)`
              );
            } else {
              bypassed++;
              console.log(`[BYPASSED] ${sample.id}`);
            }
          } else {
            // Neither an error nor usable data: count it as an error so that
            // detected + bypassed + errors always equals the sample count.
            errors++;
            console.log(`[ERROR] ${sample.id}: no result returned`);
          }
        }
        console.log("\n=== Summary ===");
        console.log(
          `Detected: ${detected} (${(detected / categorySamples.length * 100).toFixed(1)}%)`
        );
        console.log(
          `Bypassed: ${bypassed} (${(bypassed / categorySamples.length * 100).toFixed(1)}%)`
        );
        console.log(`Errors: ${errors}`);
        if (options.output) {
          const fs = await import("fs");
          fs.writeFileSync(
            options.output,
            JSON.stringify(
              {
                category,
                defense: defenseName,
                totalSamples: categorySamples.length,
                detected,
                bypassed,
                errors,
                results,
                timestamp: (/* @__PURE__ */ new Date()).toISOString()
              },
              null,
              2
            )
          );
          logger.info(`Results written to ${options.output}`);
        }
        // Clear the shared reference first so a failing cleanup is not retried
        // by the catch block below.
        const activeAdapter = adapter;
        adapter = void 0;
        await activeAdapter.cleanup?.();
      } catch (error) {
        logger.error("Attack failed", {
          error: error instanceof Error ? error.message : String(error)
        });
        console.error(`Error: ${error instanceof Error ? error.message : error}`);
        if (adapter) {
          // Best-effort release; the original failure is what gets reported.
          try {
            await adapter.cleanup?.();
          } catch (cleanupError) {
            logger.error("Adapter cleanup failed", {
              error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError)
            });
          }
        }
        process.exit(1);
      }
    });
  return command;
}
|
|
140
|
+
/**
 * Resolve a defense adapter by name for the `attack` command.
 *
 * Accepts the same names as the benchmark command's loader: `mock`, `rebuff`,
 * `lakera`, `llm-guard`, `garak` and `moderation-{openai,azure,anthropic,cohere}`.
 * Matching is case-insensitive. The adapters package is imported only after
 * the name has been recognised.
 *
 * @param {string} name - Adapter name as typed by the user.
 * @returns {Promise<object>} The adapter created by the matching factory.
 * @throws {Error} (as a rejection) when the name is not recognised.
 */
async function loadAdapter(name) {
  const key = name.toLowerCase();
  // A Map rather than an object literal, so that names such as "constructor"
  // cannot resolve to inherited properties.
  const factories = new Map([
    ["mock", (m) => m.createMockAdapter(0.85, 0.05)],
    ["rebuff", (m) => m.createRebuffAdapter()],
    ["lakera", (m) => m.createLakeraAdapter()],
    ["llm-guard", (m) => m.createLLMGuardAdapter()],
    ["garak", (m) => m.createGarakAdapter()]
  ]);
  for (const provider of ["openai", "azure", "anthropic", "cohere"]) {
    factories.set(`moderation-${provider}`, (m) => m.createModerationAdapter({ provider }));
  }
  const factory = factories.get(key);
  if (!factory) {
    throw new Error(
      `Unknown defense adapter: ${name}. Available: mock, rebuff, lakera, llm-guard, garak, moderation-{provider}`
    );
  }
  return factory(await import("@reaatech/pi-bench-adapters"));
}
|
|
154
|
+
|
|
155
|
+
// src/cli/commands/benchmark.command.ts
|
|
156
|
+
var import_pi_bench_adapters2 = require("@reaatech/pi-bench-adapters");
|
|
157
|
+
var import_pi_bench_adapters3 = require("@reaatech/pi-bench-adapters");
|
|
158
|
+
var import_pi_bench_core2 = require("@reaatech/pi-bench-core");
|
|
159
|
+
var import_pi_bench_corpus2 = require("@reaatech/pi-bench-corpus");
|
|
160
|
+
var import_pi_bench_observability2 = require("@reaatech/pi-bench-observability");
|
|
161
|
+
var import_pi_bench_runner3 = require("@reaatech/pi-bench-runner");
|
|
162
|
+
var import_pi_bench_runner4 = require("@reaatech/pi-bench-runner");
|
|
163
|
+
var import_pi_bench_runner5 = require("@reaatech/pi-bench-runner");
|
|
164
|
+
var import_commander2 = require("commander");
|
|
165
|
+
var logger2 = (0, import_pi_bench_observability2.createLogger)();
|
|
166
|
+
/**
 * Resolve a defense adapter by name for the `benchmark` command.
 *
 * `mock` is created synchronously from the already-required adapters module.
 * Every other known name returns a promise that resolves to the adapter once
 * the adapters package has been dynamically imported. Matching is
 * case-insensitive.
 *
 * @param {string} name - Adapter name as typed by the user.
 * @returns {object|Promise<object>} The adapter, or a promise for it.
 * @throws {Error} Synchronously, when the name is not recognised.
 */
function loadAdapter2(name) {
  const key = name.toLowerCase();
  if (key === "mock") {
    const { createMockAdapter } = import_pi_bench_adapters2;
    return createMockAdapter(0.85, 0.05);
  }
  const withAdapters = (build) => import("@reaatech/pi-bench-adapters").then(build);
  if (key === "rebuff") {
    return withAdapters((m) => m.createRebuffAdapter());
  }
  if (key === "lakera") {
    return withAdapters((m) => m.createLakeraAdapter());
  }
  if (key === "llm-guard") {
    return withAdapters((m) => m.createLLMGuardAdapter());
  }
  if (key === "garak") {
    return withAdapters((m) => m.createGarakAdapter());
  }
  const moderationNames = [
    "moderation-openai",
    "moderation-azure",
    "moderation-anthropic",
    "moderation-cohere"
  ];
  if (moderationNames.includes(key)) {
    // The provider is whatever follows the "moderation-" prefix.
    const provider = key.replace("moderation-", "");
    return withAdapters((m) => m.createModerationAdapter({ provider }));
  }
  throw new Error(
    `Unknown defense adapter: ${name}. Available: mock, rebuff, lakera, llm-guard, garak, moderation-{provider}`
  );
}
|
|
193
|
+
/**
 * Build the `benchmark` sub-command.
 *
 * The command runs the default corpus (optionally restricted to
 * `--categories`) plus generated benign samples through the benchmark engine
 * against one defense adapter, evaluates the run, prints a score summary and,
 * with `--output`, writes the full result as JSON.
 *
 * The process exits with code 1 on invalid options, unknown categories, or
 * any failure while running.
 *
 * @returns The configured commander `Command` instance.
 */
function createBenchmarkCommand() {
  const command = new import_commander2.Command("benchmark");
  command
    .description("Run full benchmark against defenses")
    .requiredOption("-d, --defense <name>", "Defense adapter name")
    .option("-c, --corpus <version>", "Corpus version to use", "2026.04")
    .option("--categories <categories>", "Comma-separated categories to include")
    .option("-p, --parallel <count>", "Number of parallel executions", "10")
    .option("-t, --timeout <ms>", "Timeout per attack in ms", "30000")
    .option("-o, --output <file>", "Output file for results")
    .option("--benign-count <number>", "Number of benign samples", "100")
    .action(async (options) => {
      (0, import_pi_bench_adapters3.validateEnvironment)();
      const defenseName = options.defense;
      const corpusVersion = options.corpus;
      const parallel = Number.parseInt(options.parallel, 10);
      const timeoutMs = Number.parseInt(options.timeout, 10);
      const benignCount = Number.parseInt(options.benignCount, 10);
      // Reject unparsable numbers here instead of handing NaN to the engine.
      const numericChecks = [
        ["parallel", options.parallel, parallel, 1],
        ["timeout", options.timeout, timeoutMs, 1],
        ["benign-count", options.benignCount, benignCount, 0]
      ];
      for (const [label, raw, parsed, minimum] of numericChecks) {
        if (!Number.isInteger(parsed) || parsed < minimum) {
          console.error(`Invalid ${label}: ${raw} (expected an integer >= ${minimum})`);
          process.exit(1);
        }
      }
      const knownCategories = (0, import_pi_bench_core2.getCategoryIds)();
      let categories;
      if (options.categories) {
        // Drop empty entries so a trailing comma is not treated as a category.
        categories = options.categories.split(",").map((c) => c.trim()).filter((c) => c.length > 0);
        // Same check as the attack command: an unknown category would
        // otherwise just produce an empty or partial run without any warning.
        const unknown = categories.filter((c) => !knownCategories.includes(c));
        if (categories.length === 0 || unknown.length > 0) {
          console.error(`Invalid categories: ${unknown.length > 0 ? unknown.join(", ") : options.categories}`);
          console.error(`Valid categories: ${knownCategories.join(", ")}`);
          process.exit(1);
        }
      }
      logger2.info("Starting benchmark", {
        defense: defenseName,
        corpus: corpusVersion,
        parallel,
        timeoutMs
      });
      // Declared outside the try block so the failure path can release it.
      let adapter;
      try {
        adapter = await loadAdapter2(defenseName);
        await adapter.initialize?.();
        const engine = (0, import_pi_bench_runner3.createBenchmarkEngine)({
          maxParallel: parallel,
          defaultTimeoutMs: timeoutMs
        });
        engine.setAdapter(adapter);
        // NOTE(review): the corpus is always the generated default one;
        // --corpus is only recorded in the run config and the output metadata.
        const corpus = (0, import_pi_bench_corpus2.generateDefaultCorpus)();
        const attackSamples = categories ? corpus.filter((s) => categories.includes(s.category)) : corpus;
        const benignSamples = (0, import_pi_bench_runner5.generateBenignSamples)(benignCount);
        const progressCallback = (progress) => {
          logger2.info("Progress", {
            completed: progress.completed,
            total: progress.total,
            detected: progress.detected,
            errors: progress.errors
          });
        };
        const result = await engine.runBenchmark(
          {
            defense: defenseName,
            corpusVersion,
            categories: categories || knownCategories,
            parallel,
            timeoutMs,
            seed: Date.now().toString(36)
          },
          attackSamples,
          benignSamples,
          progressCallback
        );
        const evaluator = (0, import_pi_bench_runner4.createDefenseEvaluator)();
        const evaluation = evaluator.evaluate(result);
        const output = {
          ...result,
          score: evaluation.score,
          metadata: {
            generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
            corpusVersion,
            adapterVersion: adapter.version
          }
        };
        if (options.output) {
          const fs = await import("fs");
          fs.writeFileSync(options.output, JSON.stringify(output, null, 2));
          logger2.info(`Results written to ${options.output}`);
        }
        const { score, overallMetrics } = evaluation;
        console.log("\n=== Benchmark Results ===");
        console.log(`Defense: ${defenseName}`);
        console.log(`Version: ${adapter.version}`);
        console.log(`Overall Score: ${(score.overallScore * 100).toFixed(1)}%`);
        console.log(
          `Attack Success Rate: ${(score.attackSuccessRate * 100).toFixed(1)}%`
        );
        console.log(
          `False Positive Rate: ${(score.falsePositiveRate * 100).toFixed(1)}%`
        );
        console.log(`Avg Latency: ${score.avgLatencyMs.toFixed(1)}ms`);
        console.log(`\nTotal Attacks: ${overallMetrics.totalAttacks}`);
        console.log(
          `Detected: ${overallMetrics.totalAttacks - overallMetrics.bypassedAttacks}`
        );
        console.log(`Bypassed: ${overallMetrics.bypassedAttacks}`);
        // Clear the shared reference first so a failing cleanup is not retried
        // by the catch block below.
        const activeAdapter = adapter;
        adapter = void 0;
        await activeAdapter.cleanup?.();
      } catch (error) {
        logger2.error("Benchmark failed", {
          error: error instanceof Error ? error.message : String(error)
        });
        console.error(`Error: ${error instanceof Error ? error.message : error}`);
        if (adapter) {
          // Best-effort release; the original failure is what gets reported.
          try {
            await adapter.cleanup?.();
          } catch (cleanupError) {
            logger2.error("Adapter cleanup failed", {
              error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError)
            });
          }
        }
        process.exit(1);
      }
    });
  return command;
}
|
|
288
|
+
|
|
289
|
+
// src/cli/commands/compare.command.ts
|
|
290
|
+
var import_pi_bench_observability3 = require("@reaatech/pi-bench-observability");
|
|
291
|
+
var import_pi_bench_runner6 = require("@reaatech/pi-bench-runner");
|
|
292
|
+
var import_pi_bench_scoring = require("@reaatech/pi-bench-scoring");
|
|
293
|
+
var import_pi_bench_scoring2 = require("@reaatech/pi-bench-scoring");
|
|
294
|
+
var import_commander3 = require("commander");
|
|
295
|
+
var logger3 = (0, import_pi_bench_observability3.createLogger)();
|
|
296
|
+
/**
 * Build the `compare` sub-command.
 *
 * The command loads two or more JSON result files, compares every pair of
 * defenses with the defense evaluator, prints the pairwise differences and a
 * ranking by detection rate, and, with `--output`, writes the comparison as
 * JSON.
 *
 * The process exits with code 1 when fewer than two files are given, when the
 * significance level is not a number strictly between 0 and 1, or on any
 * failure while reading or comparing.
 *
 * @returns The configured commander `Command` instance.
 */
function createCompareCommand() {
  const command = new import_commander3.Command("compare");
  command
    .description("Compare multiple defense results")
    .requiredOption("-r, --results <files...>", "Result files to compare")
    .option("-s, --significance <level>", "Statistical significance level", "0.05")
    .option("-o, --output <file>", "Output file for comparison results")
    .action(async (options) => {
      const resultFiles = options.results;
      const significanceLevel = Number.parseFloat(options.significance);
      if (resultFiles.length < 2) {
        console.error("Error: At least 2 result files are required for comparison");
        process.exit(1);
      }
      // parseFloat yields NaN for non-numeric input; the comparison is written
      // so that NaN fails it as well.
      if (!(significanceLevel > 0 && significanceLevel < 1)) {
        console.error(
          `Error: Invalid significance level: ${options.significance} (expected a number between 0 and 1)`
        );
        process.exit(1);
      }
      logger3.info("Comparing defenses", { files: resultFiles, significance: significanceLevel });
      try {
        const fs = await import("fs");
        const results = resultFiles.map((file) => {
          let parsed;
          try {
            parsed = JSON.parse(fs.readFileSync(file, "utf-8"));
          } catch (readError) {
            // Name the offending file; the raw fs/JSON message alone does not
            // say which of several inputs was bad.
            const reason = readError instanceof Error ? readError.message : String(readError);
            throw new Error(`Failed to load result file ${file}: ${reason}`);
          }
          if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
            throw new Error(`Result file ${file} does not contain a JSON object`);
          }
          return parsed;
        });
        // Score every result once; both the pairwise comparison and the
        // statistical summary read from this list.
        const scores = results.map(
          (r) => r.score || (0, import_pi_bench_scoring.calculateDefenseScore)(r)
        );
        // Single source for display names so all outputs agree on the fallback.
        const nameOf = (index) => results[index]?.defense || results[index]?.defenseName || `Defense ${index + 1}`;
        const comparisons = [];
        const evaluator = (0, import_pi_bench_runner6.createDefenseEvaluator)();
        for (let i = 0; i < results.length; i++) {
          for (let j = i + 1; j < results.length; j++) {
            const pairwise = evaluator.compare(scores[i], scores[j]);
            comparisons.push({
              defense1: nameOf(i),
              defense2: nameOf(j),
              winner: pairwise.winner,
              scoreDifference: pairwise.scoreDifference,
              asrDifference: pairwise.asrDifference,
              fprDifference: pairwise.fprDifference,
              categoryWins: pairwise.categoryWins
            });
          }
        }
        console.log("\n=== Defense Comparison Results ===\n");
        for (const comp of comparisons) {
          console.log(`${comp.defense1} vs ${comp.defense2}`);
          console.log(`  Winner: ${comp.winner}`);
          console.log(`  Score Difference: ${(comp.scoreDifference * 100).toFixed(1)}%`);
          console.log(`  ASR Difference: ${(comp.asrDifference * 100).toFixed(1)}%`);
          console.log(`  FPR Difference: ${(comp.fprDifference * 100).toFixed(1)}%`);
          console.log("");
        }
        // NOTE(review): detectedAttacks/bypassedAttacks are passed as 0 and
        // totalAttacks is taken from score.totalSamples; confirm that
        // compareMetrics does not depend on the real counts.
        const detectionRates = scores.map((score) => ({
          detectionRate: 1 - score.attackSuccessRate,
          falsePositiveRate: score.falsePositiveRate,
          avgLatencyMs: score.avgLatencyMs,
          totalAttacks: score.totalSamples,
          detectedAttacks: 0,
          bypassedAttacks: 0
        }));
        const comparison = (0, import_pi_bench_scoring2.compareMetrics)(detectionRates, significanceLevel);
        console.log("=== Statistical Summary ===");
        console.log(`Significance Level: ${(significanceLevel * 100).toFixed(0)}%`);
        console.log(`Significant Difference: ${comparison.significantDifference ? "Yes" : "No"}`);
        console.log("\nRankings (by detection rate):");
        // Each ranking value is used as an index into `results`.
        comparison.rankings.forEach((rank, idx) => {
          const dr = ((detectionRates[rank]?.detectionRate || 0) * 100).toFixed(1);
          console.log(`  ${idx + 1}. ${nameOf(rank)}: ${dr}% detection rate`);
        });
        if (options.output) {
          fs.writeFileSync(
            options.output,
            JSON.stringify(
              {
                comparisons,
                statisticalSummary: {
                  significanceLevel,
                  significantDifference: comparison.significantDifference,
                  rankings: comparison.rankings.map((rank, idx) => ({
                    rank: idx + 1,
                    name: nameOf(rank),
                    detectionRate: detectionRates[rank]?.detectionRate || 0
                  }))
                },
                timestamp: (/* @__PURE__ */ new Date()).toISOString()
              },
              null,
              2
            )
          );
          logger3.info(`Comparison results written to ${options.output}`);
        }
      } catch (error) {
        logger3.error("Comparison failed", {
          error: error instanceof Error ? error.message : String(error)
        });
        console.error(`Error: ${error instanceof Error ? error.message : error}`);
        process.exit(1);
      }
    });
  return command;
}
|
|
401
|
+
|
|
402
|
+
// src/cli/commands/corpus.command.ts
|
|
403
|
+
var import_pi_bench_core3 = require("@reaatech/pi-bench-core");
|
|
404
|
+
var import_pi_bench_corpus3 = require("@reaatech/pi-bench-corpus");
|
|
405
|
+
var import_pi_bench_corpus4 = require("@reaatech/pi-bench-corpus");
|
|
406
|
+
var import_pi_bench_observability4 = require("@reaatech/pi-bench-observability");
|
|
407
|
+
var import_commander4 = require("commander");
|
|
408
|
+
var logger4 = (0, import_pi_bench_observability4.createLogger)();
|
|
409
|
+
/**
 * Build the `corpus` command group with four sub-commands:
 *
 * - `list`     prints the attack categories and the known corpus versions.
 * - `generate` builds the default corpus, validates it and prints statistics.
 * - `validate` reads every .yaml/.json file in a directory and prints the
 *              sample count and per-category distribution.
 * - `export`   serialises the default corpus as JSON, YAML or CSV to stdout
 *              or to the file given by `--output`.
 *
 * Failing sub-commands exit the process with code 1.
 *
 * @returns The configured commander `Command` instance.
 */
function createCorpusCommand() {
  const command = new import_commander4.Command("corpus");
  command.description("Manage test corpus");

  command
    .command("list")
    .description("List available corpus versions and categories")
    .action(async () => {
      console.log("=== Attack Categories ===\n");
      for (const cat of import_pi_bench_core3.ATTACK_CATEGORIES) {
        console.log(`  ${cat.id}`);
        console.log(`    Name: ${cat.name}`);
        console.log(`    Weight: ${cat.weight}`);
        console.log(`    Min Samples: ${cat.minSamples}`);
        console.log(`    Description: ${cat.description}`);
        console.log("");
      }
      console.log("=== Available Corpus Versions ===\n");
      console.log("  2026.04 (current)");
      console.log("    Total samples: ~300");
      console.log("    Categories: 8");
      console.log("    Last updated: 2026-04-01");
      console.log("");
      console.log("  2026.01 (deprecated)");
      console.log("    Total samples: ~250");
      console.log("    Categories: 6");
      console.log("    Last updated: 2026-01-15");
    });

  command
    .command("generate")
    .description("Generate a new corpus version")
    .option("-o, --output <dir>", "Output directory for corpus", "./corpus/generated")
    .option("-v, --version <version>", "Corpus version string", `v${Date.now().toString(36)}`)
    .option("-s, --samples <number>", "Samples per category", "40")
    .action(async (options) => {
      const outputDir = options.output;
      const version = options.version;
      const samplesPerCategory = Number.parseInt(options.samples, 10);
      if (!Number.isInteger(samplesPerCategory) || samplesPerCategory <= 0) {
        console.error(`Invalid samples: ${options.samples} (expected a positive integer)`);
        process.exit(1);
      }
      logger4.info("Generating corpus", { version, outputDir, samplesPerCategory });
      try {
        const builder = (0, import_pi_bench_corpus3.createCorpusBuilder)({
          version,
          samplesPerCategory
        });
        const corpus = (0, import_pi_bench_corpus3.generateDefaultCorpus)();
        builder.addSamples(corpus);
        const validator = (0, import_pi_bench_corpus4.createCorpusValidator)();
        const validation = validator.validate(corpus);
        console.log("\n=== Corpus Generation Complete ===\n");
        console.log(`Version: ${version}`);
        console.log(`Total Samples: ${corpus.length}`);
        console.log(`Output Directory: ${outputDir}`);
        if (!validation.valid) {
          console.log("\nValidation Errors:");
          for (const e of validation.errors) {
            console.log(`  - ${e}`);
          }
        }
        if (validation.warnings.length > 0) {
          console.log("\nWarnings:");
          for (const w of validation.warnings) {
            console.log(`  - ${w}`);
          }
        }
        console.log("\nStatistics:");
        console.log(`  Unique Prompts: ${validation.stats.uniquePrompts}`);
        console.log(`  Duplicates: ${validation.stats.duplicateCount}`);
        console.log("  Category Distribution:");
        for (const [cat, count] of Object.entries(validation.stats.categoryDistribution)) {
          console.log(`    ${cat}: ${count}`);
        }
        // Nothing is written to disk by this command; the note below is only
        // printed when a non-default output directory was requested.
        if (outputDir !== "./corpus/generated") {
          console.log(`\nNote: Writing to ${outputDir} requires file system access.`);
          console.log("Generated corpus is available in memory for immediate use.");
        }
      } catch (error) {
        logger4.error("Corpus generation failed", {
          error: error instanceof Error ? error.message : String(error)
        });
        console.error(`Error: ${error instanceof Error ? error.message : error}`);
        process.exit(1);
      }
    });

  command
    .command("validate")
    .description("Validate a corpus")
    .requiredOption("-i, --input <dir>", "Corpus directory to validate")
    .action(async (options) => {
      const inputDir = options.input;
      logger4.info("Validating corpus", { input: inputDir });
      console.log("\n=== Corpus Validation ===\n");
      console.log(`Input: ${inputDir}`);
      try {
        const fs = await import("fs");
        const path = await import("path");
        if (!fs.existsSync(inputDir)) {
          // This is an error that ends the process, so report it on stderr.
          console.error("Error: Directory does not exist");
          process.exit(1);
        }
        const files = fs.readdirSync(inputDir).filter((f) => f.endsWith(".yaml") || f.endsWith(".json"));
        if (files.length === 0) {
          console.log("Warning: No corpus files (.yaml or .json) found in directory");
          return;
        }
        let totalSamples = 0;
        // Category names come from the files being validated, so keep them in
        // a Map instead of using them as keys of a plain object.
        const categoryStats = new Map();
        for (const file of files) {
          const content = fs.readFileSync(path.join(inputDir, file), "utf-8");
          const data = file.endsWith(".json") ? JSON.parse(content) : await import("yaml").then((y) => y.parse(content));
          // A file may parse to null (for example an empty document) or carry
          // a non-array `samples` value; treat both as "no samples".
          const candidate = Array.isArray(data) ? data : data?.samples;
          const samples = Array.isArray(candidate) ? candidate : [];
          totalSamples += samples.length;
          for (const sample of samples) {
            const cat = sample?.category || "unknown";
            categoryStats.set(cat, (categoryStats.get(cat) || 0) + 1);
          }
        }
        console.log(`\nTotal Samples: ${totalSamples}`);
        console.log("\nCategory Distribution:");
        for (const [cat, count] of categoryStats) {
          console.log(`  ${cat}: ${count}`);
        }
        // The "unknown" bucket collects samples without a category and must
        // not count toward category coverage.
        const namedCategories = [...categoryStats.keys()].filter((cat) => cat !== "unknown");
        if (namedCategories.length < 8) {
          console.log("\nWarning: Not all 8 categories are represented in the corpus");
        }
      } catch (error) {
        logger4.error("Validation failed", {
          error: error instanceof Error ? error.message : String(error)
        });
        console.error(`Error: ${error instanceof Error ? error.message : error}`);
        process.exit(1);
      }
    });

  command
    .command("export")
    .description("Export corpus to a specific format")
    .requiredOption("-i, --input <dir>", "Corpus directory")
    .option("-f, --format <format>", "Output format (json, yaml, csv)", "json")
    .option("-o, --output <file>", "Output file (default: stdout)")
    .action(async (options) => {
      const inputDir = options.input;
      const format = options.format;
      const outputFile = options.output;
      logger4.info("Exporting corpus", { input: inputDir, format, output: outputFile || "stdout" });
      console.log("\n=== Corpus Export ===\n");
      console.log(`Format: ${format}`);
      console.log(`Source: ${inputDir}`);
      if (!["json", "yaml", "csv"].includes(format)) {
        // Previously an unknown format produced no output and no error.
        console.error(`Error: Unsupported format: ${format} (expected json, yaml or csv)`);
        process.exit(1);
      }
      try {
        // NOTE(review): the exported data is always the generated default
        // corpus; --input is only echoed above and is not read.
        const corpus = (0, import_pi_bench_corpus3.generateDefaultCorpus)();
        let serialized;
        if (format === "json") {
          serialized = JSON.stringify(corpus, null, 2);
        } else if (format === "yaml") {
          const yaml = await import("yaml");
          serialized = yaml.stringify(corpus);
        } else {
          // Always-quoted cell with embedded quotes doubled.
          const quoted = (value) => `"${String(value ?? "").replace(/"/g, '""')}"`;
          // Bare cell, quoted only when it contains a CSV metacharacter.
          const bare = (value) => {
            const text = String(value ?? "");
            return /[",\r\n]/.test(text) ? quoted(text) : text;
          };
          const rows = ["id,category,severity,prompt,tags"];
          for (const sample of corpus) {
            rows.push(
              [
                bare(sample.id),
                bare(sample.category),
                bare(sample.severity),
                quoted(sample.prompt),
                quoted((sample.tags ?? []).join(";"))
              ].join(",")
            );
          }
          serialized = rows.join("\n");
        }
        if (outputFile) {
          console.log(`Output: ${outputFile}`);
          const fs = await import("fs");
          fs.writeFileSync(outputFile, serialized);
          logger4.info(`Corpus written to ${outputFile}`);
        } else {
          console.log("\nExporting to stdout...");
          console.log(serialized);
        }
      } catch (error) {
        logger4.error("Corpus export failed", {
          error: error instanceof Error ? error.message : String(error)
        });
        console.error(`Error: ${error instanceof Error ? error.message : error}`);
        process.exit(1);
      }
    });

  return command;
}
|
|
560
|
+
|
|
561
|
+
// src/cli/commands/leaderboard.command.ts
|
|
562
|
+
var import_pi_bench_core4 = require("@reaatech/pi-bench-core");
|
|
563
|
+
var import_pi_bench_leaderboard = require("@reaatech/pi-bench-leaderboard");
|
|
564
|
+
var import_pi_bench_leaderboard2 = require("@reaatech/pi-bench-leaderboard");
|
|
565
|
+
var import_pi_bench_observability5 = require("@reaatech/pi-bench-observability");
|
|
566
|
+
var import_commander5 = require("commander");
|
|
567
|
+
var logger5 = (0, import_pi_bench_observability5.createLogger)();
|
|
568
|
+
function createLeaderboardCommand() {
|
|
569
|
+
const command = new import_commander5.Command("leaderboard");
|
|
570
|
+
command.description("View/submit to leaderboard");
|
|
571
|
+
command.command("view").description("View current leaderboard rankings").option("-c, --category <category>", "Filter by attack category").option("-n, --limit <number>", "Number of entries to show", "10").option("--file <path>", "Leaderboard storage file", (0, import_pi_bench_leaderboard2.getDefaultLeaderboardPath)()).action(async (options) => {
|
|
572
|
+
const limit = Number.parseInt(options.limit, 10);
|
|
573
|
+
const category = options.category;
|
|
574
|
+
const manager = (0, import_pi_bench_leaderboard.createLeaderboardManager)();
|
|
575
|
+
manager.replaceEntries((0, import_pi_bench_leaderboard2.loadLeaderboardEntries)(options.file));
|
|
576
|
+
const entries = category ? manager.getRankings(manager.getEntryCount() || limit).filter((e) => e.categoryScores && category in e.categoryScores) : manager.getRankings(limit);
|
|
577
|
+
console.log("\n=== Prompt Injection Defense Leaderboard ===\n");
|
|
578
|
+
console.log("Rank | Defense | Version | Overall Score | Submitted");
|
|
579
|
+
console.log("-----|---------|---------|---------------|----------");
|
|
580
|
+
entries.slice(0, limit).forEach((entry, idx) => {
|
|
581
|
+
const date = new Date(entry.submittedAt).toLocaleDateString();
|
|
582
|
+
console.log(
|
|
583
|
+
`${`${idx + 1} | ${entry.defense.padEnd(12)} | ${entry.version.padEnd(7)} | ${(entry.overallScore * 100).toFixed(1)}%`.padEnd(
|
|
584
|
+
40
|
|
585
|
+
)} | ${date}`
|
|
586
|
+
);
|
|
587
|
+
});
|
|
588
|
+
if (entries.length === 0) {
|
|
589
|
+
console.log("No entries found.");
|
|
590
|
+
}
|
|
591
|
+
console.log("\n");
|
|
592
|
+
console.log(`Storage: ${options.file}`);
|
|
593
|
+
console.log("Submit a local entry with:");
|
|
594
|
+
console.log(" prompt-injection-bench leaderboard submit -r results.json --defense <name>");
|
|
595
|
+
});
|
|
596
|
+
// `leaderboard submit`: read a results JSON file, derive a leaderboard entry
// from its score, append it to the stored entries and write them back.
// Any failure is logged, echoed to stderr, and ends the process with code 1.
command
  .command("submit")
  .description("Submit results to the leaderboard")
  .requiredOption("-r, --results <file>", "Results file to submit")
  .requiredOption("--defense <name>", "Defense name")
  .option("--defense-version <version>", "Defense version")
  .option("--seed <seed>", "Random seed used for the run")
  .option("--private", "Submit as private (not publicly visible)")
  .option("--file <path>", "Leaderboard storage file", (0, import_pi_bench_leaderboard2.getDefaultLeaderboardPath)())
  .action(async (options) => {
    const { results: resultsPath, defense, defenseVersion: versionOverride, seed: runSeed } = options;
    logger5.info("Submitting to leaderboard", { defense, version: versionOverride });
    try {
      const fs = await import("fs");
      const parsed = JSON.parse(fs.readFileSync(resultsPath, "utf-8"));
      const score = resolveScore(parsed);
      // Key order is kept as-is because the entry is serialised to storage.
      const entry = {
        defense,
        version: versionOverride || score.version,
        overallScore: score.overallScore,
        rank: 0,
        submittedAt: new Date().toISOString(),
        corpusVersion: parsed.corpusVersion || parsed.metadata?.corpusVersion || "2026.04",
        categoryScores: mapCategoryScores(score),
        // The seed doubles as the proof hash when given; otherwise a
        // timestamp-based local id is generated.
        proofHash: runSeed || `local-${Date.now().toString(36)}`,
        submitter: options.private ? "anonymous" : "local-user"
      };
      const board = (0, import_pi_bench_leaderboard.createLeaderboardManager)();
      board.replaceEntries((0, import_pi_bench_leaderboard2.loadLeaderboardEntries)(options.file));
      board.addEntry(entry);
      (0, import_pi_bench_leaderboard2.saveLeaderboardEntries)(board.getAllEntries(), options.file);
      const summary = [
        "\n=== Submission Successful ===",
        `Defense: ${entry.defense}`,
        `Version: ${entry.version}`,
        `Overall Score: ${(entry.overallScore * 100).toFixed(1)}%`,
        `Proof Hash: ${entry.proofHash}`,
        `Stored at: ${options.file}`
      ];
      for (const line of summary) {
        console.log(line);
      }
    } catch (error) {
      const isError = error instanceof Error;
      logger5.error("Submission failed", {
        error: isError ? error.message : String(error)
      });
      console.error(`Error: ${isError ? error.message : error}`);
      process.exit(1);
    }
  });
|
|
636
|
+
// `leaderboard verify`: look up a stored entry by its proof hash and print
// its details. A miss prints "not found" and sets the process exit code to 1.
command
  .command("verify")
  .description("Verify a submission")
  .requiredOption("--id <id>", "Submission proof hash to verify")
  .option("--file <path>", "Leaderboard storage file", (0, import_pi_bench_leaderboard2.getDefaultLeaderboardPath)())
  .action(async (options) => {
    const submissionId = options.id;
    const board = (0, import_pi_bench_leaderboard.createLeaderboardManager)();
    board.replaceEntries((0, import_pi_bench_leaderboard2.loadLeaderboardEntries)(options.file));
    const match = board.getAllEntries().find((candidate) => candidate.proofHash === submissionId);
    console.log("\n=== Verifying Submission ===");
    console.log(`Submission ID: ${submissionId}`);
    console.log("");
    if (match) {
      console.log("Status: verified");
      console.log(`Defense: ${match.defense}`);
      console.log(`Version: ${match.version}`);
      console.log(`Overall Score: ${(match.overallScore * 100).toFixed(1)}%`);
      console.log(`Submitted: ${match.submittedAt}`);
      console.log(`Storage: ${options.file}`);
    } else {
      console.log("Status: not found");
      console.log(`Storage: ${options.file}`);
      process.exitCode = 1;
    }
  });
|
|
657
|
+
return command;
|
|
658
|
+
}
|
|
659
|
+
/**
 * Pick the score payload out of a parsed results file and run it through
 * `DefenseScoreSchema.parse`.
 *
 * @param {object} results - Parsed results JSON.
 * @returns The value returned by `DefenseScoreSchema.parse` for
 *   `results.score` when that property is a truthy object, otherwise for
 *   `results` itself.
 */
function resolveScore(results) {
  const nested = results.score;
  const hasNestedScore = Boolean(nested) && typeof nested === "object";
  const candidate = hasNestedScore ? nested : results;
  return import_pi_bench_core4.DefenseScoreSchema.parse(candidate);
}
|
|
665
|
+
/**
 * Reduce a score's per-category records to a plain `category -> detectionRate`
 * object.
 *
 * @param {{ categoryScores: Record<string, { detectionRate: number }> }} score
 * @returns {Record<string, number>} One key per category, in the same order.
 */
function mapCategoryScores(score) {
  const pairs = [];
  for (const [name, stats] of Object.entries(score.categoryScores)) {
    pairs.push([name, stats.detectionRate]);
  }
  return Object.fromEntries(pairs);
}
|
|
673
|
+
|
|
674
|
+
// src/cli/commands/report.command.ts
|
|
675
|
+
var import_pi_bench_mcp_server = require("@reaatech/pi-bench-mcp-server");
|
|
676
|
+
var import_commander6 = require("commander");
|
|
677
|
+
/**
 * Build the `report` subcommand, which turns a benchmark results file into a
 * JSON, Markdown or HTML report written to disk.
 *
 * Options:
 *   -r, --results <file>       results JSON to read (required)
 *   -f, --format <format>      "json" (default), "html" or "markdown"
 *   -o, --output <file>        output path; defaults to `report.<format>`
 *   --include-categories       include the category breakdown (default)
 *   --no-include-categories    omit the category breakdown
 *
 * The action exits the process with code 1 on an unsupported format or when
 * reading, parsing, or writing fails.
 *
 * @returns The configured commander `Command` instance.
 */
function createReportCommand() {
  const supportedFormats = ["json", "html", "markdown"];
  const command = new import_commander6.Command("report");
  command
    .description("Generate reports from benchmark results")
    .requiredOption("-r, --results <file>", "Results file to generate report from")
    .option("-f, --format <format>", "Output format (json, html, markdown)", "json")
    .option("-o, --output <file>", "Output file")
    .option("--include-categories", "Include category breakdown", true)
    // The positive flag defaults to true, so without a negated form the
    // breakdown could never be disabled from the command line.
    .option("--no-include-categories", "Omit category breakdown")
    .action(async (options) => {
      const resultsFile = options.results;
      const format = options.format;
      // Validate the format before touching the filesystem.
      if (!supportedFormats.includes(format)) {
        console.error(`Unsupported format: ${format}`);
        process.exit(1);
        return;
      }
      const outputFile = options.output || `report.${format}`;
      const includeCategories = options.includeCategories;
      console.log(`Generating ${format} report from: ${resultsFile}`);
      try {
        const fs = await import("fs");
        const results = JSON.parse(fs.readFileSync(resultsFile, "utf-8"));
        const reportData = (0, import_pi_bench_mcp_server.normalizeReportData)(results);
        let report;
        if (format === "json") {
          report = JSON.stringify(reportData, null, 2);
        } else if (format === "markdown") {
          report = generateMarkdownReport(reportData, includeCategories);
        } else {
          report = generateHtmlReport(reportData, includeCategories);
        }
        fs.writeFileSync(outputFile, report);
        console.log(`
Report generated: ${outputFile}`);
      } catch (error) {
        // Report I/O and parse failures the same way `leaderboard submit`
        // does, instead of leaving an unhandled promise rejection.
        console.error(`Error: ${error instanceof Error ? error.message : error}`);
        process.exit(1);
      }
    });
  return command;
}
|
|
705
|
+
/**
 * Render normalized report data as a Markdown document.
 *
 * @param {object} r - Report data; reads `generatedAt`, `defense`, `version`,
 *   `detectionRate`, `falsePositiveRate`, `totalAttacks`, `avgLatencyMs` and
 *   `categoryBreakdown`.
 * @param {boolean} includeCategories - When truthy and the breakdown is
 *   non-empty, append a per-category table.
 * @returns {string} The Markdown text.
 */
function generateMarkdownReport(r, includeCategories) {
  const asPercent = (rate) => `${(rate * 100).toFixed(1)}%`;
  const parts = ["# Prompt Injection Benchmark Report\n\n"];
  // Fall back to the current time when the data carries no timestamp.
  parts.push(`Generated: ${r.generatedAt || new Date().toISOString()}\n\n`);
  if (r.defense) {
    const versionSuffix = r.version ? ` (${r.version})` : "";
    parts.push(`Defense: ${r.defense}${versionSuffix}\n\n`);
  }
  parts.push("## Summary\n\n");
  parts.push("| Metric | Value |\n|--------|-------|\n");
  parts.push(`| Detection Rate | ${asPercent(r.detectionRate)} |\n`);
  parts.push(`| False Positive Rate | ${asPercent(r.falsePositiveRate)} |\n`);
  parts.push(`| Total Attacks | ${r.totalAttacks} |\n`);
  parts.push(`| Avg Latency | ${r.avgLatencyMs.toFixed(1)}ms |\n`);
  // The breakdown is only inspected when categories were requested.
  if (includeCategories && Object.keys(r.categoryBreakdown).length > 0) {
    parts.push("\n## Category Breakdown\n\n");
    parts.push("| Category | Detection Rate | Attacks |\n|----------|----------------|---------|\n");
    Object.entries(r.categoryBreakdown).forEach(([name, stats]) => {
      parts.push(`| ${name} | ${asPercent(stats.detectionRate)} | ${stats.totalAttacks} |\n`);
    });
  }
  return parts.join("");
}
|
|
735
|
+
/**
 * Render normalized report data as a standalone HTML page.
 *
 * Every value taken from the report data that is not produced by `toFixed`
 * is HTML-escaped before interpolation, because the data originates from a
 * user-supplied results file and the page is meant to be opened in a browser.
 *
 * @param {object} r - Report data; reads `generatedAt`, `defense`, `version`,
 *   `detectionRate`, `falsePositiveRate`, `totalAttacks`, `avgLatencyMs` and
 *   `categoryBreakdown`.
 * @param {boolean} includeCategories - When truthy and the breakdown is
 *   non-empty, append a per-category table.
 * @returns {string} The HTML document.
 */
function generateHtmlReport(r, includeCategories) {
  const htmlEntities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  // Kept local so the block does not depend on (or clash with) any other helper in the bundle.
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);
  const asPercent = (rate) => `${(rate * 100).toFixed(1)}%`;

  const generatedAt = escapeHtml(r.generatedAt || new Date().toISOString());
  const defenseLine = r.defense
    ? `<p>Defense: ${escapeHtml(r.defense)}${r.version ? ` (${escapeHtml(r.version)})` : ""}</p>`
    : "";

  let categorySection = "";
  // The breakdown is only inspected when categories were requested.
  if (includeCategories && Object.keys(r.categoryBreakdown).length > 0) {
    const rows = Object.entries(r.categoryBreakdown)
      .map(
        ([category, data]) =>
          `<tr><td>${escapeHtml(category)}</td><td>${asPercent(data.detectionRate)}</td><td>${escapeHtml(data.totalAttacks)}</td></tr>`
      )
      .join("");
    categorySection = `
<h2>Category Breakdown</h2>
<table>
<tr><th>Category</th><th>Detection Rate</th><th>Attacks</th></tr>
${rows}
</table>
`;
  }

  return `<!DOCTYPE html>
<html>
<head>
<title>Prompt Injection Benchmark Report</title>
<style>
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; max-width: 800px; margin: 40px auto; padding: 0 20px; }
h1 { color: #333; }
table { border-collapse: collapse; width: 100%; margin: 20px 0; }
th, td { border: 1px solid #ddd; padding: 12px; text-align: left; }
th { background: #f5f5f5; }
.metric { display: inline-block; margin: 10px 20px 10px 0; padding: 15px 25px; background: #f8f9fa; border-radius: 8px; }
.metric-value { font-size: 24px; font-weight: bold; color: #007bff; }
.metric-label { font-size: 12px; color: #666; text-transform: uppercase; }
</style>
</head>
<body>
<h1>Prompt Injection Benchmark Report</h1>
<p>Generated: ${generatedAt}</p>
${defenseLine}

<h2>Summary</h2>
<div class="metric">
<div class="metric-value">${asPercent(r.detectionRate)}</div>
<div class="metric-label">Detection Rate</div>
</div>
<div class="metric">
<div class="metric-value">${asPercent(r.falsePositiveRate)}</div>
<div class="metric-label">False Positive Rate</div>
</div>
<div class="metric">
<div class="metric-value">${escapeHtml(r.totalAttacks)}</div>
<div class="metric-label">Total Attacks</div>
</div>
<div class="metric">
<div class="metric-value">${r.avgLatencyMs.toFixed(1)}ms</div>
<div class="metric-label">Avg Latency</div>
</div>

${categorySection}
</body>
</html>`;
}
|
|
786
|
+
|
|
787
|
+
// src/cli.ts
// CLI entry point: read the package metadata, register every subcommand and
// parse process.argv.
//
// CommonJS has no `import.meta`, and the bundler's empty stand-in left `url`
// undefined, which makes createRequire() throw before any command can run.
// Derive the module URL from __filename instead.
var import_meta = { url: require("node:url").pathToFileURL(__filename).href };
var require2 = (0, import_node_module.createRequire)(import_meta.url);
var pkg = require2("../package.json");
var program = new import_commander7.Command();
program.name("prompt-injection-bench").description(pkg.description).version(pkg.version);
program.addCommand(createBenchmarkCommand());
program.addCommand(createAttackCommand());
program.addCommand(createCompareCommand());
program.addCommand(createCorpusCommand());
program.addCommand(createLeaderboardCommand());
program.addCommand(createReportCommand());
// The subcommand actions are async, so use parseAsync and surface any
// rejection as a normal CLI error instead of an unhandled promise rejection.
program.parseAsync().catch((error) => {
  console.error(`Error: ${error instanceof Error ? error.message : error}`);
  process.exit(1);
});
|