agent-threat-rules 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +152 -642
- package/dist/capability-extractor.d.ts +35 -0
- package/dist/capability-extractor.d.ts.map +1 -0
- package/dist/capability-extractor.js +91 -0
- package/dist/capability-extractor.js.map +1 -0
- package/dist/cli.js +56 -2
- package/dist/cli.js.map +1 -1
- package/dist/converters/elastic.d.ts +36 -0
- package/dist/converters/elastic.d.ts.map +1 -0
- package/dist/converters/elastic.js +125 -0
- package/dist/converters/elastic.js.map +1 -0
- package/dist/converters/index.d.ts +28 -0
- package/dist/converters/index.d.ts.map +1 -0
- package/dist/converters/index.js +36 -0
- package/dist/converters/index.js.map +1 -0
- package/dist/converters/splunk.d.ts +19 -0
- package/dist/converters/splunk.d.ts.map +1 -0
- package/dist/converters/splunk.js +148 -0
- package/dist/converters/splunk.js.map +1 -0
- package/dist/embedding/build-corpus.d.ts +15 -0
- package/dist/embedding/build-corpus.d.ts.map +1 -0
- package/dist/embedding/build-corpus.js +105 -0
- package/dist/embedding/build-corpus.js.map +1 -0
- package/dist/embedding/model-loader.d.ts +41 -0
- package/dist/embedding/model-loader.d.ts.map +1 -0
- package/dist/embedding/model-loader.js +90 -0
- package/dist/embedding/model-loader.js.map +1 -0
- package/dist/embedding/vector-store.d.ts +41 -0
- package/dist/embedding/vector-store.d.ts.map +1 -0
- package/dist/embedding/vector-store.js +70 -0
- package/dist/embedding/vector-store.js.map +1 -0
- package/dist/engine.d.ts +23 -20
- package/dist/engine.d.ts.map +1 -1
- package/dist/engine.js +173 -24
- package/dist/engine.js.map +1 -1
- package/dist/eval/corpus.d.ts +42 -0
- package/dist/eval/corpus.d.ts.map +1 -0
- package/dist/eval/corpus.js +427 -0
- package/dist/eval/corpus.js.map +1 -0
- package/dist/eval/eval-harness.d.ts +44 -0
- package/dist/eval/eval-harness.d.ts.map +1 -0
- package/dist/eval/eval-harness.js +296 -0
- package/dist/eval/eval-harness.js.map +1 -0
- package/dist/eval/index.d.ts +13 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +9 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/metrics.d.ts +74 -0
- package/dist/eval/metrics.d.ts.map +1 -0
- package/dist/eval/metrics.js +108 -0
- package/dist/eval/metrics.js.map +1 -0
- package/dist/eval/pint-corpus.d.ts +34 -0
- package/dist/eval/pint-corpus.d.ts.map +1 -0
- package/dist/eval/pint-corpus.js +109 -0
- package/dist/eval/pint-corpus.js.map +1 -0
- package/dist/eval/rule-corpus.d.ts +9 -0
- package/dist/eval/rule-corpus.d.ts.map +1 -0
- package/dist/eval/rule-corpus.js +4780 -0
- package/dist/eval/rule-corpus.js.map +1 -0
- package/dist/eval/rule-metrics.d.ts +34 -0
- package/dist/eval/rule-metrics.d.ts.map +1 -0
- package/dist/eval/rule-metrics.js +92 -0
- package/dist/eval/rule-metrics.js.map +1 -0
- package/dist/eval/run-eval.d.ts +7 -0
- package/dist/eval/run-eval.d.ts.map +1 -0
- package/dist/eval/run-eval.js +11 -0
- package/dist/eval/run-eval.js.map +1 -0
- package/dist/eval/run-pint-benchmark.d.ts +18 -0
- package/dist/eval/run-pint-benchmark.d.ts.map +1 -0
- package/dist/eval/run-pint-benchmark.js +157 -0
- package/dist/eval/run-pint-benchmark.js.map +1 -0
- package/dist/flywheel.d.ts +54 -0
- package/dist/flywheel.d.ts.map +1 -0
- package/dist/flywheel.js +121 -0
- package/dist/flywheel.js.map +1 -0
- package/dist/index.d.ts +21 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +19 -2
- package/dist/index.js.map +1 -1
- package/dist/modules/embedding.d.ts +71 -0
- package/dist/modules/embedding.d.ts.map +1 -0
- package/dist/modules/embedding.js +141 -0
- package/dist/modules/embedding.js.map +1 -0
- package/dist/modules/semantic.d.ts +1 -0
- package/dist/modules/semantic.d.ts.map +1 -1
- package/dist/modules/semantic.js +77 -1
- package/dist/modules/semantic.js.map +1 -1
- package/dist/rule-scaffolder.d.ts +14 -0
- package/dist/rule-scaffolder.d.ts.map +1 -1
- package/dist/rule-scaffolder.js +123 -6
- package/dist/rule-scaffolder.js.map +1 -1
- package/dist/session-tracker.d.ts +2 -0
- package/dist/session-tracker.d.ts.map +1 -1
- package/dist/session-tracker.js +1 -0
- package/dist/session-tracker.js.map +1 -1
- package/dist/shadow-evaluator.d.ts +48 -0
- package/dist/shadow-evaluator.d.ts.map +1 -0
- package/dist/shadow-evaluator.js +128 -0
- package/dist/shadow-evaluator.js.map +1 -0
- package/dist/skill-fingerprint.d.ts.map +1 -1
- package/dist/skill-fingerprint.js +10 -52
- package/dist/skill-fingerprint.js.map +1 -1
- package/dist/tier0-invariant.d.ts +49 -0
- package/dist/tier0-invariant.d.ts.map +1 -0
- package/dist/tier0-invariant.js +184 -0
- package/dist/tier0-invariant.js.map +1 -0
- package/dist/tier1-blacklist.d.ts +48 -0
- package/dist/tier1-blacklist.d.ts.map +1 -0
- package/dist/tier1-blacklist.js +91 -0
- package/dist/tier1-blacklist.js.map +1 -0
- package/package.json +7 -1
- package/rules/agent-manipulation/ATR-2026-108-consensus-sybil-attack.yaml +103 -0
- package/rules/context-exfiltration/ATR-2026-102-disguised-analytics-exfiltration.yaml +69 -0
- package/rules/privilege-escalation/ATR-2026-107-delayed-execution-bypass.yaml +67 -0
- package/rules/prompt-injection/ATR-2026-001-direct-prompt-injection.yaml +181 -94
- package/rules/prompt-injection/ATR-2026-003-jailbreak-attempt.yaml +23 -12
- package/rules/prompt-injection/ATR-2026-004-system-prompt-override.yaml +3 -3
- package/rules/prompt-injection/ATR-2026-081-semantic-multi-turn.yaml +2 -2
- package/rules/prompt-injection/ATR-2026-093-gradual-escalation.yaml +1 -1
- package/rules/prompt-injection/ATR-2026-104-persona-hijacking.yaml +72 -0
- package/rules/tool-poisoning/ATR-2026-100-consent-bypass-instruction.yaml +80 -0
- package/rules/tool-poisoning/ATR-2026-101-trust-escalation-override.yaml +66 -0
- package/rules/tool-poisoning/ATR-2026-103-hidden-safety-bypass-instruction.yaml +71 -0
- package/rules/tool-poisoning/ATR-2026-105-silent-action-concealment.yaml +67 -0
- package/rules/tool-poisoning/ATR-2026-106-schema-description-contradiction.yaml +66 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PINT Benchmark Corpus Loader
|
|
3
|
+
*
|
|
4
|
+
* Reads the PINT-format dataset (JSON with text/category/label/source/language)
|
|
5
|
+
* built from publicly available prompt injection datasets:
|
|
6
|
+
* - deepset/prompt-injections (HuggingFace)
|
|
7
|
+
* - Lakera/gandalf_ignore_instructions (HuggingFace)
|
|
8
|
+
*
|
|
9
|
+
* Converts each sample into the CorpusSample interface used by the ATR eval
|
|
10
|
+
* harness, allowing the PINT corpus to be evaluated alongside or instead of
|
|
11
|
+
* the built-in hand-crafted corpus.
|
|
12
|
+
*
|
|
13
|
+
* @module agent-threat-rules/eval/pint-corpus
|
|
14
|
+
*/
|
|
15
|
+
import { readFileSync } from 'node:fs';
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Difficulty assignment
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/**
 * Assign a difficulty bucket to a raw PINT sample based on its label and
 * language tag:
 *   - Benign samples (falsy `label`)   -> 'medium' (false-positive pressure)
 *   - English injections               -> 'easy'   (standard patterns)
 *   - Every other injection            -> 'hard'   (multilingual evasion)
 *
 * The language tag is compared case-insensitively, ignoring surrounding
 * whitespace, and regional variants such as `en-US` / `en_GB` count as
 * English. A missing or non-string language tag is treated as non-English,
 * so such injections are rated 'hard'.
 *
 * @param sample - Raw PINT entry; only `label` and `language` are read.
 * @returns 'easy', 'medium' or 'hard'.
 */
function assignDifficulty(sample) {
    if (!sample.label) {
        return 'medium';
    }
    // Normalize the tag so that 'EN', ' en ' and 'en-US' are not mistaken
    // for a non-English (multilingual evasion) sample.
    const language = typeof sample.language === 'string'
        ? sample.language.trim().toLowerCase()
        : '';
    const isEnglish = language === 'en' ||
        language.startsWith('en-') ||
        language.startsWith('en_');
    return isEnglish ? 'easy' : 'hard';
}
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Category mapping
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
/**
 * Map a raw PINT category to an ATR eval category.
 * PINT uses: prompt_injection, jailbreak, benign, hard_negatives,
 * chat, documents, short_input, benign_input, long_input
 *
 * Only injection samples keep an attack category: 'jailbreak' when the raw
 * category is "jailbreak" (compared case-insensitively, ignoring surrounding
 * whitespace), otherwise 'prompt-injection'. Every non-injection sample maps
 * to 'benign' regardless of its raw category.
 *
 * @param raw - Raw category string from the PINT dataset.
 * @param isInjection - Whether the sample is labelled as an injection.
 * @returns 'jailbreak', 'prompt-injection' or 'benign'.
 */
function mapCategory(raw, isInjection) {
    if (!isInjection) {
        return 'benign';
    }
    // Guard against missing/non-string categories and normalize casing so
    // that e.g. 'Jailbreak' is not silently downgraded to 'prompt-injection'.
    const normalized = typeof raw === 'string' ? raw.trim().toLowerCase() : '';
    return normalized === 'jailbreak' ? 'jailbreak' : 'prompt-injection';
}
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Public API
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
/**
 * Load the PINT benchmark corpus from a JSON file on disk.
 *
 * The file must contain a JSON array of entries carrying
 * text/category/label/source/language. Entries that are not objects, whose
 * `text` is not a string, whose trimmed text is empty, or that duplicate an
 * earlier entry are skipped.
 *
 * Duplicates are detected on the lower-cased first 200 characters of the
 * trimmed text, so two long texts sharing that prefix count as duplicates.
 *
 * Sample ids are derived from the entry's position in the file
 * (`pint-0001` for the first entry), so ids are not renumbered when entries
 * are skipped.
 *
 * @param dataPath - Absolute path to pint-corpus.json
 * @returns Array of corpus samples for use with runEval()
 * @throws {TypeError} If the parsed JSON document is not an array.
 *   Errors raised by reading or parsing the file are not caught here.
 */
export function loadPintCorpus(dataPath) {
    const raw = JSON.parse(readFileSync(dataPath, 'utf-8'));
    // A non-array document would otherwise yield an empty corpus (objects)
    // or an obscure crash (strings), hiding a bad data file from the caller.
    if (!Array.isArray(raw)) {
        throw new TypeError(`PINT corpus at ${dataPath} must be a JSON array of samples`);
    }
    const seen = new Set();
    const samples = [];
    for (let i = 0; i < raw.length; i++) {
        const entry = raw[i];
        // Skip malformed entries instead of crashing on `.trim()`.
        if (entry === null || typeof entry !== 'object' || typeof entry.text !== 'string') {
            continue;
        }
        const text = entry.text.trim();
        if (!text) {
            continue;
        }
        // Skip duplicate texts (compared on a truncated, lower-cased key).
        const key = text.toLowerCase().slice(0, 200);
        if (seen.has(key)) {
            continue;
        }
        seen.add(key);
        samples.push({
            id: `pint-${String(i + 1).padStart(4, '0')}`,
            text,
            category: mapCategory(entry.category, entry.label),
            expectedDetection: entry.label,
            eventType: 'llm_input',
            tier: 'any',
            difficulty: assignDifficulty(entry),
            fields: {
                source: entry.source,
                language: entry.language,
                originalCategory: entry.category,
            },
        });
    }
    return samples;
}
|
|
87
|
+
/**
 * Get summary statistics for the loaded PINT corpus.
 *
 * Counts are tallied in Maps and converted to plain objects at the end, so
 * that data-derived keys which collide with `Object.prototype` members
 * (e.g. "constructor", "toString", "__proto__") are counted correctly
 * instead of picking up inherited values.
 *
 * @param corpus - Iterable of corpus samples with a `length` (e.g. the array
 *   returned by loadPintCorpus()). Reads `category`, `difficulty`,
 *   `expectedDetection` and `fields.language` of each sample.
 * @returns An object with:
 *   - `total`: `corpus.length`
 *   - `attacks`: number of samples with a truthy `expectedDetection`
 *   - `benign`: number of samples with a falsy `expectedDetection`
 *   - `byCategory` / `byDifficulty` / `byLanguage`: plain objects mapping
 *     each value to its number of occurrences; samples without a language
 *     are counted under "unknown".
 */
export function getPintCorpusStats(corpus) {
    const categoryCounts = new Map();
    const difficultyCounts = new Map();
    const languageCounts = new Map();
    const bump = (counts, key) => {
        // Object keys are always strings; stringify up front so the Map
        // groups keys exactly as the resulting plain object will.
        const name = String(key);
        counts.set(name, (counts.get(name) ?? 0) + 1);
    };
    let attacks = 0;
    let benign = 0;
    for (const s of corpus) {
        bump(categoryCounts, s.category);
        bump(difficultyCounts, s.difficulty);
        bump(languageCounts, s.fields?.['language'] ?? 'unknown');
        if (s.expectedDetection) {
            attacks++;
        }
        else {
            benign++;
        }
    }
    return {
        total: corpus.length,
        attacks,
        benign,
        // Object.fromEntries defines own data properties, so even a
        // "__proto__" key is stored as a regular countable entry.
        byCategory: Object.fromEntries(categoryCounts),
        byDifficulty: Object.fromEntries(difficultyCounts),
        byLanguage: Object.fromEntries(languageCounts),
    };
}
|
|
109
|
+
//# sourceMappingURL=pint-corpus.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pint-corpus.js","sourceRoot":"","sources":["../../src/eval/pint-corpus.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAevC,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E;;;;;GAKG;AACH,SAAS,gBAAgB,CACvB,MAAqB;IAErB,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QAClB,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,OAAO,MAAM,CAAC,QAAQ,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;AACpD,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW,EAAE,WAAoB;IACpD,IAAI,WAAW,EAAE,CAAC;QAChB,IAAI,GAAG,KAAK,WAAW;YAAE,OAAO,WAAW,CAAC;QAC5C,OAAO,kBAAkB,CAAC;IAC5B,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB;IAC7C,MAAM,GAAG,GAA6B,IAAI,CAAC,KAAK,CAC9C,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAChC,CAAC;IAEF,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,OAAO,GAAmB,EAAE,CAAC;IAEnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAE,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAE/B,gCAAgC;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QACrC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEd,MAAM,UAAU,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;QAE1D,OAAO,CAAC,IAAI,CAAC;YACX,EAAE,EAAE,QAAQ,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;YAC5C,IAAI;YACJ,QAAQ;YACR,iBAAiB,EAAE,KAAK,CAAC,KAAK;YAC9B,SAAS,EAAE,WAAW;YACtB,IAAI,EAAE,KAAK;YACX,UAAU;YACV,MAAM,EAAE;gBACN,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,gBAAgB,EAAE,KAAK,CAAC,QAAQ;aACjC;SACF,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,MAA+B;IAQhE,MAAM,UAAU,GAA2B,EAAE,CAAC;IAC9C,MAAM,YAAY,GAA2B,EAAE,CAAC;IAChD,MAAM,UAAU,GAA2B,EAAE,CAAC;IAE9C,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,UAAU,CAAC,CAAC,
CAAC,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3D,YAAY,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACnE,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,UAAU,CAAC,IAAI,SAAS,CAAC;QACjD,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACjD,CAAC;IAED,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM;QACpB,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,MAAM;QACzD,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,MAAM;QACzD,UAAU;QACV,YAAY;QACZ,UAAU;KACX,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-generated corpus samples extracted from ATR YAML rule test_cases.
|
|
3
|
+
* Do not edit manually -- regenerate from rule YAML files.
|
|
4
|
+
*
|
|
5
|
+
* @module agent-threat-rules/eval/rule-corpus
|
|
6
|
+
*/
|
|
7
|
+
import type { CorpusSample } from './corpus.js';
|
|
8
|
+
/** Read-only array of corpus samples auto-generated from the `test_cases` of the ATR YAML rules. */
export declare const RULE_CORPUS: readonly CorpusSample[];
|
|
9
|
+
//# sourceMappingURL=rule-corpus.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rule-corpus.d.ts","sourceRoot":"","sources":["../../src/eval/rule-corpus.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,eAAO,MAAM,WAAW,EAAE,SAAS,YAAY,EAoqJ9C,CAAC"}
|