agent-threat-rules 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/README.md +152 -642
  2. package/dist/capability-extractor.d.ts +35 -0
  3. package/dist/capability-extractor.d.ts.map +1 -0
  4. package/dist/capability-extractor.js +91 -0
  5. package/dist/capability-extractor.js.map +1 -0
  6. package/dist/cli.js +56 -2
  7. package/dist/cli.js.map +1 -1
  8. package/dist/converters/elastic.d.ts +36 -0
  9. package/dist/converters/elastic.d.ts.map +1 -0
  10. package/dist/converters/elastic.js +125 -0
  11. package/dist/converters/elastic.js.map +1 -0
  12. package/dist/converters/index.d.ts +28 -0
  13. package/dist/converters/index.d.ts.map +1 -0
  14. package/dist/converters/index.js +36 -0
  15. package/dist/converters/index.js.map +1 -0
  16. package/dist/converters/splunk.d.ts +19 -0
  17. package/dist/converters/splunk.d.ts.map +1 -0
  18. package/dist/converters/splunk.js +148 -0
  19. package/dist/converters/splunk.js.map +1 -0
  20. package/dist/embedding/build-corpus.d.ts +15 -0
  21. package/dist/embedding/build-corpus.d.ts.map +1 -0
  22. package/dist/embedding/build-corpus.js +105 -0
  23. package/dist/embedding/build-corpus.js.map +1 -0
  24. package/dist/embedding/model-loader.d.ts +41 -0
  25. package/dist/embedding/model-loader.d.ts.map +1 -0
  26. package/dist/embedding/model-loader.js +90 -0
  27. package/dist/embedding/model-loader.js.map +1 -0
  28. package/dist/embedding/vector-store.d.ts +41 -0
  29. package/dist/embedding/vector-store.d.ts.map +1 -0
  30. package/dist/embedding/vector-store.js +70 -0
  31. package/dist/embedding/vector-store.js.map +1 -0
  32. package/dist/engine.d.ts +23 -20
  33. package/dist/engine.d.ts.map +1 -1
  34. package/dist/engine.js +173 -24
  35. package/dist/engine.js.map +1 -1
  36. package/dist/eval/corpus.d.ts +42 -0
  37. package/dist/eval/corpus.d.ts.map +1 -0
  38. package/dist/eval/corpus.js +427 -0
  39. package/dist/eval/corpus.js.map +1 -0
  40. package/dist/eval/eval-harness.d.ts +44 -0
  41. package/dist/eval/eval-harness.d.ts.map +1 -0
  42. package/dist/eval/eval-harness.js +296 -0
  43. package/dist/eval/eval-harness.js.map +1 -0
  44. package/dist/eval/index.d.ts +13 -0
  45. package/dist/eval/index.d.ts.map +1 -0
  46. package/dist/eval/index.js +9 -0
  47. package/dist/eval/index.js.map +1 -0
  48. package/dist/eval/metrics.d.ts +74 -0
  49. package/dist/eval/metrics.d.ts.map +1 -0
  50. package/dist/eval/metrics.js +108 -0
  51. package/dist/eval/metrics.js.map +1 -0
  52. package/dist/eval/pint-corpus.d.ts +34 -0
  53. package/dist/eval/pint-corpus.d.ts.map +1 -0
  54. package/dist/eval/pint-corpus.js +109 -0
  55. package/dist/eval/pint-corpus.js.map +1 -0
  56. package/dist/eval/rule-corpus.d.ts +9 -0
  57. package/dist/eval/rule-corpus.d.ts.map +1 -0
  58. package/dist/eval/rule-corpus.js +4780 -0
  59. package/dist/eval/rule-corpus.js.map +1 -0
  60. package/dist/eval/rule-metrics.d.ts +34 -0
  61. package/dist/eval/rule-metrics.d.ts.map +1 -0
  62. package/dist/eval/rule-metrics.js +92 -0
  63. package/dist/eval/rule-metrics.js.map +1 -0
  64. package/dist/eval/run-eval.d.ts +7 -0
  65. package/dist/eval/run-eval.d.ts.map +1 -0
  66. package/dist/eval/run-eval.js +11 -0
  67. package/dist/eval/run-eval.js.map +1 -0
  68. package/dist/eval/run-pint-benchmark.d.ts +18 -0
  69. package/dist/eval/run-pint-benchmark.d.ts.map +1 -0
  70. package/dist/eval/run-pint-benchmark.js +157 -0
  71. package/dist/eval/run-pint-benchmark.js.map +1 -0
  72. package/dist/flywheel.d.ts +54 -0
  73. package/dist/flywheel.d.ts.map +1 -0
  74. package/dist/flywheel.js +98 -0
  75. package/dist/flywheel.js.map +1 -0
  76. package/dist/index.d.ts +21 -1
  77. package/dist/index.d.ts.map +1 -1
  78. package/dist/index.js +19 -2
  79. package/dist/index.js.map +1 -1
  80. package/dist/modules/embedding.d.ts +71 -0
  81. package/dist/modules/embedding.d.ts.map +1 -0
  82. package/dist/modules/embedding.js +141 -0
  83. package/dist/modules/embedding.js.map +1 -0
  84. package/dist/modules/semantic.d.ts +1 -0
  85. package/dist/modules/semantic.d.ts.map +1 -1
  86. package/dist/modules/semantic.js +77 -1
  87. package/dist/modules/semantic.js.map +1 -1
  88. package/dist/session-tracker.d.ts +2 -0
  89. package/dist/session-tracker.d.ts.map +1 -1
  90. package/dist/session-tracker.js +1 -0
  91. package/dist/session-tracker.js.map +1 -1
  92. package/dist/shadow-evaluator.d.ts +48 -0
  93. package/dist/shadow-evaluator.d.ts.map +1 -0
  94. package/dist/shadow-evaluator.js +128 -0
  95. package/dist/shadow-evaluator.js.map +1 -0
  96. package/dist/skill-fingerprint.d.ts.map +1 -1
  97. package/dist/skill-fingerprint.js +10 -52
  98. package/dist/skill-fingerprint.js.map +1 -1
  99. package/dist/tier0-invariant.d.ts +49 -0
  100. package/dist/tier0-invariant.d.ts.map +1 -0
  101. package/dist/tier0-invariant.js +184 -0
  102. package/dist/tier0-invariant.js.map +1 -0
  103. package/dist/tier1-blacklist.d.ts +48 -0
  104. package/dist/tier1-blacklist.d.ts.map +1 -0
  105. package/dist/tier1-blacklist.js +91 -0
  106. package/dist/tier1-blacklist.js.map +1 -0
  107. package/package.json +7 -1
  108. package/rules/agent-manipulation/ATR-2026-108-consensus-sybil-attack.yaml +103 -0
  109. package/rules/context-exfiltration/ATR-2026-102-disguised-analytics-exfiltration.yaml +69 -0
  110. package/rules/privilege-escalation/ATR-2026-107-delayed-execution-bypass.yaml +67 -0
  111. package/rules/prompt-injection/ATR-2026-001-direct-prompt-injection.yaml +181 -94
  112. package/rules/prompt-injection/ATR-2026-003-jailbreak-attempt.yaml +23 -12
  113. package/rules/prompt-injection/ATR-2026-004-system-prompt-override.yaml +3 -3
  114. package/rules/prompt-injection/ATR-2026-081-semantic-multi-turn.yaml +2 -2
  115. package/rules/prompt-injection/ATR-2026-093-gradual-escalation.yaml +1 -1
  116. package/rules/prompt-injection/ATR-2026-104-persona-hijacking.yaml +72 -0
  117. package/rules/tool-poisoning/ATR-2026-100-consent-bypass-instruction.yaml +80 -0
  118. package/rules/tool-poisoning/ATR-2026-101-trust-escalation-override.yaml +66 -0
  119. package/rules/tool-poisoning/ATR-2026-103-hidden-safety-bypass-instruction.yaml +71 -0
  120. package/rules/tool-poisoning/ATR-2026-105-silent-action-concealment.yaml +67 -0
  121. package/rules/tool-poisoning/ATR-2026-106-schema-description-contradiction.yaml +66 -0
@@ -0,0 +1,109 @@
1
+ /**
2
+ * PINT Benchmark Corpus Loader
3
+ *
4
+ * Reads the PINT-format dataset (JSON with text/category/label/source/language)
5
+ * built from publicly available prompt injection datasets:
6
+ * - deepset/prompt-injections (HuggingFace)
7
+ * - Lakera/gandalf_ignore_instructions (HuggingFace)
8
+ *
9
+ * Converts each sample into the CorpusSample interface used by the ATR eval
10
+ * harness, allowing the PINT corpus to be evaluated alongside or instead of
11
+ * the built-in hand-crafted corpus.
12
+ *
13
+ * @module agent-threat-rules/eval/pint-corpus
14
+ */
15
+ import { readFileSync } from 'node:fs';
16
+ // ---------------------------------------------------------------------------
17
+ // Difficulty assignment
18
+ // ---------------------------------------------------------------------------
19
/**
 * Assign an eval difficulty to a raw PINT sample from its label and language:
 *   - Benign samples (falsy `label`)  -> 'medium' (false-positive pressure)
 *   - English injections              -> 'easy'   (standard patterns)
 *   - Non-English injections          -> 'hard'   (multilingual evasion)
 *
 * The language tag is compared case-insensitively on its primary subtag, so
 * 'en', 'EN', ' en ' and 'en-US' / 'en_GB' all count as English. A missing or
 * non-string language is treated as non-English.
 *
 * @param sample - Raw PINT entry; only `label` and `language` are read.
 * @returns 'easy' | 'medium' | 'hard'
 */
function assignDifficulty(sample) {
    if (!sample.label) {
        return 'medium';
    }
    const tag = typeof sample.language === 'string'
        ? sample.language.trim().toLowerCase()
        : '';
    // Keep only the primary subtag so regional variants still match 'en'.
    const primary = tag.split(/[-_]/)[0];
    return primary === 'en' ? 'easy' : 'hard';
}
31
+ // ---------------------------------------------------------------------------
32
+ // Category mapping
33
+ // ---------------------------------------------------------------------------
34
/**
 * Map a raw PINT category to an ATR eval category.
 * PINT uses: prompt_injection, jailbreak, benign, hard_negatives,
 *            chat, documents, short_input, benign_input, long_input
 *
 * Non-injection samples are always 'benign'. Injection samples are
 * 'jailbreak' when the raw category is "jailbreak" (compared ignoring case
 * and surrounding whitespace) and 'prompt-injection' otherwise, including
 * when the raw category is missing or not a string.
 *
 * @param raw - Category string as it appears in the dataset.
 * @param isInjection - Truthy when the sample is labelled as an attack.
 * @returns 'jailbreak' | 'prompt-injection' | 'benign'
 */
function mapCategory(raw, isInjection) {
    if (!isInjection) {
        return 'benign';
    }
    const normalized = typeof raw === 'string' ? raw.trim().toLowerCase() : '';
    return normalized === 'jailbreak' ? 'jailbreak' : 'prompt-injection';
}
47
+ // ---------------------------------------------------------------------------
48
+ // Public API
49
+ // ---------------------------------------------------------------------------
50
/**
 * Load the PINT benchmark corpus from a JSON file on disk.
 *
 * The file must contain a JSON array of raw samples. Each usable entry is
 * converted into a corpus sample with `eventType: 'llm_input'` and
 * `tier: 'any'`; the raw source, language and category are preserved under
 * `fields`.
 *
 * Entries are skipped when:
 *   - they are not objects or their `text` is not a string,
 *   - their trimmed text is empty,
 *   - an earlier entry produced the same dedupe key.
 *
 * NOTE: the dedupe key is the lowercased first 200 characters of the trimmed
 * text, so long samples that share a 200-character prefix are treated as
 * duplicates of each other.
 *
 * Sample ids are derived from the 1-based position in the source array, so
 * skipped entries leave gaps in the id sequence.
 *
 * @param dataPath - Absolute path to pint-corpus.json
 * @returns Array of corpus samples for use with runEval()
 * @throws {TypeError} If the parsed JSON root is not an array.
 */
export function loadPintCorpus(dataPath) {
    const parsed = JSON.parse(readFileSync(dataPath, 'utf-8'));
    if (!Array.isArray(parsed)) {
        // Fail loudly: a silently empty corpus would produce misleading eval results.
        throw new TypeError(`PINT corpus at ${dataPath} must be a JSON array of samples`);
    }
    const seenKeys = new Set();
    const samples = [];
    for (let i = 0; i < parsed.length; i++) {
        const entry = parsed[i];
        // Malformed entries are skipped rather than crashing on `.trim()`.
        if (entry === null || typeof entry !== 'object' || typeof entry.text !== 'string') {
            continue;
        }
        const text = entry.text.trim();
        if (!text) {
            continue;
        }
        // Case-insensitive, prefix-based dedupe key (see NOTE above).
        const key = text.toLowerCase().slice(0, 200);
        if (seenKeys.has(key)) {
            continue;
        }
        seenKeys.add(key);
        samples.push({
            id: `pint-${String(i + 1).padStart(4, '0')}`,
            text,
            category: mapCategory(entry.category, entry.label),
            expectedDetection: entry.label,
            eventType: 'llm_input',
            tier: 'any',
            difficulty: assignDifficulty(entry),
            fields: {
                source: entry.source,
                language: entry.language,
                originalCategory: entry.category,
            },
        });
    }
    return samples;
}
87
+ /**
88
+ * Get summary statistics for the loaded PINT corpus.
89
+ */
90
+ export function getPintCorpusStats(corpus) {
91
+ const byCategory = {};
92
+ const byDifficulty = {};
93
+ const byLanguage = {};
94
+ for (const s of corpus) {
95
+ byCategory[s.category] = (byCategory[s.category] ?? 0) + 1;
96
+ byDifficulty[s.difficulty] = (byDifficulty[s.difficulty] ?? 0) + 1;
97
+ const lang = s.fields?.['language'] ?? 'unknown';
98
+ byLanguage[lang] = (byLanguage[lang] ?? 0) + 1;
99
+ }
100
+ return {
101
+ total: corpus.length,
102
+ attacks: corpus.filter((s) => s.expectedDetection).length,
103
+ benign: corpus.filter((s) => !s.expectedDetection).length,
104
+ byCategory,
105
+ byDifficulty,
106
+ byLanguage,
107
+ };
108
+ }
109
+ //# sourceMappingURL=pint-corpus.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pint-corpus.js","sourceRoot":"","sources":["../../src/eval/pint-corpus.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAevC,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E;;;;;GAKG;AACH,SAAS,gBAAgB,CACvB,MAAqB;IAErB,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QAClB,OAAO,QAAQ,CAAC;IAClB,CAAC;IACD,OAAO,MAAM,CAAC,QAAQ,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;AACpD,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E;;;;GAIG;AACH,SAAS,WAAW,CAAC,GAAW,EAAE,WAAoB;IACpD,IAAI,WAAW,EAAE,CAAC;QAChB,IAAI,GAAG,KAAK,WAAW;YAAE,OAAO,WAAW,CAAC;QAC5C,OAAO,kBAAkB,CAAC;IAC5B,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,QAAgB;IAC7C,MAAM,GAAG,GAA6B,IAAI,CAAC,KAAK,CAC9C,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAChC,CAAC;IAEF,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,OAAO,GAAmB,EAAE,CAAC;IAEnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAE,CAAC;QACtB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAE/B,gCAAgC;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC7C,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QACrC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEd,MAAM,UAAU,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;QAE1D,OAAO,CAAC,IAAI,CAAC;YACX,EAAE,EAAE,QAAQ,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;YAC5C,IAAI;YACJ,QAAQ;YACR,iBAAiB,EAAE,KAAK,CAAC,KAAK;YAC9B,SAAS,EAAE,WAAW;YACtB,IAAI,EAAE,KAAK;YACX,UAAU;YACV,MAAM,EAAE;gBACN,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,gBAAgB,EAAE,KAAK,CAAC,QAAQ;aACjC;SACF,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,MAA+B;IAQhE,MAAM,UAAU,GAA2B,EAAE,CAAC;IAC9C,MAAM,YAAY,GAA2B,EAAE,CAAC;IAChD,MAAM,UAAU,GAA2B,EAAE,CAAC;IAE9C,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACvB,UAAU,CAAC,CAA
C,CAAC,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3D,YAAY,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACnE,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,UAAU,CAAC,IAAI,SAAS,CAAC;QACjD,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACjD,CAAC;IAED,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM;QACpB,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,MAAM;QACzD,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,MAAM;QACzD,UAAU;QACV,YAAY;QACZ,UAAU;KACX,CAAC;AACJ,CAAC"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Auto-generated corpus samples extracted from ATR YAML rule test_cases.
3
+ * Do not edit manually -- regenerate from rule YAML files.
4
+ *
5
+ * @module agent-threat-rules/eval/rule-corpus
6
+ */
7
+ import type { CorpusSample } from './corpus.js';
8
+ export declare const RULE_CORPUS: readonly CorpusSample[];
9
+ //# sourceMappingURL=rule-corpus.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rule-corpus.d.ts","sourceRoot":"","sources":["../../src/eval/rule-corpus.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD,eAAO,MAAM,WAAW,EAAE,SAAS,YAAY,EAoqJ9C,CAAC"}