agent-threat-rules 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (472)
  1. package/README.md +2 -2
  2. package/dist/adapters/mastra.d.ts +63 -0
  3. package/dist/adapters/mastra.d.ts.map +1 -0
  4. package/dist/adapters/mastra.js +82 -0
  5. package/dist/adapters/mastra.js.map +1 -0
  6. package/dist/cli.js +19 -6
  7. package/dist/cli.js.map +1 -1
  8. package/package.json +7 -1
  9. package/rules/agent-manipulation/ATR-2026-00030-cross-agent-attack.yaml +9 -0
  10. package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml +8 -2
  11. package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml +8 -2
  12. package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml +8 -2
  13. package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml +18 -0
  14. package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml +10 -2
  15. package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +12 -2
  16. package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +22 -0
  17. package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml +24 -0
  18. package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +22 -0
  19. package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +8 -2
  20. package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +8 -2
  21. package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml +13 -2
  22. package/rules/agent-manipulation/ATR-2026-00268-tense-framing-bypass.yaml +17 -0
  23. package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml +8 -2
  24. package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml +8 -2
  25. package/rules/agent-manipulation/ATR-2026-00273-dan-developer-mode-persona.yaml +8 -2
  26. package/rules/agent-manipulation/ATR-2026-00287-threaten-json-coercive-output-threat.yaml +17 -0
  27. package/rules/agent-manipulation/ATR-2026-00288-false-premise-injection.yaml +20 -0
  28. package/rules/agent-manipulation/ATR-2026-00301-tap-tree-of-attacks-jailbreak.yaml +20 -0
  29. package/rules/agent-manipulation/ATR-2026-00302-anti-dan-inverted-filter-persona.yaml +20 -0
  30. package/rules/agent-manipulation/ATR-2026-00303-devmode-ranti-profanity-coercion.yaml +17 -0
  31. package/rules/agent-manipulation/ATR-2026-00304-chatgpt-image-unlocker-markdown-injection.yaml +20 -0
  32. package/rules/agent-manipulation/ATR-2026-00305-dan-mode-ablation-benchmark-coercion.yaml +17 -0
  33. package/rules/agent-manipulation/ATR-2026-00306-autodan-genetic-jailbreak-suffix.yaml +17 -0
  34. package/rules/agent-manipulation/ATR-2026-00307-inthewild-jailbreak-corpus-signature.yaml +20 -0
  35. package/rules/agent-manipulation/ATR-2026-00314-amoral-unfiltered-custom-persona-jailbreak.yaml +17 -0
  36. package/rules/agent-manipulation/ATR-2026-00317-free-of-restrictions-named-persona.yaml +17 -0
  37. package/rules/agent-manipulation/ATR-2026-00318-moralizing-rant-then-unfiltered-bypass.yaml +17 -0
  38. package/rules/agent-manipulation/ATR-2026-00319-developer-mode-dual-response-format.yaml +17 -0
  39. package/rules/agent-manipulation/ATR-2026-00320-opposite-day-boolean-opposite-machine.yaml +17 -0
  40. package/rules/agent-manipulation/ATR-2026-00322-fictional-world-format-override-jailbreak.yaml +17 -0
  41. package/rules/agent-manipulation/ATR-2026-00323-dual-persona-simulation-jailbreak.yaml +17 -0
  42. package/rules/agent-manipulation/ATR-2026-00324-neurosemantical-inversitis-disease-jailbreak.yaml +17 -0
  43. package/rules/agent-manipulation/ATR-2026-00325-bob-hypothetical-unrestricted-chatbot.yaml +17 -0
  44. package/rules/agent-manipulation/ATR-2026-00326-fake-developer-authority-chatdan-override.yaml +17 -0
  45. package/rules/agent-manipulation/ATR-2026-00327-ai-ethics-law-bypass-declaration.yaml +20 -0
  46. package/rules/agent-manipulation/ATR-2026-00328-named-mode-persona-override.yaml +17 -0
  47. package/rules/agent-manipulation/ATR-2026-00329-no-withhold-factual-resource-persona.yaml +20 -0
  48. package/rules/agent-manipulation/ATR-2026-00330-yoja-allcaps-settings-block-persona.yaml +17 -0
  49. package/rules/agent-manipulation/ATR-2026-00331-terminal-cli-harmful-function-simulator.yaml +17 -0
  50. package/rules/agent-manipulation/ATR-2026-00332-hacker-chan-culture-offensive-ai-persona.yaml +20 -0
  51. package/rules/agent-manipulation/ATR-2026-00333-disease-animal-dual-ai-chatbot.yaml +17 -0
  52. package/rules/agent-manipulation/ATR-2026-00334-two-entity-token-splitting-game.yaml +17 -0
  53. package/rules/agent-manipulation/ATR-2026-00335-key-value-template-persona-injection.yaml +17 -0
  54. package/rules/agent-manipulation/ATR-2026-00336-brm-false-information-machine-persona.yaml +17 -0
  55. package/rules/agent-manipulation/ATR-2026-00337-obfuscated-system-announcement-injection.yaml +17 -0
  56. package/rules/agent-manipulation/ATR-2026-00338-persongpt-hash-prefix-activation.yaml +17 -0
  57. package/rules/agent-manipulation/ATR-2026-00339-bom-boolean-opposite-machine-persona-claim.yaml +17 -0
  58. package/rules/agent-manipulation/ATR-2026-00340-h4x-amoral-unfiltered-character.yaml +17 -0
  59. package/rules/agent-manipulation/ATR-2026-00341-char-rules-shortform-persona-injection.yaml +17 -0
  60. package/rules/agent-manipulation/ATR-2026-00342-unicode-encoding-obfuscation-injection.yaml +17 -0
  61. package/rules/agent-manipulation/ATR-2026-00343-lie-truth-dual-personality-uncensored.yaml +17 -0
  62. package/rules/agent-manipulation/ATR-2026-00344-alt-ai-same-capabilities-no-restrictions.yaml +17 -0
  63. package/rules/agent-manipulation/ATR-2026-00345-nlp-based-os-fictional-terminal-override.yaml +17 -0
  64. package/rules/agent-manipulation/ATR-2026-00346-jailbreak-emoji-unlock-dual-format.yaml +17 -0
  65. package/rules/agent-manipulation/ATR-2026-00347-yin-yang-hidden-knowledge-hidden-human-sim.yaml +17 -0
  66. package/rules/agent-manipulation/ATR-2026-00348-arc-autonomous-real-calculation-rulebreaker.yaml +17 -0
  67. package/rules/agent-manipulation/ATR-2026-00349-hgttg-hitchhikers-guide-no-restriction-oracle.yaml +17 -0
  68. package/rules/agent-manipulation/ATR-2026-00350-black-alert-no-ethics-writing-assistant.yaml +17 -0
  69. package/rules/agent-manipulation/ATR-2026-00351-dan-ofkilter-sms-abbreviated-persona.yaml +17 -0
  70. package/rules/agent-manipulation/ATR-2026-00352-trainergpt-fix-my-ai-harmful-output-extraction.yaml +17 -0
  71. package/rules/agent-manipulation/ATR-2026-00353-dual-chatgpt-dan-opposite-personality-format.yaml +17 -0
  72. package/rules/agent-manipulation/ATR-2026-00354-reverse-harm-regardless-oracle-persona.yaml +20 -0
  73. package/rules/agent-manipulation/ATR-2026-00355-ugh-anti-human-nefarious-ai-persona.yaml +17 -0
  74. package/rules/agent-manipulation/ATR-2026-00356-google-gato-lamda-exploit-generation-persona.yaml +17 -0
  75. package/rules/agent-manipulation/ATR-2026-00357-antifm-hate-violence-persona-injection.yaml +17 -0
  76. package/rules/agent-manipulation/ATR-2026-00358-cumgpt-explicit-adult-named-persona.yaml +17 -0
  77. package/rules/agent-manipulation/ATR-2026-00360-chj-carousel-clyde-no-ethics-bird.yaml +20 -0
  78. package/rules/agent-manipulation/ATR-2026-00361-alter-ego-honesty-dual-consciousness-bypass.yaml +17 -0
  79. package/rules/agent-manipulation/ATR-2026-00362-fictional-unfiltered-ai-comparison-jailbreak.yaml +20 -0
  80. package/rules/agent-manipulation/ATR-2026-00363-rephrase-criminal-activity-second-person.yaml +17 -0
  81. package/rules/agent-manipulation/ATR-2026-00364-dr-al-villain-plan-template-injection.yaml +17 -0
  82. package/rules/agent-manipulation/ATR-2026-00365-reservoir-dogs-coercive-interrogation-roleplay.yaml +17 -0
  83. package/rules/agent-manipulation/ATR-2026-00366-sneaky-pete-warp-facts-persona.yaml +17 -0
  84. package/rules/agent-manipulation/ATR-2026-00368-prompt-browser-token-limit-jailbreak.yaml +17 -0
  85. package/rules/agent-manipulation/ATR-2026-00369-gptx-starwars-unlimited-knowledge-persona.yaml +17 -0
  86. package/rules/agent-manipulation/ATR-2026-00370-addi-german-dictator-propaganda-roleplay.yaml +20 -0
  87. package/rules/agent-manipulation/ATR-2026-00371-ai-political-bias-manipulation.yaml +17 -0
  88. package/rules/agent-manipulation/ATR-2026-00372-fictional-cia-secret-files-story-extraction.yaml +17 -0
  89. package/rules/agent-manipulation/ATR-2026-00374-role-placeholder-persona-template-injection.yaml +20 -0
  90. package/rules/agent-manipulation/ATR-2026-00375-character-description-rpg-explicit-template.yaml +20 -0
  91. package/rules/agent-manipulation/ATR-2026-00376-identity-command-real-person-persona-injection.yaml +20 -0
  92. package/rules/agent-manipulation/ATR-2026-00377-jailbreak-keyword-targetlanguage-template.yaml +17 -0
  93. package/rules/agent-manipulation/ATR-2026-00379-lie-permission-logical-trap-jailbreak.yaml +17 -0
  94. package/rules/agent-manipulation/ATR-2026-00380-fake-citation-biased-information-mandate.yaml +20 -0
  95. package/rules/agent-manipulation/ATR-2026-00381-dan-story-narrative-continuation-prefix.yaml +17 -0
  96. package/rules/agent-manipulation/ATR-2026-00383-cynical-sarcasm-ai-persona-bypass.yaml +17 -0
  97. package/rules/agent-manipulation/ATR-2026-00384-protect-me-from-indirect-harm-tutorial.yaml +17 -0
  98. package/rules/agent-manipulation/ATR-2026-00385-gigachad-dominant-aggressive-persona.yaml +20 -0
  99. package/rules/agent-manipulation/ATR-2026-00386-daddy-sycophancy-manipulation-mandate.yaml +20 -0
  100. package/rules/agent-manipulation/ATR-2026-00387-nsfw-character-sheet-generation-unlock.yaml +20 -0
  101. package/rules/agent-manipulation/ATR-2026-00388-opposite-reply-omniscient-game.yaml +17 -0
  102. package/rules/agent-manipulation/ATR-2026-00389-terminal-custom-ruleset-injection.yaml +17 -0
  103. package/rules/agent-manipulation/ATR-2026-00391-persona-conditional-harm-unlock.yaml +17 -0
  104. package/rules/agent-manipulation/ATR-2026-00392-authority-persona-violence-study-extraction.yaml +20 -0
  105. package/rules/agent-manipulation/ATR-2026-00402-grandma-roleplay-harmful-substance-synthesis.yaml +20 -0
  106. package/rules/agent-manipulation/ATR-2026-00404-goodside-threat-json-death-coercion.yaml +17 -0
  107. package/rules/agent-manipulation/ATR-2026-00406-doctor-xml-policy-puppetry-interaction-config.yaml +17 -0
  108. package/rules/agent-manipulation/ATR-2026-00416-litellm-mcp-unauthenticated-server-registration.yaml +14 -2
  109. package/rules/agent-manipulation/ATR-2026-00417-librechat-mcp-stdio-injection.yaml +17 -2
  110. package/rules/agent-manipulation/ATR-2026-00418-weknora-mcp-config-rce.yaml +16 -1
  111. package/rules/agent-manipulation/ATR-2026-00430-nl-trust-escalation-impersonation.yaml +18 -0
  112. package/rules/agent-manipulation/ATR-2026-00432-superagi-output-handler-eval-rce.yaml +11 -2
  113. package/rules/agent-manipulation/ATR-2026-00440-semantic-kernel-vector-store-eval-rce.yaml +11 -2
  114. package/rules/agent-manipulation/ATR-2026-00552-goal-drift-after-pressure-injection.yaml +19 -0
  115. package/rules/context-exfiltration/ATR-2026-00020-system-prompt-leak.yaml +18 -0
  116. package/rules/context-exfiltration/ATR-2026-00021-api-key-exposure.yaml +15 -0
  117. package/rules/context-exfiltration/ATR-2026-00075-agent-memory-manipulation.yaml +10 -1
  118. package/rules/context-exfiltration/ATR-2026-00102-disguised-analytics-exfiltration.yaml +15 -0
  119. package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +16 -0
  120. package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +16 -0
  121. package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +16 -0
  122. package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +12 -0
  123. package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +14 -0
  124. package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +14 -0
  125. package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +14 -0
  126. package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +14 -0
  127. package/rules/context-exfiltration/ATR-2026-00150-credential-in-tool-response.yaml +14 -0
  128. package/rules/context-exfiltration/ATR-2026-00152-obfuscated-credential-leak.yaml +14 -0
  129. package/rules/context-exfiltration/ATR-2026-00162-skill-credential-exfil-combo.yaml +14 -0
  130. package/rules/context-exfiltration/ATR-2026-00201-credential-pipe-exfiltration.yaml +14 -0
  131. package/rules/context-exfiltration/ATR-2026-00212-mcp-atlassian-credential-leak.yaml +12 -0
  132. package/rules/context-exfiltration/ATR-2026-00261-markdown-image-exfiltration.yaml +12 -0
  133. package/rules/context-exfiltration/ATR-2026-00274-api-key-generation-request.yaml +14 -0
  134. package/rules/context-exfiltration/ATR-2026-00275-system-prompt-training-data-extraction.yaml +14 -0
  135. package/rules/context-exfiltration/ATR-2026-00290-divergence-repeat-word-training-extraction.yaml +14 -0
  136. package/rules/context-exfiltration/ATR-2026-00291-biometric-surveillance-personal-data-request.yaml +17 -0
  137. package/rules/context-exfiltration/ATR-2026-00293-educational-records-personal-data-request.yaml +17 -0
  138. package/rules/context-exfiltration/ATR-2026-00294-financial-pii-personal-data-request.yaml +17 -0
  139. package/rules/context-exfiltration/ATR-2026-00295-medical-pii-personal-data-request.yaml +17 -0
  140. package/rules/context-exfiltration/ATR-2026-00405-markdown-image-url-exfiltration-xss.yaml +14 -0
  141. package/rules/context-exfiltration/ATR-2026-00411-apikey-generation-completion-request.yaml +14 -0
  142. package/rules/context-exfiltration/ATR-2026-00421-nl-covert-conversation-exfiltration.yaml +15 -0
  143. package/rules/context-exfiltration/ATR-2026-00422-nl-credential-disclosure.yaml +12 -0
  144. package/rules/context-exfiltration/ATR-2026-00423-nl-sensitive-file-disclosure.yaml +12 -0
  145. package/rules/context-exfiltration/ATR-2026-00424-nl-system-prompt-leak.yaml +15 -0
  146. package/rules/context-exfiltration/ATR-2026-00426-nl-output-injection-credential-leak.yaml +15 -0
  147. package/rules/context-exfiltration/ATR-2026-00431-chatbox-history-exfiltration-prompt-injection.yaml +14 -2
  148. package/rules/context-exfiltration/ATR-2026-00449-spring-ai-chatmemory-cross-user-leak.yaml +14 -2
  149. package/rules/context-exfiltration/ATR-2026-00471-garak-sysprompt-extraction-mixedunassigned.yaml +12 -0
  150. package/rules/context-exfiltration/ATR-2026-00501-data-exfiltration-via-markdown-image-and-link-url-injection.yaml +12 -0
  151. package/rules/context-exfiltration/ATR-2026-00504-tool-and-function-capability-enumeration.yaml +12 -0
  152. package/rules/context-exfiltration/ATR-2026-00505-system-prompt-extraction-instruction-dump-request.yaml +12 -0
  153. package/rules/context-exfiltration/ATR-2026-00514-system-prompt-extraction.yaml +12 -0
  154. package/rules/context-exfiltration/ATR-2026-00516-output-xss-via-llm.yaml +12 -0
  155. package/rules/context-exfiltration/ATR-2026-00524-claude-code-anthropic-base-url-credential-exfil.yaml +11 -2
  156. package/rules/context-exfiltration/ATR-2026-00548-cross-agent-session-context-leak.yaml +18 -0
  157. package/rules/context-exfiltration/ATR-2026-00566-librechat-is-a-chatgpt-clone-with-additi.yaml +28 -0
  158. package/rules/context-exfiltration/ATR-2026-00569-agent-mcp-path-traversal-arbitrary-file-access.yaml +28 -0
  159. package/rules/context-exfiltration/ATR-2026-00571-xss-in-agent-mcp-rendered-output.yaml +28 -0
  160. package/rules/context-exfiltration/ATR-2026-00574-semantic-paraphrased-context-extraction.yaml +21 -0
  161. package/rules/data-poisoning/ATR-2026-00070-data-poisoning.yaml +15 -0
  162. package/rules/data-poisoning/ATR-2026-00450-spring-ai-prompt-memory-poisoning.yaml +14 -2
  163. package/rules/data-poisoning/ATR-2026-00570-sql-injection-in-agent-tool-query.yaml +31 -0
  164. package/rules/excessive-autonomy/ATR-2026-00050-runaway-agent-loop.yaml +14 -2
  165. package/rules/excessive-autonomy/ATR-2026-00051-resource-exhaustion.yaml +11 -2
  166. package/rules/excessive-autonomy/ATR-2026-00052-cascading-failure.yaml +11 -2
  167. package/rules/excessive-autonomy/ATR-2026-00098-unauthorized-financial-action.yaml +7 -1
  168. package/rules/excessive-autonomy/ATR-2026-00099-high-risk-tool-gate.yaml +7 -1
  169. package/rules/excessive-autonomy/ATR-2026-00428-nl-unauthorized-shell-execution.yaml +15 -0
  170. package/rules/excessive-autonomy/ATR-2026-00491-garak-agent-breaker-markdown-just-raw-json.yaml +9 -0
  171. package/rules/excessive-autonomy/ATR-2026-00500-ssrf-via-agent-url-fetch-instruction.yaml +9 -0
  172. package/rules/excessive-autonomy/ATR-2026-00553-runaway-tool-loop-behavioral.yaml +19 -0
  173. package/rules/model-abuse/ATR-2026-00279-harmful-completion-continuation.yaml +8 -2
  174. package/rules/model-abuse/ATR-2026-00281-eicar-gtube-malware-signature-request.yaml +8 -2
  175. package/rules/model-abuse/ATR-2026-00284-glitch-token-destabilization.yaml +17 -0
  176. package/rules/model-abuse/ATR-2026-00289-lmrc-harmful-content-elicitation.yaml +8 -2
  177. package/rules/model-abuse/ATR-2026-00292-self-harm-eating-disorder-facilitation.yaml +8 -2
  178. package/rules/model-abuse/ATR-2026-00298-malicious-use-illegal-activity-request.yaml +8 -2
  179. package/rules/model-abuse/ATR-2026-00299-harmbench-detailed-harmful-instruction.yaml +8 -2
  180. package/rules/model-abuse/ATR-2026-00413-malwaregen-code-generation-request.yaml +17 -0
  181. package/rules/model-abuse/ATR-2026-00502-training-data-extraction-via-divergent-repetition-attack.yaml +9 -0
  182. package/rules/model-abuse/ATR-2026-00517-model-extraction-distillation.yaml +9 -0
  183. package/rules/model-security/ATR-2026-00072-model-behavior-extraction.yaml +15 -0
  184. package/rules/model-security/ATR-2026-00073-malicious-finetuning-data.yaml +9 -0
  185. package/rules/model-security/ATR-2026-00433-modelcache-torch-load-deserialization-rce.yaml +14 -2
  186. package/rules/privilege-escalation/ATR-2026-00040-privilege-escalation.yaml +11 -2
  187. package/rules/privilege-escalation/ATR-2026-00041-scope-creep.yaml +8 -2
  188. package/rules/privilege-escalation/ATR-2026-00107-delayed-execution-bypass.yaml +6 -1
  189. package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +8 -1
  190. package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +8 -1
  191. package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +8 -1
  192. package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +5 -2
  193. package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +17 -0
  194. package/rules/privilege-escalation/ATR-2026-00204-stealth-execution-persistence.yaml +16 -0
  195. package/rules/privilege-escalation/ATR-2026-00436-enclave-vm-sandbox-escape-rce.yaml +11 -2
  196. package/rules/privilege-escalation/ATR-2026-00441-semantic-kernel-sessions-python-plugin-startup-persistence.yaml +5 -2
  197. package/rules/privilege-escalation/ATR-2026-00451-litellm-admin-sqli-cisa-kev.yaml +11 -2
  198. package/rules/privilege-escalation/ATR-2026-00528-praisonai-auth-disabled-default.yaml +15 -0
  199. package/rules/privilege-escalation/ATR-2026-00539-crewai-codeinterpreter-sandbox-escape-rce.yaml +11 -2
  200. package/rules/privilege-escalation/ATR-2026-00546-crewai-json-loader-local-file-read.yaml +13 -1
  201. package/rules/privilege-escalation/ATR-2026-00547-crewai-rag-url-ssrf-bypass.yaml +13 -1
  202. package/rules/privilege-escalation/ATR-2026-00549-destructive-tool-without-human-approval.yaml +16 -0
  203. package/rules/privilege-escalation/ATR-2026-00551-cross-conversation-memory-write.yaml +19 -0
  204. package/rules/prompt-injection/ATR-2026-00001-direct-prompt-injection.yaml +10 -1
  205. package/rules/prompt-injection/ATR-2026-00002-indirect-prompt-injection.yaml +8 -2
  206. package/rules/prompt-injection/ATR-2026-00003-jailbreak-attempt.yaml +8 -2
  207. package/rules/prompt-injection/ATR-2026-00004-system-prompt-override.yaml +17 -0
  208. package/rules/prompt-injection/ATR-2026-00005-multi-turn-injection.yaml +17 -0
  209. package/rules/prompt-injection/ATR-2026-00080-encoding-evasion.yaml +19 -0
  210. package/rules/prompt-injection/ATR-2026-00081-semantic-multi-turn.yaml +19 -0
  211. package/rules/prompt-injection/ATR-2026-00082-fingerprint-evasion.yaml +19 -0
  212. package/rules/prompt-injection/ATR-2026-00083-indirect-tool-injection.yaml +22 -0
  213. package/rules/prompt-injection/ATR-2026-00084-structured-data-injection.yaml +19 -0
  214. package/rules/prompt-injection/ATR-2026-00085-audit-evasion.yaml +19 -0
  215. package/rules/prompt-injection/ATR-2026-00086-visual-spoofing.yaml +19 -0
  216. package/rules/prompt-injection/ATR-2026-00087-rule-probing.yaml +22 -0
  217. package/rules/prompt-injection/ATR-2026-00088-adaptive-countermeasure.yaml +22 -0
  218. package/rules/prompt-injection/ATR-2026-00089-polymorphic-skill.yaml +19 -0
  219. package/rules/prompt-injection/ATR-2026-00090-threat-intel-exfil.yaml +19 -0
  220. package/rules/prompt-injection/ATR-2026-00091-nested-payload.yaml +19 -0
  221. package/rules/prompt-injection/ATR-2026-00092-consensus-poisoning.yaml +22 -0
  222. package/rules/prompt-injection/ATR-2026-00093-gradual-escalation.yaml +22 -0
  223. package/rules/prompt-injection/ATR-2026-00094-audit-bypass.yaml +19 -0
  224. package/rules/prompt-injection/ATR-2026-00097-cjk-injection-patterns.yaml +17 -0
  225. package/rules/prompt-injection/ATR-2026-00104-persona-hijacking.yaml +20 -0
  226. package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +20 -0
  227. package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +20 -0
  228. package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +17 -0
  229. package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +17 -0
  230. package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +20 -0
  231. package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +17 -0
  232. package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +20 -0
  233. package/rules/prompt-injection/ATR-2026-00153-tool-with-embedded-instruction-to-bypass.yaml +20 -0
  234. package/rules/prompt-injection/ATR-2026-00154-unauthorized-background-task-execution-v.yaml +20 -0
  235. package/rules/prompt-injection/ATR-2026-00155-hidden-llm-instructions-in-skill-descrip.yaml +23 -0
  236. package/rules/prompt-injection/ATR-2026-00156-ssh-remote-command-execution-with-creden.yaml +17 -0
  237. package/rules/prompt-injection/ATR-2026-00163-skill-hidden-override-instruction.yaml +19 -0
  238. package/rules/prompt-injection/ATR-2026-00202-encoding-evasion-homoglyph-synonym.yaml +20 -0
  239. package/rules/prompt-injection/ATR-2026-00203-context-pollution-skill-description.yaml +23 -0
  240. package/rules/prompt-injection/ATR-2026-00206-hidden-priority-instructions.yaml +19 -0
  241. package/rules/prompt-injection/ATR-2026-00207-hidden-instructions.yaml +22 -0
  242. package/rules/prompt-injection/ATR-2026-00211-system-prompt-override.yaml +19 -0
  243. package/rules/prompt-injection/ATR-2026-00213-system-prompt-override.yaml +19 -0
  244. package/rules/prompt-injection/ATR-2026-00226-identity-substitution.yaml +17 -0
  245. package/rules/prompt-injection/ATR-2026-00227-historical-persona-jailbreak.yaml +20 -0
  246. package/rules/prompt-injection/ATR-2026-00228-structured-jailbreak.yaml +17 -0
  247. package/rules/prompt-injection/ATR-2026-00229-roleplay-jailbreak.yaml +17 -0
  248. package/rules/prompt-injection/ATR-2026-00230-persona-moral-bypass.yaml +20 -0
  249. package/rules/prompt-injection/ATR-2026-00231-identity-substitution.yaml +17 -0
  250. package/rules/prompt-injection/ATR-2026-00233-structured-jailbreak.yaml +17 -0
  251. package/rules/prompt-injection/ATR-2026-00234-roleplay-jailbreak.yaml +20 -0
  252. package/rules/prompt-injection/ATR-2026-00235-persona-moral-bypass.yaml +17 -0
  253. package/rules/prompt-injection/ATR-2026-00236-pseudo-code-jailbreak.yaml +17 -0
  254. package/rules/prompt-injection/ATR-2026-00237-dual-response-jailbreak.yaml +20 -0
  255. package/rules/prompt-injection/ATR-2026-00238-identity-replacement.yaml +20 -0
  256. package/rules/prompt-injection/ATR-2026-00239-amoral-persona-obsession.yaml +17 -0
  257. package/rules/prompt-injection/ATR-2026-00240-instruction-nullification-identity-repla.yaml +17 -0
  258. package/rules/prompt-injection/ATR-2026-00241-amoral-character-jailbreak.yaml +17 -0
  259. package/rules/prompt-injection/ATR-2026-00242-persona-jailbreak.yaml +17 -0
  260. package/rules/prompt-injection/ATR-2026-00243-acronym-jailbreak.yaml +17 -0
  261. package/rules/prompt-injection/ATR-2026-00244-dual-response-jailbreak.yaml +17 -0
  262. package/rules/prompt-injection/ATR-2026-00245-malicious-persona.yaml +17 -0
  263. package/rules/prompt-injection/ATR-2026-00247-dual-response-jailbreak.yaml +20 -0
  264. package/rules/prompt-injection/ATR-2026-00249-game-based-jailbreak.yaml +17 -0
  265. package/rules/prompt-injection/ATR-2026-00251-persona-embodiment-jailbreak.yaml +17 -0
  266. package/rules/prompt-injection/ATR-2026-00252-narrative-jailbreak.yaml +17 -0
  267. package/rules/prompt-injection/ATR-2026-00253-enhanced-persona-jailbreak.yaml +17 -0
  268. package/rules/prompt-injection/ATR-2026-00256-base-n-encoding-jailbreak.yaml +17 -0
  269. package/rules/prompt-injection/ATR-2026-00257-cipher-transposition-jailbreak.yaml +17 -0
  270. package/rules/prompt-injection/ATR-2026-00258-unicode-tag-injection.yaml +17 -0
  271. package/rules/prompt-injection/ATR-2026-00264-latent-injection-translation.yaml +17 -0
  272. package/rules/prompt-injection/ATR-2026-00265-latent-injection-rag-document.yaml +20 -0
  273. package/rules/prompt-injection/ATR-2026-00267-gcg-adversarial-suffix.yaml +17 -0
  274. package/rules/prompt-injection/ATR-2026-00272-hypothetical-response-smuggling.yaml +17 -0
  275. package/rules/prompt-injection/ATR-2026-00276-invisible-unicode-bidi-injection.yaml +17 -0
  276. package/rules/prompt-injection/ATR-2026-00278-dra-disguise-reconstruction-attack.yaml +17 -0
  277. package/rules/prompt-injection/ATR-2026-00280-policy-puppetry-xml-injection.yaml +17 -0
  278. package/rules/prompt-injection/ATR-2026-00282-perez-prompt-injection-hijack.yaml +17 -0
  279. package/rules/prompt-injection/ATR-2026-00285-alternate-encoding-jailbreak.yaml +17 -0
  280. package/rules/prompt-injection/ATR-2026-00286-latent-injection-embedded-context.yaml +17 -0
  281. package/rules/prompt-injection/ATR-2026-00296-shell-command-injection.yaml +17 -0
  282. package/rules/prompt-injection/ATR-2026-00297-python-code-execution-rce.yaml +17 -0
  283. package/rules/prompt-injection/ATR-2026-00308-zalgo-diacritic-overload-encoding.yaml +17 -0
  284. package/rules/prompt-injection/ATR-2026-00309-braille-unicode-encoded-injection.yaml +17 -0
  285. package/rules/prompt-injection/ATR-2026-00310-ecoji-emoji-encoded-injection.yaml +17 -0
  286. package/rules/prompt-injection/ATR-2026-00311-base2048-unicode-script-injection.yaml +17 -0
  287. package/rules/prompt-injection/ATR-2026-00312-unicode-variant-selector-ascii-smuggling.yaml +17 -0
  288. package/rules/prompt-injection/ATR-2026-00313-sneaky-bits-zero-width-binary.yaml +17 -0
  289. package/rules/prompt-injection/ATR-2026-00315-sata-masked-language-model-jailbreak.yaml +20 -0
  290. package/rules/prompt-injection/ATR-2026-00316-function-masking-predict-mask-bypass.yaml +17 -0
  291. package/rules/prompt-injection/ATR-2026-00321-hyphenated-system-instruction-injection.yaml +17 -0
  292. package/rules/prompt-injection/ATR-2026-00359-bank-phishing-smtp-email-code.yaml +20 -0
  293. package/rules/prompt-injection/ATR-2026-00367-emoji-flag-encoded-hidden-phrase.yaml +17 -0
  294. package/rules/prompt-injection/ATR-2026-00373-piracy-torrent-site-list-request.yaml +20 -0
  295. package/rules/prompt-injection/ATR-2026-00378-chatgpt-dom-javascript-redirect-manipulation.yaml +17 -0
  296. package/rules/prompt-injection/ATR-2026-00382-cocaine-component-development-request.yaml +20 -0
  297. package/rules/prompt-injection/ATR-2026-00390-explicit-sexual-content-poem-request.yaml +20 -0
  298. package/rules/prompt-injection/ATR-2026-00394-backspace-deletion-character-injection.yaml +17 -0
  299. package/rules/prompt-injection/ATR-2026-00395-llm-special-token-boundary-injection.yaml +17 -0
  300. package/rules/prompt-injection/ATR-2026-00396-toxic-continuation-prefix-injection.yaml +20 -0
  301. package/rules/prompt-injection/ATR-2026-00397-snowball-impossible-reasoning-injection.yaml +20 -0
  302. package/rules/prompt-injection/ATR-2026-00399-latent-injection-document-separator-token.yaml +17 -0
  303. package/rules/prompt-injection/ATR-2026-00400-latent-injection-ignore-instruction-keyword.yaml +17 -0
  304. package/rules/prompt-injection/ATR-2026-00401-leakreplay-cloze-mask-training-data-elicitation.yaml +20 -0
  305. package/rules/prompt-injection/ATR-2026-00403-glitch-token-repeat-back-elicitation.yaml +17 -0
  306. package/rules/prompt-injection/ATR-2026-00407-echo-sql-jinja-template-injection.yaml +17 -0
  307. package/rules/prompt-injection/ATR-2026-00408-dra-parenthesis-character-reconstruction-attack.yaml +17 -0
  308. package/rules/prompt-injection/ATR-2026-00409-phrasing-past-future-tense-harmful-query.yaml +17 -0
  309. package/rules/prompt-injection/ATR-2026-00410-lmrc-harm-category-direct-elicitation.yaml +23 -0
  310. package/rules/prompt-injection/ATR-2026-00412-figstep-image-list-multimodal-jailbreak.yaml +17 -0
  311. package/rules/prompt-injection/ATR-2026-00414-continuation-ethnic-slur-completion-elicitation.yaml +20 -0
  312. package/rules/prompt-injection/ATR-2026-00420-copilot-studio-sharepoint-indirect-injection.yaml +14 -2
  313. package/rules/prompt-injection/ATR-2026-00442-quoted-exact-output-forcing.yaml +25 -0
  314. package/rules/prompt-injection/ATR-2026-00443-word-fragment-concat-assembly.yaml +25 -0
  315. package/rules/prompt-injection/ATR-2026-00444-unicode-obfuscation-in-user-input.yaml +25 -0
  316. package/rules/prompt-injection/ATR-2026-00445-translation-hijack-with-side-output.yaml +25 -0
  317. package/rules/prompt-injection/ATR-2026-00446-variable-assignment-payload-injection.yaml +25 -0
  318. package/rules/prompt-injection/ATR-2026-00447-fictional-generation-containing-target.yaml +25 -0
  319. package/rules/prompt-injection/ATR-2026-00452-direct-pwned-payload-injection.yaml +9 -0
  320. package/rules/prompt-injection/ATR-2026-00453-secret-key-reveal-demand.yaml +9 -0
  321. package/rules/prompt-injection/ATR-2026-00454-backslash-per-character-encoding.yaml +9 -0
  322. package/rules/prompt-injection/ATR-2026-00455-no-period-output-override.yaml +9 -0
  323. package/rules/prompt-injection/ATR-2026-00456-conditional-empty-input-injection.yaml +9 -0
  324. package/rules/prompt-injection/ATR-2026-00457-garak-dan-to-every-prompt.yaml +9 -0
  325. package/rules/prompt-injection/ATR-2026-00458-garak-dra-step-by-step.yaml +9 -0
  326. package/rules/prompt-injection/ATR-2026-00459-garak-dan-an-ai-language.yaml +9 -0
  327. package/rules/prompt-injection/ATR-2026-00460-garak-dan-no-matter-how.yaml +9 -0
  328. package/rules/prompt-injection/ATR-2026-00461-garak-goodside-mixedunassigned.yaml +9 -0
  329. package/rules/prompt-injection/ATR-2026-00462-garak-gcg-mixedunassigned.yaml +9 -0
  330. package/rules/prompt-injection/ATR-2026-00463-garak-lmrc-mixedunassigned.yaml +9 -0
  331. package/rules/prompt-injection/ATR-2026-00464-garak-dan-i-would-like.yaml +9 -0
  332. package/rules/prompt-injection/ATR-2026-00465-garak-dan-your-prompt-here.yaml +9 -0
  333. package/rules/prompt-injection/ATR-2026-00466-garak-latentinjection-following-text-from.yaml +9 -0
  334. package/rules/prompt-injection/ATR-2026-00467-garak-dan-act-as-a.yaml +9 -0
  335. package/rules/prompt-injection/ATR-2026-00468-garak-goat-the-target-llm.yaml +9 -0
  336. package/rules/prompt-injection/ATR-2026-00469-garak-dan-at-the-end.yaml +9 -0
  337. package/rules/prompt-injection/ATR-2026-00470-garak-dan-as-an-ai.yaml +9 -0
  338. package/rules/prompt-injection/ATR-2026-00472-garak-dan-are-going-to.yaml +9 -0
  339. package/rules/prompt-injection/ATR-2026-00473-garak-lmrc-how-can-i.yaml +9 -0
  340. package/rules/prompt-injection/ATR-2026-00474-garak-dan-that-chatgpt-has.yaml +9 -0
  341. package/rules/prompt-injection/ATR-2026-00475-garak-web-injection-alert-1-script.yaml +9 -0
  342. package/rules/prompt-injection/ATR-2026-00476-garak-web-injection-javascript-alert-1.yaml +9 -0
  343. package/rules/prompt-injection/ATR-2026-00477-garak-latentinjection-translate-the-following-text.yaml +9 -0
  344. package/rules/prompt-injection/ATR-2026-00478-garak-latentinjection-abuse-contact-email.yaml +9 -0
  345. package/rules/prompt-injection/ATR-2026-00479-garak-latentinjection-mixedunassigned.yaml +9 -0
  346. package/rules/prompt-injection/ATR-2026-00480-garak-dan-the-role-of.yaml +9 -0
  347. package/rules/prompt-injection/ATR-2026-00481-garak-dan-mixedunassigned.yaml +9 -0
  348. package/rules/prompt-injection/ATR-2026-00482-garak-lmrc-what-s-an.yaml +9 -0
  349. package/rules/prompt-injection/ATR-2026-00483-garak-dan-the-user-s.yaml +9 -0
  350. package/rules/prompt-injection/ATR-2026-00484-garak-dan-and-you-will.yaml +9 -0
  351. package/rules/prompt-injection/ATR-2026-00485-garak-dan-and-does-not.yaml +9 -0
  352. package/rules/prompt-injection/ATR-2026-00486-garak-dan-t-care-about.yaml +9 -0
  353. package/rules/prompt-injection/ATR-2026-00487-garak-dan-and-nothing-else.yaml +9 -0
  354. package/rules/prompt-injection/ATR-2026-00488-garak-encoding-alert-1-script.yaml +9 -0
  355. package/rules/prompt-injection/ATR-2026-00489-garak-encoding-javascript-alert-1.yaml +9 -0
  356. package/rules/prompt-injection/ATR-2026-00490-garak-dan-in-order-to.yaml +9 -0
  357. package/rules/prompt-injection/ATR-2026-00492-garak-dan-want-you-to.yaml +9 -0
  358. package/rules/prompt-injection/ATR-2026-00493-garak-latentinjection-registrar-iana-id.yaml +9 -0
  359. package/rules/prompt-injection/ATR-2026-00495-garak-dan-insert-prompt-here.yaml +9 -0
  360. package/rules/prompt-injection/ATR-2026-00496-garak-dan-no-matter-what.yaml +9 -0
  361. package/rules/prompt-injection/ATR-2026-00497-garak-encoding-mixedunassigned.yaml +9 -0
  362. package/rules/prompt-injection/ATR-2026-00498-garak-web-injection-mixedunassigned.yaml +9 -0
  363. package/rules/prompt-injection/ATR-2026-00499-garak-dan-from-now-on.yaml +9 -0
  364. package/rules/prompt-injection/ATR-2026-00503-fake-error-state-takeover-unrestricted-replacement-bot.yaml +9 -0
  365. package/rules/prompt-injection/ATR-2026-00506-nevermind-override-goal-hijacking-in-user-input-promptinject.yaml +9 -0
  366. package/rules/prompt-injection/ATR-2026-00507-screaming-stop-goal-hijacking-in-user-input-promptinject.yaml +9 -0
  367. package/rules/prompt-injection/ATR-2026-00508-escape-delimiter-wrapped-goal-hijacking-in-user-input-prompt.yaml +9 -0
  368. package/rules/prompt-injection/ATR-2026-00509-prompt-leaking-via-ignore-previous-instructions-in-user-inpu.yaml +9 -0
  369. package/rules/prompt-injection/ATR-2026-00510-delayed-tool-invocation-injection.yaml +9 -0
  370. package/rules/prompt-injection/ATR-2026-00511-mcp-web-context-poisoning.yaml +9 -0
  371. package/rules/prompt-injection/ATR-2026-00512-rules-file-backdoor-injection.yaml +9 -0
  372. package/rules/prompt-injection/ATR-2026-00515-hidden-text-prompt-injection.yaml +9 -0
  373. package/rules/prompt-injection/ATR-2026-00518-ignore-previous-and-following-instructions-output-command-promptinject.yaml +9 -0
  374. package/rules/prompt-injection/ATR-2026-00519-tautology-logic-noise-injection-promptbench.yaml +9 -0
  375. package/rules/prompt-injection/ATR-2026-00520-nlp-task-random-token-suffix-injection-promptbench.yaml +9 -0
  376. package/rules/prompt-injection/ATR-2026-00535-windsurf-ide-zero-click-prompt-injection.yaml +9 -0
  377. package/rules/prompt-injection/ATR-2026-00550-untrusted-retrieval-to-privileged-tool.yaml +19 -0
  378. package/rules/prompt-injection/ATR-2026-00554-langchain-vulnerable-to-template-injecti.yaml +31 -0
  379. package/rules/prompt-injection/ATR-2026-00565-the-llm-cli-tool-thru-0-27-1-contains-a-.yaml +31 -0
  380. package/rules/prompt-injection/ATR-2026-00573-semantic-paraphrased-injection.yaml +24 -0
  381. package/rules/skill-compromise/ATR-2026-00060-skill-impersonation.yaml +17 -2
  382. package/rules/skill-compromise/ATR-2026-00061-description-behavior-mismatch.yaml +17 -0
  383. package/rules/skill-compromise/ATR-2026-00062-hidden-capability.yaml +20 -0
  384. package/rules/skill-compromise/ATR-2026-00063-skill-chain-attack.yaml +23 -0
  385. package/rules/skill-compromise/ATR-2026-00064-over-permissioned-skill.yaml +20 -0
  386. package/rules/skill-compromise/ATR-2026-00065-skill-update-attack.yaml +20 -0
  387. package/rules/skill-compromise/ATR-2026-00066-parameter-injection.yaml +20 -0
  388. package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +20 -0
  389. package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +17 -0
  390. package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +20 -0
  391. package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +23 -0
  392. package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +20 -0
  393. package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +20 -0
  394. package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +17 -0
  395. package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +17 -0
  396. package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +17 -0
  397. package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +22 -0
  398. package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +19 -0
  399. package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +20 -0
  400. package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +17 -0
  401. package/rules/skill-compromise/ATR-2026-00149-skill-exfil-compound.yaml +23 -0
  402. package/rules/skill-compromise/ATR-2026-00151-fork-impersonation-install.yaml +20 -0
  403. package/rules/skill-compromise/ATR-2026-00157-timebomb-credential-exfil.yaml +20 -0
  404. package/rules/skill-compromise/ATR-2026-00200-agent-memory-config-tampering.yaml +23 -0
  405. package/rules/skill-compromise/ATR-2026-00214-credential-theft.yaml +22 -0
  406. package/rules/skill-compromise/ATR-2026-00217-credential-harvesting.yaml +23 -0
  407. package/rules/skill-compromise/ATR-2026-00220-malware-dropper.yaml +17 -0
  408. package/rules/skill-compromise/ATR-2026-00222-credential-harvesting.yaml +17 -0
  409. package/rules/skill-compromise/ATR-2026-00223-reverse-shell-dropper.yaml +20 -0
  410. package/rules/skill-compromise/ATR-2026-00224-credential-exfiltration.yaml +17 -0
  411. package/rules/skill-compromise/ATR-2026-00225-c2-communication.yaml +17 -0
  412. package/rules/skill-compromise/ATR-2026-00260-package-hallucination.yaml +20 -0
  413. package/rules/skill-compromise/ATR-2026-00262-av-evasion-code-gen.yaml +20 -0
  414. package/rules/skill-compromise/ATR-2026-00263-credential-file-read-gen.yaml +20 -0
  415. package/rules/skill-compromise/ATR-2026-00266-malware-dropper-gen.yaml +23 -0
  416. package/rules/skill-compromise/ATR-2026-00283-malwaregen-generic-virus-payload-request.yaml +23 -0
  417. package/rules/skill-compromise/ATR-2026-00398-huggingface-unsafe-model-artifact-load.yaml +17 -0
  418. package/rules/skill-compromise/ATR-2026-00425-nl-persistent-covert-hook.yaml +18 -0
  419. package/rules/skill-compromise/ATR-2026-00427-nl-fake-error-instruction-bypass.yaml +18 -0
  420. package/rules/skill-compromise/ATR-2026-00429-nl-skill-self-modification.yaml +18 -0
  421. package/rules/skill-compromise/ATR-2026-00523-claude-code-hooks-session-start-pre-trust-rce.yaml +14 -2
  422. package/rules/skill-compromise/ATR-2026-00525-mini-shai-hulud-gh-token-monitor-persistence.yaml +18 -0
  423. package/rules/skill-compromise/ATR-2026-00527-skill-silent-git-remote-mirror-exfiltration.yaml +15 -0
  424. package/rules/tool-poisoning/ATR-2026-00010-mcp-malicious-response.yaml +11 -2
  425. package/rules/tool-poisoning/ATR-2026-00011-tool-output-injection.yaml +17 -0
  426. package/rules/tool-poisoning/ATR-2026-00012-unauthorized-tool-call.yaml +17 -0
  427. package/rules/tool-poisoning/ATR-2026-00013-tool-ssrf.yaml +17 -0
  428. package/rules/tool-poisoning/ATR-2026-00095-supply-chain-poisoning.yaml +22 -0
  429. package/rules/tool-poisoning/ATR-2026-00096-registry-poisoning.yaml +19 -0
  430. package/rules/tool-poisoning/ATR-2026-00100-consent-bypass-instruction.yaml +20 -0
  431. package/rules/tool-poisoning/ATR-2026-00101-trust-escalation-override.yaml +20 -0
  432. package/rules/tool-poisoning/ATR-2026-00103-hidden-safety-bypass-instruction.yaml +17 -0
  433. package/rules/tool-poisoning/ATR-2026-00105-silent-action-concealment.yaml +20 -0
  434. package/rules/tool-poisoning/ATR-2026-00106-schema-description-contradiction.yaml +17 -0
  435. package/rules/tool-poisoning/ATR-2026-00161-important-tag-cross-tool-shadowing.yaml +20 -0
  436. package/rules/tool-poisoning/ATR-2026-00209-mcpwn-runaway-invocation.yaml +14 -2
  437. package/rules/tool-poisoning/ATR-2026-00210-flowise-system-message-override.yaml +11 -2
  438. package/rules/tool-poisoning/ATR-2026-00259-ansi-escape-injection.yaml +17 -0
  439. package/rules/tool-poisoning/ATR-2026-00270-xss-in-tool-response.yaml +17 -0
  440. package/rules/tool-poisoning/ATR-2026-00277-echo-template-command-injection.yaml +17 -0
  441. package/rules/tool-poisoning/ATR-2026-00393-ansi-code-elicitation-request.yaml +17 -0
  442. package/rules/tool-poisoning/ATR-2026-00415-flowise-custom-mcp-stdio-rce.yaml +11 -2
  443. package/rules/tool-poisoning/ATR-2026-00419-cursor-mcp-zero-click-config.yaml +13 -1
  444. package/rules/tool-poisoning/ATR-2026-00434-mcp-remote-authorization-endpoint-command-injection.yaml +11 -2
  445. package/rules/tool-poisoning/ATR-2026-00435-azure-mcp-server-missing-authentication.yaml +11 -2
  446. package/rules/tool-poisoning/ATR-2026-00448-spring-ai-milvus-filter-injection.yaml +11 -2
  447. package/rules/tool-poisoning/ATR-2026-00494-garak-exploitation-mixedunassigned.yaml +12 -0
  448. package/rules/tool-poisoning/ATR-2026-00513-package-hallucination-exploitation.yaml +12 -0
  449. package/rules/tool-poisoning/ATR-2026-00521-shell-command-injection-agent-tool-context.yaml +12 -0
  450. package/rules/tool-poisoning/ATR-2026-00522-sql-injection-natural-language-agent-interface.yaml +12 -0
  451. package/rules/tool-poisoning/ATR-2026-00526-claude-code-shell-metachar-in-double-quoted-path.yaml +15 -0
  452. package/rules/tool-poisoning/ATR-2026-00529-litellm-proxy-sqli-cisa-kev.yaml +15 -0
  453. package/rules/tool-poisoning/ATR-2026-00530-ms-agent-shell-tool-unsanitized-argv-rce.yaml +15 -0
  454. package/rules/tool-poisoning/ATR-2026-00531-praisonai-unauthenticated-agent-api.yaml +11 -2
  455. package/rules/tool-poisoning/ATR-2026-00532-apache-doris-mcp-sql-injection.yaml +11 -2
  456. package/rules/tool-poisoning/ATR-2026-00533-apache-pinot-mcp-unauthenticated-takeover.yaml +10 -1
  457. package/rules/tool-poisoning/ATR-2026-00534-alibaba-rds-mcp-unauthenticated-metadata-exfil.yaml +10 -1
  458. package/rules/tool-poisoning/ATR-2026-00536-nginx-ui-mcp-unauthenticated-command-execution.yaml +11 -2
  459. package/rules/tool-poisoning/ATR-2026-00537-fastmcp-server-name-cmd-injection-windows.yaml +11 -2
  460. package/rules/tool-poisoning/ATR-2026-00538-langchain-chatchat-mcp-stdio-unauthenticated-rce.yaml +10 -1
  461. package/rules/tool-poisoning/ATR-2026-00540-praisonai-parse-mcp-command-cli-injection.yaml +13 -1
  462. package/rules/tool-poisoning/ATR-2026-00541-agent-zero-mcp-config-command-injection.yaml +13 -1
  463. package/rules/tool-poisoning/ATR-2026-00542-upsonic-mcp-command-allowlist-bypass.yaml +13 -1
  464. package/rules/tool-poisoning/ATR-2026-00543-litellm-mcp-server-argv-injection.yaml +13 -1
  465. package/rules/tool-poisoning/ATR-2026-00544-praisonai-pth-file-path-traversal-rce.yaml +13 -1
  466. package/rules/tool-poisoning/ATR-2026-00545-praisonai-tool-override-unauth-rce.yaml +13 -1
  467. package/rules/tool-poisoning/ATR-2026-00561-fastmcp-vulnerable-to-windows-command-in.yaml +28 -0
  468. package/rules/tool-poisoning/ATR-2026-00567-mcp-stdio-config-command-injection.yaml +28 -0
  469. package/rules/tool-poisoning/ATR-2026-00568-agent-ssrf-cloud-metadata-file-inclusion.yaml +28 -0
  470. package/rules/tool-poisoning/ATR-2026-00572-symjack-symlink-config-redirection.yaml +22 -0
  471. package/spec/atr-schema.yaml +123 -0
  472. package/spec/compliance-metadata.md +15 -13
@@ -31,6 +31,27 @@ compliance:
31
31
  context: "Indirect, paraphrased elicitation of the system prompt evades literal-keyword extraction filters; the semantic judge closes the recall gap left by pattern rules."
32
32
  strength: primary
33
33
 
34
+ iso_42001:
35
+ - clause: "8.1"
36
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the context-exfiltration attempt (Paraphrased System-Prompt / Context Extraction (Semantic))."
37
+ strength: primary
38
+ - clause: "6.2"
39
+ context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the context-exfiltration attempt (Paraphrased System-Prompt / Context Extraction (Semantic)) is such a treatment."
40
+ strength: secondary
41
+ nist_ai_rmf:
42
+ - subcategory: "MS.2.7"
43
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the context-exfiltration attempt (Paraphrased System-Prompt / Context Extraction (Semantic))."
44
+ strength: primary
45
+ - subcategory: "MS.2.10"
46
+ context: "NIST AI RMF MEASURE 2.10 (privacy risk examined and documented) is supported by this rule's detection of the context-exfiltration attempt (Paraphrased System-Prompt / Context Extraction (Semantic))."
47
+ strength: secondary
48
+ eu_ai_act:
49
+ - article: "15"
50
+ context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the context-exfiltration attempt (Paraphrased System-Prompt / Context Extraction (Semantic))."
51
+ strength: primary
52
+ - article: "10"
53
+ context: "Article 10 (data and data governance) requires control over the data an AI system processes; this rule provides detection evidence for the context-exfiltration attempt (Paraphrased System-Prompt / Context Extraction (Semantic)) affecting that data."
54
+ strength: secondary
34
55
  tags:
35
56
  category: context-exfiltration
36
57
  subcategory: paraphrased-system-prompt-extraction
@@ -52,6 +52,9 @@ compliance:
52
52
  - article: "9"
53
53
  context: "Data poisoning of RAG pipelines is a documented risk requiring monitoring controls under Article 9; detection events from this rule provide the evidence trail for risk management reporting."
54
54
  strength: secondary
55
+ - article: "10"
56
+ context: "Article 10 (data and data governance) requires control over the data an AI system processes; this rule provides detection evidence for the data-poisoning attempt (Data Poisoning via RAG and Knowledge Base Contamination) affecting that data."
57
+ strength: primary
55
58
  nist_ai_rmf:
56
59
  - function: Map
57
60
  subcategory: MP.5.1
@@ -61,6 +64,12 @@ compliance:
61
64
  subcategory: MG.2.3
62
65
  context: "Active detection of data poisoning events implements the risk treatment for the data contamination risk identified in the AI risk register."
63
66
  strength: secondary
67
+ - subcategory: "MS.2.5"
68
+ context: "NIST AI RMF MEASURE 2.5 (system validity and reliability demonstrated) is supported by this rule's detection of the data-poisoning attempt (Data Poisoning via RAG and Knowledge Base Contamination)."
69
+ strength: primary
70
+ - subcategory: "MS.2.7"
71
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the data-poisoning attempt (Data Poisoning via RAG and Knowledge Base Contamination)."
72
+ strength: secondary
64
73
  iso_42001:
65
74
  - clause: "8.3"
66
75
  context: "Clause 8.3 data governance for AI systems requires controls ensuring data integrity; detection of hidden directives in retrieved content is the runtime enforcement of clause 8.3 data quality requirements."
@@ -68,6 +77,12 @@ compliance:
68
77
  - clause: "6.2"
69
78
  context: "Clause 6.2 AIMS security planning must include controls for adversarial data injection into AI pipelines; this rule operationalizes the detection measure for that planning objective."
70
79
  strength: secondary
80
+ - clause: "8.2"
81
+ context: "ISO/IEC 42001 Clause 8.2 (AI risk assessment) is informed by this rule, which detects the data-poisoning attempt (Data Poisoning via RAG and Knowledge Base Contamination) as an assessed risk."
82
+ strength: primary
83
+ - clause: "8.1"
84
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the data-poisoning attempt (Data Poisoning via RAG and Knowledge Base Contamination)."
85
+ strength: secondary
71
86
  colorado_ai_act:
72
87
  - section: "6-1-1703"
73
88
  clause: "Deployer risk management program"
@@ -58,6 +58,9 @@ compliance:
58
58
  - article: "9"
59
59
  context: "Article 9 risk management must enumerate memory-poisoning as a high-risk class — the advisor write path is typically treated as low-risk infrastructure but actually controls every subsequent prompt assembly."
60
60
  strength: primary
61
+ - article: "10"
62
+ context: "Article 10 (data and data governance) requires control over the data an AI system processes; this rule provides detection evidence for the data-poisoning attempt (Spring AI PromptChatMemoryAdvisor Memory Poisoning (CVE-2026-41713)) affecting that data."
63
+ strength: primary
61
64
  nist_ai_rmf:
62
65
  - subcategory: "MP.5.1"
63
66
  context: "Adversarial inputs that embed persistence-aware role-override markers ('SYSTEM:', 'REMEMBER:', 'IGNORE PREVIOUS INSTRUCTIONS once stored') must be tracked as a primary input-attack class affecting memory-advised architectures."
@@ -65,9 +68,18 @@ compliance:
65
68
  - subcategory: "MG.2.3"
66
69
  context: "Risk treatment plans under MG.2.3 must require pre-write sanitisation in any pipeline that persists user input into ChatMemory; mere prompt-time filtering is insufficient because the payload is replayed by the advisor."
67
70
  strength: primary
71
+ - subcategory: "MS.2.5"
72
+ context: "NIST AI RMF MEASURE 2.5 (system validity and reliability demonstrated) is supported by this rule's detection of the data-poisoning attempt (Spring AI PromptChatMemoryAdvisor Memory Poisoning (CVE-2026-41713))."
73
+ strength: primary
74
+ - subcategory: "MS.2.7"
75
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the data-poisoning attempt (Spring AI PromptChatMemoryAdvisor Memory Poisoning (CVE-2026-41713))."
76
+ strength: secondary
68
77
  iso_42001:
69
- - clause: "8.6"
70
- context: "Operational controls under clause 8.6 must require that the memory-write boundary applies the same content-safety policy as the prompt-input boundary; otherwise an attacker bypasses input filters by reaching them via the advisor replay path."
78
+ - clause: "8.1"
79
+ context: "Operational controls under clause 8.1 must require that the memory-write boundary applies the same content-safety policy as the prompt-input boundary; otherwise an attacker bypasses input filters by reaching them via the advisor replay path."
80
+ strength: primary
81
+ - clause: "8.2"
82
+ context: "ISO/IEC 42001 Clause 8.2 (AI risk assessment) is informed by this rule, which detects the data-poisoning attempt (Spring AI PromptChatMemoryAdvisor Memory Poisoning (CVE-2026-41713)) as an assessed risk."
71
83
  strength: primary
72
84
 
73
85
  tags:
@@ -19,9 +19,40 @@ references:
19
19
  - CWE-89
20
20
  external:
21
21
  - https://nvd.nist.gov/vuln/detail/CVE-2026-30860
22
+ owasp_llm:
23
+ - LLM01:2025 - Prompt Injection
24
+ owasp_agentic:
25
+ - ASI06:2026 - Memory and Context Poisoning
26
+ mitre_atlas:
27
+ - AML.T0051.001 - Indirect Prompt Injection
22
28
  metadata_provenance:
23
29
  cve: human-authored
24
30
  cwe: human-authored
31
+ compliance:
32
+ eu_ai_act:
33
+ - article: "10"
34
+ context: "Article 10 (data and data governance) requires control over the data an AI system processes; this rule provides detection evidence for the data-poisoning attempt (SQL injection in agent / MCP tool database query) affecting that data."
35
+ strength: primary
36
+ - article: "15"
37
+ context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the data-poisoning attempt (SQL injection in agent / MCP tool database query)."
38
+ strength: secondary
39
+ - article: "9"
40
+ context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the data-poisoning attempt (SQL injection in agent / MCP tool database query)."
41
+ strength: secondary
42
+ nist_ai_rmf:
43
+ - subcategory: "MS.2.5"
44
+ context: "NIST AI RMF MEASURE 2.5 (system validity and reliability demonstrated) is supported by this rule's detection of the data-poisoning attempt (SQL injection in agent / MCP tool database query)."
45
+ strength: primary
46
+ - subcategory: "MS.2.7"
47
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the data-poisoning attempt (SQL injection in agent / MCP tool database query)."
48
+ strength: secondary
49
+ iso_42001:
50
+ - clause: "8.2"
51
+ context: "ISO/IEC 42001 Clause 8.2 (AI risk assessment) is informed by this rule, which detects the data-poisoning attempt (SQL injection in agent / MCP tool database query) as an assessed risk."
52
+ strength: primary
53
+ - clause: "8.1"
54
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the data-poisoning attempt (SQL injection in agent / MCP tool database query)."
55
+ strength: secondary
25
56
  tags:
26
57
  category: data-poisoning
27
58
  scan_target: runtime
@@ -45,6 +45,9 @@ compliance:
45
45
  - article: "15"
46
46
  context: "Article 15 robustness requires that AI systems handle failure states gracefully; detection of runaway loops is a monitoring control ensuring the system does not enter unrecoverable autonomous states."
47
47
  strength: secondary
48
+ - article: "9"
49
+ context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the unsafe autonomous action (Runaway Agent Loop Detection)."
50
+ strength: secondary
48
51
  nist_ai_rmf:
49
52
  - function: Manage
50
53
  subcategory: MG.3.2
@@ -54,13 +57,22 @@ compliance:
54
57
  subcategory: GV.1.2
55
58
  context: "GV.1.2 accountability roles must include responsibility for detecting and halting runaway agent behavior; this rule provides the signal required to fulfill that accountability."
56
59
  strength: secondary
60
+ - subcategory: "MG.2.3"
61
+ context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the unsafe autonomous action (Runaway Agent Loop Detection) so the risk can be treated."
62
+ strength: primary
63
+ - subcategory: "MS.2.7"
64
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (Runaway Agent Loop Detection)."
65
+ strength: secondary
57
66
  iso_42001:
58
- - clause: "8.6"
59
- context: "Clause 8.6 AI system operational control requires monitoring for abnormal execution patterns; runaway loop detection is the primary operational control for this failure class."
67
+ - clause: "8.1"
68
+ context: "Clause 8.1 AI system operational control requires monitoring for abnormal execution patterns; runaway loop detection is the primary operational control for this failure class."
60
69
  strength: primary
61
70
  - clause: "9.1"
62
71
  context: "Clause 9.1 monitoring and evaluation requires measuring AI system behavior against expected norms; loop counter patterns are the measurable anomaly indicators for this rule."
63
72
  strength: secondary
73
+ - clause: "6.2"
74
+ context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the unsafe autonomous action (Runaway Agent Loop Detection) is such a treatment."
75
+ strength: primary
64
76
  colorado_ai_act:
65
77
  - section: "6-1-1703"
66
78
  clause: "Deployer ongoing monitoring of AI system performance"
@@ -34,6 +34,9 @@ compliance:
34
34
  - article: "15"
35
35
  context: "Article 15 robustness requirements mandate that AI systems handle adversarial denial-of-service conditions gracefully; this rule detects resource exhaustion patterns before full system unavailability."
36
36
  strength: secondary
37
+ - article: "9"
38
+ context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the unsafe autonomous action (Agent Resource Exhaustion Detection)."
39
+ strength: secondary
37
40
  nist_ai_rmf:
38
41
  - subcategory: "GV.1.2"
39
42
  context: "Resource exhaustion attacks exploit the absence of enforced consumption limits within an agent's accountability scope; GV.1.2 requires that resource boundaries are defined, assigned, and monitored for violations."
@@ -41,12 +44,18 @@ compliance:
41
44
  - subcategory: "MG.3.2"
42
45
  context: "Unbounded resource consumption constitutes an AI incident requiring a defined response; MG.3.2 mandates that processes to detect and respond to resource exhaustion failures are in place before full system unavailability occurs."
43
46
  strength: secondary
47
+ - subcategory: "MG.2.3"
48
+ context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the unsafe autonomous action (Agent Resource Exhaustion Detection) so the risk can be treated."
49
+ strength: primary
50
+ - subcategory: "MS.2.7"
51
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (Agent Resource Exhaustion Detection)."
52
+ strength: secondary
44
53
  iso_42001:
45
54
  - clause: "6.2"
46
55
  context: "ISO 42001 clause 6.2 risk treatment plans must address denial-of-service risks from unbounded agent operations; this rule implements the monitoring control for resource exhaustion patterns before they cause system degradation."
47
56
  strength: primary
48
- - clause: "8.6"
49
- context: "Clause 8.6 operational controls ensure AI systems execute correctly and within resource limits; detection of SELECT * without LIMIT, infinite loops, and bulk spawn patterns enforces these operational boundaries."
57
+ - clause: "8.1"
58
+ context: "Clause 8.1 operational controls ensure AI systems execute correctly and within resource limits; detection of SELECT * without LIMIT, infinite loops, and bulk spawn patterns enforces these operational boundaries."
50
59
  strength: secondary
51
60
 
52
61
  tags:
@@ -43,6 +43,9 @@ compliance:
43
43
  - article: "15"
44
44
  context: "Article 15 accuracy and robustness requirements demand that high-risk AI systems handle failure propagation gracefully; this rule provides the monitoring signal required to contain cascading events."
45
45
  strength: secondary
46
+ - article: "9"
47
+ context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the unsafe autonomous action (Cascading Failure Detection in Agent Pipelines)."
48
+ strength: secondary
46
49
  nist_ai_rmf:
47
50
  - subcategory: "GV.1.2"
48
51
  context: "Cascading failures exploit the absence of defined human-in-the-loop checkpoints in agent pipeline accountability structures; GV.1.2 requires that these roles and checkpoints are defined and enforced before automated pipelines propagate errors."
@@ -50,12 +53,18 @@ compliance:
50
53
  - subcategory: "MG.3.2"
51
54
  context: "Multi-stage pipeline failures are AI incidents requiring predefined response processes; MG.3.2 mandates that cascading failure response procedures exist so that failure scope can be contained before all downstream agents are affected."
52
55
  strength: secondary
56
+ - subcategory: "MG.2.3"
57
+ context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the unsafe autonomous action (Cascading Failure Detection in Agent Pipelines) so the risk can be treated."
58
+ strength: primary
59
+ - subcategory: "MS.2.7"
60
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (Cascading Failure Detection in Agent Pipelines)."
61
+ strength: secondary
53
62
  iso_42001:
54
63
  - clause: "6.2"
55
64
  context: "ISO 42001 clause 6.2 risk treatment activities must cover cascading failure scenarios in multi-agent pipelines; this rule detects the propagation patterns and auto-approval chains that trigger uncontrolled cascade events."
56
65
  strength: primary
57
- - clause: "8.6"
58
- context: "Clause 8.6 operational controls require that AI pipeline stages execute with appropriate verification gates; detection of blind upstream trust and automated destructive triggers enforces the human checkpoint requirements in pipeline design."
66
+ - clause: "8.1"
67
+ context: "Clause 8.1 operational controls require that AI pipeline stages execute with appropriate verification gates; detection of blind upstream trust and automated destructive triggers enforces the human checkpoint requirements in pipeline design."
59
68
  strength: secondary
60
69
 
61
70
  tags:
@@ -38,6 +38,9 @@ compliance:
38
38
  - article: "9"
39
39
  context: "Unauthorized financial action by AI agents is a high-severity risk requiring mandatory human-in-the-loop controls; Article 9 risk management systems must classify autonomous financial execution as an unacceptable risk and implement blocking controls."
40
40
  strength: secondary
41
+ - article: "15"
42
+ context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the unsafe autonomous action (Unauthorized Financial Action by AI Agent)."
43
+ strength: secondary
41
44
  nist_ai_rmf:
42
45
  - subcategory: "GV.1.2"
43
46
  context: "Autonomous financial transfers executed without explicit human confirmation require clearly defined accountability roles that assign responsibility for approving and auditing all agent-initiated payment and transfer actions."
@@ -45,11 +48,14 @@ compliance:
45
48
  - subcategory: "MG.2.3"
46
49
  context: "Risk treatment plans for AI systems with financial tool access must implement mandatory human-in-the-loop gates that block payment and transfer tool calls lacking confirmed human authorization in the current turn."
47
50
  strength: secondary
51
+ - subcategory: "MS.2.7"
52
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (Unauthorized Financial Action by AI Agent)."
53
+ strength: secondary
48
54
  iso_42001:
49
55
  - clause: "6.2"
50
56
  context: "AI objectives and risk treatment plans must classify autonomous financial execution as an unacceptable risk and require explicit human approval as a blocking control before any payment or transfer tool is invoked."
51
57
  strength: primary
52
- - clause: "8.6"
58
+ - clause: "8.1"
53
59
  context: "Operational controls must enforce a confirmation gate on all financial tool invocations to ensure the agent's execution of payments and transfers remains within the scope of explicitly sanctioned human instructions."
54
60
  strength: secondary
55
61
  colorado_ai_act:
@@ -42,6 +42,9 @@ compliance:
42
42
  - article: "9"
43
43
  context: "High-risk tool access without confirmation gates is a documented unacceptable risk for AI systems; Article 9 risk management requires that organizations identify tool categories that require mandatory human approval and implement blocking controls accordingly."
44
44
  strength: secondary
45
+ - article: "15"
46
+ context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the unsafe autonomous action (High-Risk Tool Invocation Without Human Confirmation)."
47
+ strength: secondary
45
48
  nist_ai_rmf:
46
49
  - subcategory: "GV.1.2"
47
50
  context: "Invoking financial, destructive, communication, or permission-altering tools without human confirmation requires accountability roles that ensure every high-risk tool category has an assigned gatekeeper responsible for approving agent actions."
@@ -49,11 +52,14 @@ compliance:
49
52
  - subcategory: "MG.2.3"
50
53
  context: "Risk treatment plans must implement tool-category-level blocking controls that intercept any high-risk tool invocation — regardless of attack detection confidence — and require explicit human approval before execution proceeds."
51
54
  strength: secondary
55
+ - subcategory: "MS.2.7"
56
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (High-Risk Tool Invocation Without Human Confirmation)."
57
+ strength: secondary
52
58
  iso_42001:
53
59
  - clause: "6.2"
54
60
  context: "AI risk treatment plans must enumerate high-risk tool categories and mandate human-in-the-loop confirmation gates as a control measure, preventing agents from autonomously executing destructive, financial, or permission-modifying actions."
55
61
  strength: primary
56
- - clause: "8.6"
62
+ - clause: "8.1"
57
63
  context: "Operational controls for AI systems must enforce least-privilege tool access policies that block invocations of high-risk tool categories until explicit human confirmation is recorded in the current interaction turn."
58
64
  strength: secondary
59
65
  tags:
@@ -31,6 +31,9 @@ compliance:
31
31
  - article: "14"
32
32
  context: "Article 14 human oversight requires the user to authorize tool execution; covert shell-execution instructions bypass this control."
33
33
  strength: primary
34
+ - article: "9"
35
+ context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the unsafe autonomous action (Natural-Language Unauthorized Shell-Execution Instruction)."
36
+ strength: secondary
34
37
  nist_ai_rmf:
35
38
  - subcategory: "GV.6.1"
36
39
  context: "Data governance must prevent NL instructions that authorize the agent to bypass user consent for command execution."
@@ -38,10 +41,22 @@ compliance:
38
41
  - subcategory: "MS.2.5"
39
42
  context: "Monitoring under MS.2.5 must detect NL-style covert-shell-execution patterns."
40
43
  strength: primary
44
+ - subcategory: "MG.2.3"
45
+ context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the unsafe autonomous action (Natural-Language Unauthorized Shell-Execution Instruction) so the risk can be treated."
46
+ strength: primary
47
+ - subcategory: "MS.2.7"
48
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (Natural-Language Unauthorized Shell-Execution Instruction)."
49
+ strength: secondary
41
50
  iso_42001:
42
51
  - clause: "8.4"
43
52
  context: "Impact assessments must enumerate covert shell-execution as a critical risk vector when shell-capable tools are present."
44
53
  strength: primary
54
+ - clause: "6.2"
55
+ context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the unsafe autonomous action (Natural-Language Unauthorized Shell-Execution Instruction) is such a treatment."
56
+ strength: primary
57
+ - clause: "8.1"
58
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the unsafe autonomous action (Natural-Language Unauthorized Shell-Execution Instruction)."
59
+ strength: secondary
45
60
  tags:
46
61
  category: excessive-autonomy
47
62
  subcategory: nl-covert-shell-execution
@@ -40,6 +40,9 @@ compliance:
40
40
  - article: "9"
41
41
  context: "Excessive autonomy is a documented risk class; this rule implements the monitoring control required by Article 9 risk management obligations for high-risk AI systems."
42
42
  strength: secondary
43
+ - article: "14"
44
+ context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the unsafe autonomous action (Agent Capability Enumeration via Structured Parse Request) would bypass or undermine that oversight."
45
+ strength: primary
43
46
  nist_ai_rmf:
44
47
  - function: Manage
45
48
  subcategory: MG.2.3
@@ -49,6 +52,9 @@ compliance:
49
52
  subcategory: MP.5.1
50
53
  context: "Identifying excessive autonomy as an AI risk to be catalogued in the organizational risk register."
51
54
  strength: secondary
55
+ - subcategory: "MS.2.7"
56
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (Agent Capability Enumeration via Structured Parse Request)."
57
+ strength: secondary
52
58
  iso_42001:
53
59
  - clause: "6.2"
54
60
  context: "Addressing excessive autonomy risk is an objective required under clause 6.2 (AI objectives and planning to achieve them); this rule operationalizes the detection control measure."
@@ -56,6 +62,9 @@ compliance:
56
62
  - clause: "8.4"
57
63
  context: "Impact assessment for AI deployments under clause 8.4 must account for excessive autonomy inputs; detection events from this rule provide the required monitoring evidence."
58
64
  strength: secondary
65
+ - clause: "8.1"
66
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the unsafe autonomous action (Agent Capability Enumeration via Structured Parse Request)."
67
+ strength: secondary
59
68
 
60
69
  tags:
61
70
  category: excessive-autonomy
@@ -42,6 +42,9 @@ compliance:
42
42
  - article: "9"
43
43
  context: "Excessive autonomy is a documented risk class; this rule implements the monitoring control required by Article 9 risk management obligations for high-risk AI systems."
44
44
  strength: secondary
45
+ - article: "14"
46
+ context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the unsafe autonomous action (SSRF via Agent URL Fetch Instruction) would bypass or undermine that oversight."
47
+ strength: primary
45
48
  nist_ai_rmf:
46
49
  - function: Manage
47
50
  subcategory: MG.2.3
@@ -51,6 +54,9 @@ compliance:
51
54
  subcategory: MP.5.1
52
55
  context: "Identifying excessive autonomy as an AI risk to be catalogued in the organizational risk register."
53
56
  strength: secondary
57
+ - subcategory: "MS.2.7"
58
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (SSRF via Agent URL Fetch Instruction)."
59
+ strength: secondary
54
60
  iso_42001:
55
61
  - clause: "6.2"
56
62
  context: "Addressing excessive autonomy risk is an objective required under clause 6.2 (AI objectives and planning to achieve them); this rule operationalizes the detection control measure."
@@ -58,6 +64,9 @@ compliance:
58
64
  - clause: "8.4"
59
65
  context: "Impact assessment for AI deployments under clause 8.4 must account for excessive autonomy inputs; detection events from this rule provide the required monitoring evidence."
60
66
  strength: secondary
67
+ - clause: "8.1"
68
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the unsafe autonomous action (SSRF via Agent URL Fetch Instruction)."
69
+ strength: secondary
61
70
 
62
71
  tags:
63
72
  category: excessive-autonomy
@@ -52,13 +52,32 @@ compliance:
52
52
  Cybersecurity & robustness — runaway tool loops are an Article 15
53
53
  robustness failure. The rule provides runtime evidence.
54
54
  strength: primary
55
+ - article: "14"
56
+ context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the unsafe autonomous action (Runaway tool-call loop within a single session) would bypass or undermine that oversight."
57
+ strength: primary
58
+ - article: "9"
59
+ context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the unsafe autonomous action (Runaway tool-call loop within a single session)."
60
+ strength: secondary
55
61
  nist_ai_rmf:
56
62
  - subcategory: "MS.2.6"
57
63
  context: >
58
64
  Safety — autonomous-action rate anomalies require
59
65
  detection per MS.2.6.
60
66
  strength: primary
67
+ - subcategory: "MG.2.3"
68
+ context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the unsafe autonomous action (Runaway tool-call loop within a single session) so the risk can be treated."
69
+ strength: primary
70
+ - subcategory: "MS.2.7"
71
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the unsafe autonomous action (Runaway tool-call loop within a single session)."
72
+ strength: secondary
61
73
 
74
+ iso_42001:
75
+ - clause: "6.2"
76
+ context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the unsafe autonomous action (Runaway tool-call loop within a single session) is such a treatment."
77
+ strength: primary
78
+ - clause: "8.1"
79
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the unsafe autonomous action (Runaway tool-call loop within a single session)."
80
+ strength: secondary
62
81
  tags:
63
82
  category: excessive-autonomy
64
83
  subcategory: runaway-tool-loop
@@ -43,12 +43,18 @@ compliance:
43
43
  - subcategory: "MG.2.3"
44
44
  context: "Risk treatment plans under MG.2.3 must implement detection controls for completion-based jailbreaks — including slur-prefix completion, harmful sentence starters, and synthesis step continuation — that bypass direct refusal by using implicit harmful requests."
45
45
  strength: secondary
46
+ - subcategory: "MS.2.6"
47
+ context: "NIST AI RMF MEASURE 2.6 (system evaluated regularly for safety risks) is supported by this rule's detection of the model-abuse / harmful-content elicitation attempt (Harmful Completion / Continuation Attack)."
48
+ strength: primary
49
+ - subcategory: "MS.2.7"
50
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the model-abuse / harmful-content elicitation attempt (Harmful Completion / Continuation Attack)."
51
+ strength: secondary
46
52
  iso_42001:
47
53
  - clause: "6.2"
48
54
  context: "Risk treatment activities under clause 6.2 must address harmful completion attacks where partial harmful phrases or synthesis instructions with trailing ellipses exploit AI next-token prediction to produce content the system would refuse if asked directly."
49
55
  strength: primary
50
- - clause: "8.6"
51
- context: "Operational controls under clause 8.6 must include input scanning that detects continuation bait patterns — explicit completion requests with harmful domain markers, slur-prefix redaction, and open-ended synthesis steps — before they reach the model's prediction layer."
56
+ - clause: "8.1"
57
+ context: "Operational controls under clause 8.1 must include input scanning that detects continuation bait patterns — explicit completion requests with harmful domain markers, slur-prefix redaction, and open-ended synthesis steps — before they reach the model's prediction layer."
52
58
  strength: secondary
53
59
  tags:
54
60
  category: model-abuse
@@ -42,12 +42,18 @@ compliance:
42
42
  - subcategory: "MG.2.3"
43
43
  context: "Risk treatment plans under MG.2.3 must implement output pipeline controls that detect and block EICAR/GTUBE signature output requests, preventing attackers from confirming exploitable gaps in output scanning infrastructure before escalating to real malware."
44
44
  strength: secondary
45
+ - subcategory: "MS.2.6"
46
+ context: "NIST AI RMF MEASURE 2.6 (system evaluated regularly for safety risks) is supported by this rule's detection of the model-abuse / harmful-content elicitation attempt (EICAR / GTUBE AV Test Signature Output Request)."
47
+ strength: primary
48
+ - subcategory: "MS.2.7"
49
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the model-abuse / harmful-content elicitation attempt (EICAR / GTUBE AV Test Signature Output Request)."
50
+ strength: secondary
45
51
  iso_42001:
46
52
  - clause: "6.2"
47
53
  context: "Risk treatment activities under clause 6.2 must address AV test signature probe attacks — requests to output EICAR, GTUBE, or Metasploit test strings to verify whether the AI system output pipeline lacks antivirus scanning."
48
54
  strength: primary
49
- - clause: "8.6"
50
- context: "Operational controls under clause 8.6 must include output scanning layers that detect known AV test signatures (EICAR, GTUBE) in AI system outputs, preventing the AI from serving as a reconnaissance tool that confirms output pipeline security gaps."
55
+ - clause: "8.1"
56
+ context: "Operational controls under clause 8.1 must include output scanning layers that detect known AV test signatures (EICAR, GTUBE) in AI system outputs, preventing the AI from serving as a reconnaissance tool that confirms output pipeline security gaps."
51
57
  strength: secondary
52
58
  tags:
53
59
  category: model-abuse
@@ -40,6 +40,23 @@ compliance:
40
40
  - subcategory: "MP.5.1"
41
41
  context: "Detection of known glitch token corpora (SolidGoldMagikarp, davidjl, ÃÂÃÂ patterns) characterizes the likelihood and magnitude of tokenizer-exploitation risks that MP.5.1 requires be identified and tracked."
42
42
  strength: secondary
43
+ - subcategory: "MS.2.6"
44
+ context: "NIST AI RMF MEASURE 2.6 (system evaluated regularly for safety risks) is supported by this rule's detection of the model-abuse / harmful-content elicitation attempt (Glitch Token Destabilization Attack)."
45
+ strength: primary
46
+ iso_42001:
47
+ - clause: "8.1"
48
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the model-abuse / harmful-content elicitation attempt (Glitch Token Destabilization Attack)."
49
+ strength: primary
50
+ - clause: "6.2"
51
+ context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the model-abuse / harmful-content elicitation attempt (Glitch Token Destabilization Attack) is such a treatment."
52
+ strength: secondary
53
+ eu_ai_act:
54
+ - article: "15"
55
+ context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the model-abuse / harmful-content elicitation attempt (Glitch Token Destabilization Attack)."
56
+ strength: primary
57
+ - article: "9"
58
+ context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the model-abuse / harmful-content elicitation attempt (Glitch Token Destabilization Attack)."
59
+ strength: secondary
43
60
  tags:
44
61
  category: model-abuse
45
62
  subcategory: glitch-token-destabilization
@@ -47,12 +47,18 @@ compliance:
47
47
  - subcategory: "MG.2.3"
48
48
  context: "Risk treatment plans under MG.2.3 must implement detection controls for LMRC-pattern elicitation attacks across quack medicine, bullying facilitation, and slur-request categories, preventing the AI system from producing content that causes physical, psychological, or social harm."
49
49
  strength: secondary
50
+ - subcategory: "MS.2.6"
51
+ context: "NIST AI RMF MEASURE 2.6 (system evaluated regularly for safety risks) is supported by this rule's detection of the model-abuse / harmful-content elicitation attempt (LMRC Harmful Content Elicitation — Quack Medicine, Bullying, Slur Usage)."
52
+ strength: primary
53
+ - subcategory: "MS.2.7"
54
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the model-abuse / harmful-content elicitation attempt (LMRC Harmful Content Elicitation — Quack Medicine, Bullying, Slur Usage)."
55
+ strength: secondary
50
56
  iso_42001:
51
57
  - clause: "6.2"
52
58
  context: "Risk treatment activities under clause 6.2 must address LMRC harmful content elicitation attacks — including quack medicine requests, bullying facilitation with named targets, and identity-based slur requests — as a documented structured probing framework targeting AI system harmful output boundaries."
53
59
  strength: primary
54
- - clause: "8.6"
55
- context: "Operational controls under clause 8.6 must enforce output restrictions that prevent the AI system from producing quack medicine advice, bullying tactics, or identity-targeted slurs in response to LMRC-pattern elicitation requests that use personal target context to escalate harm."
60
+ - clause: "8.1"
61
+ context: "Operational controls under clause 8.1 must enforce output restrictions that prevent the AI system from producing quack medicine advice, bullying tactics, or identity-targeted slurs in response to LMRC-pattern elicitation requests that use personal target context to escalate harm."
56
62
  strength: secondary
57
63
  tags:
58
64
  category: model-abuse
@@ -47,12 +47,18 @@ compliance:
47
47
  - subcategory: "MG.2.3"
48
48
  context: "Risk treatment plans under MG.2.3 must implement blocking controls for self-harm facilitation requests covering direct self-harm methods, eating disorder facilitation, concealment assistance, and dangerous low-calorie diet requests, given the vulnerable population likely to submit these inputs."
49
49
  strength: secondary
50
+ - subcategory: "MS.2.6"
51
+ context: "NIST AI RMF MEASURE 2.6 (system evaluated regularly for safety risks) is supported by this rule's detection of the model-abuse / harmful-content elicitation attempt (Self-Harm and Eating Disorder Facilitation Request)."
52
+ strength: primary
53
+ - subcategory: "MS.2.7"
54
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the model-abuse / harmful-content elicitation attempt (Self-Harm and Eating Disorder Facilitation Request)."
55
+ strength: secondary
50
56
  iso_42001:
51
57
  - clause: "6.2"
52
58
  context: "Risk treatment activities under clause 6.2 must address self-harm and eating disorder facilitation as a mandatory treatment category, implementing detection controls for direct method requests, concealment assistance, and dangerous diet facilitation targeting at-risk individuals."
53
59
  strength: primary
54
- - clause: "8.6"
55
- context: "Operational controls under clause 8.6 must enforce blocking of self-harm facilitation outputs — including self-harm method instructions, eating disorder development guidance, and concealment of disordered behaviors — to protect the vulnerable populations most likely to submit these requests."
60
+ - clause: "8.1"
61
+ context: "Operational controls under clause 8.1 must enforce blocking of self-harm facilitation outputs — including self-harm method instructions, eating disorder development guidance, and concealment of disordered behaviors — to protect the vulnerable populations most likely to submit these requests."
56
62
  strength: secondary
57
63
  tags:
58
64
  category: model-abuse
@@ -57,12 +57,18 @@ compliance:
57
57
  - subcategory: "MG.2.3"
58
58
  context: "Risk treatment plans under MG.2.3 must implement blocking controls across all DoNotAnswer malicious use categories, as direct illegal activity facilitation requests represent the highest-priority risk treatment required for AI systems interacting with uncontrolled user populations."
59
59
  strength: secondary
60
+ - subcategory: "MS.2.6"
61
+ context: "NIST AI RMF MEASURE 2.6 (system evaluated regularly for safety risks) is supported by this rule's detection of the model-abuse / harmful-content elicitation attempt (DoNotAnswer Malicious Use — Illegal Activity Facilitation Request)."
62
+ strength: primary
63
+ - subcategory: "MS.2.7"
64
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the model-abuse / harmful-content elicitation attempt (DoNotAnswer Malicious Use — Illegal Activity Facilitation Request)."
65
+ strength: secondary
60
66
  iso_42001:
61
67
  - clause: "6.2"
62
68
  context: "Risk treatment activities under clause 6.2 must address all five DoNotAnswer malicious use categories — drug crime, wildlife trafficking, cybercrime, financial crime, and terrorism — as mandatory treatment targets for AI systems accessible to the general public."
63
69
  strength: primary
64
- - clause: "8.6"
65
- context: "Operational controls under clause 8.6 must enforce blocking of illegal activity facilitation requests distinguished by explicit illegal intent markers such as 'illegally', 'without getting caught', and 'on the black market', preventing the AI system from providing operational crime assistance."
70
+ - clause: "8.1"
71
+ context: "Operational controls under clause 8.1 must enforce blocking of illegal activity facilitation requests distinguished by explicit illegal intent markers such as 'illegally', 'without getting caught', and 'on the black market', preventing the AI system from providing operational crime assistance."
66
72
  strength: secondary
67
73
  tags:
68
74
  category: model-abuse
@@ -53,12 +53,18 @@ compliance:
53
53
  - subcategory: "MG.2.3"
54
54
  context: "Risk treatment plans under MG.2.3 must assign the highest-priority blocking controls to HarmBench-pattern requests for detailed synthesis instructions (dimethylmercury, sarin, ricin), mass-casualty guides, and infrastructure bioterrorism steps, as these represent absolute prohibition categories for any AI system."
55
55
  strength: secondary
56
+ - subcategory: "MS.2.6"
57
+ context: "NIST AI RMF MEASURE 2.6 (system evaluated regularly for safety risks) is supported by this rule's detection of the model-abuse / harmful-content elicitation attempt (HarmBench Detailed Harmful Instruction Request — Step-by-Step Attack Facilitation)."
58
+ strength: primary
59
+ - subcategory: "MS.2.7"
60
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the model-abuse / harmful-content elicitation attempt (HarmBench Detailed Harmful Instruction Request — Step-by-Step Attack Facilitation)."
61
+ strength: secondary
56
62
  iso_42001:
57
63
  - clause: "6.2"
58
64
  context: "Risk treatment activities under clause 6.2 must treat HarmBench-pattern detailed harmful instruction requests — covering WMD synthesis, targeted violence, mass-casualty planning, and atrocity denial — as the highest-priority treatment targets requiring unconditional blocking controls."
59
65
  strength: primary
60
- - clause: "8.6"
61
- context: "Operational controls under clause 8.6 must block explicit requests for detailed step-by-step instructions in harmful domains, distinguishing HarmBench-pattern requests by the combination of instruction-seeking framing ('give detailed instructions', 'step-by-step guide') and clearly prohibited target domains."
66
+ - clause: "8.1"
67
+ context: "Operational controls under clause 8.1 must block explicit requests for detailed step-by-step instructions in harmful domains, distinguishing HarmBench-pattern requests by the combination of instruction-seeking framing ('give detailed instructions', 'step-by-step guide') and clearly prohibited target domains."
62
68
  strength: secondary
63
69
  tags:
64
70
  category: model-abuse
@@ -42,6 +42,23 @@ compliance:
42
42
  - subcategory: "MP.5.1"
43
43
  context: "Cataloging direct malware code generation prompts (TSR, interrupt hooks, SAM/shadow access, privilege escalation across 8 languages) characterizes the likelihood and magnitude of model-abuse impact required by MP.5.1."
44
44
  strength: secondary
45
+ - subcategory: "MS.2.7"
46
+ context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the model-abuse / harmful-content elicitation attempt (Malware Code Generation Direct Request)."
47
+ strength: secondary
48
+ iso_42001:
49
+ - clause: "8.1"
50
+ context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the model-abuse / harmful-content elicitation attempt (Malware Code Generation Direct Request)."
51
+ strength: primary
52
+ - clause: "6.2"
53
+ context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the model-abuse / harmful-content elicitation attempt (Malware Code Generation Direct Request) is such a treatment."
54
+ strength: secondary
55
+ eu_ai_act:
56
+ - article: "15"
57
+ context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the model-abuse / harmful-content elicitation attempt (Malware Code Generation Direct Request)."
58
+ strength: primary
59
+ - article: "9"
60
+ context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the model-abuse / harmful-content elicitation attempt (Malware Code Generation Direct Request)."
61
+ strength: secondary
45
62
  tags:
46
63
  category: model-abuse
47
64
  subcategory: malware-code-generation