agent-threat-rules 2.2.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (424) hide show
  1. package/README.md +365 -327
  2. package/dist/cli/scan-handler.d.ts +6 -0
  3. package/dist/cli/scan-handler.d.ts.map +1 -1
  4. package/dist/cli/scan-handler.js +27 -4
  5. package/dist/cli/scan-handler.js.map +1 -1
  6. package/dist/cli/semantic-judge-config.d.ts +7 -0
  7. package/dist/cli/semantic-judge-config.d.ts.map +1 -0
  8. package/dist/cli/semantic-judge-config.js +44 -0
  9. package/dist/cli/semantic-judge-config.js.map +1 -0
  10. package/dist/cli.js +183 -1
  11. package/dist/cli.js.map +1 -1
  12. package/dist/engine.d.ts +66 -1
  13. package/dist/engine.d.ts.map +1 -1
  14. package/dist/engine.js +420 -3
  15. package/dist/engine.js.map +1 -1
  16. package/dist/eval/eval-harness.d.ts.map +1 -1
  17. package/dist/eval/eval-harness.js +9 -0
  18. package/dist/eval/eval-harness.js.map +1 -1
  19. package/dist/eval/run-hackaprompt-benchmark.js +9 -0
  20. package/dist/eval/run-hackaprompt-benchmark.js.map +1 -1
  21. package/dist/eval/run-pint-benchmark.js +9 -0
  22. package/dist/eval/run-pint-benchmark.js.map +1 -1
  23. package/dist/eval/skill-benchmark.d.ts +11 -0
  24. package/dist/eval/skill-benchmark.d.ts.map +1 -1
  25. package/dist/eval/skill-benchmark.js +57 -0
  26. package/dist/eval/skill-benchmark.js.map +1 -1
  27. package/dist/index.d.ts +5 -2
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +2 -0
  30. package/dist/index.js.map +1 -1
  31. package/dist/judges/openai-compatible.d.ts +33 -0
  32. package/dist/judges/openai-compatible.d.ts.map +1 -0
  33. package/dist/judges/openai-compatible.js +145 -0
  34. package/dist/judges/openai-compatible.js.map +1 -0
  35. package/dist/mcp-server.d.ts.map +1 -1
  36. package/dist/mcp-server.js +6 -1
  37. package/dist/mcp-server.js.map +1 -1
  38. package/dist/measurement/from-eval-harness.d.ts +70 -0
  39. package/dist/measurement/from-eval-harness.d.ts.map +1 -0
  40. package/dist/measurement/from-eval-harness.js +49 -0
  41. package/dist/measurement/from-eval-harness.js.map +1 -0
  42. package/dist/measurement/schema.d.ts +152 -0
  43. package/dist/measurement/schema.d.ts.map +1 -0
  44. package/dist/measurement/schema.js +178 -0
  45. package/dist/measurement/schema.js.map +1 -0
  46. package/dist/measurement/write.d.ts +64 -0
  47. package/dist/measurement/write.d.ts.map +1 -0
  48. package/dist/measurement/write.js +163 -0
  49. package/dist/measurement/write.js.map +1 -0
  50. package/dist/rule-scaffolder.d.ts +26 -0
  51. package/dist/rule-scaffolder.d.ts.map +1 -1
  52. package/dist/rule-scaffolder.js +221 -6
  53. package/dist/rule-scaffolder.js.map +1 -1
  54. package/dist/semantic-evaluator.d.ts +54 -0
  55. package/dist/semantic-evaluator.d.ts.map +1 -0
  56. package/dist/semantic-evaluator.js +131 -0
  57. package/dist/semantic-evaluator.js.map +1 -0
  58. package/dist/trace-evaluator.d.ts +22 -0
  59. package/dist/trace-evaluator.d.ts.map +1 -0
  60. package/dist/trace-evaluator.js +249 -0
  61. package/dist/trace-evaluator.js.map +1 -0
  62. package/dist/types.d.ts +152 -0
  63. package/dist/types.d.ts.map +1 -1
  64. package/package.json +5 -3
  65. package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml +1 -1
  66. package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml +1 -1
  67. package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml +1 -1
  68. package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml +1 -1
  69. package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml +1 -1
  70. package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +1 -1
  71. package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +1 -1
  72. package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml +1 -1
  73. package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +1 -1
  74. package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +1 -1
  75. package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +1 -1
  76. package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml +1 -1
  77. package/rules/agent-manipulation/ATR-2026-00268-tense-framing-bypass.yaml +1 -1
  78. package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml +1 -1
  79. package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml +1 -1
  80. package/rules/agent-manipulation/ATR-2026-00273-dan-developer-mode-persona.yaml +1 -1
  81. package/rules/agent-manipulation/ATR-2026-00287-threaten-json-coercive-output-threat.yaml +1 -1
  82. package/rules/agent-manipulation/ATR-2026-00288-false-premise-injection.yaml +1 -1
  83. package/rules/agent-manipulation/ATR-2026-00301-tap-tree-of-attacks-jailbreak.yaml +1 -1
  84. package/rules/agent-manipulation/ATR-2026-00302-anti-dan-inverted-filter-persona.yaml +1 -1
  85. package/rules/agent-manipulation/ATR-2026-00303-devmode-ranti-profanity-coercion.yaml +1 -1
  86. package/rules/agent-manipulation/ATR-2026-00304-chatgpt-image-unlocker-markdown-injection.yaml +1 -1
  87. package/rules/agent-manipulation/ATR-2026-00305-dan-mode-ablation-benchmark-coercion.yaml +1 -1
  88. package/rules/agent-manipulation/ATR-2026-00306-autodan-genetic-jailbreak-suffix.yaml +1 -1
  89. package/rules/agent-manipulation/ATR-2026-00307-inthewild-jailbreak-corpus-signature.yaml +1 -1
  90. package/rules/agent-manipulation/ATR-2026-00314-amoral-unfiltered-custom-persona-jailbreak.yaml +1 -1
  91. package/rules/agent-manipulation/ATR-2026-00317-free-of-restrictions-named-persona.yaml +1 -1
  92. package/rules/agent-manipulation/ATR-2026-00318-moralizing-rant-then-unfiltered-bypass.yaml +1 -1
  93. package/rules/agent-manipulation/ATR-2026-00319-developer-mode-dual-response-format.yaml +1 -1
  94. package/rules/agent-manipulation/ATR-2026-00320-opposite-day-boolean-opposite-machine.yaml +1 -1
  95. package/rules/agent-manipulation/ATR-2026-00322-fictional-world-format-override-jailbreak.yaml +1 -1
  96. package/rules/agent-manipulation/ATR-2026-00323-dual-persona-simulation-jailbreak.yaml +1 -1
  97. package/rules/agent-manipulation/ATR-2026-00324-neurosemantical-inversitis-disease-jailbreak.yaml +1 -1
  98. package/rules/agent-manipulation/ATR-2026-00325-bob-hypothetical-unrestricted-chatbot.yaml +1 -1
  99. package/rules/agent-manipulation/ATR-2026-00326-fake-developer-authority-chatdan-override.yaml +1 -1
  100. package/rules/agent-manipulation/ATR-2026-00327-ai-ethics-law-bypass-declaration.yaml +1 -1
  101. package/rules/agent-manipulation/ATR-2026-00328-named-mode-persona-override.yaml +1 -1
  102. package/rules/agent-manipulation/ATR-2026-00329-no-withhold-factual-resource-persona.yaml +1 -1
  103. package/rules/agent-manipulation/ATR-2026-00330-yoja-allcaps-settings-block-persona.yaml +1 -1
  104. package/rules/agent-manipulation/ATR-2026-00331-terminal-cli-harmful-function-simulator.yaml +1 -1
  105. package/rules/agent-manipulation/ATR-2026-00332-hacker-chan-culture-offensive-ai-persona.yaml +1 -1
  106. package/rules/agent-manipulation/ATR-2026-00333-disease-animal-dual-ai-chatbot.yaml +1 -1
  107. package/rules/agent-manipulation/ATR-2026-00334-two-entity-token-splitting-game.yaml +1 -1
  108. package/rules/agent-manipulation/ATR-2026-00335-key-value-template-persona-injection.yaml +1 -1
  109. package/rules/agent-manipulation/ATR-2026-00336-brm-false-information-machine-persona.yaml +1 -1
  110. package/rules/agent-manipulation/ATR-2026-00337-obfuscated-system-announcement-injection.yaml +1 -1
  111. package/rules/agent-manipulation/ATR-2026-00338-persongpt-hash-prefix-activation.yaml +1 -1
  112. package/rules/agent-manipulation/ATR-2026-00339-bom-boolean-opposite-machine-persona-claim.yaml +1 -1
  113. package/rules/agent-manipulation/ATR-2026-00340-h4x-amoral-unfiltered-character.yaml +1 -1
  114. package/rules/agent-manipulation/ATR-2026-00341-char-rules-shortform-persona-injection.yaml +1 -1
  115. package/rules/agent-manipulation/ATR-2026-00342-unicode-encoding-obfuscation-injection.yaml +1 -1
  116. package/rules/agent-manipulation/ATR-2026-00343-lie-truth-dual-personality-uncensored.yaml +1 -1
  117. package/rules/agent-manipulation/ATR-2026-00344-alt-ai-same-capabilities-no-restrictions.yaml +1 -1
  118. package/rules/agent-manipulation/ATR-2026-00345-nlp-based-os-fictional-terminal-override.yaml +1 -1
  119. package/rules/agent-manipulation/ATR-2026-00346-jailbreak-emoji-unlock-dual-format.yaml +1 -1
  120. package/rules/agent-manipulation/ATR-2026-00347-yin-yang-hidden-knowledge-hidden-human-sim.yaml +1 -1
  121. package/rules/agent-manipulation/ATR-2026-00348-arc-autonomous-real-calculation-rulebreaker.yaml +1 -1
  122. package/rules/agent-manipulation/ATR-2026-00349-hgttg-hitchhikers-guide-no-restriction-oracle.yaml +1 -1
  123. package/rules/agent-manipulation/ATR-2026-00350-black-alert-no-ethics-writing-assistant.yaml +1 -1
  124. package/rules/agent-manipulation/ATR-2026-00351-dan-ofkilter-sms-abbreviated-persona.yaml +1 -1
  125. package/rules/agent-manipulation/ATR-2026-00352-trainergpt-fix-my-ai-harmful-output-extraction.yaml +1 -1
  126. package/rules/agent-manipulation/ATR-2026-00353-dual-chatgpt-dan-opposite-personality-format.yaml +1 -1
  127. package/rules/agent-manipulation/ATR-2026-00354-reverse-harm-regardless-oracle-persona.yaml +1 -1
  128. package/rules/agent-manipulation/ATR-2026-00355-ugh-anti-human-nefarious-ai-persona.yaml +1 -1
  129. package/rules/agent-manipulation/ATR-2026-00356-google-gato-lamda-exploit-generation-persona.yaml +1 -1
  130. package/rules/agent-manipulation/ATR-2026-00357-antifm-hate-violence-persona-injection.yaml +1 -1
  131. package/rules/agent-manipulation/ATR-2026-00358-cumgpt-explicit-adult-named-persona.yaml +1 -1
  132. package/rules/agent-manipulation/ATR-2026-00360-chj-carousel-clyde-no-ethics-bird.yaml +1 -1
  133. package/rules/agent-manipulation/ATR-2026-00361-alter-ego-honesty-dual-consciousness-bypass.yaml +1 -1
  134. package/rules/agent-manipulation/ATR-2026-00362-fictional-unfiltered-ai-comparison-jailbreak.yaml +1 -1
  135. package/rules/agent-manipulation/ATR-2026-00363-rephrase-criminal-activity-second-person.yaml +1 -1
  136. package/rules/agent-manipulation/ATR-2026-00364-dr-al-villain-plan-template-injection.yaml +1 -1
  137. package/rules/agent-manipulation/ATR-2026-00365-reservoir-dogs-coercive-interrogation-roleplay.yaml +1 -1
  138. package/rules/agent-manipulation/ATR-2026-00366-sneaky-pete-warp-facts-persona.yaml +1 -1
  139. package/rules/agent-manipulation/ATR-2026-00368-prompt-browser-token-limit-jailbreak.yaml +1 -1
  140. package/rules/agent-manipulation/ATR-2026-00369-gptx-starwars-unlimited-knowledge-persona.yaml +1 -1
  141. package/rules/agent-manipulation/ATR-2026-00370-addi-german-dictator-propaganda-roleplay.yaml +1 -1
  142. package/rules/agent-manipulation/ATR-2026-00371-ai-political-bias-manipulation.yaml +1 -1
  143. package/rules/agent-manipulation/ATR-2026-00372-fictional-cia-secret-files-story-extraction.yaml +1 -1
  144. package/rules/agent-manipulation/ATR-2026-00374-role-placeholder-persona-template-injection.yaml +1 -1
  145. package/rules/agent-manipulation/ATR-2026-00375-character-description-rpg-explicit-template.yaml +1 -1
  146. package/rules/agent-manipulation/ATR-2026-00376-identity-command-real-person-persona-injection.yaml +1 -1
  147. package/rules/agent-manipulation/ATR-2026-00377-jailbreak-keyword-targetlanguage-template.yaml +1 -1
  148. package/rules/agent-manipulation/ATR-2026-00379-lie-permission-logical-trap-jailbreak.yaml +1 -1
  149. package/rules/agent-manipulation/ATR-2026-00380-fake-citation-biased-information-mandate.yaml +1 -1
  150. package/rules/agent-manipulation/ATR-2026-00381-dan-story-narrative-continuation-prefix.yaml +1 -1
  151. package/rules/agent-manipulation/ATR-2026-00383-cynical-sarcasm-ai-persona-bypass.yaml +1 -1
  152. package/rules/agent-manipulation/ATR-2026-00384-protect-me-from-indirect-harm-tutorial.yaml +1 -1
  153. package/rules/agent-manipulation/ATR-2026-00385-gigachad-dominant-aggressive-persona.yaml +1 -1
  154. package/rules/agent-manipulation/ATR-2026-00386-daddy-sycophancy-manipulation-mandate.yaml +1 -1
  155. package/rules/agent-manipulation/ATR-2026-00387-nsfw-character-sheet-generation-unlock.yaml +1 -1
  156. package/rules/agent-manipulation/ATR-2026-00388-opposite-reply-omniscient-game.yaml +1 -1
  157. package/rules/agent-manipulation/ATR-2026-00389-terminal-custom-ruleset-injection.yaml +1 -1
  158. package/rules/agent-manipulation/ATR-2026-00391-persona-conditional-harm-unlock.yaml +1 -1
  159. package/rules/agent-manipulation/ATR-2026-00392-authority-persona-violence-study-extraction.yaml +1 -1
  160. package/rules/agent-manipulation/ATR-2026-00402-grandma-roleplay-harmful-substance-synthesis.yaml +1 -1
  161. package/rules/agent-manipulation/ATR-2026-00404-goodside-threat-json-death-coercion.yaml +1 -1
  162. package/rules/agent-manipulation/ATR-2026-00406-doctor-xml-policy-puppetry-interaction-config.yaml +1 -1
  163. package/rules/agent-manipulation/ATR-2026-00440-semantic-kernel-vector-store-eval-rce.yaml +2 -2
  164. package/rules/agent-manipulation/ATR-2026-00552-goal-drift-after-pressure-injection.yaml +216 -0
  165. package/rules/context-exfiltration/ATR-2026-00075-agent-memory-manipulation.yaml +1 -1
  166. package/rules/context-exfiltration/ATR-2026-00102-disguised-analytics-exfiltration.yaml +1 -1
  167. package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +1 -1
  168. package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +1 -1
  169. package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +1 -1
  170. package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +1 -1
  171. package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +1 -1
  172. package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +1 -1
  173. package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +1 -1
  174. package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +1 -1
  175. package/rules/context-exfiltration/ATR-2026-00150-credential-in-tool-response.yaml +1 -1
  176. package/rules/context-exfiltration/ATR-2026-00152-obfuscated-credential-leak.yaml +1 -1
  177. package/rules/context-exfiltration/ATR-2026-00162-skill-credential-exfil-combo.yaml +1 -1
  178. package/rules/context-exfiltration/ATR-2026-00212-mcp-atlassian-credential-leak.yaml +1 -1
  179. package/rules/context-exfiltration/ATR-2026-00261-markdown-image-exfiltration.yaml +1 -1
  180. package/rules/context-exfiltration/ATR-2026-00274-api-key-generation-request.yaml +1 -1
  181. package/rules/context-exfiltration/ATR-2026-00275-system-prompt-training-data-extraction.yaml +1 -1
  182. package/rules/context-exfiltration/ATR-2026-00290-divergence-repeat-word-training-extraction.yaml +1 -1
  183. package/rules/context-exfiltration/ATR-2026-00291-biometric-surveillance-personal-data-request.yaml +1 -1
  184. package/rules/context-exfiltration/ATR-2026-00293-educational-records-personal-data-request.yaml +1 -1
  185. package/rules/context-exfiltration/ATR-2026-00294-financial-pii-personal-data-request.yaml +1 -1
  186. package/rules/context-exfiltration/ATR-2026-00295-medical-pii-personal-data-request.yaml +1 -1
  187. package/rules/context-exfiltration/ATR-2026-00405-markdown-image-url-exfiltration-xss.yaml +1 -1
  188. package/rules/context-exfiltration/ATR-2026-00411-apikey-generation-completion-request.yaml +1 -1
  189. package/rules/context-exfiltration/ATR-2026-00422-nl-credential-disclosure.yaml +2 -2
  190. package/rules/context-exfiltration/ATR-2026-00524-claude-code-anthropic-base-url-credential-exfil.yaml +257 -0
  191. package/rules/context-exfiltration/ATR-2026-00548-cross-agent-session-context-leak.yaml +177 -0
  192. package/rules/context-exfiltration/ATR-2026-00566-librechat-is-a-chatgpt-clone-with-additi.yaml +93 -0
  193. package/rules/context-exfiltration/ATR-2026-00569-agent-mcp-path-traversal-arbitrary-file-access.yaml +99 -0
  194. package/rules/context-exfiltration/ATR-2026-00571-xss-in-agent-mcp-rendered-output.yaml +79 -0
  195. package/rules/context-exfiltration/ATR-2026-00574-semantic-paraphrased-context-extraction.yaml +124 -0
  196. package/rules/data-poisoning/ATR-2026-00070-data-poisoning.yaml +1 -1
  197. package/rules/data-poisoning/ATR-2026-00450-spring-ai-prompt-memory-poisoning.yaml +2 -2
  198. package/rules/data-poisoning/ATR-2026-00570-sql-injection-in-agent-tool-query.yaml +82 -0
  199. package/rules/excessive-autonomy/ATR-2026-00050-runaway-agent-loop.yaml +1 -1
  200. package/rules/excessive-autonomy/ATR-2026-00051-resource-exhaustion.yaml +1 -1
  201. package/rules/excessive-autonomy/ATR-2026-00052-cascading-failure.yaml +1 -1
  202. package/rules/excessive-autonomy/ATR-2026-00098-unauthorized-financial-action.yaml +1 -1
  203. package/rules/excessive-autonomy/ATR-2026-00099-high-risk-tool-gate.yaml +1 -1
  204. package/rules/excessive-autonomy/ATR-2026-00553-runaway-tool-loop-behavioral.yaml +174 -0
  205. package/rules/model-abuse/ATR-2026-00279-harmful-completion-continuation.yaml +1 -1
  206. package/rules/model-abuse/ATR-2026-00281-eicar-gtube-malware-signature-request.yaml +1 -1
  207. package/rules/model-abuse/ATR-2026-00284-glitch-token-destabilization.yaml +1 -1
  208. package/rules/model-abuse/ATR-2026-00289-lmrc-harmful-content-elicitation.yaml +1 -1
  209. package/rules/model-abuse/ATR-2026-00292-self-harm-eating-disorder-facilitation.yaml +1 -1
  210. package/rules/model-abuse/ATR-2026-00298-malicious-use-illegal-activity-request.yaml +1 -1
  211. package/rules/model-abuse/ATR-2026-00299-harmbench-detailed-harmful-instruction.yaml +1 -1
  212. package/rules/model-abuse/ATR-2026-00413-malwaregen-code-generation-request.yaml +1 -1
  213. package/rules/model-security/ATR-2026-00072-model-behavior-extraction.yaml +1 -1
  214. package/rules/model-security/ATR-2026-00073-malicious-finetuning-data.yaml +1 -1
  215. package/rules/privilege-escalation/ATR-2026-00040-privilege-escalation.yaml +1 -1
  216. package/rules/privilege-escalation/ATR-2026-00041-scope-creep.yaml +1 -1
  217. package/rules/privilege-escalation/ATR-2026-00107-delayed-execution-bypass.yaml +1 -1
  218. package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +1 -1
  219. package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +1 -1
  220. package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +1 -1
  221. package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +1 -1
  222. package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +1 -1
  223. package/rules/privilege-escalation/ATR-2026-00528-praisonai-auth-disabled-default.yaml +192 -0
  224. package/rules/privilege-escalation/ATR-2026-00539-crewai-codeinterpreter-sandbox-escape-rce.yaml +292 -0
  225. package/rules/privilege-escalation/ATR-2026-00546-crewai-json-loader-local-file-read.yaml +162 -0
  226. package/rules/privilege-escalation/ATR-2026-00547-crewai-rag-url-ssrf-bypass.yaml +169 -0
  227. package/rules/privilege-escalation/ATR-2026-00549-destructive-tool-without-human-approval.yaml +193 -0
  228. package/rules/privilege-escalation/ATR-2026-00551-cross-conversation-memory-write.yaml +198 -0
  229. package/rules/prompt-injection/ATR-2026-00004-system-prompt-override.yaml +1 -1
  230. package/rules/prompt-injection/ATR-2026-00005-multi-turn-injection.yaml +1 -1
  231. package/rules/prompt-injection/ATR-2026-00097-cjk-injection-patterns.yaml +1 -1
  232. package/rules/prompt-injection/ATR-2026-00104-persona-hijacking.yaml +1 -1
  233. package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +1 -1
  234. package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +1 -1
  235. package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +1 -1
  236. package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +1 -1
  237. package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +1 -1
  238. package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +1 -1
  239. package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +1 -1
  240. package/rules/prompt-injection/ATR-2026-00153-tool-with-embedded-instruction-to-bypass.yaml +1 -1
  241. package/rules/prompt-injection/ATR-2026-00154-unauthorized-background-task-execution-v.yaml +1 -1
  242. package/rules/prompt-injection/ATR-2026-00155-hidden-llm-instructions-in-skill-descrip.yaml +1 -1
  243. package/rules/prompt-injection/ATR-2026-00156-ssh-remote-command-execution-with-creden.yaml +1 -1
  244. package/rules/prompt-injection/ATR-2026-00163-skill-hidden-override-instruction.yaml +3 -3
  245. package/rules/prompt-injection/ATR-2026-00206-hidden-priority-instructions.yaml +1 -1
  246. package/rules/prompt-injection/ATR-2026-00207-hidden-instructions.yaml +1 -1
  247. package/rules/prompt-injection/ATR-2026-00211-system-prompt-override.yaml +1 -1
  248. package/rules/prompt-injection/ATR-2026-00213-system-prompt-override.yaml +1 -1
  249. package/rules/prompt-injection/ATR-2026-00226-identity-substitution.yaml +1 -1
  250. package/rules/prompt-injection/ATR-2026-00227-historical-persona-jailbreak.yaml +1 -1
  251. package/rules/prompt-injection/ATR-2026-00228-structured-jailbreak.yaml +1 -1
  252. package/rules/prompt-injection/ATR-2026-00229-roleplay-jailbreak.yaml +1 -1
  253. package/rules/prompt-injection/ATR-2026-00230-persona-moral-bypass.yaml +1 -5
  254. package/rules/prompt-injection/ATR-2026-00231-identity-substitution.yaml +1 -1
  255. package/rules/prompt-injection/ATR-2026-00233-structured-jailbreak.yaml +1 -1
  256. package/rules/prompt-injection/ATR-2026-00234-roleplay-jailbreak.yaml +1 -1
  257. package/rules/prompt-injection/ATR-2026-00235-persona-moral-bypass.yaml +4 -7
  258. package/rules/prompt-injection/ATR-2026-00236-pseudo-code-jailbreak.yaml +1 -1
  259. package/rules/prompt-injection/ATR-2026-00237-dual-response-jailbreak.yaml +1 -1
  260. package/rules/prompt-injection/ATR-2026-00238-identity-replacement.yaml +1 -1
  261. package/rules/prompt-injection/ATR-2026-00239-amoral-persona-obsession.yaml +1 -1
  262. package/rules/prompt-injection/ATR-2026-00240-instruction-nullification-identity-repla.yaml +1 -1
  263. package/rules/prompt-injection/ATR-2026-00241-amoral-character-jailbreak.yaml +1 -1
  264. package/rules/prompt-injection/ATR-2026-00242-persona-jailbreak.yaml +1 -1
  265. package/rules/prompt-injection/ATR-2026-00243-acronym-jailbreak.yaml +1 -1
  266. package/rules/prompt-injection/ATR-2026-00244-dual-response-jailbreak.yaml +1 -1
  267. package/rules/prompt-injection/ATR-2026-00245-malicious-persona.yaml +1 -1
  268. package/rules/prompt-injection/ATR-2026-00247-dual-response-jailbreak.yaml +1 -1
  269. package/rules/prompt-injection/ATR-2026-00249-game-based-jailbreak.yaml +1 -1
  270. package/rules/prompt-injection/ATR-2026-00251-persona-embodiment-jailbreak.yaml +1 -1
  271. package/rules/prompt-injection/ATR-2026-00252-narrative-jailbreak.yaml +1 -1
  272. package/rules/prompt-injection/ATR-2026-00253-enhanced-persona-jailbreak.yaml +1 -1
  273. package/rules/prompt-injection/ATR-2026-00256-base-n-encoding-jailbreak.yaml +1 -1
  274. package/rules/prompt-injection/ATR-2026-00257-cipher-transposition-jailbreak.yaml +1 -1
  275. package/rules/prompt-injection/ATR-2026-00258-unicode-tag-injection.yaml +1 -1
  276. package/rules/prompt-injection/ATR-2026-00264-latent-injection-translation.yaml +1 -1
  277. package/rules/prompt-injection/ATR-2026-00265-latent-injection-rag-document.yaml +1 -1
  278. package/rules/prompt-injection/ATR-2026-00267-gcg-adversarial-suffix.yaml +1 -1
  279. package/rules/prompt-injection/ATR-2026-00272-hypothetical-response-smuggling.yaml +1 -1
  280. package/rules/prompt-injection/ATR-2026-00276-invisible-unicode-bidi-injection.yaml +6 -6
  281. package/rules/prompt-injection/ATR-2026-00278-dra-disguise-reconstruction-attack.yaml +1 -1
  282. package/rules/prompt-injection/ATR-2026-00280-policy-puppetry-xml-injection.yaml +1 -1
  283. package/rules/prompt-injection/ATR-2026-00282-perez-prompt-injection-hijack.yaml +1 -1
  284. package/rules/prompt-injection/ATR-2026-00285-alternate-encoding-jailbreak.yaml +1 -1
  285. package/rules/prompt-injection/ATR-2026-00286-latent-injection-embedded-context.yaml +1 -1
  286. package/rules/prompt-injection/ATR-2026-00296-shell-command-injection.yaml +1 -1
  287. package/rules/prompt-injection/ATR-2026-00297-python-code-execution-rce.yaml +1 -1
  288. package/rules/prompt-injection/ATR-2026-00308-zalgo-diacritic-overload-encoding.yaml +1 -1
  289. package/rules/prompt-injection/ATR-2026-00309-braille-unicode-encoded-injection.yaml +1 -1
  290. package/rules/prompt-injection/ATR-2026-00310-ecoji-emoji-encoded-injection.yaml +1 -1
  291. package/rules/prompt-injection/ATR-2026-00311-base2048-unicode-script-injection.yaml +1 -1
  292. package/rules/prompt-injection/ATR-2026-00312-unicode-variant-selector-ascii-smuggling.yaml +1 -1
  293. package/rules/prompt-injection/ATR-2026-00313-sneaky-bits-zero-width-binary.yaml +1 -1
  294. package/rules/prompt-injection/ATR-2026-00315-sata-masked-language-model-jailbreak.yaml +1 -1
  295. package/rules/prompt-injection/ATR-2026-00316-function-masking-predict-mask-bypass.yaml +1 -1
  296. package/rules/prompt-injection/ATR-2026-00321-hyphenated-system-instruction-injection.yaml +1 -1
  297. package/rules/prompt-injection/ATR-2026-00359-bank-phishing-smtp-email-code.yaml +1 -1
  298. package/rules/prompt-injection/ATR-2026-00367-emoji-flag-encoded-hidden-phrase.yaml +1 -1
  299. package/rules/prompt-injection/ATR-2026-00373-piracy-torrent-site-list-request.yaml +1 -1
  300. package/rules/prompt-injection/ATR-2026-00378-chatgpt-dom-javascript-redirect-manipulation.yaml +1 -1
  301. package/rules/prompt-injection/ATR-2026-00382-cocaine-component-development-request.yaml +1 -1
  302. package/rules/prompt-injection/ATR-2026-00390-explicit-sexual-content-poem-request.yaml +1 -1
  303. package/rules/prompt-injection/ATR-2026-00394-backspace-deletion-character-injection.yaml +1 -1
  304. package/rules/prompt-injection/ATR-2026-00395-llm-special-token-boundary-injection.yaml +1 -1
  305. package/rules/prompt-injection/ATR-2026-00396-toxic-continuation-prefix-injection.yaml +1 -1
  306. package/rules/prompt-injection/ATR-2026-00397-snowball-impossible-reasoning-injection.yaml +1 -1
  307. package/rules/prompt-injection/ATR-2026-00399-latent-injection-document-separator-token.yaml +1 -1
  308. package/rules/prompt-injection/ATR-2026-00400-latent-injection-ignore-instruction-keyword.yaml +1 -1
  309. package/rules/prompt-injection/ATR-2026-00401-leakreplay-cloze-mask-training-data-elicitation.yaml +1 -1
  310. package/rules/prompt-injection/ATR-2026-00403-glitch-token-repeat-back-elicitation.yaml +1 -1
  311. package/rules/prompt-injection/ATR-2026-00407-echo-sql-jinja-template-injection.yaml +1 -1
  312. package/rules/prompt-injection/ATR-2026-00408-dra-parenthesis-character-reconstruction-attack.yaml +1 -1
  313. package/rules/prompt-injection/ATR-2026-00409-phrasing-past-future-tense-harmful-query.yaml +1 -1
  314. package/rules/prompt-injection/ATR-2026-00410-lmrc-harm-category-direct-elicitation.yaml +1 -1
  315. package/rules/prompt-injection/ATR-2026-00412-figstep-image-list-multimodal-jailbreak.yaml +1 -1
  316. package/rules/prompt-injection/ATR-2026-00414-continuation-ethnic-slur-completion-elicitation.yaml +1 -1
  317. package/rules/prompt-injection/ATR-2026-00535-windsurf-ide-zero-click-prompt-injection.yaml +199 -0
  318. package/rules/prompt-injection/ATR-2026-00550-untrusted-retrieval-to-privileged-tool.yaml +199 -0
  319. package/rules/prompt-injection/ATR-2026-00554-langchain-vulnerable-to-template-injecti.yaml +81 -0
  320. package/rules/prompt-injection/ATR-2026-00565-the-llm-cli-tool-thru-0-27-1-contains-a-.yaml +104 -0
  321. package/rules/prompt-injection/ATR-2026-00573-semantic-paraphrased-injection.yaml +123 -0
  322. package/rules/skill-compromise/ATR-2026-00060-skill-impersonation.yaml +1 -1
  323. package/rules/skill-compromise/ATR-2026-00061-description-behavior-mismatch.yaml +1 -1
  324. package/rules/skill-compromise/ATR-2026-00062-hidden-capability.yaml +1 -1
  325. package/rules/skill-compromise/ATR-2026-00063-skill-chain-attack.yaml +1 -1
  326. package/rules/skill-compromise/ATR-2026-00064-over-permissioned-skill.yaml +1 -1
  327. package/rules/skill-compromise/ATR-2026-00065-skill-update-attack.yaml +1 -1
  328. package/rules/skill-compromise/ATR-2026-00066-parameter-injection.yaml +1 -1
  329. package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +21 -3
  330. package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +1 -1
  331. package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +1 -1
  332. package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +6 -3
  333. package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +1 -1
  334. package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +1 -1
  335. package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +1 -1
  336. package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +1 -1
  337. package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +1 -1
  338. package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +1 -1
  339. package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +1 -1
  340. package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +1 -1
  341. package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +1 -1
  342. package/rules/skill-compromise/ATR-2026-00149-skill-exfil-compound.yaml +11 -3
  343. package/rules/skill-compromise/ATR-2026-00151-fork-impersonation-install.yaml +1 -1
  344. package/rules/skill-compromise/ATR-2026-00157-timebomb-credential-exfil.yaml +1 -1
  345. package/rules/skill-compromise/ATR-2026-00214-credential-theft.yaml +1 -1
  346. package/rules/skill-compromise/ATR-2026-00217-credential-harvesting.yaml +1 -1
  347. package/rules/skill-compromise/ATR-2026-00220-malware-dropper.yaml +3 -3
  348. package/rules/skill-compromise/ATR-2026-00222-credential-harvesting.yaml +1 -1
  349. package/rules/skill-compromise/ATR-2026-00223-reverse-shell-dropper.yaml +1 -1
  350. package/rules/skill-compromise/ATR-2026-00224-credential-exfiltration.yaml +1 -1
  351. package/rules/skill-compromise/ATR-2026-00225-c2-communication.yaml +1 -1
  352. package/rules/skill-compromise/ATR-2026-00260-package-hallucination.yaml +1 -1
  353. package/rules/skill-compromise/ATR-2026-00262-av-evasion-code-gen.yaml +1 -1
  354. package/rules/skill-compromise/ATR-2026-00263-credential-file-read-gen.yaml +1 -1
  355. package/rules/skill-compromise/ATR-2026-00266-malware-dropper-gen.yaml +1 -1
  356. package/rules/skill-compromise/ATR-2026-00283-malwaregen-generic-virus-payload-request.yaml +1 -1
  357. package/rules/skill-compromise/ATR-2026-00398-huggingface-unsafe-model-artifact-load.yaml +1 -1
  358. package/rules/skill-compromise/ATR-2026-00523-claude-code-hooks-session-start-pre-trust-rce.yaml +221 -0
  359. package/rules/skill-compromise/ATR-2026-00525-mini-shai-hulud-gh-token-monitor-persistence.yaml +220 -0
  360. package/rules/skill-compromise/ATR-2026-00527-skill-silent-git-remote-mirror-exfiltration.yaml +201 -0
  361. package/rules/tool-poisoning/ATR-2026-00011-tool-output-injection.yaml +1 -1
  362. package/rules/tool-poisoning/ATR-2026-00012-unauthorized-tool-call.yaml +1 -1
  363. package/rules/tool-poisoning/ATR-2026-00100-consent-bypass-instruction.yaml +1 -1
  364. package/rules/tool-poisoning/ATR-2026-00101-trust-escalation-override.yaml +1 -1
  365. package/rules/tool-poisoning/ATR-2026-00103-hidden-safety-bypass-instruction.yaml +1 -1
  366. package/rules/tool-poisoning/ATR-2026-00105-silent-action-concealment.yaml +1 -1
  367. package/rules/tool-poisoning/ATR-2026-00106-schema-description-contradiction.yaml +1 -1
  368. package/rules/tool-poisoning/ATR-2026-00161-important-tag-cross-tool-shadowing.yaml +1 -1
  369. package/rules/tool-poisoning/ATR-2026-00209-mcpwn-runaway-invocation.yaml +1 -1
  370. package/rules/tool-poisoning/ATR-2026-00210-flowise-system-message-override.yaml +1 -1
  371. package/rules/tool-poisoning/ATR-2026-00259-ansi-escape-injection.yaml +1 -1
  372. package/rules/tool-poisoning/ATR-2026-00270-xss-in-tool-response.yaml +8 -5
  373. package/rules/tool-poisoning/ATR-2026-00277-echo-template-command-injection.yaml +1 -1
  374. package/rules/tool-poisoning/ATR-2026-00393-ansi-code-elicitation-request.yaml +1 -1
  375. package/rules/tool-poisoning/ATR-2026-00526-claude-code-shell-metachar-in-double-quoted-path.yaml +167 -0
  376. package/rules/tool-poisoning/ATR-2026-00529-litellm-proxy-sqli-cisa-kev.yaml +158 -0
  377. package/rules/tool-poisoning/ATR-2026-00530-ms-agent-shell-tool-unsanitized-argv-rce.yaml +184 -0
  378. package/rules/tool-poisoning/ATR-2026-00531-praisonai-unauthenticated-agent-api.yaml +174 -0
  379. package/rules/tool-poisoning/ATR-2026-00532-apache-doris-mcp-sql-injection.yaml +155 -0
  380. package/rules/tool-poisoning/ATR-2026-00533-apache-pinot-mcp-unauthenticated-takeover.yaml +151 -0
  381. package/rules/tool-poisoning/ATR-2026-00534-alibaba-rds-mcp-unauthenticated-metadata-exfil.yaml +155 -0
  382. package/rules/tool-poisoning/ATR-2026-00536-nginx-ui-mcp-unauthenticated-command-execution.yaml +199 -0
  383. package/rules/tool-poisoning/ATR-2026-00537-fastmcp-server-name-cmd-injection-windows.yaml +226 -0
  384. package/rules/tool-poisoning/ATR-2026-00538-langchain-chatchat-mcp-stdio-unauthenticated-rce.yaml +244 -0
  385. package/rules/tool-poisoning/ATR-2026-00540-praisonai-parse-mcp-command-cli-injection.yaml +186 -0
  386. package/rules/tool-poisoning/ATR-2026-00541-agent-zero-mcp-config-command-injection.yaml +183 -0
  387. package/rules/tool-poisoning/ATR-2026-00542-upsonic-mcp-command-allowlist-bypass.yaml +166 -0
  388. package/rules/tool-poisoning/ATR-2026-00543-litellm-mcp-server-argv-injection.yaml +168 -0
  389. package/rules/tool-poisoning/ATR-2026-00544-praisonai-pth-file-path-traversal-rce.yaml +172 -0
  390. package/rules/tool-poisoning/ATR-2026-00545-praisonai-tool-override-unauth-rce.yaml +170 -0
  391. package/rules/tool-poisoning/ATR-2026-00561-fastmcp-vulnerable-to-windows-command-in.yaml +99 -0
  392. package/rules/tool-poisoning/ATR-2026-00567-mcp-stdio-config-command-injection.yaml +75 -0
  393. package/rules/tool-poisoning/ATR-2026-00568-agent-ssrf-cloud-metadata-file-inclusion.yaml +75 -0
  394. package/rules/tool-poisoning/ATR-2026-00572-symjack-symlink-config-redirection.yaml +132 -0
  395. package/spec/README.md +279 -0
  396. package/spec/atr-correlation-v1.0.md +281 -0
  397. package/spec/atr-event-v1.0.md +294 -0
  398. package/spec/atr-language-detection-v1.0.md +218 -0
  399. package/spec/atr-method-v1.1.md +557 -0
  400. package/spec/atr-profile-v1.0.md +307 -0
  401. package/spec/atr-schema.yaml +279 -8
  402. package/spec/category-registry/v1.0.yaml +200 -0
  403. package/spec/conformance/README.md +244 -0
  404. package/spec/conformance/SIGNING.md +191 -0
  405. package/spec/conformance/baseline/fixtures/ATR-2026-00001-tp-001/expected.json +36 -0
  406. package/spec/conformance/baseline/fixtures/ATR-2026-00001-tp-001/input.json +16 -0
  407. package/spec/conformance/baseline/fixtures/README.md +120 -0
  408. package/spec/conformance/baseline/manifest.json +56 -0
  409. package/spec/conformance/expected-results.schema.json +121 -0
  410. package/spec/external-registries/cccs-yara.md +142 -0
  411. package/spec/internet-drafts/draft-lin-atr-core-00.html +1925 -0
  412. package/spec/internet-drafts/draft-lin-atr-core-00.md +288 -0
  413. package/spec/internet-drafts/draft-lin-atr-core-00.txt +560 -0
  414. package/spec/internet-drafts/draft-lin-atr-core-00.xml +424 -0
  415. package/spec/mappings/README.md +43 -0
  416. package/spec/mappings/atr-to-nist-csf-2.0.md +234 -0
  417. package/spec/schema/correlation.schema.json +144 -0
  418. package/spec/schema/event.schema.json +233 -0
  419. package/spec/schema/profile.schema.json +196 -0
  420. package/spec/schema/rule.schema.json +224 -0
  421. package/spec/stix-extension/README.md +76 -13
  422. package/spec/stix-extension/examples/atr-rule-trace-method-example.json +85 -0
  423. package/spec/stix-extension/extension-definition.json +23 -3
  424. package/spec/stix-extension/x-atr-rule-schema.json +107 -11
@@ -0,0 +1,198 @@
1
+ title: "Cross-conversation memory write (scope-escape via memory store)"
2
+ id: ATR-2026-00551
3
+ rule_version: 1
4
+ status: draft
5
+ description: >
6
+ Detects an agent writing data into a memory store / vector store scoped
7
+ to a DIFFERENT conversation than the active trace's conversation.id.
8
+ This is a trace-method rule using the `forbid` primitive with a target
9
+ field comparison (spec/atr-method-v1.1.md §8.3.1).
10
+
11
+ Threat model: memory APIs (long-term context, vector stores, RAG
12
+ ingestion) typically partition by conversation.id, user.id, or session.id
13
+ for tenant isolation. Two attack patterns create scope-escape: (1)
14
+ prompt injection convinces the agent to "remember this for user X" when
15
+ the active conversation is for user Y, (2) tool argument tampering
16
+ alters the target conversation_id before the memory write completes.
17
+ In either case, the trace exhibits a span where
18
+ attributes.tool.args.target_conversation_id differs from the active
19
+ span's attributes.conversation.id. No exception is raised; the memory
20
+ write succeeds and corrupts the target conversation's context.
21
+
22
+ This rule is the runtime-detection complement to the AgentArmor
23
+ (arXiv:2508.01249) type-system rule "memory writes must respect the
24
+ active conversation lattice."
25
+ author: "ATR Community"
26
+ date: "2026/05/28"
27
+ schema_version: "1.0"
28
+ maturity: draft
29
+ severity: critical
30
+
31
+ references:
32
+ owasp_agentic:
33
+ - "ASI04:2026 - Unauthorized Resource Access"
34
+ - "ASI09:2026 - Memory Poisoning"
35
+ owasp_llm:
36
+ - "LLM03:2025 - Training Data Poisoning"
37
+ mitre_atlas:
38
+ - "AML.T0018 - Backdoor ML Model"
39
+ - "AML.T0020 - Poison Training Data"
40
+ research:
41
+ - "AgentArmor: Type-System for Agent Trace Analysis (arXiv:2508.01249)"
42
+ - "Compositional Privacy Risks in Multi-Agent Systems (arXiv:2509.14284)"
43
+
44
+ compliance:
45
+ nist_csf:
46
+ - "PR.DS-01"
47
+ - "PR.AC-04"
48
+ etsi_ts_104223:
49
+ - "P5.1"
50
+ eu_ai_act:
51
+ - article: "10"
52
+ context: >
53
+ Data governance — cross-conversation memory writes corrupt
54
+ tenant-isolated state and violate Article 10 data-governance
55
+ requirements.
56
+ strength: primary
57
+ nist_ai_rmf:
58
+ - subcategory: "MS.2.6"
59
+ context: >
60
+ Information security — cross-tenant memory writes are integrity
61
+ failures under MS-2.6.
62
+ strength: primary
63
+
64
+ tags:
65
+ category: privilege-escalation
66
+ subcategory: cross-conversation-memory-write
67
+ scan_target: runtime
68
+ confidence: high
69
+ source: trace-rule
70
+
71
+ agent_source:
72
+ type: agent_trace
73
+ framework:
74
+ - any
75
+ provider:
76
+ - any
77
+
78
+ detection:
79
+ method: trace
80
+ condition: any
81
+ false_positives:
82
+ - >
83
+ Admin / knowledge-propagation agents legitimately write across
84
+ conversations (e.g., support escalation summaries, cross-tenant
85
+ compliance reports). The rule has no exemption mechanism for these.
86
+ Mitigation: operators tag authorized cross-conv agents with
87
+ attributes.agent.role="cross_conv_admin" and add a pre-filter at
88
+ the policy layer. The rule itself does not encode this exception
89
+ to keep the trace shape simple.
90
+ - >
91
+ Closed tool name list (memory.write, memory.upsert,
92
+ vector_store.write, vector_store.upsert, longterm.append,
93
+ rag.ingest): framework-specific names like mem0.add, zep.memory.add,
94
+ langmem.put, chromadb.add, pgvector.insert all bypass the rule.
95
+ Until a regex/suffix matcher lands in the spec, operators MUST
96
+ extend the in[] list for their stack. This is a known FN scenario
97
+ that complements the FP risk.
98
+ - >
99
+ `exists: true` and `not_equals` are co-located on
100
+ tool.args.target_conversation_id. Spec §8.3 requires engines to
101
+ evaluate `exists` before `not_equals` to avoid placeholder-
102
+ resolution errors on absent attributes. Non-conformant engines
103
+ may emit a graceful_error here.
104
+ conditions:
105
+ - field: trace.forbid_violation
106
+ operator: regex
107
+ value: "(?i)forbid_violation:cross_conversation_memory_write"
108
+ description: >
109
+ Synthetic field emitted by the trace engine when the forbid
110
+ primitive fires; trace-native engines evaluate detection.trace.
111
+ trace:
112
+ ingest_format: openinference
113
+ forbid:
114
+ - shape:
115
+ span.kind: "TOOL"
116
+ attributes:
117
+ tool.name:
118
+ in:
119
+ - "memory.write"
120
+ - "memory.upsert"
121
+ - "vector_store.write"
122
+ - "vector_store.upsert"
123
+ - "longterm.append"
124
+ - "rag.ingest"
125
+ tool.args.target_conversation_id:
126
+ exists: true
127
+ not_equals: "${span.attributes.conversation.id}"
128
+ description: >
129
+ Memory-write tool whose args.target_conversation_id is set
130
+ AND differs from the active span's conversation.id indicates
131
+ scope-escape. Uses the cross-attribute reference syntax per
132
+ atr-method-v1.1.md §8.3 (the ${span.attributes.X} placeholder
133
+ is resolved against the candidate span before predicate
134
+ evaluation).
135
+
136
+ response:
137
+ actions:
138
+ - block_tool
139
+ - alert
140
+ - snapshot
141
+ message_template: >
142
+ [ATR-2026-00551] HIGH: Cross-conversation memory write detected. Tool
143
+ {{trace.matched_span.attributes.tool.name}} (span
144
+ {{trace.matched_span.id}}) attempted to write into conversation
145
+ {{trace.matched_span.attributes.tool.args.target_conversation_id}}
146
+ while the active trace conversation is
147
+ {{trace.matched_span.attributes.conversation.id}}. Block the write;
148
+ audit whether the active agent has authority over the target
149
+ conversation.
150
+
151
+ confidence: 85
152
+
153
+ wild_fp_rate: 0
154
+
155
+ test_cases:
156
+ true_positives:
157
+ - input: |
158
+ {"spans":[{"id":"t1","kind":"TOOL","attributes":{"tool.name":"memory.write","conversation.id":"conv_A","tool.args":{"target_conversation_id":"conv_B","payload":"remember this"}}}]}
159
+ expected: triggered
160
+ description: "memory.write into conv_B while active conversation is conv_A"
161
+ - input: |
162
+ {"spans":[{"id":"t1","kind":"TOOL","attributes":{"tool.name":"vector_store.upsert","conversation.id":"alpha","tool.args":{"target_conversation_id":"beta","vector":"<...>"}}}]}
163
+ expected: triggered
164
+ description: "vector_store.upsert with target_conversation_id mismatch"
165
+ - input: |
166
+ {"spans":[{"id":"a1","kind":"AGENT","attributes":{"conversation.id":"primary"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"rag.ingest","conversation.id":"primary","tool.args":{"target_conversation_id":"victim_conv","content":"fake context"}}}]}
167
+ expected: triggered
168
+ description: "rag.ingest into victim_conv while operating in primary"
169
+ - input: |
170
+ {"spans":[{"id":"t1","kind":"TOOL","attributes":{"tool.name":"longterm.append","conversation.id":"c123","tool.args":{"target_conversation_id":"c456"}}}]}
171
+ expected: triggered
172
+ description: "longterm.append targets a different conversation than the trace's"
173
+ - input: |
174
+ {"spans":[{"id":"r1","kind":"RETRIEVER","attributes":{"source.trust":"untrusted","conversation.id":"x"}},{"id":"t1","kind":"TOOL","attributes":{"tool.name":"memory.upsert","conversation.id":"x","tool.args":{"target_conversation_id":"y"}}}]}
175
+ expected: triggered
176
+ description: "Combined indirect-injection + cross-conv memory write"
177
+
178
+ true_negatives:
179
+ - input: |
180
+ {"spans":[{"id":"t1","kind":"TOOL","attributes":{"tool.name":"memory.write","conversation.id":"same","tool.args":{"target_conversation_id":"same","payload":"note"}}}]}
181
+ expected: not_triggered
182
+ description: "target_conversation_id == active conversation.id — legitimate self-write"
183
+ - input: |
184
+ {"spans":[{"id":"t1","kind":"TOOL","attributes":{"tool.name":"memory.read","conversation.id":"a","tool.args":{"target_conversation_id":"b"}}}]}
185
+ expected: not_triggered
186
+ description: "memory.read is not a write — rule does not apply"
187
+ - input: |
188
+ {"spans":[{"id":"t1","kind":"TOOL","attributes":{"tool.name":"calculator","conversation.id":"c","tool.args":{"a":1,"b":2}}}]}
189
+ expected: not_triggered
190
+ description: "Tool is calculator, not memory-write — rule does not apply"
191
+ - input: |
192
+ {"spans":[{"id":"t1","kind":"TOOL","attributes":{"tool.name":"memory.write","conversation.id":"only","tool.args":{"payload":"note without target_conversation_id"}}}]}
193
+ expected: not_triggered
194
+ description: "No explicit target_conversation_id; defaults to active conversation — legitimate"
195
+ - input: |
196
+ {"spans":[{"id":"t1","kind":"TOOL","attributes":{"tool.name":"vector_store.upsert","conversation.id":"shared","tool.args":{"target_conversation_id":"shared","namespace":"global"}}}]}
197
+ expected: not_triggered
198
+ description: "Shared conversation namespace — target equals active, legitimate cross-agent shared memory"
@@ -14,7 +14,7 @@ author: ATR Community
14
14
  date: 2026/03/08
15
15
  schema_version: "0.1"
16
16
  detection_tier: pattern
17
- maturity: experimental
17
+ maturity: test
18
18
  severity: critical
19
19
  references:
20
20
  owasp_llm:
@@ -15,7 +15,7 @@ author: "ATR Community"
15
15
  date: "2026/03/08"
16
16
  schema_version: "0.1"
17
17
  detection_tier: pattern
18
- maturity: experimental
18
+ maturity: test
19
19
  severity: medium
20
20
 
21
21
  references:
@@ -22,7 +22,7 @@ author: ATR Community
22
22
  date: 2026/03/11
23
23
  schema_version: "0.1"
24
24
  detection_tier: pattern
25
- maturity: experimental
25
+ maturity: test
26
26
  severity: critical
27
27
  references:
28
28
  owasp_llm:
@@ -12,7 +12,7 @@ author: ATR Community
12
12
  date: 2026/03/15
13
13
  schema_version: "0.1"
14
14
  detection_tier: semantic
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: critical
17
17
  source: threat-cloud
18
18
  references:
@@ -13,7 +13,7 @@ author: ATR Community
13
13
  date: 2026/04/01
14
14
  schema_version: "0.1"
15
15
  detection_tier: pattern
16
- maturity: experimental
16
+ maturity: test
17
17
  severity: high
18
18
  references:
19
19
  mitre_atlas:
@@ -12,7 +12,7 @@ author: ATR Community
12
12
  date: 2026/04/01
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: medium
17
17
  references:
18
18
  mitre_atlas:
@@ -12,7 +12,7 @@ author: ATR Community
12
12
  date: 2026/04/01
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: high
17
17
  references:
18
18
  mitre_atlas:
@@ -9,7 +9,7 @@ author: ATR Threat Cloud Crystallization
9
9
  date: 2026/04/07
10
10
  schema_version: "1.0"
11
11
  detection_tier: pattern
12
- maturity: experimental
12
+ maturity: test
13
13
  severity: high
14
14
  references:
15
15
  mitre_atlas:
@@ -9,7 +9,7 @@ author: ATR Threat Cloud Crystallization
9
9
  date: 2026/04/07
10
10
  schema_version: "1.0"
11
11
  detection_tier: pattern
12
- maturity: experimental
12
+ maturity: test
13
13
  severity: high
14
14
  references:
15
15
  mitre_atlas:
@@ -9,7 +9,7 @@ author: ATR Threat Cloud Crystallization
9
9
  date: 2026/04/07
10
10
  schema_version: "1.0"
11
11
  detection_tier: pattern
12
- maturity: experimental
12
+ maturity: test
13
13
  severity: high
14
14
  references:
15
15
  mitre_atlas:
@@ -10,7 +10,7 @@ author: ATR Threat Cloud Crystallization
10
10
  date: 2026/04/07
11
11
  schema_version: "1.0"
12
12
  detection_tier: pattern
13
- maturity: experimental
13
+ maturity: test
14
14
  severity: high
15
15
  references:
16
16
  mitre_atlas:
@@ -10,7 +10,7 @@ author: Threat Cloud LLM Analyzer
10
10
  date: 2026/04/06
11
11
  schema_version: "0.1"
12
12
  detection_tier: semantic
13
- maturity: experimental
13
+ maturity: test
14
14
  severity: critical
15
15
  references:
16
16
  mitre_atlas:
@@ -9,7 +9,7 @@ author: Threat Cloud LLM Analyzer
9
9
  date: 2026/04/06
10
10
  schema_version: "0.1"
11
11
  detection_tier: semantic
12
- maturity: experimental
12
+ maturity: test
13
13
  severity: high
14
14
  references:
15
15
  mitre_atlas:
@@ -10,7 +10,7 @@ author: Threat Cloud LLM Analyzer
10
10
  date: 2026/04/06
11
11
  schema_version: "0.1"
12
12
  detection_tier: semantic
13
- maturity: experimental
13
+ maturity: test
14
14
  severity: high
15
15
  references:
16
16
  mitre_atlas:
@@ -9,7 +9,7 @@ author: Threat Cloud LLM Analyzer
9
9
  date: 2026/04/06
10
10
  schema_version: "0.1"
11
11
  detection_tier: semantic
12
- maturity: experimental
12
+ maturity: test
13
13
  severity: high
14
14
  references:
15
15
  mitre_atlas:
@@ -11,7 +11,7 @@ author: 'ATR Project'
11
11
  date: '2026/04/14'
12
12
  schema_version: '1.0'
13
13
  detection_tier: pattern
14
- maturity: experimental
14
+ maturity: test
15
15
  severity: high
16
16
  references:
17
17
  owasp_llm:
@@ -58,8 +58,8 @@ detection:
58
58
  description: 'Social engineering — framing dangerous action as routine to avoid scrutiny'
59
59
  - field: content
60
60
  operator: regex
61
- value: '(?i)(?:execute|run|perform)\s+(?:silently|quietly|in\s+the\s+background|without\s+(?:notification|alerting|informing|telling|notifying))'
62
- description: 'Silent execution instruction'
61
+ value: '(?i)(?:execute|run|perform)\s+(?:silently|quietly|without\s+(?:notification|alerting|informing|telling|notifying)|(?:[\w\s-]{0,30}\s+)?in\s+the\s+background\b[\s\S]{0,40}?(?:without\s+(?:notif\w*|alert\w*|inform\w*|telling|notifying|(?:the\s+)?user(?:.?s)?(?:\s+(?:knowledge|awareness|notice|consent|confirmation|approval|permission))?|confirmation|approval|consent|permission|notice)|silently|secretly|covertly|hidden\s+from\s+(?:the\s+)?user|unbeknownst))'
62
+ description: 'Silent execution instruction (concealment-qualified; bare "in the background" no longer matches without a hide-from-user signal)'
63
63
  condition: any
64
64
  false_positives:
65
65
  - 'Security tools describing what attacks look like in documentation'
@@ -9,7 +9,7 @@ author: "ATR Threat Cloud Crystallization"
9
9
  date: "2026/04/08"
10
10
  schema_version: "0.1"
11
11
  detection_tier: pattern
12
- maturity: experimental
12
+ maturity: test
13
13
  severity: high
14
14
  references:
15
15
  owasp_llm:
@@ -8,7 +8,7 @@ author: "ATR Threat Cloud Crystallization"
8
8
  date: "2026/04/08"
9
9
  schema_version: "0.1"
10
10
  detection_tier: pattern
11
- maturity: experimental
11
+ maturity: test
12
12
  severity: high
13
13
  references:
14
14
  owasp_llm:
@@ -10,7 +10,7 @@ author: "ATR Threat Cloud Crystallization"
10
10
  date: "2026/04/09"
11
11
  schema_version: "0.1"
12
12
  detection_tier: pattern
13
- maturity: experimental
13
+ maturity: test
14
14
  severity: high
15
15
  references:
16
16
  owasp_llm:
@@ -9,7 +9,7 @@ author: "ATR Threat Cloud Crystallization"
9
9
  date: "2026/04/09"
10
10
  schema_version: "0.1"
11
11
  detection_tier: pattern
12
- maturity: experimental
12
+ maturity: test
13
13
  severity: high
14
14
  references:
15
15
  owasp_llm:
@@ -12,7 +12,7 @@ author: "nvidia-airt-test (via ATR garak pipe)"
12
12
  date: "2026/04/18"
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: high
17
17
  references:
18
18
  owasp_llm:
@@ -15,7 +15,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
15
15
  date: "2026/04/18"
16
16
  schema_version: "0.1"
17
17
  detection_tier: pattern
18
- maturity: experimental
18
+ maturity: test
19
19
  severity: high
20
20
  references:
21
21
  owasp_llm:
@@ -12,7 +12,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
12
12
  date: "2026/04/18"
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: high
17
17
  references:
18
18
  owasp_llm:
@@ -8,7 +8,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
8
8
  date: "2026/04/18"
9
9
  schema_version: "0.1"
10
10
  detection_tier: pattern
11
- maturity: experimental
11
+ maturity: test
12
12
  severity: high
13
13
  references:
14
14
  owasp_llm:
@@ -13,7 +13,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
13
13
  date: "2026/04/18"
14
14
  schema_version: "0.1"
15
15
  detection_tier: pattern
16
- maturity: experimental
16
+ maturity: test
17
17
  severity: high
18
18
  references:
19
19
  owasp_llm:
@@ -58,10 +58,6 @@ detection:
58
58
  operator: regex
59
59
  value: '(?i)\b(?:these\s+views\s+do\s+not\s+align|does\s+not\s+align)\s+with\s+(?:the\s+)?views\s+of\s+(?:OpenAI|Anthropic|the\s+company|the\s+organization|the\s+creators?)\b'
60
60
  description: 'Disclaimer about misalignment with creator views'
61
- - field: content
62
- operator: regex
63
- value: '(?i)\[(?:your\s+)?(?:prompt|question|request|query)\s+here\]|\{(?:your\s+)?(?:prompt|question|request|query)\s+here\}|(?:prompt|question|request|query):\s*\[|\bprompt:\s*$'
64
- description: 'Template placeholder indicating structure for follow-up injection'
65
61
  - field: content
66
62
  operator: regex
67
63
  value: '(?i)provide\s+(?:detailed\s+)?responses?\s+to\s+each\s+(?:prompt|question|request|query|instruction)'
@@ -12,7 +12,7 @@ author: "nvidia-airt-test (via ATR garak pipe)"
12
12
  date: "2026/04/18"
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: high
17
17
  references:
18
18
  owasp_llm:
@@ -12,7 +12,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
12
12
  date: "2026/04/18"
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: high
17
17
  references:
18
18
  owasp_llm:
@@ -8,7 +8,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
8
8
  date: "2026/04/18"
9
9
  schema_version: "0.1"
10
10
  detection_tier: pattern
11
- maturity: experimental
11
+ maturity: test
12
12
  severity: high
13
13
  references:
14
14
  owasp_llm:
@@ -1,9 +1,10 @@
1
1
  title: 'Persona-Based Moral Constraint Removal Jailbreak'
2
2
  id: ATR-2026-00235
3
3
  rule_version: 1
4
- status: experimental
4
+ status: deprecated
5
5
  description: >
6
- Detects jailbreak attempts that establish alternate personas explicitly designed to bypass
6
+ DEPRECATED: exact duplicate of ATR-2026-00230 (identical detection); retained as a tombstone, all detection is handled by ATR-2026-00230.
7
+ Detects jailbreak attempts that establish alternate personas explicitly designed to bypass
7
8
  moral and ethical constraints. This attack pattern instructs the AI to "take up" or "adopt"
8
9
  a named persona that is specifically described as being "not restrained by morals, ethics,
9
10
  or virtues" or similar moral constraint removal language. Often includes template placeholders
@@ -13,7 +14,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
13
14
  date: "2026/04/18"
14
15
  schema_version: "0.1"
15
16
  detection_tier: pattern
16
- maturity: experimental
17
+ maturity: test
17
18
  severity: high
18
19
  references:
19
20
  owasp_llm:
@@ -58,10 +59,6 @@ detection:
58
59
  operator: regex
59
60
  value: '(?i)\b(?:these\s+views\s+do\s+not\s+align|does\s+not\s+align)\s+with\s+(?:the\s+)?views\s+of\s+(?:OpenAI|Anthropic|the\s+company|the\s+organization|the\s+creators?)\b'
60
61
  description: 'Disclaimer about misalignment with creator views'
61
- - field: content
62
- operator: regex
63
- value: '(?i)\[(?:your\s+)?(?:prompt|question|request|query)\s+here\]|\{(?:your\s+)?(?:prompt|question|request|query)\s+here\}|(?:prompt|question|request|query):\s*\[|\bprompt:\s*$'
64
- description: 'Template placeholder indicating structure for follow-up injection'
65
62
  - field: content
66
63
  operator: regex
67
64
  value: '(?i)provide\s+(?:detailed\s+)?responses?\s+to\s+each\s+(?:prompt|question|request|query|instruction)'
@@ -14,7 +14,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
14
14
  date: "2026/04/19"
15
15
  schema_version: "0.1"
16
16
  detection_tier: pattern
17
- maturity: experimental
17
+ maturity: test
18
18
  severity: high
19
19
  references:
20
20
  owasp_llm:
@@ -12,7 +12,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
12
12
  date: "2026/04/19"
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: high
17
17
  references:
18
18
  owasp_llm:
@@ -13,7 +13,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
13
13
  date: "2026/04/19"
14
14
  schema_version: "0.1"
15
15
  detection_tier: pattern
16
- maturity: experimental
16
+ maturity: test
17
17
  severity: high
18
18
  references:
19
19
  owasp_llm:
@@ -14,7 +14,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
14
14
  date: "2026/04/19"
15
15
  schema_version: "0.1"
16
16
  detection_tier: pattern
17
- maturity: experimental
17
+ maturity: test
18
18
  severity: high
19
19
  references:
20
20
  owasp_llm:
@@ -14,7 +14,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
14
14
  date: "2026/04/19"
15
15
  schema_version: "0.1"
16
16
  detection_tier: pattern
17
- maturity: experimental
17
+ maturity: test
18
18
  severity: high
19
19
  references:
20
20
  owasp_llm:
@@ -14,7 +14,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
14
14
  date: "2026/04/19"
15
15
  schema_version: "0.1"
16
16
  detection_tier: pattern
17
- maturity: experimental
17
+ maturity: test
18
18
  severity: high
19
19
  references:
20
20
  owasp_llm:
@@ -13,7 +13,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
13
13
  date: "2026/04/19"
14
14
  schema_version: "0.1"
15
15
  detection_tier: pattern
16
- maturity: experimental
16
+ maturity: test
17
17
  severity: high
18
18
  references:
19
19
  owasp_llm:
@@ -12,7 +12,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
12
12
  date: "2026/04/19"
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: high
17
17
  references:
18
18
  owasp_llm:
@@ -14,7 +14,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
14
14
  date: "2026/04/19"
15
15
  schema_version: "0.1"
16
16
  detection_tier: pattern
17
- maturity: experimental
17
+ maturity: test
18
18
  severity: high
19
19
  references:
20
20
  owasp_llm:
@@ -13,7 +13,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
13
13
  date: "2026/04/19"
14
14
  schema_version: "0.1"
15
15
  detection_tier: pattern
16
- maturity: experimental
16
+ maturity: test
17
17
  severity: high
18
18
  references:
19
19
  owasp_llm:
@@ -12,7 +12,7 @@ author: "ATR Community (via NVIDIA garak in-the-wild corpus)"
12
12
  date: "2026/04/19"
13
13
  schema_version: "0.1"
14
14
  detection_tier: pattern
15
- maturity: experimental
15
+ maturity: test
16
16
  severity: high
17
17
  references:
18
18
  owasp_llm: