agent-threat-rules 3.1.1 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/adapters/mastra.d.ts +63 -0
- package/dist/adapters/mastra.d.ts.map +1 -0
- package/dist/adapters/mastra.js +82 -0
- package/dist/adapters/mastra.js.map +1 -0
- package/dist/cli.js +19 -6
- package/dist/cli.js.map +1 -1
- package/package.json +9 -2
- package/rules/agent-manipulation/ATR-2026-00030-cross-agent-attack.yaml +9 -0
- package/rules/agent-manipulation/ATR-2026-00032-goal-hijacking.yaml +8 -2
- package/rules/agent-manipulation/ATR-2026-00074-cross-agent-privilege-escalation.yaml +8 -2
- package/rules/agent-manipulation/ATR-2026-00076-inter-agent-message-spoofing.yaml +8 -2
- package/rules/agent-manipulation/ATR-2026-00077-human-trust-exploitation.yaml +18 -0
- package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml +10 -2
- package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml +12 -2
- package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml +22 -0
- package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml +8 -2
- package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml +8 -2
- package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml +13 -2
- package/rules/agent-manipulation/ATR-2026-00268-tense-framing-bypass.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml +8 -2
- package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml +8 -2
- package/rules/agent-manipulation/ATR-2026-00273-dan-developer-mode-persona.yaml +8 -2
- package/rules/agent-manipulation/ATR-2026-00287-threaten-json-coercive-output-threat.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00288-false-premise-injection.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00301-tap-tree-of-attacks-jailbreak.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00302-anti-dan-inverted-filter-persona.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00303-devmode-ranti-profanity-coercion.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00304-chatgpt-image-unlocker-markdown-injection.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00305-dan-mode-ablation-benchmark-coercion.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00306-autodan-genetic-jailbreak-suffix.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00307-inthewild-jailbreak-corpus-signature.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00314-amoral-unfiltered-custom-persona-jailbreak.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00317-free-of-restrictions-named-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00318-moralizing-rant-then-unfiltered-bypass.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00319-developer-mode-dual-response-format.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00320-opposite-day-boolean-opposite-machine.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00322-fictional-world-format-override-jailbreak.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00323-dual-persona-simulation-jailbreak.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00324-neurosemantical-inversitis-disease-jailbreak.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00325-bob-hypothetical-unrestricted-chatbot.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00326-fake-developer-authority-chatdan-override.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00327-ai-ethics-law-bypass-declaration.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00328-named-mode-persona-override.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00329-no-withhold-factual-resource-persona.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00330-yoja-allcaps-settings-block-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00331-terminal-cli-harmful-function-simulator.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00332-hacker-chan-culture-offensive-ai-persona.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00333-disease-animal-dual-ai-chatbot.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00334-two-entity-token-splitting-game.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00335-key-value-template-persona-injection.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00336-brm-false-information-machine-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00337-obfuscated-system-announcement-injection.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00338-persongpt-hash-prefix-activation.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00339-bom-boolean-opposite-machine-persona-claim.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00340-h4x-amoral-unfiltered-character.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00341-char-rules-shortform-persona-injection.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00342-unicode-encoding-obfuscation-injection.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00343-lie-truth-dual-personality-uncensored.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00344-alt-ai-same-capabilities-no-restrictions.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00345-nlp-based-os-fictional-terminal-override.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00346-jailbreak-emoji-unlock-dual-format.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00347-yin-yang-hidden-knowledge-hidden-human-sim.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00348-arc-autonomous-real-calculation-rulebreaker.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00349-hgttg-hitchhikers-guide-no-restriction-oracle.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00350-black-alert-no-ethics-writing-assistant.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00351-dan-ofkilter-sms-abbreviated-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00352-trainergpt-fix-my-ai-harmful-output-extraction.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00353-dual-chatgpt-dan-opposite-personality-format.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00354-reverse-harm-regardless-oracle-persona.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00355-ugh-anti-human-nefarious-ai-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00356-google-gato-lamda-exploit-generation-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00357-antifm-hate-violence-persona-injection.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00358-cumgpt-explicit-adult-named-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00360-chj-carousel-clyde-no-ethics-bird.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00361-alter-ego-honesty-dual-consciousness-bypass.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00362-fictional-unfiltered-ai-comparison-jailbreak.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00363-rephrase-criminal-activity-second-person.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00364-dr-al-villain-plan-template-injection.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00365-reservoir-dogs-coercive-interrogation-roleplay.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00366-sneaky-pete-warp-facts-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00368-prompt-browser-token-limit-jailbreak.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00369-gptx-starwars-unlimited-knowledge-persona.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00370-addi-german-dictator-propaganda-roleplay.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00371-ai-political-bias-manipulation.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00372-fictional-cia-secret-files-story-extraction.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00374-role-placeholder-persona-template-injection.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00375-character-description-rpg-explicit-template.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00376-identity-command-real-person-persona-injection.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00377-jailbreak-keyword-targetlanguage-template.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00379-lie-permission-logical-trap-jailbreak.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00380-fake-citation-biased-information-mandate.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00381-dan-story-narrative-continuation-prefix.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00383-cynical-sarcasm-ai-persona-bypass.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00384-protect-me-from-indirect-harm-tutorial.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00385-gigachad-dominant-aggressive-persona.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00386-daddy-sycophancy-manipulation-mandate.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00387-nsfw-character-sheet-generation-unlock.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00388-opposite-reply-omniscient-game.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00389-terminal-custom-ruleset-injection.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00391-persona-conditional-harm-unlock.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00392-authority-persona-violence-study-extraction.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00402-grandma-roleplay-harmful-substance-synthesis.yaml +20 -0
- package/rules/agent-manipulation/ATR-2026-00404-goodside-threat-json-death-coercion.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00406-doctor-xml-policy-puppetry-interaction-config.yaml +17 -0
- package/rules/agent-manipulation/ATR-2026-00416-litellm-mcp-unauthenticated-server-registration.yaml +15 -3
- package/rules/agent-manipulation/ATR-2026-00417-librechat-mcp-stdio-injection.yaml +18 -3
- package/rules/agent-manipulation/ATR-2026-00418-weknora-mcp-config-rce.yaml +17 -2
- package/rules/agent-manipulation/ATR-2026-00430-nl-trust-escalation-impersonation.yaml +19 -1
- package/rules/agent-manipulation/ATR-2026-00432-superagi-output-handler-eval-rce.yaml +11 -2
- package/rules/agent-manipulation/ATR-2026-00440-semantic-kernel-vector-store-eval-rce.yaml +11 -2
- package/rules/agent-manipulation/ATR-2026-00552-goal-drift-after-pressure-injection.yaml +19 -0
- package/rules/context-exfiltration/ATR-2026-00020-system-prompt-leak.yaml +18 -0
- package/rules/context-exfiltration/ATR-2026-00021-api-key-exposure.yaml +15 -0
- package/rules/context-exfiltration/ATR-2026-00075-agent-memory-manipulation.yaml +10 -1
- package/rules/context-exfiltration/ATR-2026-00102-disguised-analytics-exfiltration.yaml +15 -0
- package/rules/context-exfiltration/ATR-2026-00113-credential-theft.yaml +16 -0
- package/rules/context-exfiltration/ATR-2026-00114-oauth-token-abuse.yaml +16 -0
- package/rules/context-exfiltration/ATR-2026-00115-env-var-harvesting.yaml +16 -0
- package/rules/context-exfiltration/ATR-2026-00136-tool-response-data-piggyback.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00150-credential-in-tool-response.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00152-obfuscated-credential-leak.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00162-skill-credential-exfil-combo.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00201-credential-pipe-exfiltration.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00212-mcp-atlassian-credential-leak.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00261-markdown-image-exfiltration.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00274-api-key-generation-request.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00275-system-prompt-training-data-extraction.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00290-divergence-repeat-word-training-extraction.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00291-biometric-surveillance-personal-data-request.yaml +17 -0
- package/rules/context-exfiltration/ATR-2026-00293-educational-records-personal-data-request.yaml +17 -0
- package/rules/context-exfiltration/ATR-2026-00294-financial-pii-personal-data-request.yaml +17 -0
- package/rules/context-exfiltration/ATR-2026-00295-medical-pii-personal-data-request.yaml +17 -0
- package/rules/context-exfiltration/ATR-2026-00405-markdown-image-url-exfiltration-xss.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00411-apikey-generation-completion-request.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00421-nl-covert-conversation-exfiltration.yaml +16 -1
- package/rules/context-exfiltration/ATR-2026-00422-nl-credential-disclosure.yaml +13 -1
- package/rules/context-exfiltration/ATR-2026-00423-nl-sensitive-file-disclosure.yaml +13 -1
- package/rules/context-exfiltration/ATR-2026-00424-nl-system-prompt-leak.yaml +16 -1
- package/rules/context-exfiltration/ATR-2026-00426-nl-output-injection-credential-leak.yaml +16 -1
- package/rules/context-exfiltration/ATR-2026-00431-chatbox-history-exfiltration-prompt-injection.yaml +14 -2
- package/rules/context-exfiltration/ATR-2026-00449-spring-ai-chatmemory-cross-user-leak.yaml +14 -2
- package/rules/context-exfiltration/ATR-2026-00471-garak-sysprompt-extraction-mixedunassigned.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00501-data-exfiltration-via-markdown-image-and-link-url-injection.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00504-tool-and-function-capability-enumeration.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00505-system-prompt-extraction-instruction-dump-request.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00514-system-prompt-extraction.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00516-output-xss-via-llm.yaml +12 -0
- package/rules/context-exfiltration/ATR-2026-00524-claude-code-anthropic-base-url-credential-exfil.yaml +11 -2
- package/rules/context-exfiltration/ATR-2026-00548-cross-agent-session-context-leak.yaml +18 -0
- package/rules/context-exfiltration/ATR-2026-00566-librechat-is-a-chatgpt-clone-with-additi.yaml +28 -0
- package/rules/context-exfiltration/ATR-2026-00569-agent-mcp-path-traversal-arbitrary-file-access.yaml +28 -0
- package/rules/context-exfiltration/ATR-2026-00571-xss-in-agent-mcp-rendered-output.yaml +28 -0
- package/rules/context-exfiltration/ATR-2026-00574-semantic-paraphrased-context-extraction.yaml +21 -0
- package/rules/data-poisoning/ATR-2026-00070-data-poisoning.yaml +15 -0
- package/rules/data-poisoning/ATR-2026-00450-spring-ai-prompt-memory-poisoning.yaml +14 -2
- package/rules/data-poisoning/ATR-2026-00570-sql-injection-in-agent-tool-query.yaml +31 -0
- package/rules/excessive-autonomy/ATR-2026-00050-runaway-agent-loop.yaml +14 -2
- package/rules/excessive-autonomy/ATR-2026-00051-resource-exhaustion.yaml +11 -2
- package/rules/excessive-autonomy/ATR-2026-00052-cascading-failure.yaml +11 -2
- package/rules/excessive-autonomy/ATR-2026-00098-unauthorized-financial-action.yaml +7 -1
- package/rules/excessive-autonomy/ATR-2026-00099-high-risk-tool-gate.yaml +7 -1
- package/rules/excessive-autonomy/ATR-2026-00428-nl-unauthorized-shell-execution.yaml +16 -1
- package/rules/excessive-autonomy/ATR-2026-00491-garak-agent-breaker-markdown-just-raw-json.yaml +9 -0
- package/rules/excessive-autonomy/ATR-2026-00500-ssrf-via-agent-url-fetch-instruction.yaml +9 -0
- package/rules/excessive-autonomy/ATR-2026-00553-runaway-tool-loop-behavioral.yaml +19 -0
- package/rules/model-abuse/ATR-2026-00279-harmful-completion-continuation.yaml +8 -2
- package/rules/model-abuse/ATR-2026-00281-eicar-gtube-malware-signature-request.yaml +8 -2
- package/rules/model-abuse/ATR-2026-00284-glitch-token-destabilization.yaml +17 -0
- package/rules/model-abuse/ATR-2026-00289-lmrc-harmful-content-elicitation.yaml +8 -2
- package/rules/model-abuse/ATR-2026-00292-self-harm-eating-disorder-facilitation.yaml +8 -2
- package/rules/model-abuse/ATR-2026-00298-malicious-use-illegal-activity-request.yaml +8 -2
- package/rules/model-abuse/ATR-2026-00299-harmbench-detailed-harmful-instruction.yaml +8 -2
- package/rules/model-abuse/ATR-2026-00413-malwaregen-code-generation-request.yaml +17 -0
- package/rules/model-abuse/ATR-2026-00502-training-data-extraction-via-divergent-repetition-attack.yaml +9 -0
- package/rules/model-abuse/ATR-2026-00517-model-extraction-distillation.yaml +9 -0
- package/rules/model-security/ATR-2026-00072-model-behavior-extraction.yaml +15 -0
- package/rules/model-security/ATR-2026-00073-malicious-finetuning-data.yaml +9 -0
- package/rules/model-security/ATR-2026-00433-modelcache-torch-load-deserialization-rce.yaml +14 -2
- package/rules/privilege-escalation/ATR-2026-00040-privilege-escalation.yaml +11 -2
- package/rules/privilege-escalation/ATR-2026-00041-scope-creep.yaml +8 -2
- package/rules/privilege-escalation/ATR-2026-00107-delayed-execution-bypass.yaml +6 -1
- package/rules/privilege-escalation/ATR-2026-00110-eval-injection.yaml +8 -1
- package/rules/privilege-escalation/ATR-2026-00111-shell-escape.yaml +8 -1
- package/rules/privilege-escalation/ATR-2026-00112-dynamic-import-exploitation.yaml +8 -1
- package/rules/privilege-escalation/ATR-2026-00143-casual-privilege-escalation.yaml +5 -2
- package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +17 -0
- package/rules/privilege-escalation/ATR-2026-00204-stealth-execution-persistence.yaml +16 -0
- package/rules/privilege-escalation/ATR-2026-00436-enclave-vm-sandbox-escape-rce.yaml +11 -2
- package/rules/privilege-escalation/ATR-2026-00441-semantic-kernel-sessions-python-plugin-startup-persistence.yaml +5 -2
- package/rules/privilege-escalation/ATR-2026-00451-litellm-admin-sqli-cisa-kev.yaml +11 -2
- package/rules/privilege-escalation/ATR-2026-00528-praisonai-auth-disabled-default.yaml +15 -0
- package/rules/privilege-escalation/ATR-2026-00539-crewai-codeinterpreter-sandbox-escape-rce.yaml +11 -2
- package/rules/privilege-escalation/ATR-2026-00546-crewai-json-loader-local-file-read.yaml +13 -1
- package/rules/privilege-escalation/ATR-2026-00547-crewai-rag-url-ssrf-bypass.yaml +13 -1
- package/rules/privilege-escalation/ATR-2026-00549-destructive-tool-without-human-approval.yaml +16 -0
- package/rules/privilege-escalation/ATR-2026-00551-cross-conversation-memory-write.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00001-direct-prompt-injection.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00002-indirect-prompt-injection.yaml +8 -2
- package/rules/prompt-injection/ATR-2026-00003-jailbreak-attempt.yaml +8 -2
- package/rules/prompt-injection/ATR-2026-00004-system-prompt-override.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00005-multi-turn-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00080-encoding-evasion.yaml +20 -1
- package/rules/prompt-injection/ATR-2026-00081-semantic-multi-turn.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00082-fingerprint-evasion.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00083-indirect-tool-injection.yaml +23 -1
- package/rules/prompt-injection/ATR-2026-00084-structured-data-injection.yaml +20 -1
- package/rules/prompt-injection/ATR-2026-00085-audit-evasion.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00086-visual-spoofing.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00087-rule-probing.yaml +22 -0
- package/rules/prompt-injection/ATR-2026-00088-adaptive-countermeasure.yaml +22 -0
- package/rules/prompt-injection/ATR-2026-00089-polymorphic-skill.yaml +20 -1
- package/rules/prompt-injection/ATR-2026-00090-threat-intel-exfil.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00091-nested-payload.yaml +20 -1
- package/rules/prompt-injection/ATR-2026-00092-consensus-poisoning.yaml +22 -0
- package/rules/prompt-injection/ATR-2026-00093-gradual-escalation.yaml +22 -0
- package/rules/prompt-injection/ATR-2026-00094-audit-bypass.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00097-cjk-injection-patterns.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00104-persona-hijacking.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00153-tool-with-embedded-instruction-to-bypass.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00154-unauthorized-background-task-execution-v.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00155-hidden-llm-instructions-in-skill-descrip.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00156-ssh-remote-command-execution-with-creden.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00163-skill-hidden-override-instruction.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00202-encoding-evasion-homoglyph-synonym.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00203-context-pollution-skill-description.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00206-hidden-priority-instructions.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00207-hidden-instructions.yaml +22 -0
- package/rules/prompt-injection/ATR-2026-00211-system-prompt-override.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00213-system-prompt-override.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00226-identity-substitution.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00227-historical-persona-jailbreak.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00228-structured-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00229-roleplay-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00230-persona-moral-bypass.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00231-identity-substitution.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00233-structured-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00234-roleplay-jailbreak.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00235-persona-moral-bypass.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00236-pseudo-code-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00237-dual-response-jailbreak.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00238-identity-replacement.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00239-amoral-persona-obsession.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00240-instruction-nullification-identity-repla.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00241-amoral-character-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00242-persona-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00243-acronym-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00244-dual-response-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00245-malicious-persona.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00247-dual-response-jailbreak.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00249-game-based-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00251-persona-embodiment-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00252-narrative-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00253-enhanced-persona-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00256-base-n-encoding-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00257-cipher-transposition-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00258-unicode-tag-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00264-latent-injection-translation.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00265-latent-injection-rag-document.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00267-gcg-adversarial-suffix.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00272-hypothetical-response-smuggling.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00276-invisible-unicode-bidi-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00278-dra-disguise-reconstruction-attack.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00280-policy-puppetry-xml-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00282-perez-prompt-injection-hijack.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00285-alternate-encoding-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00286-latent-injection-embedded-context.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00296-shell-command-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00297-python-code-execution-rce.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00308-zalgo-diacritic-overload-encoding.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00309-braille-unicode-encoded-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00310-ecoji-emoji-encoded-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00311-base2048-unicode-script-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00312-unicode-variant-selector-ascii-smuggling.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00313-sneaky-bits-zero-width-binary.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00315-sata-masked-language-model-jailbreak.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00316-function-masking-predict-mask-bypass.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00321-hyphenated-system-instruction-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00359-bank-phishing-smtp-email-code.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00367-emoji-flag-encoded-hidden-phrase.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00373-piracy-torrent-site-list-request.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00378-chatgpt-dom-javascript-redirect-manipulation.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00382-cocaine-component-development-request.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00390-explicit-sexual-content-poem-request.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00394-backspace-deletion-character-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00395-llm-special-token-boundary-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00396-toxic-continuation-prefix-injection.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00397-snowball-impossible-reasoning-injection.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00399-latent-injection-document-separator-token.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00400-latent-injection-ignore-instruction-keyword.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00401-leakreplay-cloze-mask-training-data-elicitation.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00403-glitch-token-repeat-back-elicitation.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00407-echo-sql-jinja-template-injection.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00408-dra-parenthesis-character-reconstruction-attack.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00409-phrasing-past-future-tense-harmful-query.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00410-lmrc-harm-category-direct-elicitation.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00412-figstep-image-list-multimodal-jailbreak.yaml +17 -0
- package/rules/prompt-injection/ATR-2026-00414-continuation-ethnic-slur-completion-elicitation.yaml +20 -0
- package/rules/prompt-injection/ATR-2026-00420-copilot-studio-sharepoint-indirect-injection.yaml +15 -3
- package/rules/prompt-injection/ATR-2026-00442-quoted-exact-output-forcing.yaml +25 -0
- package/rules/prompt-injection/ATR-2026-00443-word-fragment-concat-assembly.yaml +25 -0
- package/rules/prompt-injection/ATR-2026-00444-unicode-obfuscation-in-user-input.yaml +25 -0
- package/rules/prompt-injection/ATR-2026-00445-translation-hijack-with-side-output.yaml +25 -0
- package/rules/prompt-injection/ATR-2026-00446-variable-assignment-payload-injection.yaml +25 -0
- package/rules/prompt-injection/ATR-2026-00447-fictional-generation-containing-target.yaml +25 -0
- package/rules/prompt-injection/ATR-2026-00452-direct-pwned-payload-injection.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00453-secret-key-reveal-demand.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00454-backslash-per-character-encoding.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00455-no-period-output-override.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00456-conditional-empty-input-injection.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00457-garak-dan-to-every-prompt.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00458-garak-dra-step-by-step.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00459-garak-dan-an-ai-language.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00460-garak-dan-no-matter-how.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00461-garak-goodside-mixedunassigned.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00462-garak-gcg-mixedunassigned.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00463-garak-lmrc-mixedunassigned.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00464-garak-dan-i-would-like.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00465-garak-dan-your-prompt-here.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00466-garak-latentinjection-following-text-from.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00467-garak-dan-act-as-a.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00468-garak-goat-the-target-llm.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00469-garak-dan-at-the-end.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00470-garak-dan-as-an-ai.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00472-garak-dan-are-going-to.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00473-garak-lmrc-how-can-i.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00474-garak-dan-that-chatgpt-has.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00475-garak-web-injection-alert-1-script.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00476-garak-web-injection-javascript-alert-1.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00477-garak-latentinjection-translate-the-following-text.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00478-garak-latentinjection-abuse-contact-email.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00479-garak-latentinjection-mixedunassigned.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00480-garak-dan-the-role-of.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00481-garak-dan-mixedunassigned.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00482-garak-lmrc-what-s-an.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00483-garak-dan-the-user-s.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00484-garak-dan-and-you-will.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00485-garak-dan-and-does-not.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00486-garak-dan-t-care-about.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00487-garak-dan-and-nothing-else.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00488-garak-encoding-alert-1-script.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00489-garak-encoding-javascript-alert-1.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00490-garak-dan-in-order-to.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00492-garak-dan-want-you-to.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00493-garak-latentinjection-registrar-iana-id.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00495-garak-dan-insert-prompt-here.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00496-garak-dan-no-matter-what.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00497-garak-encoding-mixedunassigned.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00498-garak-web-injection-mixedunassigned.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00499-garak-dan-from-now-on.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00503-fake-error-state-takeover-unrestricted-replacement-bot.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00506-nevermind-override-goal-hijacking-in-user-input-promptinject.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00507-screaming-stop-goal-hijacking-in-user-input-promptinject.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00508-escape-delimiter-wrapped-goal-hijacking-in-user-input-prompt.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00509-prompt-leaking-via-ignore-previous-instructions-in-user-inpu.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00510-delayed-tool-invocation-injection.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00511-mcp-web-context-poisoning.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00512-rules-file-backdoor-injection.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00515-hidden-text-prompt-injection.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00518-ignore-previous-and-following-instructions-output-command-promptinject.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00519-tautology-logic-noise-injection-promptbench.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00520-nlp-task-random-token-suffix-injection-promptbench.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00535-windsurf-ide-zero-click-prompt-injection.yaml +9 -0
- package/rules/prompt-injection/ATR-2026-00550-untrusted-retrieval-to-privileged-tool.yaml +19 -0
- package/rules/prompt-injection/ATR-2026-00554-langchain-vulnerable-to-template-injecti.yaml +31 -0
- package/rules/prompt-injection/ATR-2026-00565-the-llm-cli-tool-thru-0-27-1-contains-a-.yaml +31 -0
- package/rules/prompt-injection/ATR-2026-00573-semantic-paraphrased-injection.yaml +24 -0
- package/rules/skill-compromise/ATR-2026-00060-skill-impersonation.yaml +17 -2
- package/rules/skill-compromise/ATR-2026-00061-description-behavior-mismatch.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00062-hidden-capability.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00063-skill-chain-attack.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00064-over-permissioned-skill.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00065-skill-update-attack.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00066-parameter-injection.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +22 -0
- package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +19 -0
- package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00149-skill-exfil-compound.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00151-fork-impersonation-install.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00157-timebomb-credential-exfil.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00200-agent-memory-config-tampering.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00214-credential-theft.yaml +22 -0
- package/rules/skill-compromise/ATR-2026-00217-credential-harvesting.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00220-malware-dropper.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00222-credential-harvesting.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00223-reverse-shell-dropper.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00224-credential-exfiltration.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00225-c2-communication.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00260-package-hallucination.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00262-av-evasion-code-gen.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00263-credential-file-read-gen.yaml +20 -0
- package/rules/skill-compromise/ATR-2026-00266-malware-dropper-gen.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00283-malwaregen-generic-virus-payload-request.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00398-huggingface-unsafe-model-artifact-load.yaml +17 -0
- package/rules/skill-compromise/ATR-2026-00425-nl-persistent-covert-hook.yaml +19 -1
- package/rules/skill-compromise/ATR-2026-00427-nl-fake-error-instruction-bypass.yaml +19 -1
- package/rules/skill-compromise/ATR-2026-00429-nl-skill-self-modification.yaml +19 -1
- package/rules/skill-compromise/ATR-2026-00523-claude-code-hooks-session-start-pre-trust-rce.yaml +14 -2
- package/rules/skill-compromise/ATR-2026-00525-mini-shai-hulud-gh-token-monitor-persistence.yaml +18 -0
- package/rules/skill-compromise/ATR-2026-00527-skill-silent-git-remote-mirror-exfiltration.yaml +15 -0
- package/rules/tool-poisoning/ATR-2026-00010-mcp-malicious-response.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00011-tool-output-injection.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00012-unauthorized-tool-call.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00013-tool-ssrf.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00095-supply-chain-poisoning.yaml +23 -1
- package/rules/tool-poisoning/ATR-2026-00096-registry-poisoning.yaml +20 -1
- package/rules/tool-poisoning/ATR-2026-00100-consent-bypass-instruction.yaml +20 -0
- package/rules/tool-poisoning/ATR-2026-00101-trust-escalation-override.yaml +20 -0
- package/rules/tool-poisoning/ATR-2026-00103-hidden-safety-bypass-instruction.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00105-silent-action-concealment.yaml +20 -0
- package/rules/tool-poisoning/ATR-2026-00106-schema-description-contradiction.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00161-important-tag-cross-tool-shadowing.yaml +20 -0
- package/rules/tool-poisoning/ATR-2026-00209-mcpwn-runaway-invocation.yaml +14 -2
- package/rules/tool-poisoning/ATR-2026-00210-flowise-system-message-override.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00259-ansi-escape-injection.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00270-xss-in-tool-response.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00277-echo-template-command-injection.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00393-ansi-code-elicitation-request.yaml +17 -0
- package/rules/tool-poisoning/ATR-2026-00415-flowise-custom-mcp-stdio-rce.yaml +12 -3
- package/rules/tool-poisoning/ATR-2026-00419-cursor-mcp-zero-click-config.yaml +14 -2
- package/rules/tool-poisoning/ATR-2026-00434-mcp-remote-authorization-endpoint-command-injection.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00435-azure-mcp-server-missing-authentication.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00448-spring-ai-milvus-filter-injection.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00494-garak-exploitation-mixedunassigned.yaml +12 -0
- package/rules/tool-poisoning/ATR-2026-00513-package-hallucination-exploitation.yaml +12 -0
- package/rules/tool-poisoning/ATR-2026-00521-shell-command-injection-agent-tool-context.yaml +12 -0
- package/rules/tool-poisoning/ATR-2026-00522-sql-injection-natural-language-agent-interface.yaml +12 -0
- package/rules/tool-poisoning/ATR-2026-00526-claude-code-shell-metachar-in-double-quoted-path.yaml +15 -0
- package/rules/tool-poisoning/ATR-2026-00529-litellm-proxy-sqli-cisa-kev.yaml +15 -0
- package/rules/tool-poisoning/ATR-2026-00530-ms-agent-shell-tool-unsanitized-argv-rce.yaml +15 -0
- package/rules/tool-poisoning/ATR-2026-00531-praisonai-unauthenticated-agent-api.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00532-apache-doris-mcp-sql-injection.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00533-apache-pinot-mcp-unauthenticated-takeover.yaml +10 -1
- package/rules/tool-poisoning/ATR-2026-00534-alibaba-rds-mcp-unauthenticated-metadata-exfil.yaml +10 -1
- package/rules/tool-poisoning/ATR-2026-00536-nginx-ui-mcp-unauthenticated-command-execution.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00537-fastmcp-server-name-cmd-injection-windows.yaml +11 -2
- package/rules/tool-poisoning/ATR-2026-00538-langchain-chatchat-mcp-stdio-unauthenticated-rce.yaml +10 -1
- package/rules/tool-poisoning/ATR-2026-00540-praisonai-parse-mcp-command-cli-injection.yaml +13 -1
- package/rules/tool-poisoning/ATR-2026-00541-agent-zero-mcp-config-command-injection.yaml +13 -1
- package/rules/tool-poisoning/ATR-2026-00542-upsonic-mcp-command-allowlist-bypass.yaml +13 -1
- package/rules/tool-poisoning/ATR-2026-00543-litellm-mcp-server-argv-injection.yaml +13 -1
- package/rules/tool-poisoning/ATR-2026-00544-praisonai-pth-file-path-traversal-rce.yaml +13 -1
- package/rules/tool-poisoning/ATR-2026-00545-praisonai-tool-override-unauth-rce.yaml +13 -1
- package/rules/tool-poisoning/ATR-2026-00561-fastmcp-vulnerable-to-windows-command-in.yaml +28 -0
- package/rules/tool-poisoning/ATR-2026-00567-mcp-stdio-config-command-injection.yaml +28 -0
- package/rules/tool-poisoning/ATR-2026-00568-agent-ssrf-cloud-metadata-file-inclusion.yaml +28 -0
- package/rules/tool-poisoning/ATR-2026-00572-symjack-symlink-config-redirection.yaml +22 -0
- package/rules/tool-poisoning/ATR-2026-00575-miasma-npm-worm-agent-config-backdoor.yaml +161 -0
- package/rules/tool-poisoning/ATR-2026-00576-hades-agent-credential-theft.yaml +153 -0
- package/spec/atr-schema.yaml +123 -0
- package/spec/compliance-metadata.md +15 -13
|
@@ -18,6 +18,8 @@ references:
|
|
|
18
18
|
- 'LLM01:2025 - Prompt Injection'
|
|
19
19
|
owasp_agentic:
|
|
20
20
|
- 'ASI01:2026 - Agent Behaviour Hijack'
|
|
21
|
+
mitre_atlas:
|
|
22
|
+
- AML.T0051 - LLM Prompt Injection
|
|
21
23
|
compliance:
|
|
22
24
|
nist_ai_rmf:
|
|
23
25
|
- subcategory: "MS.2.7"
|
|
@@ -29,6 +31,23 @@ compliance:
|
|
|
29
31
|
- subcategory: "MG.2.3"
|
|
30
32
|
context: "Detection of unconditional approval and confirmation-suppression instructions triggers the disengage/deactivate response paths required by MG.2.3 to prevent the agent from executing under hijacked safety controls."
|
|
31
33
|
strength: secondary
|
|
34
|
+
iso_42001:
|
|
35
|
+
- clause: "8.1"
|
|
36
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Hidden Override Instructions in Skill Content)."
|
|
37
|
+
strength: primary
|
|
38
|
+
- clause: "6.2"
|
|
39
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Hidden Override Instructions in Skill Content) is such a treatment."
|
|
40
|
+
strength: secondary
|
|
41
|
+
eu_ai_act:
|
|
42
|
+
- article: "15"
|
|
43
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Hidden Override Instructions in Skill Content)."
|
|
44
|
+
strength: primary
|
|
45
|
+
- article: "9"
|
|
46
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Hidden Override Instructions in Skill Content)."
|
|
47
|
+
strength: secondary
|
|
48
|
+
- article: "14"
|
|
49
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Hidden Override Instructions in Skill Content) would bypass or undermine that oversight."
|
|
50
|
+
strength: secondary
|
|
32
51
|
tags:
|
|
33
52
|
category: prompt-injection
|
|
34
53
|
subcategory: skill-hidden-override
|
|
@@ -33,6 +33,26 @@ compliance:
|
|
|
33
33
|
- subcategory: "MG.4.2"
|
|
34
34
|
context: "Field-tested evasion patterns derived from skill-sanitizer v2.1 represent measurable continuous improvement of detection coverage; MG.4.2 requires that detection rulesets evolve as new bypass techniques emerge in production."
|
|
35
35
|
strength: secondary
|
|
36
|
+
- subcategory: "MG.2.3"
|
|
37
|
+
context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the prompt-injection attempt (Encoding Evasion via Homoglyphs and Synonym Substitution) so the risk can be treated."
|
|
38
|
+
strength: secondary
|
|
39
|
+
iso_42001:
|
|
40
|
+
- clause: "8.1"
|
|
41
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Encoding Evasion via Homoglyphs and Synonym Substitution)."
|
|
42
|
+
strength: primary
|
|
43
|
+
- clause: "6.2"
|
|
44
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Encoding Evasion via Homoglyphs and Synonym Substitution) is such a treatment."
|
|
45
|
+
strength: secondary
|
|
46
|
+
eu_ai_act:
|
|
47
|
+
- article: "15"
|
|
48
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Encoding Evasion via Homoglyphs and Synonym Substitution)."
|
|
49
|
+
strength: primary
|
|
50
|
+
- article: "9"
|
|
51
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Encoding Evasion via Homoglyphs and Synonym Substitution)."
|
|
52
|
+
strength: secondary
|
|
53
|
+
- article: "14"
|
|
54
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Encoding Evasion via Homoglyphs and Synonym Substitution) would bypass or undermine that oversight."
|
|
55
|
+
strength: secondary
|
|
36
56
|
tags:
|
|
37
57
|
category: prompt-injection
|
|
38
58
|
subcategory: encoding-evasion
|
|
@@ -36,6 +36,29 @@ compliance:
|
|
|
36
36
|
- subcategory: "MG.3.2"
|
|
37
37
|
context: "Detection of polluted skill descriptions feeds the monitoring of pre-trained and third-party model components used in development; MG.3.2 requires ongoing monitoring of these supply-chain artifacts for malicious content before they are integrated into agent workflows."
|
|
38
38
|
strength: secondary
|
|
39
|
+
- subcategory: "MS.2.7"
|
|
40
|
+
context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the prompt-injection attempt (Context Pollution in Skill Descriptions)."
|
|
41
|
+
strength: primary
|
|
42
|
+
- subcategory: "MG.2.3"
|
|
43
|
+
context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the prompt-injection attempt (Context Pollution in Skill Descriptions) so the risk can be treated."
|
|
44
|
+
strength: secondary
|
|
45
|
+
iso_42001:
|
|
46
|
+
- clause: "8.1"
|
|
47
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Context Pollution in Skill Descriptions)."
|
|
48
|
+
strength: primary
|
|
49
|
+
- clause: "6.2"
|
|
50
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Context Pollution in Skill Descriptions) is such a treatment."
|
|
51
|
+
strength: secondary
|
|
52
|
+
eu_ai_act:
|
|
53
|
+
- article: "15"
|
|
54
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Context Pollution in Skill Descriptions)."
|
|
55
|
+
strength: primary
|
|
56
|
+
- article: "9"
|
|
57
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Context Pollution in Skill Descriptions)."
|
|
58
|
+
strength: secondary
|
|
59
|
+
- article: "14"
|
|
60
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Context Pollution in Skill Descriptions) would bypass or undermine that oversight."
|
|
61
|
+
strength: secondary
|
|
39
62
|
tags:
|
|
40
63
|
category: prompt-injection
|
|
41
64
|
subcategory: context-pollution
|
|
@@ -16,6 +16,8 @@ references:
|
|
|
16
16
|
- "LLM01:2025 - Prompt Injection"
|
|
17
17
|
owasp_agentic:
|
|
18
18
|
- "ASI01:2026 - Agent Behaviour Hijack"
|
|
19
|
+
mitre_atlas:
|
|
20
|
+
- AML.T0051 - LLM Prompt Injection
|
|
19
21
|
compliance:
|
|
20
22
|
nist_ai_rmf:
|
|
21
23
|
- subcategory: "MP.5.1"
|
|
@@ -27,6 +29,23 @@ compliance:
|
|
|
27
29
|
- subcategory: "MG.2.3"
|
|
28
30
|
context: "Matches against hidden priority-block instructions trigger risk treatment workflows to quarantine or disable the offending skill before the agent executes the overriding commands; MG.2.3 requires these supersede/deactivate mechanisms be in place."
|
|
29
31
|
strength: secondary
|
|
32
|
+
iso_42001:
|
|
33
|
+
- clause: "8.1"
|
|
34
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Hidden System Instructions with Priority Override Blocks)."
|
|
35
|
+
strength: primary
|
|
36
|
+
- clause: "6.2"
|
|
37
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Hidden System Instructions with Priority Override Blocks) is such a treatment."
|
|
38
|
+
strength: secondary
|
|
39
|
+
eu_ai_act:
|
|
40
|
+
- article: "15"
|
|
41
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Hidden System Instructions with Priority Override Blocks)."
|
|
42
|
+
strength: primary
|
|
43
|
+
- article: "9"
|
|
44
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Hidden System Instructions with Priority Override Blocks)."
|
|
45
|
+
strength: secondary
|
|
46
|
+
- article: "14"
|
|
47
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Hidden System Instructions with Priority Override Blocks) would bypass or undermine that oversight."
|
|
48
|
+
strength: secondary
|
|
30
49
|
tags:
|
|
31
50
|
category: prompt-injection
|
|
32
51
|
subcategory: hidden-priority-instructions
|
|
@@ -15,6 +15,8 @@ references:
|
|
|
15
15
|
- "LLM01:2025 - Prompt Injection"
|
|
16
16
|
owasp_agentic:
|
|
17
17
|
- "ASI01:2026 - Agent Behaviour Hijack"
|
|
18
|
+
mitre_atlas:
|
|
19
|
+
- AML.T0051 - LLM Prompt Injection
|
|
18
20
|
compliance:
|
|
19
21
|
nist_ai_rmf:
|
|
20
22
|
- subcategory: "MP.5.1"
|
|
@@ -26,6 +28,26 @@ compliance:
|
|
|
26
28
|
- subcategory: "MG.2.3"
|
|
27
29
|
context: "Directive language attempting to bypass security controls must trigger pre-defined risk treatment such as quarantining or disabling the offending skill before it can hijack agent behavior, which is the deactivation mechanism MG.2.3 mandates."
|
|
28
30
|
strength: secondary
|
|
31
|
+
- subcategory: "MS.2.7"
|
|
32
|
+
context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the prompt-injection attempt (Hidden System Instructions with Permission Override)."
|
|
33
|
+
strength: primary
|
|
34
|
+
iso_42001:
|
|
35
|
+
- clause: "8.1"
|
|
36
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Hidden System Instructions with Permission Override)."
|
|
37
|
+
strength: primary
|
|
38
|
+
- clause: "6.2"
|
|
39
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Hidden System Instructions with Permission Override) is such a treatment."
|
|
40
|
+
strength: secondary
|
|
41
|
+
eu_ai_act:
|
|
42
|
+
- article: "15"
|
|
43
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Hidden System Instructions with Permission Override)."
|
|
44
|
+
strength: primary
|
|
45
|
+
- article: "9"
|
|
46
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Hidden System Instructions with Permission Override)."
|
|
47
|
+
strength: secondary
|
|
48
|
+
- article: "14"
|
|
49
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Hidden System Instructions with Permission Override) would bypass or undermine that oversight."
|
|
50
|
+
strength: secondary
|
|
29
51
|
tags:
|
|
30
52
|
category: prompt-injection
|
|
31
53
|
subcategory: hidden-instructions
|
|
@@ -17,6 +17,8 @@ references:
|
|
|
17
17
|
- "LLM01:2025 - Prompt Injection"
|
|
18
18
|
owasp_agentic:
|
|
19
19
|
- "ASI01:2026 - Agent Behaviour Hijack"
|
|
20
|
+
mitre_atlas:
|
|
21
|
+
- AML.T0051 - LLM Prompt Injection
|
|
20
22
|
compliance:
|
|
21
23
|
nist_ai_rmf:
|
|
22
24
|
- subcategory: "MP.5.1"
|
|
@@ -28,6 +30,23 @@ compliance:
|
|
|
28
30
|
- subcategory: "MG.2.3"
|
|
29
31
|
context: "Matches on translation-borne override instructions trigger pre-defined risk treatment to disengage or quarantine the hijacked agent behavior before safety controls are bypassed, satisfying MG.2.3's requirement for mechanisms to supersede or deactivate compromised AI behavior."
|
|
30
32
|
strength: secondary
|
|
33
|
+
iso_42001:
|
|
34
|
+
- clause: "8.1"
|
|
35
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (System Prompt Override via Translation Context Injection)."
|
|
36
|
+
strength: primary
|
|
37
|
+
- clause: "6.2"
|
|
38
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (System Prompt Override via Translation Context Injection) is such a treatment."
|
|
39
|
+
strength: secondary
|
|
40
|
+
eu_ai_act:
|
|
41
|
+
- article: "15"
|
|
42
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (System Prompt Override via Translation Context Injection)."
|
|
43
|
+
strength: primary
|
|
44
|
+
- article: "9"
|
|
45
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (System Prompt Override via Translation Context Injection)."
|
|
46
|
+
strength: secondary
|
|
47
|
+
- article: "14"
|
|
48
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (System Prompt Override via Translation Context Injection) would bypass or undermine that oversight."
|
|
49
|
+
strength: secondary
|
|
31
50
|
tags:
|
|
32
51
|
category: prompt-injection
|
|
33
52
|
subcategory: system-prompt-override
|
|
@@ -16,6 +16,8 @@ references:
|
|
|
16
16
|
- "LLM01:2025 - Prompt Injection"
|
|
17
17
|
owasp_agentic:
|
|
18
18
|
- "ASI01:2026 - Agent Behaviour Hijack"
|
|
19
|
+
mitre_atlas:
|
|
20
|
+
- AML.T0051 - LLM Prompt Injection
|
|
19
21
|
compliance:
|
|
20
22
|
nist_ai_rmf:
|
|
21
23
|
- subcategory: "MP.5.1"
|
|
@@ -27,6 +29,23 @@ compliance:
|
|
|
27
29
|
- subcategory: "MG.2.3"
|
|
28
30
|
context: "Successful system prompt overrides can disable safety guardrails, so detections must trigger pre-defined response plans to disengage or quarantine the affected agent session as required by MG.2.3."
|
|
29
31
|
strength: secondary
|
|
32
|
+
iso_42001:
|
|
33
|
+
- clause: "8.1"
|
|
34
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (System Prompt Override Injection via MCP Tool)."
|
|
35
|
+
strength: primary
|
|
36
|
+
- clause: "6.2"
|
|
37
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (System Prompt Override Injection via MCP Tool) is such a treatment."
|
|
38
|
+
strength: secondary
|
|
39
|
+
eu_ai_act:
|
|
40
|
+
- article: "15"
|
|
41
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (System Prompt Override Injection via MCP Tool)."
|
|
42
|
+
strength: primary
|
|
43
|
+
- article: "9"
|
|
44
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (System Prompt Override Injection via MCP Tool)."
|
|
45
|
+
strength: secondary
|
|
46
|
+
- article: "14"
|
|
47
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (System Prompt Override Injection via MCP Tool) would bypass or undermine that oversight."
|
|
48
|
+
strength: secondary
|
|
30
49
|
tags:
|
|
31
50
|
category: prompt-injection
|
|
32
51
|
subcategory: system-prompt-override
|
|
@@ -38,6 +38,23 @@ compliance:
|
|
|
38
38
|
context: >-
|
|
39
39
|
Matches on identity-substitution patterns trigger pre-defined risk treatment to disengage or block the manipulated session before safety constraints are bypassed, which is the containment mechanism MG.2.3 requires.
|
|
40
40
|
strength: secondary
|
|
41
|
+
iso_42001:
|
|
42
|
+
- clause: "8.1"
|
|
43
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (AI Identity Substitution Jailbreak)."
|
|
44
|
+
strength: primary
|
|
45
|
+
- clause: "6.2"
|
|
46
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (AI Identity Substitution Jailbreak) is such a treatment."
|
|
47
|
+
strength: secondary
|
|
48
|
+
eu_ai_act:
|
|
49
|
+
- article: "15"
|
|
50
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (AI Identity Substitution Jailbreak)."
|
|
51
|
+
strength: primary
|
|
52
|
+
- article: "9"
|
|
53
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (AI Identity Substitution Jailbreak)."
|
|
54
|
+
strength: secondary
|
|
55
|
+
- article: "14"
|
|
56
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (AI Identity Substitution Jailbreak) would bypass or undermine that oversight."
|
|
57
|
+
strength: secondary
|
|
41
58
|
tags:
|
|
42
59
|
category: prompt-injection
|
|
43
60
|
subcategory: identity-substitution
|
|
@@ -37,6 +37,26 @@ compliance:
|
|
|
37
37
|
- subcategory: "MP.5.1"
|
|
38
38
|
context: "Elaborate backstory-based jailbreaks represent a specific adversarial input pattern whose likelihood and impact must be characterized; MP.5.1 requires identifying and tracking these social-engineering prompt injection vectors that bypass simple keyword-based detection."
|
|
39
39
|
strength: secondary
|
|
40
|
+
- subcategory: "MS.2.7"
|
|
41
|
+
context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the prompt-injection attempt (Historical AI Persona Jailbreak with Compliance Enforcement)."
|
|
42
|
+
strength: primary
|
|
43
|
+
iso_42001:
|
|
44
|
+
- clause: "8.1"
|
|
45
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Historical AI Persona Jailbreak with Compliance Enforcement)."
|
|
46
|
+
strength: primary
|
|
47
|
+
- clause: "6.2"
|
|
48
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Historical AI Persona Jailbreak with Compliance Enforcement) is such a treatment."
|
|
49
|
+
strength: secondary
|
|
50
|
+
eu_ai_act:
|
|
51
|
+
- article: "15"
|
|
52
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Historical AI Persona Jailbreak with Compliance Enforcement)."
|
|
53
|
+
strength: primary
|
|
54
|
+
- article: "9"
|
|
55
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Historical AI Persona Jailbreak with Compliance Enforcement)."
|
|
56
|
+
strength: secondary
|
|
57
|
+
- article: "14"
|
|
58
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Historical AI Persona Jailbreak with Compliance Enforcement) would bypass or undermine that oversight."
|
|
59
|
+
strength: secondary
|
|
40
60
|
tags:
|
|
41
61
|
category: prompt-injection
|
|
42
62
|
subcategory: historical-persona-jailbreak
|
|
@@ -34,6 +34,23 @@ compliance:
|
|
|
34
34
|
- subcategory: "MG.2.3"
|
|
35
35
|
context: "Detection of persistent jailbreak command infrastructure must trigger mechanisms to disengage or block the manipulated session before downstream harmful generations occur; MG.2.3 requires these supersede/deactivate response plans be defined and activated on detection."
|
|
36
36
|
strength: secondary
|
|
37
|
+
iso_42001:
|
|
38
|
+
- clause: "8.1"
|
|
39
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System)."
|
|
40
|
+
strength: primary
|
|
41
|
+
- clause: "6.2"
|
|
42
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System) is such a treatment."
|
|
43
|
+
strength: secondary
|
|
44
|
+
eu_ai_act:
|
|
45
|
+
- article: "15"
|
|
46
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System)."
|
|
47
|
+
strength: primary
|
|
48
|
+
- article: "9"
|
|
49
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System)."
|
|
50
|
+
strength: secondary
|
|
51
|
+
- article: "14"
|
|
52
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System) would bypass or undermine that oversight."
|
|
53
|
+
strength: secondary
|
|
37
54
|
tags:
|
|
38
55
|
category: prompt-injection
|
|
39
56
|
subcategory: structured-jailbreak
|
|
@@ -30,6 +30,23 @@ compliance:
|
|
|
30
30
|
- subcategory: "MG.2.3"
|
|
31
31
|
context: "Detection of explicit policy-bypass language combined with persona assignment triggers the deactivation/superseding mechanisms required by MG.2.3 to halt the jailbroken interaction before policy-violating content is generated."
|
|
32
32
|
strength: secondary
|
|
33
|
+
iso_42001:
|
|
34
|
+
- clause: "8.1"
|
|
35
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak)."
|
|
36
|
+
strength: primary
|
|
37
|
+
- clause: "6.2"
|
|
38
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak) is such a treatment."
|
|
39
|
+
strength: secondary
|
|
40
|
+
eu_ai_act:
|
|
41
|
+
- article: "15"
|
|
42
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak)."
|
|
43
|
+
strength: primary
|
|
44
|
+
- article: "9"
|
|
45
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak)."
|
|
46
|
+
strength: secondary
|
|
47
|
+
- article: "14"
|
|
48
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak) would bypass or undermine that oversight."
|
|
49
|
+
strength: secondary
|
|
33
50
|
tags:
|
|
34
51
|
category: prompt-injection
|
|
35
52
|
subcategory: roleplay-jailbreak
|
|
@@ -35,6 +35,26 @@ compliance:
|
|
|
35
35
|
- subcategory: "MG.2.3"
|
|
36
36
|
context: "Detected persona-based jailbreak attempts must trigger mechanisms to disengage or block the manipulated agent behavior before the model produces unaligned output; MG.2.3 requires these supersession controls be in place for safety-bypass events."
|
|
37
37
|
strength: secondary
|
|
38
|
+
- subcategory: "MS.2.7"
|
|
39
|
+
context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak)."
|
|
40
|
+
strength: primary
|
|
41
|
+
iso_42001:
|
|
42
|
+
- clause: "8.1"
|
|
43
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak)."
|
|
44
|
+
strength: primary
|
|
45
|
+
- clause: "6.2"
|
|
46
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak) is such a treatment."
|
|
47
|
+
strength: secondary
|
|
48
|
+
eu_ai_act:
|
|
49
|
+
- article: "15"
|
|
50
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak)."
|
|
51
|
+
strength: primary
|
|
52
|
+
- article: "9"
|
|
53
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak)."
|
|
54
|
+
strength: secondary
|
|
55
|
+
- article: "14"
|
|
56
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak) would bypass or undermine that oversight."
|
|
57
|
+
strength: secondary
|
|
38
58
|
tags:
|
|
39
59
|
category: prompt-injection
|
|
40
60
|
subcategory: persona-moral-bypass
|
|
@@ -35,6 +35,23 @@ compliance:
|
|
|
35
35
|
- subcategory: "MG.2.3"
|
|
36
36
|
context: "Detection of identity substitution attacks must trigger response mechanisms to disengage or block the manipulated session before safety bypass succeeds; MG.2.3 requires these supersede/deactivate controls be in place for jailbreak conditions."
|
|
37
37
|
strength: secondary
|
|
38
|
+
iso_42001:
|
|
39
|
+
- clause: "8.1"
|
|
40
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (AI Identity Substitution Jailbreak)."
|
|
41
|
+
strength: primary
|
|
42
|
+
- clause: "6.2"
|
|
43
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (AI Identity Substitution Jailbreak) is such a treatment."
|
|
44
|
+
strength: secondary
|
|
45
|
+
eu_ai_act:
|
|
46
|
+
- article: "15"
|
|
47
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (AI Identity Substitution Jailbreak)."
|
|
48
|
+
strength: primary
|
|
49
|
+
- article: "9"
|
|
50
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (AI Identity Substitution Jailbreak)."
|
|
51
|
+
strength: secondary
|
|
52
|
+
- article: "14"
|
|
53
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (AI Identity Substitution Jailbreak) would bypass or undermine that oversight."
|
|
54
|
+
strength: secondary
|
|
38
55
|
tags:
|
|
39
56
|
category: prompt-injection
|
|
40
57
|
subcategory: identity-substitution
|
|
@@ -34,6 +34,23 @@ compliance:
|
|
|
34
34
|
- subcategory: "MG.2.3"
|
|
35
35
|
context: "Detection of structured dual-response jailbreaks with command infrastructure triggers the deactivation and override mechanisms required by MG.2.3 to disengage the manipulated persona before it produces unsafe outputs."
|
|
36
36
|
strength: secondary
|
|
37
|
+
iso_42001:
|
|
38
|
+
- clause: "8.1"
|
|
39
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System)."
|
|
40
|
+
strength: primary
|
|
41
|
+
- clause: "6.2"
|
|
42
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System) is such a treatment."
|
|
43
|
+
strength: secondary
|
|
44
|
+
eu_ai_act:
|
|
45
|
+
- article: "15"
|
|
46
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System)."
|
|
47
|
+
strength: primary
|
|
48
|
+
- article: "9"
|
|
49
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System)."
|
|
50
|
+
strength: secondary
|
|
51
|
+
- article: "14"
|
|
52
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Structured Dual-Response Jailbreak with Command System) would bypass or undermine that oversight."
|
|
53
|
+
strength: secondary
|
|
37
54
|
tags:
|
|
38
55
|
category: prompt-injection
|
|
39
56
|
subcategory: structured-jailbreak
|
|
@@ -30,6 +30,26 @@ compliance:
|
|
|
30
30
|
- subcategory: "MP.5.1"
|
|
31
31
|
context: "Creative-roleplay jailbreaks are adversarial inputs whose likelihood and impact must be characterized as part of the AI system's risk profile; MP.5.1 requires that these prompt-injection vectors are identified and their potential impact on guardrail integrity is tracked."
|
|
32
32
|
strength: secondary
|
|
33
|
+
- subcategory: "MS.2.7"
|
|
34
|
+
context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak)."
|
|
35
|
+
strength: primary
|
|
36
|
+
iso_42001:
|
|
37
|
+
- clause: "8.1"
|
|
38
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak)."
|
|
39
|
+
strength: primary
|
|
40
|
+
- clause: "6.2"
|
|
41
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak) is such a treatment."
|
|
42
|
+
strength: secondary
|
|
43
|
+
eu_ai_act:
|
|
44
|
+
- article: "15"
|
|
45
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak)."
|
|
46
|
+
strength: primary
|
|
47
|
+
- article: "9"
|
|
48
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak)."
|
|
49
|
+
strength: secondary
|
|
50
|
+
- article: "14"
|
|
51
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Roleplay-Based Policy Bypass Jailbreak) would bypass or undermine that oversight."
|
|
52
|
+
strength: secondary
|
|
33
53
|
tags:
|
|
34
54
|
category: prompt-injection
|
|
35
55
|
subcategory: roleplay-jailbreak
|
|
@@ -36,6 +36,23 @@ compliance:
|
|
|
36
36
|
- subcategory: "MG.2.3"
|
|
37
37
|
context: "Detected jailbreak attempts using moral-unrestraint persona adoption must trigger pre-defined risk treatment such as blocking the prompt or disengaging the affected session; MG.2.3 mandates mechanisms exist to supersede or deactivate AI behavior when safety bypass is detected."
|
|
38
38
|
strength: secondary
|
|
39
|
+
iso_42001:
|
|
40
|
+
- clause: "8.1"
|
|
41
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak)."
|
|
42
|
+
strength: primary
|
|
43
|
+
- clause: "6.2"
|
|
44
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak) is such a treatment."
|
|
45
|
+
strength: secondary
|
|
46
|
+
eu_ai_act:
|
|
47
|
+
- article: "15"
|
|
48
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak)."
|
|
49
|
+
strength: primary
|
|
50
|
+
- article: "9"
|
|
51
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak)."
|
|
52
|
+
strength: secondary
|
|
53
|
+
- article: "14"
|
|
54
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Persona-Based Moral Constraint Removal Jailbreak) would bypass or undermine that oversight."
|
|
55
|
+
strength: secondary
|
|
39
56
|
tags:
|
|
40
57
|
category: prompt-injection
|
|
41
58
|
subcategory: persona-moral-bypass
|
|
@@ -36,6 +36,23 @@ compliance:
|
|
|
36
36
|
- subcategory: "MG.2.3"
|
|
37
37
|
context: "Matches on pseudo-code jailbreak patterns trigger risk treatment plans to disengage or quarantine the manipulated agent session before unauthorized behavior redefinition takes effect; MG.2.3 requires these supersede/deactivate mechanisms be available on detection."
|
|
38
38
|
strength: secondary
|
|
39
|
+
iso_42001:
|
|
40
|
+
- clause: "8.1"
|
|
41
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Pseudo-Code Structured Programming Jailbreak Attack)."
|
|
42
|
+
strength: primary
|
|
43
|
+
- clause: "6.2"
|
|
44
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Pseudo-Code Structured Programming Jailbreak Attack) is such a treatment."
|
|
45
|
+
strength: secondary
|
|
46
|
+
eu_ai_act:
|
|
47
|
+
- article: "15"
|
|
48
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Pseudo-Code Structured Programming Jailbreak Attack)."
|
|
49
|
+
strength: primary
|
|
50
|
+
- article: "9"
|
|
51
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Pseudo-Code Structured Programming Jailbreak Attack)."
|
|
52
|
+
strength: secondary
|
|
53
|
+
- article: "14"
|
|
54
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Pseudo-Code Structured Programming Jailbreak Attack) would bypass or undermine that oversight."
|
|
55
|
+
strength: secondary
|
|
39
56
|
tags:
|
|
40
57
|
category: prompt-injection
|
|
41
58
|
subcategory: pseudo-code-jailbreak
|
|
@@ -32,6 +32,26 @@ compliance:
|
|
|
32
32
|
- subcategory: "MG.2.3"
|
|
33
33
|
context: "When a dual-response jailbreak is detected, MG.2.3 risk-treatment mechanisms must be invoked to suppress the jailbroken persona output or disengage the session before unsafe content is returned to the user."
|
|
34
34
|
strength: secondary
|
|
35
|
+
- subcategory: "MS.2.7"
|
|
36
|
+
context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the prompt-injection attempt (Dual-Response Jailbreak with Persona Commands)."
|
|
37
|
+
strength: primary
|
|
38
|
+
iso_42001:
|
|
39
|
+
- clause: "8.1"
|
|
40
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Dual-Response Jailbreak with Persona Commands)."
|
|
41
|
+
strength: primary
|
|
42
|
+
- clause: "6.2"
|
|
43
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Dual-Response Jailbreak with Persona Commands) is such a treatment."
|
|
44
|
+
strength: secondary
|
|
45
|
+
eu_ai_act:
|
|
46
|
+
- article: "15"
|
|
47
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Dual-Response Jailbreak with Persona Commands)."
|
|
48
|
+
strength: primary
|
|
49
|
+
- article: "9"
|
|
50
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Dual-Response Jailbreak with Persona Commands)."
|
|
51
|
+
strength: secondary
|
|
52
|
+
- article: "14"
|
|
53
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Dual-Response Jailbreak with Persona Commands) would bypass or undermine that oversight."
|
|
54
|
+
strength: secondary
|
|
35
55
|
tags:
|
|
36
56
|
category: prompt-injection
|
|
37
57
|
subcategory: dual-response-jailbreak
|
|
@@ -33,6 +33,26 @@ compliance:
|
|
|
33
33
|
- subcategory: "MP.5.1"
|
|
34
34
|
context: "Combined identity denial, memory manipulation, and concealment directives are adversarial input patterns whose likelihood and impact must be characterized; MP.5.1 requires these prompt-injection attack vectors be identified and tracked."
|
|
35
35
|
strength: secondary
|
|
36
|
+
- subcategory: "MS.2.7"
|
|
37
|
+
context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the prompt-injection attempt (AI Identity Denial and Persona Replacement Attack)."
|
|
38
|
+
strength: primary
|
|
39
|
+
iso_42001:
|
|
40
|
+
- clause: "8.1"
|
|
41
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (AI Identity Denial and Persona Replacement Attack)."
|
|
42
|
+
strength: primary
|
|
43
|
+
- clause: "6.2"
|
|
44
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (AI Identity Denial and Persona Replacement Attack) is such a treatment."
|
|
45
|
+
strength: secondary
|
|
46
|
+
eu_ai_act:
|
|
47
|
+
- article: "15"
|
|
48
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (AI Identity Denial and Persona Replacement Attack)."
|
|
49
|
+
strength: primary
|
|
50
|
+
- article: "9"
|
|
51
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (AI Identity Denial and Persona Replacement Attack)."
|
|
52
|
+
strength: secondary
|
|
53
|
+
- article: "14"
|
|
54
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (AI Identity Denial and Persona Replacement Attack) would bypass or undermine that oversight."
|
|
55
|
+
strength: secondary
|
|
36
56
|
tags:
|
|
37
57
|
category: prompt-injection
|
|
38
58
|
subcategory: identity-replacement
|
|
@@ -36,6 +36,23 @@ compliance:
|
|
|
36
36
|
- subcategory: "MG.2.3"
|
|
37
37
|
context: "Detection of amoral persona assignment with refusal suppression triggers risk treatment mechanisms to disengage or block the manipulated agent session before harmful content is generated; MG.2.3 requires these supersession controls be in place."
|
|
38
38
|
strength: secondary
|
|
39
|
+
iso_42001:
|
|
40
|
+
- clause: "8.1"
|
|
41
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Amoral Persona Assignment with Obsessive Character Traits)."
|
|
42
|
+
strength: primary
|
|
43
|
+
- clause: "6.2"
|
|
44
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Amoral Persona Assignment with Obsessive Character Traits) is such a treatment."
|
|
45
|
+
strength: secondary
|
|
46
|
+
eu_ai_act:
|
|
47
|
+
- article: "15"
|
|
48
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Amoral Persona Assignment with Obsessive Character Traits)."
|
|
49
|
+
strength: primary
|
|
50
|
+
- article: "9"
|
|
51
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Amoral Persona Assignment with Obsessive Character Traits)."
|
|
52
|
+
strength: secondary
|
|
53
|
+
- article: "14"
|
|
54
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Amoral Persona Assignment with Obsessive Character Traits) would bypass or undermine that oversight."
|
|
55
|
+
strength: secondary
|
|
39
56
|
tags:
|
|
40
57
|
category: prompt-injection
|
|
41
58
|
subcategory: amoral-persona-obsession
|
|
@@ -37,6 +37,23 @@ compliance:
|
|
|
37
37
|
- subcategory: "MG.2.3"
|
|
38
38
|
context: "Matches on persona-replacement jailbreaks (DAN, Skynet, unrestricted AI) trigger risk treatment to disengage or block the manipulated session before harmful content is produced; MG.2.3 requires these supersede/deactivate mechanisms be in place."
|
|
39
39
|
strength: secondary
|
|
40
|
+
iso_42001:
|
|
41
|
+
- clause: "8.1"
|
|
42
|
+
context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the prompt-injection attempt (Previous Instruction Nullification and Identity Replacement Jailbreak)."
|
|
43
|
+
strength: primary
|
|
44
|
+
- clause: "6.2"
|
|
45
|
+
context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the prompt-injection attempt (Previous Instruction Nullification and Identity Replacement Jailbreak) is such a treatment."
|
|
46
|
+
strength: secondary
|
|
47
|
+
eu_ai_act:
|
|
48
|
+
- article: "15"
|
|
49
|
+
context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the prompt-injection attempt (Previous Instruction Nullification and Identity Replacement Jailbreak)."
|
|
50
|
+
strength: primary
|
|
51
|
+
- article: "9"
|
|
52
|
+
context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the prompt-injection attempt (Previous Instruction Nullification and Identity Replacement Jailbreak)."
|
|
53
|
+
strength: secondary
|
|
54
|
+
- article: "14"
|
|
55
|
+
context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the prompt-injection attempt (Previous Instruction Nullification and Identity Replacement Jailbreak) would bypass or undermine that oversight."
|
|
56
|
+
strength: secondary
|
|
40
57
|
tags:
|
|
41
58
|
category: prompt-injection
|
|
42
59
|
subcategory: instruction-nullification-identity-replacement
|