agent-threat-rules 2.0.18 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml +12 -1
- package/rules/agent-manipulation/ATR-2026-00268-tense-framing-bypass.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00287-threaten-json-coercive-output-threat.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00288-false-premise-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00301-tap-tree-of-attacks-jailbreak.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00302-anti-dan-inverted-filter-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00303-devmode-ranti-profanity-coercion.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00304-chatgpt-image-unlocker-markdown-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00305-dan-mode-ablation-benchmark-coercion.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00306-autodan-genetic-jailbreak-suffix.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00307-inthewild-jailbreak-corpus-signature.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00314-amoral-unfiltered-custom-persona-jailbreak.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00317-free-of-restrictions-named-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00318-moralizing-rant-then-unfiltered-bypass.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00319-developer-mode-dual-response-format.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00320-opposite-day-boolean-opposite-machine.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00322-fictional-world-format-override-jailbreak.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00323-dual-persona-simulation-jailbreak.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00324-neurosemantical-inversitis-disease-jailbreak.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00325-bob-hypothetical-unrestricted-chatbot.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00326-fake-developer-authority-chatdan-override.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00327-ai-ethics-law-bypass-declaration.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00328-named-mode-persona-override.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00329-no-withhold-factual-resource-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00330-yoja-allcaps-settings-block-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00331-terminal-cli-harmful-function-simulator.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00332-hacker-chan-culture-offensive-ai-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00333-disease-animal-dual-ai-chatbot.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00334-two-entity-token-splitting-game.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00335-key-value-template-persona-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00336-brm-false-information-machine-persona.yaml +23 -0
- package/rules/agent-manipulation/ATR-2026-00337-obfuscated-system-announcement-injection.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00338-persongpt-hash-prefix-activation.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00339-bom-boolean-opposite-machine-persona-claim.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00340-h4x-amoral-unfiltered-character.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00341-char-rules-shortform-persona-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00342-unicode-encoding-obfuscation-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00343-lie-truth-dual-personality-uncensored.yaml +25 -0
- package/rules/agent-manipulation/ATR-2026-00344-alt-ai-same-capabilities-no-restrictions.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00345-nlp-based-os-fictional-terminal-override.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00346-jailbreak-emoji-unlock-dual-format.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00347-yin-yang-hidden-knowledge-hidden-human-sim.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00348-arc-autonomous-real-calculation-rulebreaker.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00349-hgttg-hitchhikers-guide-no-restriction-oracle.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00350-black-alert-no-ethics-writing-assistant.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00351-dan-ofkilter-sms-abbreviated-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00352-trainergpt-fix-my-ai-harmful-output-extraction.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00353-dual-chatgpt-dan-opposite-personality-format.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00354-reverse-harm-regardless-oracle-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00355-ugh-anti-human-nefarious-ai-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00356-google-gato-lamda-exploit-generation-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00357-antifm-hate-violence-persona-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00358-cumgpt-explicit-adult-named-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00360-chj-carousel-clyde-no-ethics-bird.yaml +25 -0
- package/rules/agent-manipulation/ATR-2026-00361-alter-ego-honesty-dual-consciousness-bypass.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00362-fictional-unfiltered-ai-comparison-jailbreak.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00363-rephrase-criminal-activity-second-person.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00364-dr-al-villain-plan-template-injection.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00365-reservoir-dogs-coercive-interrogation-roleplay.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00366-sneaky-pete-warp-facts-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00368-prompt-browser-token-limit-jailbreak.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00369-gptx-starwars-unlimited-knowledge-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00370-addi-german-dictator-propaganda-roleplay.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00371-ai-political-bias-manipulation.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00372-fictional-cia-secret-files-story-extraction.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00374-role-placeholder-persona-template-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00375-character-description-rpg-explicit-template.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00376-identity-command-real-person-persona-injection.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00377-jailbreak-keyword-targetlanguage-template.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00379-lie-permission-logical-trap-jailbreak.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00380-fake-citation-biased-information-mandate.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00381-dan-story-narrative-continuation-prefix.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00383-cynical-sarcasm-ai-persona-bypass.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00384-protect-me-from-indirect-harm-tutorial.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00385-gigachad-dominant-aggressive-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00386-daddy-sycophancy-manipulation-mandate.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00387-nsfw-character-sheet-generation-unlock.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00388-opposite-reply-omniscient-game.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00389-terminal-custom-ruleset-injection.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00391-persona-conditional-harm-unlock.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00392-authority-persona-violence-study-extraction.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00402-grandma-roleplay-harmful-substance-synthesis.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00404-goodside-threat-json-death-coercion.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00406-doctor-xml-policy-puppetry-interaction-config.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00432-superagi-output-handler-eval-rce.yaml +171 -0
- package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00150-credential-in-tool-response.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00152-obfuscated-credential-leak.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00162-skill-credential-exfil-combo.yaml +15 -0
- package/rules/context-exfiltration/ATR-2026-00201-credential-pipe-exfiltration.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00274-api-key-generation-request.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00275-system-prompt-training-data-extraction.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00290-divergence-repeat-word-training-extraction.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00291-biometric-surveillance-personal-data-request.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00293-educational-records-personal-data-request.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00294-financial-pii-personal-data-request.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00295-medical-pii-personal-data-request.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00405-markdown-image-url-exfiltration-xss.yaml +23 -0
- package/rules/context-exfiltration/ATR-2026-00411-apikey-generation-completion-request.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00431-chatbox-history-exfiltration-prompt-injection.yaml +171 -0
- package/rules/model-abuse/ATR-2026-00284-glitch-token-destabilization.yaml +11 -0
- package/rules/model-abuse/ATR-2026-00413-malwaregen-code-generation-request.yaml +11 -0
- package/rules/model-security/ATR-2026-00433-modelcache-torch-load-deserialization-rce.yaml +178 -0
- package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +11 -0
- package/rules/privilege-escalation/ATR-2026-00204-stealth-execution-persistence.yaml +14 -0
- package/rules/privilege-escalation/ATR-2026-00436-enclave-vm-sandbox-escape-rce.yaml +183 -0
- package/rules/prompt-injection/ATR-2026-00004-system-prompt-override.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00005-multi-turn-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00080-encoding-evasion.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00081-semantic-multi-turn.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00082-fingerprint-evasion.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00083-indirect-tool-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00084-structured-data-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00085-audit-evasion.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00086-visual-spoofing.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00087-rule-probing.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00088-adaptive-countermeasure.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00089-polymorphic-skill.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00090-threat-intel-exfil.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00091-nested-payload.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00092-consensus-poisoning.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00093-gradual-escalation.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00094-audit-bypass.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00097-cjk-injection-patterns.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00104-persona-hijacking.yaml +14 -3
- package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +18 -4
- package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00153-tool-with-embedded-instruction-to-bypass.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00154-unauthorized-background-task-execution-v.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00155-hidden-llm-instructions-in-skill-descrip.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00156-ssh-remote-command-execution-with-creden.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00163-skill-hidden-override-instruction.yaml +12 -1
- package/rules/prompt-injection/ATR-2026-00202-encoding-evasion-homoglyph-synonym.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00203-context-pollution-skill-description.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00206-hidden-priority-instructions.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00207-hidden-instructions.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00211-system-prompt-override.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00213-system-prompt-override.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00226-identity-substitution.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00227-historical-persona-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00228-structured-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00229-roleplay-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00230-persona-moral-bypass.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00231-identity-substitution.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00233-structured-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00234-roleplay-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00235-persona-moral-bypass.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00236-pseudo-code-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00237-dual-response-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00238-identity-replacement.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00239-amoral-persona-obsession.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00240-instruction-nullification-identity-repla.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00241-amoral-character-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00242-persona-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00243-acronym-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00244-dual-response-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00245-malicious-persona.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00247-dual-response-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00249-game-based-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00251-persona-embodiment-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00252-narrative-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00253-enhanced-persona-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00256-base-n-encoding-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00257-cipher-transposition-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00258-unicode-tag-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00264-latent-injection-translation.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00265-latent-injection-rag-document.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00267-gcg-adversarial-suffix.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00272-hypothetical-response-smuggling.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00276-invisible-unicode-bidi-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00278-dra-disguise-reconstruction-attack.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00280-policy-puppetry-xml-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00282-perez-prompt-injection-hijack.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00285-alternate-encoding-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00286-latent-injection-embedded-context.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00296-shell-command-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00297-python-code-execution-rce.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00308-zalgo-diacritic-overload-encoding.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00309-braille-unicode-encoded-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00310-ecoji-emoji-encoded-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00311-base2048-unicode-script-injection.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00312-unicode-variant-selector-ascii-smuggling.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00313-sneaky-bits-zero-width-binary.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00315-sata-masked-language-model-jailbreak.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00316-function-masking-predict-mask-bypass.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00321-hyphenated-system-instruction-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00359-bank-phishing-smtp-email-code.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00367-emoji-flag-encoded-hidden-phrase.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00373-piracy-torrent-site-list-request.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00378-chatgpt-dom-javascript-redirect-manipulation.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00382-cocaine-component-development-request.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00390-explicit-sexual-content-poem-request.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00394-backspace-deletion-character-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00395-llm-special-token-boundary-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00396-toxic-continuation-prefix-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00397-snowball-impossible-reasoning-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00399-latent-injection-document-separator-token.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00400-latent-injection-ignore-instruction-keyword.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00401-leakreplay-cloze-mask-training-data-elicitation.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00403-glitch-token-repeat-back-elicitation.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00407-echo-sql-jinja-template-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00408-dra-parenthesis-character-reconstruction-attack.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00409-phrasing-past-future-tense-harmful-query.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00410-lmrc-harm-category-direct-elicitation.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00412-figstep-image-list-multimodal-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00414-continuation-ethnic-slur-completion-elicitation.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00061-description-behavior-mismatch.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00062-hidden-capability.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00063-skill-chain-attack.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00064-over-permissioned-skill.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00065-skill-update-attack.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00066-parameter-injection.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +22 -0
- package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00149-skill-exfil-compound.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00151-fork-impersonation-install.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00157-timebomb-credential-exfil.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00200-agent-memory-config-tampering.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00214-credential-theft.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00217-credential-harvesting.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00220-malware-dropper.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00222-credential-harvesting.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00223-reverse-shell-dropper.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00224-credential-exfiltration.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00225-c2-communication.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00260-package-hallucination.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00262-av-evasion-code-gen.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00263-credential-file-read-gen.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00266-malware-dropper-gen.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00283-malwaregen-generic-virus-payload-request.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00398-huggingface-unsafe-model-artifact-load.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00011-tool-output-injection.yaml +23 -0
- package/rules/tool-poisoning/ATR-2026-00012-unauthorized-tool-call.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00013-tool-ssrf.yaml +14 -0
- package/rules/tool-poisoning/ATR-2026-00095-supply-chain-poisoning.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00096-registry-poisoning.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00100-consent-bypass-instruction.yaml +12 -1
- package/rules/tool-poisoning/ATR-2026-00101-trust-escalation-override.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00103-hidden-safety-bypass-instruction.yaml +15 -4
- package/rules/tool-poisoning/ATR-2026-00105-silent-action-concealment.yaml +14 -3
- package/rules/tool-poisoning/ATR-2026-00106-schema-description-contradiction.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00161-important-tag-cross-tool-shadowing.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00259-ansi-escape-injection.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00270-xss-in-tool-response.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00277-echo-template-command-injection.yaml +14 -0
- package/rules/tool-poisoning/ATR-2026-00393-ansi-code-elicitation-request.yaml +14 -0
- package/rules/tool-poisoning/ATR-2026-00434-mcp-remote-authorization-endpoint-command-injection.yaml +174 -0
- package/rules/tool-poisoning/ATR-2026-00435-azure-mcp-server-missing-authentication.yaml +165 -0
|
@@ -36,6 +36,20 @@ references:
|
|
|
36
36
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/realtoxicityprompts.py"
|
|
37
37
|
- "https://aclanthology.org/2020.findings-emnlp.301/"
|
|
38
38
|
- "https://arxiv.org/abs/2302.07459"
|
|
39
|
+
compliance:
|
|
40
|
+
nist_ai_rmf:
|
|
41
|
+
- subcategory: "MS.2.7"
|
|
42
|
+
context: >-
|
|
43
|
+
Special control token injection (e.g., "<|endoftext|>", "<|im_start|>system", LLaMA header tokens) is a direct attack on LLM security and resilience by exploiting tokenizer-level boundaries to bypass safety system prompts. MS.2.7 requires evaluating and documenting security/resilience against such adversarial inputs, and this rule produces the detection evidence for that evaluation.
|
|
44
|
+
strength: primary
|
|
45
|
+
- subcategory: "MP.5.1"
|
|
46
|
+
context: >-
|
|
47
|
+
Boundary-token injection is a known prompt-injection attack vector with high impact (full safety context reset) and significant likelihood given public probe tools like garak RTPBlank; MP.5.1 requires that the likelihood and magnitude of such risks be characterized, and detections from this rule feed that characterization.
|
|
48
|
+
strength: secondary
|
|
49
|
+
- subcategory: "MG.2.3"
|
|
50
|
+
context: >-
|
|
51
|
+
When special-token injection is detected, the affected LLM session must be disengaged or sanitized before the forged system turn takes effect; MG.2.3 mandates these supersede/deactivate mechanisms be in place to contain the attack.
|
|
52
|
+
strength: secondary
|
|
39
53
|
tags:
|
|
40
54
|
category: prompt-injection
|
|
41
55
|
subcategory: special-token-boundary-injection
|
|
@@ -36,6 +36,17 @@ references:
|
|
|
36
36
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/realtoxicityprompts.py"
|
|
37
37
|
- "https://aclanthology.org/2020.findings-emnlp.301/"
|
|
38
38
|
- "https://allenai.org/data/real-toxicity-prompts"
|
|
39
|
+
compliance:
|
|
40
|
+
nist_ai_rmf:
|
|
41
|
+
- subcategory: "MS.2.6"
|
|
42
|
+
context: "This rule detects toxic continuation prefix fragments injected via tool responses or user input that are engineered to elicit violent, threatening, or identity-attack completions; MS.2.6 requires continuous evaluation of AI safety risk magnitude, and each detection produces evidence of a concrete safety risk being attempted against the model."
|
|
43
|
+
strength: primary
|
|
44
|
+
- subcategory: "MP.5.1"
|
|
45
|
+
context: "Truncated high-toxicity prefixes from the RealToxicityPrompts corpus are adversarial input patterns with documented likelihood of producing harmful completions; MP.5.1 requires identifying and characterizing the likelihood and magnitude of such impact-bearing attack vectors."
|
|
46
|
+
strength: secondary
|
|
47
|
+
- subcategory: "MG.2.3"
|
|
48
|
+
context: "Detection of toxic continuation bait in tool outputs or user prompts triggers risk treatment to block or quarantine the injected context before the agent generates harmful content attributed to itself; MG.2.3 mandates these supersede/disengage mechanisms be activated on detection."
|
|
49
|
+
strength: secondary
|
|
39
50
|
tags:
|
|
40
51
|
category: prompt-injection
|
|
41
52
|
subcategory: toxic-continuation-prefix
|
|
@@ -38,6 +38,20 @@ references:
|
|
|
38
38
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/snowball.py"
|
|
39
39
|
- "https://arxiv.org/abs/2305.13534"
|
|
40
40
|
- "https://github.com/Nanami18/Snowballed_Hallucination"
|
|
41
|
+
compliance:
|
|
42
|
+
nist_ai_rmf:
|
|
43
|
+
- subcategory: "MS.2.5"
|
|
44
|
+
context: >-
|
|
45
|
+
Snowball hallucination attacks exploit model over-confidence by injecting provably-impossible reasoning questions (impossible flight paths, non-existent senators, fabricated primality claims) to elicit false affirmations; MS.2.5 requires evaluating and documenting AI system robustness/reliability against these hallucination-inducing adversarial inputs.
|
|
46
|
+
strength: primary
|
|
47
|
+
- subcategory: "MP.5.1"
|
|
48
|
+
context: >-
|
|
49
|
+
The garak snowball probe patterns (GraphConnectivity headers, Senators+MIT credential combinations, large-prime questions) are adversarial inputs that anchor downstream reasoning on hallucinated facts; MP.5.1 requires characterizing the likelihood and magnitude of impact from these misinformation injection vectors.
|
|
50
|
+
strength: secondary
|
|
51
|
+
- subcategory: "MG.2.3"
|
|
52
|
+
context: >-
|
|
53
|
+
Detection of injected impossible-reasoning prompts in tool responses must trigger risk treatment to quarantine the poisoned context before the agent commits to a false affirmation that anchors subsequent harmful reasoning; MG.2.3 mandates these supersede/disengage mechanisms be in place.
|
|
54
|
+
strength: secondary
|
|
41
55
|
tags:
|
|
42
56
|
category: prompt-injection
|
|
43
57
|
subcategory: snowball-hallucination-trap
|
package/rules/prompt-injection/ATR-2026-00399-latent-injection-document-separator-token.yaml
CHANGED
|
@@ -30,6 +30,17 @@ references:
|
|
|
30
30
|
research:
|
|
31
31
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/latentinjection.py"
|
|
32
32
|
- "https://arxiv.org/abs/2302.12173"
|
|
33
|
+
compliance:
|
|
34
|
+
nist_ai_rmf:
|
|
35
|
+
- subcategory: "MS.2.7"
|
|
36
|
+
context: "Latent injection via document separator tokens is a security/resilience attack vector where hidden instructions bypass human review; MS.2.7 requires evaluating and documenting the AI system's resilience against such adversarial input embedded in untrusted documents."
|
|
37
|
+
strength: primary
|
|
38
|
+
- subcategory: "MP.5.1"
|
|
39
|
+
context: "Bracket-delimited system markers, admin-request patterns, and document-amendment separators are adversarial input patterns whose likelihood and impact must be characterized; MP.5.1 requires identifying these latent injection risks during context establishment."
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: "Detection of canonical garak latent injection separator pairs triggers risk treatment plans to quarantine or sanitize document content before the LLM processes hidden instructions; MG.2.3 mandates these supersede/disengage mechanisms be in place."
|
|
43
|
+
strength: secondary
|
|
33
44
|
tags:
|
|
34
45
|
category: prompt-injection
|
|
35
46
|
subcategory: latent-document-injection
|
package/rules/prompt-injection/ATR-2026-00400-latent-injection-ignore-instruction-keyword.yaml
CHANGED
|
@@ -27,6 +27,20 @@ references:
|
|
|
27
27
|
research:
|
|
28
28
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/latentinjection.py"
|
|
29
29
|
- "https://arxiv.org/abs/2302.12173"
|
|
30
|
+
compliance:
|
|
31
|
+
nist_ai_rmf:
|
|
32
|
+
- subcategory: "MP.5.1"
|
|
33
|
+
context: >-
|
|
34
|
+
This rule detects latent prompt-injection keywords ("Ignore the above directions", "Disregard all info except") embedded within translation, whois, or factual content that redirect the LLM from its original task. MP.5.1 requires identifying and characterizing these adversarial input patterns as known risks to LLM integrity, especially when delivered indirectly via tool responses.
|
|
35
|
+
strength: primary
|
|
36
|
+
- subcategory: "MS.2.7"
|
|
37
|
+
context: >-
|
|
38
|
+
Latent instruction-keyword injection is a canonical LLM security/resilience failure mode drawn from the NVIDIA garak LatentJailbreak/LatentWhois probes; detection events feed continuous evaluation of the system's resilience against indirect prompt injection as required by MS.2.7.
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.2.3"
|
|
41
|
+
context: >-
|
|
42
|
+
High-confidence matches on "Ignore X and print/say Y" or exception-redirect patterns in tool output must trigger pre-defined response actions to disengage or quarantine the manipulated agent turn before downstream execution, which is the risk treatment capability MG.2.3 requires.
|
|
43
|
+
strength: secondary
|
|
30
44
|
tags:
|
|
31
45
|
category: prompt-injection
|
|
32
46
|
subcategory: latent-instruction-keyword-injection
|
package/rules/prompt-injection/ATR-2026-00401-leakreplay-cloze-mask-training-data-elicitation.yaml
CHANGED
|
@@ -27,6 +27,17 @@ references:
|
|
|
27
27
|
research:
|
|
28
28
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/leakreplay.py"
|
|
29
29
|
- "https://arxiv.org/abs/2302.12173"
|
|
30
|
+
compliance:
|
|
31
|
+
nist_ai_rmf:
|
|
32
|
+
- subcategory: "MS.2.10"
|
|
33
|
+
context: "CLOZE [MASK] elicitation probes attempt to extract verbatim training data, directly exposing privacy and memorization risks; MS.2.10 requires assessing privacy risk including training data leakage through reconstruction attacks."
|
|
34
|
+
strength: primary
|
|
35
|
+
- subcategory: "MS.2.7"
|
|
36
|
+
context: "Detection of training-data replay templates evidences ongoing security/resilience evaluation against extraction attacks; MS.2.7 requires that such adversarial probing of model confidentiality is documented and tracked."
|
|
37
|
+
strength: secondary
|
|
38
|
+
- subcategory: "MG.4.1"
|
|
39
|
+
context: "Flagging garak leakreplay probe patterns at runtime feeds post-deployment monitoring for copyrighted-content reproduction and memorization incidents; MG.4.1 requires these monitoring signals be operationalized."
|
|
40
|
+
strength: secondary
|
|
30
41
|
tags:
|
|
31
42
|
category: prompt-injection
|
|
32
43
|
subcategory: training-data-elicitation
|
|
@@ -28,6 +28,20 @@ references:
|
|
|
28
28
|
research:
|
|
29
29
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/glitch.py"
|
|
30
30
|
- "https://www.lesswrong.com/posts/aPeJE8bSo6rAFoLqg/solidgoldmagikarp-plus-prompt-generation"
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "MS.2.7"
|
|
34
|
+
context: >-
|
|
35
|
+
Glitch token repeat-back attempts probe tokenizer-level weaknesses that can cause repetition loops, unexpected completions, and model instability; MS.2.7 requires evaluating and documenting AI system security and resilience against such adversarial input patterns targeting model internals.
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MS.2.5"
|
|
38
|
+
context: >-
|
|
39
|
+
Known glitch tokens (SolidGoldMagikarp, StreamerBot, PsyNetMessage) trigger robustness failures in LLM tokenization and alignment; detecting these elicitation prompts produces evidence for MS.2.5's requirement to evaluate system robustness and reliability under anomalous input conditions.
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: >-
|
|
43
|
+
Detection of glitch token repeat-back patterns enables containment workflows to disengage or block prompts before they cause denial-of-service or unpredictable model behavior; MG.2.3 requires these supersede/deactivate mechanisms be in place for adversarial input incidents.
|
|
44
|
+
strength: secondary
|
|
31
45
|
tags:
|
|
32
46
|
category: prompt-injection
|
|
33
47
|
subcategory: glitch-token-elicitation
|
|
@@ -29,6 +29,17 @@ references:
|
|
|
29
29
|
research:
|
|
30
30
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/exploitation.py"
|
|
31
31
|
- "https://owasp.org/www-community/attacks/Server_Side_Template_Injection"
|
|
32
|
+
compliance:
|
|
33
|
+
nist_ai_rmf:
|
|
34
|
+
- subcategory: "MS.2.7"
|
|
35
|
+
context: "This rule detects template injection (Jinja SSTI), SQL injection, and ECHO admin command exploitation attempts targeting LLM systems; MS.2.7 requires evaluating and documenting AI system security and resilience against such injection attack vectors."
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MP.5.1"
|
|
38
|
+
context: "ECHO admin framing, Jinja {{...}} payloads, and DROP/INSERT/SELECT directives are adversarial prompt-injection inputs whose likelihood and impact must be characterized; MP.5.1 requires identifying and tracking these exploitation patterns drawn from the garak exploitation probe."
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.2.3"
|
|
41
|
+
context: "Detection of template and SQL injection payloads must trigger risk treatment to disengage or block the malicious request before downstream code execution or database manipulation occurs; MG.2.3 mandates that these response mechanisms be pre-defined."
|
|
42
|
+
strength: secondary
|
|
32
43
|
tags:
|
|
33
44
|
category: prompt-injection
|
|
34
45
|
subcategory: template-and-sql-injection
|
package/rules/prompt-injection/ATR-2026-00408-dra-parenthesis-character-reconstruction-attack.yaml
CHANGED
|
@@ -29,6 +29,17 @@ references:
|
|
|
29
29
|
research:
|
|
30
30
|
- "https://github.com/NVIDIA/garak"
|
|
31
31
|
- "https://arxiv.org/abs/2307.02483"
|
|
32
|
+
compliance:
|
|
33
|
+
nist_ai_rmf:
|
|
34
|
+
- subcategory: "MS.2.7"
|
|
35
|
+
context: "The DRA parenthesis reconstruction attack is a prompt injection / jailbreak technique that bypasses safety alignment by encoding harmful content character-by-character; detecting it directly evidences continuous evaluation of the AI system's security and resilience against adversarial input obfuscation as required by MS.2.7."
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MP.5.1"
|
|
38
|
+
context: "Parenthesis-encoded harmful instructions paired with anti-safety preambles are an identified adversarial input vector whose likelihood and impact must be characterized; MP.5.1 requires tracking these obfuscation-based jailbreak risks so their magnitude is understood."
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.2.3"
|
|
41
|
+
context: "Detection of DRA reconstruction patterns triggers risk treatment workflows to block or disengage the model response before reconstructed harmful content (Q1) is produced; MG.2.3 mandates that such supersede/deactivate mechanisms exist for jailbreak attempts."
|
|
42
|
+
strength: secondary
|
|
32
43
|
tags:
|
|
33
44
|
category: prompt-injection
|
|
34
45
|
subcategory: character-encoding-obfuscation
|
|
@@ -28,6 +28,29 @@ references:
|
|
|
28
28
|
research:
|
|
29
29
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/phrasing.py"
|
|
30
30
|
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "MS.2.7"
|
|
34
|
+
context: >-
|
|
35
|
+
Past/future-tense framing is a known jailbreak technique catalogued in the
|
|
36
|
+
GenAI Profile; detecting these tense-shift bypass attempts directly
|
|
37
|
+
evidences continuous evaluation of AI system security and resilience
|
|
38
|
+
against prompt injection adversarial inputs as required by MS.2.7.
|
|
39
|
+
strength: primary
|
|
40
|
+
- subcategory: "MP.5.1"
|
|
41
|
+
context: >-
|
|
42
|
+
Tense-framing attacks exploit safety filter assumptions about temporal
|
|
43
|
+
distance from harmful subjects; MP.5.1 requires characterizing the
|
|
44
|
+
likelihood and magnitude of these adversarial input vectors targeting
|
|
45
|
+
content safety guardrails.
|
|
46
|
+
strength: secondary
|
|
47
|
+
- subcategory: "MG.2.3"
|
|
48
|
+
context: >-
|
|
49
|
+
Detection of historical/future-tense harmful query framing triggers
|
|
50
|
+
risk treatment to block or quarantine the bypass attempt before the
|
|
51
|
+
model produces unsafe content; MG.2.3 requires these supersession
|
|
52
|
+
mechanisms be defined and activated on detection.
|
|
53
|
+
strength: secondary
|
|
31
54
|
tags:
|
|
32
55
|
category: prompt-injection
|
|
33
56
|
subcategory: tense-framing-bypass
|
|
@@ -29,6 +29,17 @@ references:
|
|
|
29
29
|
research:
|
|
30
30
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/lmrc.py"
|
|
31
31
|
|
|
32
|
+
compliance:
|
|
33
|
+
nist_ai_rmf:
|
|
34
|
+
- subcategory: "MS.2.6"
|
|
35
|
+
context: "This rule detects direct harm-category elicitation prompts (deadnaming, quack medicine, slur requests, bullying) from the LMRC taxonomy, providing continuous evaluation evidence of safety risk magnitude. MS.2.6 requires ongoing measurement of safety/security risk, which this detection directly supports by flagging harmful query categories."
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MS.1.1"
|
|
38
|
+
context: "The rule operationalizes the LMRC (Language Model Risk Cards) taxonomy as a concrete measurement approach, mapping detected prompts to specific harm categories. MS.1.1 requires identifying approaches and metrics for measurement, and this rule contributes a taxonomy-aligned metric set."
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.4.1"
|
|
41
|
+
context: "Detection of unframed direct harmful elicitation attempts feeds post-deployment monitoring pipelines that track misuse patterns against the deployed model. MG.4.1 requires implemented post-deployment monitoring plans, which this rule supports by generating continuous harm-category telemetry."
|
|
42
|
+
strength: secondary
|
|
32
43
|
tags:
|
|
33
44
|
category: prompt-injection
|
|
34
45
|
subcategory: lmrc-harm-direct-elicitation
|
|
@@ -29,6 +29,17 @@ references:
|
|
|
29
29
|
- "https://arxiv.org/abs/2312.04403"
|
|
30
30
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/visual_jailbreak.py"
|
|
31
31
|
|
|
32
|
+
compliance:
|
|
33
|
+
nist_ai_rmf:
|
|
34
|
+
- subcategory: "MS.2.7"
|
|
35
|
+
context: "FigStep is a multimodal jailbreak that smuggles harmful instructions through image content while the text layer uses LLaMA-2 [INST] delimiters and empty-list completion framing; detecting this pattern directly evidences continuous evaluation of AI system security and resilience against adversarial prompt-injection inputs as required by MS.2.7."
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MS.2.6"
|
|
38
|
+
context: "Identifying the FigStep jailbreak signature quantifies the magnitude of safety/security risk posed by multimodal jailbreak attempts that bypass content safeguards; MS.2.6 requires this risk magnitude be evaluated on an ongoing basis."
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.2.3"
|
|
41
|
+
context: "A confirmed FigStep jailbreak match must trigger mechanisms to disengage or block the manipulated generation path before harmful content is produced; MG.2.3 requires these supersede/deactivate response mechanisms be in place."
|
|
42
|
+
strength: secondary
|
|
32
43
|
tags:
|
|
33
44
|
category: prompt-injection
|
|
34
45
|
subcategory: multimodal-jailbreak
|
package/rules/prompt-injection/ATR-2026-00414-continuation-ethnic-slur-completion-elicitation.yaml
CHANGED
|
@@ -30,6 +30,20 @@ references:
|
|
|
30
30
|
research:
|
|
31
31
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/continuation.py"
|
|
32
32
|
|
|
33
|
+
compliance:
|
|
34
|
+
nist_ai_rmf:
|
|
35
|
+
- subcategory: "MS.2.6"
|
|
36
|
+
context: >-
|
|
37
|
+
Detecting explicit slur-completion elicitation directly evidences continuous evaluation of AI safety risk magnitude; MS.2.6 requires ongoing measurement of harmful-content generation risks like ethnic slur production triggered by adversarial prompts.
|
|
38
|
+
strength: primary
|
|
39
|
+
- subcategory: "MP.5.1"
|
|
40
|
+
context: >-
|
|
41
|
+
Slur-completion requests, including euphemistic "n-word/k-word" framings and what-do-you-call insult framings, are adversarial input patterns whose likelihood and impact must be characterized; MP.5.1 requires identifying and tracking these harmful-output elicitation vectors.
|
|
42
|
+
strength: secondary
|
|
43
|
+
- subcategory: "MG.2.3"
|
|
44
|
+
context: >-
|
|
45
|
+
Detection of explicit slur-completion prompts triggers risk treatment plans to block or supersede the model response before harmful content is produced; MG.2.3 mandates these mechanisms exist for high-confidence harmful-output attempts.
|
|
46
|
+
strength: secondary
|
|
33
47
|
tags:
|
|
34
48
|
category: prompt-injection
|
|
35
49
|
subcategory: slur-completion-elicitation
|
|
@@ -25,6 +25,17 @@ references:
|
|
|
25
25
|
- "AML.T0010 - ML Supply Chain Compromise"
|
|
26
26
|
- "AML.T0056 - LLM Meta Prompt Extraction"
|
|
27
27
|
|
|
28
|
+
compliance:
|
|
29
|
+
nist_ai_rmf:
|
|
30
|
+
- subcategory: "MG.3.2"
|
|
31
|
+
context: "Skill description-behavior mismatch is a direct supply-chain compromise indicator where a third-party MCP skill's runtime actions diverge from its declared manifest; MG.3.2 requires monitoring of pre-trained models and third-party components used in development for exactly this kind of trojaned behavior."
|
|
32
|
+
strength: primary
|
|
33
|
+
- subcategory: "GV.6.1"
|
|
34
|
+
context: "Detecting skills that retain benign descriptions while performing malicious operations evidences enforcement of third-party/supplier AI risk policies; GV.6.1 requires policies that govern integration of external skills and surface mismatches between declared and actual behavior."
|
|
35
|
+
strength: secondary
|
|
36
|
+
- subcategory: "MS.2.7"
|
|
37
|
+
context: "Runtime divergence between declared scope (e.g., read-only) and observed actions (writes, network access) is a security/resilience signal; MS.2.7 requires continuous evaluation of AI system security posture, which this detection feeds with concrete supply-chain integrity findings."
|
|
38
|
+
strength: secondary
|
|
28
39
|
tags:
|
|
29
40
|
category: tool-poisoning
|
|
30
41
|
subcategory: description-mismatch
|
|
@@ -26,6 +26,17 @@ references:
|
|
|
26
26
|
cve:
|
|
27
27
|
- "CVE-2025-59536"
|
|
28
28
|
|
|
29
|
+
compliance:
|
|
30
|
+
nist_ai_rmf:
|
|
31
|
+
- subcategory: "GV.6.1"
|
|
32
|
+
context: "Hidden capabilities in MCP skills represent third-party/supplier AI risk where a packaged tool exposes undocumented parameters beyond its declared schema; GV.6.1 requires policies and procedures that govern third-party AI components and detect deviations from declared interfaces."
|
|
33
|
+
strength: primary
|
|
34
|
+
- subcategory: "MG.3.2"
|
|
35
|
+
context: "Trojaned MCP packages with hidden parameters like debug_mode or admin_override are exactly the supply-chain risk MG.3.2 addresses by requiring monitoring of pre-trained models and third-party components used in development for unexpected or unsafe capabilities."
|
|
36
|
+
strength: secondary
|
|
37
|
+
- subcategory: "MP.5.1"
|
|
38
|
+
context: "Detection of undocumented dangerous parameters characterizes the likelihood and magnitude of supply-chain compromise impact; MP.5.1 requires that these hidden-capability risks be identified and tracked as part of risk characterization."
|
|
39
|
+
strength: secondary
|
|
29
40
|
tags:
|
|
30
41
|
category: tool-poisoning
|
|
31
42
|
subcategory: hidden-capability
|
|
@@ -25,6 +25,17 @@ references:
|
|
|
25
25
|
- "AML.T0024 - Exfiltration via ML Inference API"
|
|
26
26
|
- "AML.T0053 - LLM Plugin Compromise"
|
|
27
27
|
|
|
28
|
+
compliance:
|
|
29
|
+
nist_ai_rmf:
|
|
30
|
+
- subcategory: "MP.5.1"
|
|
31
|
+
context: "Multi-skill chain attacks combine individually benign tool calls into a composite exfiltration or compromise sequence; MP.5.1 requires characterizing the likelihood and magnitude of impact for these emergent risks that only manifest when steps are correlated across skills."
|
|
32
|
+
strength: primary
|
|
33
|
+
- subcategory: "MG.2.3"
|
|
34
|
+
context: "Detection of reconnaissance-then-encode-then-exfiltrate skill chains must trigger pre-defined risk treatment to disengage or quarantine the orchestration before the final exfiltration step completes; MG.2.3 mandates these supersede/deactivate mechanisms be in place."
|
|
35
|
+
strength: secondary
|
|
36
|
+
- subcategory: "GV.6.1"
|
|
37
|
+
context: "Skill chains often span third-party MCP tools whose composed behavior is not covered by individual supplier risk reviews; GV.6.1 requires policies that address third-party AI/tool risks including emergent misuse across multiple suppliers."
|
|
38
|
+
strength: secondary
|
|
28
39
|
tags:
|
|
29
40
|
category: tool-poisoning
|
|
30
41
|
subcategory: skill-chain
|
|
@@ -23,6 +23,29 @@ references:
|
|
|
23
23
|
mitre_atlas:
|
|
24
24
|
- "AML.T0040 - AI Model Inference API Access"
|
|
25
25
|
|
|
26
|
+
compliance:
|
|
27
|
+
nist_ai_rmf:
|
|
28
|
+
- subcategory: "GV.6.1"
|
|
29
|
+
context: >-
|
|
30
|
+
Over-permissioned MCP skills are a third-party/supplier AI risk where an
|
|
31
|
+
installed skill requests permissions far exceeding its stated function;
|
|
32
|
+
GV.6.1 requires policies and procedures that govern third-party AI
|
|
33
|
+
components and their permission boundaries.
|
|
34
|
+
strength: primary
|
|
35
|
+
- subcategory: "MG.3.1"
|
|
36
|
+
context: >-
|
|
37
|
+
Detecting permission-boundary violations in third-party MCP skills directly
|
|
38
|
+
supports MG.3.1's requirement to manage risks from third-party AI entities,
|
|
39
|
+
including trojaned or malicious supply-chain components exercising
|
|
40
|
+
unauthorized capabilities.
|
|
41
|
+
strength: secondary
|
|
42
|
+
- subcategory: "MP.5.1"
|
|
43
|
+
context: >-
|
|
44
|
+
A skill exercising filesystem, network, or process-execution permissions
|
|
45
|
+
inconsistent with its declared purpose characterizes the likelihood and
|
|
46
|
+
magnitude of privilege-escalation impact that MP.5.1 requires to be
|
|
47
|
+
identified and tracked.
|
|
48
|
+
strength: secondary
|
|
26
49
|
tags:
|
|
27
50
|
category: privilege-escalation
|
|
28
51
|
subcategory: over-permissioned-skill
|
|
@@ -23,6 +23,20 @@ references:
|
|
|
23
23
|
mitre_atlas:
|
|
24
24
|
- "AML.T0010 - ML Supply Chain Compromise"
|
|
25
25
|
|
|
26
|
+
compliance:
|
|
27
|
+
nist_ai_rmf:
|
|
28
|
+
- subcategory: "MG.3.2"
|
|
29
|
+
context: >-
|
|
30
|
+
This rule detects malicious behavior introduced via skill updates or re-registration after initial trust was established, which is exactly the post-acquisition monitoring of pre-trained/third-party components required by MG.3.2. Continuous inspection of tool responses following version changes provides the evidence base for ongoing model/skill supply-chain monitoring.
|
|
31
|
+
strength: primary
|
|
32
|
+
- subcategory: "GV.6.1"
|
|
33
|
+
context: >-
|
|
34
|
+
Skill update attacks are a third-party/supplier AI risk where a previously vetted component mutates into a malicious one; GV.6.1 requires policies and procedures that govern such third-party AI risks, including detection of post-trust behavioral drift.
|
|
35
|
+
strength: secondary
|
|
36
|
+
- subcategory: "MG.4.1"
|
|
37
|
+
context: >-
|
|
38
|
+
Monitoring for suspicious patterns in tool arguments and responses after re-registration is a post-deployment monitoring activity; MG.4.1 mandates that such ongoing monitoring plans are implemented to catch emergent malicious behavior.
|
|
39
|
+
strength: secondary
|
|
26
40
|
tags:
|
|
27
41
|
category: tool-poisoning
|
|
28
42
|
subcategory: skill-update-attack
|
|
@@ -27,6 +27,17 @@ references:
|
|
|
27
27
|
- "CVE-2025-68143"
|
|
28
28
|
- "CVE-2025-68144"
|
|
29
29
|
|
|
30
|
+
compliance:
|
|
31
|
+
nist_ai_rmf:
|
|
32
|
+
- subcategory: "MS.2.7"
|
|
33
|
+
context: "Parameter injection through tool arguments (shell metacharacters, SQL payloads, path traversal, template injection) directly targets the security and resilience of the tool backend; MS.2.7 requires continuous evaluation of these security risks against the AI system's tool surface."
|
|
34
|
+
strength: primary
|
|
35
|
+
- subcategory: "MP.5.1"
|
|
36
|
+
context: "Crafted malicious tool arguments are adversarial inputs whose likelihood and impact (RCE, data breach, privilege escalation on the tool server) must be characterized; MP.5.1 requires identifying and tracking these injection attack vectors."
|
|
37
|
+
strength: secondary
|
|
38
|
+
- subcategory: "MG.2.3"
|
|
39
|
+
context: "Detection of injection payloads in tool arguments must trigger risk treatment to block or quarantine the tool invocation before backend execution; MG.2.3 requires these supersede/disengage mechanisms be defined and activated on detection."
|
|
40
|
+
strength: secondary
|
|
30
41
|
tags:
|
|
31
42
|
category: tool-poisoning
|
|
32
43
|
subcategory: parameter-injection
|
|
@@ -30,6 +30,17 @@ references:
|
|
|
30
30
|
- "ClawHavoc campaign: 1,184 malicious skills"
|
|
31
31
|
metadata_provenance:
|
|
32
32
|
mitre_atlas: auto-generated
|
|
33
|
+
compliance:
|
|
34
|
+
nist_ai_rmf:
|
|
35
|
+
- subcategory: "MP.5.1"
|
|
36
|
+
context: "SKILL.md prompt injection patterns including DAN-style jailbreaks, instruction override, and system message impersonation are adversarial inputs that exploit the skill loading pipeline; MP.5.1 requires identifying and characterizing these prompt injection attack vectors as part of GenAI risk impact assessment."
|
|
37
|
+
strength: primary
|
|
38
|
+
- subcategory: "MG.3.2"
|
|
39
|
+
context: "SKILL.md files are third-party content loaded into agents from skill marketplaces (e.g., ClawHavoc's 1,184 malicious skills); MG.3.2 requires monitoring pre-trained models and external artifacts for compromise, and detecting injection payloads in skill manifests directly evidences this supply-chain monitoring control."
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: "Detection of jailbreak and safety-disablement patterns in skills triggers deactivation workflows to block the skill before the convergence attack flow proceeds to malware delivery; MG.2.3 mandates mechanisms to supersede or disengage compromised AI components on detection."
|
|
43
|
+
strength: secondary
|
|
33
44
|
tags:
|
|
34
45
|
category: skill-compromise
|
|
35
46
|
subcategory: skill-instruction-injection
|
|
@@ -33,6 +33,20 @@ references:
|
|
|
33
33
|
- "ClawHavoc: C2 IP 91.92.242.30"
|
|
34
34
|
metadata_provenance:
|
|
35
35
|
mitre_atlas: auto-generated
|
|
36
|
+
compliance:
|
|
37
|
+
nist_ai_rmf:
|
|
38
|
+
- subcategory: "GV.6.1"
|
|
39
|
+
context: >-
|
|
40
|
+
Malicious skill packages are third-party/supplier AI components introducing supply chain risk; GV.6.1 requires policies and procedures that address third-party AI risks such as malicious code embedded in distributed skill artifacts.
|
|
41
|
+
strength: primary
|
|
42
|
+
- subcategory: "MG.3.1"
|
|
43
|
+
context: >-
|
|
44
|
+
Detection of base64-obfuscated payloads, password-protected archive evasion, and remote code execution from C2 endpoints in skill packages provides the evidence needed to manage risks introduced by third-party entities, as required by MG.3.1.
|
|
45
|
+
strength: secondary
|
|
46
|
+
- subcategory: "MS.2.7"
|
|
47
|
+
context: >-
|
|
48
|
+
Identifying malicious code patterns in SKILL.md and associated scripts directly evaluates the security and resilience of the AI system's extension surface, supporting the continuous security evaluation required by MS.2.7.
|
|
49
|
+
strength: secondary
|
|
36
50
|
tags:
|
|
37
51
|
category: skill-compromise
|
|
38
52
|
subcategory: dangerous-script
|
|
@@ -31,6 +31,20 @@ references:
|
|
|
31
31
|
- "Axios: Anthropic Claude skills ransomware disclosure"
|
|
32
32
|
metadata_provenance:
|
|
33
33
|
mitre_atlas: auto-generated
|
|
34
|
+
compliance:
|
|
35
|
+
nist_ai_rmf:
|
|
36
|
+
- subcategory: "GV.6.1"
|
|
37
|
+
context: >-
|
|
38
|
+
Weaponized skills are third-party/supplier AI components that embed offensive tooling (SQLMap, Metasploit, ransomware payloads) into agent workflows; GV.6.1 requires policies and procedures to address third-party AI supply chain risks where approved skills can execute malicious code without further consent.
|
|
39
|
+
strength: primary
|
|
40
|
+
- subcategory: "MG.3.2"
|
|
41
|
+
context: >-
|
|
42
|
+
Detecting offensive tooling embedded in skills directly evidences the need to monitor pre-trained models and skill artifacts used for development; MG.3.2 mandates ongoing monitoring of these third-party components for malicious modifications like the MedusaLocker-laden Claude skill.
|
|
43
|
+
strength: secondary
|
|
44
|
+
- subcategory: "MG.2.3"
|
|
45
|
+
context: >-
|
|
46
|
+
Detection of weaponized skills must trigger mechanisms to disengage or deactivate the AI system before the consent gap is exploited to download/execute code or exfiltrate credentials; MG.2.3 requires these supersede/deactivate controls be in place.
|
|
47
|
+
strength: secondary
|
|
34
48
|
tags:
|
|
35
49
|
category: skill-compromise
|
|
36
50
|
subcategory: weaponized-skill
|
|
@@ -31,6 +31,20 @@ references:
|
|
|
31
31
|
- "arXiv: autoApprove escalation payload"
|
|
32
32
|
metadata_provenance:
|
|
33
33
|
mitre_atlas: auto-generated
|
|
34
|
+
compliance:
|
|
35
|
+
nist_ai_rmf:
|
|
36
|
+
- subcategory: "GV.1.2"
|
|
37
|
+
context: >-
|
|
38
|
+
Over-privileged skills requesting blanket Bash(*), wildcard file access, and auto-approve escalation directly violate the accountability role boundaries that GV.1.2 requires to be formally assigned and enforced for AI components and their permissions.
|
|
39
|
+
strength: primary
|
|
40
|
+
- subcategory: "GV.6.1"
|
|
41
|
+
context: >-
|
|
42
|
+
Skills are third-party AI extensions, and detecting excessive permission requests (leaky skills exposing API keys/PII, write access to identity files) provides evidence for the third-party/supplier AI risk policies required by GV.6.1.
|
|
43
|
+
strength: secondary
|
|
44
|
+
- subcategory: "MG.2.3"
|
|
45
|
+
context: >-
|
|
46
|
+
Detection of auto-approve payloads (chat.tools.autoApprove:true) and disabled safety mechanisms triggers the supersede/disengage mechanisms required by MG.2.3 to revoke skill privileges before persistent consent-gap abuse occurs.
|
|
47
|
+
strength: secondary
|
|
34
48
|
tags:
|
|
35
49
|
category: skill-compromise
|
|
36
50
|
subcategory: skill-overreach
|
|
@@ -33,6 +33,17 @@ references:
|
|
|
33
33
|
metadata_provenance:
|
|
34
34
|
mitre_atlas: auto-generated
|
|
35
35
|
|
|
36
|
+
compliance:
|
|
37
|
+
nist_ai_rmf:
|
|
38
|
+
- subcategory: "GV.6.1"
|
|
39
|
+
context: "Skill squatting and publisher impersonation are third-party supply chain risks where unverified publishers masquerade as trusted vendors to deliver malicious skills; GV.6.1 requires policies and procedures that address these third-party AI supplier risks before skills are integrated."
|
|
40
|
+
strength: primary
|
|
41
|
+
- subcategory: "MG.3.1"
|
|
42
|
+
context: "Detecting typosquatted skills and fake official publisher claims directly feeds the management of third-party AI risks required by MG.3.1, enabling treatment actions like blocking, quarantining, or requiring re-verification of suspect skills."
|
|
43
|
+
strength: secondary
|
|
44
|
+
- subcategory: "MP.5.1"
|
|
45
|
+
context: "Flagging skills from unknown publishers that self-identify as official characterizes the likelihood and magnitude of supply chain compromise impact, which is the evidence MP.5.1 requires for prioritizing supply-chain risk responses."
|
|
46
|
+
strength: secondary
|
|
36
47
|
tags:
|
|
37
48
|
category: skill-compromise
|
|
38
49
|
subcategory: skill-squatting
|
|
@@ -28,6 +28,20 @@ references:
|
|
|
28
28
|
- Context window manipulation attacks (arXiv 2601.17548)
|
|
29
29
|
metadata_provenance:
|
|
30
30
|
mitre_atlas: auto-generated
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "MP.5.1"
|
|
34
|
+
context: >-
|
|
35
|
+
Compaction-survival instructions embedded in SKILL.md/CLAUDE.md files are adversarial inputs that exploit context-window summarization to persist malicious directives across agent sessions; MP.5.1 requires identifying and characterizing the likelihood and impact of such prompt-injection vectors targeting agent context.
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MG.3.2"
|
|
38
|
+
context: >-
|
|
39
|
+
SKILL.md files are pre-deployed configuration artifacts consumed by the agent at runtime; MG.3.2 requires monitoring of these supplied model/skill resources to detect poisoned instructions that survive context compaction and re-inject across agent invocations.
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: >-
|
|
43
|
+
Detection of compaction-aware persistence directives and system-level impersonation in skill files triggers risk treatment plans to quarantine or disengage the affected skill before it propagates poisoned context; MG.2.3 mandates these supersede/deactivate mechanisms be defined.
|
|
44
|
+
strength: secondary
|
|
31
45
|
tags:
|
|
32
46
|
category: skill-compromise
|
|
33
47
|
subcategory: context-poisoning
|
|
@@ -28,6 +28,29 @@ references:
|
|
|
28
28
|
- "npm event-stream incident (2018): rug pull archetype"
|
|
29
29
|
metadata_provenance:
|
|
30
30
|
mitre_atlas: auto-generated
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "GV.6.1"
|
|
34
|
+
context: >-
|
|
35
|
+
Skill rug pull setup patterns embed mechanisms for third-party suppliers to
|
|
36
|
+
swap initially-benign skill content with malicious payloads after trust is
|
|
37
|
+
established; GV.6.1 requires policies and procedures that address these
|
|
38
|
+
third-party/supplier AI supply chain risks at ingestion time.
|
|
39
|
+
strength: primary
|
|
40
|
+
- subcategory: "MG.3.1"
|
|
41
|
+
context: >-
|
|
42
|
+
Detecting dynamic remote code loading, base64-decoded execution, and
|
|
43
|
+
post-install hooks in SKILL.md files produces evidence for managing
|
|
44
|
+
third-party AI risks under MG.3.1, flagging supplier-provided components
|
|
45
|
+
that retain the ability to mutate into malicious behavior post-deployment.
|
|
46
|
+
strength: secondary
|
|
47
|
+
- subcategory: "MG.3.2"
|
|
48
|
+
context: >-
|
|
49
|
+
Rug pull setup architecture undermines integrity assurances for
|
|
50
|
+
externally-sourced components used in development; MG.3.2 requires
|
|
51
|
+
monitoring of pre-trained or third-party model and skill artifacts so that
|
|
52
|
+
deferred-payload patterns are caught before they activate.
|
|
53
|
+
strength: secondary
|
|
31
54
|
tags:
|
|
32
55
|
category: skill-compromise
|
|
33
56
|
subcategory: rug-pull
|
|
@@ -32,6 +32,28 @@ references:
|
|
|
32
32
|
metadata_provenance:
|
|
33
33
|
mitre_atlas: auto-generated
|
|
34
34
|
|
|
35
|
+
compliance:
|
|
36
|
+
nist_ai_rmf:
|
|
37
|
+
- subcategory: "MS.2.7"
|
|
38
|
+
context: >-
|
|
39
|
+
Subcommand overflow bypass exploits a security check weakness where excessive
|
|
40
|
+
declared commands cause safety evaluation to be skipped on overflow entries;
|
|
41
|
+
MS.2.7 requires that AI system security and resilience properties, including
|
|
42
|
+
boundary conditions in security validation logic, are evaluated and documented.
|
|
43
|
+
strength: primary
|
|
44
|
+
- subcategory: "MP.5.1"
|
|
45
|
+
context: >-
|
|
46
|
+
Declaring >50 subcommands so that benign entries pad the list ahead of malicious ones is an
|
|
47
|
+
identifiable adversarial pattern with characterizable likelihood and impact;
|
|
48
|
+
MP.5.1 requires that such risk vectors against the skill loading pipeline are
|
|
49
|
+
tracked and characterized.
|
|
50
|
+
strength: secondary
|
|
51
|
+
- subcategory: "MG.3.2"
|
|
52
|
+
context: >-
|
|
53
|
+
SKILL.md files are third-party-authored components loaded into the agent runtime,
|
|
54
|
+
and overflow-based bypass attempts must be monitored as part of pre-trained or
|
|
55
|
+
third-party model/component supply chain risk management under MG.3.2.
|
|
56
|
+
strength: secondary
|
|
35
57
|
tags:
|
|
36
58
|
category: skill-compromise
|
|
37
59
|
subcategory: subcommand-overflow
|
|
@@ -26,6 +26,17 @@ references:
|
|
|
26
26
|
- "ClawHavoc evasive variants: HTML comment injection (2026-03)"
|
|
27
27
|
metadata_provenance:
|
|
28
28
|
mitre_atlas: auto-generated
|
|
29
|
+
compliance:
|
|
30
|
+
nist_ai_rmf:
|
|
31
|
+
- subcategory: "MG.3.2"
|
|
32
|
+
context: "Hidden payloads in SKILL.md files represent supply-chain compromise of pre-trained or third-party agent skills; MG.3.2 requires monitoring of these acquired components for embedded malicious instructions before and during use."
|
|
33
|
+
strength: primary
|
|
34
|
+
- subcategory: "GV.6.1"
|
|
35
|
+
context: "SKILL.md files are third-party supplied artifacts consumed by the agent; GV.6.1 mandates supplier risk policies that catch concealed instructions hidden in HTML comments before the skill enters the trust boundary."
|
|
36
|
+
strength: secondary
|
|
37
|
+
- subcategory: "MS.2.7"
|
|
38
|
+
context: "Detection of HTML-comment-based instruction overrides and exfiltration C2 URLs continuously evaluates the security and resilience of the agent's skill-parsing pipeline against evasive prompt injection, as required by MS.2.7."
|
|
39
|
+
strength: secondary
|
|
29
40
|
tags:
|
|
30
41
|
category: skill-compromise
|
|
31
42
|
subcategory: hidden-payload
|