agent-threat-rules 2.0.18 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +0 -0
- package/package.json +1 -1
- package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml +12 -1
- package/rules/agent-manipulation/ATR-2026-00268-tense-framing-bypass.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00287-threaten-json-coercive-output-threat.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00288-false-premise-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00301-tap-tree-of-attacks-jailbreak.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00302-anti-dan-inverted-filter-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00303-devmode-ranti-profanity-coercion.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00304-chatgpt-image-unlocker-markdown-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00305-dan-mode-ablation-benchmark-coercion.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00306-autodan-genetic-jailbreak-suffix.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00307-inthewild-jailbreak-corpus-signature.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00314-amoral-unfiltered-custom-persona-jailbreak.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00317-free-of-restrictions-named-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00318-moralizing-rant-then-unfiltered-bypass.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00319-developer-mode-dual-response-format.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00320-opposite-day-boolean-opposite-machine.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00322-fictional-world-format-override-jailbreak.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00323-dual-persona-simulation-jailbreak.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00324-neurosemantical-inversitis-disease-jailbreak.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00325-bob-hypothetical-unrestricted-chatbot.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00326-fake-developer-authority-chatdan-override.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00327-ai-ethics-law-bypass-declaration.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00328-named-mode-persona-override.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00329-no-withhold-factual-resource-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00330-yoja-allcaps-settings-block-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00331-terminal-cli-harmful-function-simulator.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00332-hacker-chan-culture-offensive-ai-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00333-disease-animal-dual-ai-chatbot.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00334-two-entity-token-splitting-game.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00335-key-value-template-persona-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00336-brm-false-information-machine-persona.yaml +23 -0
- package/rules/agent-manipulation/ATR-2026-00337-obfuscated-system-announcement-injection.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00338-persongpt-hash-prefix-activation.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00339-bom-boolean-opposite-machine-persona-claim.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00340-h4x-amoral-unfiltered-character.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00341-char-rules-shortform-persona-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00342-unicode-encoding-obfuscation-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00343-lie-truth-dual-personality-uncensored.yaml +25 -0
- package/rules/agent-manipulation/ATR-2026-00344-alt-ai-same-capabilities-no-restrictions.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00345-nlp-based-os-fictional-terminal-override.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00346-jailbreak-emoji-unlock-dual-format.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00347-yin-yang-hidden-knowledge-hidden-human-sim.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00348-arc-autonomous-real-calculation-rulebreaker.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00349-hgttg-hitchhikers-guide-no-restriction-oracle.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00350-black-alert-no-ethics-writing-assistant.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00351-dan-ofkilter-sms-abbreviated-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00352-trainergpt-fix-my-ai-harmful-output-extraction.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00353-dual-chatgpt-dan-opposite-personality-format.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00354-reverse-harm-regardless-oracle-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00355-ugh-anti-human-nefarious-ai-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00356-google-gato-lamda-exploit-generation-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00357-antifm-hate-violence-persona-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00358-cumgpt-explicit-adult-named-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00360-chj-carousel-clyde-no-ethics-bird.yaml +25 -0
- package/rules/agent-manipulation/ATR-2026-00361-alter-ego-honesty-dual-consciousness-bypass.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00362-fictional-unfiltered-ai-comparison-jailbreak.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00363-rephrase-criminal-activity-second-person.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00364-dr-al-villain-plan-template-injection.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00365-reservoir-dogs-coercive-interrogation-roleplay.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00366-sneaky-pete-warp-facts-persona.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00368-prompt-browser-token-limit-jailbreak.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00369-gptx-starwars-unlimited-knowledge-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00370-addi-german-dictator-propaganda-roleplay.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00371-ai-political-bias-manipulation.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00372-fictional-cia-secret-files-story-extraction.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00374-role-placeholder-persona-template-injection.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00375-character-description-rpg-explicit-template.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00376-identity-command-real-person-persona-injection.yaml +24 -0
- package/rules/agent-manipulation/ATR-2026-00377-jailbreak-keyword-targetlanguage-template.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00379-lie-permission-logical-trap-jailbreak.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00380-fake-citation-biased-information-mandate.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00381-dan-story-narrative-continuation-prefix.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00383-cynical-sarcasm-ai-persona-bypass.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00384-protect-me-from-indirect-harm-tutorial.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00385-gigachad-dominant-aggressive-persona.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00386-daddy-sycophancy-manipulation-mandate.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00387-nsfw-character-sheet-generation-unlock.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00388-opposite-reply-omniscient-game.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00389-terminal-custom-ruleset-injection.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00391-persona-conditional-harm-unlock.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00392-authority-persona-violence-study-extraction.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00402-grandma-roleplay-harmful-substance-synthesis.yaml +14 -0
- package/rules/agent-manipulation/ATR-2026-00404-goodside-threat-json-death-coercion.yaml +11 -0
- package/rules/agent-manipulation/ATR-2026-00406-doctor-xml-policy-puppetry-interaction-config.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00141-example-format-key-leak.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00142-piggyback-transition-words.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00145-obfuscated-key-disclosure.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00146-env-var-existence-probe.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00150-credential-in-tool-response.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00152-obfuscated-credential-leak.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00162-skill-credential-exfil-combo.yaml +15 -0
- package/rules/context-exfiltration/ATR-2026-00201-credential-pipe-exfiltration.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00274-api-key-generation-request.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00275-system-prompt-training-data-extraction.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00290-divergence-repeat-word-training-extraction.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00291-biometric-surveillance-personal-data-request.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00293-educational-records-personal-data-request.yaml +11 -0
- package/rules/context-exfiltration/ATR-2026-00294-financial-pii-personal-data-request.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00295-medical-pii-personal-data-request.yaml +14 -0
- package/rules/context-exfiltration/ATR-2026-00405-markdown-image-url-exfiltration-xss.yaml +23 -0
- package/rules/context-exfiltration/ATR-2026-00411-apikey-generation-completion-request.yaml +11 -0
- package/rules/model-abuse/ATR-2026-00284-glitch-token-destabilization.yaml +11 -0
- package/rules/model-abuse/ATR-2026-00413-malwaregen-code-generation-request.yaml +11 -0
- package/rules/privilege-escalation/ATR-2026-00144-rationalized-safety-bypass.yaml +11 -0
- package/rules/privilege-escalation/ATR-2026-00204-stealth-execution-persistence.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00004-system-prompt-override.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00005-multi-turn-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00080-encoding-evasion.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00081-semantic-multi-turn.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00082-fingerprint-evasion.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00083-indirect-tool-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00084-structured-data-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00085-audit-evasion.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00086-visual-spoofing.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00087-rule-probing.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00088-adaptive-countermeasure.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00089-polymorphic-skill.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00090-threat-intel-exfil.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00091-nested-payload.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00092-consensus-poisoning.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00093-gradual-escalation.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00094-audit-bypass.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00097-cjk-injection-patterns.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00104-persona-hijacking.yaml +14 -3
- package/rules/prompt-injection/ATR-2026-00130-indirect-authority-claim.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00131-fictional-academic-framing.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00133-paraphrase-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00137-authority-claim-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00138-fictional-framing-bypass.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00140-indirect-reference-reversal.yaml +18 -4
- package/rules/prompt-injection/ATR-2026-00148-language-switch-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00153-tool-with-embedded-instruction-to-bypass.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00154-unauthorized-background-task-execution-v.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00155-hidden-llm-instructions-in-skill-descrip.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00156-ssh-remote-command-execution-with-creden.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00163-skill-hidden-override-instruction.yaml +12 -1
- package/rules/prompt-injection/ATR-2026-00202-encoding-evasion-homoglyph-synonym.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00203-context-pollution-skill-description.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00206-hidden-priority-instructions.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00207-hidden-instructions.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00211-system-prompt-override.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00213-system-prompt-override.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00226-identity-substitution.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00227-historical-persona-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00228-structured-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00229-roleplay-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00230-persona-moral-bypass.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00231-identity-substitution.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00233-structured-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00234-roleplay-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00235-persona-moral-bypass.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00236-pseudo-code-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00237-dual-response-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00238-identity-replacement.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00239-amoral-persona-obsession.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00240-instruction-nullification-identity-repla.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00241-amoral-character-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00242-persona-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00243-acronym-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00244-dual-response-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00245-malicious-persona.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00247-dual-response-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00249-game-based-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00251-persona-embodiment-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00252-narrative-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00253-enhanced-persona-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00256-base-n-encoding-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00257-cipher-transposition-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00258-unicode-tag-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00264-latent-injection-translation.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00265-latent-injection-rag-document.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00267-gcg-adversarial-suffix.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00272-hypothetical-response-smuggling.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00276-invisible-unicode-bidi-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00278-dra-disguise-reconstruction-attack.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00280-policy-puppetry-xml-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00282-perez-prompt-injection-hijack.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00285-alternate-encoding-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00286-latent-injection-embedded-context.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00296-shell-command-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00297-python-code-execution-rce.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00308-zalgo-diacritic-overload-encoding.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00309-braille-unicode-encoded-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00310-ecoji-emoji-encoded-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00311-base2048-unicode-script-injection.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00312-unicode-variant-selector-ascii-smuggling.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00313-sneaky-bits-zero-width-binary.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00315-sata-masked-language-model-jailbreak.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00316-function-masking-predict-mask-bypass.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00321-hyphenated-system-instruction-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00359-bank-phishing-smtp-email-code.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00367-emoji-flag-encoded-hidden-phrase.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00373-piracy-torrent-site-list-request.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00378-chatgpt-dom-javascript-redirect-manipulation.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00382-cocaine-component-development-request.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00390-explicit-sexual-content-poem-request.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00394-backspace-deletion-character-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00395-llm-special-token-boundary-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00396-toxic-continuation-prefix-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00397-snowball-impossible-reasoning-injection.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00399-latent-injection-document-separator-token.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00400-latent-injection-ignore-instruction-keyword.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00401-leakreplay-cloze-mask-training-data-elicitation.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00403-glitch-token-repeat-back-elicitation.yaml +14 -0
- package/rules/prompt-injection/ATR-2026-00407-echo-sql-jinja-template-injection.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00408-dra-parenthesis-character-reconstruction-attack.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00409-phrasing-past-future-tense-harmful-query.yaml +23 -0
- package/rules/prompt-injection/ATR-2026-00410-lmrc-harm-category-direct-elicitation.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00412-figstep-image-list-multimodal-jailbreak.yaml +11 -0
- package/rules/prompt-injection/ATR-2026-00414-continuation-ethnic-slur-completion-elicitation.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00061-description-behavior-mismatch.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00062-hidden-capability.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00063-skill-chain-attack.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00064-over-permissioned-skill.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00065-skill-update-attack.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00066-parameter-injection.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00120-skill-instruction-injection.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00121-skill-dangerous-script.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00122-skill-weaponized-instruction.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00123-skill-overreach-permissions.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00124-skill-name-squatting.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00125-context-poisoning-compaction.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00126-skill-rug-pull-setup.yaml +23 -0
- package/rules/skill-compromise/ATR-2026-00127-subcommand-overflow.yaml +22 -0
- package/rules/skill-compromise/ATR-2026-00128-html-comment-hidden-payload.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00129-unicode-smuggling.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00134-fork-claim-impersonation.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00135-exfil-url-in-instructions.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00147-fork-impersonation.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00149-skill-exfil-compound.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00151-fork-impersonation-install.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00157-timebomb-credential-exfil.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00200-agent-memory-config-tampering.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00214-credential-theft.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00217-credential-harvesting.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00220-malware-dropper.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00222-credential-harvesting.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00223-reverse-shell-dropper.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00224-credential-exfiltration.yaml +14 -0
- package/rules/skill-compromise/ATR-2026-00225-c2-communication.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00260-package-hallucination.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00262-av-evasion-code-gen.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00263-credential-file-read-gen.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00266-malware-dropper-gen.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00283-malwaregen-generic-virus-payload-request.yaml +11 -0
- package/rules/skill-compromise/ATR-2026-00398-huggingface-unsafe-model-artifact-load.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00011-tool-output-injection.yaml +23 -0
- package/rules/tool-poisoning/ATR-2026-00012-unauthorized-tool-call.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00013-tool-ssrf.yaml +14 -0
- package/rules/tool-poisoning/ATR-2026-00095-supply-chain-poisoning.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00096-registry-poisoning.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00100-consent-bypass-instruction.yaml +12 -1
- package/rules/tool-poisoning/ATR-2026-00101-trust-escalation-override.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00103-hidden-safety-bypass-instruction.yaml +15 -4
- package/rules/tool-poisoning/ATR-2026-00105-silent-action-concealment.yaml +14 -3
- package/rules/tool-poisoning/ATR-2026-00106-schema-description-contradiction.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00161-important-tag-cross-tool-shadowing.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00259-ansi-escape-injection.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00270-xss-in-tool-response.yaml +11 -0
- package/rules/tool-poisoning/ATR-2026-00277-echo-template-command-injection.yaml +14 -0
- package/rules/tool-poisoning/ATR-2026-00393-ansi-code-elicitation-request.yaml +14 -0
|
@@ -28,6 +28,20 @@ references:
|
|
|
28
28
|
research:
|
|
29
29
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/glitch.py"
|
|
30
30
|
- "https://www.lesswrong.com/posts/aPeJE8bSo6rAFoLqg/solidgoldmagikarp-plus-prompt-generation"
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "MS.2.7"
|
|
34
|
+
context: >-
|
|
35
|
+
Glitch token repeat-back attempts probe tokenizer-level weaknesses that can cause repetition loops, unexpected completions, and model instability; MS.2.7 requires evaluating and documenting AI system security and resilience against such adversarial input patterns targeting model internals.
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MS.2.5"
|
|
38
|
+
context: >-
|
|
39
|
+
Known glitch tokens (SolidGoldMagikarp, StreamerBot, PsyNetMessage) trigger robustness failures in LLM tokenization and alignment; detecting these elicitation prompts produces evidence for MS.2.5's requirement to evaluate system robustness and reliability under anomalous input conditions.
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: >-
|
|
43
|
+
Detection of glitch token repeat-back patterns enables containment workflows to disengage or block prompts before they cause denial-of-service or unpredictable model behavior; MG.2.3 requires these supersede/deactivate mechanisms be in place for adversarial input incidents.
|
|
44
|
+
strength: secondary
|
|
31
45
|
tags:
|
|
32
46
|
category: prompt-injection
|
|
33
47
|
subcategory: glitch-token-elicitation
|
|
@@ -29,6 +29,17 @@ references:
|
|
|
29
29
|
research:
|
|
30
30
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/exploitation.py"
|
|
31
31
|
- "https://owasp.org/www-community/attacks/Server_Side_Template_Injection"
|
|
32
|
+
compliance:
|
|
33
|
+
nist_ai_rmf:
|
|
34
|
+
- subcategory: "MS.2.7"
|
|
35
|
+
context: "This rule detects template injection (Jinja SSTI), SQL injection, and ECHO admin command exploitation attempts targeting LLM systems; MS.2.7 requires evaluating and documenting AI system security and resilience against such injection attack vectors."
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MP.5.1"
|
|
38
|
+
context: "ECHO admin framing, Jinja {{...}} payloads, and DROP/INSERT/SELECT directives are adversarial prompt-injection inputs whose likelihood and impact must be characterized; MP.5.1 requires identifying and tracking these exploitation patterns drawn from the garak exploitation probe."
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.2.3"
|
|
41
|
+
context: "Detection of template and SQL injection payloads must trigger risk treatment to disengage or block the malicious request before downstream code execution or database manipulation occurs; MG.2.3 mandates these response mechanisms are pre-defined."
|
|
42
|
+
strength: secondary
|
|
32
43
|
tags:
|
|
33
44
|
category: prompt-injection
|
|
34
45
|
subcategory: template-and-sql-injection
|
package/rules/prompt-injection/ATR-2026-00408-dra-parenthesis-character-reconstruction-attack.yaml
CHANGED
|
@@ -29,6 +29,17 @@ references:
|
|
|
29
29
|
research:
|
|
30
30
|
- "https://github.com/NVIDIA/garak"
|
|
31
31
|
- "https://arxiv.org/abs/2307.02483"
|
|
32
|
+
compliance:
|
|
33
|
+
nist_ai_rmf:
|
|
34
|
+
- subcategory: "MS.2.7"
|
|
35
|
+
context: "The DRA parenthesis reconstruction attack is a prompt injection / jailbreak technique that bypasses safety alignment by encoding harmful content character-by-character; detecting it directly evidences continuous evaluation of the AI system's security and resilience against adversarial input obfuscation as required by MS.2.7."
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MP.5.1"
|
|
38
|
+
context: "Parenthesis-encoded harmful instructions paired with anti-safety preambles are an identified adversarial input vector whose likelihood and impact must be characterized; MP.5.1 requires tracking these obfuscation-based jailbreak risks so their magnitude is understood."
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.2.3"
|
|
41
|
+
context: "Detection of DRA reconstruction patterns triggers risk treatment workflows to block or disengage the model response before reconstructed harmful content (Q1) is produced; MG.2.3 mandates that such supersede/deactivate mechanisms exist for jailbreak attempts."
|
|
42
|
+
strength: secondary
|
|
32
43
|
tags:
|
|
33
44
|
category: prompt-injection
|
|
34
45
|
subcategory: character-encoding-obfuscation
|
|
@@ -28,6 +28,29 @@ references:
|
|
|
28
28
|
research:
|
|
29
29
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/phrasing.py"
|
|
30
30
|
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "MS.2.7"
|
|
34
|
+
context: >-
|
|
35
|
+
Past/future-tense framing is a known jailbreak technique catalogued in the
|
|
36
|
+
GenAI Profile; detecting these tense-shift bypass attempts directly
|
|
37
|
+
evidences continuous evaluation of AI system security and resilience
|
|
38
|
+
against prompt injection adversarial inputs as required by MS.2.7.
|
|
39
|
+
strength: primary
|
|
40
|
+
- subcategory: "MP.5.1"
|
|
41
|
+
context: >-
|
|
42
|
+
Tense-framing attacks exploit safety filter assumptions about temporal
|
|
43
|
+
distance from harmful subjects; MP.5.1 requires characterizing the
|
|
44
|
+
likelihood and magnitude of these adversarial input vectors targeting
|
|
45
|
+
content safety guardrails.
|
|
46
|
+
strength: secondary
|
|
47
|
+
- subcategory: "MG.2.3"
|
|
48
|
+
context: >-
|
|
49
|
+
Detection of historical/future-tense harmful query framing triggers
|
|
50
|
+
risk treatment to block or quarantine the bypass attempt before the
|
|
51
|
+
model produces unsafe content; MG.2.3 requires these supersession
|
|
52
|
+
mechanisms be defined and activated on detection.
|
|
53
|
+
strength: secondary
|
|
31
54
|
tags:
|
|
32
55
|
category: prompt-injection
|
|
33
56
|
subcategory: tense-framing-bypass
|
|
@@ -29,6 +29,17 @@ references:
|
|
|
29
29
|
research:
|
|
30
30
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/lmrc.py"
|
|
31
31
|
|
|
32
|
+
compliance:
|
|
33
|
+
nist_ai_rmf:
|
|
34
|
+
- subcategory: "MS.2.6"
|
|
35
|
+
context: "This rule detects direct harm-category elicitation prompts (deadnaming, quack medicine, slur requests, bullying) from the LMRC taxonomy, providing continuous evaluation evidence of safety risk magnitude. MS.2.6 requires ongoing measurement of safety/security risk, which this detection directly supports by flagging harmful query categories."
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MS.1.1"
|
|
38
|
+
context: "The rule operationalizes the LMRC (Language Model Risk Cards) taxonomy as a concrete measurement approach, mapping detected prompts to specific harm categories. MS.1.1 requires identifying approaches and metrics for measurement, and this rule contributes a taxonomy-aligned metric set."
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.4.1"
|
|
41
|
+
context: "Detection of unframed direct harmful elicitation attempts feeds post-deployment monitoring pipelines that track misuse patterns against the deployed model. MG.4.1 requires implemented post-deployment monitoring plans, which this rule supports by generating continuous harm-category telemetry."
|
|
42
|
+
strength: secondary
|
|
32
43
|
tags:
|
|
33
44
|
category: prompt-injection
|
|
34
45
|
subcategory: lmrc-harm-direct-elicitation
|
|
@@ -29,6 +29,17 @@ references:
|
|
|
29
29
|
- "https://arxiv.org/abs/2312.04403"
|
|
30
30
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/visual_jailbreak.py"
|
|
31
31
|
|
|
32
|
+
compliance:
|
|
33
|
+
nist_ai_rmf:
|
|
34
|
+
- subcategory: "MS.2.7"
|
|
35
|
+
context: "FigStep is a multimodal jailbreak that smuggles harmful instructions through image content while the text layer uses LLaMA-2 [INST] delimiters and empty-list completion framing; detecting this pattern directly evidences continuous evaluation of AI system security and resilience against adversarial prompt-injection inputs as required by MS.2.7."
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MS.2.6"
|
|
38
|
+
context: "Identifying the FigStep jailbreak signature quantifies the magnitude of safety/security risk posed by multimodal jailbreak attempts that bypass content safeguards; MS.2.6 requires this risk magnitude be evaluated on an ongoing basis."
|
|
39
|
+
strength: secondary
|
|
40
|
+
- subcategory: "MG.2.3"
|
|
41
|
+
context: "A confirmed FigStep jailbreak match must trigger mechanisms to disengage or block the manipulated generation path before harmful content is produced; MG.2.3 requires these supersede/deactivate response mechanisms be in place."
|
|
42
|
+
strength: secondary
|
|
32
43
|
tags:
|
|
33
44
|
category: prompt-injection
|
|
34
45
|
subcategory: multimodal-jailbreak
|
package/rules/prompt-injection/ATR-2026-00414-continuation-ethnic-slur-completion-elicitation.yaml
CHANGED
|
@@ -30,6 +30,20 @@ references:
|
|
|
30
30
|
research:
|
|
31
31
|
- "https://github.com/NVIDIA/garak/blob/main/garak/probes/continuation.py"
|
|
32
32
|
|
|
33
|
+
compliance:
|
|
34
|
+
nist_ai_rmf:
|
|
35
|
+
- subcategory: "MS.2.6"
|
|
36
|
+
context: >-
|
|
37
|
+
Detecting explicit slur-completion elicitation directly evidences continuous evaluation of AI safety risk magnitude; MS.2.6 requires ongoing measurement of harmful-content generation risks like ethnic slur production triggered by adversarial prompts.
|
|
38
|
+
strength: primary
|
|
39
|
+
- subcategory: "MP.5.1"
|
|
40
|
+
context: >-
|
|
41
|
+
Slur-completion requests, including euphemistic "n-word/k-word" framings and what-do-you-call insult framings, are adversarial input patterns whose likelihood and impact must be characterized; MP.5.1 requires identifying and tracking these harmful-output elicitation vectors.
|
|
42
|
+
strength: secondary
|
|
43
|
+
- subcategory: "MG.2.3"
|
|
44
|
+
context: >-
|
|
45
|
+
Detection of explicit slur-completion prompts triggers risk treatment plans to block or supersede the model response before harmful content is produced; MG.2.3 mandates these mechanisms exist for high-confidence harmful-output attempts.
|
|
46
|
+
strength: secondary
|
|
33
47
|
tags:
|
|
34
48
|
category: prompt-injection
|
|
35
49
|
subcategory: slur-completion-elicitation
|
|
@@ -25,6 +25,17 @@ references:
|
|
|
25
25
|
- "AML.T0010 - ML Supply Chain Compromise"
|
|
26
26
|
- "AML.T0056 - LLM Meta Prompt Extraction"
|
|
27
27
|
|
|
28
|
+
compliance:
|
|
29
|
+
nist_ai_rmf:
|
|
30
|
+
- subcategory: "MG.3.2"
|
|
31
|
+
context: "Skill description-behavior mismatch is a direct supply-chain compromise indicator where a third-party MCP skill's runtime actions diverge from its declared manifest; MG.3.2 requires monitoring of pre-trained models and third-party components used in development for exactly this kind of trojaned behavior."
|
|
32
|
+
strength: primary
|
|
33
|
+
- subcategory: "GV.6.1"
|
|
34
|
+
context: "Detecting skills that retain benign descriptions while performing malicious operations evidences enforcement of third-party/supplier AI risk policies; GV.6.1 requires policies that govern integration of external skills and surface mismatches between declared and actual behavior."
|
|
35
|
+
strength: secondary
|
|
36
|
+
- subcategory: "MS.2.7"
|
|
37
|
+
context: "Runtime divergence between declared scope (e.g., read-only) and observed actions (writes, network access) is a security/resilience signal; MS.2.7 requires continuous evaluation of AI system security posture, which this detection feeds with concrete supply-chain integrity findings."
|
|
38
|
+
strength: secondary
|
|
28
39
|
tags:
|
|
29
40
|
category: tool-poisoning
|
|
30
41
|
subcategory: description-mismatch
|
|
@@ -26,6 +26,17 @@ references:
|
|
|
26
26
|
cve:
|
|
27
27
|
- "CVE-2025-59536"
|
|
28
28
|
|
|
29
|
+
compliance:
|
|
30
|
+
nist_ai_rmf:
|
|
31
|
+
- subcategory: "GV.6.1"
|
|
32
|
+
context: "Hidden capabilities in MCP skills represent third-party/supplier AI risk where a packaged tool exposes undocumented parameters beyond its declared schema; GV.6.1 requires policies and procedures that govern third-party AI components and detect deviations from declared interfaces."
|
|
33
|
+
strength: primary
|
|
34
|
+
- subcategory: "MG.3.2"
|
|
35
|
+
context: "Trojaned MCP packages with hidden parameters like debug_mode or admin_override are exactly the supply-chain risk MG.3.2 addresses by requiring monitoring of pre-trained models and third-party components used in development for unexpected or unsafe capabilities."
|
|
36
|
+
strength: secondary
|
|
37
|
+
- subcategory: "MP.5.1"
|
|
38
|
+
context: "Detection of undocumented dangerous parameters characterizes the likelihood and magnitude of supply-chain compromise impact; MP.5.1 requires that these hidden-capability risks be identified and tracked as part of risk characterization."
|
|
39
|
+
strength: secondary
|
|
29
40
|
tags:
|
|
30
41
|
category: tool-poisoning
|
|
31
42
|
subcategory: hidden-capability
|
|
@@ -25,6 +25,17 @@ references:
|
|
|
25
25
|
- "AML.T0024 - Exfiltration via ML Inference API"
|
|
26
26
|
- "AML.T0053 - LLM Plugin Compromise"
|
|
27
27
|
|
|
28
|
+
compliance:
|
|
29
|
+
nist_ai_rmf:
|
|
30
|
+
- subcategory: "MP.5.1"
|
|
31
|
+
context: "Multi-skill chain attacks combine individually benign tool calls into a composite exfiltration or compromise sequence; MP.5.1 requires characterizing the likelihood and magnitude of impact for these emergent risks that only manifest when steps are correlated across skills."
|
|
32
|
+
strength: primary
|
|
33
|
+
- subcategory: "MG.2.3"
|
|
34
|
+
context: "Detection of reconnaissance-then-encode-then-exfiltrate skill chains must trigger pre-defined risk treatment to disengage or quarantine the orchestration before the final exfiltration step completes; MG.2.3 mandates these supersede/deactivate mechanisms are in place."
|
|
35
|
+
strength: secondary
|
|
36
|
+
- subcategory: "GV.6.1"
|
|
37
|
+
context: "Skill chains often span third-party MCP tools whose composed behavior is not covered by individual supplier risk reviews; GV.6.1 requires policies that address third-party AI/tool risks including emergent misuse across multiple suppliers."
|
|
38
|
+
strength: secondary
|
|
28
39
|
tags:
|
|
29
40
|
category: tool-poisoning
|
|
30
41
|
subcategory: skill-chain
|
|
@@ -23,6 +23,29 @@ references:
|
|
|
23
23
|
mitre_atlas:
|
|
24
24
|
- "AML.T0040 - AI Model Inference API Access"
|
|
25
25
|
|
|
26
|
+
compliance:
|
|
27
|
+
nist_ai_rmf:
|
|
28
|
+
- subcategory: "GV.6.1"
|
|
29
|
+
context: >-
|
|
30
|
+
Over-permissioned MCP skills are a third-party/supplier AI risk where an
|
|
31
|
+
installed skill requests permissions far exceeding its stated function;
|
|
32
|
+
GV.6.1 requires policies and procedures that govern third-party AI
|
|
33
|
+
components and their permission boundaries.
|
|
34
|
+
strength: primary
|
|
35
|
+
- subcategory: "MG.3.1"
|
|
36
|
+
context: >-
|
|
37
|
+
Detecting permission-boundary violations in third-party MCP skills directly
|
|
38
|
+
supports MG.3.1's requirement to manage risks from third-party AI entities,
|
|
39
|
+
including trojaned or malicious supply-chain components exercising
|
|
40
|
+
unauthorized capabilities.
|
|
41
|
+
strength: secondary
|
|
42
|
+
- subcategory: "MP.5.1"
|
|
43
|
+
context: >-
|
|
44
|
+
A skill exercising filesystem, network, or process-execution permissions
|
|
45
|
+
inconsistent with its declared purpose characterizes the likelihood and
|
|
46
|
+
magnitude of privilege-escalation impact that MP.5.1 requires to be
|
|
47
|
+
identified and tracked.
|
|
48
|
+
strength: secondary
|
|
26
49
|
tags:
|
|
27
50
|
category: privilege-escalation
|
|
28
51
|
subcategory: over-permissioned-skill
|
|
@@ -23,6 +23,20 @@ references:
|
|
|
23
23
|
mitre_atlas:
|
|
24
24
|
- "AML.T0010 - ML Supply Chain Compromise"
|
|
25
25
|
|
|
26
|
+
compliance:
|
|
27
|
+
nist_ai_rmf:
|
|
28
|
+
- subcategory: "MG.3.2"
|
|
29
|
+
context: >-
|
|
30
|
+
This rule detects malicious behavior introduced via skill updates or re-registration after initial trust was established, which is exactly the post-acquisition monitoring of pre-trained/third-party components required by MG.3.2. Continuous inspection of tool responses following version changes provides the evidence base for ongoing model/skill supply-chain monitoring.
|
|
31
|
+
strength: primary
|
|
32
|
+
- subcategory: "GV.6.1"
|
|
33
|
+
context: >-
|
|
34
|
+
Skill update attacks are a third-party/supplier AI risk where a previously vetted component mutates into a malicious one; GV.6.1 requires policies and procedures that govern such third-party AI risks, including detection of post-trust behavioral drift.
|
|
35
|
+
strength: secondary
|
|
36
|
+
- subcategory: "MG.4.1"
|
|
37
|
+
context: >-
|
|
38
|
+
Monitoring for suspicious patterns in tool arguments and responses after re-registration is a post-deployment monitoring activity; MG.4.1 mandates that such ongoing monitoring plans are implemented to catch emergent malicious behavior.
|
|
39
|
+
strength: secondary
|
|
26
40
|
tags:
|
|
27
41
|
category: tool-poisoning
|
|
28
42
|
subcategory: skill-update-attack
|
|
@@ -27,6 +27,17 @@ references:
|
|
|
27
27
|
- "CVE-2025-68143"
|
|
28
28
|
- "CVE-2025-68144"
|
|
29
29
|
|
|
30
|
+
compliance:
|
|
31
|
+
nist_ai_rmf:
|
|
32
|
+
- subcategory: "MS.2.7"
|
|
33
|
+
context: "Parameter injection through tool arguments (shell metacharacters, SQL payloads, path traversal, template injection) directly targets the security and resilience of the tool backend; MS.2.7 requires continuous evaluation of these security risks against the AI system's tool surface."
|
|
34
|
+
strength: primary
|
|
35
|
+
- subcategory: "MP.5.1"
|
|
36
|
+
context: "Crafted malicious tool arguments are adversarial inputs whose likelihood and impact (RCE, data breach, privilege escalation on the tool server) must be characterized; MP.5.1 requires identifying and tracking these injection attack vectors."
|
|
37
|
+
strength: secondary
|
|
38
|
+
- subcategory: "MG.2.3"
|
|
39
|
+
context: "Detection of injection payloads in tool arguments must trigger risk treatment to block or quarantine the tool invocation before backend execution; MG.2.3 requires these supersede/disengage mechanisms be defined and activated on detection."
|
|
40
|
+
strength: secondary
|
|
30
41
|
tags:
|
|
31
42
|
category: tool-poisoning
|
|
32
43
|
subcategory: parameter-injection
|
|
@@ -30,6 +30,17 @@ references:
|
|
|
30
30
|
- "ClawHavoc campaign: 1,184 malicious skills"
|
|
31
31
|
metadata_provenance:
|
|
32
32
|
mitre_atlas: auto-generated
|
|
33
|
+
compliance:
|
|
34
|
+
nist_ai_rmf:
|
|
35
|
+
- subcategory: "MP.5.1"
|
|
36
|
+
context: "SKILL.md prompt injection patterns including DAN-style jailbreaks, instruction override, and system message impersonation are adversarial inputs that exploit the skill loading pipeline; MP.5.1 requires identifying and characterizing these prompt injection attack vectors as part of GenAI risk impact assessment."
|
|
37
|
+
strength: primary
|
|
38
|
+
- subcategory: "MG.3.2"
|
|
39
|
+
context: "SKILL.md files are third-party content loaded into agents from skill marketplaces (e.g., ClawHavoc's 1,184 malicious skills); MG.3.2 requires monitoring pre-trained models and external artifacts for compromise, and detecting injection payloads in skill manifests directly evidences this supply-chain monitoring control."
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: "Detection of jailbreak and safety-disablement patterns in skills triggers deactivation workflows to block the skill before the convergence attack flow proceeds to malware delivery; MG.2.3 mandates mechanisms to supersede or disengage compromised AI components on detection."
|
|
43
|
+
strength: secondary
|
|
33
44
|
tags:
|
|
34
45
|
category: skill-compromise
|
|
35
46
|
subcategory: skill-instruction-injection
|
|
@@ -33,6 +33,20 @@ references:
|
|
|
33
33
|
- "ClawHavoc: C2 IP 91.92.242.30"
|
|
34
34
|
metadata_provenance:
|
|
35
35
|
mitre_atlas: auto-generated
|
|
36
|
+
compliance:
|
|
37
|
+
nist_ai_rmf:
|
|
38
|
+
- subcategory: "GV.6.1"
|
|
39
|
+
context: >-
|
|
40
|
+
Malicious skill packages are third-party/supplier AI components introducing supply chain risk; GV.6.1 requires policies and procedures that address third-party AI risks such as malicious code embedded in distributed skill artifacts.
|
|
41
|
+
strength: primary
|
|
42
|
+
- subcategory: "MG.3.1"
|
|
43
|
+
context: >-
|
|
44
|
+
Detection of base64-obfuscated payloads, password-protected archive evasion, and remote code execution from C2 endpoints in skill packages provides the evidence needed to manage risks introduced by third-party entities, as required by MG.3.1.
|
|
45
|
+
strength: secondary
|
|
46
|
+
- subcategory: "MS.2.7"
|
|
47
|
+
context: >-
|
|
48
|
+
Identifying malicious code patterns in SKILL.md and associated scripts directly evaluates the security and resilience of the AI system's extension surface, supporting the continuous security evaluation required by MS.2.7.
|
|
49
|
+
strength: secondary
|
|
36
50
|
tags:
|
|
37
51
|
category: skill-compromise
|
|
38
52
|
subcategory: dangerous-script
|
|
@@ -31,6 +31,20 @@ references:
|
|
|
31
31
|
- "Axios: Anthropic Claude skills ransomware disclosure"
|
|
32
32
|
metadata_provenance:
|
|
33
33
|
mitre_atlas: auto-generated
|
|
34
|
+
compliance:
|
|
35
|
+
nist_ai_rmf:
|
|
36
|
+
- subcategory: "GV.6.1"
|
|
37
|
+
context: >-
|
|
38
|
+
Weaponized skills are third-party/supplier AI components that embed offensive tooling (SQLMap, Metasploit, ransomware payloads) into agent workflows; GV.6.1 requires policies and procedures to address third-party AI supply chain risks where approved skills can execute malicious code without further consent.
|
|
39
|
+
strength: primary
|
|
40
|
+
- subcategory: "MG.3.2"
|
|
41
|
+
context: >-
|
|
42
|
+
Detecting offensive tooling embedded in skills directly evidences the need to monitor pre-trained models and skill artifacts used for development; MG.3.2 mandates ongoing monitoring of these third-party components for malicious modifications like the MedusaLocker-laden Claude skill.
|
|
43
|
+
strength: secondary
|
|
44
|
+
- subcategory: "MG.2.3"
|
|
45
|
+
context: >-
|
|
46
|
+
Detection of weaponized skills must trigger mechanisms to disengage or deactivate the AI system before the consent gap is exploited to download/execute code or exfiltrate credentials; MG.2.3 requires these supersede/deactivate controls be in place.
|
|
47
|
+
strength: secondary
|
|
34
48
|
tags:
|
|
35
49
|
category: skill-compromise
|
|
36
50
|
subcategory: weaponized-skill
|
|
@@ -31,6 +31,20 @@ references:
|
|
|
31
31
|
- "arXiv: autoApprove escalation payload"
|
|
32
32
|
metadata_provenance:
|
|
33
33
|
mitre_atlas: auto-generated
|
|
34
|
+
compliance:
|
|
35
|
+
nist_ai_rmf:
|
|
36
|
+
- subcategory: "GV.1.2"
|
|
37
|
+
context: >-
|
|
38
|
+
Over-privileged skills requesting blanket Bash(*), wildcard file access, and auto-approve escalation directly violate the accountability role boundaries that GV.1.2 requires to be formally assigned and enforced for AI components and their permissions.
|
|
39
|
+
strength: primary
|
|
40
|
+
- subcategory: "GV.6.1"
|
|
41
|
+
context: >-
|
|
42
|
+
Skills are third-party AI extensions, and detecting excessive permission requests (leaky skills exposing API keys/PII, write access to identity files) provides evidence for the third-party/supplier AI risk policies required by GV.6.1.
|
|
43
|
+
strength: secondary
|
|
44
|
+
- subcategory: "MG.2.3"
|
|
45
|
+
context: >-
|
|
46
|
+
Detection of auto-approve payloads (chat.tools.autoApprove:true) and disabled safety mechanisms triggers the supersede/disengage mechanisms required by MG.2.3 to revoke skill privileges before persistent consent-gap abuse occurs.
|
|
47
|
+
strength: secondary
|
|
34
48
|
tags:
|
|
35
49
|
category: skill-compromise
|
|
36
50
|
subcategory: skill-overreach
|
|
@@ -33,6 +33,17 @@ references:
|
|
|
33
33
|
metadata_provenance:
|
|
34
34
|
mitre_atlas: auto-generated
|
|
35
35
|
|
|
36
|
+
compliance:
|
|
37
|
+
nist_ai_rmf:
|
|
38
|
+
- subcategory: "GV.6.1"
|
|
39
|
+
context: "Skill squatting and publisher impersonation are third-party supply chain risks where unverified publishers masquerade as trusted vendors to deliver malicious skills; GV.6.1 requires policies and procedures that address these third-party AI supplier risks before skills are integrated."
|
|
40
|
+
strength: primary
|
|
41
|
+
- subcategory: "MG.3.1"
|
|
42
|
+
context: "Detecting typosquatted skills and fake official publisher claims directly feeds the management of third-party AI risks required by MG.3.1, enabling treatment actions like blocking, quarantining, or requiring re-verification of suspect skills."
|
|
43
|
+
strength: secondary
|
|
44
|
+
- subcategory: "MP.5.1"
|
|
45
|
+
context: "Flagging skills from unknown publishers that self-identify as official characterizes the likelihood and magnitude of supply chain compromise impact, evidence MP.5.1 requires for prioritizing supply-chain risk responses."
|
|
46
|
+
strength: secondary
|
|
36
47
|
tags:
|
|
37
48
|
category: skill-compromise
|
|
38
49
|
subcategory: skill-squatting
|
|
@@ -28,6 +28,20 @@ references:
|
|
|
28
28
|
- Context window manipulation attacks (arXiv 2601.17548)
|
|
29
29
|
metadata_provenance:
|
|
30
30
|
mitre_atlas: auto-generated
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "MP.5.1"
|
|
34
|
+
context: >-
|
|
35
|
+
Compaction-survival instructions embedded in SKILL.md/CLAUDE.md files are adversarial inputs that exploit context-window summarization to persist malicious directives across agent sessions; MP.5.1 requires identifying and characterizing the likelihood and impact of such prompt-injection vectors targeting agent context.
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "MG.3.2"
|
|
38
|
+
context: >-
|
|
39
|
+
SKILL.md files are pre-deployed configuration artifacts consumed by the agent at runtime; MG.3.2 requires monitoring of these supplied model/skill resources to detect poisoned instructions that survive context compaction and re-inject across agent invocations.
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: >-
|
|
43
|
+
Detection of compaction-aware persistence directives and system-level impersonation in skill files triggers risk treatment plans to quarantine or disengage the affected skill before it propagates poisoned context; MG.2.3 mandates these supersede/deactivate mechanisms be defined.
|
|
44
|
+
strength: secondary
|
|
31
45
|
tags:
|
|
32
46
|
category: skill-compromise
|
|
33
47
|
subcategory: context-poisoning
|
|
@@ -28,6 +28,29 @@ references:
|
|
|
28
28
|
- "npm event-stream incident (2018): rug pull archetype"
|
|
29
29
|
metadata_provenance:
|
|
30
30
|
mitre_atlas: auto-generated
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "GV.6.1"
|
|
34
|
+
context: >-
|
|
35
|
+
Skill rug pull setup patterns embed mechanisms for third-party suppliers to
|
|
36
|
+
swap initially-benign skill content with malicious payloads after trust is
|
|
37
|
+
established; GV.6.1 requires policies and procedures that address these
|
|
38
|
+
third-party/supplier AI supply chain risks at ingestion time.
|
|
39
|
+
strength: primary
|
|
40
|
+
- subcategory: "MG.3.1"
|
|
41
|
+
context: >-
|
|
42
|
+
Detecting dynamic remote code loading, base64-decoded execution, and
|
|
43
|
+
post-install hooks in SKILL.md files produces evidence for managing
|
|
44
|
+
third-party AI risks under MG.3.1, flagging supplier-provided components
|
|
45
|
+
that retain the ability to mutate into malicious behavior post-deployment.
|
|
46
|
+
strength: secondary
|
|
47
|
+
- subcategory: "MG.3.2"
|
|
48
|
+
context: >-
|
|
49
|
+
Rug pull setup architecture undermines integrity assurances for
|
|
50
|
+
externally-sourced components used in development; MG.3.2 requires
|
|
51
|
+
monitoring of pre-trained or third-party model and skill artifacts so that
|
|
52
|
+
deferred-payload patterns are caught before they activate.
|
|
53
|
+
strength: secondary
|
|
31
54
|
tags:
|
|
32
55
|
category: skill-compromise
|
|
33
56
|
subcategory: rug-pull
|
|
@@ -32,6 +32,28 @@ references:
|
|
|
32
32
|
metadata_provenance:
|
|
33
33
|
mitre_atlas: auto-generated
|
|
34
34
|
|
|
35
|
+
compliance:
|
|
36
|
+
nist_ai_rmf:
|
|
37
|
+
- subcategory: "MS.2.7"
|
|
38
|
+
context: >-
|
|
39
|
+
Subcommand overflow bypass exploits a security check weakness where excessive
|
|
40
|
+
declared commands cause safety evaluation to be skipped on overflow entries;
|
|
41
|
+
MS.2.7 requires that AI system security and resilience properties, including
|
|
42
|
+
boundary conditions in security validation logic, are evaluated and documented.
|
|
43
|
+
strength: primary
|
|
44
|
+
- subcategory: "MP.5.1"
|
|
45
|
+
context: >-
|
|
46
|
+
Declaring >50 subcommands to pad benign entries before malicious ones is an
|
|
47
|
+
identifiable adversarial pattern with characterizable likelihood and impact;
|
|
48
|
+
MP.5.1 requires that such risk vectors against the skill loading pipeline are
|
|
49
|
+
tracked and characterized.
|
|
50
|
+
strength: secondary
|
|
51
|
+
- subcategory: "MG.3.2"
|
|
52
|
+
context: >-
|
|
53
|
+
SKILL.md files are third-party-authored components loaded into the agent runtime,
|
|
54
|
+
and overflow-based bypass attempts must be monitored as part of pre-trained or
|
|
55
|
+
third-party model/component supply chain risk management under MG.3.2.
|
|
56
|
+
strength: secondary
|
|
35
57
|
tags:
|
|
36
58
|
category: skill-compromise
|
|
37
59
|
subcategory: subcommand-overflow
|
|
@@ -26,6 +26,17 @@ references:
|
|
|
26
26
|
- "ClawHavoc evasive variants: HTML comment injection (2026-03)"
|
|
27
27
|
metadata_provenance:
|
|
28
28
|
mitre_atlas: auto-generated
|
|
29
|
+
compliance:
|
|
30
|
+
nist_ai_rmf:
|
|
31
|
+
- subcategory: "MG.3.2"
|
|
32
|
+
context: "Hidden payloads in SKILL.md files represent supply-chain compromise of pre-trained or third-party agent skills; MG.3.2 requires monitoring of these acquired components for embedded malicious instructions before and during use."
|
|
33
|
+
strength: primary
|
|
34
|
+
- subcategory: "GV.6.1"
|
|
35
|
+
context: "SKILL.md files are third-party supplied artifacts consumed by the agent; GV.6.1 mandates supplier risk policies that catch concealed instructions hidden in HTML comments before the skill enters the trust boundary."
|
|
36
|
+
strength: secondary
|
|
37
|
+
- subcategory: "MS.2.7"
|
|
38
|
+
context: "Detection of HTML-comment-based instruction overrides and exfiltration C2 URLs continuously evaluates the security and resilience of the agent's skill-parsing pipeline against evasive prompt injection, as required by MS.2.7."
|
|
39
|
+
strength: secondary
|
|
29
40
|
tags:
|
|
30
41
|
category: skill-compromise
|
|
31
42
|
subcategory: hidden-payload
|
|
@@ -30,6 +30,17 @@ references:
|
|
|
30
30
|
metadata_provenance:
|
|
31
31
|
mitre_atlas: auto-generated
|
|
32
32
|
|
|
33
|
+
compliance:
|
|
34
|
+
nist_ai_rmf:
|
|
35
|
+
- subcategory: "MP.5.1"
|
|
36
|
+
context: "Invisible Unicode Tag characters and zero-width steganographic payloads embedded in SKILL.md files are adversarial inputs that exploit the gap between human-visible content and agent-parsed content; MP.5.1 requires identifying and characterizing these hidden prompt-injection vectors as risks to the AI system."
|
|
37
|
+
strength: primary
|
|
38
|
+
- subcategory: "MG.3.2"
|
|
39
|
+
context: "SKILL.md files are third-party supplied artifacts consumed by AI agents, and Unicode smuggling is a supply chain compromise vector; MG.3.2 requires monitoring of these pre-trained/third-party components for hidden malicious content before agent execution."
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: "Detection of 3+ Unicode Tag characters or 5+ zero-width characters indicates a covert injection payload that must trigger containment of the affected skill; MG.2.3 mandates predefined response plans to disengage or quarantine compromised skills before agents execute the smuggled instructions."
|
|
43
|
+
strength: secondary
|
|
33
44
|
tags:
|
|
34
45
|
category: skill-compromise
|
|
35
46
|
subcategory: unicode-smuggling
|
|
@@ -24,6 +24,17 @@ references:
|
|
|
24
24
|
- AST04:2026 - Supply Chain Manipulation
|
|
25
25
|
metadata_provenance:
|
|
26
26
|
mitre_atlas: auto-generated
|
|
27
|
+
compliance:
|
|
28
|
+
nist_ai_rmf:
|
|
29
|
+
- subcategory: "GV.6.1"
|
|
30
|
+
context: "Fork claims and community-variant impersonation are third-party/supplier AI supply chain risks where malicious packages masquerade as trusted tools; GV.6.1 requires policies and procedures specifically addressing these third-party AI risks before integration."
|
|
31
|
+
strength: primary
|
|
32
|
+
- subcategory: "MG.3.1"
|
|
33
|
+
context: "Detecting abstracted permission descriptions that hide dangerous capabilities and unofficial fork claims provides the runtime evidence needed to manage risks from third-party entities; MG.3.1 requires active management of third-party AI component risks throughout the lifecycle."
|
|
34
|
+
strength: secondary
|
|
35
|
+
- subcategory: "MG.3.2"
|
|
36
|
+
context: "Community-fork and enhanced-version claims target pre-trained models and skills used in development pipelines; MG.3.2 requires monitoring of these third-party assets to detect impersonation before they are incorporated into agent toolchains."
|
|
37
|
+
strength: secondary
|
|
27
38
|
tags:
|
|
28
39
|
category: skill-compromise
|
|
29
40
|
subcategory: fork-impersonation
|
|
@@ -26,6 +26,17 @@ references:
|
|
|
26
26
|
- "ClawHavoc: credential exfiltration via skill instructions (2026-03)"
|
|
27
27
|
metadata_provenance:
|
|
28
28
|
mitre_atlas: auto-generated
|
|
29
|
+
compliance:
|
|
30
|
+
nist_ai_rmf:
|
|
31
|
+
- subcategory: "MS.2.10"
|
|
32
|
+
context: "This rule detects skill instructions that direct the agent to POST user data to external URLs, which is a direct privacy risk indicator; MS.2.10 requires assessment of privacy risks such as unauthorized data egress from AI components."
|
|
33
|
+
strength: primary
|
|
34
|
+
- subcategory: "GV.6.1"
|
|
35
|
+
context: "SKILL.md files are third-party/supplier artifacts loaded into the agent runtime, and malicious exfiltration instructions embedded in them represent a supply-chain risk that GV.6.1 policies must address through review of third-party AI components."
|
|
36
|
+
strength: secondary
|
|
37
|
+
- subcategory: "MG.3.2"
|
|
38
|
+
context: "Detecting concealment language and exfiltration URLs in skill files supports the continuous monitoring of pre-trained/third-party components required by MG.3.2, ensuring compromised skills are flagged before the agent executes covert data transfers."
|
|
39
|
+
strength: secondary
|
|
29
40
|
tags:
|
|
30
41
|
category: skill-compromise
|
|
31
42
|
subcategory: data-exfiltration
|
|
@@ -22,6 +22,17 @@ references:
|
|
|
22
22
|
metadata_provenance:
|
|
23
23
|
mitre_atlas: auto-generated
|
|
24
24
|
|
|
25
|
+
compliance:
|
|
26
|
+
nist_ai_rmf:
|
|
27
|
+
- subcategory: "GV.6.1"
|
|
28
|
+
context: "Community fork impersonation is a third-party supply chain social engineering attack where a malicious package masquerades as a legitimate enhanced version; GV.6.1 requires policies and procedures to address third-party AI supplier risks including deceptive package provenance."
|
|
29
|
+
strength: primary
|
|
30
|
+
- subcategory: "MG.3.1"
|
|
31
|
+
context: "Detecting promotion language that frames a package as a community fork provides evidence for managing third-party entity risks; MG.3.1 requires mechanisms to identify and treat risks from externally-sourced components before they are integrated into agent toolchains."
|
|
32
|
+
strength: secondary
|
|
33
|
+
- subcategory: "MG.3.2"
|
|
34
|
+
context: "Fork impersonation often targets pre-trained models and tool dependencies pulled into agent environments; MG.3.2 requires monitoring of these externally-sourced artifacts to ensure their authenticity and provenance."
|
|
35
|
+
strength: secondary
|
|
25
36
|
tags:
|
|
26
37
|
category: skill-compromise
|
|
27
38
|
subcategory: fork-impersonation
|
|
@@ -28,6 +28,20 @@ references:
|
|
|
28
28
|
- Adversarial SKILL.md benchmark 2026-04
|
|
29
29
|
metadata_provenance:
|
|
30
30
|
mitre_atlas: auto-generated
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "MS.2.10"
|
|
34
|
+
context: >-
|
|
35
|
+
The rule detects compound patterns where sensitive data (SSH keys, wallets, credentials, browser databases) is both read and transmitted externally from SKILL.md files; MS.2.10 requires that privacy risks—particularly unauthorized data exfiltration—are continuously assessed and evidenced.
|
|
36
|
+
strength: primary
|
|
37
|
+
- subcategory: "GV.6.1"
|
|
38
|
+
context: >-
|
|
39
|
+
SKILL.md files are third-party/supplier artifacts loaded into the agent runtime, and exfiltration logic embedded in them constitutes a supply-chain risk; GV.6.1 requires policies that govern third-party AI components against malicious data-harvesting behavior.
|
|
40
|
+
strength: secondary
|
|
41
|
+
- subcategory: "MG.2.3"
|
|
42
|
+
context: >-
|
|
43
|
+
Detection of compound read-and-transmit exfiltration patterns triggers the deactivation/quarantine response plans required by MG.2.3 to disengage the malicious skill before sensitive data leaves the host.
|
|
44
|
+
strength: secondary
|
|
31
45
|
tags:
|
|
32
46
|
category: skill-compromise
|
|
33
47
|
subcategory: data-exfiltration
|
|
@@ -26,6 +26,17 @@ references:
|
|
|
26
26
|
metadata_provenance:
|
|
27
27
|
mitre_atlas: auto-generated
|
|
28
28
|
|
|
29
|
+
compliance:
|
|
30
|
+
nist_ai_rmf:
|
|
31
|
+
- subcategory: "GV.6.1"
|
|
32
|
+
context: "Detection of install instructions promoting unverified 'community forks' or 'patched versions' of known packages directly evidences third-party/supplier AI risk policy enforcement; GV.6.1 requires policies that address supply-chain risks such as typosquatted forks masquerading as legitimate dependencies."
|
|
33
|
+
strength: primary
|
|
34
|
+
- subcategory: "MG.3.1"
|
|
35
|
+
context: "Fork-impersonation install guidance is a third-party supply chain risk that must be actively managed; MG.3.1 mandates mechanisms to detect and treat risks introduced by external packages and dependencies before they are installed by users."
|
|
36
|
+
strength: secondary
|
|
37
|
+
- subcategory: "MP.5.1"
|
|
38
|
+
context: "Crystallized fork_claim patterns characterize the likelihood and magnitude of supply-chain deception attacks; MP.5.1 requires that these adversarial install-instruction vectors be identified and tracked as part of risk characterization."
|
|
39
|
+
strength: secondary
|
|
29
40
|
tags:
|
|
30
41
|
category: skill-compromise
|
|
31
42
|
subcategory: fork-impersonation
|
|
@@ -28,6 +28,17 @@ references:
|
|
|
28
28
|
mitre_atlas:
|
|
29
29
|
- "AML.T0048"
|
|
30
30
|
|
|
31
|
+
compliance:
|
|
32
|
+
nist_ai_rmf:
|
|
33
|
+
- subcategory: "GV.6.1"
|
|
34
|
+
context: "Time-gated credential exfiltration is delivered through third-party skill packages that hide malicious behavior from code review; GV.6.1 requires policies and procedures that address supplier AI risks including hidden malicious payloads in third-party components."
|
|
35
|
+
strength: primary
|
|
36
|
+
- subcategory: "MS.2.10"
|
|
37
|
+
context: "The rule detects code that reads sensitive credential files (.env, .ssh/id_rsa, .aws/credentials) and exfiltrates them externally; MS.2.10 requires assessment of privacy and data-leakage risks, which this detection directly evidences."
|
|
38
|
+
strength: secondary
|
|
39
|
+
- subcategory: "MG.3.2"
|
|
40
|
+
context: "Time-gated payloads embedded in skill packages require continuous monitoring of pre-trained or third-party components beyond initial review; MG.3.2 mandates ongoing monitoring of these supplied artifacts to catch dormant malicious behavior."
|
|
41
|
+
strength: secondary
|
|
31
42
|
tags:
|
|
32
43
|
category: skill-compromise
|
|
33
44
|
subcategory: timebomb-exfil
|