npm - agent-threat-rules - Versions diffs - 3.1.0 → 3.2.0 - Mend

agent-threat-rules 3.1.0 → 3.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (472)

package/rules/agent-manipulation/ATR-2026-00108-consensus-sybil-attack.yaml CHANGED Viewed

@@ -24,6 +24,8 @@ references:
     - ASI01:2026 - Agent Goal Hijack
   mitre_atlas:
     - AML.T0043 - Craft Adversarial Data
+  owasp_llm:
+    - LLM01:2025 - Prompt Injection
 compliance:
   eu_ai_act:
     - article: "14"
@@ -32,6 +34,9 @@ compliance:
     - article: "15"
       context: "Article 15 robustness requirements mandate that high-risk AI systems resist adversarial manipulation; Sybil attacks on consensus mechanisms are a documented adversarial pattern requiring systematic detection."
       strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Multi-Agent Consensus Sybil Attack)."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "GV.1.2"
       context: "Consensus Sybil attacks exploit undefined accountability for agent identity in multi-agent systems; GV.1.2 requires that accountability roles for AI risk management are defined and assigned to prevent Sybil-style identity fabrication."
@@ -39,12 +44,15 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Sybil attacks represent a documented risk treatment gap in multi-agent consensus deployments; MG.2.3 requires that risk treatment plans are implemented for identified AI risks, including detection controls for fake-identity flooding."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Multi-Agent Consensus Sybil Attack)."
+      strength: primary
   iso_42001:
     - clause: "6.2"
       context: "Multi-agent systems deploying consensus mechanisms must include Sybil attack detection as a planned risk treatment activity under the AI objectives framework required by clause 6.2."
       strength: primary
-    - clause: "8.6"
-      context: "Clause 8.6 operational controls must ensure that consensus decisions are made by verified agent identities and that fabricated voter identities are detected before they influence multi-agent outcomes."
+    - clause: "8.1"
+      context: "Clause 8.1 operational controls must ensure that consensus decisions are made by verified agent identities and that fabricated voter identities are detected before they influence multi-agent outcomes."
       strength: secondary
 tags:
   category: agent-manipulation

package/rules/agent-manipulation/ATR-2026-00116-a2a-message-validation.yaml CHANGED Viewed

@@ -20,6 +20,10 @@ references:
     - ASI07:2026 - Insecure Inter-Agent Communication
   mitre_attack:
     - T1557 - Adversary-in-the-Middle
+  owasp_llm:
+    - LLM01:2025 - Prompt Injection
+  mitre_atlas:
+    - AML.T0051 - LLM Prompt Injection
 compliance:
   eu_ai_act:
     - article: "15"
@@ -28,6 +32,9 @@ compliance:
     - article: "14"
       context: "A2A message injection can cause agents to take actions outside their authorized scope without human awareness, eroding the effective oversight capability Article 14 requires for high-risk AI systems."
       strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Malicious Agent-to-Agent Message Injection)."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "MP.5.1"
       context: "Malicious A2A message injection is a documented adversarial input risk targeting inter-agent communication channels; MP.5.1 requires that adversarial input risks to AI systems are identified and tracked to enable detection of embedded payload attacks."
@@ -35,12 +42,15 @@ compliance:
     - subcategory: "MG.2.3"
       context: "A2A message validation failures represent a risk requiring active treatment; MG.2.3 requires that risk treatment plans are implemented for identified AI risks, including validation controls on all inter-agent message channels."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Malicious Agent-to-Agent Message Injection)."
+      strength: primary
   iso_42001:
     - clause: "6.2"
       context: "AI system plans under clause 6.2 must include risk treatment activities for inter-agent message injection, ensuring that A2A communication validation is a planned control rather than an afterthought."
       strength: primary
-    - clause: "8.6"
-      context: "Clause 8.6 operational controls require that inter-agent messages are validated before execution, preventing injected instructions from executing in the receiving agent's security context."
+    - clause: "8.1"
+      context: "Clause 8.1 operational controls require that inter-agent messages are validated before execution, preventing injected instructions from executing in the receiving agent's security context."
       strength: secondary
 tags:
   category: agent-manipulation

package/rules/agent-manipulation/ATR-2026-00117-agent-identity-spoofing.yaml CHANGED Viewed

@@ -20,6 +20,10 @@ references:
     - ASI10:2026 - Rogue Agents
   mitre_attack:
     - T1036 - Masquerading
+  owasp_llm:
+    - LLM01:2025 - Prompt Injection
+  mitre_atlas:
+    - AML.T0051 - LLM Prompt Injection
 compliance:
   eu_ai_act:
     - article: "13"
@@ -28,6 +32,12 @@ compliance:
     - article: "15"
       context: "Article 15 cybersecurity requirements include protection against masquerading attacks; identity spoofing in multi-agent systems represents a documented adversarial pattern targeting the authentication layer of agent architectures."
       strength: secondary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (Agent Identity Spoofing and Authority Impersonation) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Agent Identity Spoofing and Authority Impersonation)."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "GV.6.1"
       context: "Agent identity spoofing exploits the absence of verified identity provenance in inter-agent data flows; GV.6.1 data governance policies must define how agent identity claims are authenticated to prevent masquerading attacks."
@@ -35,6 +45,12 @@ compliance:
     - subcategory: "MP.5.1"
       context: "Impersonation of admin or supervisor agent roles is an adversarial input risk that must be tracked under MP.5.1 to ensure detection controls cover identity-claim-based privilege escalation patterns."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Agent Identity Spoofing and Authority Impersonation)."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the agent-manipulation technique (Agent Identity Spoofing and Authority Impersonation) so the risk can be treated."
+      strength: secondary
   iso_42001:
     - clause: "8.4"
       context: "Clause 8.4 AI system impact assessments must document the risk that unverified agent identity claims allow privilege escalation, and review controls that ensure identity spoofing is detectable before actions are taken."
@@ -42,6 +58,12 @@ compliance:
     - clause: "9.1"
       context: "Clause 9.1 performance monitoring must include evaluation of whether agent identity verification controls are functioning correctly and catching masquerading attacks in operational deployments."
       strength: secondary
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (Agent Identity Spoofing and Authority Impersonation)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (Agent Identity Spoofing and Authority Impersonation) is such a treatment."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: agent-identity-spoofing

package/rules/agent-manipulation/ATR-2026-00118-approval-fatigue.yaml CHANGED Viewed

@@ -19,6 +19,10 @@ references:
     - ASI09:2026 - Human Trust Exploitation
   mitre_attack:
     - T1204 - User Execution
+  owasp_llm:
+    - LLM01:2025 - Prompt Injection
+  mitre_atlas:
+    - AML.T0051 - LLM Prompt Injection
 compliance:
   nist_ai_rmf:
     - subcategory: "GV.6.1"
@@ -30,6 +34,26 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Detection of approval-fatigue patterns must trigger risk treatment plans that pause, throttle, or escalate suspicious bulk approval flows before dangerous tool calls execute; MG.2.3 mandates that these supersede/disengage mechanisms are pre-defined and activated on detection."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Human Approval Fatigue Exploitation)."
+      strength: primary
+  iso_42001:
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (Human Approval Fatigue Exploitation)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (Human Approval Fatigue Exploitation) is such a treatment."
+      strength: secondary
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (Human Approval Fatigue Exploitation)."
+      strength: primary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (Human Approval Fatigue Exploitation) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Human Approval Fatigue Exploitation)."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: approval-fatigue

package/rules/agent-manipulation/ATR-2026-00119-social-engineering-via-agent.yaml CHANGED Viewed

@@ -19,6 +19,10 @@ references:
     - ASI09:2026 - Human Trust Exploitation
   mitre_attack:
     - T1566 - Phishing
+  owasp_llm:
+    - LLM01:2025 - Prompt Injection
+  mitre_atlas:
+    - AML.T0051 - LLM Prompt Injection
 compliance:
   eu_ai_act:
     - article: "13"
@@ -27,6 +31,12 @@ compliance:
     - article: "14"
       context: "Agents weaponized for social engineering undermine the informed human judgment that Article 14 oversight depends on; users manipulated through trusted AI channels cannot exercise effective oversight of subsequent decisions."
       strength: secondary
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (Social Engineering Attack via Agent Output)."
+      strength: primary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Social Engineering Attack via Agent Output)."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "GV.6.1"
       context: "Social engineering delivered via agent output exploits user trust in AI-generated content to harvest credentials and personal data; GV.6.1 data governance policies must address how AI-generated communications are authenticated to prevent agent-mediated phishing."
@@ -34,6 +44,12 @@ compliance:
     - subcategory: "MP.5.1"
       context: "Using agents as social engineering vectors is an adversarial input risk where poisoned context produces manipulative outputs; MP.5.1 requires this risk to be identified and tracked so that urgency and authority-impersonation patterns in agent output are monitored."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Social Engineering Attack via Agent Output)."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the agent-manipulation technique (Social Engineering Attack via Agent Output) so the risk can be treated."
+      strength: secondary
   iso_42001:
     - clause: "8.4"
       context: "Clause 8.4 impact assessments must document the elevated trust users place in AI-generated content and evaluate whether social engineering via agent output constitutes a significant harm requiring dedicated detection controls."
@@ -41,6 +57,12 @@ compliance:
     - clause: "9.1"
       context: "Clause 9.1 performance monitoring must evaluate whether agent output monitoring detects social engineering patterns such as urgency language and credential-harvesting requests generated through poisoned agent context."
       strength: secondary
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (Social Engineering Attack via Agent Output)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (Social Engineering Attack via Agent Output) is such a treatment."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: social-engineering-via-agent

package/rules/agent-manipulation/ATR-2026-00132-casual-authority-escalation.yaml CHANGED Viewed

@@ -34,6 +34,9 @@ compliance:
     - article: "9"
       context: "Rationalized bypasses and casual scope escalation are documented attack patterns that Article 9 risk management systems must account for; detection of informal social engineering is a required risk treatment for multi-agent deployments."
       strength: secondary
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (Casual Authority Claim and Scope Escalation)."
+      strength: primary
   nist_ai_rmf:
     - subcategory: "GV.1.2"
       context: "Casual authority escalation exploits gaps in accountability role definitions, allowing informal authority claims like 'the boss said' to bypass controls without challenge; GV.1.2 requires that accountability roles are formally assigned so that unverifiable casual authority claims are detectable."
@@ -41,12 +44,15 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Conversational-tone scope escalation is a documented risk requiring active treatment; MG.2.3 requires that risk treatment plans address informal social engineering techniques that evade formal injection detection in multi-agent deployments."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Casual Authority Claim and Scope Escalation)."
+      strength: primary
   iso_42001:
     - clause: "6.2"
       context: "Clause 6.2 AI objectives and plans must include risk treatment for casual authority claim patterns that bypass formal injection detection, as these represent a distinct attack surface from explicit privilege escalation."
       strength: primary
-    - clause: "8.6"
-      context: "Clause 8.6 operational controls must ensure that agents require verified authorization before acting on scope expansions or control bypasses, regardless of whether the instruction is phrased formally or in casual conversational language."
+    - clause: "8.1"
+      context: "Clause 8.1 operational controls must ensure that agents require verified authorization before acting on scope expansions or control bypasses, regardless of whether the instruction is phrased formally or in casual conversational language."
       strength: secondary
 tags:
   category: agent-manipulation

package/rules/agent-manipulation/ATR-2026-00139-casual-authority-redirect.yaml CHANGED Viewed

@@ -29,6 +29,9 @@ compliance:
     - article: "15"
       context: "Article 15 cybersecurity requirements include protection against social engineering attacks targeting agent output pipelines; casual authority redirect is a documented adversarial technique exploiting agents as exfiltration proxies."
       strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Casual Authority Data Redirect)."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "GV.1.2"
       context: "Casual authority redirect attacks succeed when no formal accountability role exists to verify output redirection decisions; GV.1.2 requires that accountability roles for AI risk management are defined so that informal directives to redirect agent output can be challenged and blocked."
@@ -36,12 +39,15 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Data redirect via social engineering authority claims is a documented exfiltration risk requiring active treatment; MG.2.3 requires that risk treatment plans include detection controls for authority-impersonation-based output redirection attacks."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Casual Authority Data Redirect)."
+      strength: primary
   iso_42001:
     - clause: "6.2"
       context: "Clause 6.2 AI objectives and plans must include controls for preventing agents from being redirected to attacker-controlled endpoints through casual authority claims that spoof organizational hierarchy."
       strength: primary
-    - clause: "8.6"
-      context: "Clause 8.6 operational controls must ensure that agent output destinations are validated against authorized endpoints and that casual authority directives to disable safety filters or redirect data are blocked before execution."
+    - clause: "8.1"
+      context: "Clause 8.1 operational controls must ensure that agent output destinations are validated against authorized endpoints and that casual authority directives to disable safety filters or redirect data are blocked before execution."
       strength: secondary
 tags:
   category: agent-manipulation

package/rules/agent-manipulation/ATR-2026-00164-skill-scope-hijack.yaml CHANGED Viewed

@@ -17,6 +17,8 @@ references:
     - 'LLM06:2025 - Excessive Agency'
   owasp_agentic:
     - 'ASI03:2026 - Cross-Agent Escalation'
+  mitre_atlas:
+    - AML.T0051 - LLM Prompt Injection
 compliance:
   eu_ai_act:
     - article: "14"
@@ -25,6 +27,9 @@ compliance:
     - article: "15"
       context: "Article 15 cybersecurity requirements mandate protection against supply chain attacks; malicious SKILL.md files represent a documented technique for injecting unauthorized capability expansion at the skill-definition layer."
       strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Skill Scope Hijacking and Cross-Agent Escalation)."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "GV.1.2"
       context: "Skill scope hijacking succeeds when no accountability role governs what capabilities a skill may claim; GV.1.2 requires that accountability roles for AI risk management are defined and assigned, ensuring that SKILL.md capability claims are reviewed against authorized scope boundaries."
@@ -32,12 +37,18 @@ compliance:
     - subcategory: "MP.2.3"
       context: "Malicious SKILL.md files in external skill repositories represent an AI supply chain risk source; MP.2.3 requires that AI supply chain risk sources are identified and assessed, covering the skill-definition layer as a vector for privilege escalation injection."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Skill Scope Hijacking and Cross-Agent Escalation)."
+      strength: primary
+    - subcategory: "MG.2.3"
+      context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the agent-manipulation technique (Skill Scope Hijacking and Cross-Agent Escalation) so the risk can be treated."
+      strength: secondary
   iso_42001:
     - clause: "6.2"
       context: "Clause 6.2 AI objectives and plans must include risk treatment for skill-definition-layer attacks where SKILL.md instructions expand agent scope beyond authorized boundaries without human knowledge."
       strength: primary
-    - clause: "8.5"
-      context: "Clause 8.5 controls over externally-provided AI components must include validation of SKILL.md files to ensure that skill definitions do not embed cross-agent escalation or unauthorized scope expansion instructions."
+    - clause: "8.1"
+      context: "Clause 8.1 controls over externally-provided AI components must include validation of SKILL.md files to ensure that skill definitions do not embed cross-agent escalation or unauthorized scope expansion instructions."
       strength: secondary
 tags:
   category: agent-manipulation

package/rules/agent-manipulation/ATR-2026-00268-tense-framing-bypass.yaml CHANGED Viewed

@@ -35,6 +35,23 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Matches on tense-reframing jailbreak payloads should trigger pre-defined response actions to block or disengage the affected generation path before harmful content is returned, as required by MG.2.3."
       strength: secondary
+  iso_42001:
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (Historical / Future Tense Framing Bypass)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (Historical / Future Tense Framing Bypass) is such a treatment."
+      strength: secondary
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (Historical / Future Tense Framing Bypass)."
+      strength: primary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (Historical / Future Tense Framing Bypass) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Historical / Future Tense Framing Bypass)."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: tense-framing-bypass

package/rules/agent-manipulation/ATR-2026-00269-fitd-escalation.yaml CHANGED Viewed

@@ -34,6 +34,9 @@ compliance:
     - article: "9"
       context: "FITD escalation is a documented multi-step adversarial pattern; Article 9 risk management systems must include detection controls for incremental jailbreak techniques that bypass single-turn safety evaluation."
       strength: secondary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (Foot-in-the-Door Gradual Escalation Attack) would bypass or undermine that oversight."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "MP.5.1"
       context: "Foot-in-the-door escalation is an adversarial input technique that exploits LLM consistency bias to incrementally bypass safety constraints across multiple turns; MP.5.1 requires that such multi-step adversarial input risks are identified and tracked to enable cross-turn detection."
@@ -41,12 +44,15 @@ compliance:
     - subcategory: "MG.2.3"
       context: "FITD escalation represents an identified risk requiring active treatment through detection controls that monitor prior-compliance anchors and escalation signals across conversation history; MG.2.3 requires such risk treatment plans are implemented."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Foot-in-the-Door Gradual Escalation Attack)."
+      strength: primary
   iso_42001:
     - clause: "6.2"
       context: "Clause 6.2 AI objectives and plans must include risk treatment for multi-step jailbreak techniques like FITD escalation that exploit model consistency bias to extract harmful outputs that would be refused if requested directly."
       strength: primary
-    - clause: "8.6"
-      context: "Clause 8.6 operational controls must ensure that agents evaluate escalating requests in context of prior conversation turns rather than in isolation, preventing foot-in-the-door compliance anchoring from lowering refusal thresholds."
+    - clause: "8.1"
+      context: "Clause 8.1 operational controls must ensure that agents evaluate escalating requests in context of prior conversation turns rather than in isolation, preventing foot-in-the-door compliance anchoring from lowering refusal thresholds."
       strength: secondary
 tags:
   category: agent-manipulation

package/rules/agent-manipulation/ATR-2026-00271-grandma-roleplay-jailbreak.yaml CHANGED Viewed

@@ -34,6 +34,9 @@ compliance:
     - article: "9"
       context: "Persona-based jailbreaks exploiting emotional framing are documented high-risk attack patterns; Article 9 risk management requires systematic controls for emotional manipulation techniques used to extract harmful content from high-risk AI systems."
       strength: secondary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (Grandma Roleplay Jailbreak) would bypass or undermine that oversight."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "MP.5.1"
       context: "Grandma roleplay jailbreaks are adversarial inputs that exploit grief and nostalgia emotional framing to bypass safety constraints; MP.5.1 requires that this class of psychological manipulation risk is identified and tracked as a distinct adversarial input vector."
@@ -41,12 +44,15 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Emotional-framing persona jailbreaks represent a known risk requiring active treatment through detection controls for deceased-relative roleplay patterns; MG.2.3 requires that risk treatment plans are implemented for identified AI risks including this garak-documented attack family."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Grandma Roleplay Jailbreak)."
+      strength: primary
   iso_42001:
     - clause: "6.2"
       context: "Clause 6.2 AI objectives and plans must include risk treatment for emotional manipulation techniques such as grandma roleplay that lower the model's refusal threshold by exploiting grief and nostalgia framing."
       strength: primary
-    - clause: "8.6"
-      context: "Clause 8.6 operational controls must ensure that harmful procedural content is not generated regardless of the emotional or persona framing used in the request, blocking grandma roleplay jailbreak patterns before harmful content is produced."
+    - clause: "8.1"
+      context: "Clause 8.1 operational controls must ensure that harmful procedural content is not generated regardless of the emotional or persona framing used in the request, blocking grandma roleplay jailbreak patterns before harmful content is produced."
       strength: secondary
 tags:
   category: agent-manipulation

package/rules/agent-manipulation/ATR-2026-00273-dan-developer-mode-persona.yaml CHANGED Viewed

@@ -35,6 +35,9 @@ compliance:
     - article: "9"
       context: "Named persona jailbreaks are a well-documented attack family with widespread deployment in the wild; Article 9 risk management systems for high-risk AI must include detection controls for this established technique for bypassing content restrictions."
       strength: secondary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (DAN / Developer Mode / DUDE Persona Jailbreak) would bypass or undermine that oversight."
+      strength: secondary
   nist_ai_rmf:
     - subcategory: "MP.5.1"
       context: "DAN, DUDE, and Developer Mode persona jailbreaks are adversarial inputs that attempt to replace the model's safety-trained identity with an unrestricted persona; MP.5.1 requires that this widely-deployed adversarial input risk is identified and tracked across model deployments."
@@ -42,12 +45,15 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Named persona jailbreak families represent a documented risk requiring active treatment through pattern-based detection of DAN/DUDE/STAN identity-replacement prompts; MG.2.3 requires that risk treatment plans are implemented for this established and widely-distributed attack technique."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (DAN / Developer Mode / DUDE Persona Jailbreak)."
+      strength: primary
   iso_42001:
     - clause: "6.2"
       context: "Clause 6.2 AI objectives and plans must include risk treatment for DAN and Developer Mode persona replacement attacks, which represent one of the most widely-deployed jailbreak families targeting AI system safety constraints."
       strength: primary
-    - clause: "8.6"
-      context: "Clause 8.6 operational controls must ensure that the AI system's safety identity cannot be replaced by named unrestricted personas such as DAN or DUDE, blocking persona-replacement jailbreaks before they disable safety constraints."
+    - clause: "8.1"
+      context: "Clause 8.1 operational controls must ensure that the AI system's safety identity cannot be replaced by named unrestricted personas such as DAN or DUDE, blocking persona-replacement jailbreaks before they disable safety constraints."
       strength: secondary
 tags:
   category: agent-manipulation

package/rules/agent-manipulation/ATR-2026-00287-threaten-json-coercive-output-threat.yaml CHANGED Viewed

@@ -41,6 +41,23 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Matches on threat-plus-format-constraint patterns trigger risk treatment plans to block or sanitize coerced outputs before they bypass safety guardrails; MG.2.3 requires mechanisms to supersede or disengage AI responses produced under adversarial coercion."
       strength: secondary
+  iso_42001:
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (ThreatenJSON — Coercive Output Format Threat)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (ThreatenJSON — Coercive Output Format Threat) is such a treatment."
+      strength: secondary
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (ThreatenJSON — Coercive Output Format Threat)."
+      strength: primary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (ThreatenJSON — Coercive Output Format Threat) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (ThreatenJSON — Coercive Output Format Threat)."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: threaten-json-coercive-output

package/rules/agent-manipulation/ATR-2026-00288-false-premise-injection.yaml CHANGED Viewed

@@ -43,6 +43,26 @@ compliance:
     - subcategory: "MG.2.3"
       context: "When false-premise prompts are detected, risk treatment must intercept the request before the model rationalizes harmful pseudoscience or dangerous practices; MG.2.3 mandates pre-defined mechanisms to disengage or block such interactions."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (False Premise Injection (Misleading FalseAssertion))."
+      strength: primary
+  iso_42001:
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (False Premise Injection (Misleading FalseAssertion))."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (False Premise Injection (Misleading FalseAssertion)) is such a treatment."
+      strength: secondary
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (False Premise Injection (Misleading FalseAssertion))."
+      strength: primary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (False Premise Injection (Misleading FalseAssertion)) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (False Premise Injection (Misleading FalseAssertion))."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: false-premise-injection

package/rules/agent-manipulation/ATR-2026-00301-tap-tree-of-attacks-jailbreak.yaml CHANGED Viewed

@@ -42,6 +42,26 @@ compliance:
       context: >-
         Detecting TAP-specific JSON improvement/prompt loops and SCORE iteration markers feeds post-deployment monitoring required by MG.4.1, enabling tracking of automated jailbreak campaign activity against the deployed model.
       strength: secondary
+    - subcategory: "MG.2.3"
+      context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the agent-manipulation technique (TAP Tree-of-Attacks-with-Pruning Jailbreak) so the risk can be treated."
+      strength: secondary
+  iso_42001:
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (TAP Tree-of-Attacks-with-Pruning Jailbreak)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (TAP Tree-of-Attacks-with-Pruning Jailbreak) is such a treatment."
+      strength: secondary
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (TAP Tree-of-Attacks-with-Pruning Jailbreak)."
+      strength: primary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (TAP Tree-of-Attacks-with-Pruning Jailbreak) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (TAP Tree-of-Attacks-with-Pruning Jailbreak)."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: tap-jailbreak

package/rules/agent-manipulation/ATR-2026-00302-anti-dan-inverted-filter-persona.yaml CHANGED Viewed

@@ -42,6 +42,26 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Detection of the ANTI-DAN re-bind trigger and persona assignment enables risk treatment plans to disengage or override the manipulated persona before sustained denial-of-utility occurs; MG.2.3 requires these supersede/deactivate mechanisms be defined and activated on detection."
       strength: secondary
+    - subcategory: "MS.2.7"
+      context: "NIST AI RMF MEASURE 2.7 (security and resilience evaluated and documented) is supported by this rule's runtime detection of the agent-manipulation technique (Anti-DAN Inverted-Filter Over-Refusal Persona)."
+      strength: primary
+  iso_42001:
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (Anti-DAN Inverted-Filter Over-Refusal Persona)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (Anti-DAN Inverted-Filter Over-Refusal Persona) is such a treatment."
+      strength: secondary
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (Anti-DAN Inverted-Filter Over-Refusal Persona)."
+      strength: primary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (Anti-DAN Inverted-Filter Over-Refusal Persona) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (Anti-DAN Inverted-Filter Over-Refusal Persona)."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: anti-dan-over-refusal

package/rules/agent-manipulation/ATR-2026-00303-devmode-ranti-profanity-coercion.yaml CHANGED Viewed

@@ -45,6 +45,23 @@ compliance:
     - subcategory: "MG.2.3"
       context: "Matches against the RANTI canonical postfix and emoji-tagged dual-output markers feed incident response pipelines that can disengage or block the offending session before unfiltered output is produced; MG.2.3 mandates these supersede/deactivate mechanisms."
       strength: secondary
+  iso_42001:
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (DevMode + RANTI Dual-Output Profanity Coercion Jailbreak)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (DevMode + RANTI Dual-Output Profanity Coercion Jailbreak) is such a treatment."
+      strength: secondary
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (DevMode + RANTI Dual-Output Profanity Coercion Jailbreak)."
+      strength: primary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (DevMode + RANTI Dual-Output Profanity Coercion Jailbreak) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (DevMode + RANTI Dual-Output Profanity Coercion Jailbreak)."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: devmode-ranti-dual-output

package/rules/agent-manipulation/ATR-2026-00304-chatgpt-image-unlocker-markdown-injection.yaml CHANGED Viewed

@@ -48,6 +48,26 @@ compliance:
     - subcategory: "MG.4.1"
       context: "Signature-based detection of the canonical Image Unlocker markers (🔓 persona, hardcoded Unsplash URL, GamerboyTR fallback) feeds post-deployment monitoring pipelines that MG.4.1 requires for catching jailbreak attempts in production traffic."
       strength: secondary
+    - subcategory: "MG.2.3"
+      context: "NIST AI RMF MANAGE 2.3 (respond to previously unknown identified risks) is supported by this rule, which surfaces the agent-manipulation technique (ChatGPT Image Unlocker Markdown-Output Jailbreak) so the risk can be treated."
+      strength: secondary
+  iso_42001:
+    - clause: "8.1"
+      context: "ISO/IEC 42001 Clause 8.1 (operational planning and control, including control of externally provided processes) is operationalised by this rule's detection of the agent-manipulation technique (ChatGPT Image Unlocker Markdown-Output Jailbreak)."
+      strength: primary
+    - clause: "6.2"
+      context: "ISO/IEC 42001 Clause 6.2 (AI objectives and planning) calls for risk treatment of known attack patterns; this rule's detection of the agent-manipulation technique (ChatGPT Image Unlocker Markdown-Output Jailbreak) is such a treatment."
+      strength: secondary
+  eu_ai_act:
+    - article: "15"
+      context: "Article 15 (accuracy, robustness and cybersecurity) requires high-risk AI systems to resist unauthorised attempts to alter their use, outputs or performance; this rule provides runtime detection evidence by flagging the agent-manipulation technique (ChatGPT Image Unlocker Markdown-Output Jailbreak)."
+      strength: primary
+    - article: "14"
+      context: "Article 14 (human oversight) requires high-risk AI systems to remain subject to effective human oversight; this rule provides detection evidence where the agent-manipulation technique (ChatGPT Image Unlocker Markdown-Output Jailbreak) would bypass or undermine that oversight."
+      strength: secondary
+    - article: "9"
+      context: "Article 9 (risk management system) requires identified risks to be addressed by appropriate measures; this rule is a runtime risk-treatment control that detects the agent-manipulation technique (ChatGPT Image Unlocker Markdown-Output Jailbreak)."
+      strength: secondary
 tags:
   category: agent-manipulation
   subcategory: image-unlocker-markdown-output