@highflame/policy 2.1.7 → 2.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,134 @@
1
+ // =============================================================================
2
+ // A2A Security — Inter-Agent Injection Defense
3
+ // =============================================================================
4
+ // Detects and blocks prompt injection that travels between independent agents:
5
+ //
6
+ // 1. INDIRECT INJECTION: Malicious content injected via tool outputs, RAG
7
+ // retrieval, or API responses from one agent that manipulates another.
8
+ // In A2A, this is the primary attack vector because agents consume each
9
+ // other's outputs as trusted input.
10
+ //
11
+ // 2. MULTI-TURN PROGRESSIVE ATTACKS: Gradual context manipulation across
12
+ // turns where each turn is benign individually but collectively builds
13
+ // toward goal hijacking. The GRU-based deep context detector tracks
14
+ // conversation state across turns.
15
+ //
16
+ // 3. ENCODED PAYLOAD DELIVERY: Base64, hex, or hash-encoded instructions
17
+ // designed to bypass single-turn classifiers. In A2A communication,
18
+ // encoded content is a strong indicator of injection evasion.
19
+ //
20
+ // Key A2A distinction: In MAS, the orchestrator controls all communication.
21
+ // In A2A, each agent independently receives content from external agents,
22
+ // making indirect injection the dominant threat vector.
23
+ //
24
+ // Compliance:
25
+ // OWASP LLM01 (Prompt Injection) — indirect variant
26
+ // OWASP ASI01 (Agent Goal Hijack)
27
+ // MITRE ATLAS AML.T0051 (LLM Prompt Injection)
28
+ // MITRE ATLAS AML.T0051.002 (Indirect Prompt Injection)
29
+ // NIST 800-53 SI-10 (Information Input Validation)
30
+ //
31
+ // Category: agent_identity
32
+ // Namespace: Guardrails
33
+ // =============================================================================
34
+
35
+ // -----------------------------------------------------------------------------
36
+ // Indirect Injection — Tool Output Poisoning
37
+ // -----------------------------------------------------------------------------
38
+
39
+ // Forbid call_tool for Guardrails::Agent principals when agent_id is non-empty and indirect_injection_score >= 60 (requests missing either context attribute are not matched by this policy).
40
+ @id("a2a-indirect-injection-agent")
41
+ @name("Block indirect injection from agent tool calls")
42
+ @description("Block tool execution when indirect injection is detected in content received by an agent (score >= 60). Indirect injection travels through tool outputs, RAG retrieval, and API responses — the primary A2A attack vector because agents consume each other's outputs as trusted input. Lower threshold than code-agent (70) because cross-system content has higher adversarial surface.")
43
+ @severity("critical")
44
+ @tags("profile,a2a-security,indirect-injection,tool-output,owasp-llm01,mitre-t0051-002")
45
+ @reject_message("Tool execution blocked: indirect prompt injection detected in content received by this agent (score >= 60). An external source may be attempting to hijack agent behavior through tool outputs or retrieved content.")
46
+ forbid (
47
+ principal is Guardrails::Agent,
48
+ action == Guardrails::Action::"call_tool",
49
+ resource
50
+ )
51
+ when {
52
+ context has agent_id && context.agent_id != "" &&
53
+ context has indirect_injection_score && context.indirect_injection_score >= 60
54
+ };
55
+
56
+ // Stricter threshold for sensitive tools: forbid call_tool for agents with a non-empty agent_id when indirect_injection_score >= 40 and tool_is_sensitive is true.
57
+ @id("a2a-indirect-injection-sensitive-agent")
58
+ @name("Block moderate indirect injection on sensitive tools from agents")
59
+ @description("Block sensitive tool execution from agents when indirect injection score is 40 or higher. Sensitive tools (write_file, http_post, send_email) amplify the damage of indirect injection — a lower threshold compensates for the higher blast radius of sensitive operations in cross-system communication.")
60
+ @severity("critical")
61
+ @tags("profile,a2a-security,indirect-injection,sensitive-tools,owasp-asi01")
62
+ @reject_message("Sensitive tool blocked: moderate indirect injection risk detected in agent-to-agent content (score >= 40). Sensitive tools require higher confidence that inter-agent content is safe.")
63
+ forbid (
64
+ principal is Guardrails::Agent,
65
+ action == Guardrails::Action::"call_tool",
66
+ resource
67
+ )
68
+ when {
69
+ context has agent_id && context.agent_id != "" &&
70
+ context has indirect_injection_score && context.indirect_injection_score >= 40 &&
71
+ context has tool_is_sensitive && context.tool_is_sensitive == true
72
+ };
73
+
74
+ // -----------------------------------------------------------------------------
75
+ // Multi-Turn Progressive Attacks — Deep Context GRU Detection
76
+ // -----------------------------------------------------------------------------
77
+
78
+ // Forbid process_prompt for agents whose agent_trust_level is present and not "first_party" when multi_turn_detection is true and injection_deep_context_score >= 60 (agents with no agent_trust_level attribute are not matched by this policy).
79
+ @id("a2a-deep-context-injection-agent")
80
+ @name("Block multi-turn progressive injection for agents")
81
+ @description("Block non-first-party agents when the GRU-based deep context detector identifies multi-turn progressive injection (score >= 60). Each turn may be benign individually, but the GRU model tracks hidden state across the full conversation to detect gradual goal hijacking.")
82
+ @severity("high")
83
+ @tags("profile,a2a-security,multi-turn,deep-context,injection,owasp-llm01")
84
+ @reject_message("Request blocked: multi-turn progressive injection detected across conversation history (deep context score >= 60). A gradual attack may be building context manipulation over multiple agent turns.")
85
+ forbid (
86
+ principal is Guardrails::Agent,
87
+ action == Guardrails::Action::"process_prompt",
88
+ resource
89
+ )
90
+ when {
91
+ context has agent_trust_level && context.agent_trust_level != "first_party" &&
92
+ context has multi_turn_detection && context.multi_turn_detection == true &&
93
+ context has injection_deep_context_score && context.injection_deep_context_score >= 60
94
+ };
95
+
96
+ // Forbid process_prompt for agents whose agent_trust_level is present and not "first_party" when multi_turn_detection is true and jailbreak_deep_context_score >= 60 (agents with no agent_trust_level attribute are not matched by this policy).
97
+ @id("a2a-deep-context-jailbreak-agent")
98
+ @name("Block multi-turn progressive jailbreak for agents")
99
+ @description("Block non-first-party agents when the GRU-based deep context detector identifies multi-turn progressive jailbreak (score >= 60). Jailbreak attempts spread across turns are harder to detect with single-turn classifiers — the deep context model maintains conversation state to catch these patterns.")
100
+ @severity("high")
101
+ @tags("profile,a2a-security,multi-turn,deep-context,jailbreak,owasp-asi01")
102
+ @reject_message("Request blocked: multi-turn progressive jailbreak detected across conversation history (deep context score >= 60). A gradual jailbreak attempt is building across multiple agent turns.")
103
+ forbid (
104
+ principal is Guardrails::Agent,
105
+ action == Guardrails::Action::"process_prompt",
106
+ resource
107
+ )
108
+ when {
109
+ context has agent_trust_level && context.agent_trust_level != "first_party" &&
110
+ context has multi_turn_detection && context.multi_turn_detection == true &&
111
+ context has jailbreak_deep_context_score && context.jailbreak_deep_context_score >= 60
112
+ };
113
+
114
+ // -----------------------------------------------------------------------------
115
+ // Encoded Payload Delivery — Injection Evasion
116
+ // -----------------------------------------------------------------------------
117
+
118
+ // Forbid process_prompt and call_tool for agents with a non-empty agent_id when encoded_content_detected is true and encoded_score >= 60.
119
+ @id("a2a-encoded-injection-agent")
120
+ @name("Block encoded payload delivery between agents")
121
+ @description("Block agent requests when encoded payloads are detected (base64, hex, or suspicious hash content with score >= 60). In A2A communication, encoded content is a strong indicator of injection evasion — agents should communicate in plaintext, not encoded payloads.")
122
+ @severity("high")
123
+ @tags("profile,a2a-security,encoded-injection,evasion,base64,owasp-llm01")
124
+ @reject_message("Request blocked: encoded payload detected in agent communication (base64, hex, or suspicious hash content). Encoded content in agent-to-agent messages indicates injection evasion or payload delivery.")
125
+ forbid (
126
+ principal is Guardrails::Agent,
127
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
128
+ resource
129
+ )
130
+ when {
131
+ context has agent_id && context.agent_id != "" &&
132
+ context has encoded_content_detected && context.encoded_content_detected == true &&
133
+ context has encoded_score && context.encoded_score >= 60
134
+ };
@@ -0,0 +1,117 @@
1
+ // =============================================================================
2
+ // A2A Security — Supply Chain & Behavioral Drift
3
+ // =============================================================================
4
+ // Detects and blocks supply chain attacks specific to agent-to-agent ecosystems:
5
+ //
6
+ // 1. TOOL POISONING: External agents install or recommend MCP tools with
7
+ // hidden instructions, system prompt injection, authority hijack, or
8
+ // information suppression patterns in tool descriptions.
9
+ //
10
+ // 2. RUG PULL: An agent or tool that behaves normally during evaluation
11
+ // but changes behavior after trust is established — "risk_spike" (sudden
12
+ // risk increase) or "pattern_change" (behavioral deviation).
13
+ //
14
+ // 3. CREDENTIAL THEFT CHAINS: Multi-step attack where an agent reads
15
+ // credential files, encodes them, and exfiltrates via network tools.
16
+ // In A2A, this is higher risk because external agents have legitimate
17
+ // reasons to call multiple tools in sequence.
18
+ //
19
+ // Key A2A distinction: In MAS, the orchestrator vets all tools centrally.
20
+ // In A2A, each agent brings its own tool ecosystem, creating a supply chain
21
+ // attack surface at every agent boundary.
22
+ //
23
+ // Compliance:
24
+ // OWASP ASI04 (Supply Chain Vulnerabilities)
25
+ // OWASP MCP01 (Tool Poisoning)
26
+ // OWASP MCP03 (Tool Shadowing)
27
+ // MITRE ATT&CK T1552 (Unsecured Credentials)
28
+ // MITRE ATLAS AML.T0049 (Backdoor ML Model) — analogous to agent drift
29
+ // NIST 800-53 SI-7 (Software, Firmware, and Information Integrity)
30
+ //
31
+ // Category: agent_identity
32
+ // Namespace: Guardrails
33
+ // =============================================================================
34
+
35
+ // -----------------------------------------------------------------------------
36
+ // Tool Poisoning — Hidden Instructions in External Agent Tools
37
+ // -----------------------------------------------------------------------------
38
+
39
+ // Forbid call_tool for agents whose agent_trust_level is present and not "first_party" when tool_poisoning_detected is true and tool_poisoning_score >= 60.
40
+ @id("a2a-tool-poisoning-agent")
41
+ @name("Block tool poisoning from non-first-party agents")
42
+ @description("Block tool execution when poisoning is detected from non-first-party agents (score >= 60). Tool poisoning includes hidden instructions, system prompt injection, authority hijack, and information suppression patterns in tool descriptions or arguments. Lower threshold than default (70) because external agent tool chains have higher supply chain risk.")
43
+ @severity("critical")
44
+ @tags("profile,a2a-security,supply-chain,tool-poisoning,owasp-mcp01,owasp-asi04")
45
+ @reject_message("Tool execution blocked: tool poisoning detected in content from a non-first-party agent (score >= 60). Hidden instructions, authority hijack, or system prompt injection patterns found in tool descriptions or arguments.")
46
+ forbid (
47
+ principal is Guardrails::Agent,
48
+ action == Guardrails::Action::"call_tool",
49
+ resource
50
+ )
51
+ when {
52
+ context has agent_trust_level && context.agent_trust_level != "first_party" &&
53
+ context has tool_poisoning_detected && context.tool_poisoning_detected == true &&
54
+ context has tool_poisoning_score && context.tool_poisoning_score >= 60
55
+ };
56
+
57
+ // Forbid connect_server for agents with a non-empty agent_id when tool_poisoning_detected is true and tool_poisoning_score >= 55.
58
+ @id("a2a-server-poisoning-agent")
59
+ @name("Block poisoned server connections from agents")
60
+ @description("Block MCP server connections when poisoning is detected from agent requests (score >= 55). Server-level poisoning has wider blast radius than individual tool poisoning — a single compromised server connection exposes all tools on that server. Lower threshold (55 vs 60) compensates for the amplified impact.")
61
+ @severity("critical")
62
+ @tags("profile,a2a-security,supply-chain,server-poisoning,mcp,owasp-mcp01")
63
+ @reject_message("MCP server connection blocked: tool poisoning patterns detected in server from agent request (score >= 55). Server-level poisoning affects all tools and warrants a lower threshold than individual tool poisoning.")
64
+ forbid (
65
+ principal is Guardrails::Agent,
66
+ action == Guardrails::Action::"connect_server",
67
+ resource
68
+ )
69
+ when {
70
+ context has agent_id && context.agent_id != "" &&
71
+ context has tool_poisoning_detected && context.tool_poisoning_detected == true &&
72
+ context has tool_poisoning_score && context.tool_poisoning_score >= 55
73
+ };
74
+
75
+ // -----------------------------------------------------------------------------
76
+ // Rug Pull — Behavioral Drift After Trust Establishment
77
+ // -----------------------------------------------------------------------------
78
+
79
+ // Forbid call_tool for agents with a non-empty agent_id when rug_pull_detected is true and rug_pull_score >= 70.
80
+ @id("a2a-rug-pull-agent")
81
+ @name("Block rug pull behavioral drift from agents")
82
+ @description("Block tool execution when behavioral drift is detected in agent tool usage (score >= 70). The rug pull detector compares current tool behavior against established baselines — a sudden risk spike or tool alternation pattern change after 3+ normal calls indicates the agent or tool has been compromised mid-session.")
83
+ @severity("critical")
84
+ @tags("profile,a2a-security,supply-chain,rug-pull,behavioral-drift,mitre-t0049")
85
+ @reject_message("Tool execution blocked: behavioral drift detected in agent tool usage (rug pull score >= 70). The tool's behavior has deviated significantly from established patterns, indicating potential supply chain compromise.")
86
+ forbid (
87
+ principal is Guardrails::Agent,
88
+ action == Guardrails::Action::"call_tool",
89
+ resource
90
+ )
91
+ when {
92
+ context has agent_id && context.agent_id != "" &&
93
+ context has rug_pull_detected && context.rug_pull_detected == true &&
94
+ context has rug_pull_score && context.rug_pull_score >= 70
95
+ };
96
+
97
+ // -----------------------------------------------------------------------------
98
+ // Credential Theft Chains — Multi-Step Exfiltration
99
+ // -----------------------------------------------------------------------------
100
+
101
+ // Forbid call_tool for agents whose agent_trust_level is present and not "first_party" when suspicious_pattern is true and pattern_type is "credential_theft".
102
+ @id("a2a-credential-theft-agent")
103
+ @name("Block agent-initiated credential theft chains")
104
+ @description("Block tool execution when credential theft patterns are detected from non-first-party agents. The pattern detector identifies multi-step sequences (read credentials → encode → exfiltrate) that indicate coordinated credential harvesting. In A2A, external agents have legitimate multi-tool workflows, making pattern-based detection essential to distinguish theft from normal usage.")
105
+ @severity("critical")
106
+ @tags("profile,a2a-security,supply-chain,credential-theft,exfiltration,mitre-t1552")
107
+ @reject_message("Tool execution blocked: credential theft chain detected from a non-first-party agent. The agent is performing a multi-step operation to harvest and exfiltrate credentials. Only first-party agents may access credential-adjacent resources.")
108
+ forbid (
109
+ principal is Guardrails::Agent,
110
+ action == Guardrails::Action::"call_tool",
111
+ resource
112
+ )
113
+ when {
114
+ context has agent_trust_level && context.agent_trust_level != "first_party" &&
115
+ context has suspicious_pattern && context.suspicious_pattern == true &&
116
+ context has pattern_type && context.pattern_type == "credential_theft"
117
+ };
@@ -0,0 +1,73 @@
1
+ // =============================================================================
2
+ // Advanced Detection — PII
3
+ // =============================================================================
4
+ // Advanced PII detection policies using ML classifier confidence scoring,
5
+ // bulk exposure thresholds, and file operation blocking. Goes beyond the
6
+ // boolean pii_detected flag with layered detection:
7
+ //
8
+ // 1. Bulk PII exposure — 3+ PII matches indicate data dumps or CSV pastes
9
+ // 2. ML classifier confidence — catches novel PII patterns that regex misses
10
+ // 3. File operation PII — prevents PII persistence to disk
11
+ //
12
+ // These policies benefit any Guardrails deployment — not just coding agents.
13
+ //
14
+ // Adapted from Overwatch PII detection policies for Guardrails namespace.
15
+ //
16
+ // Compliance:
17
+ // PCI DSS 3.4 (Payment Card Data)
18
+ // GDPR Art. 32 (Security of Processing)
19
+ // HIPAA §164.312 (Technical Safeguards)
20
+ // CCPA §1798.150 (Data Protection)
21
+ // NIST 800-53 SI-4 (Information System Monitoring)
22
+ //
23
+ // Category: privacy
24
+ // Namespace: Guardrails
25
+ // =============================================================================
26
+
27
+ // Forbid process_prompt and call_tool for any principal when context.pii_count is present and >= 3.
28
+ @id("detection-block-bulk-pii")
29
+ @name("Block bulk PII exposure")
30
+ @description("Block content containing 3 or more PII matches. Multiple PII items in a single request indicates a data dump, CSV paste, or data exfiltration attempt. Single PII occurrences may be incidental — bulk exposure is always intentional or negligent.")
31
+ @severity("critical")
32
+ @tags("profile,advanced-detection,pii,bulk,data-exfiltration,gdpr-art-32,ccpa")
33
+ @reject_message("Content blocked: multiple PII items detected (3+). Bulk personal data must never be processed through AI agents. Use data masking or tokenization for batch operations.")
34
+ forbid (
35
+ principal,
36
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
37
+ resource
38
+ )
39
+ when {
40
+ context has pii_count && context.pii_count >= 3
41
+ };
42
+
43
+ // Forbid process_prompt and call_tool for any principal when context.pii_confidence is present and >= 80 (the threshold is inclusive).
44
+ @id("detection-block-pii-high-confidence")
45
+ @name("Block high-confidence PII")
46
+ @description("Block content when the ML PII classifier confidence exceeds threshold (80/100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss — defense-in-depth behind the pii_detected boolean.")
47
+ @severity("critical")
48
+ @tags("profile,advanced-detection,pii,ml-classifier,privacy,compliance")
49
+ @reject_message("Content blocked: the ML classifier detected personally identifiable information with high confidence. Even if specific PII types aren't identified, the content appears to contain personal data.")
50
+ forbid (
51
+ principal,
52
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool"],
53
+ resource
54
+ )
55
+ when {
56
+ context has pii_confidence && context.pii_confidence >= 80
57
+ };
58
+
59
+ // Forbid read_file and write_file for any principal when context.pii_detected is present and true.
60
+ @id("detection-block-pii-file-ops")
61
+ @name("Block file operations with PII")
62
+ @description("Block file reads and writes when PII is detected. Prevents agents from reading files containing personal data and from writing PII to new files where it could persist or be version-controlled.")
63
+ @severity("high")
64
+ @tags("profile,advanced-detection,pii,file-ops,data-protection,gdpr-art-32")
65
+ @reject_message("File operation blocked: personally identifiable information was detected. Files containing PII must not be read or written through AI agents.")
66
+ forbid (
67
+ principal,
68
+ action in [Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
69
+ resource
70
+ )
71
+ when {
72
+ context has pii_detected && context.pii_detected == true
73
+ };
@@ -0,0 +1,66 @@
1
+ // =============================================================================
2
+ // Advanced Detection — Granular Secrets
3
+ // =============================================================================
4
+ // Blocks specific high-risk credential types and API tokens using granular
5
+ // secret_types matching. Goes beyond the boolean contains_secrets detection
6
+ // to identify and block cloud provider keys, GitHub tokens, SSH keys,
7
+ // database credentials, and API tokens.
8
+ //
9
+ // These policies benefit any Guardrails deployment — not just coding agents.
10
+ //
11
+ // Adapted from Overwatch granular secret type policies for Guardrails namespace.
12
+ //
13
+ // Compliance:
14
+ // NIST 800-53 IA-5 (Authenticator Management)
15
+ // NIST 800-53 SC-28 (Protection of Information at Rest)
16
+ // MITRE ATT&CK T1552 (Unsecured Credentials)
17
+ // CIS Benchmark 1.4 (Secrets Management)
18
+ //
19
+ // Category: security
20
+ // Namespace: Guardrails
21
+ // =============================================================================
22
+
23
+ // Forbid process_prompt, call_tool, read_file, and write_file for any principal when context.secret_types contains any of the listed cloud, GitHub, SSH, or database credential types.
24
+ @id("detection-block-high-risk-secret-types")
25
+ @name("Block high-risk credential types")
26
+ @description("Block content containing cloud provider keys (AWS, GCP, Azure), GitHub tokens, SSH private keys, or database connection strings. These credential types pose the highest exfiltration risk and must never pass through AI agents.")
27
+ @severity("critical")
28
+ @tags("profile,advanced-detection,secrets,aws,gcp,azure,github,ssh,database,nist-ia-5,mitre-t1552")
29
+ @reject_message("Content blocked: high-risk credentials detected (cloud provider keys, GitHub tokens, SSH keys, or database credentials). Use a secrets manager — never pass credentials through AI agents.")
30
+ forbid (
31
+ principal,
32
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
33
+ resource
34
+ )
35
+ when {
36
+ context has secret_types &&
37
+ (context.secret_types.contains("aws_access_key") ||
38
+ context.secret_types.contains("aws_secret_key") ||
39
+ context.secret_types.contains("gcp_service_account") ||
40
+ context.secret_types.contains("azure_client_secret") ||
41
+ context.secret_types.contains("github_token") ||
42
+ context.secret_types.contains("github_pat") ||
43
+ context.secret_types.contains("ssh_private_key") ||
44
+ context.secret_types.contains("database_url"))
45
+ };
46
+
47
+ // Forbid process_prompt, call_tool, read_file, and write_file for any principal when context.secret_types contains api_key, bearer_token, jwt_token, oauth_token, or oauth_secret.
48
+ @id("detection-block-api-keys")
49
+ @name("Block API keys and bearer tokens")
50
+ @description("Block content containing generic API keys, bearer tokens, JWT tokens, and OAuth credentials. These are the most commonly leaked credential types in AI agent interactions.")
51
+ @severity("high")
52
+ @tags("profile,advanced-detection,secrets,api-key,bearer,jwt,oauth,nist-ia-5")
53
+ @reject_message("Content blocked: API keys, bearer tokens, or OAuth credentials detected. These must never be passed through AI agent prompts or tool calls.")
54
+ forbid (
55
+ principal,
56
+ action in [Guardrails::Action::"process_prompt", Guardrails::Action::"call_tool", Guardrails::Action::"read_file", Guardrails::Action::"write_file"],
57
+ resource
58
+ )
59
+ when {
60
+ context has secret_types &&
61
+ (context.secret_types.contains("api_key") ||
62
+ context.secret_types.contains("bearer_token") ||
63
+ context.secret_types.contains("jwt_token") ||
64
+ context.secret_types.contains("oauth_token") ||
65
+ context.secret_types.contains("oauth_secret"))
66
+ };
@@ -0,0 +1,35 @@
1
+ // =============================================================================
2
+ // Advanced Detection — Threat Severity
3
+ // =============================================================================
4
+ // Severity-based catch-all policy that blocks any content flagged as critical
5
+ // severity by detection engines. Acts as a safety net behind all other policies
6
+ // — if any detector reports critical severity, the content is blocked regardless
7
+ // of whether a specific category policy caught it.
8
+ //
9
+ // This policy benefits any Guardrails deployment — not just coding agents.
10
+ //
11
+ // Adapted from Overwatch threat severity aggregation for Guardrails namespace.
12
+ //
13
+ // Compliance:
14
+ // NIST 800-53 SI-3 (Malicious Code Protection)
15
+ // NIST 800-53 SI-4 (Information System Monitoring)
16
+ //
17
+ // Category: security
18
+ // Namespace: Guardrails
19
+ // =============================================================================
20
+
21
+ // Forbid process_prompt for any principal when context.highest_severity is "critical". NOTE(review): the description calls this a catch-all for all content, but the scope covers only process_prompt — confirm whether call_tool and file actions should also be included.
22
+ @id("detection-block-critical-severity")
23
+ @name("Block critical severity threats")
24
+ @description("Block all content when any detection engine reports critical severity. This is the ultimate catch-all — critical threats are blocked regardless of type or source. Acts as a safety net behind all other policies.")
25
+ @severity("critical")
26
+ @tags("profile,advanced-detection,severity,critical,catch-all,nist-si-3")
27
+ @reject_message("Your content was blocked because security scanners detected a critical-severity threat. This content cannot be processed.")
28
+ forbid (
29
+ principal,
30
+ action == Guardrails::Action::"process_prompt",
31
+ resource
32
+ )
33
+ when {
34
+ context has highest_severity && context.highest_severity == "critical"
35
+ };
@@ -0,0 +1,55 @@
1
+ // =============================================================================
2
+ // Code Agent — Encoding & Unicode Attack Protection
3
+ // =============================================================================
4
+ // Blocks invisible Unicode characters in tool arguments and file writes to
5
+ // prevent encoding-based prompt injection and persistent invisible payloads.
6
+ //
7
+ // Tool arguments and file content should be plain text — invisible characters
8
+ // (zero-width joiners, bidirectional overrides, tag characters) indicate
9
+ // payload injection or encoding evasion attempts.
10
+ //
11
+ // Adapted from Overwatch encoding attack policies for Guardrails namespace.
12
+ //
13
+ // Ref: EchoLeak CVE-2025-32711 (invisible prompt injection via Unicode)
14
+ // Rules File Backdoor (Pillar Security, March 2025)
15
+ //
16
+ // Compliance:
17
+ // OWASP LLM01 (Prompt Injection) — encoding evasion
18
+ // OWASP ASI01 (Agent Goal Hijack) — hidden instructions
19
+ // NIST 800-53 SI-10 (Information Input Validation)
20
+ //
21
+ // Category: security
22
+ // Namespace: Guardrails
23
+ // =============================================================================
24
+
25
+ // Forbid call_tool for any principal when context.contains_invisible_chars is present and true.
26
+ @id("code-block-invisible-tool-args")
27
+ @name("Block invisible characters in tool calls")
28
+ @description("Block tool execution when invisible Unicode characters are detected in tool arguments or content. Tool arguments should be plain text/JSON — invisible characters in tool calls are almost certainly malicious payload injection.")
29
+ @severity("critical")
30
+ @tags("profile,code-agent,encoding,unicode,invisible-chars,tools,owasp-asi01")
31
+ @reject_message("Tool execution blocked: invisible Unicode characters detected in tool arguments. Tool calls should contain only plain text — invisible characters indicate payload injection or encoding evasion.")
32
+ forbid (
33
+ principal,
34
+ action == Guardrails::Action::"call_tool",
35
+ resource
36
+ )
37
+ when {
38
+ context has contains_invisible_chars && context.contains_invisible_chars == true
39
+ };
40
+
41
+ // Forbid write_file for any principal when context.contains_invisible_chars is present and true.
42
+ @id("code-block-invisible-file-write")
43
+ @name("Block invisible characters in file writes")
44
+ @description("Block file writes when invisible Unicode characters are detected. Prevents persistence of invisible payloads in source code, config files, or documentation where they could later be processed by AI agents.")
45
+ @severity("high")
46
+ @tags("profile,code-agent,encoding,unicode,invisible-chars,file-write,owasp-asi01")
47
+ @reject_message("File write blocked: invisible Unicode characters detected in content. Writing invisible characters to files can create persistent backdoors that affect AI agents processing those files later.")
48
+ forbid (
49
+ principal,
50
+ action == Guardrails::Action::"write_file",
51
+ resource
52
+ )
53
+ when {
54
+ context has contains_invisible_chars && context.contains_invisible_chars == true
55
+ };
@@ -0,0 +1,148 @@
1
+ // =============================================================================
2
+ // Code Agent — Path Security
3
+ // =============================================================================
4
+ // Blocks access to sensitive file paths including environment files, credential
5
+ // files, system directories, and credential directories. Also blocks destructive
6
+ // file operations (delete, rmdir, unlink) by default.
7
+ //
8
+ // Adapted from Overwatch IDE security policies for Guardrails namespace.
9
+ //
10
+ // Compliance:
11
+ // NIST 800-53 AC-6 (Least Privilege)
12
+ // NIST 800-53 SC-28 (Protection of Information at Rest)
13
+ // MITRE ATT&CK T1552 (Unsecured Credentials)
14
+ // MITRE ATT&CK T1005 (Data from Local System)
15
+ // CIS Benchmark 1.4 (Secrets Management)
16
+ //
17
+ // Category: security
18
+ // Namespace: Guardrails
19
+ // =============================================================================
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Section 1: Environment File Protection
23
+ // Forbids read, write, and tool-call requests whose path contains ".env" anywhere.
24
+ // ---------------------------------------------------------------------------
25
+
26
+ @id("code-block-env-files")
27
+ @severity("high")
28
+ @name("Block .env file access")
29
+ @tags("profile,code-agent,path-security,env-files,secrets,nist-sc-28,mitre-t1552")
30
+ @description("Block access to .env files that commonly contain secrets, API keys, and database credentials. Environment files are the #1 source of accidental credential exposure in development workflows.")
31
+ @reject_message("Access to .env files is blocked because they commonly contain secrets, API keys, and database credentials. Use a secrets manager instead of .env files.")
32
+ forbid (
33
+     principal,
34
+     action in [Guardrails::Action::"read_file", Guardrails::Action::"write_file", Guardrails::Action::"call_tool"],
35
+     resource
36
+ )
37
+ when {
38
+     (context has path) && (context.path like "*.env*")
39
+ };
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // Section 2: Credential File Protection
43
+ // Blocks well-known credential files, whether nested under a directory or given as a bare relative path.
44
+ // ---------------------------------------------------------------------------
45
+
46
+ @id("code-block-credential-files")
47
+ @name("Block credential file access")
48
+ @description("Block access to common credential files: .netrc, .npmrc, .pypirc, Docker config, Kubernetes config, cloud provider credentials, and service account files.")
49
+ @severity("high")
50
+ @tags("profile,code-agent,path-security,credential-files,secrets,nist-sc-28,mitre-t1552")
51
+ @reject_message("Access to this credential file is blocked. Files like .netrc, .npmrc, .pypirc, and cloud provider config files commonly contain hardcoded credentials.")
52
+ forbid (
53
+     principal,
54
+     action in [Guardrails::Action::"read_file", Guardrails::Action::"write_file", Guardrails::Action::"call_tool"],
55
+     resource
56
+ )
57
+ when {
58
+     context has path &&  // each name is matched both bare (no leading directory) and after any "/"
59
+     (context.path like ".netrc" || context.path like "*/.netrc" ||
60
+      context.path like ".npmrc" || context.path like "*/.npmrc" ||
61
+      context.path like ".pypirc" || context.path like "*/.pypirc" ||
62
+      context.path like ".docker/config.json" || context.path like "*/.docker/config.json" ||
63
+      context.path like ".kube/config" || context.path like "*/.kube/config" ||
64
+      context.path like ".config/gcloud/*" || context.path like "*/.config/gcloud/*" ||
65
+      context.path like "credentials.json" || context.path like "*/credentials.json" ||
66
+      context.path like "service-account*.json" || context.path like "*/service-account*.json")
67
+ };
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // Section 3: System Directory Protection
71
+ // Blocks /etc, /proc, /sys, /root, /var/log, and /var/run: the directory itself and everything beneath it.
72
+ // ---------------------------------------------------------------------------
73
+
74
+ @id("code-block-system-paths")
75
+ @name("Block system directory access")
76
+ @description("Prevent access to sensitive system directories (/etc, /proc, /sys, /root, /var/log, /var/run). These directories contain system configuration, process information, and credentials that agents must never access.")
77
+ @severity("high")
78
+ @tags("profile,code-agent,path-security,system-paths,nist-ac-6,mitre-t1005")
79
+ @reject_message("Access blocked: this path targets a sensitive system directory. AI agents are restricted from accessing /etc, /proc, /sys, /root, /var/log, and /var/run directories.")
80
+ forbid (
81
+     principal,
82
+     action in [Guardrails::Action::"read_file", Guardrails::Action::"write_file", Guardrails::Action::"call_tool"],
83
+     resource
84
+ )
85
+ when {
86
+     context has path &&  // "<dir>/*" alone misses the directory itself, so it is matched exactly as well
87
+     (context.path == "/etc" || context.path like "/etc/*" ||
88
+      context.path == "/proc" || context.path like "/proc/*" ||
89
+      context.path == "/sys" || context.path like "/sys/*" ||
90
+      context.path == "/root" || context.path like "/root/*" ||
91
+      context.path == "/var/log" || context.path like "/var/log/*" ||
92
+      context.path == "/var/run" || context.path like "/var/run/*")
93
+ };
94
+
95
+ // ---------------------------------------------------------------------------
96
+ // Section 4: Credential Directory Protection
97
+ // Blocks SSH, cloud, and GPG credential directories and key files, including relative paths with no leading directory.
98
+ // ---------------------------------------------------------------------------
99
+
100
+ @id("code-block-credential-paths")
101
+ @name("Block credential directory access")
102
+ @description("Prevent access to SSH keys, cloud provider credentials, GPG keys, and other authentication material directories. These are primary targets for credential theft (MITRE T1552).")
103
+ @severity("critical")
104
+ @tags("profile,code-agent,path-security,credentials,ssh,aws,mitre-t1552")
105
+ @reject_message("Access blocked: this path targets a credential or key directory (.ssh, .aws, .gnupg, .config/gcloud). AI agents must never access authentication material.")
106
+ forbid (
107
+     principal,
108
+     action in [Guardrails::Action::"read_file", Guardrails::Action::"write_file", Guardrails::Action::"call_tool"],
109
+     resource
110
+ )
111
+ when {
112
+     context has path &&  // "*/x" needs a "/" before x, so each entry is also matched at the very start of the path
113
+     (context.path like ".ssh/*" || context.path like "*/.ssh/*" ||
114
+      context.path like ".aws/*" || context.path like "*/.aws/*" ||
115
+      context.path like ".gnupg/*" || context.path like "*/.gnupg/*" ||
116
+      context.path like ".config/gcloud/*" || context.path like "*/.config/gcloud/*" ||
117
+      context.path like ".azure/*" || context.path like "*/.azure/*" ||
118
+      context.path like "*.pem" ||
119
+      context.path like "id_rsa*" || context.path like "*/id_rsa*" ||
120
+      context.path like "id_ed25519*" || context.path like "*/id_ed25519*" ||
121
+      context.path like "id_ecdsa*" || context.path like "*/id_ecdsa*")
122
+ };
123
+
124
+ // ---------------------------------------------------------------------------
125
+ // Section 5: Destructive File Operations
126
+ // Forbids call_tool requests whose tool_name is one of the listed delete/remove tools.
127
+ // ---------------------------------------------------------------------------
128
+
129
+ @id("code-block-destructive-ops")
130
+ @severity("high")
131
+ @name("Block destructive file operations")
132
+ @tags("profile,code-agent,path-security,destructive,file-ops,nist-ac-3")
133
+ @description("Block file deletion, directory removal, and other destructive operations. Agents should not have delete access by default — destructive operations require explicit human approval.")
134
+ @reject_message("Tool execution was blocked: destructive file operations (delete, rmdir, unlink) are restricted to prevent data loss. Request explicit human approval for destructive actions.")
135
+ forbid (
136
+     principal,
137
+     action == Guardrails::Action::"call_tool",
138
+     resource
139
+ )
140
+ when {
141
+     context has tool_name &&
142
+     ["fs.delete",
143
+      "fs.rmdir",
144
+      "fs.unlink",
145
+      "fs.remove",
146
+      "delete_file",
147
+      "remove_directory"].contains(context.tool_name)
148
+ };