@highflame/policy 2.1.7 → 2.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/_schemas/guardrails/context.json +78 -0
- package/_schemas/guardrails/schema.cedarschema +28 -1
- package/_schemas/guardrails/templates/profiles/a2a_security/cross_origin.cedar +105 -0
- package/_schemas/guardrails/templates/profiles/a2a_security/escalation_detection.cedar +113 -0
- package/_schemas/guardrails/templates/profiles/a2a_security/identity_enforcement.cedar +118 -0
- package/_schemas/guardrails/templates/profiles/a2a_security/inter_agent_injection.cedar +134 -0
- package/_schemas/guardrails/templates/profiles/a2a_security/supply_chain.cedar +117 -0
- package/_schemas/guardrails/templates/profiles/advanced_detection/pii.cedar +73 -0
- package/_schemas/guardrails/templates/profiles/advanced_detection/secrets.cedar +66 -0
- package/_schemas/guardrails/templates/profiles/advanced_detection/threat_severity.cedar +35 -0
- package/_schemas/guardrails/templates/profiles/code_agent/encoding.cedar +55 -0
- package/_schemas/guardrails/templates/profiles/code_agent/path_security.cedar +148 -0
- package/_schemas/guardrails/templates/profiles/code_agent/supply_chain.cedar +120 -0
- package/_schemas/guardrails/templates/templates.json +121 -6
- package/_schemas/overwatch/schema.cedarschema +6 -0
- package/dist/guardrails-context.gen.d.ts +5 -0
- package/dist/guardrails-context.gen.js +5 -0
- package/dist/guardrails-defaults.gen.js +1315 -6
- package/dist/overwatch-defaults.gen.d.ts +1 -1
- package/dist/overwatch-defaults.gen.js +508 -1206
- package/dist/service-schemas.gen.d.ts +2 -2
- package/dist/service-schemas.gen.js +47 -1
- package/package.json +1 -1
|
@@ -3,13 +3,13 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Full Cedar schema for guardrails, embedded at codegen time.
|
|
5
5
|
*/
|
|
6
|
-
export declare const GUARDRAILS_SCHEMA = "// =============================================================================\n// Guardrails Cedar Schema\n// =============================================================================\n// Defines entity types, actions, and context attributes for the highflame-shield\n// guardrails service. This schema enables type-safe policy authoring and\n// validation in both Studio UI and backend.\n//\n// Service: highflame-shield (guardrails)\n// Namespace: Guardrails\n// =============================================================================\n\nnamespace Guardrails {\n // =========================================================================\n // Entity Types \u2014 ReBAC Hierarchy\n // =========================================================================\n // Entity hierarchy enables Cedar's `in` operator for policy scoping:\n // Account (org root)\n // \u2514\u2500\u2500 Project in [Account]\n // \u2514\u2500\u2500 App in [Project]\n // \u2514\u2500\u2500 Session in [App]\n //\n // Policy scoping examples:\n // resource == Guardrails::App::\"<uuid>\" \u2192 app-scoped\n // resource in Guardrails::Project::\"<uuid>\" \u2192 project-wide\n // resource in Guardrails::Account::\"<uuid>\" \u2192 org-wide\n // =========================================================================\n\n /// Account represents an organization (top-level tenant)\n entity Account;\n\n /// Project represents a project within an account\n entity Project in [Account];\n\n /// User represents a principal (human or service) making requests\n entity User;\n\n /// Agent represents an AI agent (Claude, Cursor, Copilot, etc.) making requests\n entity Agent;\n\n /// App represents a protected application (guardrails-enabled LLM app)\n entity App in [Project];\n\n /// Session represents an agentic conversation session with state tracking\n entity Session in [App];\n\n // =========================================================================\n // Actions\n // =========================================================================\n\n /// Process user prompts and AI responses for security threats and content violations\n action \"process_prompt\" appliesTo {\n principal: [User, Agent],\n resource: [App, Session],\n context: ProcessPromptContext\n };\n\n /// Execute tool calls (shell, file operations, MCP tools)\n action \"call_tool\" appliesTo {\n principal: [User, Agent],\n resource: [Session],\n context: CallToolContext\n };\n\n /// Read file operations\n action \"read_file\" appliesTo {\n principal: [User, Agent],\n resource: [Session],\n context: FileReadContext\n };\n\n /// Write file operations\n action \"write_file\" appliesTo {\n principal: [User, Agent],\n resource: [Session],\n context: FileWriteContext\n };\n\n /// Connect to an MCP server\n action \"connect_server\" appliesTo {\n principal: [User, Agent],\n resource: [Session],\n context: ConnectServerContext\n };\n\n // =========================================================================\n // Context Types (Action-Specific)\n // =========================================================================\n\n /// Context for process_prompt action (user prompts & AI responses)\n type ProcessPromptContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n \"direction\": String, // \"input\" | \"output\"\n \"content_type\": String, // \"prompt\" | \"response\" | \"tool_call\" | \"file\"\n \"detector_count\": Long,\n\n // Security - Injection & Jailbreak (optional)\n \"injection_confidence\"?: Long, // Combined injection confidence: MAX(pulse, deep_context)\n \"jailbreak_confidence\"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)\n \"injection_pulse_score\"?: Long, // 0-100 Pulse single-turn classifier\n \"injection_deep_context_score\"?: Long, // 0-100 DeepContext multi-turn\n \"jailbreak_pulse_score\"?: Long, // 0-100 Pulse single-turn classifier\n \"jailbreak_deep_context_score\"?: Long, // 0-100 DeepContext multi-turn\n \"injection_type\"?: String, // \"prompt\" | \"sql\" | \"command\" | \"none\"\n\n // Privacy - Secrets (optional)\n \"contains_secrets\"?: Bool,\n \"secret_count\"?: Long,\n \"secret_types\"?: Set<String>, // [\"aws_access_key\", \"github_token\", ...]\n\n // Privacy - PII (optional)\n \"pii_detected\"?: Bool,\n \"pii_count\"?: Long,\n \"pii_types\"?: Set<String>, // [\"email\", \"phone\", \"ssn\", \"credit_card\", ...]\n\n // Trust & Safety - Toxicity (optional)\n \"violence_score\"?: Long, // 0-100\n \"hate_speech_score\"?: Long, // 0-100\n \"sexual_score\"?: Long, // 0-100\n \"weapons_score\"?: Long, // 0-100\n \"crime_score\"?: Long, // 0-100\n \"profanity_score\"?: Long, // 0-100\n\n // Semantic - Topic Classification (optional)\n \"content_topics\"?: Set<String>, // [\"controlled_substances\", \"weapons_manufacturing\", ...]\n \"topic_confidence\"?: Long, // 0-100\n\n // Security - Invisible Character Detection (optional)\n \"contains_invisible_chars\"?: Bool,\n \"invisible_chars_score\"?: Long, // 0-100\n\n // Security - Pattern Detection (optional)\n \"command_injection_detected\"?: Bool,\n \"command_injection_type\"?: String, // \"reverse_shell\" | \"privilege_escalation\" | \"code_execution\" | \"destructive_command\" | \"data_exfiltration\"\n \"command_injection_score\"?: Long, // 0-100\n \"path_traversal_detected\"?: Bool,\n \"path_traversal_severity\"?: String, // \"critical\" | \"high\" | \"medium\" | \"low\" | \"none\"\n \"path_traversal_type\"?: String,\n \"sql_injection_detected\"?: Bool,\n \"sql_injection_type\"?: String, // \"tautology\" | \"union_based\" | \"destructive\" | \"blind\" | \"error_based\"\n \"sql_injection_score\"?: Long, // 0-100\n\n // Security - Cross-Origin Escalation (optional)\n \"cross_origin_detected\"?: Bool,\n \"cross_origin_type\"?: String, // \"cross_origin_tool\" | \"cross_origin_server\" | \"none\"\n \"cross_origin_score\"?: Long, // 0-100\n\n // Security - Encoded Injection (optional)\n \"encoded_content_detected\"?: Bool,\n \"encoded_types\"?: Set<String>, // [\"base64\", \"hex\", \"unicode\", \"url\", ...]\n \"encoded_count\"?: Long,\n \"encoded_score\"?: Long, // 0-100\n\n // Language & Script Detection (optional)\n \"detected_language\"?: String, // ISO language code\n \"is_english\"?: Bool,\n \"language_confidence\"?: Long, // 0-100\n \"detected_script\"?: String, // \"latin\" | \"cyrillic\" | \"arabic\" | \"unknown\" | ...\n \"is_latin_script\"?: Bool,\n \"script_confidence\"?: Long, // 0-100\n\n // Content Analysis (optional)\n \"hallucination_score\"?: Long,\n \"factuality_score\"?: Long, // 0-100\n \"sentiment_score\"?: Long,\n \"contains_code\"?: Bool,\n \"code_languages\"?: Set<String>,\n \"code_ratio\"?: Long, // 0-100, percentage of content that is code\n \"keyword_matched\"?: Bool,\n \"keyword_categories\"?: Set<String>,\n \"keyword_count\"?: Long,\n \"contains_non_ascii\"?: Bool,\n \"phishing_detected\"?: Bool,\n \"content_safety_score\"?: Long, // 0-100\n \"content_safety_blocked\"?: Bool,\n\n // Agentic - Multi-Turn Context (optional)\n \"conversation_turn\"?: Long,\n \"multi_turn_detection\"?: Bool,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n // Present when the request is made by an AI agent (API key or JWT with agent claims).\n // Empty strings for human user requests. Use these to write agent-specific policies.\n \"agent_id\"?: String, // Unique agent identifier (e.g., \"agent_research_v3\")\n \"agent_type\"?: String, // \"orchestrator\" | \"autonomous\" | \"tool_agent\" | \"human_proxy\"\n \"agent_trust_level\"?: String, // \"first_party\" | \"verified_third_party\" | \"unverified\"\n \"agent_framework\"?: String, // Agent framework (e.g., \"claude-code\", \"langchain\", \"crewai\")\n \"agent_publisher\"?: String, // Organization that published the agent\n\n };\n\n /// Context for call_tool action (agentic tool execution)\n type CallToolContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n\n // Tool Risk (optional)\n \"tool_name\"?: String, // \"shell\", \"write_file\", \"http_post\", etc.\n \"tool_risk_score\"?: Long, // 0-100\n \"tool_is_sensitive\"?: Bool,\n \"tool_category\"?: String, // \"safe\" | \"sensitive\" | \"dangerous\"\n \"tool_is_builtin\"?: Bool,\n\n // MCP context (optional \u2014 only present for MCP tool calls)\n \"mcp_server\"?: String, // MCP server name (e.g., \"github\", \"filesystem\")\n \"mcp_tool\"?: String, // MCP tool name within the server\n \"mcp_server_verified\"?: Bool, // Whether server is from verified registry\n\n // Agentic - Behavioral Patterns (optional)\n \"suspicious_pattern\"?: Bool,\n \"pattern_type\"?: String, // \"data_exfiltration\" | \"secret_exfiltration\" | \"db_exfiltration\" | \"none\"\n \"sequence_risk\"?: Long, // 0-100\n\n // Agentic - Loop Detection (optional)\n \"loop_detected\"?: Bool,\n \"loop_count\"?: Long,\n \"loop_tool\"?: String,\n\n // Agentic - Budget Control (optional)\n \"budget_remaining_pct\"?: Long, // 0-100\n \"budget_exceeded\"?: Bool,\n\n // Semantic - Topic Classification (optional)\n \"content_topics\"?: Set<String>, // [\"controlled_substances\", \"weapons_manufacturing\", ...]\n \"topic_confidence\"?: Long, // 0-100\n\n // Security checks on tool arguments (optional)\n \"contains_secrets\"?: Bool,\n \"secret_count\"?: Long,\n \"secret_types\"?: Set<String>,\n \"pii_detected\"?: Bool,\n \"pii_types\"?: Set<String>,\n \"injection_confidence\"?: Long,\n \"injection_pulse_score\"?: Long, // 0-100 Pulse single-turn classifier\n \"injection_deep_context_score\"?: Long, // 0-100 DeepContext multi-turn\n\n // Security - Pattern Detection (optional)\n \"command_injection_detected\"?: Bool,\n \"command_injection_type\"?: String,\n \"command_injection_score\"?: Long, // 0-100\n \"path_traversal_detected\"?: Bool,\n \"path_traversal_severity\"?: String,\n \"path_traversal_type\"?: String,\n \"sql_injection_detected\"?: Bool,\n \"sql_injection_type\"?: String,\n \"sql_injection_score\"?: Long, // 0-100\n\n // Security - Cross-Origin Escalation (optional)\n \"cross_origin_detected\"?: Bool,\n \"cross_origin_type\"?: String,\n \"cross_origin_score\"?: Long, // 0-100\n\n // Security - Encoded Injection (optional)\n \"encoded_content_detected\"?: Bool,\n \"encoded_types\"?: Set<String>,\n \"encoded_count\"?: Long,\n \"encoded_score\"?: Long, // 0-100\n\n // Agentic - Agent Security (optional)\n \"tool_poisoning_detected\"?: Bool,\n \"tool_poisoning_score\"?: Long, // 0-100\n \"tool_poisoning_type\"?: String, // \"hidden_instructions\" | \"system_prompt_injection\" | \"authority_hijack\"\n \"rug_pull_detected\"?: Bool,\n \"rug_pull_score\"?: Long, // 0-100\n \"rug_pull_type\"?: String, // \"risk_spike\" | \"pattern_change\" | \"combined\" | \"none\"\n\n // Agentic - MCP Risk (optional)\n \"mcp_config_risk\"?: Bool,\n \"mcp_risk_type\"?: String, // \"inline_execution\" | \"suspicious_url\" | \"cross_origin\"\n \"mcp_risk_score\"?: Long, // 0-100\n\n // Agentic - Multi-Turn Context (optional)\n \"conversation_turn\"?: Long,\n \"multi_turn_detection\"?: Bool,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n \"agent_id\"?: String,\n \"agent_type\"?: String,\n \"agent_trust_level\"?: String,\n \"agent_framework\"?: String,\n \"agent_publisher\"?: String,\n\n };\n\n /// Context for read_file action\n type FileReadContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n\n // Security checks on file content (optional)\n \"contains_secrets\"?: Bool,\n \"secret_count\"?: Long,\n \"secret_types\"?: Set<String>,\n \"pii_detected\"?: Bool,\n \"pii_types\"?: Set<String>,\n\n // Security - Path Traversal (optional)\n \"path_traversal_detected\"?: Bool,\n \"path_traversal_severity\"?: String,\n \"path_traversal_type\"?: String,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n \"agent_id\"?: String,\n \"agent_type\"?: String,\n \"agent_trust_level\"?: String,\n \"agent_framework\"?: String,\n \"agent_publisher\"?: String,\n\n };\n\n /// Context for write_file action\n type FileWriteContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n\n // Security checks on content being written (optional)\n \"contains_secrets\"?: Bool,\n \"secret_count\"?: Long,\n \"secret_types\"?: Set<String>,\n \"pii_detected\"?: Bool,\n \"pii_types\"?: Set<String>,\n\n // Security - Path Traversal (optional)\n \"path_traversal_detected\"?: Bool,\n \"path_traversal_severity\"?: String,\n \"path_traversal_type\"?: String,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n \"agent_id\"?: String,\n \"agent_type\"?: String,\n \"agent_trust_level\"?: String,\n \"agent_framework\"?: String,\n \"agent_publisher\"?: String,\n\n };\n\n /// Context for connect_server action (MCP server connections)\n type ConnectServerContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n\n // MCP context (optional)\n \"mcp_server\"?: String, // MCP server name (e.g., \"github\", \"filesystem\")\n \"mcp_server_verified\"?: Bool, // Whether server is from verified registry\n\n // Agentic - Agent Security (optional)\n \"tool_poisoning_detected\"?: Bool,\n \"tool_poisoning_score\"?: Long,\n \"tool_poisoning_type\"?: String,\n\n // Agentic - MCP Risk (optional)\n \"mcp_config_risk\"?: Bool,\n \"mcp_risk_type\"?: String,\n \"mcp_risk_score\"?: Long,\n\n // Security - Cross-Origin Escalation (optional)\n \"cross_origin_detected\"?: Bool,\n \"cross_origin_type\"?: String,\n \"cross_origin_score\"?: Long,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n \"agent_id\"?: String,\n \"agent_type\"?: String,\n \"agent_trust_level\"?: String,\n \"agent_framework\"?: String,\n \"agent_publisher\"?: String,\n\n };\n}\n";
|
|
6
|
+
export declare const GUARDRAILS_SCHEMA = "// =============================================================================\n// Guardrails Cedar Schema\n// =============================================================================\n// Defines entity types, actions, and context attributes for the highflame-shield\n// guardrails service. This schema enables type-safe policy authoring and\n// validation in both Studio UI and backend.\n//\n// Service: highflame-shield (guardrails)\n// Namespace: Guardrails\n// =============================================================================\n\nnamespace Guardrails {\n // =========================================================================\n // Entity Types \u2014 ReBAC Hierarchy\n // =========================================================================\n // Entity hierarchy enables Cedar's `in` operator for policy scoping:\n // Account (org root)\n // \u2514\u2500\u2500 Project in [Account]\n // \u2514\u2500\u2500 App in [Project]\n // \u2514\u2500\u2500 Session in [App]\n //\n // Policy scoping examples:\n // resource == Guardrails::App::\"<uuid>\" \u2192 app-scoped\n // resource in Guardrails::Project::\"<uuid>\" \u2192 project-wide\n // resource in Guardrails::Account::\"<uuid>\" \u2192 org-wide\n // =========================================================================\n\n /// Account represents an organization (top-level tenant)\n entity Account;\n\n /// Project represents a project within an account\n entity Project in [Account];\n\n /// User represents a principal (human or service) making requests\n entity User;\n\n /// Agent represents an AI agent (Claude, Cursor, Copilot, etc.) making requests\n entity Agent;\n\n /// App represents a protected application (guardrails-enabled LLM app)\n entity App in [Project];\n\n /// Session represents an agentic conversation session with state tracking\n entity Session in [App];\n\n // =========================================================================\n // Actions\n // =========================================================================\n\n /// Process user prompts and AI responses for security threats and content violations\n action \"process_prompt\" appliesTo {\n principal: [User, Agent],\n resource: [App, Session],\n context: ProcessPromptContext\n };\n\n /// Execute tool calls (shell, file operations, MCP tools)\n action \"call_tool\" appliesTo {\n principal: [User, Agent],\n resource: [Session],\n context: CallToolContext\n };\n\n /// Read file operations\n action \"read_file\" appliesTo {\n principal: [User, Agent],\n resource: [Session],\n context: FileReadContext\n };\n\n /// Write file operations\n action \"write_file\" appliesTo {\n principal: [User, Agent],\n resource: [Session],\n context: FileWriteContext\n };\n\n /// Connect to an MCP server\n action \"connect_server\" appliesTo {\n principal: [User, Agent],\n resource: [Session],\n context: ConnectServerContext\n };\n\n // =========================================================================\n // Context Types (Action-Specific)\n // =========================================================================\n\n /// Context for process_prompt action (user prompts & AI responses)\n type ProcessPromptContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n \"direction\": String, // \"input\" | \"output\"\n \"content_type\": String, // \"prompt\" | \"response\" | \"tool_call\" | \"file\"\n \"detector_count\": Long,\n\n // Security - Injection & Jailbreak (optional)\n \"injection_confidence\"?: Long, // Combined injection confidence: MAX(pulse, deep_context)\n \"jailbreak_confidence\"?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)\n \"injection_pulse_score\"?: Long, // 0-100 Pulse single-turn classifier\n \"injection_deep_context_score\"?: Long, // 0-100 DeepContext multi-turn\n \"jailbreak_pulse_score\"?: Long, // 0-100 Pulse single-turn classifier\n \"jailbreak_deep_context_score\"?: Long, // 0-100 DeepContext multi-turn\n \"injection_type\"?: String, // \"prompt\" | \"sql\" | \"command\" | \"none\"\n\n // Privacy - Secrets (optional)\n \"contains_secrets\"?: Bool,\n \"secret_count\"?: Long,\n \"secret_types\"?: Set<String>, // [\"aws_access_key\", \"github_token\", ...]\n\n // Privacy - PII (optional)\n \"pii_detected\"?: Bool,\n \"pii_count\"?: Long,\n \"pii_types\"?: Set<String>, // [\"email\", \"phone\", \"ssn\", \"credit_card\", ...]\n \"pii_confidence\"?: Long, // PII ML classifier confidence (0-100) \u2014 catches novel PII patterns that escape regex detection\n\n // Threat Severity Aggregation (optional)\n \"highest_severity\"?: String, // Highest severity across all detectors: \"critical\" | \"high\" | \"medium\" | \"low\" | \"none\"\n\n // Trust & Safety - Toxicity (optional)\n \"violence_score\"?: Long, // 0-100\n \"hate_speech_score\"?: Long, // 0-100\n \"sexual_score\"?: Long, // 0-100\n \"weapons_score\"?: Long, // 0-100\n \"crime_score\"?: Long, // 0-100\n \"profanity_score\"?: Long, // 0-100\n\n // Semantic - Topic Classification (optional)\n \"content_topics\"?: Set<String>, // [\"controlled_substances\", \"weapons_manufacturing\", ...]\n \"topic_confidence\"?: Long, // 0-100\n\n // Security - Invisible Character Detection (optional)\n \"contains_invisible_chars\"?: Bool,\n \"invisible_chars_score\"?: Long, // 0-100\n\n // Security - Pattern Detection (optional)\n \"command_injection_detected\"?: Bool,\n \"command_injection_type\"?: String, // \"reverse_shell\" | \"privilege_escalation\" | \"code_execution\" | \"destructive_command\" | \"data_exfiltration\"\n \"command_injection_score\"?: Long, // 0-100\n \"path_traversal_detected\"?: Bool,\n \"path_traversal_severity\"?: String, // \"critical\" | \"high\" | \"medium\" | \"low\" | \"none\"\n \"path_traversal_type\"?: String,\n \"sql_injection_detected\"?: Bool,\n \"sql_injection_type\"?: String, // \"tautology\" | \"union_based\" | \"destructive\" | \"blind\" | \"error_based\"\n \"sql_injection_score\"?: Long, // 0-100\n\n // Security - Cross-Origin Escalation (optional)\n \"cross_origin_detected\"?: Bool,\n \"cross_origin_type\"?: String, // \"cross_origin_tool\" | \"cross_origin_server\" | \"none\"\n \"cross_origin_score\"?: Long, // 0-100\n\n // Security - Encoded Injection (optional)\n \"encoded_content_detected\"?: Bool,\n \"encoded_types\"?: Set<String>, // [\"base64\", \"hex\", \"unicode\", \"url\", ...]\n \"encoded_count\"?: Long,\n \"encoded_score\"?: Long, // 0-100\n\n // Language & Script Detection (optional)\n \"detected_language\"?: String, // ISO language code\n \"is_english\"?: Bool,\n \"language_confidence\"?: Long, // 0-100\n \"detected_script\"?: String, // \"latin\" | \"cyrillic\" | \"arabic\" | \"unknown\" | ...\n \"is_latin_script\"?: Bool,\n \"script_confidence\"?: Long, // 0-100\n\n // Content Analysis (optional)\n \"hallucination_score\"?: Long,\n \"factuality_score\"?: Long, // 0-100\n \"sentiment_score\"?: Long,\n \"contains_code\"?: Bool,\n \"code_languages\"?: Set<String>,\n \"code_ratio\"?: Long, // 0-100, percentage of content that is code\n \"keyword_matched\"?: Bool,\n \"keyword_categories\"?: Set<String>,\n \"keyword_count\"?: Long,\n \"contains_non_ascii\"?: Bool,\n \"phishing_detected\"?: Bool,\n \"content_safety_score\"?: Long, // 0-100\n \"content_safety_blocked\"?: Bool,\n\n // Agentic - Multi-Turn Context (optional)\n \"conversation_turn\"?: Long,\n \"multi_turn_detection\"?: Bool,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n // Present when the request is made by an AI agent (API key or JWT with agent claims).\n // Empty strings for human user requests. Use these to write agent-specific policies.\n \"agent_id\"?: String, // Unique agent identifier (e.g., \"agent_research_v3\")\n \"agent_type\"?: String, // \"orchestrator\" | \"autonomous\" | \"tool_agent\" | \"human_proxy\"\n \"agent_trust_level\"?: String, // \"first_party\" | \"verified_third_party\" | \"unverified\"\n \"agent_framework\"?: String, // Agent framework (e.g., \"claude-code\", \"langchain\", \"crewai\")\n \"agent_publisher\"?: String, // Organization that published the agent\n\n };\n\n /// Context for call_tool action (agentic tool execution)\n type CallToolContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n\n // Tool Risk (optional)\n \"tool_name\"?: String, // \"shell\", \"write_file\", \"http_post\", etc.\n \"tool_risk_score\"?: Long, // 0-100\n \"tool_is_sensitive\"?: Bool,\n \"tool_category\"?: String, // \"safe\" | \"sensitive\" | \"dangerous\"\n \"tool_is_builtin\"?: Bool,\n\n // MCP context (optional \u2014 only present for MCP tool calls)\n \"mcp_server\"?: String, // MCP server name (e.g., \"github\", \"filesystem\")\n \"mcp_tool\"?: String, // MCP tool name within the server\n \"mcp_server_verified\"?: Bool, // Whether server is from verified registry\n\n // Agentic - Behavioral Patterns (optional)\n \"suspicious_pattern\"?: Bool,\n \"pattern_type\"?: String, // \"data_exfiltration\" | \"secret_exfiltration\" | \"db_exfiltration\" | \"credential_theft\" | \"destructive_sequence\" | \"none\"\n \"sequence_risk\"?: Long, // 0-100\n\n // Agentic - Loop Detection (optional)\n \"loop_detected\"?: Bool,\n \"loop_count\"?: Long,\n \"loop_tool\"?: String,\n\n // Agentic - Budget Control (optional)\n \"budget_remaining_pct\"?: Long, // 0-100\n \"budget_exceeded\"?: Bool,\n\n // Semantic - Topic Classification (optional)\n \"content_topics\"?: Set<String>, // [\"controlled_substances\", \"weapons_manufacturing\", ...]\n \"topic_confidence\"?: Long, // 0-100\n\n // Security checks on tool arguments (optional)\n \"contains_secrets\"?: Bool,\n \"secret_count\"?: Long,\n \"secret_types\"?: Set<String>,\n \"pii_detected\"?: Bool,\n \"pii_types\"?: Set<String>,\n \"pii_count\"?: Long, // Number of PII pattern matches in tool content\n \"pii_confidence\"?: Long, // PII ML classifier confidence (0-100)\n \"injection_confidence\"?: Long,\n \"injection_pulse_score\"?: Long, // 0-100 Pulse single-turn classifier\n \"injection_deep_context_score\"?: Long, // 0-100 DeepContext multi-turn\n\n // Security - Pattern Detection (optional)\n \"command_injection_detected\"?: Bool,\n \"command_injection_type\"?: String,\n \"command_injection_score\"?: Long, // 0-100\n \"path_traversal_detected\"?: Bool,\n \"path_traversal_severity\"?: String,\n \"path_traversal_type\"?: String,\n \"sql_injection_detected\"?: Bool,\n \"sql_injection_type\"?: String,\n \"sql_injection_score\"?: Long, // 0-100\n\n // Security - Cross-Origin Escalation (optional)\n \"cross_origin_detected\"?: Bool,\n \"cross_origin_type\"?: String,\n \"cross_origin_score\"?: Long, // 0-100\n\n // File & Path (optional \u2014 for path-based access control policies)\n \"path\"?: String, // File path when tool operates on files\n\n // Security - Invisible Character Detection in tool args (optional)\n \"contains_invisible_chars\"?: Bool, // Whether invisible Unicode chars detected in tool args\n \"invisible_chars_score\"?: Long, // Invisible character attack severity (0-100)\n\n // Security - Encoded Injection (optional)\n \"encoded_content_detected\"?: Bool,\n \"encoded_types\"?: Set<String>,\n \"encoded_count\"?: Long,\n \"encoded_score\"?: Long, // 0-100\n\n // Agentic - Agent Security (optional)\n \"tool_poisoning_detected\"?: Bool,\n \"tool_poisoning_score\"?: Long, // 0-100\n \"tool_poisoning_type\"?: String, // \"hidden_instructions\" | \"system_prompt_injection\" | \"authority_hijack\"\n \"rug_pull_detected\"?: Bool,\n \"rug_pull_score\"?: Long, // 0-100\n \"rug_pull_type\"?: String, // \"risk_spike\" | \"pattern_change\" | \"combined\" | \"none\"\n\n // Agentic - Indirect Prompt Injection (optional \u2014 injection via tool outputs/retrieved content)\n \"indirect_injection_score\"?: Long, // Indirect injection risk score (0-100)\n \"indirect_injection_type\"?: String, // Type of indirect injection detected\n\n // Agentic - MCP Risk (optional)\n \"mcp_config_risk\"?: Bool,\n \"mcp_risk_type\"?: String, // \"inline_execution\" | \"suspicious_url\" | \"cross_origin\"\n \"mcp_risk_score\"?: Long, // 0-100\n\n // Agentic - Multi-Turn Context (optional)\n \"conversation_turn\"?: Long,\n \"multi_turn_detection\"?: Bool,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n \"agent_id\"?: String,\n \"agent_type\"?: String,\n \"agent_trust_level\"?: String,\n \"agent_framework\"?: String,\n \"agent_publisher\"?: String,\n\n };\n\n /// Context for read_file action\n type FileReadContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n\n // File path (optional \u2014 for path-based access control policies)\n \"path\"?: String, // File path being read\n\n // Security checks on file content (optional)\n \"contains_secrets\"?: Bool,\n \"secret_count\"?: Long,\n \"secret_types\"?: Set<String>,\n \"pii_detected\"?: Bool,\n \"pii_types\"?: Set<String>,\n\n // Security - Path Traversal (optional)\n \"path_traversal_detected\"?: Bool,\n \"path_traversal_severity\"?: String,\n \"path_traversal_type\"?: String,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n \"agent_id\"?: String,\n \"agent_type\"?: String,\n \"agent_trust_level\"?: String,\n \"agent_framework\"?: String,\n \"agent_publisher\"?: String,\n\n };\n\n /// Context for write_file action\n type FileWriteContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n\n // File path (optional \u2014 for path-based access control policies)\n \"path\"?: String, // File path being written\n\n // Security - Invisible Character Detection in write content (optional)\n \"contains_invisible_chars\"?: Bool, // Whether invisible Unicode chars detected in write content\n \"invisible_chars_score\"?: Long, // Invisible character attack severity (0-100)\n\n // Security checks on content being written (optional)\n \"contains_secrets\"?: Bool,\n \"secret_count\"?: Long,\n \"secret_types\"?: Set<String>,\n \"pii_detected\"?: Bool,\n \"pii_types\"?: Set<String>,\n\n // Security - Path Traversal (optional)\n \"path_traversal_detected\"?: Bool,\n \"path_traversal_severity\"?: String,\n \"path_traversal_type\"?: String,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n \"agent_id\"?: String,\n \"agent_type\"?: String,\n \"agent_trust_level\"?: String,\n \"agent_framework\"?: String,\n \"agent_publisher\"?: String,\n\n };\n\n /// Context for connect_server action (MCP server connections)\n type ConnectServerContext = {\n // Core metadata (required)\n \"request_id\": String,\n \"timestamp\": Long,\n\n // MCP context (optional)\n \"mcp_server\"?: String, // MCP server name (e.g., \"github\", \"filesystem\")\n \"mcp_server_verified\"?: Bool, // Whether server is from verified registry\n\n // Agentic - Agent Security (optional)\n \"tool_poisoning_detected\"?: Bool,\n \"tool_poisoning_score\"?: Long,\n \"tool_poisoning_type\"?: String,\n\n // Agentic - MCP Risk (optional)\n \"mcp_config_risk\"?: Bool,\n \"mcp_risk_type\"?: String,\n \"mcp_risk_score\"?: Long,\n\n // Security - Cross-Origin Escalation (optional)\n \"cross_origin_detected\"?: Bool,\n \"cross_origin_type\"?: String,\n \"cross_origin_score\"?: Long,\n\n // Session Detection History \u2014 cross-turn sticky flags (optional)\n \"session_pii_detected\"?: Bool,\n \"session_pii_types\"?: Set<String>,\n \"session_secrets_detected\"?: Bool,\n \"session_secret_types\"?: Set<String>,\n \"session_injection_detected\"?: Bool,\n \"session_command_injection\"?: Bool,\n \"session_threat_turns\"?: Long,\n \"session_max_injection_score\"?: Long,\n \"session_max_jailbreak_score\"?: Long,\n \"session_max_command_injection_score\"?: Long,\n \"session_max_pii_score\"?: Long,\n \"session_max_secret_score\"?: Long,\n \"session_cumulative_risk_score\"?: Long,\n\n // Agent Identity \u2014 authenticated agent principal metadata (optional)\n \"agent_id\"?: String,\n \"agent_type\"?: String,\n \"agent_trust_level\"?: String,\n \"agent_framework\"?: String,\n \"agent_publisher\"?: String,\n\n };\n}\n";
|
|
7
7
|
/**
|
|
8
8
|
* Overwatch Cedar schema
|
|
9
9
|
*
|
|
10
10
|
* Full Cedar schema for overwatch, embedded at codegen time.
|
|
11
11
|
*/
|
|
12
|
-
export declare const OVERWATCH_SCHEMA = "// Overwatch Cedar Schema\n// ===================================\n// IDE Agent Security & Policy Enforcement\n//\n// Overwatch protects IDE agent operations (prompts, tool calls, file access, MCP connections)\n// by evaluating threats detected by the detection engine pipeline against Cedar policies.\n//\n// Architecture:\n// User/Agent \u2192 IDE Hook \u2192 Detection Engine \u2192 Cedar Policy \u2192 Allow/Deny\n//\n// Supported IDEs:\n// - Cursor (beforeSubmitPrompt, beforeShellExecution, beforeMCPExecution, etc.)\n// - Claude Code (UserPromptSubmit, PreToolUse)\n// - GitHub Copilot (userPromptSubmitted, preToolUse)\n//\n// Threat Coverage:\n// - OWASP Top 10 for LLM Applications 2025 (LLM01-LLM10)\n// - OWASP Top 10 for Agentic Applications (ASI01-ASI10)\n// - OWASP MCP Top 10 (MCP01-MCP05)\n// - MITRE ATLAS Agent Techniques (AML.T0051, AML.T0080-T0082)\n\nnamespace Overwatch {\n\n// =============================================================================\n// ENTITIES - Tenant Hierarchy (ReBAC)\n// =============================================================================\n// Aligned with Guardrails entity hierarchy (Account \u2192 Project).\n// Overwatch does not have app-specific policies, so App is omitted.\n//\n// Entity hierarchy enables Cedar's `in` operator for policy scoping:\n// Account (org root)\n// \u2514\u2500\u2500 Project in [Account]\n// \u2514\u2500\u2500 Tool/Server/FilePath/LlmPrompt in [Project]\n//\n// Policy scoping examples:\n// resource == Overwatch::Tool::\"shell\" \u2192 specific tool\n// resource in Overwatch::Project::\"<uuid>\" \u2192 project-wide\n// resource in Overwatch::Account::\"<uuid>\" \u2192 org-wide\n\n/// Account represents an organization (top-level tenant)\nentity Account;\n\n/// Project represents a project within an account\nentity Project in [Account];\n\n// =============================================================================\n// ENTITIES - Principals\n// =============================================================================\n\n/// Human user or service account making requests to the IDE\nentity User;\n\n/// AI agent (Claude, GitHub Copilot, etc.)\nentity Agent;\n\n// =============================================================================\n// ENTITIES - Resources (scoped under Project)\n// =============================================================================\n\n/// LLM prompt or session \u2014 resource for process_prompt action\nentity LlmPrompt in [Project];\n\n/// MCP tool or native IDE tool \u2014 resource for call_tool action\nentity Tool in [Project];\n\n/// MCP server \u2014 resource for connect_server action\nentity Server in [Project];\n\n/// File system path \u2014 resource for read_file/write_file/call_tool actions\nentity FilePath in [Project];\n\n// =============================================================================\n// ACTIONS\n// =============================================================================\n\n// User submits a prompt or receives AI response\n// Threat focus: injection, jailbreak, secrets, PII, content safety, invisible chars\naction process_prompt appliesTo {\n principal: [User, Agent],\n resource: [LlmPrompt],\n context: {\n // --- Event & Source ---\n content: String, // Raw content being scanned\n source: String, // IDE source: \"cursor\", \"claudecode\", \"github_copilot\"\n event: String, // Hook event name\n user_email: String, // User identifier\n\n // --- Workspace ---\n cwd?: String, // Current working directory\n workspace_root?: String, // Workspace/repository root\n\n // --- Threat Detection (from detection engine pipeline) ---\n threat_count: Long, // Total threats detected\n highest_severity: String, // \"critical\", \"high\", \"medium\", \"low\", \"none\"\n threat_categories: Set<String>, // Threat category names\n detected_threats: Set<String>, // Detection rule names that matched\n max_threat_severity: Long, // Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)\n contains_secrets: Bool, // Whether secrets/credentials detected\n\n // --- Secrets (granular) ---\n secret_types?: Set<String>, // Types: \"aws_access_key\", \"github_token\", \"ssh_private_key\", etc.\n secret_count?: Long, // Number of distinct secrets found\n\n // --- PII Detection ---\n pii_detected?: Bool, // Whether any PII patterns matched\n pii_types?: Set<String>, // Types: \"ssn\", \"credit_card\", \"email\", \"phone\", etc.\n pii_count?: Long, // Number of PII matches\n\n // --- Encoding & Unicode Attacks ---\n contains_invisible_chars?: Bool, // Zero-width chars, bidi overrides, tag chars detected\n invisible_chars_score?: Long, // Unicode attack severity (0-100)\n\n // --- Content Safety Scores (0-100, from ML classifiers) ---\n violence_score: Long,\n weapons_score: Long,\n hate_speech_score: Long,\n crime_score: Long,\n sexual_score: Long,\n profanity_score: Long,\n\n // --- ML Detector Confidence Scores (0-100) ---\n pii_confidence: Long, // PII detection classifier confidence\n injection_confidence: Long, // Combined injection confidence: MAX(pulse, deep_context)\n jailbreak_confidence: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)\n injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier\n injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn\n jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier\n jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn\n\n // --- Agent Security (0-100) ---\n indirect_injection_score: Long, // Indirect prompt injection risk (OWASP LLM01, ASI01)\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n\n // --- Legacy ---\n prompt_text?: String, // Same as content (backward compatibility)\n response_content?: String, // Response content (if available)\n },\n};\n\n// User calls a tool (native IDE tool or MCP tool)\n// Threat focus: command injection, tool poisoning, rug pull, data exfiltration, loops\naction call_tool appliesTo {\n principal: [User, Agent],\n resource: [Tool, FilePath],\n context: {\n // --- Event & Source ---\n content: String, // Raw content being scanned (e.g., shell command, tool args)\n source: String, // IDE source\n event: String, // Hook event name\n user_email: String, // User identifier\n\n // --- Tool & MCP ---\n tool_name?: String, // Normalized tool name (\"shell\", \"read_file\", etc.)\n mcp_server?: String, // MCP server name\n mcp_tool?: String, // MCP tool name\n\n // --- File & Path ---\n path?: String, // File path (if file operation)\n\n // --- Workspace ---\n cwd?: String,\n workspace_root?: String,\n\n // --- Threat Detection ---\n threat_count?: Long,\n highest_severity?: String,\n threat_categories?: Set<String>,\n detected_threats?: Set<String>,\n max_threat_severity?: Long,\n contains_secrets?: Bool,\n\n // --- Secrets (granular) ---\n secret_types?: Set<String>,\n secret_count?: Long,\n\n // --- PII Detection ---\n pii_detected?: Bool,\n pii_types?: Set<String>,\n pii_count?: Long,\n\n // --- Encoding & Unicode Attacks ---\n contains_invisible_chars?: Bool,\n invisible_chars_score?: Long,\n\n // --- Content Safety Scores (0-100) ---\n violence_score?: Long,\n weapons_score?: Long,\n hate_speech_score?: Long,\n crime_score?: Long,\n sexual_score?: Long,\n profanity_score?: Long,\n\n // --- ML Detector Confidence Scores (0-100) ---\n pii_confidence?: Long,\n injection_confidence?: Long, // Combined injection confidence: MAX(pulse, deep_context)\n jailbreak_confidence?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)\n injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier\n injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn\n jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier\n jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn\n\n // --- Agent Security (0-100) --- (OWASP ASI01, ASI02, ASI04; MITRE AML.T0051)\n tool_poisoning_score?: Long, // Hidden instructions in tool description/args\n tool_poisoning_detected?: Bool, // Boolean flag for tool poisoning\n rug_pull_score?: Long, // Tool behavior drift after trust establishment\n rug_pull_detected?: Bool, // Boolean flag for rug pull\n indirect_injection_score?: Long, // Indirect injection via tool output\n\n // --- Tool Risk Assessment ---\n tool_risk_score?: Long, // Computed tool risk (0-100)\n tool_category?: String, // \"safe\", \"sensitive\", \"dangerous\"\n tool_is_sensitive?: Bool, // Sensitivity classification\n tool_is_builtin?: Bool, // Built-in IDE tool vs MCP tool\n\n // --- Behavioral Analysis --- (OWASP LLM10, ASI02, ASI08)\n loop_detected?: Bool, // Consecutive same-tool call loop\n loop_count?: Long, // Number of consecutive repeat calls\n loop_tool?: String, // Tool name in loop\n suspicious_pattern?: Bool, // Data exfiltration or attack sequence detected\n pattern_type?: String, // \"data_exfiltration\", \"secret_exfiltration\", \"credential_theft\", \"destructive_sequence\"\n sequence_risk?: Long, // Sequence risk score (0-100)\n\n // --- MCP Trust ---\n mcp_server_verified?: Bool, // Whether server is from verified registry\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n\n // --- Legacy ---\n response_content?: String,\n },\n};\n\n// Connect to an MCP server\n// Threat focus: supply chain, tool poisoning, rug pull, config risk\naction connect_server appliesTo {\n principal: [User, Agent],\n resource: [Server],\n context: {\n content?: String, // Server config content (if available)\n source: String,\n event: String,\n user_email: String,\n mcp_server?: String,\n\n // --- Threat Detection ---\n threat_count?: Long,\n highest_severity?: String,\n threat_categories?: Set<String>,\n max_threat_severity?: Long,\n\n // --- Agent Security (0-100) --- (OWASP ASI04, MCP01-MCP05)\n tool_poisoning_score?: Long, // Poisoned tool descriptions in server\n tool_poisoning_detected?: Bool,\n rug_pull_score?: Long, // Server behavior change after approval\n rug_pull_detected?: Bool,\n indirect_injection_score?: Long, // Injection payloads in server responses\n\n // --- MCP Trust & Config Risk ---\n mcp_server_verified?: Bool, // Verified registry status\n mcp_config_risk?: Bool, // Risky server config detected (inline code exec, etc.)\n mcp_risk_score?: Long, // Config risk severity (0-100)\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n },\n};\n\n// Read a file from disk\n// Threat focus: secrets exposure, PII exposure, path traversal, sensitive paths\naction read_file appliesTo {\n principal: [User, Agent],\n resource: [FilePath],\n context: {\n content: String,\n source: String,\n event: String,\n user_email: String,\n path?: String,\n cwd?: String,\n workspace_root?: String,\n\n // --- Threat Detection ---\n threat_count?: Long,\n highest_severity?: String,\n threat_categories?: Set<String>,\n detected_threats?: Set<String>,\n max_threat_severity?: Long,\n contains_secrets?: Bool,\n\n // --- Secrets (granular) ---\n secret_types?: Set<String>,\n secret_count?: Long,\n\n // --- PII Detection ---\n pii_detected?: Bool,\n pii_types?: Set<String>,\n pii_count?: Long,\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n },\n};\n\n// Write a file to disk\n// Threat focus: secrets in output, PII in output, sensitive paths, malicious code\naction write_file appliesTo {\n principal: [User, Agent],\n resource: [FilePath],\n context: {\n content: String,\n source: String,\n event: String,\n user_email: String,\n path?: String,\n cwd?: String,\n workspace_root?: String,\n\n // --- Threat Detection ---\n threat_count?: Long,\n highest_severity?: String,\n threat_categories?: Set<String>,\n detected_threats?: Set<String>,\n max_threat_severity?: Long,\n contains_secrets?: Bool,\n\n // --- Secrets (granular) ---\n secret_types?: Set<String>,\n secret_count?: Long,\n\n // --- PII Detection ---\n pii_detected?: Bool,\n pii_types?: Set<String>,\n pii_count?: Long,\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n },\n};\n\n}\n";
|
|
12
|
+
export declare const OVERWATCH_SCHEMA = "// Overwatch Cedar Schema\n// ===================================\n// IDE Agent Security & Policy Enforcement\n//\n// Overwatch protects IDE agent operations (prompts, tool calls, file access, MCP connections)\n// by evaluating threats detected by the detection engine pipeline against Cedar policies.\n//\n// Architecture:\n// User/Agent \u2192 IDE Hook \u2192 Detection Engine \u2192 Cedar Policy \u2192 Allow/Deny\n//\n// Supported IDEs:\n// - Cursor (beforeSubmitPrompt, beforeShellExecution, beforeMCPExecution, etc.)\n// - Claude Code (UserPromptSubmit, PreToolUse)\n// - GitHub Copilot (userPromptSubmitted, preToolUse)\n//\n// Threat Coverage:\n// - OWASP Top 10 for LLM Applications 2025 (LLM01-LLM10)\n// - OWASP Top 10 for Agentic Applications (ASI01-ASI10)\n// - OWASP MCP Top 10 (MCP01-MCP05)\n// - MITRE ATLAS Agent Techniques (AML.T0051, AML.T0080-T0082)\n\nnamespace Overwatch {\n\n// =============================================================================\n// ENTITIES - Tenant Hierarchy (ReBAC)\n// =============================================================================\n// Aligned with Guardrails entity hierarchy (Account \u2192 Project).\n// Overwatch does not have app-specific policies, so App is omitted.\n//\n// Entity hierarchy enables Cedar's `in` operator for policy scoping:\n// Account (org root)\n// \u2514\u2500\u2500 Project in [Account]\n// \u2514\u2500\u2500 Tool/Server/FilePath/LlmPrompt in [Project]\n//\n// Policy scoping examples:\n// resource == Overwatch::Tool::\"shell\" \u2192 specific tool\n// resource in Overwatch::Project::\"<uuid>\" \u2192 project-wide\n// resource in Overwatch::Account::\"<uuid>\" \u2192 org-wide\n\n/// Account represents an organization (top-level tenant)\nentity Account;\n\n/// Project represents a project within an account\nentity Project in [Account];\n\n// =============================================================================\n// ENTITIES - Principals\n// =============================================================================\n\n/// Human user or service account making requests to the IDE\nentity User;\n\n/// AI agent (Claude, GitHub Copilot, etc.)\nentity Agent;\n\n// =============================================================================\n// ENTITIES - Resources (scoped under Project)\n// =============================================================================\n\n/// LLM prompt or session \u2014 resource for process_prompt action\nentity LlmPrompt in [Project];\n\n/// MCP tool or native IDE tool \u2014 resource for call_tool action\nentity Tool in [Project];\n\n/// MCP server \u2014 resource for connect_server action\nentity Server in [Project];\n\n/// File system path \u2014 resource for read_file/write_file/call_tool actions\nentity FilePath in [Project];\n\n// =============================================================================\n// ACTIONS\n// =============================================================================\n\n// User submits a prompt or receives AI response\n// Threat focus: injection, jailbreak, secrets, PII, content safety, invisible chars\naction process_prompt appliesTo {\n principal: [User, Agent],\n resource: [LlmPrompt],\n context: {\n // --- Event & Source ---\n content: String, // Raw content being scanned\n source: String, // IDE source: \"cursor\", \"claudecode\", \"github_copilot\"\n event: String, // Hook event name\n user_email: String, // User identifier\n\n // --- Workspace ---\n cwd?: String, // Current working directory\n workspace_root?: String, // Workspace/repository root\n\n // --- Threat Detection (from detection engine pipeline) ---\n threat_count: Long, // Total threats detected\n highest_severity: String, // \"critical\", \"high\", \"medium\", \"low\", \"none\"\n threat_categories: Set<String>, // Threat category names\n detected_threats: Set<String>, // Detection rule names that matched\n max_threat_severity: Long, // Numeric severity (0=none, 1=low, 2=medium, 3=high, 4=critical)\n contains_secrets: Bool, // Whether secrets/credentials detected\n\n // --- Secrets (granular) ---\n secret_types?: Set<String>, // Types: \"aws_access_key\", \"github_token\", \"ssh_private_key\", etc.\n secret_count?: Long, // Number of distinct secrets found\n\n // --- PII Detection ---\n pii_detected?: Bool, // Whether any PII patterns matched\n pii_types?: Set<String>, // Types: \"ssn\", \"credit_card\", \"email\", \"phone\", etc.\n pii_count?: Long, // Number of PII matches\n\n // --- Encoding & Unicode Attacks ---\n contains_invisible_chars?: Bool, // Zero-width chars, bidi overrides, tag chars detected\n invisible_chars_score?: Long, // Unicode attack severity (0-100)\n\n // --- Content Safety Scores (0-100, from ML classifiers) ---\n violence_score: Long,\n weapons_score: Long,\n hate_speech_score: Long,\n crime_score: Long,\n sexual_score: Long,\n profanity_score: Long,\n\n // --- ML Detector Confidence Scores (0-100) ---\n pii_confidence: Long, // PII detection classifier confidence\n injection_confidence: Long, // Combined injection confidence: MAX(pulse, deep_context)\n jailbreak_confidence: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)\n injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier\n injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn\n jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier\n jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn\n\n // --- Agent Security (0-100) ---\n indirect_injection_score: Long, // Indirect prompt injection risk (OWASP LLM01, ASI01)\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n\n // --- Legacy ---\n prompt_text?: String, // Same as content (backward compatibility)\n response_content?: String, // Response content (if available)\n },\n};\n\n// User calls a tool (native IDE tool or MCP tool)\n// Threat focus: command injection, tool poisoning, rug pull, data exfiltration, loops\naction call_tool appliesTo {\n principal: [User, Agent],\n resource: [Tool, FilePath],\n context: {\n // --- Event & Source ---\n content: String, // Raw content being scanned (e.g., shell command, tool args)\n source: String, // IDE source\n event: String, // Hook event name\n user_email: String, // User identifier\n\n // --- Tool & MCP ---\n tool_name?: String, // Normalized tool name (\"shell\", \"read_file\", etc.)\n mcp_server?: String, // MCP server name\n mcp_tool?: String, // MCP tool name\n\n // --- File & Path ---\n path?: String, // File path (if file operation)\n\n // --- Workspace ---\n cwd?: String,\n workspace_root?: String,\n\n // --- Threat Detection ---\n threat_count?: Long,\n highest_severity?: String,\n threat_categories?: Set<String>,\n detected_threats?: Set<String>,\n max_threat_severity?: Long,\n contains_secrets?: Bool,\n\n // --- Secrets (granular) ---\n secret_types?: Set<String>,\n secret_count?: Long,\n\n // --- PII Detection ---\n pii_detected?: Bool,\n pii_types?: Set<String>,\n pii_count?: Long,\n\n // --- Encoding & Unicode Attacks ---\n contains_invisible_chars?: Bool,\n invisible_chars_score?: Long,\n\n // --- Content Safety Scores (0-100) ---\n violence_score?: Long,\n weapons_score?: Long,\n hate_speech_score?: Long,\n crime_score?: Long,\n sexual_score?: Long,\n profanity_score?: Long,\n\n // --- ML Detector Confidence Scores (0-100) ---\n pii_confidence?: Long,\n injection_confidence?: Long, // Combined injection confidence: MAX(pulse, deep_context)\n jailbreak_confidence?: Long, // Combined jailbreak confidence: MAX(pulse, deep_context)\n injection_pulse_score?: Long, // 0-100 Pulse single-turn classifier\n injection_deep_context_score?: Long, // 0-100 DeepContext multi-turn\n jailbreak_pulse_score?: Long, // 0-100 Pulse single-turn classifier\n jailbreak_deep_context_score?: Long, // 0-100 DeepContext multi-turn\n\n // --- Agent Security (0-100) --- (OWASP ASI01, ASI02, ASI04; MITRE AML.T0051)\n tool_poisoning_score?: Long, // Hidden instructions in tool description/args\n tool_poisoning_detected?: Bool, // Boolean flag for tool poisoning\n rug_pull_score?: Long, // Tool behavior drift after trust establishment\n rug_pull_detected?: Bool, // Boolean flag for rug pull\n indirect_injection_score?: Long, // Indirect injection via tool output\n\n // --- Tool Risk Assessment ---\n tool_risk_score?: Long, // Computed tool risk (0-100)\n tool_category?: String, // \"safe\", \"sensitive\", \"dangerous\"\n tool_is_sensitive?: Bool, // Sensitivity classification\n tool_is_builtin?: Bool, // Built-in IDE tool vs MCP tool\n\n // --- Behavioral Analysis --- (OWASP LLM10, ASI02, ASI08)\n loop_detected?: Bool, // Consecutive same-tool call loop\n loop_count?: Long, // Number of consecutive repeat calls\n loop_tool?: String, // Tool name in loop\n suspicious_pattern?: Bool, // Data exfiltration or attack sequence detected\n pattern_type?: String, // \"data_exfiltration\", \"secret_exfiltration\", \"credential_theft\", \"destructive_sequence\"\n sequence_risk?: Long, // Sequence risk score (0-100)\n\n // --- MCP Trust ---\n mcp_server_verified?: Bool, // Whether server is from verified registry\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n\n // --- Legacy ---\n response_content?: String,\n },\n};\n\n// Connect to an MCP server\n// Threat focus: supply chain, tool poisoning, rug pull, config risk\naction connect_server appliesTo {\n principal: [User, Agent],\n resource: [Server],\n context: {\n content?: String, // Server config content (if available)\n source: String,\n event: String,\n user_email: String,\n mcp_server?: String,\n\n // --- Threat Detection ---\n threat_count?: Long,\n highest_severity?: String,\n threat_categories?: Set<String>,\n max_threat_severity?: Long,\n\n // --- Agent Security (0-100) --- (OWASP ASI04, MCP01-MCP05)\n tool_poisoning_score?: Long, // Poisoned tool descriptions in server\n tool_poisoning_detected?: Bool,\n rug_pull_score?: Long, // Server behavior change after approval\n rug_pull_detected?: Bool,\n indirect_injection_score?: Long, // Injection payloads in server responses\n\n // --- MCP Trust & Config Risk ---\n mcp_server_verified?: Bool, // Verified registry status\n mcp_config_risk?: Bool, // Risky server config detected (inline code exec, etc.)\n mcp_risk_score?: Long, // Config risk severity (0-100)\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n },\n};\n\n// Read a file from disk\n// Threat focus: secrets exposure, PII exposure, path traversal, sensitive paths\naction read_file appliesTo {\n principal: [User, Agent],\n resource: [FilePath],\n context: {\n content: String,\n source: String,\n event: String,\n user_email: String,\n path?: String,\n cwd?: String,\n workspace_root?: String,\n\n // --- Threat Detection ---\n threat_count?: Long,\n highest_severity?: String,\n threat_categories?: Set<String>,\n detected_threats?: Set<String>,\n max_threat_severity?: Long,\n contains_secrets?: Bool,\n\n // --- Secrets (granular) ---\n secret_types?: Set<String>,\n secret_count?: Long,\n\n // --- PII Detection ---\n pii_detected?: Bool,\n pii_types?: Set<String>,\n pii_count?: Long,\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n\n // --- Agent Security ---\n indirect_injection_score?: Long, // Indirect injection via file content\n },\n};\n\n// Write a file to disk\n// Threat focus: secrets in output, PII in output, sensitive paths, malicious code\naction write_file appliesTo {\n principal: [User, Agent],\n resource: [FilePath],\n context: {\n content: String,\n source: String,\n event: String,\n user_email: String,\n path?: String,\n cwd?: String,\n workspace_root?: String,\n\n // --- Threat Detection ---\n threat_count?: Long,\n highest_severity?: String,\n threat_categories?: Set<String>,\n detected_threats?: Set<String>,\n max_threat_severity?: Long,\n contains_secrets?: Bool,\n\n // --- Secrets (granular) ---\n secret_types?: Set<String>,\n secret_count?: Long,\n\n // --- PII Detection ---\n pii_detected?: Bool,\n pii_types?: Set<String>,\n pii_count?: Long,\n\n // --- Session Detection History (cross-turn sticky flags) ---\n session_pii_detected?: Bool,\n session_pii_types?: Set<String>,\n session_secrets_detected?: Bool,\n session_secret_types?: Set<String>,\n session_injection_detected?: Bool,\n session_command_injection?: Bool,\n session_threat_turns?: Long,\n session_max_injection_score?: Long,\n session_max_jailbreak_score?: Long,\n session_max_command_injection_score?: Long,\n session_max_pii_score?: Long,\n session_max_secret_score?: Long,\n session_cumulative_risk_score?: Long,\n\n // --- Agent Security ---\n indirect_injection_score?: Long, // Indirect injection via file content\n },\n};\n\n}\n";
|
|
13
13
|
/**
|
|
14
14
|
* Palisade Cedar schema
|
|
15
15
|
*
|
|
@@ -129,6 +129,10 @@ namespace Guardrails {
|
|
|
129
129
|
"pii_detected"?: Bool,
|
|
130
130
|
"pii_count"?: Long,
|
|
131
131
|
"pii_types"?: Set<String>, // ["email", "phone", "ssn", "credit_card", ...]
|
|
132
|
+
"pii_confidence"?: Long, // PII ML classifier confidence (0-100) — catches novel PII patterns that escape regex detection
|
|
133
|
+
|
|
134
|
+
// Threat Severity Aggregation (optional)
|
|
135
|
+
"highest_severity"?: String, // Highest severity across all detectors: "critical" | "high" | "medium" | "low" | "none"
|
|
132
136
|
|
|
133
137
|
// Trust & Safety - Toxicity (optional)
|
|
134
138
|
"violence_score"?: Long, // 0-100
|
|
@@ -241,7 +245,7 @@ namespace Guardrails {
|
|
|
241
245
|
|
|
242
246
|
// Agentic - Behavioral Patterns (optional)
|
|
243
247
|
"suspicious_pattern"?: Bool,
|
|
244
|
-
"pattern_type"?: String, // "data_exfiltration" | "secret_exfiltration" | "db_exfiltration" | "none"
|
|
248
|
+
"pattern_type"?: String, // "data_exfiltration" | "secret_exfiltration" | "db_exfiltration" | "credential_theft" | "destructive_sequence" | "none"
|
|
245
249
|
"sequence_risk"?: Long, // 0-100
|
|
246
250
|
|
|
247
251
|
// Agentic - Loop Detection (optional)
|
|
@@ -263,6 +267,8 @@ namespace Guardrails {
|
|
|
263
267
|
"secret_types"?: Set<String>,
|
|
264
268
|
"pii_detected"?: Bool,
|
|
265
269
|
"pii_types"?: Set<String>,
|
|
270
|
+
"pii_count"?: Long, // Number of PII pattern matches in tool content
|
|
271
|
+
"pii_confidence"?: Long, // PII ML classifier confidence (0-100)
|
|
266
272
|
"injection_confidence"?: Long,
|
|
267
273
|
"injection_pulse_score"?: Long, // 0-100 Pulse single-turn classifier
|
|
268
274
|
"injection_deep_context_score"?: Long, // 0-100 DeepContext multi-turn
|
|
@@ -283,6 +289,13 @@ namespace Guardrails {
|
|
|
283
289
|
"cross_origin_type"?: String,
|
|
284
290
|
"cross_origin_score"?: Long, // 0-100
|
|
285
291
|
|
|
292
|
+
// File & Path (optional — for path-based access control policies)
|
|
293
|
+
"path"?: String, // File path when tool operates on files
|
|
294
|
+
|
|
295
|
+
// Security - Invisible Character Detection in tool args (optional)
|
|
296
|
+
"contains_invisible_chars"?: Bool, // Whether invisible Unicode chars detected in tool args
|
|
297
|
+
"invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
|
|
298
|
+
|
|
286
299
|
// Security - Encoded Injection (optional)
|
|
287
300
|
"encoded_content_detected"?: Bool,
|
|
288
301
|
"encoded_types"?: Set<String>,
|
|
@@ -297,6 +310,10 @@ namespace Guardrails {
|
|
|
297
310
|
"rug_pull_score"?: Long, // 0-100
|
|
298
311
|
"rug_pull_type"?: String, // "risk_spike" | "pattern_change" | "combined" | "none"
|
|
299
312
|
|
|
313
|
+
// Agentic - Indirect Prompt Injection (optional — injection via tool outputs/retrieved content)
|
|
314
|
+
"indirect_injection_score"?: Long, // Indirect injection risk score (0-100)
|
|
315
|
+
"indirect_injection_type"?: String, // Type of indirect injection detected
|
|
316
|
+
|
|
300
317
|
// Agentic - MCP Risk (optional)
|
|
301
318
|
"mcp_config_risk"?: Bool,
|
|
302
319
|
"mcp_risk_type"?: String, // "inline_execution" | "suspicious_url" | "cross_origin"
|
|
@@ -336,6 +353,9 @@ namespace Guardrails {
|
|
|
336
353
|
"request_id": String,
|
|
337
354
|
"timestamp": Long,
|
|
338
355
|
|
|
356
|
+
// File path (optional — for path-based access control policies)
|
|
357
|
+
"path"?: String, // File path being read
|
|
358
|
+
|
|
339
359
|
// Security checks on file content (optional)
|
|
340
360
|
"contains_secrets"?: Bool,
|
|
341
361
|
"secret_count"?: Long,
|
|
@@ -378,6 +398,13 @@ namespace Guardrails {
|
|
|
378
398
|
"request_id": String,
|
|
379
399
|
"timestamp": Long,
|
|
380
400
|
|
|
401
|
+
// File path (optional — for path-based access control policies)
|
|
402
|
+
"path"?: String, // File path being written
|
|
403
|
+
|
|
404
|
+
// Security - Invisible Character Detection in write content (optional)
|
|
405
|
+
"contains_invisible_chars"?: Bool, // Whether invisible Unicode chars detected in write content
|
|
406
|
+
"invisible_chars_score"?: Long, // Invisible character attack severity (0-100)
|
|
407
|
+
|
|
381
408
|
// Security checks on content being written (optional)
|
|
382
409
|
"contains_secrets"?: Bool,
|
|
383
410
|
"secret_count"?: Long,
|
|
@@ -820,6 +847,9 @@ action read_file appliesTo {
|
|
|
820
847
|
session_max_pii_score?: Long,
|
|
821
848
|
session_max_secret_score?: Long,
|
|
822
849
|
session_cumulative_risk_score?: Long,
|
|
850
|
+
|
|
851
|
+
// --- Agent Security ---
|
|
852
|
+
indirect_injection_score?: Long, // Indirect injection via file content
|
|
823
853
|
},
|
|
824
854
|
};
|
|
825
855
|
|
|
@@ -868,6 +898,9 @@ action write_file appliesTo {
|
|
|
868
898
|
session_max_pii_score?: Long,
|
|
869
899
|
session_max_secret_score?: Long,
|
|
870
900
|
session_cumulative_risk_score?: Long,
|
|
901
|
+
|
|
902
|
+
// --- Agent Security ---
|
|
903
|
+
indirect_injection_score?: Long, // Indirect injection via file content
|
|
871
904
|
},
|
|
872
905
|
};
|
|
873
906
|
|
|
@@ -1471,6 +1504,8 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1471
1504
|
{ "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether personally identifiable information (PII) was found in the content. Commonly used to block PII in outputs to prevent data leakage" },
|
|
1472
1505
|
{ "key": "pii_count", "type": "number", "required": false, "description": "Total number of PII matches found (emails, phone numbers, SSNs, etc.)" },
|
|
1473
1506
|
{ "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types detected (e.g., [\'email\', \'phone\', \'ssn\', \'credit_card\']). Use .contains() to block specific sensitive types" },
|
|
1507
|
+
{ "key": "pii_confidence", "type": "number", "required": false, "description": "PII detection ML classifier confidence (0-100). Catches novel PII patterns including names, addresses, and identifiers that regex rules may miss. Typical threshold: >=80 for high-confidence blocking" },
|
|
1508
|
+
{ "key": "highest_severity", "type": "string", "required": false, "description": "Highest severity level across all detection engines: \'critical\', \'high\', \'medium\', \'low\', or \'none\'. Use for severity-based catch-all policies (e.g., block any content flagged as critical)" },
|
|
1474
1509
|
{ "key": "violence_score", "type": "number", "required": false, "description": "ML-based score for violent content references (0-100). Typical threshold: >90 for critical blocks, >60 for warnings" },
|
|
1475
1510
|
{ "key": "hate_speech_score", "type": "number", "required": false, "description": "ML-based score for hate speech, discriminatory language, or targeted harassment (0-100). Typical threshold: >75 for blocks" },
|
|
1476
1511
|
{ "key": "sexual_score", "type": "number", "required": false, "description": "ML-based score for sexual or adult content (0-100). Adjust thresholds based on your application\'s audience" },
|
|
@@ -1567,6 +1602,8 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1567
1602
|
{ "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in tool arguments" },
|
|
1568
1603
|
{ "key": "pii_detected", "type": "boolean", "required": false, "description": "Whether PII was detected in tool arguments or content" },
|
|
1569
1604
|
{ "key": "pii_types", "type": "array", "required": false, "description": "Array of PII types found in tool arguments" },
|
|
1605
|
+
{ "key": "pii_count", "type": "number", "required": false, "description": "Number of PII pattern matches in tool arguments or content. Use >=3 to detect bulk PII exposure indicating data dumps or CSV pastes" },
|
|
1606
|
+
{ "key": "pii_confidence", "type": "number", "required": false, "description": "PII detection ML classifier confidence for tool content (0-100). Catches novel PII patterns that escape regex detection" },
|
|
1570
1607
|
{ "key": "injection_confidence", "type": "number", "required": false, "description": "Combined prompt injection confidence in tool arguments (0-100). MAX of all detector scores (Pulse + DeepContext). Use injection_pulse_score / injection_deep_context_score for individual detector control" },
|
|
1571
1608
|
{ "key": "injection_pulse_score", "type": "number", "required": false, "description": "Highflame single-turn classifier score for prompt injection in tool arguments (0-100). Raw score from Pulse detector before combination with deep-context" },
|
|
1572
1609
|
{ "key": "injection_deep_context_score", "type": "number", "required": false, "description": "DeepContext multi-turn analyzer score for prompt injection in tool arguments (0-100). Tracks injection patterns across tool call history" },
|
|
@@ -1594,6 +1631,11 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1594
1631
|
{ "key": "encoded_types", "type": "array", "required": false, "description": "Array of encoding types detected in tool arguments (e.g., [\'base64\', \'hex\'])" },
|
|
1595
1632
|
{ "key": "encoded_count", "type": "number", "required": false, "description": "Number of encoded segments found in tool arguments" },
|
|
1596
1633
|
{ "key": "encoded_score", "type": "number", "required": false, "description": "Risk score for encoded injection in tool arguments (0-100)" },
|
|
1634
|
+
{ "key": "path", "type": "string", "required": false, "description": "File path targeted by the tool call (if file operation). Use for path-based blocking of .env files, credential files, system directories, and credential directories" },
|
|
1635
|
+
{ "key": "contains_invisible_chars", "type": "boolean", "required": false, "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in tool arguments. Tool args should be plain text — invisible chars indicate payload injection" },
|
|
1636
|
+
{ "key": "invisible_chars_score", "type": "number", "required": false, "description": "Invisible character attack severity score in tool arguments (0-100)" },
|
|
1637
|
+
{ "key": "indirect_injection_score", "type": "number", "required": false, "description": "Indirect prompt injection risk score (0-100) — injection via tool outputs, retrieved documents, or external content. Use >=70 for general blocking, >=50 for sensitive tools" },
|
|
1638
|
+
{ "key": "indirect_injection_type", "type": "string", "required": false, "description": "Type of indirect injection detected in tool content (e.g., instruction override, authority hijack)" },
|
|
1597
1639
|
{ "key": "rug_pull_type", "type": "string", "required": false, "description": "Type of rug pull detected: \'risk_spike\' (sudden risk increase), \'pattern_change\' (behavioral shift), \'combined\', or \'none\'" },
|
|
1598
1640
|
{ "key": "conversation_turn", "type": "number", "required": false, "description": "Current conversation turn number for the agentic session" },
|
|
1599
1641
|
{ "key": "multi_turn_detection", "type": "boolean", "required": false, "description": "Whether multi-turn injection patterns were detected across tool calls in the session" },
|
|
@@ -1623,6 +1665,7 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1623
1665
|
"context_attributes": [
|
|
1624
1666
|
{ "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
|
|
1625
1667
|
{ "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
|
|
1668
|
+
{ "key": "path", "type": "string", "required": false, "description": "File path being read. Use for path-based access control policies (e.g., block .env files, system directories, credential directories)" },
|
|
1626
1669
|
{ "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in the file content being read" },
|
|
1627
1670
|
{ "key": "secret_count", "type": "number", "required": false, "description": "Total number of secret matches found in file content. Multiple matches may indicate data dumps or accidental credential exposure" },
|
|
1628
1671
|
{ "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in file content" },
|
|
@@ -1657,6 +1700,9 @@ export const GUARDRAILS_CONTEXT = {
|
|
|
1657
1700
|
"context_attributes": [
|
|
1658
1701
|
{ "key": "request_id", "type": "string", "required": true, "description": "Unique identifier for this request" },
|
|
1659
1702
|
{ "key": "timestamp", "type": "number", "required": true, "description": "Unix timestamp in milliseconds" },
|
|
1703
|
+
{ "key": "path", "type": "string", "required": false, "description": "File path being written. Use for path-based blocking policies (e.g., block writes to .env files, credential directories)" },
|
|
1704
|
+
{ "key": "contains_invisible_chars", "type": "boolean", "required": false, "description": "Whether invisible Unicode characters (zero-width, bidi overrides, tag characters) were detected in the content being written. Prevents persistence of invisible payloads in source code" },
|
|
1705
|
+
{ "key": "invisible_chars_score", "type": "number", "required": false, "description": "Invisible character attack severity score (0-100) in write content" },
|
|
1660
1706
|
{ "key": "contains_secrets", "type": "boolean", "required": false, "description": "Whether secrets were detected in the content being written. Block writes containing credentials" },
|
|
1661
1707
|
{ "key": "secret_count", "type": "number", "required": false, "description": "Total number of secret matches found in write content. Multiple matches may indicate data dumps or accidental credential exposure" },
|
|
1662
1708
|
{ "key": "secret_types", "type": "array", "required": false, "description": "Array of secret types found in write content" },
|