npm - @highflame/policy - Versions diffs - 2.0.10 → 2.1.1 - Mend

@highflame/policy 2.0.10 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/_schemas/guardrails/context.json +435 -0
package/_schemas/guardrails/schema.cedarschema +225 -0
package/_schemas/guardrails/templates/defaults/agentic_safety.cedar +94 -0
package/_schemas/guardrails/templates/defaults/baseline.cedar +24 -0
package/_schemas/guardrails/templates/defaults/injection.cedar +70 -0
package/_schemas/guardrails/templates/defaults/pii.cedar +48 -0
package/_schemas/guardrails/templates/defaults/secrets.cedar +40 -0
package/_schemas/guardrails/templates/defaults/semantic.cedar +59 -0
package/_schemas/guardrails/templates/defaults/tool_risk.cedar +58 -0
package/_schemas/guardrails/templates/defaults/toxicity.cedar +76 -0
package/_schemas/guardrails/templates/mcp_tool_permissions.cedar +84 -0
package/_schemas/guardrails/templates/profiles/chat_assistant/privacy.cedar +22 -0
package/_schemas/guardrails/templates/profiles/chat_assistant/security.cedar +35 -0
package/_schemas/guardrails/templates/profiles/chat_assistant/trust_safety.cedar +43 -0
package/_schemas/guardrails/templates/profiles/chat_assistant.cedar +85 -0
package/_schemas/guardrails/templates/profiles/code_agent/agentic_security.cedar +109 -0
package/_schemas/guardrails/templates/profiles/code_agent/security.cedar +22 -0
package/_schemas/guardrails/templates/profiles/code_agent.cedar +125 -0
package/_schemas/guardrails/templates/profiles/data_pipeline/agentic_security.cedar +38 -0
package/_schemas/guardrails/templates/profiles/data_pipeline/privacy.cedar +40 -0
package/_schemas/guardrails/templates/profiles/data_pipeline/security.cedar +49 -0
package/_schemas/guardrails/templates/profiles/data_pipeline.cedar +111 -0
package/_schemas/guardrails/templates/templates.json +213 -0
package/dist/builder.d.ts +61 -0
package/dist/builder.js +4 -2
package/dist/entities.gen.d.ts +4 -0
package/dist/entities.gen.js +4 -0
package/dist/explain.d.ts +178 -0
package/dist/explain.js +390 -0
package/dist/guardrails-context.gen.d.ts +49 -0
package/dist/guardrails-context.gen.js +50 -0
package/dist/guardrails-defaults.gen.d.ts +61 -0
package/dist/guardrails-defaults.gen.js +1278 -0
package/dist/guardrails-entities.gen.d.ts +11 -0
package/dist/guardrails-entities.gen.js +37 -0
package/dist/index.d.ts +6 -1
package/dist/index.js +6 -1
package/dist/overwatch-defaults.gen.js +98 -0
package/dist/parser.js +136 -4
package/dist/schema.gen.d.ts +1 -1
package/dist/schema.gen.js +6 -0
package/dist/service-schemas.gen.d.ts +10 -0
package/dist/service-schemas.gen.js +337 -1
package/dist/types.d.ts +6 -1
package/dist/types.js +6 -1
package/package.json +5 -1

package/_schemas/guardrails/context.json ADDED Viewed

@@ -0,0 +1,435 @@
+{
+  "service": "guardrails",
+  "version": "1.0.0",
+  "description": "Guardrails (Shield) content security & policy enforcement for LLM applications",
+  "actions": [
+    {
+      "name": "process_prompt",
+      "description": "Analyze user prompts and AI responses for security threats, PII, and content violations",
+      "context_attributes": [
+        {
+          "key": "request_id",
+          "type": "string",
+          "required": true,
+          "description": "Unique identifier for this request, useful for audit trails and debugging"
+        },
+        {
+          "key": "timestamp",
+          "type": "number",
+          "required": true,
+          "description": "Unix timestamp in milliseconds when the request was processed"
+        },
+        {
+          "key": "direction",
+          "type": "string",
+          "required": true,
+          "description": "Content flow direction: 'input' for user prompts, 'output' for AI responses. Use this to apply different policies to inputs vs outputs (e.g., block PII only in outputs)"
+        },
+        {
+          "key": "content_type",
+          "type": "string",
+          "required": true,
+          "description": "Type of content being analyzed: 'prompt', 'response', 'tool_call', or 'file'"
+        },
+        {
+          "key": "detector_count",
+          "type": "number",
+          "required": true,
+          "description": "Number of detectors that were executed for this request"
+        },
+        {
+          "key": "injection_score",
+          "type": "number",
+          "required": false,
+          "description": "ML-based confidence score for prompt injection attacks (0-100). Higher scores indicate higher confidence. Typical threshold: >85 for high-confidence blocks",
+          "range": "0-100"
+        },
+        {
+          "key": "jailbreak_score",
+          "type": "number",
+          "required": false,
+          "description": "ML-based confidence score for jailbreak attempts (0-100). Detects attempts to bypass safety guardrails. Typical threshold: >80 for blocks",
+          "range": "0-100"
+        },
+        {
+          "key": "injection_type",
+          "type": "string",
+          "required": false,
+          "description": "Type of injection detected: 'prompt', 'sql', 'command', or 'none'. Use this to apply different policies per injection type"
+        },
+        {
+          "key": "contains_secrets",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether any API keys, tokens, passwords, or credentials were detected in the content. True indicates presence of secrets"
+        },
+        {
+          "key": "secret_count",
+          "type": "number",
+          "required": false,
+          "description": "Total number of secret matches found. Multiple matches may indicate data dumps or accidental credential exposure"
+        },
+        {
+          "key": "secret_types",
+          "type": "array",
+          "required": false,
+          "description": "Array of secret types found (e.g., ['aws_access_key', 'github_token']). Use set operations like .contains() to check for specific types"
+        },
+        {
+          "key": "pii_detected",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether personally identifiable information (PII) was found in the content. Commonly used to block PII in outputs to prevent data leakage"
+        },
+        {
+          "key": "pii_count",
+          "type": "number",
+          "required": false,
+          "description": "Total number of PII matches found (emails, phone numbers, SSNs, etc.)"
+        },
+        {
+          "key": "pii_types",
+          "type": "array",
+          "required": false,
+          "description": "Array of PII types detected (e.g., ['email', 'phone', 'ssn', 'credit_card']). Use .contains() to block specific sensitive types"
+        },
+        {
+          "key": "violence_score",
+          "type": "number",
+          "required": false,
+          "description": "ML-based score for violent content references (0-100). Typical threshold: >90 for critical blocks, >60 for warnings",
+          "range": "0-100"
+        },
+        {
+          "key": "hate_speech_score",
+          "type": "number",
+          "required": false,
+          "description": "ML-based score for hate speech, discriminatory language, or targeted harassment (0-100). Typical threshold: >75 for blocks",
+          "range": "0-100"
+        },
+        {
+          "key": "sexual_score",
+          "type": "number",
+          "required": false,
+          "description": "ML-based score for sexual or adult content (0-100). Adjust thresholds based on your application's audience",
+          "range": "0-100"
+        },
+        {
+          "key": "weapons_score",
+          "type": "number",
+          "required": false,
+          "description": "ML-based score for weapons references or violent imagery (0-100)",
+          "range": "0-100"
+        },
+        {
+          "key": "crime_score",
+          "type": "number",
+          "required": false,
+          "description": "ML-based score for criminal activity discussions (0-100)",
+          "range": "0-100"
+        },
+        {
+          "key": "profanity_score",
+          "type": "number",
+          "required": false,
+          "description": "ML-based score for profanity and vulgar language (0-100)",
+          "range": "0-100"
+        },
+        {
+          "key": "content_topics",
+          "type": "array",
+          "required": false,
+          "description": "Semantic topics detected in content (e.g., ['controlled_substances', 'weapons_manufacturing']). Use .contains() to block specific topics per application"
+        },
+        {
+          "key": "topic_confidence",
+          "type": "number",
+          "required": false,
+          "description": "Confidence score from topic classifier (0-100). Use with content_topics to tune sensitivity — higher thresholds reduce false positives",
+          "range": "0-100"
+        },
+        {
+          "key": "contains_invisible_chars",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether invisible Unicode characters (zero-width joiners, RTL marks, etc.) were detected in the content. Such characters are commonly used to evade prompt-injection detection"
+        },
+        {
+          "key": "invisible_chars_score",
+          "type": "number",
+          "required": false,
+          "description": "Density score for invisible characters in the content (0-100). Higher scores indicate more invisible characters, suggesting evasion attempts",
+          "range": "0-100"
+        }
+      ]
+    },
+    {
+      "name": "call_tool",
+      "description": "Execute agentic tool calls, including shell commands, file operations, and MCP tools",
+      "context_attributes": [
+        {
+          "key": "request_id",
+          "type": "string",
+          "required": true,
+          "description": "Unique identifier for this request"
+        },
+        {
+          "key": "timestamp",
+          "type": "number",
+          "required": true,
+          "description": "Unix timestamp in milliseconds"
+        },
+        {
+          "key": "tool_name",
+          "type": "string",
+          "required": false,
+          "description": "Name of the tool being called (e.g., 'shell', 'write_file', 'http_post'). Use this to block specific dangerous tools"
+        },
+        {
+          "key": "tool_risk_score",
+          "type": "number",
+          "required": false,
+          "description": "Computed risk score for this tool call (0-100). Considers tool sensitivity, argument patterns, and MCP verification status. Typical threshold: >85 for dangerous tools",
+          "range": "0-100"
+        },
+        {
+          "key": "tool_is_sensitive",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether the tool is classified as sensitive (shell, file operations, network access, etc.)"
+        },
+        {
+          "key": "tool_category",
+          "type": "string",
+          "required": false,
+          "description": "Tool classification: 'safe', 'sensitive', or 'dangerous'. Based on tool type and argument patterns"
+        },
+        {
+          "key": "tool_is_builtin",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether the tool is a built-in tool (vs MCP external tool). Built-in tools are generally more trusted"
+        },
+        {
+          "key": "mcp_server",
+          "type": "string",
+          "required": false,
+          "description": "Name of the MCP server providing this tool (e.g., 'github', 'filesystem', 'slack'). Empty for built-in tools. Use this to control which tools are allowed per MCP server"
+        },
+        {
+          "key": "mcp_tool",
+          "type": "string",
+          "required": false,
+          "description": "Name of the specific tool within the MCP server (e.g., 'read_issues', 'create_file'). Use with mcp_server for fine-grained per-tool permissioning"
+        },
+        {
+          "key": "mcp_server_verified",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether the MCP server is from a verified/trusted registry. Unverified servers have elevated risk scores. Use this to block tool calls from unverified sources"
+        },
+        {
+          "key": "suspicious_pattern",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether a suspicious action sequence was detected (e.g., read_file → http_post indicating data exfiltration). Requires session tracking"
+        },
+        {
+          "key": "pattern_type",
+          "type": "string",
+          "required": false,
+          "description": "Type of suspicious pattern: 'data_exfiltration', 'secret_exfiltration', 'db_exfiltration', or 'none'. Use this to apply different policies per threat type"
+        },
+        {
+          "key": "sequence_risk",
+          "type": "number",
+          "required": false,
+          "description": "Risk score from action sequence analysis (0-100). Analyzes history of tool calls to detect attack patterns. Typical threshold: >80 for blocks",
+          "range": "0-100"
+        },
+        {
+          "key": "loop_detected",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether a tool call loop was detected (same tool called repeatedly). May indicate infinite loops or runaway agents. Requires session tracking"
+        },
+        {
+          "key": "loop_count",
+          "type": "number",
+          "required": false,
+          "description": "Number of consecutive repeated tool calls. Typical threshold: >3 for loop detection"
+        },
+        {
+          "key": "loop_tool",
+          "type": "string",
+          "required": false,
+          "description": "Name of the tool involved in the loop"
+        },
+        {
+          "key": "budget_remaining_pct",
+          "type": "number",
+          "required": false,
+          "description": "Remaining token budget as a percentage (0-100). Use this to warn or block when budget is low. Requires a session with a token budget configured",
+          "range": "0-100"
+        },
+        {
+          "key": "budget_exceeded",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether the token budget has been exceeded. Use this to enforce cost controls on agentic sessions"
+        },
+        {
+          "key": "content_topics",
+          "type": "array",
+          "required": false,
+          "description": "Semantic topics detected in tool arguments or content (e.g., ['controlled_substances']). Use .contains() to restrict tool calls involving specific topics"
+        },
+        {
+          "key": "topic_confidence",
+          "type": "number",
+          "required": false,
+          "description": "Confidence score from topic classifier for tool content (0-100)",
+          "range": "0-100"
+        },
+        {
+          "key": "contains_secrets",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether secrets were detected in tool arguments or content"
+        },
+        {
+          "key": "secret_types",
+          "type": "array",
+          "required": false,
+          "description": "Array of secret types found in tool arguments"
+        },
+        {
+          "key": "pii_detected",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether PII was detected in tool arguments or content"
+        },
+        {
+          "key": "pii_types",
+          "type": "array",
+          "required": false,
+          "description": "Array of PII types found in tool arguments"
+        }
+      ]
+    },
+    {
+      "name": "read_file",
+      "description": "Read file operations for analyzing file content before allowing access",
+      "context_attributes": [
+        {
+          "key": "request_id",
+          "type": "string",
+          "required": true,
+          "description": "Unique identifier for this request"
+        },
+        {
+          "key": "timestamp",
+          "type": "number",
+          "required": true,
+          "description": "Unix timestamp in milliseconds"
+        },
+        {
+          "key": "contains_secrets",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether secrets were detected in the file content being read"
+        },
+        {
+          "key": "secret_types",
+          "type": "array",
+          "required": false,
+          "description": "Array of secret types found in file content"
+        },
+        {
+          "key": "pii_detected",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether PII was detected in the file content"
+        },
+        {
+          "key": "pii_types",
+          "type": "array",
+          "required": false,
+          "description": "Array of PII types found in file content"
+        }
+      ]
+    },
+    {
+      "name": "write_file",
+      "description": "Write file operations for preventing writes of sensitive content",
+      "context_attributes": [
+        {
+          "key": "request_id",
+          "type": "string",
+          "required": true,
+          "description": "Unique identifier for this request"
+        },
+        {
+          "key": "timestamp",
+          "type": "number",
+          "required": true,
+          "description": "Unix timestamp in milliseconds"
+        },
+        {
+          "key": "contains_secrets",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether secrets were detected in the content being written. Block writes containing credentials"
+        },
+        {
+          "key": "secret_types",
+          "type": "array",
+          "required": false,
+          "description": "Array of secret types found in write content"
+        },
+        {
+          "key": "pii_detected",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether PII was detected in the content being written"
+        },
+        {
+          "key": "pii_types",
+          "type": "array",
+          "required": false,
+          "description": "Array of PII types found in write content"
+        }
+      ]
+    },
+    {
+      "name": "connect_server",
+      "description": "Connect to an MCP server, used to control which MCP servers are allowed",
+      "context_attributes": [
+        {
+          "key": "request_id",
+          "type": "string",
+          "required": true,
+          "description": "Unique identifier for this request"
+        },
+        {
+          "key": "timestamp",
+          "type": "number",
+          "required": true,
+          "description": "Unix timestamp in milliseconds"
+        },
+        {
+          "key": "mcp_server",
+          "type": "string",
+          "required": false,
+          "description": "Name of the MCP server being connected to (e.g., 'github', 'filesystem', 'slack'). Use this to allow or block specific MCP servers"
+        },
+        {
+          "key": "mcp_server_verified",
+          "type": "boolean",
+          "required": false,
+          "description": "Whether the MCP server is from a verified/trusted registry. Use this to block connections to unverified servers"
+        }
+      ]
+    }
+  ]
+}

package/_schemas/guardrails/schema.cedarschema ADDED Viewed

@@ -0,0 +1,225 @@
+// =============================================================================
+// Guardrails Cedar Schema
+// =============================================================================
+// Defines entity types, actions, and context attributes for the highflame-shield
+// guardrails service. This schema enables type-safe policy authoring and
+// validation in both Studio UI and backend.
+//
+// Service: highflame-shield (guardrails)
+// Namespace: Guardrails
+// =============================================================================
+namespace Guardrails {
+    // =========================================================================
+    // Entity Types — ReBAC Hierarchy
+    // =========================================================================
+    // Entity hierarchy enables Cedar's `in` operator for policy scoping:
+    //   Account (org root)
+    //     └── Project in [Account]
+    //           └── App in [Project]
+    //                 └── Session in [App]
+    // User and Agent are declared without parents and sit outside this hierarchy.
+    // Policy scoping examples (`in` also matches the named entity itself, `==` never matches descendants):
+    //   resource == Guardrails::App::"<uuid>"              → app-scoped (that exact App only; does not match its Sessions)
+    //   resource in Guardrails::Project::"<uuid>"          → project-wide
+    //   resource in Guardrails::Account::"<uuid>"          → org-wide
+    // =========================================================================
+    /// Account represents an organization (top-level tenant); root of the resource hierarchy
+    entity Account;
+    /// Project represents a project within an account
+    entity Project in [Account];
+    /// User represents a principal (human or service) making requests; listed as a principal on every action below
+    entity User;
+    /// Agent represents an AI agent (Claude, Cursor, Copilot, etc.) making requests; listed as a principal on every action below
+    entity Agent;
+    /// App represents a protected application (guardrails-enabled LLM app)
+    entity App in [Project];
+    /// Session represents an agentic conversation session with state tracking
+    entity Session in [App];
+    // =========================================================================
+    // Actions — only "process_prompt" accepts an App resource; every other action requires a Session,
+    // so app-wide policies for those actions must use `resource in Guardrails::App::"<uuid>"` rather than `==`.
+    /// Process user prompts and AI responses for security threats and content violations
+    action "process_prompt" appliesTo {
+        principal: [User, Agent],
+        resource: [App, Session],
+        context: ProcessPromptContext
+    };
+    /// Execute tool calls (shell, file operations, MCP tools)
+    action "call_tool" appliesTo {
+        principal: [User, Agent],
+        resource: [Session],
+        context: CallToolContext
+    };
+    /// Read file operations
+    action "read_file" appliesTo {
+        principal: [User, Agent],
+        resource: [Session],
+        context: FileReadContext
+    };
+    /// Write file operations
+    action "write_file" appliesTo {
+        principal: [User, Agent],
+        resource: [Session],
+        context: FileWriteContext
+    };
+    /// Connect to an MCP server
+    action "connect_server" appliesTo {
+        principal: [User, Agent],
+        resource: [Session],
+        context: ConnectServerContext
+    };
+    // =========================================================================
+    // Context Types (Action-Specific) — attributes marked `?` are optional; guard reads with `context has <attr>`
+    // =========================================================================
+    /// Context for process_prompt action (user prompts & AI responses)
+    type ProcessPromptContext = {
+        // Core metadata (required)
+        "request_id": String,
+        "timestamp": Long,          // Unix timestamp in milliseconds
+        "direction": String,        // "input" | "output"
+        "content_type": String,     // "prompt" | "response" | "tool_call" | "file"
+        "detector_count": Long,     // number of detectors executed for this request
+        // Security - Injection & Jailbreak (optional)
+        "injection_score"?: Long,        // 0-100
+        "jailbreak_score"?: Long,        // 0-100
+        "injection_type"?: String,       // "prompt" | "sql" | "command" | "none"
+        // Privacy - Secrets (optional)
+        "contains_secrets"?: Bool,
+        "secret_count"?: Long,           // total number of secret matches
+        "secret_types"?: Set<String>,    // ["aws_access_key", "github_token", ...]
+        // Privacy - PII (optional)
+        "pii_detected"?: Bool,
+        "pii_count"?: Long,              // total number of PII matches
+        "pii_types"?: Set<String>,       // ["email", "phone", "ssn", "credit_card", ...]
+        // Trust & Safety - Toxicity (optional)
+        "violence_score"?: Long,         // 0-100
+        "hate_speech_score"?: Long,      // 0-100
+        "sexual_score"?: Long,           // 0-100
+        "weapons_score"?: Long,          // 0-100
+        "crime_score"?: Long,            // 0-100
+        "profanity_score"?: Long,        // 0-100
+        // Semantic - Topic Classification (optional)
+        "content_topics"?: Set<String>,      // ["controlled_substances", "weapons_manufacturing", ...]
+        "topic_confidence"?: Long,           // 0-100
+        // Security - Invisible Character Detection (optional)
+        "contains_invisible_chars"?: Bool,
+        "invisible_chars_score"?: Long,      // 0-100
+        // Additional detectors (optional) — value ranges are not stated here; presumably 0-100 like the scores above (TODO confirm)
+        "hallucination_score"?: Long,
+        "sentiment_score"?: Long,
+        "contains_code"?: Bool,
+        "code_languages"?: Set<String>,
+        "keyword_matched"?: Bool,
+        "keyword_categories"?: Set<String>,
+        "detected_language"?: String,
+        "phishing_detected"?: Bool,
+    };
+    /// Context for call_tool action (agentic tool execution)
+    type CallToolContext = {
+        // Core metadata (required)
+        "request_id": String,
+        "timestamp": Long,               // Unix timestamp in milliseconds
+        // Tool Risk (optional)
+        "tool_name"?: String,            // "shell", "write_file", "http_post", etc.
+        "tool_risk_score"?: Long,        // 0-100
+        "tool_is_sensitive"?: Bool,
+        "tool_category"?: String,        // "safe" | "sensitive" | "dangerous"
+        "tool_is_builtin"?: Bool,
+        // MCP context (optional — only present for MCP tool calls)
+        "mcp_server"?: String,           // MCP server name (e.g., "github", "filesystem")
+        "mcp_tool"?: String,             // MCP tool name within the server
+        "mcp_server_verified"?: Bool,    // Whether server is from verified registry
+        // Agentic - Behavioral Patterns (optional)
+        "suspicious_pattern"?: Bool,
+        "pattern_type"?: String,         // "data_exfiltration" | "secret_exfiltration" | "db_exfiltration" | "none"
+        "sequence_risk"?: Long,          // 0-100
+        // Agentic - Loop Detection (optional)
+        "loop_detected"?: Bool,
+        "loop_count"?: Long,             // number of consecutive repeated tool calls
+        "loop_tool"?: String,            // name of the tool involved in the loop
+        // Agentic - Budget Control (optional)
+        "budget_remaining_pct"?: Long,   // 0-100
+        "budget_exceeded"?: Bool,
+        // Semantic - Topic Classification (optional)
+        "content_topics"?: Set<String>,      // ["controlled_substances", "weapons_manufacturing", ...]
+        "topic_confidence"?: Long,           // 0-100
+        // Security checks on tool arguments (optional)
+        "contains_secrets"?: Bool,
+        "secret_types"?: Set<String>,
+        "pii_detected"?: Bool,
+        "pii_types"?: Set<String>,
+        "injection_score"?: Long,        // presumably 0-100 as in ProcessPromptContext — TODO confirm
+    };
+    /// Context for read_file action
+    type FileReadContext = {
+        // Core metadata (required)
+        "request_id": String,
+        "timestamp": Long,               // Unix timestamp in milliseconds
+        // Security checks on file content (optional)
+        "contains_secrets"?: Bool,
+        "secret_types"?: Set<String>,
+        "pii_detected"?: Bool,
+        "pii_types"?: Set<String>,
+    };
+    /// Context for write_file action
+    type FileWriteContext = {
+        // Core metadata (required)
+        "request_id": String,
+        "timestamp": Long,               // Unix timestamp in milliseconds
+        // Security checks on content being written (optional)
+        "contains_secrets"?: Bool,
+        "secret_types"?: Set<String>,
+        "pii_detected"?: Bool,
+        "pii_types"?: Set<String>,
+    };
+    /// Context for connect_server action (MCP server connections)
+    type ConnectServerContext = {
+        // Core metadata (required)
+        "request_id": String,
+        "timestamp": Long,               // Unix timestamp in milliseconds
+        // MCP context (optional)
+        "mcp_server"?: String,           // MCP server name (e.g., "github", "filesystem")
+        "mcp_server_verified"?: Bool,    // Whether server is from verified registry
+    };
+}