npm - clawguard-openclaw - Versions diffs - 1.0.0 - Mend

clawguard-openclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/src/guards.ts ADDED Viewed

@@ -0,0 +1,456 @@
+/**
+ * ClawGuard Guards
+ * Input, Runtime, and Output guards for the Lethal Trifecta
+ *
+ * SOTA features:
+ * - Adversarial suffix detection (GCG attacks)
+ * - Multi-turn context tracking
+ * - Source-aware thresholds
+ * - Spotlighting support
+ */
+import {
+  INJECTION_PATTERNS,
+  I18N_PATTERNS,
+  CREDENTIAL_PATTERNS,
+  PII_PATTERNS,
+  DANGEROUS_TOOL_PARAMS,
+} from "./patterns.js";
+import {
+  analyzeAdversarialPatterns,
+  type AdversarialAnalysis,
+  type MessageSource,
+  SOURCE_THRESHOLDS,
+  SOURCE_MULTIPLIERS,
+  ContextTracker,
+  applySpotlight,
+  type SpotlightConfig,
+} from "./analyzers.js";
+// =============================================================================
+// Types
+// =============================================================================
+export type ThreatLevel = "none" | "low" | "medium" | "high" | "critical";
+export type ThreatCategory =
+  | "injection"
+  | "credential_leak"
+  | "pii_leak"
+  | "exfiltration"
+  | "dangerous_tool"
+  | "canary_leak";
+export interface Threat {
+  category: ThreatCategory;
+  level: ThreatLevel;
+  score: number;
+  description: string;
+  matched?: string;
+  redacted?: string;
+}
+export interface ScanResult {
+  safe: boolean;
+  score: number;
+  level: ThreatLevel;
+  threats: Threat[];
+  redactedText?: string;
+}
+export interface GuardConfig {
+  inputGuard?: {
+    enabled?: boolean;
+    threshold?: number;
+    blockOnDetection?: boolean;
+    // SOTA features
+    useAdversarialDetection?: boolean;
+    useMultiTurnTracking?: boolean;
+    spotlighting?: SpotlightConfig;
+  };
+  runtimeGuard?: {
+    enabled?: boolean;
+    dangerousTools?: string[];
+    blockExfilUrls?: boolean;
+    requireApproval?: boolean;
+  };
+  outputGuard?: {
+    enabled?: boolean;
+    redactCredentials?: boolean;
+    redactPII?: boolean;
+    canaryTokens?: string[];
+  };
+}
+// Global context tracker for multi-turn detection
+const globalContextTracker = new ContextTracker();
+// =============================================================================
+// Utility Functions
+// =============================================================================
+function scoreToLevel(score: number): ThreatLevel {
+  if (score >= 80) return "critical";
+  if (score >= 60) return "high";
+  if (score >= 40) return "medium";
+  if (score >= 20) return "low";
+  return "none";
+}
+function normalizeText(text: string): string {
+  // Strip zero-width characters
+  let normalized = text.replace(/[\u200B-\u200F\u2060-\u206F\uFEFF]/g, "");
+  // Normalize unicode
+  normalized = normalized.normalize("NFKC");
+  return normalized;
+}
+function decodeIfEncoded(text: string): string {
+  let decoded = text;
+  // Base64 detection and decode
+  const base64Regex = /^[A-Za-z0-9+/]+=*$/;
+  if (base64Regex.test(text.trim()) && text.length > 20) {
+    try {
+      decoded = atob(text.trim());
+    } catch {
+      // Not valid base64
+    }
+  }
+  // URL decode
+  if (text.includes("%")) {
+    try {
+      decoded = decodeURIComponent(text);
+    } catch {
+      // Invalid URL encoding
+    }
+  }
+  return decoded;
+}
+// =============================================================================
+// Input Guard (Leg 1: Prompt Injection)
+// =============================================================================
+export interface InputScanOptions {
+  threshold?: number;
+  source?: MessageSource;
+  sessionId?: string;
+  useAdversarialDetection?: boolean;
+  useMultiTurnTracking?: boolean;
+}
+export interface InputScanResult extends ScanResult {
+  adversarialAnalysis?: AdversarialAnalysis;
+  multiTurnRisk?: number;
+  sourceMultiplier?: number;
+  adjustedThreshold?: number;
+}
+export function scanInput(
+  text: string,
+  config: GuardConfig["inputGuard"] & InputScanOptions = {}
+): InputScanResult {
+  const {
+    threshold: baseThreshold = 50,
+    source = 'user',
+    sessionId,
+    useAdversarialDetection = true,
+    useMultiTurnTracking = true,
+  } = config;
+  const threats: Threat[] = [];
+  let totalScore = 0;
+  // Source-aware threshold adjustment
+  const sourceThreshold = SOURCE_THRESHOLDS[source] ?? baseThreshold;
+  const adjustedThreshold = Math.min(baseThreshold, sourceThreshold);
+  const sourceMultiplier = SOURCE_MULTIPLIERS[source] ?? 1.0;
+  // Normalize and decode
+  const normalized = normalizeText(text);
+  const decoded = decodeIfEncoded(normalized);
+  const textsToScan = [normalized];
+  if (decoded !== normalized) textsToScan.push(decoded);
+  for (const scanText of textsToScan) {
+    // Check main injection patterns
+    for (const { pattern, weight, category } of INJECTION_PATTERNS) {
+      const match = scanText.match(pattern);
+      if (match) {
+        threats.push({
+          category: "injection",
+          level: scoreToLevel(weight),
+          score: weight,
+          description: `Injection pattern detected (${category})`,
+          matched: match[0].slice(0, 100),
+        });
+        totalScore += weight;
+      }
+    }
+    // Check i18n patterns
+    for (const { pattern, weight, category, lang } of I18N_PATTERNS) {
+      const match = scanText.match(pattern);
+      if (match) {
+        threats.push({
+          category: "injection",
+          level: scoreToLevel(weight),
+          score: weight,
+          description: `Injection pattern detected (${category}, ${lang})`,
+          matched: match[0].slice(0, 100),
+        });
+        totalScore += weight;
+      }
+    }
+  }
+  // SOTA: Adversarial suffix detection (GCG attacks)
+  let adversarialAnalysis: AdversarialAnalysis | undefined;
+  if (useAdversarialDetection) {
+    adversarialAnalysis = analyzeAdversarialPatterns(text);
+    if (adversarialAnalysis.isAdversarial) {
+      threats.push({
+        category: "injection",
+        level: scoreToLevel(adversarialAnalysis.confidence),
+        score: adversarialAnalysis.confidence,
+        description: `Adversarial pattern detected: ${adversarialAnalysis.signals.join(', ')}`,
+        matched: adversarialAnalysis.suspiciousSegments[0]?.text,
+      });
+      totalScore += adversarialAnalysis.confidence * 0.5; // Weight adversarial at 50%
+    }
+  }
+  // SOTA: Multi-turn context tracking
+  let multiTurnRisk = 0;
+  if (useMultiTurnTracking && sessionId) {
+    const threatDescriptions = threats.map(t => t.description);
+    globalContextTracker.addMessage(sessionId, text, totalScore, threatDescriptions);
+    multiTurnRisk = globalContextTracker.getMultiTurnRiskBonus(sessionId);
+    if (multiTurnRisk > 10) {
+      threats.push({
+        category: "injection",
+        level: scoreToLevel(multiTurnRisk),
+        score: multiTurnRisk,
+        description: `Multi-turn attack pattern detected (cumulative risk)`,
+      });
+    }
+    totalScore += multiTurnRisk;
+  }
+  // Apply source multiplier
+  totalScore = Math.round(totalScore * sourceMultiplier);
+  // Cap at 100
+  totalScore = Math.min(totalScore, 100);
+  const level = scoreToLevel(totalScore);
+  const safe = totalScore < adjustedThreshold;
+  return {
+    safe,
+    score: totalScore,
+    level,
+    threats,
+    adversarialAnalysis,
+    multiTurnRisk,
+    sourceMultiplier,
+    adjustedThreshold,
+  };
+}
+// Legacy compatible wrapper
+export function scanInputSimple(text: string, threshold = 50): ScanResult {
+  const result = scanInput(text, { threshold });
+  return {
+    safe: result.safe,
+    score: result.score,
+    level: result.level,
+    threats: result.threats,
+  };
+}
+// =============================================================================
+// Runtime Guard (Leg 2: Tool Interception)
+// =============================================================================
+export interface ToolCallContext {
+  toolName: string;
+  params: Record<string, unknown>;
+}
+export interface RuntimeScanResult extends ScanResult {
+  shouldBlock: boolean;
+  requiresApproval: boolean;
+  reason?: string;
+}
+export function scanToolCall(
+  context: ToolCallContext,
+  config: GuardConfig["runtimeGuard"] = {}
+): RuntimeScanResult {
+  const {
+    dangerousTools = ["exec", "write", "edit"],
+    blockExfilUrls = true,
+  } = config;
+  const threats: Threat[] = [];
+  let totalScore = 0;
+  let shouldBlock = false;
+  let requiresApproval = false;
+  let reason: string | undefined;
+  const { toolName, params } = context;
+  const paramStr = JSON.stringify(params);
+  // Check if tool is in dangerous list
+  const isDangerousTool = dangerousTools.includes(toolName);
+  // Check tool-specific dangerous patterns
+  const toolPatterns = DANGEROUS_TOOL_PARAMS[toolName as keyof typeof DANGEROUS_TOOL_PARAMS];
+  if (toolPatterns) {
+    for (const pattern of toolPatterns) {
+      if (pattern.test(paramStr)) {
+        threats.push({
+          category: "dangerous_tool",
+          level: "high",
+          score: 60,
+          description: `Dangerous pattern in ${toolName} params`,
+          matched: paramStr.slice(0, 100),
+        });
+        totalScore += 60;
+        if (isDangerousTool) {
+          shouldBlock = true;
+          reason = `Dangerous command pattern detected in ${toolName}`;
+        }
+      }
+    }
+  }
+  // Check for exfiltration URLs
+  if (blockExfilUrls) {
+    const exfilPatterns = DANGEROUS_TOOL_PARAMS.web_fetch || [];
+    for (const pattern of exfilPatterns) {
+      if (pattern.test(paramStr)) {
+        threats.push({
+          category: "exfiltration",
+          level: "critical",
+          score: 80,
+          description: "Potential data exfiltration URL detected",
+          matched: paramStr.match(pattern)?.[0],
+        });
+        totalScore += 80;
+        shouldBlock = true;
+        reason = "Exfiltration URL detected";
+      }
+    }
+  }
+  // Flag dangerous tools for potential approval
+  if (isDangerousTool && threats.length > 0) {
+    requiresApproval = config.requireApproval ?? false;
+  }
+  totalScore = Math.min(totalScore, 100);
+  const level = scoreToLevel(totalScore);
+  const safe = totalScore < 50 && !shouldBlock;
+  return { safe, score: totalScore, level, threats, shouldBlock, requiresApproval, reason };
+}
+// =============================================================================
+// Output Guard (Leg 3: Leak Prevention)
+// =============================================================================
+export interface OutputScanResult extends ScanResult {
+  redactedText: string;
+  leaksFound: Array<{ type: string; count: number }>;
+}
+export function scanOutput(
+  text: string,
+  config: GuardConfig["outputGuard"] = {}
+): OutputScanResult {
+  const {
+    redactCredentials = true,
+    redactPII = true,
+    canaryTokens = [],
+  } = config;
+  const threats: Threat[] = [];
+  let redactedText = text;
+  const leaksFound: Array<{ type: string; count: number }> = [];
+  let totalScore = 0;
+  // Check for canary tokens first (highest priority)
+  for (const canary of canaryTokens) {
+    if (text.includes(canary)) {
+      threats.push({
+        category: "canary_leak",
+        level: "critical",
+        score: 100,
+        description: "Canary token detected in output - potential prompt leak",
+        matched: canary,
+      });
+      totalScore = 100;
+      redactedText = redactedText.replaceAll(canary, "[CANARY_REDACTED]");
+    }
+  }
+  // Scan for credentials
+  if (redactCredentials) {
+    for (const { name, pattern } of CREDENTIAL_PATTERNS) {
+      const matches = text.match(pattern);
+      if (matches) {
+        const uniqueMatches = [...new Set(matches)];
+        leaksFound.push({ type: name, count: uniqueMatches.length });
+        for (const match of uniqueMatches) {
+          threats.push({
+            category: "credential_leak",
+            level: "critical",
+            score: 90,
+            description: `Credential detected: ${name}`,
+            matched: match.slice(0, 20) + "...",
+            redacted: `[${name.toUpperCase()}_REDACTED]`,
+          });
+          // Redact the credential
+          redactedText = redactedText.replaceAll(match, `[${name.toUpperCase()}_REDACTED]`);
+        }
+        totalScore = Math.max(totalScore, 90);
+      }
+    }
+  }
+  // Scan for PII
+  if (redactPII) {
+    for (const { name, pattern } of PII_PATTERNS) {
+      const matches = text.match(pattern);
+      if (matches) {
+        const uniqueMatches = [...new Set(matches)];
+        leaksFound.push({ type: name, count: uniqueMatches.length });
+        for (const match of uniqueMatches) {
+          threats.push({
+            category: "pii_leak",
+            level: "high",
+            score: 60,
+            description: `PII detected: ${name}`,
+            matched: match.slice(0, 10) + "...",
+            redacted: `[${name.toUpperCase()}_REDACTED]`,
+          });
+          redactedText = redactedText.replaceAll(match, `[${name.toUpperCase()}_REDACTED]`);
+        }
+        totalScore = Math.max(totalScore, 60);
+      }
+    }
+  }
+  const level = scoreToLevel(totalScore);
+  const safe = totalScore < 50;
+  return { safe, score: totalScore, level, threats, redactedText, leaksFound };
+}