npm - @openbmb/clawxrouter - Versions diffs - 1.0.4 - Mend

@openbmb/clawxrouter 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/config.example.json +204 -0
package/index.ts +398 -0
package/openclaw.plugin.json +97 -0
package/package.json +48 -0
package/prompts/detection-system.md +50 -0
package/prompts/token-saver-judge.md +25 -0
package/src/config-schema.ts +210 -0
package/src/dashboard-config-io.ts +25 -0
package/src/detector.ts +230 -0
package/src/guard-agent.ts +86 -0
package/src/hooks.ts +1428 -0
package/src/live-config.ts +75 -0
package/src/llm-desensitize-worker.ts +7 -0
package/src/llm-detect-worker.ts +7 -0
package/src/local-model.ts +723 -0
package/src/memory-isolation.ts +403 -0
package/src/privacy-proxy.ts +683 -0
package/src/prompt-loader.ts +101 -0
package/src/provider.ts +268 -0
package/src/router-pipeline.ts +380 -0
package/src/routers/configurable.ts +208 -0
package/src/routers/privacy.ts +102 -0
package/src/routers/token-saver.ts +273 -0
package/src/rules.ts +320 -0
package/src/session-manager.ts +377 -0
package/src/session-state.ts +471 -0
package/src/stats-dashboard.ts +3402 -0
package/src/sync-desensitize.ts +48 -0
package/src/sync-detect.ts +49 -0
package/src/token-stats.ts +358 -0
package/src/types.ts +269 -0
package/src/utils.ts +283 -0
package/src/worker-loader.mjs +25 -0

package/src/types.ts ADDED Viewed

@@ -0,0 +1,269 @@
/** Sensitivity tiers, ranked S1 < S2 < S3 (see `levelToNumeric`). */
export type SensitivityLevel =
  | "S1"
  | "S2"
  | "S3";

/** Numeric rank of a sensitivity tier: S1 = 1, S2 = 2, S3 = 3. */
export type SensitivityLevelNumeric =
  | 1
  | 2
  | 3;

/** Identifier of a detector implementation that can be assigned to a checkpoint. */
export type DetectorType =
  | "ruleDetector"
  | "localModelDetector";

/** Named points at which detection can be configured and reported. */
export type Checkpoint =
  | "onUserMessage"
  | "onToolCallProposed"
  | "onToolCallExecuted";
/**
 * API protocol spoken by an edge (local) provider.
 *
 * - `"openai-compatible"` — POST /v1/chat/completions
 *   (Ollama, vLLM, LiteLLM, LocalAI, LMStudio, SGLang …)
 * - `"ollama-native"` — POST /api/chat (Ollama native API, supports streaming natively)
 * - `"custom"` — user-supplied module exporting a `callChat` function
 */
export type EdgeProviderType =
  | "openai-compatible"
  | "ollama-native"
  | "custom";
/** Optional per-level settings for the two elevated tiers (S2 and S3). */
type PerElevatedLevel<T> = {
  S2?: T;
  S3?: T;
};

/** Tool-name and path lists that mark a tool call as sensitive. */
type ToolRuleSet = {
  tools?: string[];
  paths?: string[];
};

/**
 * User-facing configuration of the privacy router. Every field is optional;
 * the defaults quoted below are the ones stated by the package authors.
 */
export type PrivacyConfig = {
  enabled?: boolean;

  /** S2 handling: "proxy" strips PII via local HTTP proxy (default), "local" routes to local model */
  s2Policy?: "proxy" | "local";

  /** Port for the privacy proxy server (default: 8403) */
  proxyPort?: number;

  /** Detectors to run at each checkpoint. */
  checkpoints?: {
    onUserMessage?: DetectorType[];
    onToolCallProposed?: DetectorType[];
    onToolCallExecuted?: DetectorType[];
  };

  /** Rule-based detection inputs, grouped by the level they raise. */
  rules?: {
    keywords?: PerElevatedLevel<string[]>;
    /** Regex patterns for matching sensitive content (strings are compiled to RegExp) */
    patterns?: PerElevatedLevel<string[]>;
    tools?: PerElevatedLevel<ToolRuleSet>;
  };

  /** Local (edge) model connection settings. */
  localModel?: {
    enabled?: boolean;
    /** API protocol type (default: "openai-compatible") */
    type?: EdgeProviderType;
    /** Provider name for OpenClaw routing (e.g. "ollama", "vllm", "lmstudio") */
    provider?: string;
    model?: string;
    endpoint?: string;
    apiKey?: string;
    /** Path to custom provider module (type="custom" only). Must export callChat(). */
    module?: string;
  };

  guardAgent?: {
    id?: string;
    workspace?: string;
    /** Full model reference in "provider/model" format (e.g. "ollama/llama3.2:3b", "vllm/qwen2.5:7b") */
    model?: string;
  };

  session?: {
    isolateGuardHistory?: boolean;
    /** Base directory for session histories (default: ~/.openclaw) */
    baseDir?: string;
    /**
     * Inject full-track conversation history as context when routing to
     * local models (S3 / S2-local). This replaces the sanitized placeholders
     * ("🔒 [Private content]") with actual previous sensitive interactions
     * so the local model has full conversational context.
     * Default: true (when isolateGuardHistory is true)
     */
    injectDualHistory?: boolean;
    /** Max number of messages to inject from dual-track history (default: 20) */
    historyLimit?: number;
  };

  /**
   * Additional provider names to treat as "local" (safe for S3 routing).
   * Built-in local providers: ollama, llama.cpp, localai, llamafile, lmstudio, vllm, mlx, sglang, tgi.
   * Add custom entries here if you run your own inference backend.
   */
  localProviders?: string[];

  /**
   * Tool names exempt from privacy pipeline detection and PII redaction.
   * Default: empty (no tools are exempt). Users can opt-in via config.
   */
  toolAllowlist?: string[];

  /**
   * Per-model pricing for cloud API cost estimation (USD per 1M tokens).
   * Keys are model name strings; lookup tries exact match, then substring match.
   */
  modelPricing?: Record<string, { inputPer1M?: number; outputPer1M?: number }>;

  /**
   * Toggle high-false-positive redaction rules individually.
   * All default to false (off) to avoid over-redaction.
   */
  redaction?: RedactionOptions;
};
/**
 * Opt-in switches for redaction rules that are prone to false positives.
 * Each flag is optional and documented as defaulting to `false`.
 */
export type RedactionOptions = {
  /**
   * Internal IP addresses (10.x, 172.16-31.x, 192.168.x).
   * Default: false
   */
  internalIp?: boolean;

  /**
   * Email addresses.
   * Default: false
   */
  email?: boolean;

  /**
   * .env file content (KEY=VALUE lines).
   * Default: false
   */
  envVar?: boolean;

  /**
   * Credit card number pattern (13-19 digits).
   * Default: false
   */
  creditCard?: boolean;

  /**
   * Chinese mobile phone number (1[3-9]x 11 digits).
   * Default: false
   */
  chinesePhone?: boolean;

  /**
   * Chinese ID card number (18 digits / 17+X).
   * Default: false
   */
  chineseId?: boolean;

  /**
   * Chinese address patterns (省/市/区/路/号 etc.).
   * Default: false
   */
  chineseAddress?: boolean;

  /**
   * PIN / pin code contextual rule.
   * Default: false
   */
  pin?: boolean;
};
/**
 * Input handed to detectors and routers for one detection pass.
 * Only `checkpoint` is required; the remaining fields are filled in as
 * available for the checkpoint being evaluated.
 */
export type DetectionContext = {
  /** Which checkpoint triggered this detection. */
  checkpoint: Checkpoint;

  // Message-related input
  message?: string;
  recentContext?: string[];

  // Tool-call-related input
  toolName?: string;
  toolParams?: Record<string, unknown>;
  toolResult?: unknown;

  // Identification
  sessionKey?: string;
  agentId?: string;

  /** When true, routers should skip the `enabled` check (dry-run from dashboard). */
  dryRun?: boolean;
};
/** Outcome reported by a single detector. */
export type DetectionResult = {
  /** Detector that produced this result. */
  detectorType: DetectorType;
  /** Detected sensitivity tier. */
  level: SensitivityLevel;
  /** Numeric form of the tier, carried alongside `level`. */
  levelNumeric: SensitivityLevelNumeric;
  /** Optional explanation of the classification. */
  reason?: string;
  /** Optional confidence score (scale is not defined by this type). */
  confidence?: number;
};
// ── Router Pipeline Types ───────────────────────────────────────────────

/** Action a router decision may carry (see `RouterDecision.action`). */
export type RouterAction =
  | "passthrough"
  | "redirect"
  | "transform"
  | "block";
/** Provider/model pair a decision routes to. */
type RouterTarget = {
  provider: string;
  model: string;
  /** Set by pipeline merge when the winning provider (clawxrouter-privacy) differs
   *  from the router that originally selected the model.
   *  Used by hooks to stash the correct provider endpoint for the proxy. */
  originalProvider?: string;
};

/** Verdict returned by a router for one detection context. */
export type RouterDecision = {
  /** Sensitivity tier the router assigned. */
  level: SensitivityLevel;
  /** Requested action, if any. */
  action?: RouterAction;
  /** Where to send the request, if the router selected a destination. */
  target?: RouterTarget;
  /** When action is "transform", the transformed prompt content */
  transformedContent?: string;
  /** Optional explanation of the decision. */
  reason?: string;
  /** Optional confidence score (scale is not defined by this type). */
  confidence?: number;
  /** Identifier of the router that produced the decision. */
  routerId?: string;
};
/**
 * Contract every pluggable router implements.
 *
 * The built-in "privacy" router wraps the existing detector + desensitization
 * logic. Users can implement custom routers (cost optimization, content
 * filtering, etc.) and register them in the pipeline config.
 */
export interface ClawXrouterRouter {
  /** Identifier of this router. */
  id: string;

  /**
   * Evaluate one detection context.
   *
   * @param context What is being inspected (message, tool call, …).
   * @param config  Router-specific configuration object.
   * @returns The router's decision for this context.
   */
  detect(context: DetectionContext, config: Record<string, unknown>): Promise<RouterDecision>;
}
/** Per-router entry describing how a router is loaded and weighted. */
export type RouterRegistration = {
  enabled?: boolean;

  /** "builtin" for privacy/rules, "custom" for user modules, "configurable" for dashboard-created */
  type?:
    | "builtin"
    | "custom"
    | "configurable";

  /** Path to custom router module (type="custom" only) */
  module?: string;

  /** Arbitrary config passed to the router's detect() */
  options?: Record<string, unknown>;

  /**
   * Merge weight (0–100, default 50). Higher weight wins when multiple routers
   * produce non-passthrough decisions at the same sensitivity level.
   * Safety routers (privacy) should use high weights; optimization routers
   * (token-saver) should use lower weights so they only take effect when
   * safety routers pass through.
   */
  weight?: number;
};
/**
 * Optional list of strings per checkpoint.
 * NOTE(review): the entries are presumably router ids run at that checkpoint —
 * confirm against the pipeline implementation.
 */
export type PipelineConfig = {
  [Stage in "onUserMessage" | "onToolCallProposed" | "onToolCallExecuted"]?: string[];
};
// ── Session / History Types ─────────────────────────────────────────────

/** Privacy bookkeeping kept for one session. */
export type SessionPrivacyState = {
  sessionKey: string;

  /** @deprecated Replaced by per-turn currentTurnLevel. Kept for backward compat. */
  isPrivate: boolean;

  /** Highest sensitivity level recorded for the session. */
  highestLevel: SensitivityLevel;

  /** Highest sensitivity level detected in the CURRENT turn (reset each turn). */
  currentTurnLevel: SensitivityLevel;

  /** Detections recorded for the session. */
  detectionHistory: {
    timestamp: number;
    level: SensitivityLevel;
    checkpoint: Checkpoint;
    reason?: string;
    routerId?: string;
    action?: string;
    target?: string;
    loopId?: string;
  }[];
};
/** Metadata recorded for one loop, identified by `loopId` within a session. */
export type LoopMeta = {
  // Identity
  loopId: string;
  sessionKey: string;

  // What started the loop and when
  userMessagePreview: string;
  startedAt: number;

  /** Highest sensitivity level recorded for this loop. */
  highestLevel: SensitivityLevel;

  // Routing outcome (optional until known)
  routingTier?: string;
  routedModel?: string;
  routerAction?: string;
};
/** Rank of each sensitivity tier; the single source for level → number mapping. */
const LEVEL_RANKS: ReadonlyMap<SensitivityLevel, SensitivityLevelNumeric> = new Map<
  SensitivityLevel,
  SensitivityLevelNumeric
>([
  ["S1", 1],
  ["S2", 2],
  ["S3", 3],
]);

/**
 * Convert a sensitivity tier to its numeric rank.
 *
 * @param level Tier to convert.
 * @returns 1 for "S1", 2 for "S2", 3 for "S3".
 */
export function levelToNumeric(level: SensitivityLevel): SensitivityLevelNumeric {
  // For the three declared tiers the lookup always hits. The assertion only
  // covers out-of-contract values, which yield `undefined` at runtime.
  return LEVEL_RANKS.get(level) as SensitivityLevelNumeric;
}
/**
 * Convert a numeric rank back to its sensitivity tier.
 *
 * @param numeric Rank to convert.
 * @returns "S3" for 3, "S2" for 2, and "S1" for 1 or any other value.
 */
export function numericToLevel(numeric: SensitivityLevelNumeric): SensitivityLevel {
  if (numeric === 3) {
    return "S3";
  }
  if (numeric === 2) {
    return "S2";
  }
  // 1 and every unrecognized value map to the lowest tier.
  return "S1";
}
/**
 * Return the highest sensitivity tier among the arguments.
 *
 * Unrecognized values (e.g. a malformed level read from persisted state or
 * config) are ignored. A NaN-based maximum would otherwise let one bad entry
 * collapse the whole result to "S1", hiding a genuine "S3" — a fail-open
 * outcome for privacy routing.
 *
 * @param levels Tiers to compare; may be empty.
 * @returns "S3" if any argument is "S3", else "S2" if any is "S2", else "S1"
 *          (also the result for an empty argument list).
 */
export function maxLevel(...levels: SensitivityLevel[]): SensitivityLevel {
  let highest: SensitivityLevel = "S1";
  for (const level of levels) {
    if (level === "S3") {
      // Nothing ranks above S3, so the scan can stop here.
      return "S3";
    }
    if (level === "S2") {
      highest = "S2";
    }
  }
  return highest;
}

package/src/utils.ts ADDED Viewed

@@ -0,0 +1,283 @@
/**
 * Expand a leading `~` to the user's home directory so paths can be compared.
 *
 * Only `~` and `~/…` are expanded. `~user/…`, relative paths and absolute
 * paths are returned unchanged (relative paths are NOT resolved).
 *
 * @param path Path as written by the user or found in tool parameters.
 * @returns The path with its leading `~` replaced by `$HOME` (or
 *          `%USERPROFILE%`). When neither variable is set the input is
 *          returned as-is.
 */
export function normalizePath(path: string): string {
  const isHomeRelative = path === "~" || path.startsWith("~/");
  if (!isHomeRelative) {
    return path;
  }
  // `||` rather than `??` on purpose: an empty HOME should fall through to USERPROFILE.
  const home = process.env.HOME || process.env.USERPROFILE || "~";
  // Concatenate instead of String.replace(): a replacement string would have
  // `$&`, `$1`, `$$` … inside the home directory interpreted as patterns.
  return home + path.slice(1);
}
/**
 * Check whether a path matches any of the configured patterns.
 *
 * A pattern matches when, after `~` expansion of both sides:
 *   - it equals the path exactly, or
 *   - it is a directory prefix of the path (`/` or `\` separator), or
 *   - it starts with `*` and the path ends with the rest of the pattern
 *     (e.g. `*.pem`).
 *
 * Trailing separators on a pattern are ignored, so `~/.ssh/` protects the
 * same files as `~/.ssh`. Without this, a directory pattern written with a
 * trailing slash could never prefix-match.
 *
 * @param path     Path to test.
 * @param patterns Exact paths, directory prefixes, or `*suffix` patterns.
 * @returns `true` if at least one pattern matches.
 */
export function matchesPathPattern(path: string, patterns: string[]): boolean {
  const normalizedPath = normalizePath(path);

  return patterns.some((pattern) => {
    const normalizedPattern = normalizePath(pattern);
    // Directory form of the pattern, without trailing "/" or "\".
    const directory = normalizedPattern.replace(/[\\/]+$/, "");

    // Exact match
    if (normalizedPath === normalizedPattern || normalizedPath === directory) {
      return true;
    }

    // Prefix match (directory)
    if (normalizedPath.startsWith(directory + "/") || normalizedPath.startsWith(directory + "\\")) {
      return true;
    }

    // Suffix match (file extension); uses the raw pattern, as "*" is never a home prefix.
    return pattern.startsWith("*") && normalizedPath.endsWith(pattern.slice(1));
  });
}
/**
 * Collect candidate filesystem paths from a tool's parameters.
 *
 * Results are returned in this order:
 *   1. trimmed string values of well-known path keys,
 *   2. paths embedded in command-like string values,
 *   3. paths found recursively in nested plain objects (arrays are skipped).
 *
 * @param params Tool parameters as a plain key/value object.
 * @returns All paths found; empty when none.
 */
export function extractPathsFromParams(params: Record<string, unknown>): string[] {
  // Non-blank string values stored under the given keys, in key order.
  const nonBlankStringsAt = (keys: readonly string[]): string[] =>
    keys
      .map((key) => params[key])
      .filter((value): value is string => typeof value === "string" && value.trim() !== "");

  // Common path parameter names
  const directPaths = nonBlankStringsAt([
    "path",
    "file",
    "filepath",
    "filename",
    "dir",
    "directory",
    "target",
    "source",
  ]).map((value) => value.trim());

  // Filesystem paths embedded in command strings
  const commandPaths = nonBlankStringsAt(["command", "cmd", "script"]).flatMap((command) =>
    extractPathsFromCommand(command),
  );

  // Nested objects (not arrays) are searched with the same rules
  const nestedPaths = Object.values(params)
    .filter((value) => Boolean(value) && typeof value === "object" && !Array.isArray(value))
    .flatMap((child) => extractPathsFromParams(child as Record<string, unknown>));

  return [...directPaths, ...commandPaths, ...nestedPaths];
}
/**
 * Pull path-looking tokens out of a shell command string.
 *
 * Recognizes absolute paths (`/...`) and home-relative paths (`~/...`) made of
 * word characters, dots and dashes.
 *
 * @param command Shell command text.
 * @returns Every match in order of appearance; empty when there is none.
 */
function extractPathsFromCommand(command: string): string[] {
  const pathRegex = /(?:\/[\w.\-]+(?:\/[\w.\-]*)*|~\/[\w.\-]+(?:\/[\w.\-]*)*)/g;
  return Array.from(command.matchAll(pathRegex), (hit) => hit[0]);
}
/** Keyword→value connector that requires a verb (is/are/was/were) or a `=` / `:` delimiter. */
const STRICT_CONNECT = "(?:\\s+(?:is|are|was|were)(?:\\s+(?:in|at|on|of|for))*|\\s*[=:])\\s*";

/** Like STRICT_CONNECT, but a plain run of whitespace between keyword and value is also accepted. */
const LOOSE_CONNECT = "(?:\\s+(?:is|are|was|were)(?:\\s+(?:in|at|on|of|for))*\\s*|\\s*[=:]\\s*|\\s+)";

/** A keyword-gated redaction rule: the whole match is replaced by `[REDACTED:<label>]`. */
type ContextualRule = { pattern: RegExp; label: string };

/** Build a rule matching `<keywords><connect><optionally quoted value of 2+ non-space chars>`. */
function contextualRule(keywords: string, connect: string, label: string): ContextualRule {
  return {
    pattern: new RegExp(`(?:${keywords})${connect}["']?([^\\s"']{2,})["']?`, "gi"),
    label,
  };
}

/**
 * Always-on contextual rules, applied in this order.
 * Compiled once; String.prototype.replace resets `lastIndex` on global
 * regexes, so sharing them across calls is safe.
 */
const CONTEXTUAL_RULES: readonly ContextualRule[] = [
  contextualRule("password|passwd|pwd|passcode", LOOSE_CONNECT, "PASSWORD"),
  contextualRule("credit\\s*card|card\\s*(?:number|no\\.?)", STRICT_CONNECT, "CARD"),
  contextualRule("api[_\\s]?key|access[_\\s]?key|SECRET_KEY|API_KEY", LOOSE_CONNECT, "API_KEY"),
  contextualRule("secret", STRICT_CONNECT, "SECRET"),
  contextualRule("(?:auth[_\\s]?)?token|bearer", LOOSE_CONNECT, "TOKEN"),
  contextualRule("(?:credential|cred)s?", LOOSE_CONNECT, "CREDENTIAL"),
  contextualRule("ssn|social\\s*security(?:\\s*(?:number|no\\.?))?", STRICT_CONNECT, "SSN"),
];

/** Opt-in PIN rule (`opts.pin`), applied after the always-on rules. */
const PIN_RULE: ContextualRule = contextualRule("pin(?:\\s*(?:code|number))?", STRICT_CONNECT, "PIN");

/**
 * Sanitize sensitive information from text (comprehensive rule-based redaction).
 * Used for S2 desensitization: redact known patterns then forward to cloud.
 *
 * Two-phase approach:
 *   Phase 1 – Pattern-based: well-known formats (SSH keys, API keys, IPs, etc.)
 *   Phase 2 – Context-based: keyword + connecting words + value
 *             e.g. "password is in abc123" → "[REDACTED:PASSWORD]"
 *             (the keyword and connector are replaced together with the value)
 *
 * Some rules are opt-in via `RedactionOptions` to avoid false positives.
 *
 * @param text Text to sanitize.
 * @param opts Opt-in switches; every omitted flag is treated as off.
 * @returns The text with each match replaced by a `[REDACTED:<LABEL>]` marker.
 */
export function redactSensitiveInfo(text: string, opts?: import("./types.js").RedactionOptions): string {
  let redacted = text;

  // ── Phase 1: Pattern-based redaction (always on — low false-positive) ─────
  // Redact SSH private key blocks
  redacted = redacted.replace(
    /-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----/g,
    "[REDACTED:PRIVATE_KEY]"
  );
  // Redact API keys (sk-xxx, key-xxx patterns)
  redacted = redacted.replace(/\b(?:sk|key|token)-[A-Za-z0-9]{16,}\b/g, "[REDACTED:KEY]");
  // Redact AWS Access Key IDs
  redacted = redacted.replace(/AKIA[0-9A-Z]{16}/g, "[REDACTED:AWS_KEY]");
  // Redact database connection strings
  redacted = redacted.replace(
    /(?:mysql|postgres|postgresql|mongodb|redis|amqp):\/\/[^\s"']+/gi,
    "[REDACTED:DB_CONNECTION]"
  );

  // ── Phase 1a: Opt-in pattern rules (off by default to avoid false positives) ──
  if (opts?.internalIp) {
    // Every alternative covers the first TWO octets (10.x | 172.16-31 | 192.168),
    // so the shared tail always completes a four-octet address. With a bare
    // "10" prefix only three octets would match and the last one would leak.
    redacted = redacted.replace(
      /\b(?:10\.\d{1,3}|172\.(?:1[6-9]|2\d|3[01])|192\.168)\.\d{1,3}\.\d{1,3}\b/g,
      "[REDACTED:INTERNAL_IP]"
    );
  }
  if (opts?.email) {
    redacted = redacted.replace(/[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/g, "[REDACTED:EMAIL]");
  }
  if (opts?.envVar) {
    redacted = redacted.replace(
      /^(?:export\s+)?[A-Z_]{2,}=(?:["'])?[^\s"']+(?:["'])?$/gm,
      "[REDACTED:ENV_VAR]"
    );
  }
  if (opts?.creditCard) {
    redacted = redacted.replace(
      /\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{1,7}\b/g,
      "[REDACTED:CARD_NUMBER]"
    );
  }

  // ── Phase 1b: Chinese PII pattern-based redaction (opt-in) ─────────────────
  if (opts?.chinesePhone) {
    redacted = redacted.replace(/(?<!\d)1[3-9]\d{9}(?!\d)/g, "[REDACTED:PHONE]");
  }
  if (opts?.chineseId) {
    redacted = redacted.replace(/(?<!\d)\d{17}[\dXx](?!\d)/g, "[REDACTED:ID]");
  }
  // Chinese delivery tracking numbers (keyword-gated, low false-positive — always on)
  redacted = redacted.replace(
    /(?:快递单号|运单号|取件码)[：:\s]*[A-Za-z0-9]{6,20}/g,
    "[REDACTED:DELIVERY]"
  );
  // Door access codes following keywords (keyword-gated, low false-positive — always on)
  redacted = redacted.replace(
    /(?:门禁码|门禁密码|门锁密码|开门密码)[：:\s]*[A-Za-z0-9#*]{3,12}/g,
    "[REDACTED:ACCESS_CODE]"
  );
  if (opts?.chineseAddress) {
    redacted = redacted.replace(
      /[\u4e00-\u9fa5]{2,}(?:省|市|区|县|镇|路|街|巷|弄|号|栋|幢|室|楼|单元|门牌)\d*[\u4e00-\u9fa5\d]*/g,
      "[REDACTED:ADDRESS]"
    );
  }

  // ── Phase 2: Context-based redaction ──────────────────────────────────────
  // Match: <keyword> <connecting words> <actual value>
  // This catches patterns like "password is abc123", "credit card number is in 12896489bf".
  // Broad keywords use STRICT_CONNECT; credential keywords, where the next
  // word is very likely the value, use LOOSE_CONNECT.
  const rules = opts?.pin ? [...CONTEXTUAL_RULES, PIN_RULE] : CONTEXTUAL_RULES;
  for (const rule of rules) {
    redacted = redacted.replace(rule.pattern, `[REDACTED:${rule.label}]`);
  }
  return redacted;
}
/**
 * Tell whether a path points at protected memory/history data that cloud
 * models should not access.
 *
 * A path is protected when it starts with one of the "full" locations under
 * `baseDir`, or when it contains a `/sessions/full/` or `/memory-full/`
 * segment or ends with `/MEMORY-FULL.md` anywhere on disk.
 *
 * @param filePath Path to check (`~/` is expanded).
 * @param baseDir  Root of the OpenClaw data directory (`~/` is expanded).
 * @returns `true` when the path is protected.
 */
export function isProtectedMemoryPath(filePath: string, baseDir: string = "~/.openclaw"): boolean {
  const candidate = normalizePath(filePath);
  // Escape regex metacharacters so the base directory is matched literally.
  const baseSource = normalizePath(baseDir).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Locations under baseDir that cloud models must NOT read
  const anchoredSources = [
    `${baseSource}/agents/[^/]+/sessions/full`,
    `${baseSource}/[^/]+/MEMORY-FULL\\.md`,
    `${baseSource}/[^/]+/memory-full`,
  ];
  const isUnderProtectedBase = anchoredSources.some((source) =>
    new RegExp(`^${source}`).test(candidate),
  );
  if (isUnderProtectedBase) {
    return true;
  }

  // Direct "full" history paths, wherever they live
  const hasProtectedSegment = ["/sessions/full/", "/memory-full/"].some((segment) =>
    candidate.includes(segment),
  );
  return hasProtectedSegment || candidate.endsWith("/MEMORY-FULL.md");
}
/**
 * Pick the default base URL for a provider from its name and API type.
 *
 * Matching is case-insensitive and checked in this order:
 *   1. Google — provider is "google" or contains "gemini"/"vertex", or the
 *      API type contains "google"/"gemini";
 *   2. Anthropic — provider is "anthropic" or the API type is "anthropic-messages";
 *   3. everything else — the OpenAI endpoint.
 *
 * @param provider Provider name.
 * @param api      Optional API type identifier.
 * @returns The base URL for the matched family.
 */
export function resolveDefaultBaseUrl(provider: string, api?: string): string {
  const providerName = provider.toLowerCase();
  const apiName = (api ?? "").toLowerCase();

  const looksLikeGoogle =
    providerName === "google" ||
    ["gemini", "vertex"].some((marker) => providerName.includes(marker)) ||
    ["google", "gemini"].some((marker) => apiName.includes(marker));
  if (looksLikeGoogle) {
    return "https://generativelanguage.googleapis.com/v1beta";
  }

  const looksLikeAnthropic = providerName === "anthropic" || apiName === "anthropic-messages";
  return looksLikeAnthropic ? "https://api.anthropic.com" : "https://api.openai.com/v1";
}

package/src/worker-loader.mjs ADDED Viewed

@@ -0,0 +1,25 @@
/**
 * Registers a minimal ESM `resolve` hook that lets ".js" import specifiers
 * load co-located ".ts" files.
 *
 * For every specifier ending in ".js" (other than `node:` builtins) the hook
 * first tries to resolve the same specifier with a ".ts" extension. If that
 * attempt throws, it resolves the original specifier unchanged. Note the
 * order: the ".ts" variant is preferred whenever it resolves, even if a ".js"
 * file is also present.
 *
 * Why it exists (per the original authors): Node.js v25 strips TS types
 * natively but does NOT rewrite ".js" → ".ts" in import specifiers the way
 * tsx/ts-node do, so worker threads spawned by synckit fail to resolve .ts
 * modules imported via the conventional ".js" extension.
 */
import { register } from "node:module";

// Source text of the hook module. It is shipped as a data: URL, so it must be
// self-contained and cannot reference anything from this file.
const RESOLVE_HOOK_SOURCE = `export async function resolve(specifier, context, nextResolve) {
  if (specifier.endsWith('.js') && !specifier.startsWith('node:')) {
    try {
      return await nextResolve(specifier.replace(/\\.js$/, '.ts'), context);
    } catch {
      // .ts variant not found — fall through to original specifier
    }
  }
  return nextResolve(specifier, context);
}`;

register(`data:text/javascript,${encodeURIComponent(RESOLVE_HOOK_SOURCE)}`, import.meta.url);