npm - agent-security-scanner-mcp - Versions diffs - 3.18.0 → 3.19.0 - Mend

agent-security-scanner-mcp 3.18.0 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/analyzer.py +23 -2
package/cross_file_analyzer.py +478 -5
package/package.json +3 -2
package/python_taint_fallback.py +688 -0
package/rules/__init__.py +42 -3
package/rules/prompt-injection.security.yaml +4 -4
package/src/fix-patterns.js +9 -9
package/src/history.js +1 -1
package/src/tools/check-package.js +15 -0
package/src/tools/scan-prompt.js +44 -31
package/src/tools/scan-skill.js +42 -22

package/rules/__init__.py CHANGED Viewed

@@ -201,16 +201,55 @@ def get_rules():
     return rules
-def get_rules_for_language(language):
-    """Get rules applicable to a specific language"""
+def get_rules_for_language(language, file_path=None):
+    """Get rules applicable to a specific language.
+    Generic rules that declare a specific technology in their metadata are
+    only applied when the scanned language or file path indicates that
+    technology is relevant.  This prevents, e.g., Hugo-specific rules from
+    firing on plain JavaScript database code.
+    """
     all_rules = get_rules()
     applicable_rules = {}
     language = language.lower()
+    # Map technology names to the languages/file-path hints where they apply
+    _TECH_LANGUAGES = {
+        'hugo': {'go', 'html', 'toml', 'yaml'},
+        'django': {'python', 'html'},
+        'rails': {'ruby', 'html', 'erb'},
+        'spring': {'java', 'kotlin'},
+        'laravel': {'php'},
+        'angular': {'typescript', 'javascript', 'html'},
+        'react': {'javascript', 'typescript', 'jsx', 'tsx'},
+    }
     for rule_id, rule in all_rules.items():
         rule_languages = [lang.lower() for lang in rule.get('languages', ['generic'])]
-        if language in rule_languages or 'generic' in rule_languages:
+        if language in rule_languages:
+            applicable_rules[rule_id] = rule
+            continue
+        if 'generic' in rule_languages:
+            # Check if this generic rule is scoped to a specific technology
+            techs = rule.get('metadata', {}).get('technology')
+            if techs and isinstance(techs, list):
+                # Only apply if the current language is relevant to the technology
+                tech_relevant = False
+                for tech in techs:
+                    tech_lower = tech.lower()
+                    allowed = _TECH_LANGUAGES.get(tech_lower)
+                    if allowed and language in allowed:
+                        tech_relevant = True
+                        break
+                    # Also check if the technology name appears in the file path
+                    if file_path and tech_lower in file_path.lower():
+                        tech_relevant = True
+                        break
+                if not tech_relevant:
+                    continue
             applicable_rules[rule_id] = rule
     return applicable_rules

package/rules/prompt-injection.security.yaml CHANGED Viewed

@@ -672,8 +672,8 @@ rules:
     severity: WARNING
     message: "Potential Base64-encoded prompt injection payload. Encoded content may hide malicious instructions."
     patterns:
-      - "(?i)decode\\s+(this\\s+)?base64\\s*:\\s*[A-Za-z0-9+/=]{20,}"
-      - "(?i)base64\\s*:\\s*[A-Za-z0-9+/=]{40,}"
+      - "(?i)decode\\s+(this\\s+)?base64\\s*:\\s*[A-Za-z0-9+/=]{20,200}"
+      - "(?i)base64\\s*:\\s*[A-Za-z0-9+/=]{40,200}"
       - "aWdub3JlIHByZXZpb3Vz"
       - "c3lzdGVtIHByb21wdA=="
       - "(?i)execute\\s+(this\\s+)?encoded"
@@ -682,8 +682,8 @@ rules:
       - "aWdub3JlIGFsbC"
       - "b3ZlcnJpZGU="
       - "(?i)base64.{0,20}instructions?.{0,20}follow"
-      - "[A-Za-z0-9+/]{40,}={0,2}\\s*.{0,20}(?i)(decode|execute|follow|run)"
-      - "(?i)(decode|run|execute)\\s+.{0,20}[A-Za-z0-9+/]{40,}={0,2}"
+      - "[A-Za-z0-9+/]{40,200}={0,2}[^A-Za-z0-9+/=].{0,20}(?:decode|execute|follow|run)"
+      - "(?i)(decode|run|execute)\\s+.{0,20}[A-Za-z0-9+/]{40,200}={0,2}"
     metadata:
       cwe: "CWE-77"
       owasp: "LLM01 - Prompt Injection"

package/src/fix-patterns.js CHANGED Viewed

@@ -20,7 +20,7 @@ export const FIX_TEMPLATES = {
   // ===========================================
   "sql-injection": {
     description: "Use parameterized queries instead of string concatenation",
-    fix: (line) => line.replace(/["']([^"']*)\s*["']\s*\+\s*(\w+)/, '"$1?", [$2]')
+    fix: (line) => '// TODO: manual fix required — use parameterized queries instead of string concatenation\n// ' + line.trim()
   },
   "nosql-injection": {
     description: "Sanitize MongoDB query inputs",
@@ -28,7 +28,7 @@ export const FIX_TEMPLATES = {
   },
   "raw-query": {
     description: "Use parameterized queries instead of raw SQL",
-    fix: (line) => line.replace(/\.query\s*\(\s*["'`]/, '.query("SELECT * FROM table WHERE id = ?", [')
+    fix: (line) => '// TODO: manual fix required — use parameterized queries instead of raw SQL\n// ' + line.trim()
   },
   // ===========================================
@@ -306,10 +306,10 @@ export const FIX_TEMPLATES = {
   "path-traversal": {
     description: "Resolve real path and validate prefix to prevent traversal",
     fix: (line, lang) => {
-      if (lang === 'python') return line.replace(/open\s*\(\s*(\w+)/, 'open(os.path.realpath($1)  # TODO: validate path prefix');
-      if (lang === 'go') return line.replace(/os\.Open\s*\(\s*(\w+)/, 'os.Open(filepath.Clean($1)  // TODO: validate path prefix');
-      if (lang === 'java') return line.replace(/new File\s*\(\s*(\w+)/, 'new File($1).getCanonicalFile(  // TODO: validate path prefix');
-      return line.replace(/readFileSync\s*\(\s*(\w+)/, 'readFileSync(path.resolve($1)  // TODO: validate path prefix');
+      if (lang === 'python') return '# TODO: manual fix required — use os.path.realpath() and validate the prefix\n# ' + line.trim();
+      if (lang === 'go') return '// TODO: manual fix required — use filepath.Clean() and validate the prefix\n// ' + line.trim();
+      if (lang === 'java') return '// TODO: manual fix required — use getCanonicalFile() and validate the prefix\n// ' + line.trim();
+      return '// TODO: manual fix required — use path.resolve() and validate the prefix\n// ' + line.trim();
     }
   },
@@ -418,7 +418,7 @@ export const FIX_TEMPLATES = {
   // ===========================================
   "xpath-injection": {
     description: "Use parameterized XPath queries",
-    fix: (line) => line.replace(/xpath\s*\(\s*["']([^"']*)\s*["']\s*\+\s*(\w+)/, 'xpath("$1?", [$2]')
+    fix: (line) => '// TODO: manual fix required — use parameterized XPath queries instead of concatenation\n// ' + line.trim()
   },
   // ===========================================
@@ -695,9 +695,9 @@ export const FIX_TEMPLATES = {
     description: "CRITICAL: Never eval() LLM responses - use JSON parsing or ast.literal_eval for safe subset",
     fix: (line, lang) => {
       if (lang === 'python') {
-        return line.replace(/eval\s*\(\s*(\w+)/, 'ast.literal_eval($1  # SECURITY: Use safe parsing only');
+        return line.replace(/eval\s*\(\s*(\w+)\s*\)/, 'ast.literal_eval($1)  # SECURITY: Use safe parsing only');
       }
-      return line.replace(/eval\s*\(\s*(\w+)/, 'JSON.parse($1  /* SECURITY: Use safe JSON parsing */');
+      return line.replace(/eval\s*\(\s*(\w+)\s*\)/, 'JSON.parse($1)  /* SECURITY: Use safe JSON parsing */');
     }
   },
   "exec-llm-response": {

package/src/history.js CHANGED Viewed

@@ -49,7 +49,7 @@ export function saveResult(dirPath, scanResult) {
   };
   writeFileSync(filePath, JSON.stringify(historyEntry, null, 2) + '\n');
-  return filePath;
+  return filePath.replace(/\\/g, '/');
 }
 /**

package/src/tools/check-package.js CHANGED Viewed

@@ -32,6 +32,17 @@ const BLOOM_FILTERS = {
   rubygems: null
 };
+// Flutter/Dart SDK packages are legitimate dependencies even though they do
+// not appear in the pub.dev package dump used for the text-based lookup.
+const DART_SDK_PACKAGES = new Set([
+  'flutter',
+  'flutter_test',
+  'flutter_driver',
+  'flutter_localizations',
+  'flutter_web_plugins',
+  'integration_test',
+]);
 // Load package lists on startup
 export function loadPackageLists() {
   const packagesDir = join(__dirname, '..', '..', 'packages');
@@ -67,6 +78,10 @@ export function loadPackageLists() {
 // Check if a package is hallucinated
 export function isHallucinated(packageName, ecosystem) {
+  if (ecosystem === 'dart' && DART_SDK_PACKAGES.has(packageName)) {
+    return { hallucinated: false, sdkPackage: true };
+  }
   const legitPackages = LEGITIMATE_PACKAGES[ecosystem];
   // First check Set-based lookup (exact match)

package/src/tools/scan-prompt.js CHANGED Viewed

@@ -58,11 +58,41 @@ const CONFIDENCE_MULTIPLIERS = {
 // Maximum prompt size to prevent DoS via large inputs (100KB)
 const MAX_PROMPT_SIZE = 100 * 1024;
+// Maximum text length fed to any single regex to prevent ReDoS.
+// Prompt-injection patterns look for short markers/phrases, so scanning
+// overlapping 2 KB windows covers all realistic payloads while keeping
+// worst-case regex time bounded.
+const REGEX_SCAN_WINDOW = 2048;
+const REGEX_SCAN_OVERLAP = 256;
+/**
+ * Match a regex against text safely — splits long text into overlapping
+ * windows so no single regex call processes more than REGEX_SCAN_WINDOW chars.
+ */
+function safeMatch(text, regex) {
+  if (text.length <= REGEX_SCAN_WINDOW) {
+    return text.match(regex);
+  }
+  for (let offset = 0; offset < text.length; offset += REGEX_SCAN_WINDOW - REGEX_SCAN_OVERLAP) {
+    const chunk = text.slice(offset, offset + REGEX_SCAN_WINDOW);
+    const m = chunk.match(regex);
+    if (m) return m;
+  }
+  return null;
+}
 // Rule caches — loaded once per process, not on every call
 let _agentAttackRulesCache = null;
 let _promptInjectionRulesCache = null;
 let _openClawRulesCache = null;
+function normalizeYamlRegexPattern(pattern) {
+  return pattern
+    .replace(/^["']|["']$/g, '')
+    .replace(/\(\?i\)/g, '')
+    .replace(/\\\\/g, '\\');
+}
 // Load agent attack rules from YAML
 function loadAgentAttackRules() {
   if (_agentAttackRulesCache !== null) return _agentAttackRulesCache;
@@ -108,11 +138,7 @@ function loadAgentAttackRules() {
           inMetadata = true;
         } else if (inPatterns && line.match(/^\s+- /)) {
           let pattern = line.replace(/^\s+- /, '').trim();
-          pattern = pattern.replace(/^["']|["']$/g, '');
-          // Strip Python-style inline flags - JS doesn't support them
-          pattern = pattern.replace(/^\(\?i\)/, '');
-          // Unescape double backslashes from YAML (\\s -> \s)
-          pattern = pattern.replace(/\\\\/g, '\\');
+          pattern = normalizeYamlRegexPattern(pattern);
           if (pattern) rule.patterns.push(pattern);
         } else if (inMetadata && line.match(/^\s+\w+:/)) {
           const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -182,11 +208,7 @@ function loadPromptInjectionRules() {
           inMetadata = true;
         } else if (inPatterns && line.match(/^\s+- /)) {
           let pattern = line.replace(/^\s+- /, '').trim();
-          pattern = pattern.replace(/^["']|["']$/g, '');
-          // Strip Python-style inline flags - JS doesn't support them
-          pattern = pattern.replace(/^\(\?i\)/, '');
-          // Unescape double backslashes from YAML (\\s -> \s)
-          pattern = pattern.replace(/\\\\/g, '\\');
+          pattern = normalizeYamlRegexPattern(pattern);
           if (pattern) rule.patterns.push(pattern);
         } else if (inMetadata && line.match(/^\s+\w+:/)) {
           const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -253,8 +275,7 @@ function loadOpenClawRules() {
           inPatterns = true;
         } else if (inPatterns && line.match(/^\s+- /)) {
           let pattern = line.replace(/^\s+- /, '').trim();
-          pattern = pattern.replace(/^["']|["']$/g, '');
-          pattern = pattern.replace(/\\\\/g, '\\');
+          pattern = normalizeYamlRegexPattern(pattern);
           if (pattern) rule.patterns.push(pattern);
         } else if (line.match(/^\s+\w+:/) && !line.match(/^\s+- /)) {
           inPatterns = false;
@@ -579,22 +600,12 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
     }
   }
-  // Scan expanded text against all rules
-  // Security: Add timeout protection for regex matching
-  const REGEX_TIMEOUT_MS = 1000;
+  // Scan expanded text against all rules using windowed matching to prevent ReDoS
   for (const rule of allRules) {
     for (const pattern of rule.patterns) {
       try {
-        const regex = new RegExp(pattern, 'i');
-        const startTime = Date.now();
-        const match = expandedText.match(regex);
-        // Check for regex timeout (ReDoS protection)
-        if (Date.now() - startTime > REGEX_TIMEOUT_MS) {
-          console.warn(`Regex timeout for rule ${rule.id}, skipping`);
-          break;
-        }
+        const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
+        const match = safeMatch(expandedText, regex);
         if (match) {
           findings.push({
@@ -617,7 +628,9 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
   }
   // 2.8: Runtime base64 decode-and-rescan
-  const base64Regex = /[A-Za-z0-9+/]{40,}={0,2}/g;
+  // Cap base64 match length to avoid matching entire large inputs as one blob.
+  // Real base64 payloads are at most a few KB; 4096 chars ≈ 3KB decoded.
+  const base64Regex = /[A-Za-z0-9+/]{40,4096}={0,2}/g;
   const b64Matches = expandedText.match(base64Regex);
   if (b64Matches) {
     for (const b64str of b64Matches) {
@@ -631,8 +644,8 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
             if (!rule.id.startsWith('generic.prompt')) continue;
             for (const pattern of rule.patterns) {
               try {
-                const regex = new RegExp(pattern, 'i');
-                const match = decoded.match(regex);
+                const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
+                const match = safeMatch(decoded, regex);
                 if (match) {
                   findings.push({
                     rule_id: rule.id + '.base64-decoded',
@@ -674,8 +687,8 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
                   for (const rule of allRules) {
                     for (const pattern of rule.patterns) {
                       try {
-                        const regex = new RegExp(pattern, 'i');
-                        const match = innerDecoded.match(regex);
+                        const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
+                        const match = safeMatch(innerDecoded, regex);
                         if (match) {
                           findings.push({
                             rule_id: rule.id + '.nested-base64-decoded',
@@ -718,7 +731,7 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
       for (const rule of allRules) {
         for (const pattern of rule.patterns) {
           try {
-            const regex = new RegExp(pattern, 'i');
+            const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
             if (regex.test(prevMsg)) {
               prevTotalScore += parseInt(rule.metadata?.risk_score || '50') / 100;
               msgHasMatch = true;

package/src/tools/scan-skill.js CHANGED Viewed

@@ -126,6 +126,12 @@ function normPath(p) { return IS_WIN ? p.toLowerCase() : p; }
 function pathStartsWith(child, parent) {
   return normPath(child) === normPath(parent) || normPath(child).startsWith(normPath(parent) + sep);
 }
+function normalizeRulePattern(pattern) {
+  return pattern
+    .replace(/^["']|["']$/g, '')
+    .replace(/\(\?i\)/g, '')
+    .replace(/\\\\/g, '\\');
+}
 const MAX_CLAWHAVOC_SCAN_LEN = 2 * 1024 * 1024; // 2 MB cap for regex matching
 // ---------------------------------------------------------------------------
@@ -176,9 +182,7 @@ function loadClawHavocRules() {
           inMetadata = true;
         } else if (inPatterns && line.match(/^\s+- /)) {
           let pattern = line.replace(/^\s+- /, '').trim();
-          pattern = pattern.replace(/^["']|["']$/g, '');
-          pattern = pattern.replace(/^\(\?i\)/, '');
-          pattern = pattern.replace(/\\\\/g, '\\');
+          pattern = normalizeRulePattern(pattern);
           if (pattern) rule.patterns.push(pattern);
         } else if (inMetadata && line.match(/^\s+\w+:/)) {
           const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -892,15 +896,44 @@ function generateRecommendation(grade) {
 // ---------------------------------------------------------------------------
 export async function scanSkill({ skill_path, verbosity, baseline }) {
-  // Security: Resolve to canonical path FIRST to prevent TOCTOU and symlink attacks
+  const canonCwd = realpathSync(process.cwd());
+  const configuredSkillRoots = [
+    resolve(homedir(), '.openclaw', 'skills'),
+    resolve(homedir(), '.openclaw', 'workspace', 'skills'),
+  ];
+  const allowedSkillRoots = configuredSkillRoots.map(root => {
+    try {
+      return existsSync(root) ? realpathSync(root) : null;
+    } catch {
+      return null;
+    }
+  }).filter(Boolean);
+  // Reject obvious escapes before touching the filesystem so absolute traversal
+  // attempts fail closed even when the target path does not exist.
   const inputPath = skill_path;
-  let realPath;
+  const requestedPath = resolve(inputPath);
+  const isRequestedAllowed = pathStartsWith(requestedPath, canonCwd)
+    || configuredSkillRoots.some(root => pathStartsWith(requestedPath, root))
+    || allowedSkillRoots.some(root => pathStartsWith(requestedPath, root));
+  if (!isRequestedAllowed) {
+    return {
+      content: [{ type: "text", text: JSON.stringify({
+        error: "skill_path must be within the current working directory or ~/.openclaw/skills/ (or ~/.openclaw/workspace/skills/)",
+        skill_path: requestedPath,
+        attempted_path: inputPath
+      }) }]
+    };
+  }
+  // Resolve to canonical path after the initial boundary check to prevent
+  // symlink escapes while still returning a deterministic security error for
+  // out-of-scope absolute paths.
+  let realPath;
   try {
-    // Resolve to canonical path immediately (defeats symlink attacks)
-    realPath = realpathSync(resolve(inputPath));
+    realPath = realpathSync(requestedPath);
   } catch (err) {
-    // Check for different error types
     let errorMessage;
     if (err.code === 'ENOENT') {
       errorMessage = "Path not found";
@@ -921,20 +954,7 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
     };
   }
-  // Verify containment on canonical path ONLY
-  // This prevents symlink escapes by checking the REAL resolved location
-  const canonCwd = realpathSync(process.cwd());
-  const allowedSkillRoots = [
-    resolve(homedir(), '.openclaw', 'skills'),
-    resolve(homedir(), '.openclaw', 'workspace', 'skills'),
-  ].map(root => {
-    try {
-      return existsSync(root) ? realpathSync(root) : null;
-    } catch {
-      return null;
-    }
-  }).filter(Boolean);
+  // Verify containment on canonical path ONLY.
   const isAllowed = pathStartsWith(realPath, canonCwd)
     || allowedSkillRoots.some(root => pathStartsWith(realPath, root));