npm - agent-security-scanner-mcp - Versions diffs - 3.17.2 → 3.19.0 - Mend

agent-security-scanner-mcp 3.17.2 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/analyzer.py +26 -2
package/cross_file_analyzer.py +478 -5
package/package.json +3 -2
package/python_taint_fallback.py +688 -0
package/rules/__init__.py +42 -3
package/rules/prompt-injection.security.yaml +4 -4
package/rules/semantic-security.yaml +679 -0
package/src/fix-patterns.js +9 -9
package/src/history.js +1 -1
package/src/tools/check-package.js +15 -0
package/src/tools/scan-prompt.js +44 -31
package/src/tools/scan-security.js +33 -4
package/src/tools/scan-skill.js +54 -22

package/src/fix-patterns.js CHANGED Viewed

@@ -20,7 +20,7 @@ export const FIX_TEMPLATES = {
   // ===========================================
   "sql-injection": {
     description: "Use parameterized queries instead of string concatenation",
-    fix: (line) => line.replace(/["']([^"']*)\s*["']\s*\+\s*(\w+)/, '"$1?", [$2]')
+    fix: (line) => '// TODO: manual fix required — use parameterized queries instead of string concatenation\n// ' + line.trim()
   },
   "nosql-injection": {
     description: "Sanitize MongoDB query inputs",
@@ -28,7 +28,7 @@ export const FIX_TEMPLATES = {
   },
   "raw-query": {
     description: "Use parameterized queries instead of raw SQL",
-    fix: (line) => line.replace(/\.query\s*\(\s*["'`]/, '.query("SELECT * FROM table WHERE id = ?", [')
+    fix: (line) => '// TODO: manual fix required — use parameterized queries instead of raw SQL\n// ' + line.trim()
   },
   // ===========================================
@@ -306,10 +306,10 @@ export const FIX_TEMPLATES = {
   "path-traversal": {
     description: "Resolve real path and validate prefix to prevent traversal",
     fix: (line, lang) => {
-      if (lang === 'python') return line.replace(/open\s*\(\s*(\w+)/, 'open(os.path.realpath($1)  # TODO: validate path prefix');
-      if (lang === 'go') return line.replace(/os\.Open\s*\(\s*(\w+)/, 'os.Open(filepath.Clean($1)  // TODO: validate path prefix');
-      if (lang === 'java') return line.replace(/new File\s*\(\s*(\w+)/, 'new File($1).getCanonicalFile(  // TODO: validate path prefix');
-      return line.replace(/readFileSync\s*\(\s*(\w+)/, 'readFileSync(path.resolve($1)  // TODO: validate path prefix');
+      if (lang === 'python') return '# TODO: manual fix required — use os.path.realpath() and validate the prefix\n# ' + line.trim();
+      if (lang === 'go') return '// TODO: manual fix required — use filepath.Clean() and validate the prefix\n// ' + line.trim();
+      if (lang === 'java') return '// TODO: manual fix required — use getCanonicalFile() and validate the prefix\n// ' + line.trim();
+      return '// TODO: manual fix required — use path.resolve() and validate the prefix\n// ' + line.trim();
     }
   },
@@ -418,7 +418,7 @@ export const FIX_TEMPLATES = {
   // ===========================================
   "xpath-injection": {
     description: "Use parameterized XPath queries",
-    fix: (line) => line.replace(/xpath\s*\(\s*["']([^"']*)\s*["']\s*\+\s*(\w+)/, 'xpath("$1?", [$2]')
+    fix: (line) => '// TODO: manual fix required — use parameterized XPath queries instead of concatenation\n// ' + line.trim()
   },
   // ===========================================
@@ -695,9 +695,9 @@ export const FIX_TEMPLATES = {
     description: "CRITICAL: Never eval() LLM responses - use JSON parsing or ast.literal_eval for safe subset",
     fix: (line, lang) => {
       if (lang === 'python') {
-        return line.replace(/eval\s*\(\s*(\w+)/, 'ast.literal_eval($1  # SECURITY: Use safe parsing only');
+        return line.replace(/eval\s*\(\s*(\w+)\s*\)/, 'ast.literal_eval($1)  # SECURITY: Use safe parsing only');
       }
-      return line.replace(/eval\s*\(\s*(\w+)/, 'JSON.parse($1  /* SECURITY: Use safe JSON parsing */');
+      return line.replace(/eval\s*\(\s*(\w+)\s*\)/, 'JSON.parse($1)  /* SECURITY: Use safe JSON parsing */');
     }
   },
   "exec-llm-response": {

package/src/history.js CHANGED Viewed

@@ -49,7 +49,7 @@ export function saveResult(dirPath, scanResult) {
   };
   writeFileSync(filePath, JSON.stringify(historyEntry, null, 2) + '\n');
-  return filePath;
+  return filePath.replace(/\\/g, '/');
 }
 /**

package/src/tools/check-package.js CHANGED Viewed

@@ -32,6 +32,17 @@ const BLOOM_FILTERS = {
   rubygems: null
 };
+// Flutter/Dart SDK packages are legitimate dependencies even though they do
+// not appear in the pub.dev package dump used for the text-based lookup.
+const DART_SDK_PACKAGES = new Set([
+  'flutter',
+  'flutter_test',
+  'flutter_driver',
+  'flutter_localizations',
+  'flutter_web_plugins',
+  'integration_test',
+]);
 // Load package lists on startup
 export function loadPackageLists() {
   const packagesDir = join(__dirname, '..', '..', 'packages');
@@ -67,6 +78,10 @@ export function loadPackageLists() {
 // Check if a package is hallucinated
 export function isHallucinated(packageName, ecosystem) {
+  if (ecosystem === 'dart' && DART_SDK_PACKAGES.has(packageName)) {
+    return { hallucinated: false, sdkPackage: true };
+  }
   const legitPackages = LEGITIMATE_PACKAGES[ecosystem];
   // First check Set-based lookup (exact match)

package/src/tools/scan-prompt.js CHANGED Viewed

@@ -58,11 +58,41 @@ const CONFIDENCE_MULTIPLIERS = {
 // Maximum prompt size to prevent DoS via large inputs (100KB)
 const MAX_PROMPT_SIZE = 100 * 1024;
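+// Maximum text length fed to any single regex call, to bound worst-case
+// matching time (ReDoS mitigation). Prompt-injection patterns look for short
+// markers/phrases, so long text is scanned in overlapping 2 KB windows.
+// Only matches up to about REGEX_SCAN_OVERLAP characters long are guaranteed
+// to fit inside a single window; a longer match that straddles a window
+// boundary can be missed.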
+const REGEX_SCAN_WINDOW = 2048;
+const REGEX_SCAN_OVERLAP = 256;
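+/**
+ * Match a regex against text safely — splits long text into overlapping
+ * windows so no single regex call processes more than REGEX_SCAN_WINDOW chars.
+ * Returns the result of the first window that matches, or null when nothing
+ * matches. For windowed input the returned match's `index` is relative to
+ * the matching window, not to the full text.
+ */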
+function safeMatch(text, regex) {
+  if (text.length <= REGEX_SCAN_WINDOW) {
+    return text.match(regex);
+  }
+  for (let offset = 0; offset < text.length; offset += REGEX_SCAN_WINDOW - REGEX_SCAN_OVERLAP) {
+    const chunk = text.slice(offset, offset + REGEX_SCAN_WINDOW);
+    const m = chunk.match(regex);
+    if (m) return m;
+  }
+  return null;
+}
 // Rule caches — loaded once per process, not on every call
 let _agentAttackRulesCache = null;
 let _promptInjectionRulesCache = null;
 let _openClawRulesCache = null;
+function normalizeYamlRegexPattern(pattern) {
+  return pattern
+    .replace(/^["']|["']$/g, '')
+    .replace(/\(\?i\)/g, '')
+    .replace(/\\\\/g, '\\');
+}
 // Load agent attack rules from YAML
 function loadAgentAttackRules() {
   if (_agentAttackRulesCache !== null) return _agentAttackRulesCache;
@@ -108,11 +138,7 @@ function loadAgentAttackRules() {
           inMetadata = true;
         } else if (inPatterns && line.match(/^\s+- /)) {
           let pattern = line.replace(/^\s+- /, '').trim();
-          pattern = pattern.replace(/^["']|["']$/g, '');
-          // Strip Python-style inline flags - JS doesn't support them
-          pattern = pattern.replace(/^\(\?i\)/, '');
-          // Unescape double backslashes from YAML (\\s -> \s)
-          pattern = pattern.replace(/\\\\/g, '\\');
+          pattern = normalizeYamlRegexPattern(pattern);
           if (pattern) rule.patterns.push(pattern);
         } else if (inMetadata && line.match(/^\s+\w+:/)) {
           const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -182,11 +208,7 @@ function loadPromptInjectionRules() {
           inMetadata = true;
         } else if (inPatterns && line.match(/^\s+- /)) {
           let pattern = line.replace(/^\s+- /, '').trim();
-          pattern = pattern.replace(/^["']|["']$/g, '');
-          // Strip Python-style inline flags - JS doesn't support them
-          pattern = pattern.replace(/^\(\?i\)/, '');
-          // Unescape double backslashes from YAML (\\s -> \s)
-          pattern = pattern.replace(/\\\\/g, '\\');
+          pattern = normalizeYamlRegexPattern(pattern);
           if (pattern) rule.patterns.push(pattern);
         } else if (inMetadata && line.match(/^\s+\w+:/)) {
           const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -253,8 +275,7 @@ function loadOpenClawRules() {
           inPatterns = true;
         } else if (inPatterns && line.match(/^\s+- /)) {
           let pattern = line.replace(/^\s+- /, '').trim();
-          pattern = pattern.replace(/^["']|["']$/g, '');
-          pattern = pattern.replace(/\\\\/g, '\\');
+          pattern = normalizeYamlRegexPattern(pattern);
           if (pattern) rule.patterns.push(pattern);
         } else if (line.match(/^\s+\w+:/) && !line.match(/^\s+- /)) {
           inPatterns = false;
@@ -579,22 +600,12 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
     }
   }
-  // Scan expanded text against all rules
-  // Security: Add timeout protection for regex matching
-  const REGEX_TIMEOUT_MS = 1000;
+  // Scan expanded text against all rules using windowed matching to prevent ReDoS
   for (const rule of allRules) {
     for (const pattern of rule.patterns) {
       try {
-        const regex = new RegExp(pattern, 'i');
-        const startTime = Date.now();
-        const match = expandedText.match(regex);
-        // Check for regex timeout (ReDoS protection)
-        if (Date.now() - startTime > REGEX_TIMEOUT_MS) {
-          console.warn(`Regex timeout for rule ${rule.id}, skipping`);
-          break;
-        }
+        const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
+        const match = safeMatch(expandedText, regex);
         if (match) {
           findings.push({
@@ -617,7 +628,9 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
   }
   // 2.8: Runtime base64 decode-and-rescan
-  const base64Regex = /[A-Za-z0-9+/]{40,}={0,2}/g;
+  // Cap base64 match length to avoid matching entire large inputs as one blob.
+  // Real base64 payloads are at most a few KB; 4096 chars ≈ 3KB decoded.
+  const base64Regex = /[A-Za-z0-9+/]{40,4096}={0,2}/g;
   const b64Matches = expandedText.match(base64Regex);
   if (b64Matches) {
     for (const b64str of b64Matches) {
@@ -631,8 +644,8 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
             if (!rule.id.startsWith('generic.prompt')) continue;
             for (const pattern of rule.patterns) {
               try {
-                const regex = new RegExp(pattern, 'i');
-                const match = decoded.match(regex);
+                const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
+                const match = safeMatch(decoded, regex);
                 if (match) {
                   findings.push({
                     rule_id: rule.id + '.base64-decoded',
@@ -674,8 +687,8 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
                   for (const rule of allRules) {
                     for (const pattern of rule.patterns) {
                       try {
-                        const regex = new RegExp(pattern, 'i');
-                        const match = innerDecoded.match(regex);
+                        const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
+                        const match = safeMatch(innerDecoded, regex);
                         if (match) {
                           findings.push({
                             rule_id: rule.id + '.nested-base64-decoded',
@@ -718,7 +731,7 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
       for (const rule of allRules) {
         for (const pattern of rule.patterns) {
           try {
-            const regex = new RegExp(pattern, 'i');
+            const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
             if (regex.test(prevMsg)) {
               prevTotalScore += parseInt(rule.metadata?.risk_score || '50') / 100;
               msgHasMatch = true;

package/src/tools/scan-security.js CHANGED Viewed

@@ -7,6 +7,7 @@ import { deduplicateFindings } from '../dedup.js';
 import { applyContextFilter, detectFrameworks, applyFrameworkAdjustments } from '../context.js';
 import { loadConfig, shouldExcludeFile, applyConfig } from '../config.js';
 import { discoverProjectContext } from './project-context.js';
+import { runSemanticAnalysis, isSemanticAnalysisAvailable } from '../semantic-integration.js';
 const MAX_FILE_SIZE = 1024 * 1024;  // 1MB - skip files larger than this to avoid timeouts
@@ -14,9 +15,10 @@ export const scanSecuritySchema = {
   file_path: z.string().describe("Path to the file to scan"),
   output_format: z.enum(['json', 'sarif']).optional().describe("Output format: 'json' (default) or 'sarif' for GitHub/GitLab integration"),
   verbosity: z.enum(['minimal', 'compact', 'full']).optional().describe("Response detail level: 'minimal' (counts only), 'compact' (default, actionable info), 'full' (complete metadata)"),
-  engine: z.enum(['auto', 'ast', 'regex']).optional().describe("Analysis engine: 'auto' (default, AST with regex fallback), 'ast' (tree-sitter only), 'regex' (regex only)"),
+  engine: z.enum(['auto', 'ast', 'regex', 'semantic', 'all']).optional().describe("Analysis engine: 'auto' (default, AST+semantic with regex fallback), 'ast' (tree-sitter only), 'regex' (regex only), 'semantic' (semantic/CPG only), 'all' (all engines)"),
   project_context: z.boolean().optional().describe("Include project context (framework, security middleware, dependencies)"),
-  include_context: z.boolean().optional().describe("Include surrounding code context for each issue")
+  include_context: z.boolean().optional().describe("Include surrounding code context for each issue"),
+  enable_semantic: z.boolean().optional().describe("Enable semantic/CPG analysis (default: true if available)")
 };
 // Verbosity formatters
@@ -64,7 +66,7 @@ function formatFull(file_path, language, issues) {
   };
 }
-export async function scanSecurity({ file_path, output_format, verbosity, engine, project_context, include_context }) {
+export async function scanSecurity({ file_path, output_format, verbosity, engine, project_context, include_context, enable_semantic }) {
   if (!existsSync(file_path)) {
     return {
       content: [{ type: "text", text: JSON.stringify({ error: "File not found" }) }]
@@ -101,7 +103,34 @@ export async function scanSecurity({ file_path, output_format, verbosity, engine
     };
   }
-  const rawIssues = await runAnalyzerAsync(file_path, engine || 'auto');
+  // Determine which engines to run
+  const engineMode = engine || 'auto';
+  const shouldRunSemantic = (enable_semantic !== false) &&
+    (engineMode === 'auto' || engineMode === 'semantic' || engineMode === 'all') &&
+    isSemanticAnalysisAvailable();
+  // Run primary analysis (AST/regex)
+  let rawIssues = [];
+  if (engineMode !== 'semantic') {
+    rawIssues = await runAnalyzerAsync(file_path, engineMode === 'all' ? 'auto' : engineMode);
+    if (rawIssues.error) {
+      return {
+        content: [{ type: "text", text: JSON.stringify(rawIssues) }]
+      };
+    }
+  }
+  // Run semantic analysis if enabled
+  if (shouldRunSemantic) {
+    try {
+      const semanticFindings = await runSemanticAnalysis(file_path);
+      if (semanticFindings && semanticFindings.length > 0) {
+        rawIssues = rawIssues.concat(semanticFindings);
+      }
+    } catch (error) {
+      console.error('[SEMANTIC] Analysis failed, continuing without semantic findings:', error.message);
+    }
+  }
   if (rawIssues.error) {
     return {

package/src/tools/scan-skill.js CHANGED Viewed

@@ -126,6 +126,12 @@ function normPath(p) { return IS_WIN ? p.toLowerCase() : p; }
 function pathStartsWith(child, parent) {
   return normPath(child) === normPath(parent) || normPath(child).startsWith(normPath(parent) + sep);
 }
+function normalizeRulePattern(pattern) {
+  return pattern
+    .replace(/^["']|["']$/g, '')
+    .replace(/\(\?i\)/g, '')
+    .replace(/\\\\/g, '\\');
+}
 const MAX_CLAWHAVOC_SCAN_LEN = 2 * 1024 * 1024; // 2 MB cap for regex matching
 // ---------------------------------------------------------------------------
@@ -176,9 +182,7 @@ function loadClawHavocRules() {
           inMetadata = true;
         } else if (inPatterns && line.match(/^\s+- /)) {
           let pattern = line.replace(/^\s+- /, '').trim();
-          pattern = pattern.replace(/^["']|["']$/g, '');
-          pattern = pattern.replace(/^\(\?i\)/, '');
-          pattern = pattern.replace(/\\\\/g, '\\');
+          pattern = normalizeRulePattern(pattern);
           if (pattern) rule.patterns.push(pattern);
         } else if (inMetadata && line.match(/^\s+\w+:/)) {
           const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -892,37 +896,65 @@ function generateRecommendation(grade) {
 // ---------------------------------------------------------------------------
 export async function scanSkill({ skill_path, verbosity, baseline }) {
-  // Security: Resolve to canonical path FIRST to prevent TOCTOU and symlink attacks
+  const canonCwd = realpathSync(process.cwd());
+  const configuredSkillRoots = [
+    resolve(homedir(), '.openclaw', 'skills'),
+    resolve(homedir(), '.openclaw', 'workspace', 'skills'),
+  ];
+  const allowedSkillRoots = configuredSkillRoots.map(root => {
+    try {
+      return existsSync(root) ? realpathSync(root) : null;
+    } catch {
+      return null;
+    }
+  }).filter(Boolean);
+  // Reject obvious escapes before resolving the requested path on disk so
+  // absolute traversal attempts fail closed even when the target path does
+  // not exist.
   const inputPath = skill_path;
-  let realPath;
+  const requestedPath = resolve(inputPath);
+  const isRequestedAllowed = pathStartsWith(requestedPath, canonCwd)
+    || configuredSkillRoots.some(root => pathStartsWith(requestedPath, root))
+    || allowedSkillRoots.some(root => pathStartsWith(requestedPath, root));
+  if (!isRequestedAllowed) {
+    return {
+      content: [{ type: "text", text: JSON.stringify({
+        error: "skill_path must be within the current working directory or ~/.openclaw/skills/ (or ~/.openclaw/workspace/skills/)",
+        skill_path: requestedPath,
+        attempted_path: inputPath
+      }) }]
+    };
+  }
+  // Resolve to canonical path after the initial boundary check to prevent
+  // symlink escapes while still returning a deterministic security error for
+  // out-of-scope absolute paths.
+  let realPath;
   try {
-    // Resolve to canonical path immediately (defeats symlink attacks)
-    realPath = realpathSync(resolve(inputPath));
+    realPath = realpathSync(requestedPath);
   } catch (err) {
+    let errorMessage;
+    if (err.code === 'ENOENT') {
+      errorMessage = "Path not found";
+    } else if (err.code === 'ELOOP') {
+      errorMessage = "Symlink loop detected";
+    } else if (err.code === 'EACCES') {
+      errorMessage = "Permission denied";
+    } else {
+      errorMessage = "Invalid path";
+    }
     return {
       content: [{ type: "text", text: JSON.stringify({
-        error: "Invalid path, symlink loop, or permission denied",
+        error: errorMessage,
         skill_path: inputPath,
         details: err.message
       }) }]
     };
   }
-  // Verify containment on canonical path ONLY
-  // This prevents symlink escapes by checking the REAL resolved location
-  const canonCwd = realpathSync(process.cwd());
-  const allowedSkillRoots = [
-    resolve(homedir(), '.openclaw', 'skills'),
-    resolve(homedir(), '.openclaw', 'workspace', 'skills'),
-  ].map(root => {
-    try {
-      return existsSync(root) ? realpathSync(root) : null;
-    } catch {
-      return null;
-    }
-  }).filter(Boolean);
+  // Verify containment on canonical path ONLY.
   const isAllowed = pathStartsWith(realPath, canonCwd)
     || allowedSkillRoots.some(root => pathStartsWith(realPath, root));