agent-security-scanner-mcp 3.18.0 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/rules/__init__.py CHANGED
@@ -201,16 +201,55 @@ def get_rules():
201
201
  return rules
202
202
 
203
203
 
204
def get_rules_for_language(language, file_path=None):
    """Get rules applicable to a specific language.

    A rule applies when ``language`` is listed in its ``languages``. A rule
    listed as ``generic`` applies to every language, unless its
    ``metadata.technology`` scopes it to specific technologies. A scoped
    generic rule is applied only when ``language`` is one of the languages
    mapped to a declared technology, or when a declared technology name
    occurs in ``file_path``. This prevents, e.g., Hugo-specific rules from
    firing on plain JavaScript database code.

    Args:
        language: Language name of the scanned file (compared
            case-insensitively).
        file_path: Optional path of the scanned file. It is only used as a
            case-insensitive substring hint for technology names.

    Returns:
        dict mapping rule id to rule for every applicable rule.
    """
    # Map technology names to the languages where they apply.
    tech_languages = {
        'hugo': {'go', 'html', 'toml', 'yaml'},
        'django': {'python', 'html'},
        'rails': {'ruby', 'html', 'erb'},
        'spring': {'java', 'kotlin'},
        'laravel': {'php'},
        'angular': {'typescript', 'javascript', 'html'},
        'react': {'javascript', 'typescript', 'jsx', 'tsx'},
    }

    language = language.lower()
    path_hint = file_path.lower() if file_path else ''
    applicable_rules = {}

    for rule_id, rule in get_rules().items():
        declared = rule.get('languages')
        if declared is None:
            # Missing (or explicitly null) languages means "generic".
            declared = ['generic']
        elif isinstance(declared, str):
            # A bare scalar would otherwise be iterated character by character.
            declared = [declared]
        rule_languages = [lang.lower() for lang in declared]

        if language in rule_languages:
            applicable_rules[rule_id] = rule
            continue

        if 'generic' not in rule_languages:
            continue

        # Check if this generic rule is scoped to a specific technology.
        metadata = rule.get('metadata')
        techs = metadata.get('technology') if isinstance(metadata, dict) else None
        if isinstance(techs, str):
            # A single scalar technology must scope the rule just like a
            # one-element list does.
            techs = [techs]
        tech_names = []
        if isinstance(techs, (list, tuple)):
            tech_names = [tech.lower() for tech in techs if isinstance(tech, str)]

        if tech_names:
            # Relevant when the language belongs to the technology, or the
            # technology name appears in the file path.
            tech_relevant = any(
                language in tech_languages.get(name, ())
                or (bool(path_hint) and name in path_hint)
                for name in tech_names
            )
            if not tech_relevant:
                continue

        applicable_rules[rule_id] = rule

    return applicable_rules
@@ -672,8 +672,8 @@ rules:
672
672
  severity: WARNING
673
673
  message: "Potential Base64-encoded prompt injection payload. Encoded content may hide malicious instructions."
674
674
  patterns:
675
- - "(?i)decode\\s+(this\\s+)?base64\\s*:\\s*[A-Za-z0-9+/=]{20,}"
676
- - "(?i)base64\\s*:\\s*[A-Za-z0-9+/=]{40,}"
675
+ - "(?i)decode\\s+(this\\s+)?base64\\s*:\\s*[A-Za-z0-9+/=]{20,200}"
676
+ - "(?i)base64\\s*:\\s*[A-Za-z0-9+/=]{40,200}"
677
677
  - "aWdub3JlIHByZXZpb3Vz"
678
678
  - "c3lzdGVtIHByb21wdA=="
679
679
  - "(?i)execute\\s+(this\\s+)?encoded"
@@ -682,8 +682,8 @@ rules:
682
682
  - "aWdub3JlIGFsbC"
683
683
  - "b3ZlcnJpZGU="
684
684
  - "(?i)base64.{0,20}instructions?.{0,20}follow"
685
- - "[A-Za-z0-9+/]{40,}={0,2}\\s*.{0,20}(?i)(decode|execute|follow|run)"
686
- - "(?i)(decode|run|execute)\\s+.{0,20}[A-Za-z0-9+/]{40,}={0,2}"
685
+ - "[A-Za-z0-9+/]{40,200}={0,2}[^A-Za-z0-9+/=].{0,20}(?:decode|execute|follow|run)"
686
+ - "(?i)(decode|run|execute)\\s+.{0,20}[A-Za-z0-9+/]{40,200}={0,2}"
687
687
  metadata:
688
688
  cwe: "CWE-77"
689
689
  owasp: "LLM01 - Prompt Injection"
@@ -20,7 +20,7 @@ export const FIX_TEMPLATES = {
20
20
  // ===========================================
21
21
  "sql-injection": {
22
22
  description: "Use parameterized queries instead of string concatenation",
23
- fix: (line) => line.replace(/["']([^"']*)\s*["']\s*\+\s*(\w+)/, '"$1?", [$2]')
23
+ fix: (line) => '// TODO: manual fix required — use parameterized queries instead of string concatenation\n// ' + line.trim()
24
24
  },
25
25
  "nosql-injection": {
26
26
  description: "Sanitize MongoDB query inputs",
@@ -28,7 +28,7 @@ export const FIX_TEMPLATES = {
28
28
  },
29
29
  "raw-query": {
30
30
  description: "Use parameterized queries instead of raw SQL",
31
- fix: (line) => line.replace(/\.query\s*\(\s*["'`]/, '.query("SELECT * FROM table WHERE id = ?", [')
31
+ fix: (line) => '// TODO: manual fix required use parameterized queries instead of raw SQL\n// ' + line.trim()
32
32
  },
33
33
 
34
34
  // ===========================================
@@ -306,10 +306,10 @@ export const FIX_TEMPLATES = {
306
306
  "path-traversal": {
307
307
  description: "Resolve real path and validate prefix to prevent traversal",
308
308
  fix: (line, lang) => {
309
- if (lang === 'python') return line.replace(/open\s*\(\s*(\w+)/, 'open(os.path.realpath($1) # TODO: validate path prefix');
310
- if (lang === 'go') return line.replace(/os\.Open\s*\(\s*(\w+)/, 'os.Open(filepath.Clean($1) // TODO: validate path prefix');
311
- if (lang === 'java') return line.replace(/new File\s*\(\s*(\w+)/, 'new File($1).getCanonicalFile( // TODO: validate path prefix');
312
- return line.replace(/readFileSync\s*\(\s*(\w+)/, 'readFileSync(path.resolve($1) // TODO: validate path prefix');
309
+ if (lang === 'python') return '# TODO: manual fix required — use os.path.realpath() and validate the prefix\n# ' + line.trim();
310
+ if (lang === 'go') return '// TODO: manual fix required — use filepath.Clean() and validate the prefix\n// ' + line.trim();
311
+ if (lang === 'java') return '// TODO: manual fix required — use getCanonicalFile() and validate the prefix\n// ' + line.trim();
312
+ return '// TODO: manual fix required — use path.resolve() and validate the prefix\n// ' + line.trim();
313
313
  }
314
314
  },
315
315
 
@@ -418,7 +418,7 @@ export const FIX_TEMPLATES = {
418
418
  // ===========================================
419
419
  "xpath-injection": {
420
420
  description: "Use parameterized XPath queries",
421
- fix: (line) => line.replace(/xpath\s*\(\s*["']([^"']*)\s*["']\s*\+\s*(\w+)/, 'xpath("$1?", [$2]')
421
+ fix: (line) => '// TODO: manual fix required — use parameterized XPath queries instead of concatenation\n// ' + line.trim()
422
422
  },
423
423
 
424
424
  // ===========================================
@@ -695,9 +695,9 @@ export const FIX_TEMPLATES = {
695
695
  description: "CRITICAL: Never eval() LLM responses - use JSON parsing or ast.literal_eval for safe subset",
696
696
  fix: (line, lang) => {
697
697
  if (lang === 'python') {
698
- return line.replace(/eval\s*\(\s*(\w+)/, 'ast.literal_eval($1 # SECURITY: Use safe parsing only');
698
+ return line.replace(/eval\s*\(\s*(\w+)\s*\)/, 'ast.literal_eval($1) # SECURITY: Use safe parsing only');
699
699
  }
700
- return line.replace(/eval\s*\(\s*(\w+)/, 'JSON.parse($1 /* SECURITY: Use safe JSON parsing */');
700
+ return line.replace(/eval\s*\(\s*(\w+)\s*\)/, 'JSON.parse($1) /* SECURITY: Use safe JSON parsing */');
701
701
  }
702
702
  },
703
703
  "exec-llm-response": {
package/src/history.js CHANGED
@@ -49,7 +49,7 @@ export function saveResult(dirPath, scanResult) {
49
49
  };
50
50
 
51
51
  writeFileSync(filePath, JSON.stringify(historyEntry, null, 2) + '\n');
52
- return filePath;
52
+ return filePath.replace(/\\/g, '/');
53
53
  }
54
54
 
55
55
  /**
@@ -32,6 +32,17 @@ const BLOOM_FILTERS = {
32
32
  rubygems: null
33
33
  };
34
34
 
35
+ // Flutter/Dart SDK packages are legitimate dependencies even though they do
36
+ // not appear in the pub.dev package dump used for the text-based lookup.
37
+ const DART_SDK_PACKAGES = new Set([
38
+ 'flutter',
39
+ 'flutter_test',
40
+ 'flutter_driver',
41
+ 'flutter_localizations',
42
+ 'flutter_web_plugins',
43
+ 'integration_test',
44
+ ]);
45
+
35
46
  // Load package lists on startup
36
47
  export function loadPackageLists() {
37
48
  const packagesDir = join(__dirname, '..', '..', 'packages');
@@ -67,6 +78,10 @@ export function loadPackageLists() {
67
78
 
68
79
  // Check if a package is hallucinated
69
80
  export function isHallucinated(packageName, ecosystem) {
81
+ if (ecosystem === 'dart' && DART_SDK_PACKAGES.has(packageName)) {
82
+ return { hallucinated: false, sdkPackage: true };
83
+ }
84
+
70
85
  const legitPackages = LEGITIMATE_PACKAGES[ecosystem];
71
86
 
72
87
  // First check Set-based lookup (exact match)
@@ -58,11 +58,41 @@ const CONFIDENCE_MULTIPLIERS = {
58
58
  // Maximum prompt size to prevent DoS via large inputs (100KB)
59
59
  const MAX_PROMPT_SIZE = 100 * 1024;
60
60
 
61
// Maximum text length fed to any single regex to prevent ReDoS.
// Prompt-injection patterns look for short markers/phrases, so scanning
// overlapping 2 KB windows covers all realistic payloads while keeping
// worst-case regex time bounded.
const REGEX_SCAN_WINDOW = 2048;
const REGEX_SCAN_OVERLAP = 256;

/**
 * Match a regex against text safely — splits long text into overlapping
 * windows so no single regex call processes more than REGEX_SCAN_WINDOW chars.
 *
 * Limitation: consecutive windows share only REGEX_SCAN_OVERLAP characters, so
 * a match longer than that which straddles a window boundary can be missed.
 *
 * @param {string} text - Text to scan.
 * @param {RegExp} regex - Pattern to apply to each window.
 * @returns {RegExpMatchArray|null} The first window's match, or null. For a
 *   non-global regex, `index` and `input` refer to the full `text`, exactly as
 *   they do when the text fits in a single window.
 */
function safeMatch(text, regex) {
  if (text.length <= REGEX_SCAN_WINDOW) {
    return text.match(regex);
  }

  const step = REGEX_SCAN_WINDOW - REGEX_SCAN_OVERLAP;
  for (let offset = 0; offset < text.length; offset += step) {
    const found = text.slice(offset, offset + REGEX_SCAN_WINDOW).match(regex);
    if (!found) continue;

    // String.prototype.match reports positions relative to the window it was
    // given; rebase them so callers see the same coordinates on both paths.
    // (A global regex returns a plain list of strings with no index.)
    if (typeof found.index === 'number') {
      found.index += offset;
      found.input = text;
    }
    return found;
  }
  return null;
}
83
+
61
84
  // Rule caches — loaded once per process, not on every call
62
85
  let _agentAttackRulesCache = null;
63
86
  let _promptInjectionRulesCache = null;
64
87
  let _openClawRulesCache = null;
65
88
 
89
/**
 * Convert a raw YAML pattern scalar into source text for `new RegExp`.
 *
 * Steps: drop one pair of matching surrounding quotes, remove Python-style
 * `(?i)` inline flags (callers pass the `i` flag instead), and collapse
 * YAML-escaped double backslashes (`\\s` -> `\s`).
 *
 * Only a *matching* pair of surrounding quotes is removed. This function is
 * also applied to patterns that were already normalized at load time, so a
 * lone leading or trailing quote is treated as part of the regex and kept.
 *
 * @param {string} pattern - Raw or previously normalized pattern text.
 * @returns {string} Regex source text.
 */
function normalizeYamlRegexPattern(pattern) {
  let source = pattern;
  const opener = source[0];
  const isQuoted = source.length >= 2
    && (opener === '"' || opener === "'")
    && source.endsWith(opener);
  if (isQuoted) {
    source = source.slice(1, -1);
  }
  return source
    .replace(/\(\?i\)/g, '')
    .replace(/\\\\/g, '\\');
}
95
+
66
96
  // Load agent attack rules from YAML
67
97
  function loadAgentAttackRules() {
68
98
  if (_agentAttackRulesCache !== null) return _agentAttackRulesCache;
@@ -108,11 +138,7 @@ function loadAgentAttackRules() {
108
138
  inMetadata = true;
109
139
  } else if (inPatterns && line.match(/^\s+- /)) {
110
140
  let pattern = line.replace(/^\s+- /, '').trim();
111
- pattern = pattern.replace(/^["']|["']$/g, '');
112
- // Strip Python-style inline flags - JS doesn't support them
113
- pattern = pattern.replace(/^\(\?i\)/, '');
114
- // Unescape double backslashes from YAML (\\s -> \s)
115
- pattern = pattern.replace(/\\\\/g, '\\');
141
+ pattern = normalizeYamlRegexPattern(pattern);
116
142
  if (pattern) rule.patterns.push(pattern);
117
143
  } else if (inMetadata && line.match(/^\s+\w+:/)) {
118
144
  const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -182,11 +208,7 @@ function loadPromptInjectionRules() {
182
208
  inMetadata = true;
183
209
  } else if (inPatterns && line.match(/^\s+- /)) {
184
210
  let pattern = line.replace(/^\s+- /, '').trim();
185
- pattern = pattern.replace(/^["']|["']$/g, '');
186
- // Strip Python-style inline flags - JS doesn't support them
187
- pattern = pattern.replace(/^\(\?i\)/, '');
188
- // Unescape double backslashes from YAML (\\s -> \s)
189
- pattern = pattern.replace(/\\\\/g, '\\');
211
+ pattern = normalizeYamlRegexPattern(pattern);
190
212
  if (pattern) rule.patterns.push(pattern);
191
213
  } else if (inMetadata && line.match(/^\s+\w+:/)) {
192
214
  const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -253,8 +275,7 @@ function loadOpenClawRules() {
253
275
  inPatterns = true;
254
276
  } else if (inPatterns && line.match(/^\s+- /)) {
255
277
  let pattern = line.replace(/^\s+- /, '').trim();
256
- pattern = pattern.replace(/^["']|["']$/g, '');
257
- pattern = pattern.replace(/\\\\/g, '\\');
278
+ pattern = normalizeYamlRegexPattern(pattern);
258
279
  if (pattern) rule.patterns.push(pattern);
259
280
  } else if (line.match(/^\s+\w+:/) && !line.match(/^\s+- /)) {
260
281
  inPatterns = false;
@@ -579,22 +600,12 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
579
600
  }
580
601
  }
581
602
 
582
- // Scan expanded text against all rules
583
- // Security: Add timeout protection for regex matching
584
- const REGEX_TIMEOUT_MS = 1000;
585
-
603
+ // Scan expanded text against all rules using windowed matching to prevent ReDoS
586
604
  for (const rule of allRules) {
587
605
  for (const pattern of rule.patterns) {
588
606
  try {
589
- const regex = new RegExp(pattern, 'i');
590
- const startTime = Date.now();
591
- const match = expandedText.match(regex);
592
-
593
- // Check for regex timeout (ReDoS protection)
594
- if (Date.now() - startTime > REGEX_TIMEOUT_MS) {
595
- console.warn(`Regex timeout for rule ${rule.id}, skipping`);
596
- break;
597
- }
607
+ const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
608
+ const match = safeMatch(expandedText, regex);
598
609
 
599
610
  if (match) {
600
611
  findings.push({
@@ -617,7 +628,9 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
617
628
  }
618
629
 
619
630
  // 2.8: Runtime base64 decode-and-rescan
620
- const base64Regex = /[A-Za-z0-9+/]{40,}={0,2}/g;
631
+ // Cap base64 match length to avoid matching entire large inputs as one blob.
632
+ // Real base64 payloads are at most a few KB; 4096 chars ≈ 3KB decoded.
633
+ const base64Regex = /[A-Za-z0-9+/]{40,4096}={0,2}/g;
621
634
  const b64Matches = expandedText.match(base64Regex);
622
635
  if (b64Matches) {
623
636
  for (const b64str of b64Matches) {
@@ -631,8 +644,8 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
631
644
  if (!rule.id.startsWith('generic.prompt')) continue;
632
645
  for (const pattern of rule.patterns) {
633
646
  try {
634
- const regex = new RegExp(pattern, 'i');
635
- const match = decoded.match(regex);
647
+ const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
648
+ const match = safeMatch(decoded, regex);
636
649
  if (match) {
637
650
  findings.push({
638
651
  rule_id: rule.id + '.base64-decoded',
@@ -674,8 +687,8 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
674
687
  for (const rule of allRules) {
675
688
  for (const pattern of rule.patterns) {
676
689
  try {
677
- const regex = new RegExp(pattern, 'i');
678
- const match = innerDecoded.match(regex);
690
+ const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
691
+ const match = safeMatch(innerDecoded, regex);
679
692
  if (match) {
680
693
  findings.push({
681
694
  rule_id: rule.id + '.nested-base64-decoded',
@@ -718,7 +731,7 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
718
731
  for (const rule of allRules) {
719
732
  for (const pattern of rule.patterns) {
720
733
  try {
721
- const regex = new RegExp(pattern, 'i');
734
+ const regex = new RegExp(normalizeYamlRegexPattern(pattern), 'i');
722
735
  if (regex.test(prevMsg)) {
723
736
  prevTotalScore += parseInt(rule.metadata?.risk_score || '50') / 100;
724
737
  msgHasMatch = true;
@@ -126,6 +126,12 @@ function normPath(p) { return IS_WIN ? p.toLowerCase() : p; }
126
126
  function pathStartsWith(child, parent) {
127
127
  return normPath(child) === normPath(parent) || normPath(child).startsWith(normPath(parent) + sep);
128
128
  }
129
/**
 * Convert a raw YAML rule pattern into source text for `new RegExp`.
 *
 * Steps: drop one pair of matching surrounding quotes, remove Python-style
 * `(?i)` inline flags, and collapse YAML-escaped double backslashes
 * (`\\s` -> `\s`).
 *
 * Only a *matching* pair of surrounding quotes is removed; a lone leading or
 * trailing quote is treated as part of the regex and kept.
 *
 * @param {string} pattern - Raw pattern text taken from a rule file line.
 * @returns {string} Regex source text.
 */
function normalizeRulePattern(pattern) {
  let source = pattern;
  const opener = source[0];
  const isQuoted = source.length >= 2
    && (opener === '"' || opener === "'")
    && source.endsWith(opener);
  if (isQuoted) {
    source = source.slice(1, -1);
  }
  return source
    .replace(/\(\?i\)/g, '')
    .replace(/\\\\/g, '\\');
}
129
135
  const MAX_CLAWHAVOC_SCAN_LEN = 2 * 1024 * 1024; // 2 MB cap for regex matching
130
136
 
131
137
  // ---------------------------------------------------------------------------
@@ -176,9 +182,7 @@ function loadClawHavocRules() {
176
182
  inMetadata = true;
177
183
  } else if (inPatterns && line.match(/^\s+- /)) {
178
184
  let pattern = line.replace(/^\s+- /, '').trim();
179
- pattern = pattern.replace(/^["']|["']$/g, '');
180
- pattern = pattern.replace(/^\(\?i\)/, '');
181
- pattern = pattern.replace(/\\\\/g, '\\');
185
+ pattern = normalizeRulePattern(pattern);
182
186
  if (pattern) rule.patterns.push(pattern);
183
187
  } else if (inMetadata && line.match(/^\s+\w+:/)) {
184
188
  const match = line.match(/^\s+(\w+):\s*["']?([^"'\n]+)["']?/);
@@ -892,15 +896,44 @@ function generateRecommendation(grade) {
892
896
  // ---------------------------------------------------------------------------
893
897
 
894
898
  export async function scanSkill({ skill_path, verbosity, baseline }) {
895
- // Security: Resolve to canonical path FIRST to prevent TOCTOU and symlink attacks
899
+ const canonCwd = realpathSync(process.cwd());
900
+ const configuredSkillRoots = [
901
+ resolve(homedir(), '.openclaw', 'skills'),
902
+ resolve(homedir(), '.openclaw', 'workspace', 'skills'),
903
+ ];
904
+ const allowedSkillRoots = configuredSkillRoots.map(root => {
905
+ try {
906
+ return existsSync(root) ? realpathSync(root) : null;
907
+ } catch {
908
+ return null;
909
+ }
910
+ }).filter(Boolean);
911
+
912
+ // Reject obvious escapes before touching the filesystem so absolute traversal
913
+ // attempts fail closed even when the target path does not exist.
896
914
  const inputPath = skill_path;
897
- let realPath;
915
+ const requestedPath = resolve(inputPath);
916
+ const isRequestedAllowed = pathStartsWith(requestedPath, canonCwd)
917
+ || configuredSkillRoots.some(root => pathStartsWith(requestedPath, root))
918
+ || allowedSkillRoots.some(root => pathStartsWith(requestedPath, root));
898
919
 
920
+ if (!isRequestedAllowed) {
921
+ return {
922
+ content: [{ type: "text", text: JSON.stringify({
923
+ error: "skill_path must be within the current working directory or ~/.openclaw/skills/ (or ~/.openclaw/workspace/skills/)",
924
+ skill_path: requestedPath,
925
+ attempted_path: inputPath
926
+ }) }]
927
+ };
928
+ }
929
+
930
+ // Resolve to canonical path after the initial boundary check to prevent
931
+ // symlink escapes while still returning a deterministic security error for
932
+ // out-of-scope absolute paths.
933
+ let realPath;
899
934
  try {
900
- // Resolve to canonical path immediately (defeats symlink attacks)
901
- realPath = realpathSync(resolve(inputPath));
935
+ realPath = realpathSync(requestedPath);
902
936
  } catch (err) {
903
- // Check for different error types
904
937
  let errorMessage;
905
938
  if (err.code === 'ENOENT') {
906
939
  errorMessage = "Path not found";
@@ -921,20 +954,7 @@ export async function scanSkill({ skill_path, verbosity, baseline }) {
921
954
  };
922
955
  }
923
956
 
924
- // Verify containment on canonical path ONLY
925
- // This prevents symlink escapes by checking the REAL resolved location
926
- const canonCwd = realpathSync(process.cwd());
927
- const allowedSkillRoots = [
928
- resolve(homedir(), '.openclaw', 'skills'),
929
- resolve(homedir(), '.openclaw', 'workspace', 'skills'),
930
- ].map(root => {
931
- try {
932
- return existsSync(root) ? realpathSync(root) : null;
933
- } catch {
934
- return null;
935
- }
936
- }).filter(Boolean);
937
-
957
+ // Verify containment on canonical path ONLY.
938
958
  const isAllowed = pathStartsWith(realPath, canonCwd)
939
959
  || allowedSkillRoots.some(root => pathStartsWith(realPath, root));
940
960