agent-security-scanner-mcp 3.18.0 → 3.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/analyzer.py +23 -2
- package/cross_file_analyzer.py +478 -5
- package/package.json +3 -2
- package/python_taint_fallback.py +688 -0
- package/rules/__init__.py +42 -3
- package/rules/prompt-injection.security.yaml +4 -4
- package/src/fix-patterns.js +9 -9
- package/src/history.js +1 -1
- package/src/tools/check-package.js +15 -0
- package/src/tools/scan-prompt.js +44 -31
- package/src/tools/scan-skill.js +42 -22
package/analyzer.py
CHANGED
|
@@ -45,6 +45,12 @@ try:
|
|
|
45
45
|
except ImportError:
|
|
46
46
|
HAS_TAINT_ANALYZER = False
|
|
47
47
|
|
|
48
|
+
try:
|
|
49
|
+
from python_taint_fallback import analyze_python_taint
|
|
50
|
+
HAS_PYTHON_TAINT_FALLBACK = True
|
|
51
|
+
except ImportError:
|
|
52
|
+
HAS_PYTHON_TAINT_FALLBACK = False
|
|
53
|
+
|
|
48
54
|
# Semantic analyzer flag (JavaScript-based, optional)
|
|
49
55
|
HAS_SEMANTIC_ANALYZER = True # Implemented in src/semantic-analyzer.js
|
|
50
56
|
|
|
@@ -106,7 +112,7 @@ def analyze_file_regex(file_path):
|
|
|
106
112
|
issues = []
|
|
107
113
|
try:
|
|
108
114
|
language = detect_language(file_path)
|
|
109
|
-
all_rules = get_rules_for_language(language)
|
|
115
|
+
all_rules = get_rules_for_language(language, file_path)
|
|
110
116
|
# Filter out rules whose paths.include/exclude don't match this file
|
|
111
117
|
rules = {}
|
|
112
118
|
basename = os.path.basename(file_path)
|
|
@@ -161,7 +167,22 @@ def analyze_file_regex(file_path):
|
|
|
161
167
|
issue['engine'] = 'regex-fallback'
|
|
162
168
|
issues.extend(fallback_issues)
|
|
163
169
|
except ImportError:
|
|
164
|
-
|
|
170
|
+
source = None
|
|
171
|
+
|
|
172
|
+
# Python taint fallback using stdlib ast (no tree-sitter required)
|
|
173
|
+
if language == 'python' and HAS_PYTHON_TAINT_FALLBACK:
|
|
174
|
+
try:
|
|
175
|
+
if source is None:
|
|
176
|
+
with open(file_path, 'r', errors='replace') as f:
|
|
177
|
+
source = f.read()
|
|
178
|
+
taint_issues = analyze_python_taint(source, file_path)
|
|
179
|
+
for issue in taint_issues:
|
|
180
|
+
issue.setdefault('engine', 'taint')
|
|
181
|
+
issue.get('metadata', {}).setdefault('analysis_mode', 'regex-fallback')
|
|
182
|
+
issues.extend(taint_issues)
|
|
183
|
+
print(f"[REGEX] Python taint fallback: {len(taint_issues)} finding(s)", file=sys.stderr)
|
|
184
|
+
except Exception as e:
|
|
185
|
+
print(f"[REGEX] Python taint fallback error: {e}", file=sys.stderr)
|
|
165
186
|
|
|
166
187
|
seen = set()
|
|
167
188
|
unique = []
|
package/cross_file_analyzer.py
CHANGED
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
|
|
4
4
|
Builds an import graph across local files, runs per-file analysis,
|
|
5
5
|
and propagates taint warnings when a file imports from another file
|
|
6
|
-
that has ERROR-severity findings.
|
|
6
|
+
that has ERROR-severity findings. Also performs parameter-aware
|
|
7
|
+
cross-file taint matching by building export summaries of dangerous
|
|
8
|
+
functions and tracing tainted data through import bindings and call sites.
|
|
7
9
|
"""
|
|
8
10
|
|
|
9
11
|
import json
|
|
@@ -15,6 +17,389 @@ import sys
|
|
|
15
17
|
from analyzer import analyze_file
|
|
16
18
|
|
|
17
19
|
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Sink detection patterns used by extract_dangerous_functions_regex
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
# (regex matching a sink call inside a function body, rule_id)
|
|
25
|
+
# JavaScript/TypeScript sink table consumed by extract_dangerous_functions_regex.
# Each entry is a 3-tuple:
#   [0] regex run against a single line of a function body; capture group 1
#       is the identifier that reaches the sink,
#   [1] rule id reported for that sink,
#   [2] optional "line guard" regex that must also match the same line
#       (None means no extra condition).
# Both the sink regex and the guard are applied with re.IGNORECASE.
_JS_SINK_PATTERNS = [
    # SQL injection: string ending quote/tick then + variable, on a line with SQL keyword
    # This broad pattern catches "...WHERE id = " + id and "...LIKE '%" + term
    (r'''["'`]\s*\+\s*(\w+)''', 'sql-injection', r'(?:SELECT|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER)\b'),
    # SQL injection via template literal
    (r'''`(?:SELECT|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER)\b[^`]*\$\{(\w+)\}''', 'sql-injection', None),
    # eval()
    (r'''\beval\s*\(\s*(\w+)\s*\)''', 'code-injection', None),
    # child_process.exec
    (r'''\bexec\s*\(\s*(\w+)''', 'command-injection', None),
]
|
|
36
|
+
|
|
37
|
+
# Python sink table consumed by extract_dangerous_functions_regex.
# Each entry is (sink regex, rule id, optional line-guard regex); capture
# group 1 of the sink regex is the identifier passed to the sink. None of the
# Python entries use a line guard.
_PY_SINK_PATTERNS = [
    # cursor.execute("..." + var): SQL built by string concatenation
    (r'''\bcursor\.execute\s*\(\s*["'].*\+\s*(\w+)''', 'sql-injection', None),
    # cursor.execute(f"...{var}..."): SQL built with an f-string
    (r'''\bcursor\.execute\s*\(\s*f["'].*\{(\w+)\}''', 'sql-injection', None),
    # eval(var)
    (r'''\beval\s*\(\s*(\w+)\s*\)''', 'code-injection', None),
    # exec(var, ...)
    (r'''\bexec\s*\(\s*(\w+)''', 'code-injection', None),
    # os.system(var)
    (r'''\bos\.system\s*\(\s*(\w+)''', 'command-injection', None),
    # subprocess.call / subprocess.run / subprocess.Popen with a variable as first argument
    (r'''\bsubprocess\.(?:call|run|Popen)\s*\(\s*(\w+)''', 'command-injection', None),
]
|
|
45
|
+
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# Taint source patterns used by _find_tainted_variables
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
# JavaScript taint sources consumed by _find_tainted_variables.
# Each entry is a 1-tuple holding a regex with two capture groups:
#   group 1 = the variable being declared, group 2 = the source expression.
_JS_TAINT_SOURCES = [
    # Express: req.params.id, req.query.q, etc. — capture the full accessor
    (r'''(?:const|let|var)\s+(\w+)\s*=\s*(req\.(?:params|query|body|headers|cookies)(?:\.\w+|\[[^\]]+\]))''',),
]
|
|
54
|
+
|
|
55
|
+
# Python taint sources consumed by _find_tainted_variables.
# Each entry is a 1-tuple holding a regex with two capture groups:
#   group 1 = the assigned variable, group 2 = the source expression.
_PY_TAINT_SOURCES = [
    # request.<args|form|values|json|data|files|cookies|headers> read via .get(...) or [...]
    (r'''(\w+)\s*=\s*(request\.(?:args|form|values|json|data|files|cookies|headers)(?:\.get\([^)]*\)|\[[^\]]+\]))''',),
    # request.GET read via .get(...) or [...]
    (r'''(\w+)\s*=\s*(request\.GET(?:\.get\([^)]*\)|\[[^\]]+\]))''',),
    # request.POST read via .get(...) or [...]
    (r'''(\w+)\s*=\s*(request\.POST(?:\.get\([^)]*\)|\[[^\]]+\]))''',),
    # value returned by input(...)
    (r'''(\w+)\s*=\s*(input\s*\([^)]*\))''',),
]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Function extraction helpers
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
# Regular function: function name(params) { ... }
|
|
68
|
+
# group 1 = function name, group 2 = raw parameter list (may be empty)
_JS_FUNC_RE = re.compile(
    r'(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)'
)
# Arrow / const: const name = (params) => { ... } or const name = function(params) { ... }
# Arrow form: group 1 = name, group 2 = parenthesised parameter list,
# group 3 = single bare parameter (x => ...); only one of groups 2/3 is set.
_JS_ARROW_RE = re.compile(
    r'(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:\(([^)]*)\)|(\w+))\s*=>'
)
# Function-expression form: group 1 = name, group 2 = raw parameter list
_JS_CONST_FUNC_RE = re.compile(
    r'(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?function\s*\(([^)]*)\)'
)
# Python
# group 1 = leading indentation, group 2 = function name,
# group 3 = raw parameter list; the signature must fit on a single line.
_PY_FUNC_RE = re.compile(
    r'^(\s*)(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)\s*:'
)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _extract_js_functions(source):
    """Extract function definitions from JavaScript/TypeScript source.

    Each line is tried against the declaration, arrow and function-expression
    regexes (first match wins, at most one function per line).

    Args:
        source: full source text of the file.

    Returns:
        A list of dicts with keys ``name``, ``params`` (list of parameter
        names), ``body`` (definition line plus body text) and ``line``
        (0-based index of the definition line).
    """
    functions = []
    lines = source.split('\n')

    for i, line in enumerate(lines):
        for regex in (_JS_FUNC_RE, _JS_ARROW_RE, _JS_CONST_FUNC_RE):
            m = regex.search(line)
            if not m:
                continue

            name = m.group(1)
            is_arrow = regex is _JS_ARROW_RE
            params_str = m.group(2) or ''
            if is_arrow and m.group(3):
                # Single bare parameter: x => ...
                params_str = m.group(3)

            params = []
            for raw in params_str.split(','):
                # Drop default values and TypeScript type annotations.
                param = raw.split('=')[0].split(':')[0].strip()
                if param.startswith('...'):
                    # Rest parameter: ...args
                    param = param[3:].strip()
                if param.endswith('?'):
                    # TypeScript optional parameter: name?
                    param = param[:-1].strip()
                if param:
                    params.append(param)

            tail = line[m.end():] if is_arrow else ''
            if is_arrow and tail.strip() and '{' not in tail:
                # Expression-bodied arrow function (x => x + 1): there is no
                # brace block, so brace counting would run on into whatever
                # function follows. The body is the definition line itself.
                body_lines = [line]
            else:
                # Find function body end (simple brace counting), looking at
                # most 200 lines ahead.
                body_lines = []
                brace_count = 0
                started = False
                for j in range(i, min(i + 200, len(lines))):
                    body_lines.append(lines[j])
                    brace_count += lines[j].count('{') - lines[j].count('}')
                    if '{' in lines[j]:
                        started = True
                    if started and brace_count <= 0:
                        break

            functions.append({
                'name': name,
                'params': params,
                'body': '\n'.join(body_lines),
                'line': i,
            })
            break

    return functions
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _extract_py_functions(source):
    """Extract function definitions from Python source.

    Only single-line ``def``/``async def`` signatures are recognised. The
    body is every following line that is blank or indented deeper than the
    ``def`` line.

    Args:
        source: full source text of the file.

    Returns:
        A list of dicts with keys ``name``, ``params`` (parameter names
        without ``self``/``cls``, annotations, defaults or ``*``/``**``
        prefixes), ``body`` (definition line plus body text) and ``line``
        (0-based index of the definition line).
    """
    functions = []
    lines = source.split('\n')

    for i, line in enumerate(lines):
        m = _PY_FUNC_RE.match(line)
        if not m:
            continue

        indent = len(m.group(1))
        name = m.group(2)

        params = []
        for raw in m.group(3).split(','):
            # Strip annotation, default value and the * / ** prefix so that
            # "*args" is reported as "args" (the name used inside the body).
            param = raw.strip().split(':')[0].split('=')[0].strip()
            param = param.lstrip('*').strip()
            # Bare "*" and "/" are signature separators, not parameters.
            if not param or param == '/':
                continue
            if param in ('self', 'cls'):
                continue
            params.append(param)

        # Find body end by indentation
        body_lines = [line]
        for following in lines[i + 1:]:
            if following.strip():
                cur_indent = len(following) - len(following.lstrip())
                if cur_indent <= indent:
                    break
            body_lines.append(following)

        functions.append({
            'name': name,
            'params': params,
            'body': '\n'.join(body_lines),
            'line': i,
        })

    return functions
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# ---------------------------------------------------------------------------
|
|
158
|
+
# Public API — functions expected by tests/cross_file_taint_test.py
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
def _record_dangerous_param(dangerous_params, seen, param_name, rule_id):
    """Append a (param, rule) pair to dangerous_params unless already recorded."""
    key = (param_name, rule_id)
    if key in seen:
        return
    seen.add(key)
    dangerous_params.append({
        'param_name': param_name,
        'sink_rule_id': rule_id,
    })


def extract_dangerous_functions_regex(source, language):
    """Identify functions whose parameters flow to dangerous sinks.

    A parameter is reported when it appears directly in a sink, or when the
    variable in the sink is assigned (``=`` or ``+=``) from an expression
    that mentions the parameter anywhere in the function body.

    Args:
        source: full source text of the file.
        language: 'javascript', 'typescript' or 'python' (case-insensitive);
            any other value yields an empty list.

    Returns a list of dicts:
        [{ 'function_name': str,
           'dangerous_params': [{'param_name': str, 'sink_rule_id': str}, ...] }]
    """
    lang = language.lower()
    if lang in ('javascript', 'typescript'):
        funcs = _extract_js_functions(source)
        sink_patterns = _JS_SINK_PATTERNS
    elif lang == 'python':
        funcs = _extract_py_functions(source)
        sink_patterns = _PY_SINK_PATTERNS
    else:
        return []

    results = []
    for func in funcs:
        params = func['params']
        param_set = set(params)
        body = func['body']
        body_lines = body.split('\n')

        dangerous_params = []
        seen = set()
        # variable name -> right-hand sides assigned to it in this body
        assigned_rhs = {}

        for sink_tuple in sink_patterns:
            sink_re, rule_id = sink_tuple[0], sink_tuple[1]
            line_guard = sink_tuple[2] if len(sink_tuple) > 2 else None
            for body_line in body_lines:
                # If there's a line guard, the line must also match it
                if line_guard and not re.search(line_guard, body_line, re.IGNORECASE):
                    continue
                for m in re.finditer(sink_re, body_line, re.IGNORECASE):
                    var_in_sink = m.group(1)
                    if var_in_sink in param_set:
                        _record_dangerous_param(dangerous_params, seen, var_in_sink, rule_id)
                        continue

                    # Check if the variable was assigned from a param
                    # e.g. const query = "SELECT ... " + param;
                    if var_in_sink not in assigned_rhs:
                        assign_re = re.compile(
                            # Left boundary so "myquery =" is not read as "query =".
                            r'(?<![\w$])' + re.escape(var_in_sink) +
                            # "=" or "+=", but not "==" / "===" comparisons
                            # or "=>" arrow functions.
                            r'\s*\+?=(?![=>])\s*(.+)'
                        )
                        assigned_rhs[var_in_sink] = [
                            am.group(1) for am in assign_re.finditer(body)
                        ]
                    for rhs in assigned_rhs[var_in_sink]:
                        for param in params:
                            if re.search(r'\b' + re.escape(param) + r'\b', rhs):
                                _record_dangerous_param(dangerous_params, seen, param, rule_id)

        if dangerous_params:
            results.append({
                'function_name': func['name'],
                'dangerous_params': dangerous_params,
            })

    return results
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _find_tainted_variables(source):
    """Collect variables that are assigned directly from a taint source.

    Both the JavaScript and the Python source tables are applied to the
    text, in that order; a later match for the same variable name replaces
    an earlier one.

    Returns a dict mapping variable name -> {'source': source_expr,
    'line': 0-based line of the assignment}.
    """
    found = {}

    for entry in [*_JS_TAINT_SOURCES, *_PY_TAINT_SOURCES]:
        source_regex = re.compile(entry[0], re.MULTILINE)
        for hit in source_regex.finditer(source):
            # Number of newlines before the match == 0-based line number.
            line_no = source.count('\n', 0, hit.start())
            found[hit.group(1)] = {
                'source': hit.group(2),
                'line': line_no,
            }

    return found
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _extract_import_bindings(source, language):
|
|
252
|
+
"""Extract import/require bindings from source code.
|
|
253
|
+
|
|
254
|
+
Returns list of dicts:
|
|
255
|
+
[{'module': str, 'names': [str], 'is_default': bool}]
|
|
256
|
+
"""
|
|
257
|
+
bindings = []
|
|
258
|
+
lang = language.lower()
|
|
259
|
+
|
|
260
|
+
if lang in ('javascript', 'typescript'):
|
|
261
|
+
# const x = require('module') — default import
|
|
262
|
+
for m in re.finditer(r'''(?:const|let|var)\s+(\w+)\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)''', source):
|
|
263
|
+
bindings.append({
|
|
264
|
+
'module': m.group(2),
|
|
265
|
+
'names': [m.group(1)],
|
|
266
|
+
'is_default': True,
|
|
267
|
+
})
|
|
268
|
+
# const { a, b } = require('module') — named imports
|
|
269
|
+
for m in re.finditer(r'''(?:const|let|var)\s+\{\s*([^}]+)\}\s*=\s*require\s*\(\s*['"]([^'"]+)['"]\s*\)''', source):
|
|
270
|
+
names = [n.strip().split(' as ')[-1].strip() for n in m.group(1).split(',') if n.strip()]
|
|
271
|
+
bindings.append({
|
|
272
|
+
'module': m.group(2),
|
|
273
|
+
'names': names,
|
|
274
|
+
'is_default': False,
|
|
275
|
+
})
|
|
276
|
+
# import x from 'module' — ESM default
|
|
277
|
+
for m in re.finditer(r'''import\s+(\w+)\s+from\s+['"]([^'"]+)['"]''', source):
|
|
278
|
+
bindings.append({
|
|
279
|
+
'module': m.group(2),
|
|
280
|
+
'names': [m.group(1)],
|
|
281
|
+
'is_default': True,
|
|
282
|
+
})
|
|
283
|
+
# import { a, b } from 'module' — ESM named
|
|
284
|
+
for m in re.finditer(r'''import\s+\{\s*([^}]+)\}\s*from\s+['"]([^'"]+)['"]''', source):
|
|
285
|
+
names = [n.strip().split(' as ')[-1].strip() for n in m.group(1).split(',') if n.strip()]
|
|
286
|
+
bindings.append({
|
|
287
|
+
'module': m.group(2),
|
|
288
|
+
'names': names,
|
|
289
|
+
'is_default': False,
|
|
290
|
+
})
|
|
291
|
+
elif lang == 'python':
|
|
292
|
+
for m in re.finditer(r'^from\s+(\S+)\s+import\s+(.+)', source, re.MULTILINE):
|
|
293
|
+
names = [n.strip().split(' as ')[-1].strip() for n in m.group(2).split(',') if n.strip()]
|
|
294
|
+
bindings.append({
|
|
295
|
+
'module': m.group(1),
|
|
296
|
+
'names': names,
|
|
297
|
+
'is_default': False,
|
|
298
|
+
})
|
|
299
|
+
for m in re.finditer(r'^import\s+(\S+)(?:\s+as\s+(\w+))?', source, re.MULTILINE):
|
|
300
|
+
alias = m.group(2) or m.group(1).split('.')[-1]
|
|
301
|
+
bindings.append({
|
|
302
|
+
'module': m.group(1),
|
|
303
|
+
'names': [alias],
|
|
304
|
+
'is_default': True,
|
|
305
|
+
})
|
|
306
|
+
|
|
307
|
+
return bindings
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _find_calls_to_function(source, function_name, object_name=None):
|
|
311
|
+
"""Find all calls to a specific function in source code.
|
|
312
|
+
|
|
313
|
+
Returns list of dicts: [{'args': [str], 'line': int}]
|
|
314
|
+
"""
|
|
315
|
+
calls = []
|
|
316
|
+
|
|
317
|
+
if object_name:
|
|
318
|
+
pattern = re.escape(object_name) + r'\.' + re.escape(function_name) + r'\s*\(([^)]*)\)'
|
|
319
|
+
else:
|
|
320
|
+
pattern = r'(?<!\w\.)' + re.escape(function_name) + r'\s*\(([^)]*)\)'
|
|
321
|
+
|
|
322
|
+
for m in re.finditer(pattern, source):
|
|
323
|
+
args_str = m.group(1).strip()
|
|
324
|
+
args = [a.strip() for a in args_str.split(',') if a.strip()] if args_str else []
|
|
325
|
+
line = source[:m.start()].count('\n')
|
|
326
|
+
calls.append({
|
|
327
|
+
'args': args,
|
|
328
|
+
'line': line,
|
|
329
|
+
})
|
|
330
|
+
|
|
331
|
+
return calls
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def build_export_summaries(file_paths, sources_dict):
    """Compute, per file, the functions whose parameters reach a sink.

    Args:
        file_paths: list of file paths
        sources_dict: dict mapping absolute path -> source code string;
            files missing from it (or mapped to an empty value) are read
            from disk instead.

    Returns dict mapping absolute path -> list of dangerous function
    summaries. Files that cannot be read, have an 'unknown' language, or
    contain no dangerous functions are left out.
    """
    per_file = {}

    for path in file_paths:
        key = os.path.abspath(path)
        text = sources_dict.get(key)
        if not text:
            # Not cached: fall back to reading the file; skip it if unreadable.
            try:
                with open(path, 'r', encoding='utf-8', errors='ignore') as handle:
                    text = handle.read()
            except (OSError, IOError):
                continue

        language = detect_language(path)
        if language != 'unknown':
            found = extract_dangerous_functions_regex(text, language)
            if found:
                per_file[key] = found

    return per_file
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def cross_file_taint_match(source_file, sink_file, tainted_var, callee_name,
                           dangerous_param, taint_source):
    """Assemble the finding dict for one cross-file taint flow.

    The result carries ruleId 'cross-file-taint', severity 'error', a
    human-readable message, the source file as its location (line and
    column 0) and a metadata dict describing the three-step taint path.
    """
    origin_name = os.path.basename(source_file)
    sink_name = os.path.basename(sink_file)

    message = (
        f"Tainted data from {taint_source} in "
        f"'{origin_name}' flows to "
        f"'{callee_name}({dangerous_param})' which reaches a "
        f"dangerous sink in '{sink_name}'"
    )

    # Source assignment -> call site -> sink inside the callee's file.
    taint_path = [
        f"{origin_name}: {tainted_var} = {taint_source}",
        f"{origin_name}: {callee_name}({tainted_var})",
        f"{sink_name}: {callee_name}({dangerous_param}) -> sink",
    ]

    metadata = {
        'source_file': source_file,
        'sink_file': sink_file,
        'taint_path': taint_path,
        'tainted_variable': tainted_var,
        'callee_function': callee_name,
        'dangerous_param': dangerous_param,
    }

    finding = {
        'ruleId': 'cross-file-taint',
        'severity': 'error',
        'message': message,
        'file': source_file,
        'line': 0,
        'column': 0,
        'metadata': metadata,
    }
    return finding
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
# ---------------------------------------------------------------------------
|
|
400
|
+
# Existing helpers (unchanged)
|
|
401
|
+
# ---------------------------------------------------------------------------
|
|
402
|
+
|
|
18
403
|
def extract_js_imports(source):
|
|
19
404
|
"""Extract import/require statements from JavaScript/TypeScript."""
|
|
20
405
|
imports = []
|
|
@@ -146,11 +531,14 @@ def cross_file_analyze(file_paths):
|
|
|
146
531
|
1. Analyze each file independently
|
|
147
532
|
2. Build import graph
|
|
148
533
|
3. For each file importing from another file with ERROR-severity findings,
|
|
149
|
-
add a cross-file-taint-warning
|
|
534
|
+
add a cross-file-taint-warning (backward compat)
|
|
535
|
+
4. Build export summaries and perform parameter-aware cross-file taint
|
|
536
|
+
matching to produce cross-file-taint findings.
|
|
150
537
|
"""
|
|
151
538
|
# Analyze each file
|
|
152
539
|
file_findings = {}
|
|
153
540
|
all_findings = []
|
|
541
|
+
sources_dict = {}
|
|
154
542
|
|
|
155
543
|
for file_path in file_paths:
|
|
156
544
|
try:
|
|
@@ -163,10 +551,18 @@ def cross_file_analyze(file_paths):
|
|
|
163
551
|
except Exception:
|
|
164
552
|
continue
|
|
165
553
|
|
|
554
|
+
# Cache source code for later
|
|
555
|
+
try:
|
|
556
|
+
abs_path = os.path.abspath(file_path)
|
|
557
|
+
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
|
558
|
+
sources_dict[abs_path] = f.read()
|
|
559
|
+
except (OSError, IOError):
|
|
560
|
+
pass
|
|
561
|
+
|
|
166
562
|
# Build import graph
|
|
167
563
|
graph = build_import_graph(file_paths)
|
|
168
564
|
|
|
169
|
-
#
|
|
565
|
+
# --- Backward-compatible shallow warnings ---
|
|
170
566
|
cross_file_warnings = []
|
|
171
567
|
for file_path, edges in graph.items():
|
|
172
568
|
for edge in edges:
|
|
@@ -189,11 +585,88 @@ def cross_file_analyze(file_paths):
|
|
|
189
585
|
}
|
|
190
586
|
cross_file_warnings.append(warning)
|
|
191
587
|
|
|
192
|
-
#
|
|
193
|
-
|
|
588
|
+
# --- Parameter-aware cross-file taint matching ---
|
|
589
|
+
export_summaries = build_export_summaries(file_paths, sources_dict)
|
|
590
|
+
cross_file_taint_findings = []
|
|
591
|
+
|
|
592
|
+
for source_file, edges in graph.items():
|
|
593
|
+
source_code = sources_dict.get(source_file, '')
|
|
594
|
+
if not source_code:
|
|
595
|
+
continue
|
|
596
|
+
|
|
597
|
+
lang = detect_language(source_file)
|
|
598
|
+
if lang == 'unknown':
|
|
599
|
+
continue
|
|
600
|
+
|
|
601
|
+
tainted_vars = _find_tainted_variables(source_code)
|
|
602
|
+
import_bindings = _extract_import_bindings(source_code, lang)
|
|
603
|
+
|
|
604
|
+
for edge in edges:
|
|
605
|
+
sink_file = edge['resolved_path']
|
|
606
|
+
sink_summaries = export_summaries.get(sink_file, [])
|
|
607
|
+
if not sink_summaries:
|
|
608
|
+
continue
|
|
609
|
+
|
|
610
|
+
# Find the import binding for this module
|
|
611
|
+
module_name = edge['module']
|
|
612
|
+
relevant_bindings = [b for b in import_bindings if b['module'] == module_name]
|
|
613
|
+
|
|
614
|
+
for summary in sink_summaries:
|
|
615
|
+
func_name = summary['function_name']
|
|
616
|
+
|
|
617
|
+
for binding in relevant_bindings:
|
|
618
|
+
if binding['is_default']:
|
|
619
|
+
# Default import: db = require('./lib/db')
|
|
620
|
+
# Calls look like: db.getUserById(arg)
|
|
621
|
+
obj_name = binding['names'][0] if binding['names'] else None
|
|
622
|
+
if obj_name:
|
|
623
|
+
calls = _find_calls_to_function(source_code, func_name, obj_name)
|
|
624
|
+
else:
|
|
625
|
+
calls = []
|
|
626
|
+
else:
|
|
627
|
+
# Named import: { getUserById } = require('./lib/db')
|
|
628
|
+
if func_name in binding['names']:
|
|
629
|
+
calls = _find_calls_to_function(source_code, func_name)
|
|
630
|
+
else:
|
|
631
|
+
calls = []
|
|
632
|
+
|
|
633
|
+
for call in calls:
|
|
634
|
+
for dp in summary['dangerous_params']:
|
|
635
|
+
param_name = dp['param_name']
|
|
636
|
+
# Find which arg position this param is
|
|
637
|
+
try:
|
|
638
|
+
param_idx = [p.strip() for p in _get_func_params(
|
|
639
|
+
sink_summaries, func_name
|
|
640
|
+
)].index(param_name)
|
|
641
|
+
except (ValueError, IndexError):
|
|
642
|
+
param_idx = 0
|
|
643
|
+
|
|
644
|
+
if param_idx < len(call['args']):
|
|
645
|
+
arg_name = call['args'][param_idx]
|
|
646
|
+
if arg_name in tainted_vars:
|
|
647
|
+
taint_source = tainted_vars[arg_name]['source']
|
|
648
|
+
finding = cross_file_taint_match(
|
|
649
|
+
source_file, sink_file,
|
|
650
|
+
arg_name, func_name,
|
|
651
|
+
param_name, taint_source
|
|
652
|
+
)
|
|
653
|
+
finding['line'] = call.get('line', 0)
|
|
654
|
+
cross_file_taint_findings.append(finding)
|
|
655
|
+
|
|
656
|
+
# Combine: per-file findings + shallow warnings + cross-file taint findings
|
|
657
|
+
combined = all_findings + cross_file_warnings + cross_file_taint_findings
|
|
194
658
|
return combined
|
|
195
659
|
|
|
196
660
|
|
|
661
|
+
def _get_func_params(summaries, func_name):
|
|
662
|
+
"""Get parameter names for a function from its summary list."""
|
|
663
|
+
# Extract params from the function body in the summary
|
|
664
|
+
for s in summaries:
|
|
665
|
+
if s['function_name'] == func_name:
|
|
666
|
+
return [dp['param_name'] for dp in s['dangerous_params']]
|
|
667
|
+
return []
|
|
668
|
+
|
|
669
|
+
|
|
197
670
|
def main():
|
|
198
671
|
"""CLI entry point. Accepts file paths as arguments, outputs JSON."""
|
|
199
672
|
if len(sys.argv) < 2:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-security-scanner-mcp",
|
|
3
|
-
"version": "3.18.0",
|
|
3
|
+
"version": "3.19.0",
|
|
4
4
|
"mcpName": "io.github.sinewaveai/agent-security-scanner-mcp",
|
|
5
5
|
"description": "Security scanner MCP server for AI coding agents. Prompt injection firewall, package hallucination detection (4.3M+ packages), 1000+ vulnerability rules with AST & taint analysis, auto-fix. For Claude Code, Cursor, Windsurf, Cline, OpenClaw.",
|
|
6
6
|
"main": "index.js",
|
|
@@ -109,7 +109,8 @@
|
|
|
109
109
|
"skills/**",
|
|
110
110
|
"scripts/postinstall.js",
|
|
111
111
|
"cross_file_analyzer.py",
|
|
112
|
-
"daemon.py"
|
|
112
|
+
"daemon.py",
|
|
113
|
+
"python_taint_fallback.py"
|
|
113
114
|
],
|
|
114
115
|
"devDependencies": {
|
|
115
116
|
"all-the-package-names": "^2.0.2349",
|