agent-security-scanner-mcp 3.6.0 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/generic_ast.py +7 -2
- package/package.json +1 -1
- package/taint_analyzer.py +516 -11
package/generic_ast.py
CHANGED
|
@@ -87,6 +87,7 @@ class GenericNode:
|
|
|
87
87
|
value: Optional['GenericNode'] = None
|
|
88
88
|
target: Optional['GenericNode'] = None
|
|
89
89
|
args: List['GenericNode'] = field(default_factory=list)
|
|
90
|
+
params: List['GenericNode'] = field(default_factory=list) # For FUNCTION_DEF: parameter nodes
|
|
90
91
|
operator: Optional[str] = None
|
|
91
92
|
|
|
92
93
|
def find_all(self, kind: NodeKind) -> List['GenericNode']:
|
|
@@ -524,12 +525,16 @@ class ASTConverter:
|
|
|
524
525
|
if child.type in ('+', '-', '*', '/', '%', '==', '!=', '<', '>', '<=', '>=', 'and', 'or', '&&', '||', '+'):
|
|
525
526
|
node.operator = source_bytes[child.start_byte:child.end_byte].decode('utf-8')
|
|
526
527
|
|
|
527
|
-
# For function definitions, extract name
|
|
528
|
+
# For function definitions, extract name and parameters
|
|
528
529
|
elif node.kind == NodeKind.FUNCTION_DEF:
|
|
529
530
|
for child in ts_node.children:
|
|
530
531
|
if child.type == 'identifier' or child.type == 'name':
|
|
531
532
|
node.name = source_bytes[child.start_byte:child.end_byte].decode('utf-8')
|
|
532
|
-
|
|
533
|
+
elif child.type in ('parameters', 'formal_parameters', 'parameter_list'):
|
|
534
|
+
for param_child in child.children:
|
|
535
|
+
if param_child.type not in ('(', ')', ',', 'def'):
|
|
536
|
+
param_node = self.convert(param_child, source_bytes)
|
|
537
|
+
node.params.append(param_node)
|
|
533
538
|
|
|
534
539
|
|
|
535
540
|
def convert_tree(ts_tree, language: str, source_bytes: bytes) -> GenericNode:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-security-scanner-mcp",
|
|
3
|
-
"version": "3.6.0",
|
|
3
|
+
"version": "3.7.0",
|
|
4
4
|
"mcpName": "io.github.sinewaveai/agent-security-scanner-mcp",
|
|
5
5
|
"description": "Security scanner MCP server for AI coding agents. Prompt injection firewall, package hallucination detection (4.3M+ packages), 1000+ vulnerability rules with AST & taint analysis, auto-fix. For Claude Code, Cursor, Windsurf, Cline.",
|
|
6
6
|
"main": "index.js",
|
package/taint_analyzer.py
CHANGED
|
@@ -39,6 +39,39 @@ class VariableAssignment:
|
|
|
39
39
|
node: GenericNode
|
|
40
40
|
|
|
41
41
|
|
|
42
|
+
@dataclass
class InternalSink:
    """One sink match inside a function body that a parameter's taint reaches."""

    # Text of the sink pattern that matched (the pattern's ``pattern_text``).
    sink_pattern: str
    # Zero-based positions, in the owning summary's parameter list, of the
    # parameters whose taint reaches this sink.
    param_indices: Set[int]
    # Line reported by the sink match.
    line: int
    # Id of the taint rule the sink pattern belongs to.
    rule_id: str
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class FunctionSummary:
    """Per-function taint summary consumed at call sites of that function."""

    # Name of the summarized function.
    name: str
    # Parameter names in declaration order, with ``self``/``cls`` left out.
    parameters: List[str]
    # Indices into ``parameters`` whose taint can reach a return statement.
    returns_taint_from: Set[int]
    # True when the body matches a rule source and contains a return statement.
    returns_source: bool
    # Pattern text of the matched source, or None when no source was recorded.
    source_pattern: Optional[str]
    # Sinks inside the body that are reachable from parameters.
    internal_sinks: List[InternalSink]
    # True when the body matches any of the rule's sanitizer patterns.
    has_sanitizer: bool
    # First and last line of the function definition node.
    line: int
    end_line: int
    # The FUNCTION_DEF node this summary was computed from.
    node: GenericNode
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class CallGraph:
    """Intra-file call graph: known functions plus caller/callee edges."""

    # Function name -> its FUNCTION_DEF node.
    functions: Dict[str, GenericNode]
    # Function name -> computed taint summary (filled in after construction).
    summaries: Dict[str, FunctionSummary]
    # Caller name -> names of the known functions it calls.
    calls: Dict[str, Set[str]]
    # Callee name -> names of the known functions that call it.
    reverse: Dict[str, Set[str]]
|
|
73
|
+
|
|
74
|
+
|
|
42
75
|
class TaintAnalyzer:
|
|
43
76
|
"""
|
|
44
77
|
Performs taint analysis on an AST using TaintRule definitions.
|
|
@@ -70,19 +103,30 @@ class TaintAnalyzer:
|
|
|
70
103
|
# Reset state for each rule
|
|
71
104
|
self.tainted = {}
|
|
72
105
|
self.assignments = []
|
|
73
|
-
|
|
106
|
+
|
|
107
|
+
# Phase 0: Build call graph + generate inter-procedural summaries
|
|
108
|
+
call_graph = self._build_call_graph(ast)
|
|
109
|
+
summaries: Dict[str, FunctionSummary] = {}
|
|
110
|
+
if call_graph.functions:
|
|
111
|
+
summaries = self._generate_summaries(call_graph, rule)
|
|
112
|
+
self._current_summaries = summaries
|
|
113
|
+
|
|
74
114
|
# Step 1: Collect all variable assignments
|
|
75
115
|
self._collect_assignments(ast)
|
|
76
|
-
|
|
116
|
+
|
|
77
117
|
# Step 2: Find sources and mark initial tainted variables
|
|
78
118
|
self._find_sources(ast, rule)
|
|
79
|
-
|
|
80
|
-
# Step 3: Propagate taint through assignments
|
|
119
|
+
|
|
120
|
+
# Step 3: Propagate taint through assignments (now includes inter-procedural)
|
|
81
121
|
self._propagate_taint(rule)
|
|
82
|
-
|
|
122
|
+
|
|
123
|
+
# Step 3.5: Check tainted args against callee internal sinks
|
|
124
|
+
internal_findings = self._check_internal_sinks(ast, rule, summaries)
|
|
125
|
+
|
|
83
126
|
# Step 4: Check sinks for tainted input
|
|
84
127
|
findings = self._check_sinks(ast, rule)
|
|
85
|
-
|
|
128
|
+
findings.extend(internal_findings)
|
|
129
|
+
|
|
86
130
|
return findings
|
|
87
131
|
|
|
88
132
|
def _collect_assignments(self, node: GenericNode):
|
|
@@ -203,28 +247,30 @@ class TaintAnalyzer:
|
|
|
203
247
|
return None
|
|
204
248
|
|
|
205
249
|
def _propagate_taint(self, rule: TaintRule):
|
|
206
|
-
"""Propagate taint through variable assignments"""
|
|
250
|
+
"""Propagate taint through variable assignments (intra + inter-procedural)"""
|
|
251
|
+
summaries = getattr(self, '_current_summaries', {})
|
|
207
252
|
changed = True
|
|
208
253
|
iterations = 0
|
|
209
254
|
max_iterations = 100
|
|
210
|
-
|
|
255
|
+
|
|
211
256
|
while changed and iterations < max_iterations:
|
|
212
257
|
changed = False
|
|
213
258
|
iterations += 1
|
|
214
|
-
|
|
259
|
+
|
|
215
260
|
for assignment in self.assignments:
|
|
216
261
|
if assignment.target in self.tainted:
|
|
217
262
|
continue
|
|
218
|
-
|
|
263
|
+
|
|
219
264
|
if self._is_assignment_sanitized(assignment, rule):
|
|
220
265
|
continue
|
|
221
266
|
|
|
267
|
+
# Part A: Direct variable propagation (existing)
|
|
222
268
|
for source_var in assignment.source_vars:
|
|
223
269
|
if source_var in self.tainted:
|
|
224
270
|
source_taint = self.tainted[source_var]
|
|
225
271
|
new_path = source_taint.propagation_path.copy()
|
|
226
272
|
new_path.append(f"Line {assignment.line}: {assignment.target} = ... {source_var} ...")
|
|
227
|
-
|
|
273
|
+
|
|
228
274
|
self.tainted[assignment.target] = TaintedVariable(
|
|
229
275
|
name=assignment.target,
|
|
230
276
|
source_pattern=source_taint.source_pattern,
|
|
@@ -233,6 +279,59 @@ class TaintAnalyzer:
|
|
|
233
279
|
)
|
|
234
280
|
changed = True
|
|
235
281
|
break
|
|
282
|
+
|
|
283
|
+
if assignment.target in self.tainted:
|
|
284
|
+
continue
|
|
285
|
+
|
|
286
|
+
# Part B: Inter-procedural return propagation
|
|
287
|
+
if summaries:
|
|
288
|
+
call_nodes = assignment.node.find_all(NodeKind.CALL)
|
|
289
|
+
for call_node in call_nodes:
|
|
290
|
+
callee = self._resolve_callee_name(call_node)
|
|
291
|
+
if not callee or callee not in summaries:
|
|
292
|
+
continue
|
|
293
|
+
summary = summaries[callee]
|
|
294
|
+
if summary.has_sanitizer:
|
|
295
|
+
continue
|
|
296
|
+
|
|
297
|
+
# Check if callee returns a source
|
|
298
|
+
if summary.returns_source:
|
|
299
|
+
self.tainted[assignment.target] = TaintedVariable(
|
|
300
|
+
name=assignment.target,
|
|
301
|
+
source_pattern=summary.source_pattern or "function_source",
|
|
302
|
+
source_line=summary.line,
|
|
303
|
+
propagation_path=[f"Source via {callee}()"],
|
|
304
|
+
)
|
|
305
|
+
changed = True
|
|
306
|
+
break
|
|
307
|
+
|
|
308
|
+
# Check if any tainted arg flows to return
|
|
309
|
+
for idx, param_name in enumerate(summary.parameters):
|
|
310
|
+
if idx >= len(call_node.args):
|
|
311
|
+
continue
|
|
312
|
+
if idx not in summary.returns_taint_from:
|
|
313
|
+
continue
|
|
314
|
+
arg_node = call_node.args[idx]
|
|
315
|
+
arg_vars = self._get_referenced_variables(arg_node)
|
|
316
|
+
for av in arg_vars:
|
|
317
|
+
if av in self.tainted:
|
|
318
|
+
source_taint = self.tainted[av]
|
|
319
|
+
new_path = source_taint.propagation_path.copy()
|
|
320
|
+
new_path.append(
|
|
321
|
+
f"Line {assignment.line}: {assignment.target} = {callee}({av}) [inter-procedural]"
|
|
322
|
+
)
|
|
323
|
+
self.tainted[assignment.target] = TaintedVariable(
|
|
324
|
+
name=assignment.target,
|
|
325
|
+
source_pattern=source_taint.source_pattern,
|
|
326
|
+
source_line=source_taint.source_line,
|
|
327
|
+
propagation_path=new_path,
|
|
328
|
+
)
|
|
329
|
+
changed = True
|
|
330
|
+
break
|
|
331
|
+
if assignment.target in self.tainted:
|
|
332
|
+
break
|
|
333
|
+
if assignment.target in self.tainted:
|
|
334
|
+
break
|
|
236
335
|
|
|
237
336
|
def _is_assignment_sanitized(self, assignment: VariableAssignment, rule: TaintRule) -> bool:
|
|
238
337
|
"""Check if an assignment is sanitized"""
|
|
@@ -338,6 +437,412 @@ class TaintAnalyzer:
|
|
|
338
437
|
|
|
339
438
|
# _find_tainted_in_match removed (replaced by _find_tainted_nodes_in_match)
|
|
340
439
|
|
|
440
|
+
# ================================================================
|
|
441
|
+
# Inter-procedural taint analysis (Phase 3)
|
|
442
|
+
# ================================================================
|
|
443
|
+
|
|
444
|
+
def _build_call_graph(self, ast: GenericNode) -> CallGraph:
    """Index every named function under *ast* and record who calls whom.

    Files with more than 500 function definitions yield an empty graph.
    Only calls that resolve to a function defined in this file are kept,
    and a function's calls to itself are not recorded as edges.
    """
    definitions = ast.find_all(NodeKind.FUNCTION_DEF)
    if len(definitions) > 500:
        return CallGraph(functions={}, summaries={}, calls={}, reverse={})

    # A later definition with the same name replaces the earlier one.
    functions: Dict[str, GenericNode] = {
        definition.name: definition for definition in definitions if definition.name
    }

    calls: Dict[str, Set[str]] = {}
    reverse: Dict[str, Set[str]] = {}
    for caller, definition in functions.items():
        targets: Set[str] = set()
        for call in definition.find_all(NodeKind.CALL):
            target = self._resolve_callee_name(call)
            if not target or target not in functions or target == caller:
                continue
            targets.add(target)
            reverse.setdefault(target, set()).add(caller)
        calls[caller] = targets

    return CallGraph(functions=functions, summaries={}, calls=calls, reverse=reverse)
|
|
473
|
+
|
|
474
|
+
def _resolve_callee_name(self, call_node: GenericNode) -> Optional[str]:
|
|
475
|
+
"""Resolve the function name from a CALL node."""
|
|
476
|
+
if call_node.name:
|
|
477
|
+
# Handle dotted names: self.foo -> foo, obj.method -> method
|
|
478
|
+
parts = call_node.name.split('.')
|
|
479
|
+
return parts[-1]
|
|
480
|
+
return None
|
|
481
|
+
|
|
482
|
+
def _extract_param_names(self, func_node: GenericNode) -> List[str]:
    """List the parameter names of a FUNCTION_DEF node, skipping ``self``/``cls``.

    A parameter that is itself an identifier contributes its text; any other
    parameter node contributes the text of the first identifier found inside
    it. Parameters that yield no name are dropped.
    """

    def label_of(parameter: GenericNode) -> Optional[str]:
        if parameter.kind == NodeKind.IDENTIFIER:
            return parameter.text
        first_ident = parameter.find_first(NodeKind.IDENTIFIER)
        return first_ident.text if first_ident else None

    collected: List[str] = []
    for parameter in func_node.params:
        label = label_of(parameter)
        if not label or label in ('self', 'cls'):
            continue
        collected.append(label)
    return collected
|
|
494
|
+
|
|
495
|
+
def _get_function_body(self, func_node: GenericNode) -> Optional[GenericNode]:
    """Return the first direct child of *func_node* that is a BLOCK, else None."""
    return next(
        (candidate for candidate in func_node.children if candidate.kind == NodeKind.BLOCK),
        None,
    )
|
|
501
|
+
|
|
502
|
+
# ----------------------------------------------------------------
|
|
503
|
+
# Topological sort with SCC detection (Tarjan's algorithm)
|
|
504
|
+
# ----------------------------------------------------------------
|
|
505
|
+
|
|
506
|
+
def _topological_order(self, call_graph: CallGraph) -> List[List[str]]:
|
|
507
|
+
"""Return SCCs in reverse topological order (callees before callers)."""
|
|
508
|
+
index_counter = [0]
|
|
509
|
+
stack: List[str] = []
|
|
510
|
+
on_stack: Set[str] = set()
|
|
511
|
+
indices: Dict[str, int] = {}
|
|
512
|
+
lowlinks: Dict[str, int] = {}
|
|
513
|
+
result: List[List[str]] = []
|
|
514
|
+
|
|
515
|
+
def strongconnect(v: str):
|
|
516
|
+
indices[v] = index_counter[0]
|
|
517
|
+
lowlinks[v] = index_counter[0]
|
|
518
|
+
index_counter[0] += 1
|
|
519
|
+
stack.append(v)
|
|
520
|
+
on_stack.add(v)
|
|
521
|
+
|
|
522
|
+
for w in call_graph.calls.get(v, set()):
|
|
523
|
+
if w not in indices:
|
|
524
|
+
strongconnect(w)
|
|
525
|
+
lowlinks[v] = min(lowlinks[v], lowlinks[w])
|
|
526
|
+
elif w in on_stack:
|
|
527
|
+
lowlinks[v] = min(lowlinks[v], indices[w])
|
|
528
|
+
|
|
529
|
+
if lowlinks[v] == indices[v]:
|
|
530
|
+
scc: List[str] = []
|
|
531
|
+
while True:
|
|
532
|
+
w = stack.pop()
|
|
533
|
+
on_stack.discard(w)
|
|
534
|
+
scc.append(w)
|
|
535
|
+
if w == v:
|
|
536
|
+
break
|
|
537
|
+
result.append(scc)
|
|
538
|
+
|
|
539
|
+
for v in call_graph.functions:
|
|
540
|
+
if v not in indices:
|
|
541
|
+
strongconnect(v)
|
|
542
|
+
|
|
543
|
+
return result # Already in reverse topological order from Tarjan's
|
|
544
|
+
|
|
545
|
+
# ----------------------------------------------------------------
|
|
546
|
+
# Function summary generation
|
|
547
|
+
# ----------------------------------------------------------------
|
|
548
|
+
|
|
549
|
+
def _generate_summaries(self, call_graph: CallGraph, rule: TaintRule) -> Dict[str, FunctionSummary]:
    """Summarize every function of *call_graph*, callees before callers.

    Components with a single function are analyzed with
    ``_summarize_function``, which can consult the summaries built so far.
    Functions in a larger (mutually recursive) component are not analyzed;
    each gets a fixed summary in which every parameter flows to the return
    value and nothing else is recorded. The result is also stored on
    ``call_graph.summaries``.
    """
    summaries: Dict[str, FunctionSummary] = {}

    for component in self._topological_order(call_graph):
        if len(component) <= 1:
            only = component[0]
            definition = call_graph.functions[only]
            summaries[only] = self._summarize_function(definition, rule, summaries)
            continue

        # Mutual recursion: fall back to the conservative summary.
        for member in component:
            definition = call_graph.functions[member]
            param_names = self._extract_param_names(definition)
            summaries[member] = FunctionSummary(
                name=member,
                parameters=param_names,
                returns_taint_from=set(range(len(param_names))),
                returns_source=False,
                source_pattern=None,
                internal_sinks=[],
                has_sanitizer=False,
                line=definition.line,
                end_line=definition.end_line,
                node=definition,
            )

    call_graph.summaries = summaries
    return summaries
|
|
579
|
+
|
|
580
|
+
def _summarize_function(self, func_node: GenericNode, rule: TaintRule,
                        existing_summaries: Dict[str, FunctionSummary]) -> FunctionSummary:
    """Generate a summary for a single non-recursive function.

    Each parameter is tainted in isolation and propagated through the
    function's own assignments; the summary records which parameters reach
    a return statement and which reach a sink pattern of *rule*.

    Args:
        func_node: The FUNCTION_DEF node to summarize.
        rule: Supplies the source, sink and sanitizer patterns.
        existing_summaries: Summaries of already-processed functions, used
            when the body calls them.

    Returns:
        The FunctionSummary for *func_node*.
    """
    params = self._extract_param_names(func_node)
    # No BLOCK child found: analyze the whole definition node instead.
    body = self._get_function_body(func_node) or func_node

    # Coarse check: a sanitizer match anywhere in the body marks the function.
    has_sanitizer = bool(rule.sanitizers) and any(
        self.matcher.find_all(sanitizer, body) for sanitizer in rule.sanitizers
    )

    # Work that does not depend on which parameter is tainted is done once
    # here rather than once per parameter.
    return_nodes = body.find_all(NodeKind.RETURN)

    # Over-approximation: a source match plus any return statement counts as
    # "returns a source"; no flow from the source to the return is checked.
    # When several source patterns match, the last one is recorded.
    returns_source = False
    source_pattern_str: Optional[str] = None
    for source_pattern in rule.sources:
        if return_nodes and self.matcher.find_all(source_pattern, body):
            returns_source = True
            source_pattern_str = source_pattern.pattern_text

    returns_taint_from: Set[int] = set()
    # Sinks keyed by line: the first sink seen on a line keeps its pattern
    # text, later hits on that line only add their parameter index.
    merged_sinks: Dict[int, InternalSink] = {}

    local_assignments: List[VariableAssignment] = []
    sink_matches = []
    if params:
        local_assignments = self._collect_assignments_in_scope(body)
        sink_matches = [
            (sink_pattern, list(self.matcher.find_all(sink_pattern, body)))
            for sink_pattern in rule.sinks
        ]

    for i, param_name in enumerate(params):
        # Start from a state in which only this parameter is tainted.
        local_tainted: Dict[str, TaintedVariable] = {
            param_name: TaintedVariable(
                name=param_name,
                source_pattern=f"param:{param_name}",
                source_line=func_node.line,
                propagation_path=[f"Parameter: {param_name}"],
            )
        }
        self._propagate_taint_local(local_tainted, local_assignments, rule, existing_summaries)

        # Does any return statement mention a tainted variable?
        for ret_node in return_nodes:
            if any(var in local_tainted for var in self._get_referenced_variables(ret_node)):
                returns_taint_from.add(i)
                break

        # Does any sink match mention a tainted variable?
        for sink_pattern, matches in sink_matches:
            for match in matches:
                if not match.node:
                    continue
                if not any(var in local_tainted
                           for var in self._get_referenced_variables(match.node)):
                    continue
                known = merged_sinks.get(match.line)
                if known is None:
                    merged_sinks[match.line] = InternalSink(
                        sink_pattern=sink_pattern.pattern_text,
                        param_indices={i},
                        line=match.line,
                        rule_id=rule.id,
                    )
                else:
                    known.param_indices.add(i)

    return FunctionSummary(
        name=func_node.name or "",
        parameters=params,
        returns_taint_from=returns_taint_from,
        returns_source=returns_source,
        source_pattern=source_pattern_str,
        internal_sinks=list(merged_sinks.values()),
        has_sanitizer=has_sanitizer,
        line=func_node.line,
        end_line=func_node.end_line,
        node=func_node,
    )
|
|
677
|
+
|
|
678
|
+
def _collect_assignments_in_scope(self, node: GenericNode) -> List[VariableAssignment]:
    """Gather the assignments under *node* without entering nested functions.

    The tree is visited parent-first, children in order. A FUNCTION_DEF other
    than *node* itself is skipped together with everything below it. An
    assignment is recorded only when a target can be determined; the target
    is removed from its own set of referenced variables.
    """
    collected: List[VariableAssignment] = []
    pending: List[GenericNode] = [node]

    while pending:
        current = pending.pop()
        if current.kind == NodeKind.FUNCTION_DEF and current is not node:
            continue  # nested function: separate scope

        if current.kind == NodeKind.ASSIGNMENT:
            target = self._get_assignment_target(current)
            source_vars = self._get_referenced_variables(current)
            if target:
                source_vars.discard(target)
                collected.append(VariableAssignment(
                    target=target,
                    source_vars=source_vars,
                    line=current.line,
                    column=current.column,
                    node=current,
                ))

        # Push in reverse so the leftmost child is popped (visited) first.
        pending.extend(reversed(current.children))

    return collected
|
|
702
|
+
|
|
703
|
+
def _propagate_taint_local(self, tainted: Dict[str, TaintedVariable],
                           assignments: List[VariableAssignment],
                           rule: TaintRule,
                           existing_summaries: Dict[str, FunctionSummary]):
    """Spread taint across *assignments* until nothing changes, updating *tainted* in place.

    Runs at most 100 passes. In each pass an untainted, unsanitized
    assignment target becomes tainted when either
      * one of the variables it reads is tainted, or
      * its right-hand side calls a summarized function that returns a
        source, or that returns taint from a parameter whose positional
        argument mentions a tainted variable.
    Calls to functions whose summary has a sanitizer are ignored.
    """

    def taint_from_variables(assignment: VariableAssignment) -> Optional[TaintedVariable]:
        # Direct flow: the first tainted variable read by the assignment wins.
        for source_var in assignment.source_vars:
            if source_var not in tainted:
                continue
            origin = tainted[source_var]
            trail = origin.propagation_path.copy()
            trail.append(f"Line {assignment.line}: {assignment.target} = ... {source_var} ...")
            return TaintedVariable(
                name=assignment.target,
                source_pattern=origin.source_pattern,
                source_line=origin.source_line,
                propagation_path=trail,
            )
        return None

    def taint_from_calls(assignment: VariableAssignment) -> Optional[TaintedVariable]:
        # Inter-procedural flow through the return value of a summarized callee.
        for call_node in assignment.node.find_all(NodeKind.CALL):
            callee = self._resolve_callee_name(call_node)
            if not callee or callee not in existing_summaries:
                continue
            summary = existing_summaries[callee]
            if summary.has_sanitizer:
                continue

            if summary.returns_source:
                return TaintedVariable(
                    name=assignment.target,
                    source_pattern=summary.source_pattern or "function_source",
                    source_line=summary.line,
                    propagation_path=[f"Source via {callee}()"],
                )

            for idx in range(len(summary.parameters)):
                if idx >= len(call_node.args):
                    continue
                if idx not in summary.returns_taint_from:
                    continue
                for av in self._get_referenced_variables(call_node.args[idx]):
                    if av not in tainted:
                        continue
                    origin = tainted[av]
                    trail = origin.propagation_path.copy()
                    trail.append(f"Line {assignment.line}: {assignment.target} = {callee}({av}) [inter-procedural]")
                    return TaintedVariable(
                        name=assignment.target,
                        source_pattern=origin.source_pattern,
                        source_line=origin.source_line,
                        propagation_path=trail,
                    )
        return None

    max_passes = 100
    passes = 0
    progressed = True
    while progressed and passes < max_passes:
        progressed = False
        passes += 1

        for assignment in assignments:
            if assignment.target in tainted:
                continue
            if self._is_assignment_sanitized(assignment, rule):
                continue

            new_taint = taint_from_variables(assignment)
            if new_taint is None:
                new_taint = taint_from_calls(assignment)
            if new_taint is None:
                continue

            tainted[assignment.target] = new_taint
            progressed = True
|
|
786
|
+
|
|
787
|
+
# ----------------------------------------------------------------
|
|
788
|
+
# Integration: check internal sinks at call sites
|
|
789
|
+
# ----------------------------------------------------------------
|
|
790
|
+
|
|
791
|
+
def _check_internal_sinks(self, ast: GenericNode, rule: TaintRule,
                          summaries: Dict[str, FunctionSummary]) -> List[Finding]:
    """Report calls that pass a tainted variable into a parameter that reaches a sink.

    Every CALL node under *ast* whose callee has a summary (and whose summary
    has no sanitizer) is inspected. Arguments are matched to parameters by
    position. A finding is anchored at the call site and is produced for each
    tainted variable mentioned in an argument whose parameter index appears
    in at least one of the callee's internal sinks.
    """
    # NOTE(review): indentation was lost in the rendered diff this was read
    # from. The original `break` is assumed to sit inside the
    # `if idx in isink.param_indices` body (one finding per tainted variable
    # per argument) -- confirm against the real file.
    findings: List[Finding] = []

    for call_node in ast.find_all(NodeKind.CALL):
        callee = self._resolve_callee_name(call_node)
        if not callee or callee not in summaries:
            continue
        summary = summaries[callee]
        if summary.has_sanitizer:
            continue

        # Only positions that have both a parameter and an argument.
        paired = min(len(summary.parameters), len(call_node.args))
        for idx in range(paired):
            for av in self._get_referenced_variables(call_node.args[idx]):
                if av not in self.tainted:
                    continue
                taint_info = self.tainted[av]

                reaches_sink = any(
                    idx in isink.param_indices for isink in summary.internal_sinks
                )
                if not reaches_sink:
                    continue

                # Show only the last three steps of the propagation path.
                path_str = " -> ".join(taint_info.propagation_path[-3:])
                message = (
                    f"{rule.message}\n\n"
                    f"Taint flow: {path_str}\n\n"
                    f"Tainted variable '{av}' flows to sink inside {callee}() [inter-procedural]."
                )
                findings.append(Finding(
                    rule_id=rule.id,
                    rule_name=rule.name,
                    message=message,
                    severity=rule.severity,
                    line=call_node.line,
                    column=call_node.column,
                    text=call_node.text,
                    end_line=call_node.end_line,
                    end_column=call_node.end_column,
                    metavariables={},
                    metadata={
                        **rule.metadata,
                        'taint_source': taint_info.source_pattern,
                        'taint_source_line': taint_info.source_line,
                        'tainted_variable': av,
                        'inter_procedural': True,
                        'callee': callee,
                    },
                ))

    return findings
|
|
845
|
+
|
|
341
846
|
|
|
342
847
|
def analyze_taint(ast: GenericNode, rules: List[TaintRule]) -> List[Finding]:
|
|
343
848
|
"""Convenience function to run taint analysis"""
|