agent-security-scanner-mcp 3.17.2 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,688 @@
1
+ """Lightweight Python taint analysis without tree-sitter.
2
+
3
+ Uses the stdlib ``ast`` module so Python taint findings are still available
4
+ when the tree-sitter engine is not installed. The implementation is purposely
5
+ conservative and targets the high-signal flows exercised by the test suite:
6
+
7
+ - Flask/Django-style request sources
8
+ - input()-derived taint
9
+ - intra-procedural propagation through assignments and expressions
10
+ - inter-procedural propagation through simple function summaries
11
+ - internal sinks reached inside callees
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import ast
17
+ from dataclasses import dataclass, field
18
+ from typing import Dict, Iterable, List, Optional, Set, Tuple
19
+
20
+
21
# Sentinel dependency index used inside function summaries to mean "this value
# comes from a taint source" rather than from one of the function's parameters.
SUMMARY_SOURCE = -1

# Summary construction is skipped entirely when a module defines more
# top-level functions than this.
MAX_INTERPROCEDURAL_FUNCTIONS = 500

# Attribute chains whose value (or any subscript of it) is user-controlled.
# Both the bare ``request`` spelling and the ``flask.request`` spelling are
# recognised.
SOURCE_ATTRIBUTES = {
    f"{prefix}request.{container}"
    for prefix in ("", "flask.")
    for container in (
        "args",
        "form",
        "values",
        "cookies",
        "headers",
        "view_args",
        "json",
    )
}

# Calls whose return value is user-controlled: ``.get`` on every source
# attribute above, plus the ``input`` builtin.
SOURCE_CALLS = {f"{attribute}.get" for attribute in SOURCE_ATTRIBUTES} | {"input"}

# Calls whose return value is treated as clean regardless of their arguments.
SANITIZER_CALLS = {
    "shlex.quote",
    "quote",
    "html.escape",
    "markupsafe.escape",
    "urllib.parse.quote",
}
66
+
67
+
68
@dataclass
class TaintInfo:
    """Provenance attached to a tainted variable or expression."""

    # Qualified name of the source that introduced the taint,
    # e.g. "request.args.get" or "input".
    source_pattern: str
    # ``lineno`` of the node that introduced the taint.
    source_line: int
    # Human-readable steps describing how the taint reached this point.
    propagation_path: List[str] = field(default_factory=list)
73
+
74
+
75
@dataclass
class InternalSink:
    """A sink reached inside a function body, described in terms of its parameters."""

    # Rule identifier reported for the sink, e.g. "sql-injection".
    rule_id: str
    # Message reported for the sink.
    message: str
    # Indices of the enclosing function's parameters that flow into the sink.
    param_indices: Set[int]
    # ``lineno`` of the sink call inside the function.
    line: int
81
+
82
+
83
@dataclass
class FunctionSummary:
    """Taint behaviour of one top-level function, as seen by its callers."""

    # Function name as written in its ``def``.
    name: str
    # Parameter names in positional order (``self``/``cls`` are excluded by the builder).
    params: List[str]
    # Indices into ``params`` whose taint can reach the return value.
    returns_taint_from: Set[int] = field(default_factory=set)
    # True when the return value can come straight from a taint source.
    returns_source: bool = False
    # Source pattern reported when ``returns_source`` is set.
    source_pattern: Optional[str] = None
    # Sinks inside the body that are fed by parameters.
    internal_sinks: List[InternalSink] = field(default_factory=list)
    # True when the body invokes a recognised sanitizer.
    has_sanitizer: bool = False
    # ``lineno`` of the function definition (0 when unavailable).
    line: int = 0
93
+
94
+
95
def _get_qualified_name(node: ast.AST) -> Optional[str]:
    """Return the dotted name spelled by a ``Name``/``Attribute`` chain.

    ``a.b.c`` yields ``"a.b.c"``. When the chain is rooted in something other
    than a plain name (a call, a subscript, ...), only the trailing attribute
    names are kept, so ``foo().bar.baz`` yields ``"bar.baz"``. Any other node
    yields ``None``.
    """
    # Peel attributes off from the outside in, remembering them innermost-last.
    trailing: List[str] = []
    base = node
    while isinstance(base, ast.Attribute):
        trailing.append(base.attr)
        base = base.value

    dotted: Optional[str] = base.id if isinstance(base, ast.Name) else None

    # Re-attach the attributes from the innermost outwards.
    for attr in reversed(trailing):
        dotted = f"{dotted}.{attr}" if dotted else attr
    return dotted
102
+
103
+
104
def _source_pattern(node: ast.AST) -> Optional[str]:
    """Return the taint-source pattern matched by *node*, or ``None``.

    A call matches when its callee's dotted name is in ``SOURCE_CALLS``.
    A subscript matches when the subscripted value's dotted name is in
    ``SOURCE_ATTRIBUTES``, and an attribute matches when its own dotted name
    is in ``SOURCE_ATTRIBUTES``.
    """
    if isinstance(node, ast.Call):
        candidate, known = _get_qualified_name(node.func), SOURCE_CALLS
    elif isinstance(node, ast.Subscript):
        candidate, known = _get_qualified_name(node.value), SOURCE_ATTRIBUTES
    elif isinstance(node, ast.Attribute):
        candidate, known = _get_qualified_name(node), SOURCE_ATTRIBUTES
    else:
        return None
    # ``candidate`` may be None (no dotted name); None is never a member.
    return candidate if candidate in known else None
118
+
119
+
120
def _is_sanitizer_call(node: ast.AST) -> bool:
    """Return True when *node* is a call to a name listed in ``SANITIZER_CALLS``."""
    if not isinstance(node, ast.Call):
        return False
    return _get_qualified_name(node.func) in SANITIZER_CALLS
122
+
123
+
124
def _extract_target_names(target: ast.AST) -> List[str]:
    """Return the plain variable names bound by an assignment target.

    Tuple and list targets are flattened recursively, and starred targets
    (``first, *rest = value``) contribute the name they wrap so that values
    bound through star-unpacking are tracked like any other assignment.
    Targets that do not bind a plain name (attributes, subscripts) contribute
    nothing.

    Args:
        target: An assignment target node.

    Returns:
        The bound names in left-to-right order.
    """
    if isinstance(target, ast.Name):
        return [target.id]
    if isinstance(target, ast.Starred):
        # ``*rest`` wraps the real target in ``.value``.
        return _extract_target_names(target.value)
    if isinstance(target, (ast.Tuple, ast.List)):
        names: List[str] = []
        for elt in target.elts:
            names.extend(_extract_target_names(elt))
        return names
    return []
133
+
134
+
135
def _merge_dependency_maps(*maps: Dict[str, Set[int]]) -> Dict[str, Set[int]]:
    """Union several name -> dependency-set maps into a new map.

    A name present in more than one map ends up with the union of its sets.
    The returned sets are fresh objects, so the inputs are never mutated or
    aliased.
    """
    combined: Dict[str, Set[int]] = {}
    for mapping in maps:
        for name, deps in mapping.items():
            combined.setdefault(name, set()).update(deps)
    return combined
141
+
142
+
143
def _merge_taint_envs(*envs: Dict[str, TaintInfo]) -> Dict[str, TaintInfo]:
    """Combine taint environments into a new dict; the earliest entry for a name wins."""
    combined: Dict[str, TaintInfo] = {}
    for env in envs:
        for name, info in env.items():
            # setdefault keeps whatever an earlier environment already recorded.
            combined.setdefault(name, info)
    return combined
150
+
151
+
152
def _node_column(node: ast.AST) -> int:
    """Return ``node.col_offset``, or 0 for nodes that carry no column."""
    try:
        return node.col_offset
    except AttributeError:
        return 0
154
+
155
+
156
def _find_first_taint(node: ast.AST, env: Dict[str, TaintInfo]) -> Optional[Tuple[str, TaintInfo]]:
    """Locate the first tainted variable referenced inside *node*.

    Returns ``(name, env[name])`` for the first ``Name`` found in *env*, or
    ``None`` when the expression references no tainted variable. Attributes
    are searched through their base object only, subscripts through the
    subscripted value and then the index, and every other node through its
    children in ``ast`` order.
    """
    if isinstance(node, ast.Name) and node.id in env:
        return node.id, env[node.id]

    if isinstance(node, ast.Attribute):
        candidates: Iterable[ast.AST] = (node.value,)
    elif isinstance(node, ast.Subscript):
        candidates = (node.value, node.slice)
    else:
        candidates = ast.iter_child_nodes(node)

    for candidate in candidates:
        hit = _find_first_taint(candidate, env)
        if hit:
            return hit
    return None
171
+
172
+
173
def _match_sink(node: ast.Call) -> Optional[Tuple[str, str, List[ast.AST]]]:
    """Classify a call as a dangerous sink.

    Returns ``(rule_id, message, relevant_args)`` when the call matches one of
    the sink rules below, where ``relevant_args`` holds the first positional
    argument. Returns ``None`` when the callee has no dotted name, the call
    has no positional arguments, or no rule matches.
    """
    qname = _get_qualified_name(node.func)
    # Every rule inspects the first positional argument, so a call without one
    # can never match.
    if not qname or not node.args:
        return None

    first_arg: List[ast.AST] = [node.args[0]]

    if qname.endswith(".execute"):
        return ("sql-injection", "User-controlled data flows to SQL execution.", first_arg)

    if qname == "os.system":
        return ("command-injection", "User-controlled data flows to os.system().", first_arg)

    if qname.startswith("subprocess."):
        # Only a literal ``shell=True`` keyword makes a subprocess call a sink.
        for kw in node.keywords:
            if (
                kw.arg == "shell"
                and isinstance(kw.value, ast.Constant)
                and kw.value.value is True
            ):
                return (
                    "command-injection",
                    "User-controlled data flows to subprocess with shell=True.",
                    first_arg,
                )

    if qname == "open":
        return ("path-traversal", "User-controlled data flows to file open().", first_arg)

    if qname.endswith("render_template_string"):
        return ("xss", "User-controlled data flows to render_template_string().", first_arg)

    if qname in {"eval", "exec"}:
        return ("code-injection", "User-controlled data flows to dynamic code execution.", first_arg)

    return None
226
+
227
+
228
class _SummaryBuilder:
    """Builds a ``FunctionSummary`` for every top-level function of a module.

    A summary records which parameters can reach the return value, whether the
    function returns data straight from a taint source, which sinks inside the
    body are fed by parameters, and whether the body calls a sanitizer.
    """

    def __init__(self, tree: ast.AST):
        """Index the top-level (sync and async) function definitions of *tree* by name."""
        self.functions: Dict[str, ast.AST] = {}
        for node in getattr(tree, "body", []):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                self.functions[node.name] = node

    def build(self) -> Dict[str, FunctionSummary]:
        """Compute summaries for all indexed functions.

        Returns:
            A name -> summary map, or an empty map when the module defines more
            than ``MAX_INTERPROCEDURAL_FUNCTIONS`` top-level functions.
        """
        if len(self.functions) > MAX_INTERPROCEDURAL_FUNCTIONS:
            return {}

        # Start from empty summaries so callees can be looked up from the
        # first pass on.
        summaries = {
            name: FunctionSummary(
                name=name,
                params=self._params(func),
                line=getattr(func, "lineno", 0),
            )
            for name, func in self.functions.items()
        }

        # Re-summarise until nothing changes; the pass count is capped at 8.
        for _ in range(8):
            changed = False
            for name, func in self.functions.items():
                updated = self._compute_summary(func, summaries)
                if updated != summaries[name]:
                    summaries[name] = updated
                    changed = True
            if not changed:
                break

        return summaries

    @staticmethod
    def _params(func: ast.AST) -> List[str]:
        """Return the positional parameter names of *func*, skipping ``self``/``cls``.

        Positional-only parameters come first, followed by the regular ones, so
        a parameter's index lines up with the positional argument index at a
        call site.
        """
        arguments = func.args
        positional = list(getattr(arguments, "posonlyargs", [])) + list(
            getattr(arguments, "args", [])
        )
        return [arg.arg for arg in positional if arg.arg not in {"self", "cls"}]

    def _compute_summary(
        self,
        func: ast.AST,
        summaries: Dict[str, FunctionSummary],
    ) -> FunctionSummary:
        """Summarise one function using the current summaries of its callees.

        Each local name maps to the set of parameter indices it may depend on;
        ``SUMMARY_SOURCE`` in such a set marks a dependency on a taint source.
        """
        params = self._params(func)
        env: Dict[str, Set[int]] = {name: {idx} for idx, name in enumerate(params)}
        returns_taint_from: Set[int] = set()
        returns_source = False
        source_pattern: Optional[str] = None
        internal_sinks: List[InternalSink] = []
        has_sanitizer = False

        def expr_deps(node: Optional[ast.AST], local_env: Dict[str, Set[int]]) -> Set[int]:
            """Return the parameter indices (and source marker) *node* depends on."""
            nonlocal has_sanitizer
            if node is None:
                return set()

            pattern = _source_pattern(node)
            if pattern:
                return {SUMMARY_SOURCE}

            if isinstance(node, ast.Name):
                return set(local_env.get(node.id, set()))

            if isinstance(node, ast.Attribute):
                return expr_deps(node.value, local_env)

            if isinstance(node, ast.Subscript):
                return expr_deps(node.value, local_env) | expr_deps(node.slice, local_env)

            if isinstance(node, ast.Call):
                if _is_sanitizer_call(node):
                    has_sanitizer = True
                    return set()

                qname = _get_qualified_name(node.func)
                if qname and qname in summaries:
                    # Known callee: only the arguments its summary says reach
                    # the return value contribute.
                    summary = summaries[qname]
                    deps: Set[int] = set()
                    if summary.returns_source:
                        deps.add(SUMMARY_SOURCE)
                    for idx in summary.returns_taint_from:
                        if idx < len(node.args):
                            deps |= expr_deps(node.args[idx], local_env)
                    if summary.has_sanitizer and not deps:
                        has_sanitizer = True
                    return deps

                # Unknown callee: the result depends on the receiver and on
                # every positional and keyword argument.
                deps = expr_deps(getattr(node.func, "value", None), local_env)
                for arg in node.args:
                    deps |= expr_deps(arg, local_env)
                for kw in node.keywords:
                    deps |= expr_deps(kw.value, local_env)
                return deps

            child_deps: Set[int] = set()
            for child in ast.iter_child_nodes(node):
                child_deps |= expr_deps(child, local_env)
            return child_deps

        def process_statements(
            statements: Iterable[ast.stmt],
            local_env: Dict[str, Set[int]],
        ) -> Dict[str, Set[int]]:
            """Walk *statements*, updating the summary state; return the resulting env."""
            nonlocal returns_source, source_pattern, internal_sinks, returns_taint_from

            for stmt in statements:
                if isinstance(stmt, ast.Assign):
                    deps = expr_deps(stmt.value, local_env)
                    for target in stmt.targets:
                        for name in _extract_target_names(target):
                            local_env[name] = set(deps)

                elif isinstance(stmt, ast.AnnAssign):
                    deps = expr_deps(stmt.value, local_env)
                    for name in _extract_target_names(stmt.target):
                        local_env[name] = set(deps)

                elif isinstance(stmt, ast.AugAssign):
                    # ``x += y`` depends on both the new value and the old target.
                    deps = expr_deps(stmt.value, local_env) | expr_deps(stmt.target, local_env)
                    for name in _extract_target_names(stmt.target):
                        local_env[name] = set(deps)

                elif isinstance(stmt, ast.Return):
                    deps = expr_deps(stmt.value, local_env)
                    if SUMMARY_SOURCE in deps:
                        returns_source = True
                        # Fall back to a generic pattern when the returned
                        # expression is not itself a direct source.
                        source_pattern = source_pattern or _source_pattern(stmt.value) or "request.args.get"
                    returns_taint_from |= {d for d in deps if d != SUMMARY_SOURCE}

                elif isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call):
                    # Direct sink called as a bare expression statement.
                    sink = _match_sink(stmt.value)
                    if sink:
                        rule_id, message, relevant_args = sink
                        param_indices: Set[int] = set()
                        for arg in relevant_args:
                            deps = expr_deps(arg, local_env)
                            param_indices |= {d for d in deps if d != SUMMARY_SOURCE}
                        if param_indices:
                            internal_sinks.append(
                                InternalSink(
                                    rule_id=rule_id,
                                    message=message,
                                    param_indices=param_indices,
                                    line=getattr(stmt.value, "lineno", getattr(stmt, "lineno", 1)),
                                )
                            )

                    # Sinks inside a summarised callee, re-expressed in terms
                    # of this function's own parameters.
                    qname = _get_qualified_name(stmt.value.func)
                    if qname and qname in summaries:
                        summary = summaries[qname]
                        for isink in summary.internal_sinks:
                            forwarded_indices: Set[int] = set()
                            for idx in isink.param_indices:
                                if idx < len(stmt.value.args):
                                    deps = expr_deps(stmt.value.args[idx], local_env)
                                    forwarded_indices |= {d for d in deps if d != SUMMARY_SOURCE}
                            if forwarded_indices:
                                internal_sinks.append(
                                    InternalSink(
                                        rule_id=isink.rule_id,
                                        message=isink.message,
                                        param_indices=forwarded_indices,
                                        line=getattr(stmt.value, "lineno", getattr(stmt, "lineno", 1)),
                                    )
                                )

                elif isinstance(stmt, ast.If):
                    before = dict(local_env)
                    body_env = process_statements(stmt.body, dict(local_env))
                    else_env = process_statements(stmt.orelse, dict(local_env))
                    local_env = _merge_dependency_maps(before, body_env, else_env)

                elif isinstance(stmt, (ast.For, ast.AsyncFor, ast.While, ast.With, ast.AsyncWith)):
                    body_env = process_statements(stmt.body, dict(local_env))
                    # ``with`` statements have no ``orelse``.
                    else_env = process_statements(getattr(stmt, "orelse", []), dict(local_env))
                    local_env = _merge_dependency_maps(local_env, body_env, else_env)

                elif isinstance(stmt, ast.Try):
                    # Every section is analysed from the pre-``try`` env and
                    # the results are unioned.
                    branch_envs = [dict(local_env)]
                    branch_envs.append(process_statements(stmt.body, dict(local_env)))
                    for handler in stmt.handlers:
                        branch_envs.append(process_statements(handler.body, dict(local_env)))
                    branch_envs.append(process_statements(stmt.orelse, dict(local_env)))
                    branch_envs.append(process_statements(stmt.finalbody, dict(local_env)))
                    local_env = _merge_dependency_maps(*branch_envs)

            return local_env

        process_statements(getattr(func, "body", []), env)
        return FunctionSummary(
            name=func.name,
            params=params,
            returns_taint_from=returns_taint_from,
            returns_source=returns_source,
            source_pattern=source_pattern,
            internal_sinks=internal_sinks,
            has_sanitizer=has_sanitizer,
            line=getattr(func, "lineno", 0),
        )
429
+
430
+
431
class _PythonTaintAnalyzer:
    """Statement-by-statement taint analysis over one parsed Python module.

    Variables assigned from a taint source (or from an already tainted
    expression) are tracked in an environment mapping names to ``TaintInfo``.
    A finding is produced whenever a tainted expression reaches a sink, either
    directly or through a summarised callee.
    """

    def __init__(self, source: str, file_path: str):
        """Parse *source* and precompute summaries of its top-level functions.

        Raises:
            SyntaxError: Propagated from ``ast.parse`` when *source* does not parse.
        """
        self.source = source
        self.file_path = file_path
        self.tree = ast.parse(source, filename=file_path)
        self.summaries = _SummaryBuilder(self.tree).build()

    def analyze(self) -> List[dict]:
        """Return de-duplicated findings for the module body and each top-level function."""
        module_body = getattr(self.tree, "body", [])
        findings: List[dict] = list(self._analyze_statements(module_body, {}))

        # Each top-level function body is analysed on its own, starting from
        # an empty environment.
        for node in module_body:
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                findings.extend(self._analyze_statements(node.body, {}))

        # The same sink can be reported by more than one pass; keep the first.
        unique: List[dict] = []
        seen = set()
        for finding in findings:
            key = (
                finding["ruleId"],
                finding["line"],
                finding["column"],
                finding["metadata"].get("tainted_variable"),
            )
            if key not in seen:
                seen.add(key)
                unique.append(finding)
        return unique

    def _analyze_statements(
        self,
        statements: Iterable[ast.stmt],
        env: Dict[str, TaintInfo],
    ) -> List[dict]:
        """Return the findings for *statements* analysed from a copy of *env*."""
        findings, _ = self._walk_statements(statements, env)
        return findings

    def _walk_statements(
        self,
        statements: Iterable[ast.stmt],
        env: Dict[str, TaintInfo],
    ) -> Tuple[List[dict], Dict[str, TaintInfo]]:
        """Analyse *statements* and return ``(findings, environment afterwards)``.

        *env* is copied and never mutated. Taint introduced inside ``if``,
        loop, ``with`` and ``try`` bodies is merged back into the environment
        used for the statements that follow, so a variable tainted on any
        branch stays tainted afterwards.
        """
        findings: List[dict] = []
        local_env = dict(env)

        for stmt in statements:
            if isinstance(stmt, (ast.Assign, ast.AnnAssign)):
                taint = self._expr_taint(stmt.value, local_env)
                targets = stmt.targets if isinstance(stmt, ast.Assign) else [stmt.target]
                for target in targets:
                    for name in _extract_target_names(target):
                        if taint:
                            local_env[name] = self._propagate(taint, stmt, name)
                        else:
                            # Overwriting with a clean value clears the taint.
                            local_env.pop(name, None)
                findings.extend(self._statement_findings(stmt, local_env))

            elif isinstance(stmt, ast.AugAssign):
                # ``x += y`` is tainted when either side is; it never clears taint.
                taint = self._expr_taint(stmt.value, local_env) or self._expr_taint(stmt.target, local_env)
                for name in _extract_target_names(stmt.target):
                    if taint:
                        local_env[name] = self._propagate(taint, stmt, name)
                findings.extend(self._statement_findings(stmt, local_env))

            elif isinstance(stmt, ast.If):
                # The condition itself may contain a sink call.
                findings.extend(self._statement_findings(stmt.test, local_env))
                body_findings, body_env = self._walk_statements(stmt.body, local_env)
                else_findings, else_env = self._walk_statements(stmt.orelse, local_env)
                findings.extend(body_findings)
                findings.extend(else_findings)
                local_env = _merge_taint_envs(local_env, body_env, else_env)

            elif isinstance(stmt, (ast.For, ast.AsyncFor, ast.While, ast.With, ast.AsyncWith)):
                findings.extend(self._statement_findings(stmt, local_env))
                body_findings, body_env = self._walk_statements(stmt.body, local_env)
                # ``with`` statements have no ``orelse``.
                else_findings, else_env = self._walk_statements(getattr(stmt, "orelse", []), local_env)
                findings.extend(body_findings)
                findings.extend(else_findings)
                local_env = _merge_taint_envs(local_env, body_env, else_env)

            elif isinstance(stmt, ast.Try):
                # Every section is analysed from the pre-``try`` environment.
                sections = [stmt.body]
                sections.extend(handler.body for handler in stmt.handlers)
                sections.append(stmt.orelse)
                sections.append(stmt.finalbody)
                branch_envs = [local_env]
                for section in sections:
                    section_findings, section_env = self._walk_statements(section, local_env)
                    findings.extend(section_findings)
                    branch_envs.append(section_env)
                local_env = _merge_taint_envs(*branch_envs)

            else:
                findings.extend(self._statement_findings(stmt, local_env))

        return findings, local_env

    @staticmethod
    def _propagate(taint: TaintInfo, stmt: ast.AST, name: str) -> TaintInfo:
        """Return *taint* extended with a step recording its assignment to *name*."""
        return TaintInfo(
            source_pattern=taint.source_pattern,
            source_line=taint.source_line,
            propagation_path=taint.propagation_path
            + [f"Line {getattr(stmt, 'lineno', 1)}: {name} = ..."],
        )

    def _expr_taint(self, node: Optional[ast.AST], env: Dict[str, TaintInfo]) -> Optional[TaintInfo]:
        """Return the taint carried by expression *node*, or ``None`` when it is clean."""
        if node is None:
            return None

        pattern = _source_pattern(node)
        if pattern:
            return TaintInfo(
                source_pattern=pattern,
                source_line=getattr(node, "lineno", 1),
                propagation_path=[f"Source: {pattern}"],
            )

        if isinstance(node, ast.Name):
            return env.get(node.id)

        if isinstance(node, ast.Attribute):
            return self._expr_taint(node.value, env)

        if isinstance(node, ast.Subscript):
            return self._expr_taint(node.value, env) or self._expr_taint(node.slice, env)

        if isinstance(node, ast.Call):
            if _is_sanitizer_call(node):
                return None

            qname = _get_qualified_name(node.func)
            if qname and qname in self.summaries:
                summary = self.summaries[qname]
                if summary.returns_source:
                    return TaintInfo(
                        source_pattern=summary.source_pattern or "request.args.get",
                        source_line=summary.line or getattr(node, "lineno", 1),
                        propagation_path=[f"Source via {qname}()"],
                    )
                for idx in summary.returns_taint_from:
                    if idx < len(node.args):
                        arg_taint = self._expr_taint(node.args[idx], env)
                        if arg_taint:
                            return TaintInfo(
                                source_pattern=arg_taint.source_pattern,
                                source_line=arg_taint.source_line,
                                propagation_path=arg_taint.propagation_path
                                + [f"Line {getattr(node, 'lineno', 1)}: return from {qname}()"],
                            )
                if summary.has_sanitizer:
                    return None

            # Otherwise the result is tainted when the receiver or any
            # positional or keyword argument is.
            taint = self._expr_taint(getattr(node.func, "value", None), env)
            if taint:
                return taint
            for arg in node.args:
                taint = self._expr_taint(arg, env)
                if taint:
                    return taint
            for kw in node.keywords:
                taint = self._expr_taint(kw.value, env)
                if taint:
                    return taint
            return None

        for child in ast.iter_child_nodes(node):
            taint = self._expr_taint(child, env)
            if taint:
                return taint

        return None

    def _tainted_argument(
        self,
        arg: ast.AST,
        env: Dict[str, TaintInfo],
    ) -> Optional[Tuple[str, TaintInfo]]:
        """Return ``(label, taint)`` when *arg* is tainted, else ``None``.

        The label is the first tainted variable referenced by *arg*. When the
        taint comes from an inline source rather than a variable, the label is
        the unparsed expression, or "expression" when ``ast.unparse`` is
        unavailable.
        """
        taint = self._expr_taint(arg, env)
        if not taint:
            return None
        found = _find_first_taint(arg, env)
        if found:
            return found
        label = ast.unparse(arg) if hasattr(ast, "unparse") else "expression"
        return label, taint

    def _statement_findings(self, stmt: ast.AST, env: Dict[str, TaintInfo]) -> List[dict]:
        """Report every call anywhere under *stmt* that passes tainted data to a sink."""
        findings: List[dict] = []

        for node in ast.walk(stmt):
            if not isinstance(node, ast.Call):
                continue

            # Sinks called directly.
            sink = _match_sink(node)
            if sink:
                rule_id, message, relevant_args = sink
                for arg in relevant_args:
                    tainted = self._tainted_argument(arg, env)
                    if tainted is None:
                        continue
                    tainted_var, taint_info = tainted
                    findings.append(
                        self._make_finding(
                            rule_id=rule_id,
                            message=message,
                            node=node,
                            tainted_variable=tainted_var,
                            taint_info=taint_info,
                        )
                    )

            # Sinks reached inside a summarised callee.
            qname = _get_qualified_name(node.func)
            if qname and qname in self.summaries:
                summary = self.summaries[qname]
                if summary.has_sanitizer:
                    continue
                for isink in summary.internal_sinks:
                    for idx in isink.param_indices:
                        if idx >= len(node.args):
                            continue
                        tainted = self._tainted_argument(node.args[idx], env)
                        if tainted is None:
                            continue
                        tainted_var, taint_info = tainted
                        findings.append(
                            self._make_finding(
                                rule_id=isink.rule_id,
                                message=f"{isink.message} Tainted data reaches sink inside {qname}().",
                                node=node,
                                tainted_variable=tainted_var,
                                taint_info=taint_info,
                                extra_metadata={"inter_procedural": True, "callee": qname},
                            )
                        )

        return findings

    @staticmethod
    def _make_finding(
        rule_id: str,
        message: str,
        node: ast.AST,
        tainted_variable: str,
        taint_info: TaintInfo,
        extra_metadata: Optional[Dict[str, object]] = None,
    ) -> dict:
        """Build one finding dict for the sink call *node*.

        ``line`` is the call's ``lineno`` minus one, clamped at 0, while
        ``metadata["taint_source_line"]`` keeps the value stored in
        *taint_info* unchanged.
        """
        metadata = {
            "taint_source": taint_info.source_pattern,
            "taint_source_line": taint_info.source_line,
            "tainted_variable": tainted_variable,
        }
        if extra_metadata:
            metadata.update(extra_metadata)
        return {
            "ruleId": rule_id,
            "message": message,
            "line": max(getattr(node, "lineno", 1) - 1, 0),
            "column": _node_column(node),
            "length": 0,
            "severity": "error",
            "confidence": "HIGH",
            "metadata": metadata,
            "engine": "taint",
        }
678
+
679
+
680
def analyze_python_taint(source: str, file_path: str = "<memory>") -> List[dict]:
    """Run the lightweight Python taint analysis and return analyzer-style findings.

    The analysis is best-effort: source that does not parse, or any other
    failure during analysis, yields an empty list instead of an exception.
    """
    try:
        return _PythonTaintAnalyzer(source, file_path).analyze()
    except Exception:
        # NOTE: deliberately broad; SyntaxError is covered here as well.
        return []