apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,832 @@
1
+ """
2
+ Verification query handlers.
3
+
4
+ Each handler answers one type of verification question by inspecting
5
+ the project's source code through the ASTCache. Handlers return
6
+ structured answers (never raw source code) to maintain zero code egress.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import ast
12
+ import contextlib
13
+ import logging
14
+ import re
15
+ import signal
16
+ from dataclasses import dataclass, field
17
+ from typing import Any, Protocol
18
+
19
+ from .ast_cache import ASTCache
20
+
21
# Module-level logger used by the regex helpers in this file.
logger = logging.getLogger(__name__)

# Number of whole seconds passed to signal.alarm() while a regex is executing.
_REGEX_TIMEOUT_SECONDS = 2
24
+
25
+
26
def _safe_re_search(
    pattern: str,
    text: str,
    flags: int = 0,
) -> re.Match[str] | None:
    """Run ``re.search`` with a timeout guard against catastrophic backtracking.

    Args:
        pattern: Regular expression source; compiled with *flags*.
        text: String to search.
        flags: ``re`` flags forwarded to ``re.compile``.

    Returns:
        The first match, or ``None`` when there is no match, the pattern does
        not compile, or the search exceeds ``_REGEX_TIMEOUT_SECONDS``.

    The guard relies on ``SIGALRM``. Where that signal is unavailable, or a
    handler cannot be installed (``signal.signal`` raised ``ValueError`` or
    ``OSError``), the search runs without a time limit instead of failing.
    """
    try:
        compiled = re.compile(pattern, flags)
    except re.error:
        logger.warning("Invalid regex pattern rejected: %s", pattern)
        return None

    # SIGALRM / alarm() are not provided on every platform; touching them
    # there would raise AttributeError, so fall back to an unguarded search.
    if not (hasattr(signal, "SIGALRM") and hasattr(signal, "alarm")):
        return compiled.search(text)

    def _timeout_handler(signum: int, frame: Any) -> None:
        raise TimeoutError("Regex execution timed out")

    try:
        old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
    except (ValueError, OSError):
        # Handler could not be installed, so no alarm is armed here and none
        # must be cancelled: an alarm(0) at this point would clear a timer
        # that this function never set.
        return compiled.search(text)

    try:
        signal.alarm(_REGEX_TIMEOUT_SECONDS)
        return compiled.search(text)
    except TimeoutError:
        logger.warning("Regex timed out (possible ReDoS): %s", pattern[:100])
        return None
    finally:
        # Disarm first so the alarm cannot fire into the restored handler.
        signal.alarm(0)
        # signal.signal() returns None for handlers not installed from Python;
        # those cannot be passed back to signal.signal().
        if old_handler is not None:
            with contextlib.suppress(ValueError, OSError):
                signal.signal(signal.SIGALRM, old_handler)
58
+
59
+
60
def _safe_re_finditer(
    pattern: str,
    text: str,
    flags: int = 0,
) -> list[re.Match[str]]:
    """Run ``re.finditer`` with a timeout guard against catastrophic backtracking.

    Args:
        pattern: Regular expression source; compiled with *flags*.
        text: String to scan.
        flags: ``re`` flags forwarded to ``re.compile``.

    Returns:
        All matches as a list (materialized while the guard is active). The
        list is empty when nothing matches, the pattern does not compile, or
        the scan exceeds ``_REGEX_TIMEOUT_SECONDS``.

    The guard relies on ``SIGALRM``. Where that signal is unavailable, or a
    handler cannot be installed (``signal.signal`` raised ``ValueError`` or
    ``OSError``), the scan runs without a time limit instead of failing.
    """
    try:
        compiled = re.compile(pattern, flags)
    except re.error:
        logger.warning("Invalid regex pattern rejected: %s", pattern)
        return []

    # SIGALRM / alarm() are not provided on every platform; touching them
    # there would raise AttributeError, so fall back to an unguarded scan.
    if not (hasattr(signal, "SIGALRM") and hasattr(signal, "alarm")):
        return list(compiled.finditer(text))

    def _timeout_handler(signum: int, frame: Any) -> None:
        raise TimeoutError("Regex execution timed out")

    try:
        old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
    except (ValueError, OSError):
        # Handler could not be installed, so no alarm is armed here and none
        # must be cancelled: an alarm(0) at this point would clear a timer
        # that this function never set.
        return list(compiled.finditer(text))

    try:
        signal.alarm(_REGEX_TIMEOUT_SECONDS)
        # Consume the iterator inside the guarded region; a lazy iterator
        # would do its matching after the alarm has been cancelled.
        return list(compiled.finditer(text))
    except TimeoutError:
        logger.warning("Regex timed out (possible ReDoS): %s", pattern[:100])
        return []
    finally:
        # Disarm first so the alarm cannot fire into the restored handler.
        signal.alarm(0)
        # signal.signal() returns None for handlers not installed from Python;
        # those cannot be passed back to signal.signal().
        if old_handler is not None:
            with contextlib.suppress(ValueError, OSError):
                signal.signal(signal.SIGALRM, old_handler)
92
+
93
+
94
+ # ------------------------------------------------------------------
95
+ # Shared types
96
+ # ------------------------------------------------------------------
97
+
98
+
99
@dataclass
class QuestionInput:
    """One verification question, normalized from the cloud payload for a handler."""

    # Identifiers carried by the question.
    id: str
    type: str
    gate_id: str
    finding_id: str

    # Optional location hints; each handler decides which ones it requires.
    target_file: str | None = None
    target_function: str | None = None
    target_line: int | None = None

    # Handler-specific parameters; every instance gets its own dict.
    params: dict[str, Any] = field(default_factory=dict)
111
+
112
+
113
@dataclass
class AnswerOutput:
    """Structured reply to one question; it carries metadata, not raw source code."""

    question_id: str
    # One of: found | not_found | partial | error | inconclusive
    result: str
    # Supporting facts for the result; every instance gets its own dict.
    evidence: dict[str, Any] = field(default_factory=dict)
    confidence: float = 1.0
    # Optional human-readable summary.
    details: str | None = None
122
+
123
+
124
class Handler(Protocol):
    """Structural interface for objects stored in the handler registry."""

    def handle(
        self,
        question: QuestionInput,
        cache: ASTCache,
    ) -> AnswerOutput:
        """Answer *question*, reading project source through *cache*."""
        ...
130
+
131
+
132
+ # ------------------------------------------------------------------
133
+ # Handler registry
134
+ # ------------------------------------------------------------------
135
+
136
+
137
# Question type -> handler instance; filled in by the ``_register`` decorator.
_HANDLERS: dict[str, Handler] = dict()
138
+
139
+
140
def get_handler(question_type: str) -> Handler | None:
    """Return the handler registered for *question_type*, or ``None`` if there is none."""
    try:
        return _HANDLERS[question_type]
    except KeyError:
        return None
142
+
143
+
144
def _register(qtype: str):
    """Build a class decorator that registers an instance of the class under *qtype*."""

    def bind(handler_cls):
        # The class is instantiated once, with no arguments, at decoration
        # time; registering the same qtype again replaces the earlier entry.
        instance = handler_cls()
        _HANDLERS[qtype] = instance
        return handler_cls

    return bind
150
+
151
+
152
+ # ------------------------------------------------------------------
153
+ # GUARD_CHECK
154
+ # ------------------------------------------------------------------
155
+
156
+
157
@_register("GUARD_CHECK")
class GuardCheckHandler:
    """Search for auth/validation guards protecting a function or call site.

    ``params["guard_patterns"]`` supplies the regexes, which are matched
    case-insensitively. When ``target_function`` resolves to a function node,
    its decorators and its source lines are checked; otherwise a window of
    lines around ``target_line`` is scanned. Evidence holds line numbers,
    decorator names and the matching pattern, not the matched source lines.
    """

    # Lines inspected on each side of ``target_line`` by the region fallback.
    _REGION_RADIUS = 20

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a GUARD_CHECK question.

        Returns ``error`` when patterns, the target file, or its source are
        missing; ``not_found`` when nothing matches; ``found`` when every
        match is a decorator match; ``partial`` otherwise.
        """
        guard_patterns = self._normalize_patterns(question.params.get("guard_patterns"))
        if not guard_patterns:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="No guard_patterns provided in question params",
            )

        target_file = question.target_file
        target_function = question.target_function
        if not target_file:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="No target_file specified",
            )

        source = cache.get_source(target_file)
        if source is None:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details=f"File not found: {target_file}",
            )

        fn_node = None
        if target_function:
            fn_node = cache.get_function_node(target_file, target_function)

        matches: list[dict[str, Any]] = []

        if fn_node:
            matches.extend(self._check_decorators(fn_node, guard_patterns))
            matches.extend(self._check_function_body(fn_node, source, guard_patterns))
        else:
            matches.extend(self._check_file_region(source, question.target_line, guard_patterns))

        if not matches:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                confidence=0.8,
                details="No guard patterns found in target scope",
            )

        # NOTE(review): one "probable" body hit downgrades the verdict even when
        # a decorator matched definitively. Confirm this conservative rule is
        # intended before relaxing it to any().
        all_certain = all(m.get("certainty") == "definite" for m in matches)
        return AnswerOutput(
            question_id=question.id,
            result="found" if all_certain else "partial",
            evidence={"guards": matches},
            confidence=0.9 if all_certain else 0.6,
            details=f"Found {len(matches)} guard indicator(s)",
        )

    @staticmethod
    def _normalize_patterns(raw: Any) -> list[str]:
        """Coerce the ``guard_patterns`` param into a list of non-empty strings.

        A bare string is treated as a single pattern (iterating it would yield
        one pattern per character). Non-iterable values give an empty list.
        Empty and non-string items are dropped: an empty regex matches every
        line, and a non-string cannot be compiled.
        """
        if raw is None:
            return []
        if isinstance(raw, str):
            raw = [raw]
        try:
            candidates = list(raw)
        except TypeError:
            return []
        return [p for p in candidates if isinstance(p, str) and p]

    @staticmethod
    def _check_decorators(
        fn_node: ast.FunctionDef | ast.AsyncFunctionDef,
        patterns: list[str],
    ) -> list[dict[str, Any]]:
        """Match each pattern against each decorator name; hits are "definite".

        A decorator matching several patterns produces one hit per pattern.
        """
        hits: list[dict[str, Any]] = []
        for dec in fn_node.decorator_list:
            dec_name = _decorator_name(dec)
            if not dec_name:
                continue
            for pattern in patterns:
                if _safe_re_search(pattern, dec_name, re.IGNORECASE):
                    hits.append(
                        {
                            "type": "decorator",
                            "name": dec_name,
                            "line": dec.lineno,
                            "pattern": pattern,
                            "certainty": "definite",
                        }
                    )
        return hits

    @staticmethod
    def _check_function_body(
        fn_node: ast.FunctionDef | ast.AsyncFunctionDef,
        source: str,
        patterns: list[str],
    ) -> list[dict[str, Any]]:
        """Scan the function's lines (``lineno`` through ``end_lineno``).

        At most one "probable" hit is recorded per line: the first pattern
        that matches it.
        """
        hits: list[dict[str, Any]] = []
        lines = source.splitlines()
        start = fn_node.lineno - 1
        end = fn_node.end_lineno or fn_node.lineno
        body_lines = lines[start:end]

        for i, line in enumerate(body_lines, start=fn_node.lineno):
            for pattern in patterns:
                if _safe_re_search(pattern, line, re.IGNORECASE):
                    hits.append(
                        {
                            "type": "code_reference",
                            "line": i,
                            "pattern": pattern,
                            "certainty": "probable",
                        }
                    )
                    break
        return hits

    @staticmethod
    def _check_file_region(
        source: str,
        target_line: int | None,
        patterns: list[str],
    ) -> list[dict[str, Any]]:
        """Scan a window of lines centred on *target_line* (line 1 if absent).

        The window covers ``_REGION_RADIUS`` lines on each side of the target
        line plus the target line itself, clipped to the file. At most one
        "probable" hit is recorded per line.
        """
        hits: list[dict[str, Any]] = []
        lines = source.splitlines()
        radius = GuardCheckHandler._REGION_RADIUS
        center = (target_line or 1) - 1
        window_start = max(0, center - radius)
        # range() excludes its end, so +1 keeps the window symmetric.
        window_end = min(len(lines), center + radius + 1)

        for i in range(window_start, window_end):
            for pattern in patterns:
                if _safe_re_search(pattern, lines[i], re.IGNORECASE):
                    hits.append(
                        {
                            "type": "code_reference",
                            "line": i + 1,
                            "pattern": pattern,
                            "certainty": "probable",
                        }
                    )
                    break
        return hits
290
+
291
+
292
+ # ------------------------------------------------------------------
293
+ # CODE_PATTERN
294
+ # ------------------------------------------------------------------
295
+
296
+
297
@_register("CODE_PATTERN")
class CodePatternHandler:
    """Run regex patterns against a file or region.

    The pattern comes from ``params["pattern"]`` or, failing that,
    ``params["check"]``, and is passed through ``_pattern_to_regex``. Evidence
    reports match counts, line numbers and match lengths, not the matched text.
    """

    # Upper bound on the individual hits echoed back in the evidence.
    _MAX_REPORTED_HITS = 10

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a CODE_PATTERN question.

        Returns ``error`` for a missing or non-string pattern, a missing
        target file, or an unreadable file; ``not_found`` when nothing
        matches; ``found`` otherwise.
        """
        pattern = question.params.get("pattern") or question.params.get("check")
        if not pattern:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="No pattern specified",
            )
        # Only strings can be compiled; reject anything else up front instead
        # of letting the regex helpers raise.
        if not isinstance(pattern, str):
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="Pattern must be a string",
            )

        target_file = question.target_file
        if not target_file:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="No target_file specified",
            )

        source = cache.get_source(target_file)
        if source is None:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details=f"File not found: {target_file}",
            )

        if question.target_function:
            fn_src = cache.get_function_source(target_file, question.target_function)
            if fn_src:
                # From here on line numbers are relative to the start of the
                # text returned by get_function_source, not to the file.
                source = fn_src

        regex = _pattern_to_regex(pattern)
        matches = _safe_re_finditer(regex, source, re.IGNORECASE | re.MULTILINE)

        if not matches:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                confidence=0.9,
                details=f"Pattern '{pattern}' not found",
            )

        # Count newlines before each match in place, without copying a slice.
        hit_lines = [
            {
                "line": source.count("\n", 0, m.start()) + 1,
                "matched_text_length": len(m.group()),
            }
            for m in matches[: self._MAX_REPORTED_HITS]
        ]

        return AnswerOutput(
            question_id=question.id,
            result="found",
            evidence={"match_count": len(matches), "hits": hit_lines},
            confidence=0.9,
            details=f"Pattern matched {len(matches)} time(s)",
        )
355
+
356
+
357
+ # ------------------------------------------------------------------
358
+ # CALL_PATH
359
+ # ------------------------------------------------------------------
360
+
361
+
362
@_register("CALL_PATH")
class CallPathHandler:
    """Report whether one function reaches another through calls within one file."""

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a CALL_PATH question from the call map of ``target_file``."""
        qid = question.id
        caller = question.params.get("source_function")
        callee = question.params.get("target_function") or question.target_function
        if not (caller and callee):
            return AnswerOutput(
                question_id=qid,
                result="error",
                details="source_function and target_function required",
            )

        target_file = question.target_file
        if not target_file:
            return AnswerOutput(
                question_id=qid,
                result="error",
                details="No target_file specified",
            )

        module = cache.get_ast(target_file)
        if module is None:
            return AnswerOutput(
                question_id=qid,
                result="error",
                details=f"Cannot parse {target_file}",
            )

        graph = _build_local_call_map(module)
        chain = _find_call_path(graph, caller, callee, max_depth=10)

        if not chain:
            return AnswerOutput(
                question_id=qid,
                result="not_found",
                confidence=0.7,
                details=f"No call path from {caller} to {callee} within file",
            )

        # A path is only "partial" evidence when any function on it also makes
        # a call whose name could not be determined.
        dynamic = False
        for fn_name in chain:
            if graph.get(fn_name, {}).get("has_dynamic", False):
                dynamic = True
                break

        if dynamic:
            verdict, confidence = "partial", 0.7
        else:
            verdict, confidence = "found", 0.9

        return AnswerOutput(
            question_id=qid,
            result=verdict,
            evidence={"path": chain, "has_dynamic_dispatch": dynamic},
            confidence=confidence,
            details=f"Call path: {' -> '.join(chain)}",
        )
412
+
413
+
414
+ # ------------------------------------------------------------------
415
+ # CONTEXT
416
+ # ------------------------------------------------------------------
417
+
418
+
419
@_register("CONTEXT")
class ContextHandler:
    """Extract structured metadata about a function (zero code egress).

    The evidence describes the function's signature, decorators, called
    names, simple assignments and control-flow statements by name and line
    number only.
    """

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a CONTEXT question.

        Returns ``error`` when the file or function name is missing,
        ``not_found`` when the function cannot be located, and ``found`` with
        the metadata otherwise.
        """
        target_file = question.target_file
        target_function = question.target_function
        if not target_file or not target_function:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="target_file and target_function required",
            )

        fn_node = cache.get_function_node(target_file, target_function)
        if fn_node is None:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                details=f"Function {target_function} not found in {target_file}",
            )

        decorators = [_decorator_name(d) for d in fn_node.decorator_list]

        # Report every parameter kind in signature order: positional-only,
        # regular, *args, keyword-only, **kwargs.
        signature = fn_node.args
        declared = [*signature.posonlyargs, *signature.args]
        if signature.vararg is not None:
            declared.append(signature.vararg)
        declared.extend(signature.kwonlyargs)
        if signature.kwarg is not None:
            declared.append(signature.kwarg)
        params = [{"name": a.arg, "annotation": _annotation_str(a.annotation)} for a in declared]
        return_type = _annotation_str(fn_node.returns)

        called_functions: list[str] = []
        assignments: list[dict[str, Any]] = []
        control_flow: list[str] = []
        has_try_except = False

        # ast.TryStar (``try``/``except*``) only exists on newer interpreters.
        try_nodes = (ast.Try, getattr(ast, "TryStar", ast.Try))

        # ast.walk also descends into nested functions and classes, so their
        # calls and statements are included in the summary.
        for node in ast.walk(fn_node):
            if isinstance(node, ast.Call):
                name = _call_name(node)
                if name:
                    called_functions.append(name)
            elif isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        assignments.append({"name": target.id, "line": node.lineno})
            elif isinstance(node, ast.If):
                control_flow.append(f"if@L{node.lineno}")
            elif isinstance(node, (ast.For, ast.AsyncFor)):
                control_flow.append(f"for@L{node.lineno}")
            elif isinstance(node, ast.While):
                control_flow.append(f"while@L{node.lineno}")
            elif isinstance(node, try_nodes):
                has_try_except = True

        return AnswerOutput(
            question_id=question.id,
            result="found",
            evidence={
                "function_name": target_function,
                "file": target_file,
                "line_start": fn_node.lineno,
                "line_end": fn_node.end_lineno,
                "decorators": [d for d in decorators if d],
                "parameters": params,
                "return_type": return_type,
                # dict.fromkeys de-duplicates while keeping first-seen order.
                "called_functions": list(dict.fromkeys(called_functions)),
                "assignments": assignments[:20],
                "control_flow": control_flow,
                "has_try_except": has_try_except,
                "is_async": isinstance(fn_node, ast.AsyncFunctionDef),
            },
            confidence=1.0,
            details=f"Context for {target_function}: {len(called_functions)} calls, {len(params)} params",
        )
490
+
491
+
492
+ # ------------------------------------------------------------------
493
+ # DATA_FLOW
494
+ # ------------------------------------------------------------------
495
+
496
+
497
@_register("DATA_FLOW")
class DataFlowHandler:
    """Trace the arguments of sink calls back to where they originate."""

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a DATA_FLOW question for calls to the sink in ``target_file``."""
        qid = question.id
        target_file = question.target_file
        sink_name = question.params.get("sink_function") or question.target_function
        if not (target_file and sink_name):
            return AnswerOutput(
                question_id=qid,
                result="error",
                details="target_file and sink_function required",
            )

        tree = cache.get_ast(target_file)
        if tree is None:
            return AnswerOutput(
                question_id=qid,
                result="error",
                details=f"Cannot parse {target_file}",
            )

        sink_calls = _find_calls_to(tree, sink_name)
        if not sink_calls:
            return AnswerOutput(
                question_id=qid,
                result="not_found",
                confidence=0.8,
                details=f"No calls to {sink_name} found in {target_file}",
            )

        # One trace per named argument of every sink call.
        traces: list[dict[str, Any]] = [
            {
                "sink": sink_name,
                "sink_line": call_node.lineno,
                "argument": arg["name"],
                "argument_position": arg["position"],
                "origin": _trace_backward(tree, arg["name"], call_node.lineno),
            }
            for call_node in sink_calls
            for arg in _extract_call_args(call_node)
        ]

        if not traces:
            return AnswerOutput(
                question_id=qid,
                result="not_found",
                confidence=0.7,
                details="Calls found but no traceable arguments",
            )

        # An argument whose origin is a function parameter counts as
        # user-controlled input.
        user_controlled = False
        for trace in traces:
            if trace["origin"].get("is_parameter", False):
                user_controlled = True
                break

        if user_controlled:
            verdict, confidence = "found", 0.7
        else:
            verdict, confidence = "partial", 0.5

        return AnswerOutput(
            question_id=qid,
            result=verdict,
            evidence={
                "traces": traces[:10],
                "user_controlled_input": user_controlled,
            },
            confidence=confidence,
            details=f"Traced {len(traces)} argument(s) to {sink_name}",
        )
563
+
564
+
565
+ # ------------------------------------------------------------------
566
+ # TYPE_INFO
567
+ # ------------------------------------------------------------------
568
+
569
+
570
@_register("TYPE_INFO")
class TypeInfoHandler:
    """Infer the type of a variable from annotations and assignments.

    An explicit annotation takes precedence; the value of a plain assignment
    is only inspected when no annotation is found.
    """

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a TYPE_INFO question for ``params["variable"]``.

        The search is limited to ``params["scope_function"]`` (or
        ``target_function``) when that function can be located, and covers
        the whole module otherwise.
        """
        target_file = question.target_file
        variable = question.params.get("variable")
        scope_function = question.params.get("scope_function") or question.target_function
        if not target_file or not variable:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="target_file and variable required",
            )

        tree = cache.get_ast(target_file)
        if tree is None:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details=f"Cannot parse {target_file}",
            )

        scope_node: ast.AST = tree
        if scope_function:
            fn = cache.get_function_node(target_file, scope_function)
            if fn:
                scope_node = fn

        annotation = self._find_annotation(scope_node, variable)
        if annotation:
            return AnswerOutput(
                question_id=question.id,
                result="found",
                evidence={
                    "variable": variable,
                    "annotation": annotation,
                    "inferred_from": "type_annotation",
                },
                confidence=0.95,
                details=f"{variable}: {annotation}",
            )

        # Only walk the tree a second time when the annotation search failed.
        assignment_type = self._infer_from_assignment(scope_node, variable)
        if assignment_type:
            return AnswerOutput(
                question_id=question.id,
                result="found",
                evidence={
                    "variable": variable,
                    "inferred_type": assignment_type,
                    "inferred_from": "assignment",
                },
                confidence=0.6,
                details=f"{variable} inferred as {assignment_type}",
            )

        return AnswerOutput(
            question_id=question.id,
            result="not_found",
            confidence=0.5,
            details=f"Cannot determine type of {variable}",
        )

    @staticmethod
    def _find_annotation(scope: ast.AST, variable: str) -> str | None:
        """Return the first annotation found for *variable* inside *scope*.

        Annotated assignments and annotated parameters of any kind are
        considered. The special name ``"return"`` resolves to a function's
        return annotation.
        """
        for node in ast.walk(scope):
            if isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
                if node.target.id == variable:
                    return _annotation_str(node.annotation)

            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                signature = node.args
                declared = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]
                declared.extend(a for a in (signature.vararg, signature.kwarg) if a is not None)
                for arg in declared:
                    if arg.arg == variable and arg.annotation:
                        return _annotation_str(arg.annotation)

                if node.returns and variable == "return":
                    return _annotation_str(node.returns)
        return None

    @staticmethod
    def _infer_from_assignment(scope: ast.AST, variable: str) -> str | None:
        """Classify the value of the first plain assignment to *variable* found by ``ast.walk``."""
        for node in ast.walk(scope):
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name) and target.id == variable:
                        return _infer_value_type(node.value)
        return None
659
+
660
+
661
+ # ------------------------------------------------------------------
662
+ # AST helpers (shared across handlers)
663
+ # ------------------------------------------------------------------
664
+
665
+
666
def _decorator_name(node: ast.expr) -> str | None:
    """Return the dotted name of a decorator expression, or ``None``.

    Call wrappers are stripped first (``@route("/x")`` is named after
    ``route``). Attribute access is then followed inwards, collecting
    attribute names until something other than an attribute is reached; a
    plain name at that point becomes the leading component.
    """
    while isinstance(node, ast.Call):
        node = node.func

    # Collected innermost-last, i.e. in reverse of the dotted order.
    components: list[str] = []
    while isinstance(node, ast.Attribute):
        components.append(node.attr)
        node = node.value
    if isinstance(node, ast.Name):
        components.append(node.id)

    if not components:
        return None
    components.reverse()
    return ".".join(components)
678
+
679
+
680
def _annotation_str(node: ast.expr | None) -> str | None:
    """Render an annotation node back to source text; ``None`` passes through."""
    return ast.unparse(node) if node is not None else None
684
+
685
+
686
def _call_name(node: ast.Call) -> str | None:
    """Return the short name being called: the identifier, or the last attribute.

    ``foo()`` gives ``"foo"`` and ``a.b.foo()`` gives ``"foo"``. Any other
    callee expression gives ``None``.
    """
    callee = node.func
    if isinstance(callee, ast.Attribute):
        return callee.attr
    if isinstance(callee, ast.Name):
        return callee.id
    return None
692
+
693
+
694
def _build_local_call_map(tree: ast.Module) -> dict[str, dict[str, Any]]:
    """Map each function name in *tree* to ``{"calls": [...], "has_dynamic": bool}``.

    ``calls`` lists the short names of every call found anywhere inside the
    function, nested definitions included. ``has_dynamic`` is true when at
    least one call had no resolvable name. Functions sharing a name overwrite
    one another, so the last one visited wins.
    """
    call_map: dict[str, dict[str, Any]] = {}
    function_defs = (
        candidate
        for candidate in ast.walk(tree)
        if isinstance(candidate, (ast.FunctionDef, ast.AsyncFunctionDef))
    )
    for fn in function_defs:
        resolved = [_call_name(inner) for inner in ast.walk(fn) if isinstance(inner, ast.Call)]
        call_map[fn.name] = {
            "calls": [callee for callee in resolved if callee],
            "has_dynamic": any(not callee for callee in resolved),
        }
    return call_map
710
+
711
+
712
def _find_call_path(
    calls_map: dict[str, dict[str, Any]],
    source: str,
    target: str,
    max_depth: int = 10,
) -> list[str] | None:
    """Breadth-first search for a call chain from *source* to *target*.

    Returns the chain as a list of names, both endpoints included, or
    ``None`` when *source* is not in the map or no chain is found. Functions
    whose chain already holds more than *max_depth* names are not expanded.
    """
    from collections import deque

    if source not in calls_map:
        return None

    # Each function is discovered at most once, so one predecessor per name
    # is enough to rebuild the chain that led to it.
    predecessor: dict[str, str | None] = {source: None}
    chain_length: dict[str, int] = {source: 1}
    pending: deque[str] = deque([source])

    def chain_to(name: str) -> list[str]:
        names: list[str] = []
        cursor: str | None = name
        while cursor is not None:
            names.append(cursor)
            cursor = predecessor[cursor]
        names.reverse()
        return names

    while pending:
        current = pending.popleft()
        if chain_length[current] > max_depth:
            continue
        for callee in calls_map.get(current, {}).get("calls", []):
            if callee == target:
                return chain_to(current) + [callee]
            if callee in predecessor or callee not in calls_map:
                continue
            predecessor[callee] = current
            chain_length[callee] = chain_length[current] + 1
            pending.append(callee)
    return None
739
+
740
+
741
def _find_calls_to(tree: ast.Module, func_name: str) -> list[ast.Call]:
    """Collect every call in *tree* whose short callee name matches *func_name*.

    A dotted *func_name* is also compared by its last component.
    """
    accepted = (func_name.rsplit(".", 1)[-1], func_name)

    def targets_sink(call: ast.Call) -> bool:
        callee = _call_name(call)
        return bool(callee) and callee in accepted

    return [
        candidate
        for candidate in ast.walk(tree)
        if isinstance(candidate, ast.Call) and targets_sink(candidate)
    ]
751
+
752
+
753
def _extract_call_args(call: ast.Call) -> list[dict[str, Any]]:
    """Extract argument names/positions from a Call node.

    Only arguments that are a plain name or an attribute access are
    reported; an attribute is rendered as its source text (``a.b``).
    Positional arguments carry their index as ``position`` and keyword
    arguments carry the keyword. Other expressions, and ``**mapping``
    unpacking, are skipped.
    """

    def _name_of(value: ast.expr) -> str | None:
        # Applied to positional and keyword values alike so that both kinds
        # of argument are treated consistently.
        if isinstance(value, ast.Name):
            return value.id
        if isinstance(value, ast.Attribute):
            return ast.unparse(value)
        return None

    args: list[dict[str, Any]] = []
    for i, arg in enumerate(call.args):
        name = _name_of(arg)
        if name is not None:
            args.append({"name": name, "position": i})
    for kw in call.keywords:
        if not kw.arg:  # ``**mapping`` has no keyword name
            continue
        name = _name_of(kw.value)
        if name is not None:
            args.append({"name": name, "position": kw.arg})
    return args
765
+
766
+
767
def _trace_backward(
    tree: ast.Module,
    variable: str,
    before_line: int,
) -> dict[str, Any]:
    """Trace a variable backward to find its origin.

    Resolution works outward from the functions whose line span contains
    *before_line*, innermost first. In each of them a parameter named
    *variable* wins, then the closest plain assignment above *before_line*.
    If no enclosing function yields an origin, assignments anywhere in the
    module above *before_line* are considered.

    Returns:
        A dict with ``type`` (``function_parameter``, ``assignment`` or
        ``unknown``), ``name`` and ``is_parameter``. The first two types also
        carry ``line``, and ``assignment`` carries ``value_type``.
    """
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    def _span_end(fn: ast.AST) -> int:
        # end_lineno may be absent on hand-built nodes.
        return getattr(fn, "end_lineno", None) or fn.lineno

    def _latest_assignment(scope: ast.AST) -> ast.Assign | None:
        candidates = [
            node
            for node in ast.walk(scope)
            if isinstance(node, ast.Assign)
            and node.lineno < before_line
            and any(isinstance(t, ast.Name) and t.id == variable for t in node.targets)
        ]
        # ast.walk is breadth-first, so positional order says nothing about
        # source order; pick the assignment closest above the use instead.
        return max(candidates, key=lambda n: (n.lineno, n.col_offset), default=None)

    def _assignment_origin(node: ast.Assign) -> dict[str, Any]:
        return {
            "type": "assignment",
            "name": variable,
            "is_parameter": False,
            "line": node.lineno,
            "value_type": _infer_value_type(node.value),
        }

    # Only functions that contain the use site can supply the variable as a
    # parameter; a same-named parameter of an unrelated function is ignored.
    enclosing = [
        fn
        for fn in ast.walk(tree)
        if isinstance(fn, function_types) and fn.lineno <= before_line <= _span_end(fn)
    ]
    enclosing.sort(key=lambda fn: fn.lineno, reverse=True)  # innermost first

    for fn in enclosing:
        signature = fn.args
        declared = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]
        declared.extend(a for a in (signature.vararg, signature.kwarg) if a is not None)
        if any(param.arg == variable for param in declared):
            return {
                "type": "function_parameter",
                "name": variable,
                "is_parameter": True,
                "line": fn.lineno,
            }
        local = _latest_assignment(fn)
        if local is not None:
            return _assignment_origin(local)

    fallback = _latest_assignment(tree)
    if fallback is not None:
        return _assignment_origin(fallback)

    return {"type": "unknown", "name": variable, "is_parameter": False}
802
+
803
+
804
def _infer_value_type(node: ast.expr) -> str:
    """Give a coarse label for the kind of value an expression node produces.

    Constants yield the Python type name of the literal, calls yield
    ``call:<name>``, container displays yield their container name, bare
    names yield ``ref:<name>`` and everything else yields ``unknown``.
    """
    if isinstance(node, ast.Constant):
        return type(node.value).__name__

    if isinstance(node, ast.Call):
        callee = _call_name(node)
        return "call:" + (callee if callee else "unknown")

    container_labels = (
        (ast.List, "list"),
        (ast.Dict, "dict"),
        (ast.Set, "set"),
        (ast.Tuple, "tuple"),
    )
    for node_type, label in container_labels:
        if isinstance(node, node_type):
            return label

    if isinstance(node, ast.Name):
        return f"ref:{node.id}"
    return "unknown"
821
+
822
+
823
def _pattern_to_regex(pattern: str) -> str:
    """Resolve a check name to its regex, passing anything else through.

    Two check names are recognised, ``sql_parameterized`` and
    ``orm_query_builder``. Every other value is treated as a raw regex and
    returned unchanged.
    """
    expansions = dict(
        sql_parameterized=r"(%s|\?|:\w+|\$\d+)",
        orm_query_builder=r"\.(filter|where|select|join|group_by|order_by)\s*\(",
    )
    try:
        return expansions[pattern]
    except KeyError:
        return pattern