apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,832 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Verification query handlers.
|
|
3
|
+
|
|
4
|
+
Each handler answers one type of verification question by inspecting
|
|
5
|
+
the project's source code through the ASTCache. Handlers return
|
|
6
|
+
structured answers (never raw source code) to maintain zero code egress.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import ast
|
|
12
|
+
import contextlib
|
|
13
|
+
import logging
|
|
14
|
+
import re
|
|
15
|
+
import signal
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Any, Protocol
|
|
18
|
+
|
|
19
|
+
from .ast_cache import ASTCache
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
_REGEX_TIMEOUT_SECONDS = 2
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _safe_re_search(
|
|
27
|
+
pattern: str,
|
|
28
|
+
text: str,
|
|
29
|
+
flags: int = 0,
|
|
30
|
+
) -> re.Match[str] | None:
|
|
31
|
+
"""Run re.search with a timeout guard against catastrophic backtracking."""
|
|
32
|
+
try:
|
|
33
|
+
compiled = re.compile(pattern, flags)
|
|
34
|
+
except re.error:
|
|
35
|
+
logger.warning("Invalid regex pattern rejected: %s", pattern)
|
|
36
|
+
return None
|
|
37
|
+
|
|
38
|
+
def _timeout_handler(signum: int, frame: Any) -> None:
|
|
39
|
+
raise TimeoutError("Regex execution timed out")
|
|
40
|
+
|
|
41
|
+
old_handler = None
|
|
42
|
+
try:
|
|
43
|
+
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
|
|
44
|
+
signal.alarm(_REGEX_TIMEOUT_SECONDS)
|
|
45
|
+
result = compiled.search(text)
|
|
46
|
+
signal.alarm(0)
|
|
47
|
+
return result
|
|
48
|
+
except TimeoutError:
|
|
49
|
+
logger.warning("Regex timed out (possible ReDoS): %s", pattern[:100])
|
|
50
|
+
return None
|
|
51
|
+
except (ValueError, OSError):
|
|
52
|
+
return compiled.search(text)
|
|
53
|
+
finally:
|
|
54
|
+
signal.alarm(0)
|
|
55
|
+
if old_handler is not None:
|
|
56
|
+
with contextlib.suppress(ValueError, OSError):
|
|
57
|
+
signal.signal(signal.SIGALRM, old_handler)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _safe_re_finditer(
|
|
61
|
+
pattern: str,
|
|
62
|
+
text: str,
|
|
63
|
+
flags: int = 0,
|
|
64
|
+
) -> list[re.Match[str]]:
|
|
65
|
+
"""Run re.finditer with a timeout guard against catastrophic backtracking."""
|
|
66
|
+
try:
|
|
67
|
+
compiled = re.compile(pattern, flags)
|
|
68
|
+
except re.error:
|
|
69
|
+
logger.warning("Invalid regex pattern rejected: %s", pattern)
|
|
70
|
+
return []
|
|
71
|
+
|
|
72
|
+
def _timeout_handler(signum: int, frame: Any) -> None:
|
|
73
|
+
raise TimeoutError("Regex execution timed out")
|
|
74
|
+
|
|
75
|
+
old_handler = None
|
|
76
|
+
try:
|
|
77
|
+
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
|
|
78
|
+
signal.alarm(_REGEX_TIMEOUT_SECONDS)
|
|
79
|
+
results = list(compiled.finditer(text))
|
|
80
|
+
signal.alarm(0)
|
|
81
|
+
return results
|
|
82
|
+
except TimeoutError:
|
|
83
|
+
logger.warning("Regex timed out (possible ReDoS): %s", pattern[:100])
|
|
84
|
+
return []
|
|
85
|
+
except (ValueError, OSError):
|
|
86
|
+
return list(compiled.finditer(text))
|
|
87
|
+
finally:
|
|
88
|
+
signal.alarm(0)
|
|
89
|
+
if old_handler is not None:
|
|
90
|
+
with contextlib.suppress(ValueError, OSError):
|
|
91
|
+
signal.signal(signal.SIGALRM, old_handler)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ------------------------------------------------------------------
|
|
95
|
+
# Shared types
|
|
96
|
+
# ------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass
|
|
100
|
+
class QuestionInput:
|
|
101
|
+
"""Normalized input for a handler, built from the cloud question."""
|
|
102
|
+
|
|
103
|
+
id: str
|
|
104
|
+
type: str
|
|
105
|
+
gate_id: str
|
|
106
|
+
finding_id: str
|
|
107
|
+
target_file: str | None = None
|
|
108
|
+
target_function: str | None = None
|
|
109
|
+
target_line: int | None = None
|
|
110
|
+
params: dict[str, Any] = field(default_factory=dict)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
|
|
114
|
+
class AnswerOutput:
|
|
115
|
+
"""Structured answer to return (no raw source code leaves the probe)."""
|
|
116
|
+
|
|
117
|
+
question_id: str
|
|
118
|
+
result: str # found | not_found | partial | error | inconclusive
|
|
119
|
+
evidence: dict[str, Any] = field(default_factory=dict)
|
|
120
|
+
confidence: float = 1.0
|
|
121
|
+
details: str | None = None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class Handler(Protocol):
    """Structural interface implemented by every question handler."""

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer *question* using *cache* and return a structured result."""
        ...
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ------------------------------------------------------------------
|
|
133
|
+
# Handler registry
|
|
134
|
+
# ------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Question type -> singleton handler instance, populated by @_register.
_HANDLERS: dict[str, Handler] = {}


def get_handler(question_type: str) -> Handler | None:
    """Return the handler registered for *question_type*, or None if there is none."""
    try:
        return _HANDLERS[question_type]
    except KeyError:
        return None


def _register(qtype: str):
    """Build a class decorator that registers one instance of the class under *qtype*.

    The decorated class is instantiated with no arguments at decoration time
    and is itself returned unchanged. A later registration for the same
    *qtype* replaces the earlier one.
    """

    def _bind(handler_cls):
        instance = handler_cls()
        _HANDLERS[qtype] = instance
        return handler_cls

    return _bind
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ------------------------------------------------------------------
|
|
153
|
+
# GUARD_CHECK
|
|
154
|
+
# ------------------------------------------------------------------
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@_register("GUARD_CHECK")
class GuardCheckHandler:
    """Search for auth/validation guards protecting a function or call site.

    Guard patterns are regular expressions taken from
    ``question.params["guard_patterns"]`` and matched case-insensitively.
    Decorator matches are reported with certainty ``"definite"``; matches on
    source lines are reported as ``"probable"``.
    """

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a GUARD_CHECK question.

        Returns ``error`` for missing patterns, a missing target file, or an
        unreadable file; ``not_found`` when nothing matches; ``found`` when
        every match is definite; ``partial`` otherwise.
        """

        def _error(message: str) -> AnswerOutput:
            return AnswerOutput(question_id=question.id, result="error", details=message)

        guard_patterns: list[str] = question.params.get("guard_patterns", [])
        if not guard_patterns:
            return _error("No guard_patterns provided in question params")

        target_file = question.target_file
        if not target_file:
            return _error("No target_file specified")

        source = cache.get_source(target_file)
        if source is None:
            return _error(f"File not found: {target_file}")

        target_function = question.target_function
        fn_node = (
            cache.get_function_node(target_file, target_function) if target_function else None
        )

        if fn_node:
            # Function resolved: inspect its decorators, then its source lines.
            guards = [
                *self._check_decorators(fn_node, guard_patterns),
                *self._check_function_body(fn_node, source, guard_patterns),
            ]
        else:
            # No function (none requested, or lookup failed): scan near target_line.
            guards = list(self._check_file_region(source, question.target_line, guard_patterns))

        if not guards:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                confidence=0.8,
                details="No guard patterns found in target scope",
            )

        if all(g.get("certainty") == "definite" for g in guards):
            verdict, score = "found", 0.9
        else:
            verdict, score = "partial", 0.6

        return AnswerOutput(
            question_id=question.id,
            result=verdict,
            evidence={"guards": guards},
            confidence=score,
            details=f"Found {len(guards)} guard indicator(s)",
        )

    @staticmethod
    def _check_decorators(
        fn_node: ast.FunctionDef | ast.AsyncFunctionDef,
        patterns: list[str],
    ) -> list[dict[str, Any]]:
        """Return one hit per (decorator, pattern) pair whose name matches the pattern."""
        found: list[dict[str, Any]] = []
        for decorator in fn_node.decorator_list:
            name = _decorator_name(decorator)
            if not name:
                # Decorator expression with no resolvable name.
                continue
            found.extend(
                {
                    "type": "decorator",
                    "name": name,
                    "line": decorator.lineno,
                    "pattern": candidate,
                    "certainty": "definite",
                }
                for candidate in patterns
                if _safe_re_search(candidate, name, re.IGNORECASE)
            )
        return found

    @staticmethod
    def _check_function_body(
        fn_node: ast.FunctionDef | ast.AsyncFunctionDef,
        source: str,
        patterns: list[str],
    ) -> list[dict[str, Any]]:
        """Scan the function's source lines; at most one hit (first matching pattern) per line."""
        first_line = fn_node.lineno
        last_line = fn_node.end_lineno or first_line
        found: list[dict[str, Any]] = []
        for offset, text in enumerate(source.splitlines()[first_line - 1 : last_line]):
            matched = next(
                (p for p in patterns if _safe_re_search(p, text, re.IGNORECASE)),
                None,
            )
            if matched is not None:
                found.append(
                    {
                        "type": "code_reference",
                        "line": first_line + offset,
                        "pattern": matched,
                        "certainty": "probable",
                    }
                )
        return found

    @staticmethod
    def _check_file_region(
        source: str,
        target_line: int | None,
        patterns: list[str],
    ) -> list[dict[str, Any]]:
        """Scan a window of lines around *target_line* (line 1 when it is None or 0).

        The window spans 20 lines before the target through 19 lines after it,
        clipped to the file. At most one hit (first matching pattern) is
        recorded per line.
        """
        all_lines = source.splitlines()
        anchor = (target_line or 1) - 1
        lo = max(0, anchor - 20)
        hi = min(len(all_lines), anchor + 20)

        found: list[dict[str, Any]] = []
        for idx in range(lo, hi):
            text = all_lines[idx]
            matched = next(
                (p for p in patterns if _safe_re_search(p, text, re.IGNORECASE)),
                None,
            )
            if matched is not None:
                found.append(
                    {
                        "type": "code_reference",
                        "line": idx + 1,
                        "pattern": matched,
                        "certainty": "probable",
                    }
                )
        return found
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# ------------------------------------------------------------------
|
|
293
|
+
# CODE_PATTERN
|
|
294
|
+
# ------------------------------------------------------------------
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
@_register("CODE_PATTERN")
class CodePatternHandler:
    """Run regex patterns against a file or region."""

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a CODE_PATTERN question.

        The pattern comes from ``params["pattern"]`` (or ``params["check"]``)
        and is converted with ``_pattern_to_regex``. The search runs over the
        target function's source when ``target_function`` is given and can be
        resolved, otherwise over the whole file.

        Returns ``error`` for a missing pattern, a missing or unreadable target
        file, or a pattern that is not a valid regular expression;
        ``not_found`` when nothing matches; otherwise ``found`` with the match
        count and up to 10 hit locations. Only line numbers and match lengths
        are reported, never the matched text.
        """
        pattern = question.params.get("pattern") or question.params.get("check")
        if not pattern:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="No pattern specified",
            )

        target_file = question.target_file
        if not target_file:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="No target_file specified",
            )

        source = cache.get_source(target_file)
        if source is None:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details=f"File not found: {target_file}",
            )

        if question.target_function:
            fn_src = cache.get_function_source(target_file, question.target_function)
            if fn_src:
                source = fn_src

        regex = _pattern_to_regex(pattern)
        flags = re.IGNORECASE | re.MULTILINE

        # _safe_re_finditer returns [] for an invalid pattern, which would be
        # indistinguishable from "no match" and be reported as a confident
        # not_found. Validate up front so a bad pattern surfaces as an error.
        try:
            re.compile(regex, flags)
        except re.error:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details=f"Invalid regex pattern: {pattern}",
            )

        matches = _safe_re_finditer(regex, source, flags)

        if not matches:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                confidence=0.9,
                details=f"Pattern '{pattern}' not found",
            )

        # Line numbers are 1-based offsets into the text that was searched, so
        # they are relative to the function source when the search was narrowed.
        hit_lines = [
            {
                "line": source[: m.start()].count("\n") + 1,
                "matched_text_length": len(m.group()),
            }
            for m in matches[:10]
        ]

        return AnswerOutput(
            question_id=question.id,
            result="found",
            evidence={"match_count": len(matches), "hits": hit_lines},
            confidence=0.9,
            details=f"Pattern matched {len(matches)} time(s)",
        )
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# ------------------------------------------------------------------
|
|
358
|
+
# CALL_PATH
|
|
359
|
+
# ------------------------------------------------------------------
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
@_register("CALL_PATH")
class CallPathHandler:
    """Check if function A transitively calls function B via AST analysis."""

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a CALL_PATH question for functions defined in ``target_file``.

        The start function is ``params["source_function"]``; the destination is
        ``params["target_function"]``, falling back to ``question.target_function``.
        Returns ``error`` on missing inputs or an unparsable file, ``not_found``
        when no path exists within the file, ``partial`` when a path exists but
        a function on it contains a call whose name could not be resolved, and
        ``found`` otherwise.
        """
        params = question.params
        source_fn = params.get("source_function")
        target_fn = params.get("target_function") or question.target_function
        if not (source_fn and target_fn):
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="source_function and target_function required",
            )

        target_file = question.target_file
        if not target_file:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="No target_file specified",
            )

        tree = cache.get_ast(target_file)
        if tree is None:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details=f"Cannot parse {target_file}",
            )

        calls_map = _build_local_call_map(tree)
        path = _find_call_path(calls_map, source_fn, target_fn, max_depth=10)

        if not path:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                confidence=0.7,
                details=f"No call path from {source_fn} to {target_fn} within file",
            )

        # A hop with unresolved call targets makes the path less trustworthy.
        has_dynamic = any(calls_map.get(fn, {}).get("has_dynamic", False) for fn in path)
        if has_dynamic:
            verdict, score = "partial", 0.7
        else:
            verdict, score = "found", 0.9

        return AnswerOutput(
            question_id=question.id,
            result=verdict,
            evidence={"path": path, "has_dynamic_dispatch": has_dynamic},
            confidence=score,
            details=f"Call path: {' -> '.join(path)}",
        )
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
# ------------------------------------------------------------------
|
|
415
|
+
# CONTEXT
|
|
416
|
+
# ------------------------------------------------------------------
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
@_register("CONTEXT")
class ContextHandler:
    """Extract structured metadata about a function (zero code egress)."""

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a CONTEXT question for ``target_function`` in ``target_file``.

        Returns ``error`` when either locator is missing and ``not_found`` when
        the function cannot be resolved. Otherwise returns ``found`` with the
        function's line span, decorator names, named parameters
        (positional-only, regular and keyword-only, in declaration order) with
        their annotations, return annotation, de-duplicated called names, up
        to 20 simple-name assignments, control-flow markers and flags for
        try/except usage and async-ness.
        """
        target_file = question.target_file
        target_function = question.target_function
        if not target_file or not target_function:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="target_file and target_function required",
            )

        fn_node = cache.get_function_node(target_file, target_function)
        if fn_node is None:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                details=f"Function {target_function} not found in {target_file}",
            )

        decorators = [_decorator_name(d) for d in fn_node.decorator_list]

        # args.args alone misses positional-only and keyword-only parameters
        # (e.g. ``def handler(*, db: Session)``). *args / **kwargs are still
        # not listed.
        signature = fn_node.args
        named_params = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]
        params = [
            {"name": a.arg, "annotation": _annotation_str(a.annotation)} for a in named_params
        ]
        return_type = _annotation_str(fn_node.returns)

        called_functions: list[str] = []
        assignments: list[dict[str, Any]] = []  # values are names (str) and line numbers (int)
        control_flow: list[str] = []
        has_try_except = False

        # ast.TryStar (``except*``) only exists on newer Python versions.
        try_nodes: tuple[type, ...] = (
            (ast.Try, ast.TryStar) if hasattr(ast, "TryStar") else (ast.Try,)
        )

        # ast.walk also visits nested function/class bodies inside fn_node.
        for node in ast.walk(fn_node):
            if isinstance(node, ast.Call):
                name = _call_name(node)
                if name:
                    called_functions.append(name)
            elif isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        assignments.append({"name": target.id, "line": node.lineno})
            elif isinstance(node, ast.If):
                control_flow.append(f"if@L{node.lineno}")
            elif isinstance(node, (ast.For, ast.AsyncFor)):
                # ``async for`` is reported with the same marker as ``for``.
                control_flow.append(f"for@L{node.lineno}")
            elif isinstance(node, ast.While):
                control_flow.append(f"while@L{node.lineno}")
            elif isinstance(node, try_nodes):
                has_try_except = True

        return AnswerOutput(
            question_id=question.id,
            result="found",
            evidence={
                "function_name": target_function,
                "file": target_file,
                "line_start": fn_node.lineno,
                "line_end": fn_node.end_lineno,
                "decorators": [d for d in decorators if d],
                "parameters": params,
                "return_type": return_type,
                # dict.fromkeys de-duplicates while keeping first-seen order.
                "called_functions": list(dict.fromkeys(called_functions)),
                "assignments": assignments[:20],
                "control_flow": control_flow,
                "has_try_except": has_try_except,
                "is_async": isinstance(fn_node, ast.AsyncFunctionDef),
            },
            confidence=1.0,
            details=f"Context for {target_function}: {len(called_functions)} calls, {len(params)} params",
        )
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
# ------------------------------------------------------------------
|
|
493
|
+
# DATA_FLOW
|
|
494
|
+
# ------------------------------------------------------------------
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
@_register("DATA_FLOW")
class DataFlowHandler:
    """Trace data flow from a sink backward to find sources."""

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a DATA_FLOW question for a sink function in ``target_file``.

        The sink is ``params["sink_function"]``, falling back to
        ``question.target_function``. Every argument of every call to the sink
        is traced backward with ``_trace_backward``. Returns ``error`` on
        missing inputs or an unparsable file, ``not_found`` when there are no
        sink calls or no traceable arguments, ``found`` when any origin is
        flagged ``is_parameter``, and ``partial`` otherwise. At most 10 traces
        are included in the evidence.
        """
        target_file = question.target_file
        sink_name = question.params.get("sink_function") or question.target_function
        if not (target_file and sink_name):
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="target_file and sink_function required",
            )

        tree = cache.get_ast(target_file)
        if tree is None:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details=f"Cannot parse {target_file}",
            )

        sink_calls = _find_calls_to(tree, sink_name)
        if not sink_calls:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                confidence=0.8,
                details=f"No calls to {sink_name} found in {target_file}",
            )

        traces: list[dict[str, Any]] = []
        for call in sink_calls:
            sink_line = call.lineno
            for argument in _extract_call_args(call):
                origin = _trace_backward(tree, argument["name"], sink_line)
                record = {
                    "sink": sink_name,
                    "sink_line": sink_line,
                    "argument": argument["name"],
                    "argument_position": argument["position"],
                    "origin": origin,
                }
                traces.append(record)

        if not traces:
            return AnswerOutput(
                question_id=question.id,
                result="not_found",
                confidence=0.7,
                details="Calls found but no traceable arguments",
            )

        # An argument whose origin is flagged as a parameter counts as user-controlled.
        user_controlled = any(trace["origin"].get("is_parameter", False) for trace in traces)
        if user_controlled:
            verdict, score = "found", 0.7
        else:
            verdict, score = "partial", 0.5

        return AnswerOutput(
            question_id=question.id,
            result=verdict,
            evidence={
                "traces": traces[:10],
                "user_controlled_input": user_controlled,
            },
            confidence=score,
            details=f"Traced {len(traces)} argument(s) to {sink_name}",
        )
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
# ------------------------------------------------------------------
|
|
566
|
+
# TYPE_INFO
|
|
567
|
+
# ------------------------------------------------------------------
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
@_register("TYPE_INFO")
class TypeInfoHandler:
    """Infer the type of a variable from annotations and assignments."""

    def handle(self, question: QuestionInput, cache: ASTCache) -> AnswerOutput:
        """Answer a TYPE_INFO question for ``params["variable"]``.

        The search scope is the function named by ``params["scope_function"]``
        (or ``question.target_function``) when it can be resolved, otherwise
        the whole module. An explicit annotation wins (confidence 0.95); an
        assignment-based inference is the fallback (confidence 0.6). Returns
        ``error`` on missing inputs or an unparsable file and ``not_found``
        when neither source yields a type.
        """
        target_file = question.target_file
        variable = question.params.get("variable")
        scope_function = question.params.get("scope_function") or question.target_function
        if not target_file or not variable:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details="target_file and variable required",
            )

        tree = cache.get_ast(target_file)
        if tree is None:
            return AnswerOutput(
                question_id=question.id,
                result="error",
                details=f"Cannot parse {target_file}",
            )

        scope_node: ast.AST = tree
        if scope_function:
            fn = cache.get_function_node(target_file, scope_function)
            if fn:
                scope_node = fn

        annotation = self._find_annotation(scope_node, variable)
        if annotation:
            return AnswerOutput(
                question_id=question.id,
                result="found",
                evidence={
                    "variable": variable,
                    "annotation": annotation,
                    "inferred_from": "type_annotation",
                },
                confidence=0.95,
                details=f"{variable}: {annotation}",
            )

        # Only fall back to assignment inference when no annotation exists;
        # its result is unused otherwise.
        assignment_type = self._infer_from_assignment(scope_node, variable)
        if assignment_type:
            return AnswerOutput(
                question_id=question.id,
                result="found",
                evidence={
                    "variable": variable,
                    "inferred_type": assignment_type,
                    "inferred_from": "assignment",
                },
                confidence=0.6,
                details=f"{variable} inferred as {assignment_type}",
            )

        return AnswerOutput(
            question_id=question.id,
            result="not_found",
            confidence=0.5,
            details=f"Cannot determine type of {variable}",
        )

    @staticmethod
    def _find_annotation(scope: ast.AST, variable: str) -> str | None:
        """Return the first annotation found for *variable* anywhere under *scope*.

        Checks annotated assignments to a plain name and annotated named
        parameters (positional-only, regular and keyword-only) of any function
        in scope. The pseudo-variable ``"return"`` resolves to a function's
        return annotation. ``*args`` / ``**kwargs`` are skipped because their
        annotation describes the element type, not the variable itself.
        """
        for node in ast.walk(scope):
            if isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name):
                if node.target.id == variable:
                    return _annotation_str(node.annotation)

            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                signature = node.args
                for arg in (*signature.posonlyargs, *signature.args, *signature.kwonlyargs):
                    if arg.arg == variable and arg.annotation:
                        return _annotation_str(arg.annotation)

                if node.returns and variable == "return":
                    return _annotation_str(node.returns)
        return None

    @staticmethod
    def _infer_from_assignment(scope: ast.AST, variable: str) -> str | None:
        """Infer a type from the first plain assignment to *variable* found under *scope*."""
        for node in ast.walk(scope):
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name) and target.id == variable:
                        return _infer_value_type(node.value)
        return None
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
# ------------------------------------------------------------------
|
|
662
|
+
# AST helpers (shared across handlers)
|
|
663
|
+
# ------------------------------------------------------------------
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def _decorator_name(node: ast.expr) -> str | None:
|
|
667
|
+
if isinstance(node, ast.Name):
|
|
668
|
+
return node.id
|
|
669
|
+
if isinstance(node, ast.Attribute):
|
|
670
|
+
return (
|
|
671
|
+
f"{_decorator_name(node.value)}.{node.attr}"
|
|
672
|
+
if isinstance(node.value, (ast.Name, ast.Attribute))
|
|
673
|
+
else node.attr
|
|
674
|
+
)
|
|
675
|
+
if isinstance(node, ast.Call):
|
|
676
|
+
return _decorator_name(node.func)
|
|
677
|
+
return None
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def _annotation_str(node: ast.expr | None) -> str | None:
|
|
681
|
+
if node is None:
|
|
682
|
+
return None
|
|
683
|
+
return ast.unparse(node)
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def _call_name(node: ast.Call) -> str | None:
|
|
687
|
+
if isinstance(node.func, ast.Name):
|
|
688
|
+
return node.func.id
|
|
689
|
+
if isinstance(node.func, ast.Attribute):
|
|
690
|
+
return node.func.attr
|
|
691
|
+
return None
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
def _build_local_call_map(tree: ast.Module) -> dict[str, dict[str, Any]]:
|
|
695
|
+
"""Build a map of function_name -> {calls: [names], has_dynamic: bool}."""
|
|
696
|
+
result: dict[str, dict[str, Any]] = {}
|
|
697
|
+
for node in ast.walk(tree):
|
|
698
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
699
|
+
calls: list[str] = []
|
|
700
|
+
has_dynamic = False
|
|
701
|
+
for child in ast.walk(node):
|
|
702
|
+
if isinstance(child, ast.Call):
|
|
703
|
+
name = _call_name(child)
|
|
704
|
+
if name:
|
|
705
|
+
calls.append(name)
|
|
706
|
+
else:
|
|
707
|
+
has_dynamic = True
|
|
708
|
+
result[node.name] = {"calls": calls, "has_dynamic": has_dynamic}
|
|
709
|
+
return result
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def _find_call_path(
|
|
713
|
+
calls_map: dict[str, dict[str, Any]],
|
|
714
|
+
source: str,
|
|
715
|
+
target: str,
|
|
716
|
+
max_depth: int = 10,
|
|
717
|
+
) -> list[str] | None:
|
|
718
|
+
"""BFS through the local call map."""
|
|
719
|
+
if source not in calls_map:
|
|
720
|
+
return None
|
|
721
|
+
|
|
722
|
+
from collections import deque
|
|
723
|
+
|
|
724
|
+
queue: deque[list[str]] = deque([[source]])
|
|
725
|
+
visited: set[str] = {source}
|
|
726
|
+
|
|
727
|
+
while queue:
|
|
728
|
+
path = queue.popleft()
|
|
729
|
+
if len(path) > max_depth:
|
|
730
|
+
continue
|
|
731
|
+
current = path[-1]
|
|
732
|
+
for callee in calls_map.get(current, {}).get("calls", []):
|
|
733
|
+
if callee == target:
|
|
734
|
+
return path + [callee]
|
|
735
|
+
if callee not in visited and callee in calls_map:
|
|
736
|
+
visited.add(callee)
|
|
737
|
+
queue.append(path + [callee])
|
|
738
|
+
return None
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def _find_calls_to(tree: ast.Module, func_name: str) -> list[ast.Call]:
|
|
742
|
+
"""Find all Call nodes that call the given function (by short name)."""
|
|
743
|
+
results = []
|
|
744
|
+
short_name = func_name.split(".")[-1]
|
|
745
|
+
for node in ast.walk(tree):
|
|
746
|
+
if isinstance(node, ast.Call):
|
|
747
|
+
name = _call_name(node)
|
|
748
|
+
if name and (name in (short_name, func_name)):
|
|
749
|
+
results.append(node)
|
|
750
|
+
return results
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
def _extract_call_args(call: ast.Call) -> list[dict[str, Any]]:
|
|
754
|
+
"""Extract argument names/positions from a Call node."""
|
|
755
|
+
args: list[dict[str, Any]] = []
|
|
756
|
+
for i, arg in enumerate(call.args):
|
|
757
|
+
if isinstance(arg, ast.Name):
|
|
758
|
+
args.append({"name": arg.id, "position": i})
|
|
759
|
+
elif isinstance(arg, ast.Attribute):
|
|
760
|
+
args.append({"name": ast.unparse(arg), "position": i})
|
|
761
|
+
for kw in call.keywords:
|
|
762
|
+
if kw.arg and isinstance(kw.value, ast.Name):
|
|
763
|
+
args.append({"name": kw.value.id, "position": kw.arg})
|
|
764
|
+
return args
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
def _trace_backward(
|
|
768
|
+
tree: ast.Module,
|
|
769
|
+
variable: str,
|
|
770
|
+
before_line: int,
|
|
771
|
+
) -> dict[str, Any]:
|
|
772
|
+
"""Trace a variable backward to find its origin."""
|
|
773
|
+
for node in ast.walk(tree):
|
|
774
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
775
|
+
for arg in node.args.args:
|
|
776
|
+
if arg.arg == variable:
|
|
777
|
+
return {
|
|
778
|
+
"type": "function_parameter",
|
|
779
|
+
"name": variable,
|
|
780
|
+
"is_parameter": True,
|
|
781
|
+
"line": node.lineno,
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
assignments: list[ast.Assign] = []
|
|
785
|
+
for node in ast.walk(tree):
|
|
786
|
+
if isinstance(node, ast.Assign) and node.lineno < before_line:
|
|
787
|
+
for target in node.targets:
|
|
788
|
+
if isinstance(target, ast.Name) and target.id == variable:
|
|
789
|
+
assignments.append(node)
|
|
790
|
+
|
|
791
|
+
if assignments:
|
|
792
|
+
last = assignments[-1]
|
|
793
|
+
return {
|
|
794
|
+
"type": "assignment",
|
|
795
|
+
"name": variable,
|
|
796
|
+
"is_parameter": False,
|
|
797
|
+
"line": last.lineno,
|
|
798
|
+
"value_type": _infer_value_type(last.value),
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
return {"type": "unknown", "name": variable, "is_parameter": False}
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def _infer_value_type(node: ast.expr) -> str:
|
|
805
|
+
if isinstance(node, ast.Constant):
|
|
806
|
+
return type(node.value).__name__
|
|
807
|
+
if isinstance(node, ast.Call):
|
|
808
|
+
name = _call_name(node)
|
|
809
|
+
return f"call:{name}" if name else "call:unknown"
|
|
810
|
+
if isinstance(node, ast.List):
|
|
811
|
+
return "list"
|
|
812
|
+
if isinstance(node, ast.Dict):
|
|
813
|
+
return "dict"
|
|
814
|
+
if isinstance(node, ast.Set):
|
|
815
|
+
return "set"
|
|
816
|
+
if isinstance(node, ast.Tuple):
|
|
817
|
+
return "tuple"
|
|
818
|
+
if isinstance(node, ast.Name):
|
|
819
|
+
return f"ref:{node.id}"
|
|
820
|
+
return "unknown"
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def _pattern_to_regex(pattern: str) -> str:
|
|
824
|
+
"""Convert a check name or raw regex to a proper regex.
|
|
825
|
+
|
|
826
|
+
Known check names are expanded; everything else is used as-is.
|
|
827
|
+
"""
|
|
828
|
+
known = {
|
|
829
|
+
"sql_parameterized": r"(%s|\?|:\w+|\$\d+)",
|
|
830
|
+
"orm_query_builder": r"\.(filter|where|select|join|group_by|order_by)\s*\(",
|
|
831
|
+
}
|
|
832
|
+
return known.get(pattern, pattern)
|