apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Sink-evidence collection for data flows.
|
|
3
|
+
|
|
4
|
+
For each DataFlow, this module analyses the call site where tainted data
|
|
5
|
+
arrives (the *sink*) and produces a ``CallSiteEvidence`` record that captures
|
|
6
|
+
structural facts about the call:
|
|
7
|
+
|
|
8
|
+
* **Argument construction** – is the tainted argument a string interpolation,
|
|
9
|
+
a concatenation, a direct variable pass, etc.?
|
|
10
|
+
* **Co-arguments** – what other arguments (literal strings, keyword flags,
|
|
11
|
+
containers) are present alongside the tainted one?
|
|
12
|
+
* **String patterns** – do literal string arguments contain SQL placeholders,
|
|
13
|
+
URL schemes, template syntax, or shell metacharacters?
|
|
14
|
+
|
|
15
|
+
These are pure structural facts. The cloud interprets them to assess risk.
|
|
16
|
+
|
|
17
|
+
The same ``CallSiteEvidence`` model is reusable for *transformation* call
|
|
18
|
+
sites (Gap 1 retrofit) – see ``build_evidence_for_call_site``.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import re
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
from typing import TYPE_CHECKING
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
|
|
30
|
+
from ..parsing.base import ParsedArgument, ParsedCallSite, ParsedFile
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
# Argument-construction classification
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ArgumentConstructionMethod:
    """String labels describing how an argument value was put together.

    The labels are language-neutral; ``classify_argument_construction``
    picks one of them from the flags on a parsed argument.
    """

    # No construction flag is set on the argument.
    DIRECT = "direct"
    # ``is_string_interpolation`` is set.
    STRING_INTERPOLATION = "string_interpolation"
    # ``is_concatenation`` is set.
    STRING_CONCATENATION = "string_concatenation"
    # ``is_format_call`` is set.
    FORMAT_CALL = "format_call"
    # ``is_call_result`` is set.
    CALL_WRAPPED = "call_wrapped"
    # ``container_type`` is set.
    COLLECTION = "collection"
    # ``is_spread`` or ``is_keyword_spread`` is set.
    SPREAD = "spread"


def classify_argument_construction(arg: ParsedArgument) -> str:
    """Return the ``ArgumentConstructionMethod`` label for one argument.

    The argument's flags are inspected in a fixed priority order and the
    first truthy flag decides the label. When none is set the argument is
    reported as ``DIRECT``.
    """
    labels = ArgumentConstructionMethod
    # Order matters: earlier entries win when several flags are set.
    flag_priority = (
        ("is_spread", labels.SPREAD),
        ("is_keyword_spread", labels.SPREAD),
        ("is_string_interpolation", labels.STRING_INTERPOLATION),
        ("is_concatenation", labels.STRING_CONCATENATION),
        ("is_format_call", labels.FORMAT_CALL),
        ("is_call_result", labels.CALL_WRAPPED),
        ("container_type", labels.COLLECTION),
    )
    for flag_name, label in flag_priority:
        if getattr(arg, flag_name):
            return label
    return labels.DIRECT
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# String-pattern detection (data-driven, language-agnostic)
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class StringPatternType:
    """String labels for the pattern categories reported on literal strings."""

    SQL_PLACEHOLDER = "sql_placeholder"
    SQL_KEYWORD = "sql_keyword"
    URL_SCHEME = "url_scheme"
    TEMPLATE_SYNTAX = "template_syntax"
    SHELL_METACHAR = "shell_metachar"
    HTML_TAG = "html_tag"
    PATH_TRAVERSAL = "path_traversal"
    REGEX_ANCHOR = "regex_anchor"


@dataclass
class DetectedStringPattern:
    """One distinct pattern hit found inside a literal string."""

    pattern_type: str  # one of the StringPatternType labels
    matched: str  # exact substring matched by the rule
    argument_position: int | None = None  # position supplied by the caller, if any


# Ordered (label, compiled regex) rules. ``detect_string_patterns`` reports
# hits in this order, so reordering entries changes the output order.
_PATTERN_RULES: list[tuple[str, re.Pattern[str]]] = [
    (
        StringPatternType.SQL_PLACEHOLDER,
        re.compile(
            r"(?:%s|%\(\w+\)s|\?\??|:\w+|\$\d+|\$\{\w+\})",
            re.IGNORECASE,
        ),
    ),
    (
        StringPatternType.SQL_KEYWORD,
        re.compile(
            r"\b(?:SELECT|INSERT|UPDATE|DELETE|DROP|ALTER|CREATE|WHERE|FROM|JOIN|UNION|INTO|VALUES|SET|ORDER\s+BY|GROUP\s+BY|HAVING|LIMIT|OFFSET|EXEC(?:UTE)?)\b",
            re.IGNORECASE,
        ),
    ),
    (
        StringPatternType.URL_SCHEME,
        # URI schemes are case-insensitive, so "HTTP://" must match as well.
        re.compile(
            r"(?:https?|ftp|file|s3)://",
            re.IGNORECASE,
        ),
    ),
    (
        StringPatternType.TEMPLATE_SYNTAX,
        re.compile(
            r"\{\{.*?\}\}|\{%.*?%\}|\$\{[^}]+\}|<%=.*?%>",
        ),
    ),
    (
        StringPatternType.SHELL_METACHAR,
        re.compile(
            r"(?:\|\||&&|[;|`]|\$\(|\$\{)",
        ),
    ),
    (
        StringPatternType.HTML_TAG,
        re.compile(
            r"<(?:script|iframe|img|object|embed|form|input|style|link|meta|svg|base)\b",
            re.IGNORECASE,
        ),
    ),
    (
        StringPatternType.PATH_TRAVERSAL,
        # Accept both "../" and the backslash form "..\" used on Windows.
        re.compile(
            r"\.\.[/\\]|~/",
        ),
    ),
    (
        StringPatternType.REGEX_ANCHOR,
        re.compile(
            r"(?:^\^|\$$|\\[bdwsDWBZ])",
        ),
    ),
]


def detect_string_patterns(
    value: str,
    argument_position: int | None = None,
) -> list[DetectedStringPattern]:
    """Scan a literal string for well-known patterns.

    Every rule in ``_PATTERN_RULES`` is applied to ``value``. Each distinct
    ``(pattern_type, matched_text)`` pair is reported once, in rule order and
    then in order of first occurrence.

    Args:
        value: The literal text to scan.
        argument_position: Copied onto every returned record unchanged.

    Returns:
        The de-duplicated hits; an empty list when nothing matches.
    """
    # A dict keeps first-seen order while de-duplicating on the key.
    hits: dict[tuple[str, str], DetectedStringPattern] = {}
    for pattern_type, regex in _PATTERN_RULES:
        for match in regex.finditer(value):
            text = match.group()
            key = (pattern_type, text)
            if key in hits:
                continue
            hits[key] = DetectedStringPattern(
                pattern_type=pattern_type,
                matched=text,
                argument_position=argument_position,
            )
    return list(hits.values())
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
# CallSiteEvidence builder (reusable for sinks AND transformations)
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@dataclass
class ArgumentEvidence:
    """Structural facts recorded for a single argument of a call site.

    The first nine fields are required and positional; the remaining ones
    have defaults. Field order is part of the constructor signature.
    """

    # --- required fields -------------------------------------------------
    position: int | None  # argument position as reported by the parser
    name: str | None  # argument name as reported by the parser
    is_literal: bool
    literal_value: str | None  # literal rendered as text
    literal_type: str | None
    is_variable: bool
    variable_name: str | None
    is_call_result: bool
    called_function: str | None

    # --- optional fields -------------------------------------------------
    construction: str = "direct"  # an ArgumentConstructionMethod label
    container_type: str | None = None
    source_variables: list[str] = field(default_factory=list)  # fresh list per instance
    expression_text: str | None = None
    is_tainted: bool = False  # True when this argument carries the tainted data
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
@dataclass
class CallSiteEvidence:
    """Aggregated structural evidence for one call site.

    Every field is optional, so an empty record can be created with no
    arguments.
    """

    # Which argument carries the tainted data, identified by position and/or name.
    tainted_argument_position: int | None = None
    tainted_argument_name: str | None = None
    # Construction label of the tainted argument, when one was matched.
    tainted_argument_construction: str | None = None
    # Per-argument evidence records for the call.
    all_arguments: list[ArgumentEvidence] = field(default_factory=list)
    # Pattern hits collected from the call's literal arguments.
    string_patterns: list[DetectedStringPattern] = field(default_factory=list)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _parsed_arg_to_evidence(
    arg: ParsedArgument,
    is_tainted: bool = False,
) -> ArgumentEvidence:
    """Convert one parsed argument into an ``ArgumentEvidence`` record.

    A non-``None`` literal value is stringified, and a non-empty expression
    text is kept only for expression arguments. Both texts are cut to at
    most 500 characters. All other attributes are copied across unchanged.

    Args:
        arg: The argument as produced by the parser layer.
        is_tainted: Whether this argument carries the tainted data.
    """
    max_text_length = 500
    construction_label = classify_argument_construction(arg)

    literal_text: str | None = None
    if arg.is_literal and arg.literal_value is not None:
        # Slicing is a no-op for strings that are already short enough.
        literal_text = str(arg.literal_value)[:max_text_length]

    expression: str | None = None
    if arg.is_expression and arg.expression_text:
        expression = arg.expression_text[:max_text_length]

    return ArgumentEvidence(
        position=arg.position,
        name=arg.name,
        is_literal=arg.is_literal,
        literal_value=literal_text,
        literal_type=arg.literal_type,
        is_variable=arg.is_variable,
        variable_name=arg.variable_name,
        is_call_result=arg.is_call_result,
        called_function=arg.called_function,
        construction=construction_label,
        container_type=arg.container_type,
        # Copy so the evidence record does not alias the parser's list.
        source_variables=list(arg.source_variables),
        expression_text=expression,
        is_tainted=is_tainted,
    )
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def build_evidence_for_call_site(
    call_site: ParsedCallSite,
    tainted_arg_index: int | None = None,
    tainted_arg_name: str | None = None,
) -> CallSiteEvidence:
    """
    Assemble ``CallSiteEvidence`` for an arbitrary call site.

    Every argument of the call is converted to an ``ArgumentEvidence``
    record, and every non-``None`` literal argument is scanned for string
    patterns.

    Parameters
    ----------
    call_site:
        The parsed call site (from the parser layer).
    tainted_arg_index:
        Position of the argument carrying tainted data (if applicable).
    tainted_arg_name:
        Keyword name of the argument carrying tainted data (if applicable).

    Returns
    -------
    CallSiteEvidence
        ``tainted_argument_position`` and ``tainted_argument_name`` echo the
        inputs. ``tainted_argument_construction`` stays ``None`` when no
        argument matched; if several match, the last one is reported.
    """
    evidence_items: list[ArgumentEvidence] = []
    found_patterns: list[DetectedStringPattern] = []
    tainted_construction: str | None = None

    for argument in call_site.arguments:
        # An argument is the tainted one if it matches by position OR by
        # name; each criterion only applies when the caller supplied it.
        tainted_here = bool(
            (tainted_arg_index is not None and argument.position == tainted_arg_index)
            or (tainted_arg_name is not None and argument.name == tainted_arg_name)
        )

        item = _parsed_arg_to_evidence(argument, is_tainted=tainted_here)
        evidence_items.append(item)
        if tainted_here:
            tainted_construction = item.construction

        if not argument.is_literal or argument.literal_value is None:
            continue
        # Patterns are detected on the full literal text.
        found_patterns.extend(
            detect_string_patterns(
                str(argument.literal_value),
                argument_position=argument.position,
            )
        )

    return CallSiteEvidence(
        tainted_argument_position=tainted_arg_index,
        tainted_argument_name=tainted_arg_name,
        tainted_argument_construction=tainted_construction,
        all_arguments=evidence_items,
        string_patterns=found_patterns,
    )
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
# ---------------------------------------------------------------------------
|
|
289
|
+
# SinkEvidenceCollector – post-processing step over DataFlows
|
|
290
|
+
# ---------------------------------------------------------------------------
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class SinkEvidenceCollector:
    """
    Looks up the parsed call site at a data flow's sink location and builds
    ``CallSiteEvidence`` for it.

    The constructor indexes the call sites of all successfully parsed files
    by ``(file, line)`` so that sink lookups are a single dict access.
    """

    def __init__(self, parsed_files: list[ParsedFile]) -> None:
        """Index call sites of every parsed file whose ``success`` flag is set."""
        # NOTE(review): the key is (file, line) only, so when several call
        # sites share a line the one visited last replaces the earlier ones.
        self._call_sites_by_loc: dict[tuple[str, int], ParsedCallSite] = {
            (str(site.location.file), site.location.line): site
            for parsed in parsed_files
            if parsed.success
            for site in parsed.call_sites
        }

    def get_call_site(
        self,
        file_path: Path | str,
        line: int,
    ) -> ParsedCallSite | None:
        """Return the indexed call site at ``file_path``/``line``, or ``None``.

        The path is compared by its ``str()`` form against the indexed key.
        """
        lookup_key = (str(file_path), line)
        return self._call_sites_by_loc.get(lookup_key)

    def collect_for_flow(
        self,
        sink_file: Path | str,
        sink_line: int,
        sink_arg_index: int | None,
        sink_arg_name: str | None,
    ) -> CallSiteEvidence | None:
        """Build evidence for one data flow's sink.

        Returns ``None`` when no call site is indexed at the sink location.
        """
        site = self.get_call_site(sink_file, sink_line)
        if site is not None:
            return build_evidence_for_call_site(
                site,
                tainted_arg_index=sink_arg_index,
                tainted_arg_name=sink_arg_name,
            )
        return None
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cross-file URL prefix resolver for Django and Express/Fastify.
|
|
3
|
+
|
|
4
|
+
Builds per-file prefix maps so that sub-urlconf and sub-router files
|
|
5
|
+
can be annotated with their parent-mounted prefixes.
|
|
6
|
+
|
|
7
|
+
Django: Scans call_sites for path("segment/", include("module.urls")) patterns,
|
|
8
|
+
resolves module dotted path → file path, propagates prefix chains.
|
|
9
|
+
|
|
10
|
+
Express: Tracks require/import variable assignments, then finds
|
|
11
|
+
router.use("/prefix", variable) mounts and propagates chains.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
import re
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from ..parsing.base import ParsedFile
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def build_django_prefix_map(
    parsed_files: list[ParsedFile],
    project_root: Path | None = None,
) -> dict[str, list[str]]:
    """
    Build a map of {file_path_str: [prefix, ...]} for Django URL files.

    Scans for path("prefix/", include("app.urls")) patterns (re_path() and
    url() are treated the same way) and propagates prefix chains across files
    with a bounded fixpoint iteration, so that multi-level include chains are
    composed:
        config/urls → api/ → api/urls → auth/ → authentication/urls
        → authentication/urls gets ["/api/auth/"]

    Prefixes are built with ``_compose_url`` and therefore always start
    with "/".

    Args:
        parsed_files: Parsed files whose ``call_sites`` are scanned.
        project_root: When given, files below it are indexed by dotted module
            path so that include() targets can be resolved by module name.

    Returns:
        Dict mapping resolved file path strings to the list of URL prefixes
        the file is mounted under. Files that are never mounted are absent.
    """
    url_callees = ("path", "re_path", "url")
    # Matches both include("module") and include(("module", "namespace")).
    include_module_re = re.compile(r"include\s*\(\s*\(?\s*['\"]([^'\"]+)['\"]")
    # Matches include(variable_name): a bare identifier, not a module string.
    include_var_re = re.compile(r"include\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)$")
    max_iters = 20

    # Dotted-module → file path index.
    module_to_file: dict[str, str] = {}
    if project_root:
        for pf in parsed_files:
            try:
                parts = pf.path.relative_to(project_root).with_suffix("").parts
            except ValueError:
                # Not below project_root (or no usable file name): the file
                # cannot be addressed by a dotted path relative to the root.
                continue
            resolved = str(pf.path.resolve())
            module_to_file[".".join(parts)] = resolved
            if len(parts) > 1:
                # Also register the path without its first component, without
                # overriding an entry that is already present.
                module_to_file.setdefault(".".join(parts[1:]), resolved)

    file_by_stem: dict[str, list[str]] = {}
    for pf in parsed_files:
        file_by_stem.setdefault(pf.path.stem, []).append(str(pf.path.resolve()))

    def _resolve_module(module_str: str) -> str | None:
        """Resolve a dotted module string to a file path, or None."""
        exact = module_to_file.get(module_str)
        if exact is not None:
            return exact
        # Fall back to the first indexed module ending in ".<module_str>".
        dotted_suffix = "." + module_str
        for dotted, file_path in module_to_file.items():
            if dotted.endswith(dotted_suffix):
                return file_path
        # Last resort: a file stem equal to the final component, accepted
        # only when it is unambiguous.
        candidates = file_by_stem.get(module_str.rsplit(".", 1)[-1], [])
        return candidates[0] if len(candidates) == 1 else None

    # Collect every path()/re_path()/url() call that has a literal string
    # route and a second argument: (parsed_file, from_file, route, view, line).
    url_calls = []
    for pf in parsed_files:
        from_file = str(pf.path.resolve())
        for call in pf.call_sites:
            if call.callee_name not in url_callees:
                continue
            arguments = call.arguments or []
            if len(arguments) < 2:
                continue
            route_arg, view_arg = arguments[0], arguments[1]
            if not route_arg.is_literal or not isinstance(route_arg.literal_value, str):
                continue
            call_line = call.location.line if call.location else 0
            url_calls.append((pf, from_file, route_arg.literal_value, view_arg, call_line))

    # Build directed mount graph: (from_file, to_file, segment, call_line).
    # call_line is used to detect variable-wrapper nesting (see step below).
    mounts: list[tuple[str, str, str, int]] = []
    for _pf, from_file, segment, view_arg, call_line in url_calls:
        if view_arg.is_expression:
            view_text = view_arg.expression_text or ""
        elif view_arg.is_variable:
            view_text = view_arg.variable_name or ""
        elif view_arg.is_literal:
            view_text = str(view_arg.literal_value)
        else:
            view_text = ""
        include_match = include_module_re.search(view_text)
        if not include_match:
            continue
        target_file = _resolve_module(include_match.group(1))
        if target_file:
            mounts.append((from_file, target_file, segment, call_line))

    # Handle include(variable_name) wrappers.
    #
    # Pattern:
    #   helper = [path("sub/", include("app.sub")), ...]
    #   urlpatterns = [path("prefix/", include(helper))]
    #
    # include(variable) produces no mount of its own, but the cross-file
    # mounts inside 'helper' need the extra "prefix/" segment composed in.
    # For each path("segment/", include(var)) where var is a module-level
    # variable, all mounts from the same file at earlier lines are treated as
    # nested under that segment.
    # NOTE(review): this is a line-order heuristic — a mount that precedes
    # the wrapper but is not part of the helper list is also re-prefixed.
    for pf, from_file, wrapper_segment, view_arg, wrapper_line in url_calls:
        if view_arg.is_expression:
            view_text = view_arg.expression_text or ""
        elif view_arg.is_variable:
            view_text = view_arg.variable_name or ""
        else:
            view_text = ""
        var_match = include_var_re.match(view_text)
        if not var_match or var_match.group(1) not in pf.module_variables:
            continue
        mounts = [
            (
                m_from,
                m_to,
                _compose_url(wrapper_segment, m_seg)
                if m_from == from_file and m_line < wrapper_line
                else m_seg,
                m_line,
            )
            for m_from, m_to, m_seg, m_line in mounts
        ]

    # Fixpoint: compose absolute prefixes through mount chains.
    # Files absent from prefix_map are roots whose effective base prefix is "".
    prefix_map: dict[str, list[str]] = {}
    changed = True
    iters = 0
    while changed and iters < max_iters:
        changed = False
        iters += 1
        for from_file, to_file, segment, _line in mounts:
            for from_prefix in prefix_map.get(from_file, [""]):
                composed = _compose_url(from_prefix, segment)
                known = prefix_map.setdefault(to_file, [])
                if composed not in known:
                    known.append(composed)
                    changed = True

    if changed:
        # The cap was reached while prefixes were still being added.
        logging.getLogger(__name__).warning(
            "Django prefix propagation stopped after %d iterations without "
            "converging; include() mounts may be cyclic and prefixes incomplete",
            max_iters,
        )

    return prefix_map
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
_FASTIFY_PREFIX_RE = re.compile(r"\bprefix\s*:\s*['\"]([^'\"]+)['\"]")
|
|
183
|
+
_REQUIRE_PATH_RE = re.compile(r"require\s*\(\s*['\"](\.[^'\"]+)['\"]")
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def build_express_prefix_map(
|
|
187
|
+
parsed_files: list[ParsedFile],
|
|
188
|
+
project_root: Path | None = None,
|
|
189
|
+
) -> dict[str, list[str]]:
|
|
190
|
+
"""
|
|
191
|
+
Build a map of {file_path_str: [prefix, ...]} for Express and Fastify router files.
|
|
192
|
+
|
|
193
|
+
Express: scans for router.use("/prefix", routerVar) patterns.
|
|
194
|
+
Fastify: scans for fastify.register(routerVar, { prefix: "/prefix" }) patterns.
|
|
195
|
+
|
|
196
|
+
Both patterns resolve the target module via the variable→file map built from
|
|
197
|
+
require() / ES imports. Handles multi-level mount chains via fixpoint propagation.
|
|
198
|
+
|
|
199
|
+
Returns:
|
|
200
|
+
Dict mapping absolute file path strings to list of URL prefixes.
|
|
201
|
+
"""
|
|
202
|
+
# Build variable → file path map from require() / ES module imports
|
|
203
|
+
var_to_file: dict[str, list[str]] = {}
|
|
204
|
+
|
|
205
|
+
for pf in parsed_files:
|
|
206
|
+
for imp in pf.imports:
|
|
207
|
+
# Only local imports (start with . or /)
|
|
208
|
+
if not (imp.module.startswith(".") or imp.module.startswith("/")):
|
|
209
|
+
continue
|
|
210
|
+
|
|
211
|
+
# Resolve relative path
|
|
212
|
+
try:
|
|
213
|
+
if imp.module.startswith("."):
|
|
214
|
+
target = (pf.path.parent / imp.module).resolve()
|
|
215
|
+
else:
|
|
216
|
+
target = Path(imp.module)
|
|
217
|
+
|
|
218
|
+
# Try common extensions
|
|
219
|
+
for ext in (".ts", ".js", ".mjs", ""):
|
|
220
|
+
candidate = target.with_suffix(ext) if ext else target
|
|
221
|
+
if candidate.exists():
|
|
222
|
+
target = candidate
|
|
223
|
+
break
|
|
224
|
+
# Try index file
|
|
225
|
+
idx = target / ("index" + ext) if ext else target / "index.js"
|
|
226
|
+
if idx.exists():
|
|
227
|
+
target = idx
|
|
228
|
+
break
|
|
229
|
+
|
|
230
|
+
target_str = str(target)
|
|
231
|
+
except Exception:
|
|
232
|
+
target_str = None
|
|
233
|
+
|
|
234
|
+
if not target_str:
|
|
235
|
+
continue
|
|
236
|
+
|
|
237
|
+
for name in imp.names:
|
|
238
|
+
var_to_file.setdefault(name, []).append(target_str)
|
|
239
|
+
|
|
240
|
+
# Build directed mount graph: (from_file, to_file, segment)
|
|
241
|
+
# All paths are resolved so that symlinks (e.g. /var → /private/var on macOS)
|
|
242
|
+
# don't cause mismatches between the map keys and plugin lookups.
|
|
243
|
+
mounts: list[tuple[str, str, str]] = []
|
|
244
|
+
for pf in parsed_files:
|
|
245
|
+
for call in pf.call_sites:
|
|
246
|
+
if not call.is_method_call:
|
|
247
|
+
continue
|
|
248
|
+
callee = call.callee_name.lower()
|
|
249
|
+
|
|
250
|
+
# ── Express: app.use('/prefix', router) ──────────────────────────
|
|
251
|
+
if callee == "use":
|
|
252
|
+
if not call.arguments or len(call.arguments) < 2:
|
|
253
|
+
continue
|
|
254
|
+
prefix_arg = call.arguments[0]
|
|
255
|
+
if not prefix_arg.is_literal or not isinstance(prefix_arg.literal_value, str):
|
|
256
|
+
continue
|
|
257
|
+
segment = prefix_arg.literal_value
|
|
258
|
+
router_arg = call.arguments[1]
|
|
259
|
+
if not router_arg.is_variable or not router_arg.variable_name:
|
|
260
|
+
continue
|
|
261
|
+
var_name = router_arg.variable_name
|
|
262
|
+
for target_file in var_to_file.get(var_name, []):
|
|
263
|
+
mounts.append((str(pf.path.resolve()), target_file, segment))
|
|
264
|
+
|
|
265
|
+
# ── Fastify: fastify.register(routerVar, { prefix: '/prefix' }) ──
|
|
266
|
+
elif callee == "register":
|
|
267
|
+
if not call.arguments or len(call.arguments) < 2:
|
|
268
|
+
continue
|
|
269
|
+
|
|
270
|
+
# Extract prefix from the options object (second arg)
|
|
271
|
+
opts_arg = call.arguments[1]
|
|
272
|
+
opts_text = ""
|
|
273
|
+
if opts_arg.is_expression:
|
|
274
|
+
opts_text = opts_arg.expression_text or ""
|
|
275
|
+
elif opts_arg.is_literal and isinstance(opts_arg.literal_value, str):
|
|
276
|
+
opts_text = opts_arg.literal_value
|
|
277
|
+
|
|
278
|
+
prefix_m = _FASTIFY_PREFIX_RE.search(opts_text)
|
|
279
|
+
if not prefix_m:
|
|
280
|
+
continue
|
|
281
|
+
segment = prefix_m.group(1)
|
|
282
|
+
|
|
283
|
+
# Resolve the plugin argument (first arg) to a file
|
|
284
|
+
plugin_arg = call.arguments[0]
|
|
285
|
+
target_files: list[str] = []
|
|
286
|
+
if plugin_arg.is_variable and plugin_arg.variable_name:
|
|
287
|
+
target_files = var_to_file.get(plugin_arg.variable_name, [])
|
|
288
|
+
elif plugin_arg.is_expression and plugin_arg.expression_text:
|
|
289
|
+
# require('./routes/users') inline
|
|
290
|
+
req_m = _REQUIRE_PATH_RE.search(plugin_arg.expression_text)
|
|
291
|
+
if req_m:
|
|
292
|
+
rel_mod = req_m.group(1)
|
|
293
|
+
try:
|
|
294
|
+
target = (pf.path.parent / rel_mod).resolve()
|
|
295
|
+
for ext in (".ts", ".js", ".mjs", ""):
|
|
296
|
+
candidate = target.with_suffix(ext) if ext else target
|
|
297
|
+
if candidate.exists():
|
|
298
|
+
target_files = [str(candidate)]
|
|
299
|
+
break
|
|
300
|
+
idx = target / ("index" + ext) if ext else target / "index.js"
|
|
301
|
+
if idx.exists():
|
|
302
|
+
target_files = [str(idx)]
|
|
303
|
+
break
|
|
304
|
+
except Exception:
|
|
305
|
+
pass
|
|
306
|
+
|
|
307
|
+
for target_file in target_files:
|
|
308
|
+
mounts.append((str(pf.path.resolve()), target_file, segment))
|
|
309
|
+
|
|
310
|
+
# Fixpoint: compose absolute prefixes through mount chains.
|
|
311
|
+
# Files absent from prefix_map are roots whose effective base prefix is "".
|
|
312
|
+
# Example chain: app.ts("/"→routes) + routes("/blog"→blog) + blog("/writer"→writer)
|
|
313
|
+
# → writer.ts gets ["/blog/writer"]
|
|
314
|
+
prefix_map: dict[str, list[str]] = {}
|
|
315
|
+
changed = True
|
|
316
|
+
max_iters = 20
|
|
317
|
+
iters = 0
|
|
318
|
+
while changed and iters < max_iters:
|
|
319
|
+
changed = False
|
|
320
|
+
iters += 1
|
|
321
|
+
for from_file, to_file, segment in mounts:
|
|
322
|
+
from_prefixes = prefix_map.get(from_file, [""])
|
|
323
|
+
for from_prefix in from_prefixes:
|
|
324
|
+
composed = _compose_url(from_prefix, segment)
|
|
325
|
+
if to_file not in prefix_map:
|
|
326
|
+
prefix_map[to_file] = []
|
|
327
|
+
if composed not in prefix_map[to_file]:
|
|
328
|
+
prefix_map[to_file].append(composed)
|
|
329
|
+
changed = True
|
|
330
|
+
|
|
331
|
+
return prefix_map
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _compose_url(base: str, segment: str) -> str:
|
|
335
|
+
"""Compose two URL segments into a normalized absolute path."""
|
|
336
|
+
joined = base.rstrip("/") + "/" + segment.lstrip("/")
|
|
337
|
+
joined = re.sub(r"/+", "/", joined)
|
|
338
|
+
return joined or "/"
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Structured exit codes for the surface CLI.
|
|
3
|
+
|
|
4
|
+
Using named constants instead of bare integers makes the exit behaviour
|
|
5
|
+
explicit and lets callers distinguish failure modes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ExitCode:
    """Named integer exit statuses for the CLI."""

    # Run finished without error.
    SUCCESS = 0
    # Analysis completed but manifest invalid or empty.
    MANIFEST_ERROR = 1
    # Analysis OK, cloud upload failed.
    UPLOAD_FAILURE = 2
    # Analysis found policy violations (future).
    POLICY_VIOLATION = 3
    # Configuration file invalid or missing required fields.
    CONFIG_ERROR = 4
    # API key / cloud auth rejected.
    AUTH_FAILURE = 5
|