apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,2309 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main analysis orchestrator for generating manifests.
|
|
3
|
+
|
|
4
|
+
This module coordinates:
|
|
5
|
+
- File discovery
|
|
6
|
+
- Parsing (Python, Java, C#, JavaScript)
|
|
7
|
+
- Framework detection and extraction
|
|
8
|
+
- Data flow analysis
|
|
9
|
+
- Manifest generation
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import hashlib
|
|
15
|
+
import logging
|
|
16
|
+
import time
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from pydantic import ValidationError
|
|
23
|
+
|
|
24
|
+
from ..core.config import CodeBoltConfig
|
|
25
|
+
from ..core.discovery import DiscoveryResult, discover_files
|
|
26
|
+
from ..core.manifest import (
|
|
27
|
+
MANIFEST_VERSION,
|
|
28
|
+
AnalysisMetadataModel,
|
|
29
|
+
AuthDependencyModel,
|
|
30
|
+
AuthModel,
|
|
31
|
+
AuthSchemeModel,
|
|
32
|
+
BodyFieldModel,
|
|
33
|
+
CallContextModel,
|
|
34
|
+
CapabilityModel,
|
|
35
|
+
ClassModel,
|
|
36
|
+
ConfigurationModel,
|
|
37
|
+
DataFlowModel,
|
|
38
|
+
EnvVarUsageModel,
|
|
39
|
+
FunctionCallModel,
|
|
40
|
+
FunctionModel,
|
|
41
|
+
HttpBodyModel,
|
|
42
|
+
HttpParameterModel,
|
|
43
|
+
IntegrationModel,
|
|
44
|
+
JwtConfigModel,
|
|
45
|
+
LiteralsModel,
|
|
46
|
+
LocationModel,
|
|
47
|
+
Manifest,
|
|
48
|
+
MiddlewareModel,
|
|
49
|
+
ParameterModel,
|
|
50
|
+
ProjectMetadata,
|
|
51
|
+
RouteAuthModel,
|
|
52
|
+
RouteModel,
|
|
53
|
+
SchemaFieldModel,
|
|
54
|
+
SchemaModel,
|
|
55
|
+
stable_id,
|
|
56
|
+
)
|
|
57
|
+
from ..core.types import Framework, Language
|
|
58
|
+
from ..frameworks import dotnet as _dotnet_fw # noqa: F401 — registers AspNetCorePlugin
|
|
59
|
+
from ..frameworks import java as _java_fw # noqa: F401 — registers SpringBootPlugin
|
|
60
|
+
from ..frameworks.base import FrameworkPluginRegistry
|
|
61
|
+
from ..parsing import csharp as _csharp_pkg # noqa: F401 — registers CSharpParser
|
|
62
|
+
from ..parsing import js as _js_pkg # noqa: F401 — registers JavaScriptParser
|
|
63
|
+
from ..parsing import jvm as _jvm_pkg # noqa: F401 — registers JavaParser
|
|
64
|
+
from ..parsing.base import ParsedFile, ParserRegistry
|
|
65
|
+
from ..parsing.csharp.language_services import CSharpLanguageServices
|
|
66
|
+
from ..parsing.js.language_services import JavaScriptLanguageServices
|
|
67
|
+
from ..parsing.jvm import language_services as _jvm_lang_services # noqa: F401
|
|
68
|
+
from ..parsing.jvm.language_services import JavaLanguageServices
|
|
69
|
+
from ..parsing.python.language_services import PythonLanguageServices
|
|
70
|
+
from ..parsing.python.parser import PythonProjectParser
|
|
71
|
+
from ..parsing.python.type_resolver import SchemaBuilder, TypeResolver
|
|
72
|
+
from ..parsing.services import AnalysisContext, LanguageServices
|
|
73
|
+
from .call_graph import CallGraph, build_call_graph_with_context
|
|
74
|
+
from .data_flow import DataFlowAnalyzer
|
|
75
|
+
from .dependency_extractor import DependencyExtractor
|
|
76
|
+
from .flow_analysis import build_flow_sensitive_bindings
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _auto_discover_plugins() -> None:
    """Import all framework plugins to trigger their self-registration.

    Walks the ``frameworks`` package and imports every module whose dotted
    name contains ``"plugin"``.  A module that fails to import is reported at
    DEBUG level and skipped, so one broken plugin does not stop the others
    from loading.
    """
    import importlib
    import pkgutil

    from .. import frameworks as _fw_pkg

    # This function runs at import time, *before* the module-level ``logger``
    # is assigned.  Referencing that global from the handler below would raise
    # NameError and abort the import of this module, so fetch the (identical)
    # logger locally instead.
    log = logging.getLogger(__name__)

    for _importer, mod_name, _is_pkg in pkgutil.walk_packages(
        _fw_pkg.__path__,
        prefix=_fw_pkg.__name__ + ".",
    ):
        # A name ending in "_plugin" necessarily contains "plugin", so the
        # substring test alone selects the same modules.
        if "plugin" not in mod_name:
            continue
        try:
            importlib.import_module(mod_name)
        except Exception as e:  # noqa: BLE001
            log.debug("Could not load framework plugin %s: %s", mod_name, e)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
_auto_discover_plugins()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
logger = logging.getLogger(__name__)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _get_probe_version() -> str:
|
|
104
|
+
try:
|
|
105
|
+
from .. import __version__
|
|
106
|
+
|
|
107
|
+
return __version__
|
|
108
|
+
except Exception:
|
|
109
|
+
return "0.1.0"
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# =============================================================================
|
|
113
|
+
# Analysis Result
|
|
114
|
+
# =============================================================================
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
class AnalysisWarning:
    """Record of a problem that did not abort the analysis."""

    # Required: label of the phase the warning belongs to, and its text.
    phase: str
    message: str

    # Optional: related file and any additional detail; both default to None.
    file: str | None = None
    detail: str | None = None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass
class AnalysisResult:
    """Outcome of one analysis run: the manifest plus run statistics."""

    manifest: Manifest

    # --- File statistics ---------------------------------------------------
    files_analyzed: int = 0
    files_failed: int = 0
    parse_errors: list[str] = field(default_factory=list)

    # --- Non-fatal warnings collected while analysing ------------------------
    warnings: list[AnalysisWarning] = field(default_factory=list)

    # --- Timing: headline figures (milliseconds) -----------------------------
    discovery_time_ms: int = 0
    parse_time_ms: int = 0
    extraction_time_ms: int = 0
    total_time_ms: int = 0

    # --- Timing: every pipeline stage, keyed by stage name (ms) --------------
    stage_times_ms: dict[str, int] = field(default_factory=dict)

    # --- Timing: extraction per framework plugin, keyed by framework name (ms)
    extractor_times_ms: dict[str, int] = field(default_factory=dict)

    # --- Successfully parsed file count per language name --------------------
    files_by_language: dict[str, int] = field(default_factory=dict)

    # --- Parse failure count per language name -------------------------------
    failures_by_language: dict[str, int] = field(default_factory=dict)

    # --- Files excluded by discovery (gitignore, --exclude, --max-files,
    #     unknown extension) --------------------------------------------------
    files_skipped: int = 0

    # --- Route count per framework name --------------------------------------
    routes_by_framework: dict[str, int] = field(default_factory=dict)

    # --- Structured parse errors (file + line + message); the structured
    #     counterpart of the plain strings in ``parse_errors`` ----------------
    parse_error_details: list[dict[str, Any]] = field(default_factory=list)

    # --- Frameworks detected in the project ----------------------------------
    frameworks_detected: list[Framework] = field(default_factory=list)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# =============================================================================
|
|
173
|
+
# Analyzer
|
|
174
|
+
# =============================================================================
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class ProjectAnalyzer:
|
|
178
|
+
"""
|
|
179
|
+
Main analyzer for extracting manifest from a project.
|
|
180
|
+
|
|
181
|
+
Coordinates:
|
|
182
|
+
1. File discovery
|
|
183
|
+
2. Parsing files by language
|
|
184
|
+
3. Framework detection
|
|
185
|
+
4. Route/dependency/auth extraction
|
|
186
|
+
5. Data flow analysis
|
|
187
|
+
6. Manifest generation
|
|
188
|
+
"""
|
|
189
|
+
|
|
190
|
+
def __init__(self, config: CodeBoltConfig) -> None:
    """Keep *config* and set up parsers, language services and empty run state."""
    self.config = config

    # --- Parsers -------------------------------------------------------------
    self._python_parser = PythonProjectParser()
    type_resolver = TypeResolver()
    self._type_resolver = type_resolver
    self._schema_builder = SchemaBuilder(type_resolver)

    # --- One LanguageServices instance per supported language; each one
    #     builds the analysis context for its language ------------------------
    self._language_services: dict[Language, LanguageServices] = {
        Language.PYTHON: PythonLanguageServices(),
        Language.JAVA: JavaLanguageServices(),
        Language.CSHARP: CSharpLanguageServices(),
        Language.JAVASCRIPT: JavaScriptLanguageServices(),
    }

    # --- State that starts empty and is filled while analysing ---------------
    # Per-language analysis contexts
    self._analysis_contexts: dict[Language, AnalysisContext] = {}
    # Parse results keyed by file path, and the discovery result
    self._parsed_files: dict[Path, ParsedFile] = {}
    self._discovery_result: DiscoveryResult | None = None
    # Call graph
    self._call_graph: CallGraph | None = None
    # Data flow statistics (for manifest metadata)
    self._data_flow_stats: dict[str, int] = {}
    # Flow-sensitive bindings, built once and shared by call graph + data flow
    self._flow_bindings: Any = None
    # Frameworks detected so far
    self._detected_frameworks: set[Framework] = set()
    # Per-plugin extraction timing (populated by _extract_all_routes)
    self._extractor_times_ms: dict[str, int] = {}
    # Accumulated non-fatal warnings
    self._warnings: list[AnalysisWarning] = []
|
|
230
|
+
|
|
231
|
+
# Pipeline stage definitions: (name, method_name)
|
|
232
|
+
_PIPELINE_STAGES: list[tuple[str, str]] = [
|
|
233
|
+
("discover", "_stage_discover"),
|
|
234
|
+
("parse", "_stage_parse"),
|
|
235
|
+
("resolve", "_stage_resolve"),
|
|
236
|
+
("detect_frameworks", "_stage_detect_frameworks"),
|
|
237
|
+
("flow_bindings", "_stage_flow_bindings"),
|
|
238
|
+
("call_graph", "_stage_call_graph"),
|
|
239
|
+
("extract", "_stage_extract"),
|
|
240
|
+
("analyze_flows", "_stage_analyze_flows"),
|
|
241
|
+
("enrich", "_stage_enrich"),
|
|
242
|
+
]
|
|
243
|
+
|
|
244
|
+
def analyze(self) -> AnalysisResult:
    """
    Run every pipeline stage and package the outcome.

    The stages listed in ``_PIPELINE_STAGES`` run in order; each receives the
    same shared ``ctx`` dict and is timed individually.  The manifest is then
    built from the values the stages left in ``ctx``.

    Returns:
        An ``AnalysisResult`` holding the manifest together with file counts,
        parse errors, warnings and timing figures.
    """
    run_started = time.perf_counter()
    stage_times: dict[str, int] = {}
    ctx: dict[str, Any] = {}

    for stage_name, method_name in self._PIPELINE_STAGES:
        stage_started = time.perf_counter()
        getattr(self, method_name)(ctx)
        elapsed_ms = int((time.perf_counter() - stage_started) * 1000)
        stage_times[stage_name] = elapsed_ms
        logger.debug("Stage '%s' completed in %dms", stage_name, elapsed_ms)

    total_time_ms = int((time.perf_counter() - run_started) * 1000)

    manifest = self._build_manifest(
        routes=ctx["routes"],
        functions=ctx["functions"],
        classes=ctx["classes"],
        calls=ctx["calls"],
        data_flows=ctx["data_flows"],
        auth=ctx["auth"],
        middleware=ctx["middleware"],
        dependencies=ctx["pkg_dependencies"],
        integrations=ctx["integrations"],
        capabilities=ctx["capabilities"],
        schemas=ctx["schemas"],
        literals=ctx["literals"],
        configuration=ctx["config_data"],
        analysis_time_ms=total_time_ms,
    )

    # Single pass over the parsed files: success/failure counts per language
    # plus both the legacy string errors and the structured error details.
    parse_errors: list[str] = []
    parse_error_details: list[dict[str, Any]] = []
    files_by_language: dict[str, int] = {}
    failures_by_language: dict[str, int] = {}
    analyzed_count = 0
    failed_count = 0

    for path, parsed in self._parsed_files.items():
        lang_name = parsed.language.name.lower() if parsed.language else "unknown"

        if parsed.success:
            analyzed_count += 1
            files_by_language[lang_name] = files_by_language.get(lang_name, 0) + 1
            continue

        failed_count += 1
        failures_by_language[lang_name] = failures_by_language.get(lang_name, 0) + 1

        err = parsed.error
        if not err:
            # A failure without an error value is counted but not listed.
            continue

        parse_errors.append(f"{path}: {err}")
        detail: dict[str, Any] = {"file": str(path), "error": str(err)}
        # Position information is copied only when the error carries it.
        for position_attr in ("line", "column"):
            position = getattr(err, position_attr, None)
            if position is not None:
                detail[position_attr] = position
        parse_error_details.append(detail)

    # Route count per framework, taken from the manifest entry points.
    routes_by_framework: dict[str, int] = {}
    for entry_point in manifest.entry_points:
        fw_key = (entry_point.framework or "unknown").lower()
        routes_by_framework[fw_key] = routes_by_framework.get(fw_key, 0) + 1

    skipped = self._discovery_result.total_skipped if self._discovery_result else 0

    return AnalysisResult(
        manifest=manifest,
        files_analyzed=analyzed_count,
        files_failed=failed_count,
        files_skipped=skipped,
        parse_errors=parse_errors,
        parse_error_details=parse_error_details,
        warnings=list(self._warnings),
        discovery_time_ms=stage_times.get("discover", 0),
        parse_time_ms=stage_times.get("parse", 0),
        extraction_time_ms=stage_times.get("extract", 0),
        total_time_ms=total_time_ms,
        stage_times_ms=dict(stage_times),
        extractor_times_ms=self._extractor_times_ms,
        files_by_language=files_by_language,
        failures_by_language=failures_by_language,
        routes_by_framework=routes_by_framework,
        frameworks_detected=list(self._detected_frameworks),
    )
|
|
333
|
+
|
|
334
|
+
# -- Pipeline stage implementations ----------------------------------------
|
|
335
|
+
|
|
336
|
+
def _stage_discover(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: discover files under the project root and keep the result on ``self``."""
    root = self.config.project_root
    logger.info("Discovering files in %s", root)

    discovery = discover_files(root, self.config.analysis.file_discovery)
    self._discovery_result = discovery

    logger.info(
        "Discovered %d files, skipped %d",
        discovery.total_selected,
        discovery.total_skipped,
    )
|
|
347
|
+
|
|
348
|
+
def _stage_parse(self, ctx: dict[str, Any]) -> None:
|
|
349
|
+
self._parse_all_files()
|
|
350
|
+
|
|
351
|
+
def _stage_resolve(self, ctx: dict[str, Any]) -> None:
|
|
352
|
+
self._build_cross_file_resolver()
|
|
353
|
+
|
|
354
|
+
def _stage_detect_frameworks(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: detect the frameworks in use; ``ctx`` is not touched."""
    self._detect_frameworks()
|
|
356
|
+
|
|
357
|
+
def _stage_flow_bindings(self, ctx: dict[str, Any]) -> None:
|
|
358
|
+
self._build_shared_flow_bindings()
|
|
359
|
+
|
|
360
|
+
def _stage_call_graph(self, ctx: dict[str, Any]) -> None:
|
|
361
|
+
self._build_call_graph()
|
|
362
|
+
|
|
363
|
+
def _stage_extract(self, ctx: dict[str, Any]) -> None:
    """
    Pipeline stage: extract routes, dependencies, auth, middleware and calls.

    Writes the ``routes``, ``dependencies``, ``auth``, ``middleware`` and
    ``calls`` entries of ``ctx``.  Between extracting middleware and calls,
    the route and auth data are passed through the auth-linking helpers.
    """
    routes = self._apply_group_prefixes(self._extract_all_routes())
    ctx["routes"] = routes

    ctx["dependencies"] = self._extract_all_dependencies()

    auth = self._extract_auth_data()
    ctx["auth"] = auth

    middleware = self._extract_middleware()
    ctx["middleware"] = middleware

    # Auth-linking steps, in their original order.
    self._populate_route_auth_mapping(auth, routes, middleware)
    self._link_routes_to_auth_schemes(routes, auth)
    self._propagate_file_auth_to_routes(routes, auth)
    self._apply_spring_filter_chain_policy(routes)

    ctx["calls"] = self._extract_all_calls()
|
|
374
|
+
|
|
375
|
+
def _stage_analyze_flows(self, ctx: dict[str, Any]) -> None:
    """
    Pipeline stage: data flows, integrations and literals.

    Reads ``ctx["routes"]`` and writes ``data_flows``, ``integrations`` and
    ``literals``; the integrations and literals are then handed to the
    sensitivity classifier.
    """
    # The second element returned by the data-flow analysis is not used here.
    data_flows, _ignored = self._analyze_data_flows(ctx["routes"])
    ctx["data_flows"] = data_flows

    integrations = self._detect_integrations()
    ctx["integrations"] = integrations

    literals = self._scan_literals()
    ctx["literals"] = literals

    self._classify_sensitivity(integrations, literals)
|
|
380
|
+
|
|
381
|
+
def _stage_enrich(self, ctx: dict[str, Any]) -> None:
    """
    Pipeline stage: gather the remaining manifest inputs.

    Reads ``ctx["integrations"]`` and ``ctx["routes"]`` and writes
    ``pkg_dependencies``, ``config_data``, ``capabilities``, ``functions``,
    ``classes`` and ``schemas``.
    """
    ctx["pkg_dependencies"] = DependencyExtractor(
        self.config.project_root, self._parsed_files
    ).extract()

    config_data = self._extract_configuration()
    ctx["config_data"] = config_data

    ctx["capabilities"] = self._tag_capabilities(
        ctx["integrations"],
        ctx["routes"],
        config_data,
    )

    # The function id index returned alongside the functions is passed on
    # to the class collection.
    functions, func_id_index = self._collect_all_functions()
    ctx["functions"] = functions
    ctx["classes"] = self._collect_all_classes(func_id_index)

    ctx["schemas"] = self._collect_schemas(ctx["routes"])
|
|
394
|
+
|
|
395
|
+
# =========================================================================
|
|
396
|
+
# Parsing
|
|
397
|
+
# =========================================================================
|
|
398
|
+
|
|
399
|
+
def _parse_all_files(self) -> None:
|
|
400
|
+
"""Parse all discovered files using registry-based language dispatch."""
|
|
401
|
+
if not self._discovery_result:
|
|
402
|
+
return
|
|
403
|
+
|
|
404
|
+
# Group files by language
|
|
405
|
+
files_by_lang: dict[Language, list[Path]] = {}
|
|
406
|
+
for f in self._discovery_result.files:
|
|
407
|
+
if f.language:
|
|
408
|
+
files_by_lang.setdefault(f.language, []).append(f.path)
|
|
409
|
+
|
|
410
|
+
for language, file_paths in files_by_lang.items():
|
|
411
|
+
parser = ParserRegistry.get_parser(language)
|
|
412
|
+
if parser is None:
|
|
413
|
+
logger.info(
|
|
414
|
+
"Found %d %s files (no parser registered)",
|
|
415
|
+
len(file_paths),
|
|
416
|
+
language.name,
|
|
417
|
+
)
|
|
418
|
+
continue
|
|
419
|
+
|
|
420
|
+
logger.info("Parsing %d %s files", len(file_paths), language.name)
|
|
421
|
+
|
|
422
|
+
# Use PythonProjectParser for Python (has cross-file resolution)
|
|
423
|
+
if language == Language.PYTHON:
|
|
424
|
+
self._parsed_files.update(self._python_parser.parse_project(file_paths))
|
|
425
|
+
else:
|
|
426
|
+
for path in file_paths:
|
|
427
|
+
try:
|
|
428
|
+
parsed = parser.parse_file(path)
|
|
429
|
+
self._parsed_files[path] = parsed
|
|
430
|
+
except Exception as e:
|
|
431
|
+
logger.warning("Failed to parse %s: %s", path, e)
|
|
432
|
+
|
|
433
|
+
def _build_analysis_contexts(self) -> None:
|
|
434
|
+
"""
|
|
435
|
+
Build analysis contexts for each language using LanguageServices.
|
|
436
|
+
|
|
437
|
+
This replaces the old approach of manually building each resolver.
|
|
438
|
+
Now each language provides a factory that builds all required services.
|
|
439
|
+
"""
|
|
440
|
+
if not self._parsed_files:
|
|
441
|
+
return
|
|
442
|
+
|
|
443
|
+
# Group parsed files by language
|
|
444
|
+
files_by_language: dict[Language, list[ParsedFile]] = {}
|
|
445
|
+
for parsed in self._parsed_files.values():
|
|
446
|
+
if not parsed.success:
|
|
447
|
+
continue
|
|
448
|
+
lang = parsed.language
|
|
449
|
+
if lang not in files_by_language:
|
|
450
|
+
files_by_language[lang] = []
|
|
451
|
+
files_by_language[lang].append(parsed)
|
|
452
|
+
|
|
453
|
+
# Build context for each language
|
|
454
|
+
for language, files in files_by_language.items():
|
|
455
|
+
if language not in self._language_services:
|
|
456
|
+
logger.warning(f"No language services available for {language.name}")
|
|
457
|
+
continue
|
|
458
|
+
|
|
459
|
+
services = self._language_services[language]
|
|
460
|
+
|
|
461
|
+
# Detect primary framework for this language
|
|
462
|
+
framework = self._detect_primary_framework(files, language)
|
|
463
|
+
framework_name = framework.name.lower() if framework else None
|
|
464
|
+
|
|
465
|
+
logger.info(
|
|
466
|
+
f"Building analysis context for {language.name} (framework: {framework_name})"
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
# Build context using the language services factory
|
|
470
|
+
context = services.build_context(
|
|
471
|
+
parsed_files=files,
|
|
472
|
+
project_root=self.config.project_root,
|
|
473
|
+
framework=framework_name,
|
|
474
|
+
)
|
|
475
|
+
|
|
476
|
+
# Wire URL prefix maps for Django and Express/NestJS
|
|
477
|
+
if language == Language.PYTHON and framework_name == "django":
|
|
478
|
+
try:
|
|
479
|
+
from .url_prefix_resolver import build_django_prefix_map
|
|
480
|
+
|
|
481
|
+
pm = build_django_prefix_map(files, project_root=self.config.project_root)
|
|
482
|
+
context.language_services["_url_prefix_map"] = pm
|
|
483
|
+
logger.debug("Django prefix map built: %d entries", len(pm))
|
|
484
|
+
except Exception as e:
|
|
485
|
+
logger.debug("Django prefix map failed: %s", e)
|
|
486
|
+
elif language == Language.JAVASCRIPT:
|
|
487
|
+
try:
|
|
488
|
+
from .url_prefix_resolver import build_express_prefix_map
|
|
489
|
+
|
|
490
|
+
pm = build_express_prefix_map(files, project_root=self.config.project_root)
|
|
491
|
+
context.language_services["_url_prefix_map"] = pm
|
|
492
|
+
logger.debug("Express prefix map built: %d entries", len(pm))
|
|
493
|
+
except Exception as e:
|
|
494
|
+
logger.debug("Express prefix map failed: %s", e)
|
|
495
|
+
|
|
496
|
+
self._analysis_contexts[language] = context
|
|
497
|
+
|
|
498
|
+
# Log statistics
|
|
499
|
+
if context.type_resolver:
|
|
500
|
+
models = context.type_resolver.get_all_models()
|
|
501
|
+
logger.info(f" - Resolved {len(models)} model types")
|
|
502
|
+
|
|
503
|
+
if context.constant_resolver:
|
|
504
|
+
constants = context.constant_resolver.get_all_constants()
|
|
505
|
+
logger.info(f" - Resolved {len(constants)} constants")
|
|
506
|
+
|
|
507
|
+
if context.router_registry:
|
|
508
|
+
routers = context.router_registry.get_all_routers()
|
|
509
|
+
logger.info(f" - Registered {len(routers)} routers")
|
|
510
|
+
|
|
511
|
+
def _detect_primary_framework(
|
|
512
|
+
self,
|
|
513
|
+
files: list[ParsedFile],
|
|
514
|
+
language: Language,
|
|
515
|
+
) -> Framework | None:
|
|
516
|
+
"""Detect the primary framework for a set of files."""
|
|
517
|
+
framework_counts: dict[Framework, int] = {}
|
|
518
|
+
|
|
519
|
+
for parsed_file in files:
|
|
520
|
+
detected = FrameworkPluginRegistry.detect_frameworks(parsed_file)
|
|
521
|
+
for fw in detected:
|
|
522
|
+
framework_counts[fw] = framework_counts.get(fw, 0) + 1
|
|
523
|
+
|
|
524
|
+
if not framework_counts:
|
|
525
|
+
return None
|
|
526
|
+
|
|
527
|
+
# Return the most common framework
|
|
528
|
+
return max(framework_counts, key=framework_counts.get)
|
|
529
|
+
|
|
530
|
+
# Backward compatibility alias
|
|
531
|
+
def _build_cross_file_resolver(self) -> None:
|
|
532
|
+
"""Build cross-file resolution (alias for _build_analysis_contexts)."""
|
|
533
|
+
self._build_analysis_contexts()
|
|
534
|
+
|
|
535
|
+
def _build_shared_flow_bindings(self) -> None:
|
|
536
|
+
"""Build flow-sensitive bindings once for reuse by call graph and data flow."""
|
|
537
|
+
successful_files = [p for p in self._parsed_files.values() if p.success]
|
|
538
|
+
if not successful_files:
|
|
539
|
+
return
|
|
540
|
+
try:
|
|
541
|
+
self._flow_bindings = build_flow_sensitive_bindings(successful_files)
|
|
542
|
+
logger.info("Built shared flow-sensitive bindings for %d files", len(successful_files))
|
|
543
|
+
except Exception as e:
|
|
544
|
+
logger.warning("Could not build flow-sensitive bindings: %s", e)
|
|
545
|
+
self._flow_bindings = None
|
|
546
|
+
|
|
547
|
+
def _build_call_graph(self) -> None:
|
|
548
|
+
"""Build the call graph for inter-procedural analysis."""
|
|
549
|
+
if not self._parsed_files:
|
|
550
|
+
return
|
|
551
|
+
|
|
552
|
+
logger.info("Building call graph...")
|
|
553
|
+
|
|
554
|
+
# Get successfully parsed files
|
|
555
|
+
successful_files = [parsed for parsed in self._parsed_files.values() if parsed.success]
|
|
556
|
+
|
|
557
|
+
if not successful_files:
|
|
558
|
+
logger.warning("No successfully parsed files for call graph")
|
|
559
|
+
return
|
|
560
|
+
|
|
561
|
+
# Get the primary analysis context (prefer Python, fall back to Java, then C#)
|
|
562
|
+
context = (
|
|
563
|
+
self._analysis_contexts.get(Language.PYTHON)
|
|
564
|
+
or self._analysis_contexts.get(Language.JAVA)
|
|
565
|
+
or self._analysis_contexts.get(Language.CSHARP)
|
|
566
|
+
)
|
|
567
|
+
|
|
568
|
+
try:
|
|
569
|
+
if context:
|
|
570
|
+
self._call_graph = build_call_graph_with_context(
|
|
571
|
+
successful_files,
|
|
572
|
+
context,
|
|
573
|
+
flow_bindings=self._flow_bindings,
|
|
574
|
+
)
|
|
575
|
+
else:
|
|
576
|
+
from .call_graph import build_call_graph
|
|
577
|
+
|
|
578
|
+
self._call_graph = build_call_graph(
|
|
579
|
+
successful_files,
|
|
580
|
+
project_root=self.config.project_root,
|
|
581
|
+
flow_bindings=self._flow_bindings,
|
|
582
|
+
)
|
|
583
|
+
|
|
584
|
+
stats = self._call_graph.get_statistics()
|
|
585
|
+
logger.info(
|
|
586
|
+
f"Call graph built: {stats['internal_nodes']} internal functions, "
|
|
587
|
+
f"{stats['total_edges']} call edges"
|
|
588
|
+
)
|
|
589
|
+
logger.info(f" - Entry points: {stats['entry_points']}")
|
|
590
|
+
logger.info(f" - Resolution confidence: {stats['resolution_confidence']}")
|
|
591
|
+
|
|
592
|
+
except ImportError as e:
|
|
593
|
+
logger.warning(f"Could not build call graph: {e}")
|
|
594
|
+
|
|
595
|
+
def _detect_frameworks(self) -> None:
    """Collect the frameworks detected in every successfully parsed file.

    Each detection result is merged into ``self._detected_frameworks``;
    the accumulated framework names are logged once at the end.
    """
    usable_files = (pf for pf in self._parsed_files.values() if pf.success)
    for pf in usable_files:
        self._detected_frameworks.update(FrameworkPluginRegistry.detect_frameworks(pf))

    logger.info(f"Detected frameworks: {[f.name for f in self._detected_frameworks]}")
|
|
605
|
+
|
|
606
|
+
# =========================================================================
|
|
607
|
+
# Route Extraction
|
|
608
|
+
# =========================================================================
|
|
609
|
+
|
|
610
|
+
def _extract_all_routes(self) -> list[RouteModel]:
    """Extract routes from all parsed files using analysis contexts.

    For every successfully parsed file, each plugin belonging to a detected
    framework that also detects the file is asked for its routes. Extracted
    routes are converted to ``RouteModel`` objects with a stable endpoint id
    derived from the HTTP method name and the path.

    Side effects:
        ``self._extractor_times_ms`` is replaced with a mapping of
        lower-cased framework name to total extraction time in whole
        milliseconds.

    Returns:
        The converted routes, in file order then plugin order.
    """
    routes: list[RouteModel] = []

    # Build plugin list from the language-keyed registry so that multiple
    # plugins sharing the same Framework enum value (e.g. GraphQLPythonPlugin
    # and GraphQLJavaPlugin both register as Framework.GRAPHQL) are both
    # included. get_plugin(fw) returns only one entry per framework key and
    # would silently drop the second registrant.
    seen_plugin_ids: set[int] = set()
    plugins = []
    for language in Language:
        for plugin in FrameworkPluginRegistry.get_plugins_for_language(language):
            if (
                plugin.framework in self._detected_frameworks
                and id(plugin) not in seen_plugin_ids
            ):
                seen_plugin_ids.add(id(plugin))
                plugins.append(plugin)

    # Per-plugin wall-clock accumulator: framework_name -> total seconds.
    # Kept as float seconds and converted to milliseconds once at the end;
    # truncating every call to whole milliseconds before summing would
    # report 0 for plugins that make many sub-millisecond calls.
    plugin_seconds: dict[str, float] = {}

    # Extract routes from each file
    for parsed_file in self._parsed_files.values():
        if not parsed_file.success:
            continue

        # Get analysis context for this file's language
        context = self._analysis_contexts.get(parsed_file.language)

        for plugin in plugins:
            if not plugin.detect(parsed_file):
                continue

            # A plugin without extract_routes contributes nothing; skipping
            # here keeps the conversion loop below from ever seeing a stale
            # or undefined result.
            if not hasattr(plugin, "extract_routes"):
                continue

            framework_name = plugin.framework.name.lower()
            started = time.perf_counter()
            try:
                # Extract routes, passing context when supported
                extracted_routes = plugin.extract_routes(parsed_file, context=context)
            except TypeError:
                # NOTE(review): this also catches a TypeError raised inside
                # the plugin itself, not only a signature mismatch — confirm
                # that retrying through the legacy path is acceptable then.
                self._setup_legacy_plugin(plugin, parsed_file.language)
                extracted_routes = plugin.extract_routes(parsed_file)

            elapsed = time.perf_counter() - started
            plugin_seconds[framework_name] = plugin_seconds.get(framework_name, 0.0) + elapsed

            for route in extracted_routes:
                ep_id = stable_id("ep", route.method.name, route.path)
                routes.append(self._convert_route(route, ep_id, framework=framework_name))

    self._extractor_times_ms = {
        name: int(seconds * 1000) for name, seconds in plugin_seconds.items()
    }
    return routes
|
|
665
|
+
|
|
666
|
+
# =========================================================================
|
|
667
|
+
# Group Prefix Propagation (cross-file module → endpoint extension pattern)
|
|
668
|
+
# =========================================================================
|
|
669
|
+
|
|
670
|
+
def _apply_group_prefixes(self, routes: list[RouteModel]) -> list[RouteModel]:
    """Prepend MapGroup prefix to routes registered via the module-extension pattern.

    Handles three patterns:
    1. Cross-file: module file calls MapGroup + extension methods; handler files
       define those methods.
    2. Same-file: file calls MapGroup and registers routes on the result directly
       (e.g. ``var api = app.MapGroup("x"); api.MapGet("/y", ...)``)
    3. Program.cs auth: an outer file calls ``handler.MapXxxApi().RequireAuthorization()``
       without its own MapGroup; auth is propagated to the handler file's routes.

    The work is done in four steps: collect extension-method names per file,
    scan every file's call sites for group/auth patterns, merge the findings
    into per-file lookup tables, and finally rewrite the matching routes.

    Args:
        routes: Routes already extracted; matched to files through
            ``route.handler_location.file``.

    Returns:
        The ``routes`` list itself when no group or auth pattern was found.
        Otherwise a new list in which affected routes are copies with an
        updated ``path``, ``id`` and ``dependencies`` (or ``dependencies``
        only for auth-only overrides); unaffected routes are passed through.
    """
    import re as _re

    # Built-in Map* helpers; any other ``Map*`` call is treated as a
    # project-defined extension method below.
    _STANDARD_MAP = frozenset(
        {
            "MapGet",
            "MapPost",
            "MapPut",
            "MapDelete",
            "MapPatch",
            "MapIdentityApi",
            "MapGroup",
            "MapHub",
        }
    )

    # ── Step 1: collect extension-method names per file ──────────────────
    # Static methods whose first parameter is IEndpointRouteBuilder or similar.
    ext_methods_by_file: dict[str, set[str]] = {}
    for file_path, parsed_file in self._parsed_files.items():
        if not parsed_file.success:
            continue
        for cls in parsed_file.classes:
            for method in cls.methods:
                if method.binding != "static" or not method.parameters:
                    continue
                # Drop generic arguments (``Foo<T>`` -> ``Foo``) before comparing.
                first_type = (method.parameters[0].type_annotation or "").split("<")[0].strip()
                if first_type in (
                    "IEndpointRouteBuilder",
                    "RouteGroupBuilder",
                    "WebApplication",
                    "IApplicationBuilder",
                ):
                    ext_methods_by_file.setdefault(str(file_path), set()).add(method.name)

    # Reverse map: extension method name → file that defines it
    # (when two files define the same method name, the later one wins).
    ext_method_to_file: dict[str, str] = {}
    for file_path, method_names in ext_methods_by_file.items():
        for method_name in method_names:
            ext_method_to_file[method_name] = file_path

    # ── Step 2: scan all files for group patterns ─────────────────────────
    # group_registry: ext_method_name → (prefix, auth, anon)  [cross-file pattern]
    group_registry: dict[str, tuple[str, bool, bool]] = {}
    # same_file_prefix_map: file_path → (prefix, auth, anon)  [same-file pattern, single prefix]
    same_file_prefix_map: dict[str, tuple[str, bool, bool]] = {}
    # same_file_multi_map: file_path → sorted [(group_line, prefix, has_auth)]
    # Used when a file has multiple MapGroup prefixes (e.g. versioned API)
    same_file_multi_map: dict[str, list[tuple[int, str, bool]]] = {}
    # program_auth_overrides: handler file paths that get auth from an outer caller
    program_auth_overrides: set[str] = set()

    for file_path_obj, parsed_file in self._parsed_files.items():
        file_path = str(file_path_obj)
        if not parsed_file.success:
            continue

        # Find MapGroup calls with literal prefix arguments
        group_lines: dict[int, str] = {}  # start_line → prefix
        for call in parsed_file.call_sites:
            if call.callee_name.split(".")[-1] != "MapGroup":
                continue
            if not call.arguments or not call.arguments[0].is_literal:
                continue
            raw = str(call.arguments[0].literal_value or "")
            # Resolve API versioning placeholder → v1
            raw = _re.sub(r"v\{[^}]*[Vv]ersion[^}]*\}", "v1", raw)
            # Strip remaining route constraints: {id:int} → {id}
            raw = _re.sub(r"\{([^:}]+):[^}]+\}", r"{\1}", raw)
            # Normalise to a single leading slash and no trailing slash;
            # an empty literal yields an empty prefix.
            prefix = "/" + raw.strip("/") if raw else ""
            line = call.location.line if call.location else -1
            if line > 0:
                group_lines[line] = prefix

        # Build the full chain-line set for fluent group chaining.
        # A receiver of the form ``line:<N>`` is parsed as a reference to the
        # call on line N (presumably how the parser encodes fluent chains —
        # confirm against the parser). Iterate to a fixed point so chains of
        # any length hanging off a MapGroup call are included.
        chain_lines: set[int] = set(group_lines.keys())
        changed = True
        while changed:
            changed = False
            for call in parsed_file.call_sites:
                recv = call.receiver_expression or ""
                if not recv.startswith("line:"):
                    continue
                try:
                    target = int(recv[5:])
                except ValueError:
                    continue
                if target in chain_lines:
                    ln = call.location.line if call.location else -1
                    if ln > 0 and ln not in chain_lines:
                        chain_lines.add(ln)
                        changed = True

        # Group-level auth: RequireAuthorization chained on the group.
        # This is one flag per file: it is true if any RequireAuthorization
        # call in the file hangs off any group chain.
        has_group_auth = any(
            call.callee_name.split(".")[-1] == "RequireAuthorization"
            and (call.receiver_expression or "").startswith("line:")
            and (
                # Immediately-invoked lambda: binds the receiver so the
                # digit check and the int() conversion share one value.
                lambda rv=call.receiver_expression: (
                    rv[5:].isdigit() and int(rv[5:]) in chain_lines
                )
            )()
            for call in parsed_file.call_sites
        )

        # Find calls to custom Map* extension methods (non-standard, non-chained).
        # These are calls like ``group.MapCreateBrandEndpoint()`` where the method
        # is a project-defined extension, not one of the standard Map* helpers.
        # Note: _STANDARD_MAP filters out MapGet/Post/etc; we keep any custom Map*
        # regardless of receiver name so that group variables named "api", "v1" etc.
        # are also captured.
        # Only one line is kept per method name: a later call overwrites an earlier one.
        ext_call_lines: dict[str, int] = {}
        for call in parsed_file.call_sites:
            method_name = call.callee_name.split(".")[-1]
            recv = call.receiver_expression or ""
            # Skip: no receiver, class-name receiver (uppercase), chained receiver
            if not recv or recv[0].isupper() or recv.startswith("line:"):
                continue
            # Skip: standard Map* methods and the IEndpointGroup groupBuilder receiver
            if not method_name.startswith("Map") or method_name in _STANDARD_MAP:
                continue
            if recv == "groupBuilder":
                continue
            ln = call.location.line if call.location else -1
            if ln > 0:
                ext_call_lines[method_name] = ln

        # Per-extension-method auth overrides (.AllowAnonymous() / .RequireAuthorization()
        # chained on group.MapXxxEndpoint() in the caller file).
        # Values are None (no override) or the name of the chained auth call.
        ext_auth_override: dict[str, str | None] = dict.fromkeys(ext_call_lines, None)
        for call in parsed_file.call_sites:
            method_name = call.callee_name.split(".")[-1]
            recv = call.receiver_expression or ""
            if not recv.startswith("line:"):
                continue
            try:
                target = int(recv[5:])
            except ValueError:
                continue
            for ext_name, ext_line in ext_call_lines.items():
                if target == ext_line and method_name in (
                    "AllowAnonymous",
                    "RequireAuthorization",
                ):
                    ext_auth_override[ext_name] = method_name

        # --- Pattern A: same-file MapGroup with local Map* route calls ---
        # Detect files that call MapGroup AND register routes on the result directly
        # (e.g. eShop OrdersApi.cs: ``var api = app.MapGroup("x"); api.MapPut(...)``)
        if group_lines:
            # True when some standard Map* call (other than MapGroup) has a
            # plain lower-case receiver other than ``groupBuilder``.
            has_local_map_calls = any(
                (m := call.callee_name.split(".")[-1]) in _STANDARD_MAP
                and m != "MapGroup"
                and (recv := call.receiver_expression or "")
                and not recv.startswith("line:")
                and not recv[0].isupper()
                and recv != "groupBuilder"
                for call in parsed_file.call_sites
            )
            unique_prefixes = set(group_lines.values())
            if has_local_map_calls and len(unique_prefixes) == 1:
                grp_prefix = next(iter(unique_prefixes))
                same_file_prefix_map[file_path] = (grp_prefix, has_group_auth, False)
            elif has_local_map_calls and len(unique_prefixes) > 1:
                # Multi-prefix file (e.g. versioned API): assign each route the
                # nearest preceding MapGroup prefix by line number
                same_file_multi_map[file_path] = sorted(
                    [(ln, pfx, has_group_auth) for ln, pfx in group_lines.items()],
                    key=lambda t: t[0],
                )

        # --- Pattern B: cross-file extension method (with MapGroup in caller) ---
        # OR: auth-only from Program.cs (no MapGroup in caller)
        if ext_call_lines:
            if group_lines:
                # Module file: MapGroup + custom extension method calls → cross-file
                # Only an unambiguous single prefix is registered; files with
                # several distinct prefixes are ignored for this pattern.
                unique_prefixes = set(group_lines.values())
                if len(unique_prefixes) == 1:
                    grp_prefix = next(iter(unique_prefixes))
                    for ext_name in ext_call_lines:
                        override = ext_auth_override.get(ext_name)
                        allow_anon = override == "AllowAnonymous"
                        # Auth applies when the group requires it and the call
                        # is not opted out, or when the call opts in itself.
                        route_auth = (
                            has_group_auth and not allow_anon
                        ) or override == "RequireAuthorization"
                        group_registry[ext_name] = (grp_prefix, route_auth, allow_anon)
            else:
                # Program.cs-like file: no MapGroup, but ext method calls with auth
                # (e.g. ``orders.MapOrdersApiV1().RequireAuthorization()``)
                for ext_name, override in ext_auth_override.items():
                    if override == "RequireAuthorization":
                        handler_file = ext_method_to_file.get(ext_name)
                        if handler_file:
                            program_auth_overrides.add(handler_file)

    # ── Step 3: build unified file_path → (prefix, route_auth, allow_anon) ──
    # Same-file patterns take precedence for prefix resolution.
    file_prefix_map: dict[str, tuple[str, bool, bool]] = dict(same_file_prefix_map)
    # Cross-file patterns fill in files not already covered by same-file
    # (the first registered extension method found for a file decides its entry).
    for file_path, method_names in ext_methods_by_file.items():
        for method_name in method_names:
            if method_name in group_registry and file_path not in file_prefix_map:
                file_prefix_map[file_path] = group_registry[method_name]
                break

    # Nothing detected anywhere: hand back the input list untouched.
    if not file_prefix_map and not same_file_multi_map and not program_auth_overrides:
        return routes

    # ── Step 4: apply prefix + auth to matching routes ───────────────────
    updated: list[RouteModel] = []
    for route in routes:
        handler_file = route.handler_location.file if route.handler_location else None
        if not handler_file:
            # Without a handler file the route cannot be matched to any map.
            updated.append(route)
            continue

        has_prog_auth = handler_file in program_auth_overrides

        # Multi-prefix: find the nearest preceding MapGroup by route line
        multi_entries = same_file_multi_map.get(handler_file)
        if multi_entries and route.handler_location:
            route_line = route.handler_location.line
            # Default to the first group so routes located above every
            # MapGroup call still receive a prefix.
            chosen_pfx, chosen_auth = multi_entries[0][1], multi_entries[0][2]
            for grp_line, grp_pfx, grp_auth in multi_entries:
                if grp_line <= route_line:
                    chosen_pfx, chosen_auth = grp_pfx, grp_auth
                else:
                    # Entries are sorted by line, so later groups cannot match.
                    break
            prefix_entry: tuple[str, bool, bool] | None = (chosen_pfx, chosen_auth, False)
        else:
            prefix_entry = file_prefix_map.get(handler_file)

        if prefix_entry:
            prefix, route_auth, allow_anon = prefix_entry
            # Program.cs-level auth override (e.g. .RequireAuthorization() on the
            # extension method call) takes precedence over no-auth from same-file group
            if has_prog_auth:
                route_auth = True
                allow_anon = False

            # Join prefix and path with exactly one slash, then collapse any
            # remaining runs of slashes.
            new_path = prefix.rstrip("/") + "/" + route.path.lstrip("/")
            new_path = _re.sub(r"/+", "/", new_path)

            new_deps = list(route.dependencies or [])
            if allow_anon:
                # Anonymous access: drop every dependency mentioning "Authorize".
                new_deps = [d for d in new_deps if "Authorize" not in d]
            elif route_auth:
                # Add the group-level marker only if the route carries no
                # authorization/permission dependency already.
                if not any("Authorize" in d or "permission" in d.lower() for d in new_deps):
                    new_deps.insert(0, "Authorize@group")

            updated.append(
                route.model_copy(
                    update={
                        "path": new_path,
                        # The id is recomputed because it is derived from the path.
                        "id": stable_id("ep", route.method, new_path),
                        "dependencies": new_deps,
                    }
                )
            )

        elif has_prog_auth:
            # Auth-only override without a prefix change (extension method defines
            # its own routes directly, prefix already correct in the route)
            new_deps = list(route.dependencies or [])
            if not any("Authorize" in d or "permission" in d.lower() for d in new_deps):
                new_deps.insert(0, "Authorize@group")
            updated.append(route.model_copy(update={"dependencies": new_deps}))

        else:
            updated.append(route)

    return updated
|
|
952
|
+
|
|
953
|
+
def _setup_legacy_plugin(self, plugin, language: Language) -> None:
    """
    Wire up a plugin that predates AnalysisContext.

    Older plugins receive their services through optional setter methods
    instead of a context argument. Each setter the plugin exposes is called
    with the matching raw service from the language's analysis context,
    when that service is present. Nothing happens if the language has no
    analysis context.
    """
    context = self._analysis_contexts.get(language)
    if not context:
        return

    # Successfully parsed files of this language, for set_project_context.
    language_files = []
    for parsed in self._parsed_files.values():
        if parsed.success and parsed.language == language:
            language_files.append(parsed)

    # (setter on the plugin, key of the raw service in language_services)
    service_setters = (
        ("set_cross_file_resolver", "_raw_cross_file_resolver"),
        ("set_router_registry", "_raw_router_registry"),
        ("set_path_resolver", "_raw_path_resolver"),
    )
    for setter_name, service_key in service_setters:
        if not hasattr(plugin, setter_name):
            continue
        raw_service = context.language_services.get(service_key)
        if raw_service:
            getattr(plugin, setter_name)(raw_service)

    if hasattr(plugin, "set_project_context"):
        plugin.set_project_context(language_files, project_root=self.config.project_root)
|
|
991
|
+
|
|
992
|
+
def _convert_route(self, route, route_id: str, framework: str = "fastapi") -> RouteModel:
    """Convert ExtractedRoute to RouteModel (framework from plugin).

    Args:
        route: The extracted route produced by a framework plugin.
        route_id: Stable identifier to store on the resulting model.
        framework: Lower-cased framework name recorded on the model.

    Returns:
        A ``RouteModel`` carrying the route's parameters, body, handler
        information and metadata. ``handler_location`` is ``None`` when the
        extracted route has no handler location.
    """

    def located_params(params, fallback_location: str) -> list:
        # Path/query parameters: use the actual ParameterLocation name when
        # available, otherwise the fallback for the list they came from.
        return [
            HttpParameterModel(
                name=p.name,
                location=(
                    p.location.name.lower() if hasattr(p.location, "name") else fallback_location
                ),
                type_annotation=p.type_annotation,
                required=p.required,
                default_value=p.default_value,
                alias=p.alias,
                constraints=p.constraints,
            )
            for p in params
        ]

    def fixed_params(params, location: str) -> list:
        # Header/cookie parameters: location is fixed, and alias/constraints
        # are not carried over (same as the previous behaviour).
        return [
            HttpParameterModel(
                name=p.name,
                location=location,
                type_annotation=p.type_annotation,
                required=p.required,
                default_value=p.default_value,
            )
            for p in params
        ]

    path_params = located_params(route.path_params, "path")
    query_params = located_params(route.query_params, "query")
    header_params = fixed_params(route.header_params, "header")
    cookie_params = fixed_params(route.cookie_params, "cookie")

    source_location = route.handler_location

    # Convert body — enrich field list with types from the resolver
    body = None
    if route.body:
        enriched_fields = self._enrich_body_fields(
            route.body.model_name,
            route.body.model_fields,
            source_location.file if source_location else None,
        )
        body = HttpBodyModel(
            content_type=route.body.content_type,
            model_name=route.body.model_name,
            model_fields=enriched_fields,
            required=route.body.required,
        )

    # The body enrichment above already tolerates a missing handler location;
    # apply the same guard here instead of dereferencing None.
    handler_location = (
        LocationModel(
            file=str(source_location.file),
            line=source_location.line,
            column=source_location.column,
        )
        if source_location
        else None
    )

    return RouteModel(
        id=route_id,
        kind=route.kind,
        method=route.method.name,
        path=route.path,
        path_params=path_params,
        query_params=query_params,
        header_params=header_params,
        cookie_params=cookie_params,
        body=body,
        handler_function=route.handler_function.full,
        handler_location=handler_location,
        framework=framework,
        router_name=route.router_name,
        tags=route.tags,
        operation_id=route.operation_id,
        summary=route.summary,
        deprecated=route.deprecated,
        dependencies=route.dependency_refs,
        response_model=route.response.model_name,
        response_status_code=route.response.status_code,
    )
|
|
1084
|
+
|
|
1085
|
+
# =========================================================================
|
|
1086
|
+
# Body Field Enrichment & Schema Collection
|
|
1087
|
+
# =========================================================================
|
|
1088
|
+
|
|
1089
|
+
def _enrich_body_fields(
    self,
    model_name: str | None,
    raw_field_names: list[str],
    in_file: Path | None,
) -> list[BodyFieldModel]:
    """Turn raw body field names into typed BodyFieldModel objects.

    Without a model name or a type resolver, every raw name becomes a
    name-only field. Otherwise each name is looked up among the fields the
    resolver reports for ``model_name``; names the resolver does not know
    stay name-only.
    """

    def name_only_fields():
        return [BodyFieldModel(name=n) for n in raw_field_names]

    if not model_name:
        return name_only_fields()

    type_resolver = self._get_type_resolver()
    if not type_resolver:
        return name_only_fields()

    known_models = set(type_resolver.get_all_models().keys())
    resolved_fields = type_resolver.get_model_fields(model_name, in_file)
    resolved_by_name = {f.name: f for f in resolved_fields}

    # When raw_field_names is empty (e.g. Ardalis pattern where body type is
    # known but fields were not pre-populated), use the resolved fields directly.
    names_to_process = raw_field_names or [f.name for f in resolved_fields]

    result: list[BodyFieldModel] = []
    for name in names_to_process:
        rf = resolved_by_name.get(name)
        if not rf:
            result.append(BodyFieldModel(name=name))
            continue

        inner_type = self._unwrap_inner_type(rf.type_annotation)
        is_complex = self._is_known_model(inner_type, known_models)
        nested_model = inner_type if is_complex else None
        result.append(
            BodyFieldModel(
                name=name,
                type_annotation=rf.type_annotation,
                required=rf.is_required,
                is_complex=is_complex,
                nested_model=nested_model,
                constraints=rf.constraints,
            )
        )

    return result
|
|
1131
|
+
|
|
1132
|
+
_ORM_BASE_CLASSES = frozenset(
|
|
1133
|
+
{
|
|
1134
|
+
"SQLModel",
|
|
1135
|
+
"DeclarativeBase",
|
|
1136
|
+
"Base",
|
|
1137
|
+
"Model",
|
|
1138
|
+
"db.Model",
|
|
1139
|
+
"Document",
|
|
1140
|
+
"EmbeddedDocument",
|
|
1141
|
+
}
|
|
1142
|
+
)
|
|
1143
|
+
|
|
1144
|
+
def _collect_schemas(self, routes: list[RouteModel]) -> dict[str, SchemaModel]:
    """
    Collect schemas for route models and every model known to the resolver.

    The work queue is seeded with the request-body and response model names
    of ``routes``, followed by the simple name of every model the type
    resolver knows. Each model's fields are resolved, and nested models
    found in those fields are queued as well, until no new models remain.

    Args:
        routes: Routes whose body/response model names determine the
            ``request_body`` / ``response`` usage tags.

    Returns:
        Mapping of model name to ``SchemaModel``. Models for which the
        resolver reports no fields are omitted. Empty when no type resolver
        is available.
    """
    from collections import deque

    type_resolver = self._get_type_resolver()
    if not type_resolver:
        return {}

    all_models = type_resolver.get_all_models()
    known_models = set(all_models.keys())

    # Pre-compute usage sets from routes
    request_body_names: set[str] = set()
    response_model_names: set[str] = set()
    for route in routes:
        if route.body and route.body.model_name:
            request_body_names.add(route.body.model_name)
        if route.response_model:
            response_model_names.add(route.response_model)

    # FIFO work queue; duplicates are harmless because `visited` filters them.
    queue: deque[str] = deque(request_body_names)
    queue.extend(response_model_names)
    queue.extend(model_qname.rsplit(".", 1)[-1] for model_qname in all_models)

    schemas: dict[str, SchemaModel] = {}
    visited: set[str] = set()

    while queue:
        model_name = queue.popleft()
        if model_name in visited:
            continue
        visited.add(model_name)

        resolved_fields = type_resolver.get_model_fields(model_name, None)
        if not resolved_fields:
            continue

        resolved_type = type_resolver.resolve_type(model_name, None)
        qualified_name = resolved_type.qualified_name if resolved_type else None
        base_classes = (
            list(resolved_type.base_classes)
            if resolved_type and resolved_type.base_classes
            else []
        )
        is_enum = resolved_type.is_enum if resolved_type else False

        # Determine usage based on discovery source and base classes
        usage: list[str] = []
        if model_name in request_body_names:
            usage.append("request_body")
        if model_name in response_model_names:
            usage.append("response")
        base_simple_names = {b.rsplit(".", 1)[-1] for b in base_classes}
        if base_simple_names & self._ORM_BASE_CLASSES:
            usage.append("orm")
        if not usage:
            usage.append("domain")

        schema_fields: list[SchemaFieldModel] = []
        for rf in resolved_fields:
            inner_type = self._unwrap_inner_type(rf.type_annotation)
            is_complex = self._is_known_model(inner_type, known_models)
            schema_fields.append(
                SchemaFieldModel(
                    name=rf.name,
                    type_annotation=rf.type_annotation,
                    required=rf.is_required,
                    default_value=rf.default_value,
                    is_complex=is_complex,
                    nested_model=inner_type if is_complex else None,
                    alias=rf.alias,
                    constraints=rf.constraints,
                )
            )
            # Follow nested models so the collection is transitive.
            if is_complex and inner_type and inner_type not in visited:
                queue.append(inner_type)

        schemas[model_name] = SchemaModel(
            name=model_name,
            qualified_name=qualified_name,
            fields=schema_fields,
            base_classes=base_classes,
            is_enum=is_enum,
            usage=usage,
        )

    return schemas
|
|
1248
|
+
|
|
1249
|
+
@staticmethod
|
|
1250
|
+
def _unwrap_inner_type(type_annotation: str | None) -> str | None:
|
|
1251
|
+
"""Strip Optional[...] and list[...] wrappers to get the core type name."""
|
|
1252
|
+
if not type_annotation:
|
|
1253
|
+
return None
|
|
1254
|
+
t = type_annotation
|
|
1255
|
+
for prefix in ("Optional[", "optional["):
|
|
1256
|
+
if t.startswith(prefix) and t.endswith("]"):
|
|
1257
|
+
t = t[len(prefix) : -1].strip()
|
|
1258
|
+
for prefix in (
|
|
1259
|
+
"list[",
|
|
1260
|
+
"List[",
|
|
1261
|
+
"Sequence[",
|
|
1262
|
+
"set[",
|
|
1263
|
+
"Set[",
|
|
1264
|
+
"frozenset[",
|
|
1265
|
+
"FrozenSet[",
|
|
1266
|
+
"tuple[",
|
|
1267
|
+
"Tuple[",
|
|
1268
|
+
):
|
|
1269
|
+
if t.startswith(prefix) and t.endswith("]"):
|
|
1270
|
+
t = t[len(prefix) : -1].strip()
|
|
1271
|
+
# Handle list[Optional[X]]
|
|
1272
|
+
for inner_prefix in ("Optional[", "optional["):
|
|
1273
|
+
if t.startswith(inner_prefix) and t.endswith("]"):
|
|
1274
|
+
t = t[len(inner_prefix) : -1].strip()
|
|
1275
|
+
break
|
|
1276
|
+
return t
|
|
1277
|
+
|
|
1278
|
+
@staticmethod
|
|
1279
|
+
def _is_known_model(type_name: str | None, known_models: set[str]) -> bool:
|
|
1280
|
+
"""Check whether a (simple) type name matches any known model."""
|
|
1281
|
+
if not type_name:
|
|
1282
|
+
return False
|
|
1283
|
+
if type_name in known_models:
|
|
1284
|
+
return True
|
|
1285
|
+
# known_models contains qualified names; check if the simple name
|
|
1286
|
+
# matches the suffix of any qualified name.
|
|
1287
|
+
return any(qn.endswith("." + type_name) for qn in known_models)
|
|
1288
|
+
|
|
1289
|
+
def _get_type_resolver(self):
    """Return the type resolver of the first available analysis context.

    Contexts are tried in the order Python, Java, C#; ``None`` is returned
    when none of them is present.
    """
    contexts = self._analysis_contexts
    for language in (Language.PYTHON, Language.JAVA, Language.CSHARP):
        ctx = contexts.get(language)
        if ctx:
            return ctx.type_resolver
    return None
|
|
1297
|
+
|
|
1298
|
+
# =========================================================================
|
|
1299
|
+
# Dependency Extraction
|
|
1300
|
+
# =========================================================================
|
|
1301
|
+
|
|
1302
|
+
def _extract_all_dependencies(self) -> list[dict]:
|
|
1303
|
+
"""Extract all dependency definitions."""
|
|
1304
|
+
dependencies = []
|
|
1305
|
+
|
|
1306
|
+
for parsed_file in self._parsed_files.values():
|
|
1307
|
+
if not parsed_file.success:
|
|
1308
|
+
continue
|
|
1309
|
+
|
|
1310
|
+
for fw in self._detected_frameworks:
|
|
1311
|
+
plugin = FrameworkPluginRegistry.get_plugin(fw)
|
|
1312
|
+
if plugin and plugin.detect(parsed_file):
|
|
1313
|
+
deps = plugin.extract_dependencies(parsed_file)
|
|
1314
|
+
dependencies.extend(deps)
|
|
1315
|
+
|
|
1316
|
+
return dependencies
|
|
1317
|
+
|
|
1318
|
+
# =========================================================================
|
|
1319
|
+
# Auth Extraction
|
|
1320
|
+
# =========================================================================
|
|
1321
|
+
|
|
1322
|
+
def _extract_auth_data(self) -> AuthModel:
    """Extract authentication schemes, auth dependencies and JWT configuration.

    The work happens in four stages:

    1. Pre-pass over every successfully parsed file, collecting the names
       passed to ``Depends(...)`` and the names of all auth schemes, so that
       definitions and usages living in different files can be correlated.
    2. Per-file extraction through each detected framework plugin that
       recognises the file. Records that fail model validation are logged
       and skipped instead of aborting the analysis.
    3. Framework-independent JWT detection from imports, run only when no
       plugin reported JWT usage.
    4. A synthetic ``JWT_BEARER`` scheme when JWT usage was detected but no
       scheme was extracted.

    Returns:
        An ``AuthModel`` holding the detected schemes, the auth dependencies
        and the JWT configuration.
    """
    import re as _re

    schemes: list[AuthSchemeModel] = []
    auth_deps: list[AuthDependencyModel] = []
    jwt_config = JwtConfigModel()

    # Compiled once: both patterns are applied to every Depends() argument
    # and every parameter default in the project.
    factory_call_re = _re.compile(r"(\w+)\s*\(")
    depends_arg_re = _re.compile(r"Depends\s*\(\s*(\w+)")

    # Pre-pass: collect all Depends() argument names and all auth-scheme
    # variable names across the entire project. This bridges the standard
    # FastAPI pattern where auth deps are defined in deps.py but used via
    # Depends() in routers/*.py — both sets will be non-empty even though
    # neither file contains both halves.
    all_project_depends_names: set[str] = set()
    all_project_scheme_names: set[str] = set()
    for _pf in self._parsed_files.values():
        if not _pf.success:
            continue
        for _call in _pf.call_sites:
            if getattr(_call, "callee_name", None) != "Depends":
                continue
            for _arg in _call.arguments:
                if getattr(_arg, "is_variable", False) and _arg.variable_name:
                    # Direct reference: Depends(get_current_user)
                    all_project_depends_names.add(_arg.variable_name)
                if getattr(_arg, "literal_value", None):
                    all_project_depends_names.add(str(_arg.literal_value))
                # Factory-call pattern: Depends(get_current_user_authorizer())
                _expr = getattr(_arg, "expression_text", None) or ""
                _fm = factory_call_re.match(_expr)
                if _fm:
                    all_project_depends_names.add(_fm.group(1))
        for _func in _pf.functions:
            for _param in _func.parameters:
                if _param.default_value:
                    for _m in depends_arg_re.finditer(_param.default_value):
                        all_project_depends_names.add(_m.group(1))
        for _fw in self._detected_frameworks:
            _plugin = FrameworkPluginRegistry.get_plugin(_fw)
            if _plugin and _plugin.detect(_pf):
                for _scheme in _plugin.extract_auth_schemes(_pf):
                    if _scheme.name:
                        all_project_scheme_names.add(_scheme.name)

    for parsed_file in self._parsed_files.values():
        if not parsed_file.success:
            continue

        for fw in self._detected_frameworks:
            plugin = FrameworkPluginRegistry.get_plugin(fw)
            if not (plugin and plugin.detect(parsed_file)):
                continue

            # Extract schemes and collect names for dependency correlation
            file_scheme_names: set[str] = set()
            for scheme in plugin.extract_auth_schemes(parsed_file):
                file_scheme_names.add(scheme.name)
                try:
                    schemes.append(
                        AuthSchemeModel(
                            id=stable_id(
                                "scheme",
                                scheme.scheme_type.name,
                                str(scheme.location.file),
                                str(scheme.location.line),
                            ),
                            type=scheme.scheme_type.name,
                            framework=fw.name.lower(),
                            location=LocationModel(
                                file=str(scheme.location.file),
                                line=scheme.location.line,
                            ),
                            config=scheme.config,
                        )
                    )
                except ValidationError as exc:
                    # Malformed scheme record — skip rather than abort the
                    # entire analysis. A single framework plugin producing a
                    # bad LocationModel previously killed the whole run.
                    logger.warning(
                        "Skipping malformed AuthSchemeModel for scheme=%r file=%s: %s",
                        scheme.name,
                        getattr(scheme.location, "file", "<unknown>"),
                        exc,
                    )

            # Extract auth dependencies using cross-file pre-pass sets
            # so that deps defined in deps.py and used in routers/*.py
            # are correctly detected in both directions.
            for dep in plugin.extract_auth_dependencies(
                parsed_file,
                known_scheme_names=file_scheme_names | all_project_scheme_names,
                all_project_depends_names=all_project_depends_names,
            ):
                try:
                    auth_deps.append(
                        AuthDependencyModel(
                            id=stable_id("dep", dep.name, dep.dependency_type.name),
                            name=dep.name,
                            type=dep.dependency_type.name,
                            location=LocationModel(
                                file=str(dep.location.file),
                                line=dep.location.line,
                            ),
                            uses_schemes=list(dep.uses_schemes),
                            extracts=dep.extracts_fields,
                            validates=dep.validates,
                            jwt_operations=dep.jwt_operations,
                        )
                    )
                except ValidationError as exc:
                    logger.warning(
                        "Skipping malformed AuthDependencyModel for dep=%r file=%s: %s",
                        dep.name,
                        getattr(dep.location, "file", "<unknown>"),
                        exc,
                    )

            # Extract JWT config (the last plugin/file reporting JWT wins)
            jwt = plugin.extract_jwt_config(parsed_file)
            if jwt and jwt.detected:
                jwt_config.detected = True
                jwt_config.library = jwt.library
                jwt_config.algorithms = jwt.algorithms

    # Framework-independent JWT scan: detect JWT usage in files that
    # framework plugins don't cover (e.g. service modules without FastAPI imports).
    if not jwt_config.detected:
        jwt_libraries = {"jose", "python-jose", "pyjwt", "jwt", "authlib"}
        jwt_prefixes = tuple(f"{lib}." for lib in jwt_libraries)
        for parsed_file in self._parsed_files.values():
            if not parsed_file.success:
                continue
            for imp in parsed_file.imports:
                # An import record without a module name must not crash the
                # scan; treat it as "not a JWT library".
                module = imp.module or ""
                if module not in jwt_libraries and not module.startswith(jwt_prefixes):
                    continue
                jwt_config.detected = True
                jwt_config.library = module.split(".")[0]
                for call in parsed_file.call_sites:
                    # The pre-pass already treats callee_name as possibly
                    # absent; apply the same tolerance here.
                    callee = (getattr(call, "callee_name", None) or "").lower()
                    if "decode" in callee or "encode" in callee:
                        jwt_config.locations.append(call.location)
                # First matching import decides the library for this file.
                break
            if jwt_config.detected:
                break

    # Synthetic fallback: if JWT usage is detected (via any library) but no
    # framework-specific security objects were found, create an inferred
    # JWT_BEARER scheme so downstream auth mapping has something to match.
    if jwt_config.detected and not schemes:
        # jwt_config.locations contains CodeLocation objects (file: Path);
        # AuthSchemeModel.location expects a LocationModel (file: str).
        # Always construct explicitly to avoid Pydantic validation failures.
        if jwt_config.locations:
            raw_loc = jwt_config.locations[0]
            loc = LocationModel(
                file=str(getattr(raw_loc, "file", "unknown")),
                line=getattr(raw_loc, "line", 0) or 0,
            )
        else:
            loc = LocationModel(file="unknown", line=0)
        try:
            schemes.append(
                AuthSchemeModel(
                    id=stable_id("scheme", "JWT_BEARER", jwt_config.library or "jwt"),
                    type="JWT_BEARER",
                    framework="generic",
                    location=loc,
                    config={
                        "library": jwt_config.library,
                        "algorithms": jwt_config.algorithms,
                        "synthetic": True,
                    },
                )
            )
        except ValidationError as exc:
            logger.warning("Skipping synthetic JWT_BEARER AuthSchemeModel: %s", exc)

    return AuthModel(
        schemes_detected=schemes,
        auth_dependencies=auth_deps,
        jwt_config=jwt_config,
    )
|
|
1507
|
+
|
|
1508
|
+
def _propagate_file_auth_to_routes(
|
|
1509
|
+
self,
|
|
1510
|
+
routes: list[RouteModel],
|
|
1511
|
+
auth_model: AuthModel,
|
|
1512
|
+
) -> None:
|
|
1513
|
+
"""
|
|
1514
|
+
Propagate file-level auth dependencies to routes that have no per-route
|
|
1515
|
+
auth signal (router_name is None).
|
|
1516
|
+
|
|
1517
|
+
Middleware-based auth frameworks (Express, NestJS without per-route guards)
|
|
1518
|
+
express auth as router.use(middleware) at the sub-router level.
|
|
1519
|
+
extract_auth_dependencies() correctly detects these as MIDDLEWARE-type deps,
|
|
1520
|
+
but the routes themselves carry no router_name because the middleware is not
|
|
1521
|
+
an inline argument to the route registration call.
|
|
1522
|
+
|
|
1523
|
+
This pass closes that gap: for each route without router_name, if its
|
|
1524
|
+
handler file has a MIDDLEWARE auth dependency, set router_name to that
|
|
1525
|
+
dependency's name so the benchmark's auth signal is populated.
|
|
1526
|
+
|
|
1527
|
+
Only MIDDLEWARE-type deps are used — CLASS/DECORATOR/ANNOTATION deps are
|
|
1528
|
+
already captured directly on routes during extraction.
|
|
1529
|
+
"""
|
|
1530
|
+
# Build file → first MIDDLEWARE auth dep name
|
|
1531
|
+
file_to_auth_dep: dict[str, str] = {}
|
|
1532
|
+
for dep in auth_model.auth_dependencies:
|
|
1533
|
+
if dep.type != "MIDDLEWARE":
|
|
1534
|
+
continue
|
|
1535
|
+
if not dep.location or not dep.location.file:
|
|
1536
|
+
continue
|
|
1537
|
+
file_key = dep.location.file
|
|
1538
|
+
if file_key not in file_to_auth_dep:
|
|
1539
|
+
file_to_auth_dep[file_key] = dep.name
|
|
1540
|
+
|
|
1541
|
+
if not file_to_auth_dep:
|
|
1542
|
+
return
|
|
1543
|
+
|
|
1544
|
+
for route in routes:
|
|
1545
|
+
if route.router_name is not None:
|
|
1546
|
+
continue # already has a per-route auth signal
|
|
1547
|
+
if not route.handler_location or not route.handler_location.file:
|
|
1548
|
+
continue
|
|
1549
|
+
dep_name = file_to_auth_dep.get(route.handler_location.file)
|
|
1550
|
+
if dep_name:
|
|
1551
|
+
route.router_name = dep_name
|
|
1552
|
+
|
|
1553
|
+
def _apply_spring_filter_chain_policy(self, routes: list[RouteModel]) -> None:
    """Mark Spring/GraphQL routes covered by the global filter-chain policy.

    Does nothing unless the registered Spring Boot plugin is a
    ``SpringBootPlugin`` and it yields a policy whose ``any_request_auth`` is
    truthy. Routes that already carry a ``router_name`` are left alone; the
    remaining routes whose framework is ``spring_boot`` or ``graphql`` get
    ``router_name="SecurityFilterChain"`` when ``policy.requires_auth`` accepts
    their path. Routes are mutated in place.
    """
    from ..frameworks.java.spring_plugin import FilterChainPolicy, SpringBootPlugin

    spring_plugin = FrameworkPluginRegistry.get_plugin(Framework.SPRING_BOOT)
    if not isinstance(spring_plugin, SpringBootPlugin):
        return

    all_parsed = list(self._parsed_files.values())
    policy: FilterChainPolicy | None = spring_plugin.get_filter_chain_policy(all_parsed)
    if policy is None or not policy.any_request_auth:
        return

    covered_frameworks = ("spring_boot", "graphql")
    for route in routes:
        if (
            route.router_name is None
            and route.framework in covered_frameworks
            and policy.requires_auth(route.path)
        ):
            route.router_name = "SecurityFilterChain"
|
|
1579
|
+
|
|
1580
|
+
def _populate_route_auth_mapping(
    self,
    auth_model: AuthModel,
    routes: list[RouteModel],
    middleware_list: list[MiddlewareModel] | None = None,
) -> None:
    """Build ``auth_model.route_auth_mapping`` with one entry per route.

    A route dependency counts as an auth mechanism when its full name or its
    last dotted component is the name of a known auth dependency, or when
    that last component contains (case-insensitively) one of the auth
    keywords listed below.

    Global auth middleware — entries of ``middleware_list`` whose
    ``operations`` contain ``"auth"`` and whose ``applies_to`` equals
    ``["all"]`` — protects every route that has no matching route-level
    mechanism; such routes get the mechanisms ``middleware:<name>``. An empty
    ``applies_to`` is deliberately not treated as global.

    Also sets ``auth_model.global_middleware_auth`` to whether any global auth
    middleware was found.
    """
    auth_keywords = (
        "auth",
        "user",
        "token",
        "permission",
        "login",
        "verify",
        "current_user",
        "principal",
        "credential",
        "identity",
        "session",
        "bearer",
        "api_key",
        "apikey",
        "security",
        "authorize",
        "authenticated",
    )

    known_auth_names = {dep.name for dep in auth_model.auth_dependencies}

    global_auth_middleware: list[MiddlewareModel] = [
        mw
        for mw in (middleware_list or [])
        if "auth" in mw.operations and mw.applies_to == ["all"]
    ]
    has_global_auth_middleware = bool(global_auth_middleware)
    middleware_mechanisms = [f"middleware:{mw.name}" for mw in global_auth_middleware]

    def _looks_like_auth(dep_name: str) -> bool:
        """Exact match against known auth deps first, keyword heuristic second."""
        base = dep_name.rsplit(".", 1)[-1]
        if base in known_auth_names or dep_name in known_auth_names:
            return True
        lowered = base.lower()
        return any(keyword in lowered for keyword in auth_keywords)

    mapping: list[RouteAuthModel] = []
    for route in routes:
        if not route.dependencies:
            # No route-level dependencies: only global middleware can protect it.
            if has_global_auth_middleware:
                entry = RouteAuthModel(
                    route_id=route.id,
                    auth_required=True,
                    mechanisms=list(middleware_mechanisms),
                )
            else:
                entry = RouteAuthModel(
                    route_id=route.id,
                    auth_required=False,
                )
            mapping.append(entry)
            continue

        matched_mechanisms: list[str] = [
            dep_name for dep_name in route.dependencies if _looks_like_auth(dep_name)
        ]

        # Route-level deps did not match, but middleware can still provide auth.
        if not matched_mechanisms and has_global_auth_middleware:
            matched_mechanisms = list(middleware_mechanisms)

        mapping.append(
            RouteAuthModel(
                route_id=route.id,
                auth_required=bool(matched_mechanisms),
                mechanisms=matched_mechanisms,
            )
        )

    auth_model.route_auth_mapping = mapping
    auth_model.global_middleware_auth = has_global_auth_middleware
|
|
1688
|
+
|
|
1689
|
+
def _link_routes_to_auth_schemes(
|
|
1690
|
+
self,
|
|
1691
|
+
routes: list[RouteModel],
|
|
1692
|
+
auth_model: AuthModel,
|
|
1693
|
+
) -> None:
|
|
1694
|
+
"""Populate ``auth_config_ref`` on each route from the auth model.
|
|
1695
|
+
|
|
1696
|
+
Strategy:
|
|
1697
|
+
1. Build a dep-name → scheme-id index from ``auth_dependencies.uses_schemes``.
|
|
1698
|
+
2. For each authenticated route, look up its mechanisms (dep names) in
|
|
1699
|
+
that index and set ``auth_config_ref`` to the first matching scheme.
|
|
1700
|
+
3. Fallback: if the index yields no match but auth *is* required and
|
|
1701
|
+
at least one scheme exists, assign the first scheme (common in
|
|
1702
|
+
single-scheme apps).
|
|
1703
|
+
"""
|
|
1704
|
+
scheme_by_dep: dict[str, str] = {}
|
|
1705
|
+
for dep in auth_model.auth_dependencies:
|
|
1706
|
+
for scheme_id in dep.uses_schemes:
|
|
1707
|
+
scheme_by_dep[dep.name] = scheme_id
|
|
1708
|
+
|
|
1709
|
+
mapping_by_route: dict[str, RouteAuthModel] = {
|
|
1710
|
+
m.route_id: m for m in auth_model.route_auth_mapping
|
|
1711
|
+
}
|
|
1712
|
+
fallback_scheme = auth_model.schemes_detected[0].id if auth_model.schemes_detected else None
|
|
1713
|
+
|
|
1714
|
+
for route in routes:
|
|
1715
|
+
mapping = mapping_by_route.get(route.id)
|
|
1716
|
+
if not mapping or not mapping.auth_required:
|
|
1717
|
+
continue
|
|
1718
|
+
for mech in mapping.mechanisms:
|
|
1719
|
+
mech_base = mech.rsplit(".", 1)[-1]
|
|
1720
|
+
if mech_base in scheme_by_dep:
|
|
1721
|
+
route.auth_config_ref = scheme_by_dep[mech_base]
|
|
1722
|
+
break
|
|
1723
|
+
if mech in scheme_by_dep:
|
|
1724
|
+
route.auth_config_ref = scheme_by_dep[mech]
|
|
1725
|
+
break
|
|
1726
|
+
if not route.auth_config_ref and fallback_scheme:
|
|
1727
|
+
route.auth_config_ref = fallback_scheme
|
|
1728
|
+
|
|
1729
|
+
# =========================================================================
|
|
1730
|
+
# Middleware Extraction
|
|
1731
|
+
# =========================================================================
|
|
1732
|
+
|
|
1733
|
+
def _extract_middleware(self) -> list[MiddlewareModel]:
    """Extract middleware definitions from every successfully parsed file.

    Each detected framework whose plugin exists and recognises a file
    contributes the middleware it extracts from that file. ``applies_to`` is
    the middleware's patterns (or ``["all"]`` when it has none) if it applies
    to everything, and an empty list otherwise.

    A record that fails model validation is logged and skipped, matching the
    handling of malformed scheme/dependency records in auth extraction, so a
    single bad plugin record does not abort the analysis.

    Returns:
        The list of successfully built ``MiddlewareModel`` instances.
    """
    middleware: list[MiddlewareModel] = []

    for parsed_file in self._parsed_files.values():
        if not parsed_file.success:
            continue

        for fw in self._detected_frameworks:
            plugin = FrameworkPluginRegistry.get_plugin(fw)
            if not (plugin and plugin.detect(parsed_file)):
                continue

            for mw in plugin.extract_middleware(parsed_file):
                # Spelled out: the original one-liner parses as
                # ``(patterns or ["all"]) if applies_to_all else []``.
                if mw.applies_to_all:
                    applies_to = mw.applies_to_patterns or ["all"]
                else:
                    applies_to = []

                try:
                    middleware.append(
                        MiddlewareModel(
                            id=stable_id("mw", mw.name, mw.middleware_type),
                            name=mw.name,
                            type=mw.middleware_type,
                            location=LocationModel(
                                file=str(mw.location.file),
                                line=mw.location.line,
                            ),
                            order=mw.order,
                            applies_to=applies_to,
                            operations=mw.operations,
                        )
                    )
                except ValidationError as exc:
                    logger.warning(
                        "Skipping malformed MiddlewareModel for middleware=%r file=%s: %s",
                        mw.name,
                        getattr(mw.location, "file", "<unknown>"),
                        exc,
                    )

    return middleware
|
|
1763
|
+
|
|
1764
|
+
# =========================================================================
|
|
1765
|
+
# Call Site Extraction
|
|
1766
|
+
# =========================================================================
|
|
1767
|
+
|
|
1768
|
+
def _extract_all_calls(self) -> list[FunctionCallModel]:
    """Extract all function call sites from the call graph.

    Assigns a ``sequence_index`` to each call (its position among the calls
    of the same caller, ordered by line number) so intra-function ordering
    can be reasoned about (e.g. mutation-before-authz).

    Malformed call records never abort the extraction: they are tolerated
    while computing sequence indexes, and a record that cannot be converted
    is logged, recorded in ``self._warnings`` and skipped.

    Returns:
        The converted calls, or an empty list when there is no call graph.
    """
    if not self._call_graph:
        return []

    from collections import defaultdict

    raw_calls = list(self._call_graph.to_manifest_calls())

    def _line_of(call_record: dict) -> int:
        """Sort key: the record's line number, 0 when missing or malformed."""
        location = call_record.get("location")
        line = location.get("line") if isinstance(location, dict) else None
        return line if isinstance(line, int) else 0

    # Group by caller and sort by line number to assign sequence_index.
    # Uses .get() so a record lacking "caller"/"location" reaches the guarded
    # conversion below instead of raising here, outside any try block.
    by_caller: dict[str, list[dict]] = defaultdict(list)
    for cd in raw_calls:
        by_caller[cd.get("caller")].append(cd)
    for group in by_caller.values():
        group.sort(key=_line_of)
        for idx, cd in enumerate(group):
            cd["_sequence_index"] = idx

    calls: list[FunctionCallModel] = []
    for call_dict in raw_calls:
        try:
            # "context" may be present but None; treat that like missing.
            ctx = call_dict.get("context") or {}
            call = FunctionCallModel(
                id=call_dict["id"],
                caller=call_dict["caller"],
                callee=call_dict["callee"],
                callee_resolved=call_dict["callee_resolved"],
                location=LocationModel(
                    file=call_dict["location"]["file"],
                    line=call_dict["location"]["line"],
                    column=call_dict["location"].get("column", 0),
                ),
                context=CallContextModel(
                    in_try_block=ctx.get("in_try_block", False),
                    in_conditional=ctx.get("in_conditional", False),
                    in_loop=ctx.get("in_loop", False),
                ),
                possible_callees=call_dict.get("possible_callees", []),
                sequence_index=call_dict.get("_sequence_index"),
            )
            calls.append(call)
        except (KeyError, ValueError, TypeError) as e:
            logger.error("Failed to convert call: %s", e, exc_info=True)
            # The location itself may be what is malformed; never raise from
            # inside the error handler.
            bad_location = call_dict.get("location")
            self._warnings.append(
                AnalysisWarning(
                    phase="call_extraction",
                    message=f"Failed to convert call: {e}",
                    file=bad_location.get("file") if isinstance(bad_location, dict) else None,
                )
            )

    return calls
|
|
1825
|
+
|
|
1826
|
+
# =========================================================================
|
|
1827
|
+
# Data Flow Analysis
|
|
1828
|
+
# =========================================================================
|
|
1829
|
+
|
|
1830
|
+
def _analyze_data_flows(
    self, routes: list[RouteModel]
) -> tuple[list[DataFlowModel], dict[str, int]]:
    """
    Run data-flow analysis using the routes as entry points.

    Builds one entry-point record per route, resolves handler names against
    the call graph, runs ``DataFlowAnalyzer`` and converts its manifest flows
    into ``DataFlowModel`` instances. Conversion and analysis failures are
    logged and recorded in ``self._warnings`` instead of being raised.

    Returns ``(flows, stats)``; ``stats`` is also stored on
    ``self._data_flow_stats``. Without a call graph, or without any
    successfully parsed file, the result is an empty list plus zeroed stats.
    """

    def _named(params) -> list[dict]:
        """Reduce a (possibly missing) parameter list to ``{"name": ...}`` records."""
        return [{"name": param.name} for param in (params or [])]

    def _body_record(body):
        """Describe a request body by its field names; ``None`` when there is no body."""
        if not body:
            return None
        field_records = [
            {"name": field.name if hasattr(field, "name") else field}
            for field in (body.model_fields or [])
        ]
        return {"model_fields": field_records}

    flows: list[DataFlowModel] = []
    self._data_flow_stats = {
        "entry_points_analyzed": 0,
        "origins_identified": 0,
        "flows_discovered": 0,
        "truncated_flows": 0,
    }

    if not self._call_graph:
        return flows, self._data_flow_stats

    successful_files = [pf for pf in self._parsed_files.values() if pf.success]
    if not successful_files:
        return flows, self._data_flow_stats

    entry_points = []
    for route in routes:
        entry_points.append(
            {
                "id": route.id,
                "path": route.path,
                "handler_qualified_name": route.handler_function,
                "framework": route.framework,
                "path_params": _named(route.path_params),
                "query_params": _named(route.query_params),
                "header_params": _named(route.header_params),
                "cookie_params": _named(route.cookie_params),
                "body": _body_record(route.body),
            }
        )

    # Shared flow-sensitive bindings are reused rather than rebuilt here.
    flow_bindings = self._flow_bindings

    # Align handler names with call graph symbols before analysis.
    entry_points = self._resolve_entry_points(entry_points)

    max_depth = self.config.analysis.data_flow.max_depth

    try:
        df_analyzer = DataFlowAnalyzer(
            call_graph=self._call_graph,
            parsed_files=successful_files,
            entry_points=entry_points,
            flow_bindings=flow_bindings,
            max_depth=max_depth,
        )
        df_analyzer.analyze()
        self._data_flow_stats = df_analyzer.get_statistics()

        # Convert to manifest format, one flow at a time so a single bad
        # record does not discard the rest.
        for flow_dict in df_analyzer.to_manifest_flows():
            try:
                flows.append(DataFlowModel(**flow_dict))
            except (KeyError, ValueError, TypeError) as e:
                logger.error("Failed to convert data flow to model: %s", e, exc_info=True)
                self._warnings.append(
                    AnalysisWarning(
                        phase="data_flow",
                        message=f"Failed to convert data flow: {e}",
                    )
                )

    except Exception as e:
        logger.error("Data flow analysis failed: %s", e, exc_info=True)
        self._warnings.append(
            AnalysisWarning(
                phase="data_flow",
                message=f"Data flow analysis failed: {e}",
            )
        )

    return flows, self._data_flow_stats
|
|
1919
|
+
|
|
1920
|
+
def _resolve_entry_points(self, entry_points: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
1921
|
+
"""
|
|
1922
|
+
Resolve handler qualified names to call graph symbols.
|
|
1923
|
+
|
|
1924
|
+
Handles mismatches between route extraction and call graph naming
|
|
1925
|
+
(e.g., module path variations).
|
|
1926
|
+
"""
|
|
1927
|
+
if not self._call_graph:
|
|
1928
|
+
return entry_points
|
|
1929
|
+
|
|
1930
|
+
symbols = set(self._call_graph._symbols.keys())
|
|
1931
|
+
resolved = []
|
|
1932
|
+
|
|
1933
|
+
for ep in entry_points:
|
|
1934
|
+
handler = ep.get("handler_qualified_name")
|
|
1935
|
+
if not handler:
|
|
1936
|
+
resolved.append(ep)
|
|
1937
|
+
continue
|
|
1938
|
+
|
|
1939
|
+
if handler in symbols:
|
|
1940
|
+
resolved.append(ep)
|
|
1941
|
+
continue
|
|
1942
|
+
|
|
1943
|
+
# Try common variations
|
|
1944
|
+
candidates = [
|
|
1945
|
+
handler,
|
|
1946
|
+
handler.replace(".__init__", ""),
|
|
1947
|
+
]
|
|
1948
|
+
# Try with/without package prefix (e.g., "app.main.get_user" -> "main.get_user")
|
|
1949
|
+
if "." in handler:
|
|
1950
|
+
parts = handler.split(".")
|
|
1951
|
+
for i in range(1, len(parts)):
|
|
1952
|
+
variant = ".".join(parts[i:])
|
|
1953
|
+
if variant not in candidates:
|
|
1954
|
+
candidates.append(variant)
|
|
1955
|
+
|
|
1956
|
+
for cand in candidates:
|
|
1957
|
+
if cand in symbols:
|
|
1958
|
+
ep = dict(ep)
|
|
1959
|
+
ep["handler_qualified_name"] = cand
|
|
1960
|
+
break
|
|
1961
|
+
resolved.append(ep)
|
|
1962
|
+
|
|
1963
|
+
return resolved
|
|
1964
|
+
|
|
1965
|
+
# =========================================================================
|
|
1966
|
+
# Integration Detection
|
|
1967
|
+
# =========================================================================
|
|
1968
|
+
|
|
1969
|
+
def _detect_integrations(self) -> list[IntegrationModel]:
    """Run ``IntegrationDetector`` over all parsed files and return its manifest models."""
    from .integration_detector import IntegrationDetector

    all_parsed = list(self._parsed_files.values())
    return IntegrationDetector(all_parsed).to_manifest_models()
|
|
1975
|
+
|
|
1976
|
+
def _scan_literals(self) -> LiteralsModel:
    """Run ``LiteralScanner`` over all parsed files and return its manifest models."""
    from .literal_scanner import LiteralScanner

    all_parsed = list(self._parsed_files.values())
    return LiteralScanner(all_parsed).to_manifest_models()
|
|
1982
|
+
|
|
1983
|
+
def _classify_sensitivity(
    self,
    integrations: list[IntegrationModel],
    literals: LiteralsModel,
) -> None:
    """Enrich integrations and SQL patterns with sensitivity data.

    Both ``integrations`` and ``literals.sql_patterns`` are handed to a
    ``SensitivityClassifier`` for enrichment; nothing is returned.
    """
    from .sensitivity_classifier import SensitivityClassifier

    sensitivity = SensitivityClassifier()
    sensitivity.enrich_integrations(integrations)
    sensitivity.enrich_sql_patterns(literals.sql_patterns)
|
|
1994
|
+
|
|
1995
|
+
def _tag_capabilities(
    self,
    integrations: list[IntegrationModel],
    routes: list[RouteModel],
    configuration: ConfigurationModel,
) -> list[CapabilityModel]:
    """Return the capabilities ``CapabilityTagger`` derives from the given
    integrations, routes and configuration."""
    from .capability_tagger import CapabilityTagger

    return CapabilityTagger().tag(integrations, routes, configuration)
|
|
2006
|
+
|
|
2007
|
+
# =========================================================================
|
|
2008
|
+
# Configuration Extraction
|
|
2009
|
+
# =========================================================================
|
|
2010
|
+
|
|
2011
|
+
def _extract_configuration(self) -> ConfigurationModel:
    """Collect environment-variable usage and configuration files.

    Three sources are combined:

    1. Calls to ``os.environ.get`` / ``os.getenv`` / ``environ.get`` in
       successfully parsed files whose first argument is a literal; every
       call location is recorded under the variable name.
    2. Environment-variable models reported by ``ConfigScanner`` for the
       project root; only names not already found in code are added.
    3. Configuration-file models reported by the same scanner.
    """
    from .config_scanner import ConfigScanner

    env_getters = {"os.environ.get", "os.getenv", "environ.get"}
    locations_by_var: dict[str, list[LocationModel]] = {}

    # Phase 1: env vars referenced in code
    for parsed_file in self._parsed_files.values():
        if not parsed_file.success:
            continue
        for call in parsed_file.call_sites:
            if call.callee_name not in env_getters:
                continue
            for arg in call.arguments:
                if arg.position != 0 or not arg.literal_value:
                    continue
                locations_by_var.setdefault(str(arg.literal_value), []).append(
                    LocationModel(
                        file=str(call.location.file),
                        line=call.location.line,
                    )
                )

    env_vars: list[EnvVarUsageModel] = []
    for var_name, var_locations in locations_by_var.items():
        env_vars.append(EnvVarUsageModel(name=var_name, locations=var_locations))

    # Phase 2: merge names that only appear in files found by the scanner
    scanner = ConfigScanner(self.config.project_root)
    names_from_code = {usage.name for usage in env_vars}
    env_vars.extend(
        file_var
        for file_var in scanner.to_env_var_models()
        if file_var.name not in names_from_code
    )

    # Phase 3: discover config files
    return ConfigurationModel(
        env_vars_used=env_vars,
        config_files=scanner.to_config_file_models(),
    )
|
|
2052
|
+
|
|
2053
|
+
# =========================================================================
|
|
2054
|
+
# Function/Class Collection
|
|
2055
|
+
# =========================================================================
|
|
2056
|
+
|
|
2057
|
+
def _collect_all_functions(self) -> tuple[list[FunctionModel], dict[str, str]]:
    """Gather every free function and class method from the parsed files.

    Files whose parse did not succeed are ignored.  For each remaining file,
    its free functions are emitted first, followed by the methods of each of
    its classes.

    Returns:
        A ``(functions, index)`` pair, where ``index`` maps a function's
        qualified name to its generated ID so that ``_collect_all_classes``
        can resolve method IDs.
    """
    collected: list[FunctionModel] = []
    id_by_qualified_name: dict[str, str] = {}

    def _short_digest(text: str) -> str:
        """Return the first 16 hex characters of the SHA-256 of *text* (UTF-8)."""
        return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]

    def _normalize_body(source: str | None) -> str:
        """Reduce a function body to the text that takes part in hashing.

        Each line is stripped, everything from the first ``#`` onwards is
        dropped (this also cuts a ``#`` that appears inside a string
        literal), and lines that end up empty are discarded.
        """
        if not source:
            return ""
        code_only = (
            raw.strip().partition("#")[0].rstrip() for raw in source.splitlines()
        )
        return "\n".join(text for text in code_only if text)

    def _content_hash(func: ParsedFunction) -> str:  # noqa: F821
        """Fingerprint a function from its parameters, return type, decorators and body."""
        pieces = [
            f"{param.name}:{param.type_annotation or ''}" for param in func.parameters
        ]
        pieces.append(f"ret:{func.return_type or ''}")
        decorator_names = ",".join(dec.name for dec in func.decorators)
        pieces.append(f"ann:{decorator_names}")
        body_digest = _short_digest(_normalize_body(func.body_source))
        pieces.append(f"body:{body_digest}")
        return _short_digest("|".join(pieces))

    def _register(func: ParsedFunction) -> None:  # noqa: F821
        """Record *func* in both the result list and the name -> ID index."""
        qualified = func.qualified_name.full
        func_id = stable_id("func", qualified)
        id_by_qualified_name[qualified] = func_id

        where = func.location
        location = LocationModel(
            file=str(where.file),
            line=where.line,
            column=where.column,
            end_line=where.end_line,
        )
        visibility = self._infer_visibility(func.name)
        parameters = [
            ParameterModel(
                name=param.name,
                type_annotation=param.type_annotation,
                default_value=param.default_value,
                is_variadic=param.is_variadic,
                is_keyword_variadic=param.is_keyword_variadic,
            )
            for param in func.parameters
        ]
        collected.append(
            FunctionModel(
                id=func_id,
                qualified_name=qualified,
                name=func.name,
                location=location,
                is_async=func.is_async,
                binding=func.binding,
                visibility=visibility,
                parameters=parameters,
                return_type=func.return_type,
                annotations=[dec.name for dec in func.decorators],
                docstring=func.docstring,
                owner_type=func.owner_type,
                content_hash=_content_hash(func),
            )
        )

    for parsed in self._parsed_files.values():
        if parsed.success:
            for free_function in parsed.functions:
                _register(free_function)
            for cls in parsed.classes:
                for method in cls.methods:
                    _register(method)

    return collected, id_by_qualified_name
|
|
2137
|
+
|
|
2138
|
+
@staticmethod
|
|
2139
|
+
def _infer_visibility(name: str) -> str | None:
|
|
2140
|
+
"""Infer visibility from Python naming convention."""
|
|
2141
|
+
if name.startswith("__") and name.endswith("__"):
|
|
2142
|
+
return "public"
|
|
2143
|
+
if name.startswith("__") or name.startswith("_"):
|
|
2144
|
+
return "private"
|
|
2145
|
+
return "public"
|
|
2146
|
+
|
|
2147
|
+
def _collect_all_classes(self, func_id_index: dict[str, str]) -> list[ClassModel]:
    """Build a ``ClassModel`` for every class in the successfully parsed files.

    Args:
        func_id_index: qualified_name -> func_id mapping produced by
            ``_collect_all_functions``.  Each method is looked up by its
            qualified name; when no entry exists, the method's plain name
            is used in place of an ID.

    Returns:
        The class models, in the iteration order of the parsed files.
    """
    collected: list[ClassModel] = []

    for parsed in self._parsed_files.values():
        if parsed.success:
            for cls in parsed.classes:
                resolved_methods = []
                for method in cls.methods:
                    resolved_methods.append(
                        func_id_index.get(method.qualified_name.full, method.name)
                    )

                qualified = cls.qualified_name.full
                where = cls.location
                model = ClassModel(
                    id=stable_id("cls", qualified),
                    qualified_name=qualified,
                    name=cls.name,
                    location=LocationModel(
                        file=str(where.file),
                        line=where.line,
                        column=where.column,
                        end_line=where.end_line,
                    ),
                    base_classes=cls.base_classes,
                    annotations=[dec.name for dec in cls.decorators],
                    methods=resolved_methods,
                    docstring=cls.docstring,
                )
                collected.append(model)

    return collected
|
|
2181
|
+
|
|
2182
|
+
# =========================================================================
|
|
2183
|
+
# Manifest Building
|
|
2184
|
+
# =========================================================================
|
|
2185
|
+
|
|
2186
|
+
def _build_analysis_metadata(self) -> AnalysisMetadataModel:
    """Assemble the analysis-metadata section of the manifest.

    The truncated-flow count is read from the recorded data-flow statistics
    and the unresolved-call count from the call graph's
    ``resolution_confidence`` statistics; both fall back to 0 when absent.
    """
    truncated = self._data_flow_stats.get("truncated_flows", 0)

    graph = self._call_graph
    if graph:
        confidence_counts = graph.get_statistics().get("resolution_confidence", {})
        unresolved = confidence_counts.get("UNRESOLVED", 0)
    else:
        # No call graph available: nothing can be reported as unresolved.
        unresolved = 0

    flow_config = self.config.analysis.data_flow
    return AnalysisMetadataModel(
        data_flow_mode=flow_config.mode,
        data_flow_depth=flow_config.max_depth,
        truncated_flows=truncated,
        unresolved_calls=unresolved,
    )
|
|
2202
|
+
|
|
2203
|
+
def _build_manifest(
    self,
    routes: list[RouteModel],
    functions: list[FunctionModel],
    classes: list[ClassModel],
    calls: list[FunctionCallModel],
    data_flows: list[DataFlowModel],
    auth: AuthModel,
    middleware: list[MiddlewareModel],
    dependencies: DependenciesModel,  # noqa: F821
    integrations: list[IntegrationModel],
    capabilities: list[CapabilityModel],
    schemas: dict[str, SchemaModel],
    literals: LiteralsModel,
    configuration: ConfigurationModel,
    analysis_time_ms: int,
) -> Manifest:
    """Build the final manifest from the already-collected analysis sections.

    Besides passing the given sections through, this derives the project
    metadata: project name, detected languages and frameworks, and the
    analysed/skipped file counts.

    Args:
        routes: Entry points, stored as ``entry_points``.
        functions: Collected function models.
        classes: Collected class models.
        calls: Collected call models.
        data_flows: Collected data-flow models.
        auth: Authentication section.
        middleware: Middleware section.
        dependencies: Dependencies section.
        integrations: Integrations section.
        capabilities: Capabilities section.
        schemas: Schema models keyed by name.
        literals: Literals section.
        configuration: Configuration section.
        analysis_time_ms: Analysis duration, stored as ``analysis_duration_ms``.

    Returns:
        The assembled ``Manifest``.
    """
    # Imported locally: only the ``datetime`` class is assumed to be in module scope.
    from datetime import timezone

    # Project name: the root directory name, unless pyproject.toml declares
    # a [project].name.  Reading it is best-effort; any failure is logged
    # at debug level and the directory name is kept.
    project_root = self.config.project_root
    project_name = project_root.name
    pyproject = project_root / "pyproject.toml"
    if pyproject.exists():
        try:
            import tomllib

            # TOML is UTF-8 by specification; do not depend on the locale encoding.
            data = tomllib.loads(pyproject.read_text(encoding="utf-8"))
            declared_name = data.get("project", {}).get("name", project_name)
            # Ignore a malformed (non-string) name and keep the directory name.
            if isinstance(declared_name, str):
                project_name = declared_name
        except Exception as e:
            logger.debug("Could not read project name from pyproject.toml: %s", e)

    # Determine languages.  The discovery result is resolved to a file list
    # up front: a filter clause inside the generator cannot protect the
    # ``.files`` access, because the iterable is evaluated before the filter.
    discovered_files = self._discovery_result.files if self._discovery_result else []
    language_labels = (
        (Language.PYTHON, "python"),
        (Language.JAVA, "java"),
        (Language.CSHARP, "csharp"),
        (Language.JAVASCRIPT, "javascript"),
    )
    languages = [
        label
        for language, label in language_labels
        if any(f.language == language for f in discovered_files)
    ]

    # Determine frameworks
    frameworks = [fw.name.lower() for fw in self._detected_frameworks]

    # Every parsed file is either analysed (success) or skipped.
    files_analyzed = sum(1 for parsed in self._parsed_files.values() if parsed.success)
    files_skipped = len(self._parsed_files) - files_analyzed

    # Naive UTC timestamp, as ``datetime.utcnow()`` produced, without the
    # deprecated call.
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None)

    return Manifest(
        manifest_version=MANIFEST_VERSION,
        generated_at=generated_at,
        probe_version=_get_probe_version(),
        project=ProjectMetadata(
            root=str(project_root),
            name=project_name,
            languages_detected=languages,
            frameworks_detected=frameworks,
            files_analyzed=files_analyzed,
            files_skipped=files_skipped,
            analysis_duration_ms=analysis_time_ms,
        ),
        entry_points=routes,
        functions=functions,
        classes=classes,
        calls=calls,
        data_flows=data_flows,
        auth=auth,
        middleware=middleware,
        dependencies=dependencies,
        integrations=integrations,
        capabilities=capabilities,
        schemas=schemas,
        literals=literals,
        configuration=configuration,
        analysis_metadata=self._build_analysis_metadata(),
    )
|
|
2291
|
+
|
|
2292
|
+
|
|
2293
|
+
# =============================================================================
|
|
2294
|
+
# Convenience Function
|
|
2295
|
+
# =============================================================================
|
|
2296
|
+
|
|
2297
|
+
|
|
2298
|
+
def analyze_project(config: CodeBoltConfig) -> AnalysisResult:
    """Run a full project analysis in one call.

    Convenience wrapper that creates a ``ProjectAnalyzer`` for *config* and
    returns the result of its ``analyze()`` method.

    Args:
        config: Configuration for the analysis

    Returns:
        AnalysisResult containing manifest and metadata
    """
    return ProjectAnalyzer(config).analyze()
|