apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,2309 @@
1
+ """
2
+ Main analysis orchestrator for generating manifests.
3
+
4
+ This module coordinates:
5
+ - File discovery
6
+ - Parsing (Python, Java, C#, JavaScript)
7
+ - Framework detection and extraction
8
+ - Data flow analysis
9
+ - Manifest generation
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import hashlib
15
+ import logging
16
+ import time
17
+ from dataclasses import dataclass, field
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ from pydantic import ValidationError
23
+
24
+ from ..core.config import CodeBoltConfig
25
+ from ..core.discovery import DiscoveryResult, discover_files
26
+ from ..core.manifest import (
27
+ MANIFEST_VERSION,
28
+ AnalysisMetadataModel,
29
+ AuthDependencyModel,
30
+ AuthModel,
31
+ AuthSchemeModel,
32
+ BodyFieldModel,
33
+ CallContextModel,
34
+ CapabilityModel,
35
+ ClassModel,
36
+ ConfigurationModel,
37
+ DataFlowModel,
38
+ EnvVarUsageModel,
39
+ FunctionCallModel,
40
+ FunctionModel,
41
+ HttpBodyModel,
42
+ HttpParameterModel,
43
+ IntegrationModel,
44
+ JwtConfigModel,
45
+ LiteralsModel,
46
+ LocationModel,
47
+ Manifest,
48
+ MiddlewareModel,
49
+ ParameterModel,
50
+ ProjectMetadata,
51
+ RouteAuthModel,
52
+ RouteModel,
53
+ SchemaFieldModel,
54
+ SchemaModel,
55
+ stable_id,
56
+ )
57
+ from ..core.types import Framework, Language
58
+ from ..frameworks import dotnet as _dotnet_fw # noqa: F401 — registers AspNetCorePlugin
59
+ from ..frameworks import java as _java_fw # noqa: F401 — registers SpringBootPlugin
60
+ from ..frameworks.base import FrameworkPluginRegistry
61
+ from ..parsing import csharp as _csharp_pkg # noqa: F401 — registers CSharpParser
62
+ from ..parsing import js as _js_pkg # noqa: F401 — registers JavaScriptParser
63
+ from ..parsing import jvm as _jvm_pkg # noqa: F401 — registers JavaParser
64
+ from ..parsing.base import ParsedFile, ParserRegistry
65
+ from ..parsing.csharp.language_services import CSharpLanguageServices
66
+ from ..parsing.js.language_services import JavaScriptLanguageServices
67
+ from ..parsing.jvm import language_services as _jvm_lang_services # noqa: F401
68
+ from ..parsing.jvm.language_services import JavaLanguageServices
69
+ from ..parsing.python.language_services import PythonLanguageServices
70
+ from ..parsing.python.parser import PythonProjectParser
71
+ from ..parsing.python.type_resolver import SchemaBuilder, TypeResolver
72
+ from ..parsing.services import AnalysisContext, LanguageServices
73
+ from .call_graph import CallGraph, build_call_graph_with_context
74
+ from .data_flow import DataFlowAnalyzer
75
+ from .dependency_extractor import DependencyExtractor
76
+ from .flow_analysis import build_flow_sensitive_bindings
77
+
78
+
79
def _auto_discover_plugins() -> None:
    """Import all framework plugins to trigger their self-registration.

    Walks every module under the ``frameworks`` package and imports those
    whose dotted name contains ``"plugin"``. Import failures are logged at
    debug level and otherwise ignored so that one broken plugin does not
    prevent the others from loading.
    """
    import importlib
    import pkgutil

    from .. import frameworks as _fw_pkg

    # This function is called at import time, *before* the module-level
    # ``logger`` is assigned further down the file. Referencing that global
    # here would raise NameError the first time a plugin failed to import,
    # so fetch the (identical) logger locally instead.
    log = logging.getLogger(__name__)

    for _importer, mod_name, _is_pkg in pkgutil.walk_packages(
        _fw_pkg.__path__,
        prefix=_fw_pkg.__name__ + ".",
    ):
        # A name ending in "_plugin" also contains "plugin", so a single
        # substring test covers both spellings.
        if "plugin" not in mod_name:
            continue
        try:
            importlib.import_module(mod_name)
        except Exception as e:  # noqa: BLE001
            log.debug("Could not load framework plugin %s: %s", mod_name, e)
95
+
96
+
97
+ _auto_discover_plugins()
98
+
99
+
100
+ logger = logging.getLogger(__name__)
101
+
102
+
103
def _get_probe_version() -> str:
    """Return the package ``__version__``, or ``"0.1.0"`` if it cannot be imported."""
    fallback = "0.1.0"
    try:
        from .. import __version__ as version
    except Exception:
        return fallback
    return version
110
+
111
+
112
+ # =============================================================================
113
+ # Analysis Result
114
+ # =============================================================================
115
+
116
+
117
@dataclass
class AnalysisWarning:
    """Record of a single recoverable problem raised while analysing a project."""

    # Name of the analysis phase that reported the problem.
    phase: str
    # Human-readable description of what went wrong.
    message: str
    # File the warning relates to, when there is one.
    file: str | None = None
    # Optional additional information.
    detail: str | None = None
125
+
126
+
127
@dataclass
class AnalysisResult:
    """Outcome of one analysis run: the manifest plus statistics and timings."""

    # The generated manifest (the only required field).
    manifest: Manifest

    # --- Counters -------------------------------------------------------------
    files_analyzed: int = 0
    files_failed: int = 0
    # Legacy plain-string parse errors.
    parse_errors: list[str] = field(default_factory=list)

    # Non-fatal warnings gathered while the analysis ran.
    warnings: list[AnalysisWarning] = field(default_factory=list)

    # --- Timing: headline figures (milliseconds) ------------------------------
    discovery_time_ms: int = 0
    parse_time_ms: int = 0
    extraction_time_ms: int = 0
    total_time_ms: int = 0

    # --- Timing: complete per-stage breakdown (stage name -> ms) --------------
    stage_times_ms: dict[str, int] = field(default_factory=dict)

    # --- Timing: per framework plugin (framework name -> ms over all files) ---
    extractor_times_ms: dict[str, int] = field(default_factory=dict)

    # Number of files per language (language name -> count).
    files_by_language: dict[str, int] = field(default_factory=dict)

    # Number of parse failures per language (language name -> count).
    failures_by_language: dict[str, int] = field(default_factory=dict)

    # Files left out by discovery (gitignore, --exclude, --max-files,
    # unknown extension).
    files_skipped: int = 0

    # Number of routes per framework (framework name -> count).
    routes_by_framework: dict[str, int] = field(default_factory=dict)

    # Structured parse errors (file, line, message); supersedes the plain
    # strings in ``parse_errors``.
    parse_error_details: list[dict[str, Any]] = field(default_factory=list)

    # Frameworks that were detected in the project.
    frameworks_detected: list[Framework] = field(default_factory=list)
170
+
171
+
172
+ # =============================================================================
173
+ # Analyzer
174
+ # =============================================================================
175
+
176
+
177
+ class ProjectAnalyzer:
178
+ """
179
+ Main analyzer for extracting manifest from a project.
180
+
181
+ Coordinates:
182
+ 1. File discovery
183
+ 2. Parsing files by language
184
+ 3. Framework detection
185
+ 4. Route/dependency/auth extraction
186
+ 5. Data flow analysis
187
+ 6. Manifest generation
188
+ """
189
+
190
def __init__(self, config: CodeBoltConfig) -> None:
    """Store *config* and initialise empty per-run analysis state."""
    self.config = config

    # Python project parser plus the type resolver / schema builder pair.
    self._python_parser = PythonProjectParser()
    resolver = TypeResolver()
    self._type_resolver = resolver
    self._schema_builder = SchemaBuilder(resolver)

    # One LanguageServices factory per supported language; each builds the
    # analysis context for its language.
    self._language_services: dict[Language, LanguageServices] = {
        Language.PYTHON: PythonLanguageServices(),
        Language.JAVA: JavaLanguageServices(),
        Language.CSHARP: CSharpLanguageServices(),
        Language.JAVASCRIPT: JavaScriptLanguageServices(),
    }

    # Filled in during analysis: one context per language.
    self._analysis_contexts: dict[Language, AnalysisContext] = {}

    # Discovery and parsing results.
    self._parsed_files: dict[Path, ParsedFile] = {}
    self._discovery_result: DiscoveryResult | None = None

    # Call graph, once built.
    self._call_graph: CallGraph | None = None

    # Data-flow statistics destined for the manifest metadata.
    self._data_flow_stats: dict[str, int] = {}

    # Flow-sensitive bindings, built once and shared by the call graph and
    # data-flow stages.
    self._flow_bindings: Any = None

    # Frameworks found in the project.
    self._detected_frameworks: set[Framework] = set()

    # Extraction time per plugin; written by _extract_all_routes.
    self._extractor_times_ms: dict[str, int] = {}

    # Non-fatal warnings collected along the way.
    self._warnings: list[AnalysisWarning] = []
230
+
231
+ # Pipeline stage definitions: (name, method_name)
232
# Every stage "<name>" is implemented by the method "_stage_<name>", so the
# (name, method_name) table is derived from the ordered stage names.
_PIPELINE_STAGES: list[tuple[str, str]] = [
    (stage, f"_stage_{stage}")
    for stage in (
        "discover",
        "parse",
        "resolve",
        "detect_frameworks",
        "flow_bindings",
        "call_graph",
        "extract",
        "analyze_flows",
        "enrich",
    )
]
243
+
244
def analyze(self) -> AnalysisResult:
    """
    Analyze the project and generate manifest.

    Runs every entry of ``_PIPELINE_STAGES`` in order, passing a shared
    ``ctx`` dict that the stages fill in, and records how long each stage
    took. The manifest is then built from ``ctx`` and returned together
    with parse errors, per-language / per-framework counts and timings.

    Returns:
        An ``AnalysisResult``. Its mutable containers are copies, so later
        changes to the analyzer's internal state do not leak into it, and
        ``frameworks_detected`` is sorted by framework name so that the
        result is reproducible across runs.
    """
    start_time = time.perf_counter()
    stage_times: dict[str, int] = {}
    ctx: dict[str, Any] = {}

    for stage_name, method_name in self._PIPELINE_STAGES:
        stage_start = time.perf_counter()
        getattr(self, method_name)(ctx)
        elapsed_ms = int((time.perf_counter() - stage_start) * 1000)
        stage_times[stage_name] = elapsed_ms
        logger.debug("Stage '%s' completed in %dms", stage_name, elapsed_ms)

    # Measured before the manifest is built because the manifest itself
    # records the analysis time.
    total_time_ms = int((time.perf_counter() - start_time) * 1000)

    manifest = self._build_manifest(
        routes=ctx["routes"],
        functions=ctx["functions"],
        classes=ctx["classes"],
        calls=ctx["calls"],
        data_flows=ctx["data_flows"],
        auth=ctx["auth"],
        middleware=ctx["middleware"],
        dependencies=ctx["pkg_dependencies"],
        integrations=ctx["integrations"],
        capabilities=ctx["capabilities"],
        schemas=ctx["schemas"],
        literals=ctx["literals"],
        configuration=ctx["config_data"],
        analysis_time_ms=total_time_ms,
    )

    # Build both legacy string errors and structured error details
    parse_errors: list[str] = []
    parse_error_details: list[dict[str, Any]] = []
    for path, parsed in self._parsed_files.items():
        if parsed.success or not parsed.error:
            continue
        err = parsed.error
        parse_errors.append(f"{path}: {err}")
        detail: dict[str, Any] = {"file": str(path), "error": str(err)}
        # Position info is optional on the error object.
        for attr in ("line", "column"):
            value = getattr(err, attr, None)
            if value is not None:
                detail[attr] = value
        parse_error_details.append(detail)

    # Per-language file counts from the parsed file registry
    files_by_language: dict[str, int] = {}
    failures_by_language: dict[str, int] = {}
    for parsed in self._parsed_files.values():
        lang_name = parsed.language.name.lower() if parsed.language else "unknown"
        bucket = files_by_language if parsed.success else failures_by_language
        bucket[lang_name] = bucket.get(lang_name, 0) + 1

    # Per-framework route counts from the manifest
    routes_by_framework: dict[str, int] = {}
    for ep in manifest.entry_points:
        fw = (ep.framework or "unknown").lower()
        routes_by_framework[fw] = routes_by_framework.get(fw, 0) + 1

    discovery = self._discovery_result
    return AnalysisResult(
        manifest=manifest,
        files_analyzed=sum(1 for p in self._parsed_files.values() if p.success),
        files_failed=sum(1 for p in self._parsed_files.values() if not p.success),
        files_skipped=discovery.total_skipped if discovery else 0,
        parse_errors=parse_errors,
        parse_error_details=parse_error_details,
        warnings=list(self._warnings),
        discovery_time_ms=stage_times.get("discover", 0),
        parse_time_ms=stage_times.get("parse", 0),
        extraction_time_ms=stage_times.get("extract", 0),
        total_time_ms=total_time_ms,
        stage_times_ms=dict(stage_times),
        # Copy, like the other containers, so the result does not alias
        # the analyzer's internal dict.
        extractor_times_ms=dict(self._extractor_times_ms),
        files_by_language=files_by_language,
        failures_by_language=failures_by_language,
        routes_by_framework=routes_by_framework,
        # Set iteration order is not stable between runs; sort by name.
        frameworks_detected=sorted(self._detected_frameworks, key=lambda fw: fw.name),
    )
333
+
334
+ # -- Pipeline stage implementations ----------------------------------------
335
+
336
def _stage_discover(self, ctx: dict[str, Any]) -> None:
    """Discover the project's files and keep the result on the analyzer.

    ``ctx`` is accepted for the uniform stage signature but not used here.
    """
    root = self.config.project_root
    logger.info("Discovering files in %s", root)

    result = discover_files(root, self.config.analysis.file_discovery)
    self._discovery_result = result

    logger.info(
        "Discovered %d files, skipped %d",
        result.total_selected,
        result.total_skipped,
    )
347
+
348
def _stage_parse(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: parse every discovered file (``ctx`` is unused)."""
    self._parse_all_files()
350
+
351
def _stage_resolve(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: build cross-file resolution (``ctx`` is unused)."""
    self._build_cross_file_resolver()
353
+
354
def _stage_detect_frameworks(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: detect the frameworks in use (``ctx`` is unused)."""
    self._detect_frameworks()
356
+
357
def _stage_flow_bindings(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: build the shared flow-sensitive bindings (``ctx`` is unused)."""
    self._build_shared_flow_bindings()
359
+
360
def _stage_call_graph(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: build the call graph (``ctx`` is unused)."""
    self._build_call_graph()
362
+
363
def _stage_extract(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: extract routes, dependencies, auth, middleware and calls.

    Writes ``routes``, ``dependencies``, ``auth``, ``middleware`` and
    ``calls`` into ``ctx``. Between the extractions, the auth-related
    passes are run over the routes in a fixed order.
    """
    routes = self._apply_group_prefixes(self._extract_all_routes())
    ctx["routes"] = routes
    ctx["dependencies"] = self._extract_all_dependencies()

    auth = self._extract_auth_data()
    ctx["auth"] = auth
    middleware = self._extract_middleware()
    ctx["middleware"] = middleware

    # Auth passes over the extracted routes; the order is kept as-is.
    self._populate_route_auth_mapping(auth, routes, middleware)
    self._link_routes_to_auth_schemes(routes, auth)
    self._propagate_file_auth_to_routes(routes, auth)
    self._apply_spring_filter_chain_policy(routes)

    ctx["calls"] = self._extract_all_calls()
374
+
375
def _stage_analyze_flows(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: data flows, integrations, literals and sensitivity.

    Reads ``ctx["routes"]`` and writes ``data_flows``, ``integrations`` and
    ``literals``. The second element returned by ``_analyze_data_flows`` is
    discarded.
    """
    data_flows, _unused = self._analyze_data_flows(ctx["routes"])
    ctx["data_flows"] = data_flows

    integrations = self._detect_integrations()
    ctx["integrations"] = integrations

    literals = self._scan_literals()
    ctx["literals"] = literals

    self._classify_sensitivity(integrations, literals)
380
+
381
def _stage_enrich(self, ctx: dict[str, Any]) -> None:
    """Pipeline stage: package dependencies, configuration, capabilities, symbols.

    Reads ``integrations`` and ``routes`` from ``ctx`` and writes
    ``pkg_dependencies``, ``config_data``, ``capabilities``, ``functions``,
    ``classes`` and ``schemas``.
    """
    ctx["pkg_dependencies"] = DependencyExtractor(
        self.config.project_root, self._parsed_files
    ).extract()

    config_data = self._extract_configuration()
    ctx["config_data"] = config_data
    ctx["capabilities"] = self._tag_capabilities(
        ctx["integrations"],
        ctx["routes"],
        config_data,
    )

    # The function-id index is only needed to link classes to their methods.
    ctx["functions"], func_id_index = self._collect_all_functions()
    ctx["classes"] = self._collect_all_classes(func_id_index)
    ctx["schemas"] = self._collect_schemas(ctx["routes"])
394
+
395
+ # =========================================================================
396
+ # Parsing
397
+ # =========================================================================
398
+
399
def _parse_all_files(self) -> None:
    """Parse all discovered files using registry-based language dispatch.

    Files are grouped by language and handed to the parser registered for
    that language; languages without a registered parser are logged and
    skipped. Python goes through ``PythonProjectParser.parse_project``
    (cross-file resolution); every other language is parsed file by file.

    A parser that raises for a file is logged *and* recorded as an
    ``AnalysisWarning`` in ``self._warnings``. Such a file never reaches
    ``self._parsed_files``, so without the warning it would be missing
    from the result's failure statistics entirely.
    """
    if not self._discovery_result:
        return

    # Group files by language
    files_by_lang: dict[Language, list[Path]] = {}
    for f in self._discovery_result.files:
        if f.language:
            files_by_lang.setdefault(f.language, []).append(f.path)

    for language, file_paths in files_by_lang.items():
        parser = ParserRegistry.get_parser(language)
        if parser is None:
            logger.info(
                "Found %d %s files (no parser registered)",
                len(file_paths),
                language.name,
            )
            continue

        logger.info("Parsing %d %s files", len(file_paths), language.name)

        # Use PythonProjectParser for Python (has cross-file resolution)
        if language == Language.PYTHON:
            self._parsed_files.update(self._python_parser.parse_project(file_paths))
            continue

        for path in file_paths:
            try:
                self._parsed_files[path] = parser.parse_file(path)
            except Exception as e:  # noqa: BLE001 - keep going on a bad file
                logger.warning("Failed to parse %s: %s", path, e)
                self._warnings.append(
                    AnalysisWarning(
                        phase="parse",
                        message="Failed to parse file",
                        file=str(path),
                        detail=str(e),
                    )
                )
432
+
433
def _build_analysis_contexts(self) -> None:
    """
    Build analysis contexts for each language using LanguageServices.

    Successfully parsed files are grouped by language. Languages without an
    entry in ``self._language_services`` are skipped with a warning. For
    every other language the most frequently detected framework is passed
    to the services' ``build_context`` factory and the resulting context is
    stored in ``self._analysis_contexts``.

    Django (Python) and JavaScript contexts additionally get a URL prefix
    map stored under ``"_url_prefix_map"``; a failure while building that
    map is logged at debug level and otherwise ignored.
    """
    if not self._parsed_files:
        return

    # Group parsed files by language
    files_by_language: dict[Language, list[ParsedFile]] = {}
    for parsed in self._parsed_files.values():
        if parsed.success:
            files_by_language.setdefault(parsed.language, []).append(parsed)

    # Build context for each language
    for language, files in files_by_language.items():
        services = self._language_services.get(language)
        if services is None:
            logger.warning("No language services available for %s", language.name)
            continue

        # Detect primary framework for this language
        framework = self._detect_primary_framework(files, language)
        framework_name = framework.name.lower() if framework else None

        logger.info(
            "Building analysis context for %s (framework: %s)",
            language.name,
            framework_name,
        )

        # Build context using the language services factory
        context = services.build_context(
            parsed_files=files,
            project_root=self.config.project_root,
            framework=framework_name,
        )

        # Wire URL prefix maps for Django and Express/NestJS
        if language == Language.PYTHON and framework_name == "django":
            try:
                from .url_prefix_resolver import build_django_prefix_map

                pm = build_django_prefix_map(files, project_root=self.config.project_root)
                context.language_services["_url_prefix_map"] = pm
                logger.debug("Django prefix map built: %d entries", len(pm))
            except Exception as e:  # noqa: BLE001 - prefix map is best-effort
                logger.debug("Django prefix map failed: %s", e)
        elif language == Language.JAVASCRIPT:
            try:
                from .url_prefix_resolver import build_express_prefix_map

                pm = build_express_prefix_map(files, project_root=self.config.project_root)
                context.language_services["_url_prefix_map"] = pm
                logger.debug("Express prefix map built: %d entries", len(pm))
            except Exception as e:  # noqa: BLE001 - prefix map is best-effort
                logger.debug("Express prefix map failed: %s", e)

        self._analysis_contexts[language] = context

        # Log statistics
        if context.type_resolver:
            models = context.type_resolver.get_all_models()
            logger.info(" - Resolved %d model types", len(models))

        if context.constant_resolver:
            constants = context.constant_resolver.get_all_constants()
            logger.info(" - Resolved %d constants", len(constants))

        if context.router_registry:
            routers = context.router_registry.get_all_routers()
            logger.info(" - Registered %d routers", len(routers))
510
+
511
def _detect_primary_framework(
    self,
    files: list[ParsedFile],
    language: Language,
) -> Framework | None:
    """Return the framework detected in the most files, or ``None`` if none is.

    ``language`` is part of the signature but is not consulted here. When
    several frameworks tie, the one counted first wins.
    """
    tally: dict[Framework, int] = {}
    for parsed_file in files:
        for fw in FrameworkPluginRegistry.detect_frameworks(parsed_file):
            tally[fw] = tally.get(fw, 0) + 1

    if not tally:
        return None

    # Pick the entry with the highest count.
    winner, _count = max(tally.items(), key=lambda item: item[1])
    return winner
529
+
530
+ # Backward compatibility alias
531
def _build_cross_file_resolver(self) -> None:
    """Older name kept for callers; simply delegates to ``_build_analysis_contexts``."""
    self._build_analysis_contexts()
534
+
535
def _build_shared_flow_bindings(self) -> None:
    """Compute the flow-sensitive bindings once so later stages can reuse them.

    Nothing happens when no file parsed successfully. If building the
    bindings fails, a warning is logged and ``self._flow_bindings`` is
    reset to ``None``.
    """
    parsed_ok = list(filter(lambda p: p.success, self._parsed_files.values()))
    if len(parsed_ok) == 0:
        return

    try:
        self._flow_bindings = build_flow_sensitive_bindings(parsed_ok)
        logger.info("Built shared flow-sensitive bindings for %d files", len(parsed_ok))
    except Exception as e:
        logger.warning("Could not build flow-sensitive bindings: %s", e)
        self._flow_bindings = None
546
+
547
def _build_call_graph(self) -> None:
    """Build the call graph for inter-procedural analysis.

    Returns early when nothing was parsed, and logs a warning when no file
    parsed successfully. The graph is built with the first available
    analysis context (Python, then Java, then C#); when none exists the
    context-free ``build_call_graph`` is used with the project root. The
    result is stored in ``self._call_graph`` and its statistics are logged.

    An ``ImportError`` raised while building is logged as a warning; other
    exceptions propagate to the caller.
    """
    if not self._parsed_files:
        return

    logger.info("Building call graph...")

    # Get successfully parsed files
    successful_files = [parsed for parsed in self._parsed_files.values() if parsed.success]

    if not successful_files:
        logger.warning("No successfully parsed files for call graph")
        return

    # Get the primary analysis context (prefer Python, fall back to Java, then C#)
    context = (
        self._analysis_contexts.get(Language.PYTHON)
        or self._analysis_contexts.get(Language.JAVA)
        or self._analysis_contexts.get(Language.CSHARP)
    )

    try:
        if context:
            self._call_graph = build_call_graph_with_context(
                successful_files,
                context,
                flow_bindings=self._flow_bindings,
            )
        else:
            from .call_graph import build_call_graph

            self._call_graph = build_call_graph(
                successful_files,
                project_root=self.config.project_root,
                flow_bindings=self._flow_bindings,
            )

        stats = self._call_graph.get_statistics()
        # Lazy %-style arguments, matching the other log calls in this file.
        logger.info(
            "Call graph built: %s internal functions, %s call edges",
            stats["internal_nodes"],
            stats["total_edges"],
        )
        logger.info(" - Entry points: %s", stats["entry_points"])
        logger.info(" - Resolution confidence: %s", stats["resolution_confidence"])

    except ImportError as e:
        logger.warning("Could not build call graph: %s", e)
594
+
595
def _detect_frameworks(self) -> None:
    """Detect frameworks used in parsed files.

    Every successfully parsed file is passed to
    ``FrameworkPluginRegistry.detect_frameworks`` and the results are
    accumulated in ``self._detected_frameworks``. The detected names are
    logged in sorted order so the log line is stable between runs.
    """
    for parsed_file in self._parsed_files.values():
        if not parsed_file.success:
            continue
        self._detected_frameworks.update(
            FrameworkPluginRegistry.detect_frameworks(parsed_file)
        )

    logger.info(
        "Detected frameworks: %s",
        sorted(f.name for f in self._detected_frameworks),
    )
605
+
606
+ # =========================================================================
607
+ # Route Extraction
608
+ # =========================================================================
609
+
610
def _extract_all_routes(self) -> list[RouteModel]:
    """Run every active framework plugin over every parsed file and collect routes.

    A plugin is active when its framework is in ``self._detected_frameworks``.
    Time spent in each plugin's ``extract_routes`` is accumulated per
    framework name and stored in ``self._extractor_times_ms``.
    """
    # Plugins are gathered from the language-keyed registry rather than via
    # get_plugin(fw): several plugins may share one Framework value (e.g.
    # GraphQLPythonPlugin and GraphQLJavaPlugin both register as
    # Framework.GRAPHQL) and get_plugin(fw) returns only one entry per
    # framework key, silently dropping the second registrant. Identity is
    # tracked so the same plugin object is not added twice.
    active_plugins = []
    seen_ids: set[int] = set()
    for language in Language:
        for plugin in FrameworkPluginRegistry.get_plugins_for_language(language):
            if plugin.framework not in self._detected_frameworks:
                continue
            if id(plugin) in seen_ids:
                continue
            seen_ids.add(id(plugin))
            active_plugins.append(plugin)

    # framework name -> accumulated wall-clock milliseconds
    plugin_times: dict[str, int] = {}
    routes: list[RouteModel] = []

    for parsed_file in self._parsed_files.values():
        if not parsed_file.success:
            continue

        # Context belonging to this file's language (may be absent).
        context = self._analysis_contexts.get(parsed_file.language)

        for plugin in active_plugins:
            if not plugin.detect(parsed_file):
                continue
            if not hasattr(plugin, "extract_routes"):
                continue

            framework_name = plugin.framework.name.lower()
            started = time.perf_counter()
            try:
                extracted = plugin.extract_routes(parsed_file, context=context)
            except TypeError:
                # Fallback path: set the plugin up the legacy way and call
                # it again without the ``context`` keyword.
                self._setup_legacy_plugin(plugin, parsed_file.language)
                extracted = plugin.extract_routes(parsed_file)

            elapsed_ms = int((time.perf_counter() - started) * 1000)
            plugin_times[framework_name] = plugin_times.get(framework_name, 0) + elapsed_ms

            for route in extracted:
                ep_id = stable_id("ep", route.method.name, route.path)
                routes.append(self._convert_route(route, ep_id, framework=framework_name))

    self._extractor_times_ms = plugin_times
    return routes
665
+
666
+ # =========================================================================
667
+ # Group Prefix Propagation (cross-file module → endpoint extension pattern)
668
+ # =========================================================================
669
+
670
+ def _apply_group_prefixes(self, routes: list[RouteModel]) -> list[RouteModel]:
671
+ """Prepend MapGroup prefix to routes registered via the module-extension pattern.
672
+
673
+ Handles three patterns:
674
+ 1. Cross-file: module file calls MapGroup + extension methods; handler files
675
+ define those methods.
676
+ 2. Same-file: file calls MapGroup and registers routes on the result directly
677
+ (e.g. ``var api = app.MapGroup("x"); api.MapGet("/y", ...)``)
678
+ 3. Program.cs auth: an outer file calls ``handler.MapXxxApi().RequireAuthorization()``
679
+ without its own MapGroup; auth is propagated to the handler file's routes.
680
+ """
681
+ import re as _re
682
+
683
+ _STANDARD_MAP = frozenset(
684
+ {
685
+ "MapGet",
686
+ "MapPost",
687
+ "MapPut",
688
+ "MapDelete",
689
+ "MapPatch",
690
+ "MapIdentityApi",
691
+ "MapGroup",
692
+ "MapHub",
693
+ }
694
+ )
695
+
696
+ # ── Step 1: collect extension-method names per file ──────────────────
697
+ # Static methods whose first parameter is IEndpointRouteBuilder or similar.
698
+ ext_methods_by_file: dict[str, set[str]] = {}
699
+ for file_path, parsed_file in self._parsed_files.items():
700
+ if not parsed_file.success:
701
+ continue
702
+ for cls in parsed_file.classes:
703
+ for method in cls.methods:
704
+ if method.binding != "static" or not method.parameters:
705
+ continue
706
+ first_type = (method.parameters[0].type_annotation or "").split("<")[0].strip()
707
+ if first_type in (
708
+ "IEndpointRouteBuilder",
709
+ "RouteGroupBuilder",
710
+ "WebApplication",
711
+ "IApplicationBuilder",
712
+ ):
713
+ ext_methods_by_file.setdefault(str(file_path), set()).add(method.name)
714
+
715
+ # Reverse map: extension method name → file that defines it
716
+ ext_method_to_file: dict[str, str] = {}
717
+ for file_path, method_names in ext_methods_by_file.items():
718
+ for method_name in method_names:
719
+ ext_method_to_file[method_name] = file_path
720
+
721
+ # ── Step 2: scan all files for group patterns ─────────────────────────
722
+ # group_registry: ext_method_name → (prefix, auth, anon) [cross-file pattern]
723
+ group_registry: dict[str, tuple[str, bool, bool]] = {}
724
+ # same_file_prefix_map: file_path → (prefix, auth, anon) [same-file pattern, single prefix]
725
+ same_file_prefix_map: dict[str, tuple[str, bool, bool]] = {}
726
+ # same_file_multi_map: file_path → sorted [(group_line, prefix, has_auth)]
727
+ # Used when a file has multiple MapGroup prefixes (e.g. versioned API)
728
+ same_file_multi_map: dict[str, list[tuple[int, str, bool]]] = {}
729
+ # program_auth_overrides: handler file paths that get auth from an outer caller
730
+ program_auth_overrides: set[str] = set()
731
+
732
+ for file_path_obj, parsed_file in self._parsed_files.items():
733
+ file_path = str(file_path_obj)
734
+ if not parsed_file.success:
735
+ continue
736
+
737
+ # Find MapGroup calls with literal prefix arguments
738
+ group_lines: dict[int, str] = {} # start_line → prefix
739
+ for call in parsed_file.call_sites:
740
+ if call.callee_name.split(".")[-1] != "MapGroup":
741
+ continue
742
+ if not call.arguments or not call.arguments[0].is_literal:
743
+ continue
744
+ raw = str(call.arguments[0].literal_value or "")
745
+ # Resolve API versioning placeholder → v1
746
+ raw = _re.sub(r"v\{[^}]*[Vv]ersion[^}]*\}", "v1", raw)
747
+ # Strip remaining route constraints: {id:int} → {id}
748
+ raw = _re.sub(r"\{([^:}]+):[^}]+\}", r"{\1}", raw)
749
+ prefix = "/" + raw.strip("/") if raw else ""
750
+ line = call.location.line if call.location else -1
751
+ if line > 0:
752
+ group_lines[line] = prefix
753
+
754
+ # Build the full chain-line set for fluent group chaining
755
+ chain_lines: set[int] = set(group_lines.keys())
756
+ changed = True
757
+ while changed:
758
+ changed = False
759
+ for call in parsed_file.call_sites:
760
+ recv = call.receiver_expression or ""
761
+ if not recv.startswith("line:"):
762
+ continue
763
+ try:
764
+ target = int(recv[5:])
765
+ except ValueError:
766
+ continue
767
+ if target in chain_lines:
768
+ ln = call.location.line if call.location else -1
769
+ if ln > 0 and ln not in chain_lines:
770
+ chain_lines.add(ln)
771
+ changed = True
772
+
773
+ # Group-level auth: RequireAuthorization chained on the group
774
+ has_group_auth = any(
775
+ call.callee_name.split(".")[-1] == "RequireAuthorization"
776
+ and (call.receiver_expression or "").startswith("line:")
777
+ and (
778
+ lambda rv=call.receiver_expression: (
779
+ rv[5:].isdigit() and int(rv[5:]) in chain_lines
780
+ )
781
+ )()
782
+ for call in parsed_file.call_sites
783
+ )
784
+
785
+ # Find calls to custom Map* extension methods (non-standard, non-chained).
786
+ # These are calls like ``group.MapCreateBrandEndpoint()`` where the method
787
+ # is a project-defined extension, not one of the standard Map* helpers.
788
+ # Note: _STANDARD_MAP filters out MapGet/Post/etc; we keep any custom Map*
789
+ # regardless of receiver name so that group variables named "api", "v1" etc.
790
+ # are also captured.
791
+ ext_call_lines: dict[str, int] = {}
792
+ for call in parsed_file.call_sites:
793
+ method_name = call.callee_name.split(".")[-1]
794
+ recv = call.receiver_expression or ""
795
+ # Skip: no receiver, class-name receiver (uppercase), chained receiver
796
+ if not recv or recv[0].isupper() or recv.startswith("line:"):
797
+ continue
798
+ # Skip: standard Map* methods and the IEndpointGroup groupBuilder receiver
799
+ if not method_name.startswith("Map") or method_name in _STANDARD_MAP:
800
+ continue
801
+ if recv == "groupBuilder":
802
+ continue
803
+ ln = call.location.line if call.location else -1
804
+ if ln > 0:
805
+ ext_call_lines[method_name] = ln
806
+
807
+ # Per-extension-method auth overrides (.AllowAnonymous() / .RequireAuthorization()
808
+ # chained on group.MapXxxEndpoint() in the caller file).
809
+ ext_auth_override: dict[str, str | None] = dict.fromkeys(ext_call_lines, None)
810
+ for call in parsed_file.call_sites:
811
+ method_name = call.callee_name.split(".")[-1]
812
+ recv = call.receiver_expression or ""
813
+ if not recv.startswith("line:"):
814
+ continue
815
+ try:
816
+ target = int(recv[5:])
817
+ except ValueError:
818
+ continue
819
+ for ext_name, ext_line in ext_call_lines.items():
820
+ if target == ext_line and method_name in (
821
+ "AllowAnonymous",
822
+ "RequireAuthorization",
823
+ ):
824
+ ext_auth_override[ext_name] = method_name
825
+
826
+ # --- Pattern A: same-file MapGroup with local Map* route calls ---
827
+ # Detect files that call MapGroup AND register routes on the result directly
828
+ # (e.g. eShop OrdersApi.cs: ``var api = app.MapGroup("x"); api.MapPut(...)``)
829
+ if group_lines:
830
+ has_local_map_calls = any(
831
+ (m := call.callee_name.split(".")[-1]) in _STANDARD_MAP
832
+ and m != "MapGroup"
833
+ and (recv := call.receiver_expression or "")
834
+ and not recv.startswith("line:")
835
+ and not recv[0].isupper()
836
+ and recv != "groupBuilder"
837
+ for call in parsed_file.call_sites
838
+ )
839
+ unique_prefixes = set(group_lines.values())
840
+ if has_local_map_calls and len(unique_prefixes) == 1:
841
+ grp_prefix = next(iter(unique_prefixes))
842
+ same_file_prefix_map[file_path] = (grp_prefix, has_group_auth, False)
843
+ elif has_local_map_calls and len(unique_prefixes) > 1:
844
+ # Multi-prefix file (e.g. versioned API): assign each route the
845
+ # nearest preceding MapGroup prefix by line number
846
+ same_file_multi_map[file_path] = sorted(
847
+ [(ln, pfx, has_group_auth) for ln, pfx in group_lines.items()],
848
+ key=lambda t: t[0],
849
+ )
850
+
851
+ # --- Pattern B: cross-file extension method (with MapGroup in caller) ---
852
+ # OR: auth-only from Program.cs (no MapGroup in caller)
853
+ if ext_call_lines:
854
+ if group_lines:
855
+ # Module file: MapGroup + custom extension method calls → cross-file
856
+ unique_prefixes = set(group_lines.values())
857
+ if len(unique_prefixes) == 1:
858
+ grp_prefix = next(iter(unique_prefixes))
859
+ for ext_name in ext_call_lines:
860
+ override = ext_auth_override.get(ext_name)
861
+ allow_anon = override == "AllowAnonymous"
862
+ route_auth = (
863
+ has_group_auth and not allow_anon
864
+ ) or override == "RequireAuthorization"
865
+ group_registry[ext_name] = (grp_prefix, route_auth, allow_anon)
866
+ else:
867
+ # Program.cs-like file: no MapGroup, but ext method calls with auth
868
+ # (e.g. ``orders.MapOrdersApiV1().RequireAuthorization()``)
869
+ for ext_name, override in ext_auth_override.items():
870
+ if override == "RequireAuthorization":
871
+ handler_file = ext_method_to_file.get(ext_name)
872
+ if handler_file:
873
+ program_auth_overrides.add(handler_file)
874
+
875
+ # ── Step 3: build unified file_path → (prefix, route_auth, allow_anon) ──
876
+ # Same-file patterns take precedence for prefix resolution.
877
+ file_prefix_map: dict[str, tuple[str, bool, bool]] = dict(same_file_prefix_map)
878
+ # Cross-file patterns fill in files not already covered by same-file
879
+ for file_path, method_names in ext_methods_by_file.items():
880
+ for method_name in method_names:
881
+ if method_name in group_registry and file_path not in file_prefix_map:
882
+ file_prefix_map[file_path] = group_registry[method_name]
883
+ break
884
+
885
+ if not file_prefix_map and not same_file_multi_map and not program_auth_overrides:
886
+ return routes
887
+
888
+ # ── Step 4: apply prefix + auth to matching routes ───────────────────
889
+ updated: list[RouteModel] = []
890
+ for route in routes:
891
+ handler_file = route.handler_location.file if route.handler_location else None
892
+ if not handler_file:
893
+ updated.append(route)
894
+ continue
895
+
896
+ has_prog_auth = handler_file in program_auth_overrides
897
+
898
+ # Multi-prefix: find the nearest preceding MapGroup by route line
899
+ multi_entries = same_file_multi_map.get(handler_file)
900
+ if multi_entries and route.handler_location:
901
+ route_line = route.handler_location.line
902
+ chosen_pfx, chosen_auth = multi_entries[0][1], multi_entries[0][2]
903
+ for grp_line, grp_pfx, grp_auth in multi_entries:
904
+ if grp_line <= route_line:
905
+ chosen_pfx, chosen_auth = grp_pfx, grp_auth
906
+ else:
907
+ break
908
+ prefix_entry: tuple[str, bool, bool] | None = (chosen_pfx, chosen_auth, False)
909
+ else:
910
+ prefix_entry = file_prefix_map.get(handler_file)
911
+
912
+ if prefix_entry:
913
+ prefix, route_auth, allow_anon = prefix_entry
914
+ # Program.cs-level auth override (e.g. .RequireAuthorization() on the
915
+ # extension method call) takes precedence over no-auth from same-file group
916
+ if has_prog_auth:
917
+ route_auth = True
918
+ allow_anon = False
919
+
920
+ new_path = prefix.rstrip("/") + "/" + route.path.lstrip("/")
921
+ new_path = _re.sub(r"/+", "/", new_path)
922
+
923
+ new_deps = list(route.dependencies or [])
924
+ if allow_anon:
925
+ new_deps = [d for d in new_deps if "Authorize" not in d]
926
+ elif route_auth:
927
+ if not any("Authorize" in d or "permission" in d.lower() for d in new_deps):
928
+ new_deps.insert(0, "Authorize@group")
929
+
930
+ updated.append(
931
+ route.model_copy(
932
+ update={
933
+ "path": new_path,
934
+ "id": stable_id("ep", route.method, new_path),
935
+ "dependencies": new_deps,
936
+ }
937
+ )
938
+ )
939
+
940
+ elif has_prog_auth:
941
+ # Auth-only override without a prefix change (extension method defines
942
+ # its own routes directly, prefix already correct in the route)
943
+ new_deps = list(route.dependencies or [])
944
+ if not any("Authorize" in d or "permission" in d.lower() for d in new_deps):
945
+ new_deps.insert(0, "Authorize@group")
946
+ updated.append(route.model_copy(update={"dependencies": new_deps}))
947
+
948
+ else:
949
+ updated.append(route)
950
+
951
+ return updated
952
+
953
+ def _setup_legacy_plugin(self, plugin, language: Language) -> None:
954
+ """
955
+ Set up a legacy plugin that doesn't use AnalysisContext.
956
+
957
+ This provides backward compatibility with plugins that use
958
+ the old setter-based approach.
959
+ """
960
+ context = self._analysis_contexts.get(language)
961
+ if not context:
962
+ return
963
+
964
+ # Get successfully parsed files for this language
965
+ successful_files = [
966
+ p for p in self._parsed_files.values() if p.success and p.language == language
967
+ ]
968
+
969
+ # Set up legacy services from context
970
+ if hasattr(plugin, "set_cross_file_resolver"):
971
+ # Get raw resolver from language services
972
+ raw_resolver = context.language_services.get("_raw_cross_file_resolver")
973
+ if raw_resolver:
974
+ plugin.set_cross_file_resolver(raw_resolver)
975
+
976
+ if hasattr(plugin, "set_router_registry"):
977
+ raw_registry = context.language_services.get("_raw_router_registry")
978
+ if raw_registry:
979
+ plugin.set_router_registry(raw_registry)
980
+
981
+ if hasattr(plugin, "set_path_resolver"):
982
+ raw_path_resolver = context.language_services.get("_raw_path_resolver")
983
+ if raw_path_resolver:
984
+ plugin.set_path_resolver(raw_path_resolver)
985
+
986
+ if hasattr(plugin, "set_project_context"):
987
+ plugin.set_project_context(
988
+ successful_files,
989
+ project_root=self.config.project_root,
990
+ )
991
+
992
def _convert_route(self, route, route_id: str, framework: str = "fastapi") -> RouteModel:
    """Build a ``RouteModel`` from an extracted route.

    Args:
        route: Extracted route as produced by a framework plugin.
        route_id: Identifier to store on the resulting model.
        framework: Framework label recorded on the model.

    Returns:
        A ``RouteModel`` carrying the route's parameters, body, handler
        location and metadata.
    """

    def _location_label(param, fallback: str) -> str:
        # Prefer the parameter's own location name; fall back for plain values.
        where = param.location
        return where.name.lower() if hasattr(where, "name") else fallback

    def _located(param, fallback: str):
        # Path/query parameters keep alias and constraints.
        return HttpParameterModel(
            name=param.name,
            location=_location_label(param, fallback),
            type_annotation=param.type_annotation,
            required=param.required,
            default_value=param.default_value,
            alias=param.alias,
            constraints=param.constraints,
        )

    def _fixed(param, location: str):
        # Header/cookie parameters always use the given location and carry
        # no alias/constraints.
        return HttpParameterModel(
            name=param.name,
            location=location,
            type_annotation=param.type_annotation,
            required=param.required,
            default_value=param.default_value,
        )

    path_params = [_located(p, "path") for p in route.path_params]
    query_params = [_located(p, "query") for p in route.query_params]
    header_params = [_fixed(p, "header") for p in route.header_params]
    cookie_params = [_fixed(p, "cookie") for p in route.cookie_params]

    # Convert body — the field list is enriched with types from the resolver
    body = None
    extracted_body = route.body
    if extracted_body:
        handler_file = route.handler_location.file if route.handler_location else None
        body = HttpBodyModel(
            content_type=extracted_body.content_type,
            model_name=extracted_body.model_name,
            model_fields=self._enrich_body_fields(
                extracted_body.model_name,
                extracted_body.model_fields,
                handler_file,
            ),
            required=extracted_body.required,
        )

    return RouteModel(
        id=route_id,
        kind=route.kind,
        method=route.method.name,
        path=route.path,
        path_params=path_params,
        query_params=query_params,
        header_params=header_params,
        cookie_params=cookie_params,
        body=body,
        handler_function=route.handler_function.full,
        handler_location=LocationModel(
            file=str(route.handler_location.file),
            line=route.handler_location.line,
            column=route.handler_location.column,
        ),
        framework=framework,
        router_name=route.router_name,
        tags=route.tags,
        operation_id=route.operation_id,
        summary=route.summary,
        deprecated=route.deprecated,
        dependencies=route.dependency_refs,
        response_model=route.response.model_name,
        response_status_code=route.response.status_code,
    )
1084
+
1085
+ # =========================================================================
1086
+ # Body Field Enrichment & Schema Collection
1087
+ # =========================================================================
1088
+
1089
+ def _enrich_body_fields(
1090
+ self,
1091
+ model_name: str | None,
1092
+ raw_field_names: list[str],
1093
+ in_file: Path | None,
1094
+ ) -> list[BodyFieldModel]:
1095
+ """Resolve raw field name strings into typed BodyFieldModel objects."""
1096
+ if not model_name:
1097
+ return [BodyFieldModel(name=n) for n in raw_field_names]
1098
+
1099
+ type_resolver = self._get_type_resolver()
1100
+ if not type_resolver:
1101
+ return [BodyFieldModel(name=n) for n in raw_field_names]
1102
+
1103
+ known_models = set(type_resolver.get_all_models().keys())
1104
+ resolved_fields = type_resolver.get_model_fields(model_name, in_file)
1105
+ resolved_by_name = {f.name: f for f in resolved_fields}
1106
+ result: list[BodyFieldModel] = []
1107
+
1108
+ # When raw_field_names is empty (e.g. Ardalis pattern where body type is
1109
+ # known but fields were not pre-populated), use the resolved fields directly.
1110
+ names_to_process = raw_field_names if raw_field_names else [f.name for f in resolved_fields]
1111
+
1112
+ for name in names_to_process:
1113
+ rf = resolved_by_name.get(name)
1114
+ if rf:
1115
+ inner_type = self._unwrap_inner_type(rf.type_annotation)
1116
+ is_complex = self._is_known_model(inner_type, known_models)
1117
+ result.append(
1118
+ BodyFieldModel(
1119
+ name=name,
1120
+ type_annotation=rf.type_annotation,
1121
+ required=rf.is_required,
1122
+ is_complex=is_complex,
1123
+ nested_model=inner_type if is_complex else None,
1124
+ constraints=rf.constraints,
1125
+ )
1126
+ )
1127
+ else:
1128
+ result.append(BodyFieldModel(name=name))
1129
+
1130
+ return result
1131
+
1132
+ _ORM_BASE_CLASSES = frozenset(
1133
+ {
1134
+ "SQLModel",
1135
+ "DeclarativeBase",
1136
+ "Base",
1137
+ "Model",
1138
+ "db.Model",
1139
+ "Document",
1140
+ "EmbeddedDocument",
1141
+ }
1142
+ )
1143
+
1144
+ def _collect_schemas(self, routes: list[RouteModel]) -> dict[str, SchemaModel]:
1145
+ """
1146
+ Transitively collect all schemas referenced by route bodies.
1147
+
1148
+ Walks the body model for every route, resolves each nested model,
1149
+ and keeps going until no new models are discovered.
1150
+ """
1151
+ type_resolver = self._get_type_resolver()
1152
+ if not type_resolver:
1153
+ return {}
1154
+
1155
+ known_models = set(type_resolver.get_all_models().keys())
1156
+ simple_to_qualified: dict[str, str] = {}
1157
+ for qname in known_models:
1158
+ simple = qname.rsplit(".", 1)[-1]
1159
+ simple_to_qualified[simple] = qname
1160
+
1161
+ # Pre-compute usage sets from routes
1162
+ request_body_names: set[str] = set()
1163
+ response_model_names: set[str] = set()
1164
+ for route in routes:
1165
+ if route.body and route.body.model_name:
1166
+ request_body_names.add(route.body.model_name)
1167
+ if route.response_model:
1168
+ response_model_names.add(route.response_model)
1169
+
1170
+ schemas: dict[str, SchemaModel] = {}
1171
+ queue: list[str] = []
1172
+
1173
+ for name in request_body_names:
1174
+ if name not in schemas:
1175
+ queue.append(name)
1176
+
1177
+ for name in response_model_names:
1178
+ if name not in schemas:
1179
+ queue.append(name)
1180
+
1181
+ for model_qname in type_resolver.get_all_models():
1182
+ simple_name = model_qname.rsplit(".", 1)[-1]
1183
+ if simple_name not in schemas:
1184
+ queue.append(simple_name)
1185
+
1186
+ visited: set[str] = set()
1187
+
1188
+ while queue:
1189
+ model_name = queue.pop(0)
1190
+ if model_name in visited:
1191
+ continue
1192
+ visited.add(model_name)
1193
+
1194
+ resolved_fields = type_resolver.get_model_fields(model_name, None)
1195
+ if not resolved_fields:
1196
+ continue
1197
+
1198
+ resolved_type = type_resolver.resolve_type(model_name, None)
1199
+ qualified_name = resolved_type.qualified_name if resolved_type else None
1200
+ base_classes = (
1201
+ list(resolved_type.base_classes)
1202
+ if resolved_type and resolved_type.base_classes
1203
+ else []
1204
+ )
1205
+ is_enum = resolved_type.is_enum if resolved_type else False
1206
+
1207
+ # Determine usage based on discovery source and base classes
1208
+ usage: list[str] = []
1209
+ if model_name in request_body_names:
1210
+ usage.append("request_body")
1211
+ if model_name in response_model_names:
1212
+ usage.append("response")
1213
+ base_simple_names = {b.rsplit(".", 1)[-1] for b in base_classes}
1214
+ if base_simple_names & self._ORM_BASE_CLASSES:
1215
+ usage.append("orm")
1216
+ if not usage:
1217
+ usage.append("domain")
1218
+
1219
+ schema_fields: list[SchemaFieldModel] = []
1220
+ for rf in resolved_fields:
1221
+ inner_type = self._unwrap_inner_type(rf.type_annotation)
1222
+ is_complex = self._is_known_model(inner_type, known_models)
1223
+ schema_fields.append(
1224
+ SchemaFieldModel(
1225
+ name=rf.name,
1226
+ type_annotation=rf.type_annotation,
1227
+ required=rf.is_required,
1228
+ default_value=rf.default_value,
1229
+ is_complex=is_complex,
1230
+ nested_model=inner_type if is_complex else None,
1231
+ alias=rf.alias,
1232
+ constraints=rf.constraints,
1233
+ )
1234
+ )
1235
+ if is_complex and inner_type and inner_type not in visited:
1236
+ queue.append(inner_type)
1237
+
1238
+ schemas[model_name] = SchemaModel(
1239
+ name=model_name,
1240
+ qualified_name=qualified_name,
1241
+ fields=schema_fields,
1242
+ base_classes=base_classes,
1243
+ is_enum=is_enum,
1244
+ usage=usage,
1245
+ )
1246
+
1247
+ return schemas
1248
+
1249
+ @staticmethod
1250
+ def _unwrap_inner_type(type_annotation: str | None) -> str | None:
1251
+ """Strip Optional[...] and list[...] wrappers to get the core type name."""
1252
+ if not type_annotation:
1253
+ return None
1254
+ t = type_annotation
1255
+ for prefix in ("Optional[", "optional["):
1256
+ if t.startswith(prefix) and t.endswith("]"):
1257
+ t = t[len(prefix) : -1].strip()
1258
+ for prefix in (
1259
+ "list[",
1260
+ "List[",
1261
+ "Sequence[",
1262
+ "set[",
1263
+ "Set[",
1264
+ "frozenset[",
1265
+ "FrozenSet[",
1266
+ "tuple[",
1267
+ "Tuple[",
1268
+ ):
1269
+ if t.startswith(prefix) and t.endswith("]"):
1270
+ t = t[len(prefix) : -1].strip()
1271
+ # Handle list[Optional[X]]
1272
+ for inner_prefix in ("Optional[", "optional["):
1273
+ if t.startswith(inner_prefix) and t.endswith("]"):
1274
+ t = t[len(inner_prefix) : -1].strip()
1275
+ break
1276
+ return t
1277
+
1278
+ @staticmethod
1279
+ def _is_known_model(type_name: str | None, known_models: set[str]) -> bool:
1280
+ """Check whether a (simple) type name matches any known model."""
1281
+ if not type_name:
1282
+ return False
1283
+ if type_name in known_models:
1284
+ return True
1285
+ # known_models contains qualified names; check if the simple name
1286
+ # matches the suffix of any qualified name.
1287
+ return any(qn.endswith("." + type_name) for qn in known_models)
1288
+
1289
def _get_type_resolver(self):
    """Return the type resolver of the first available analysis context.

    Contexts are tried in the order Python, Java, C#; ``None`` is returned
    when none of them is present.
    """
    for language in (Language.PYTHON, Language.JAVA, Language.CSHARP):
        ctx = self._analysis_contexts.get(language)
        if ctx:
            return ctx.type_resolver
    return None
1297
+
1298
+ # =========================================================================
1299
+ # Dependency Extraction
1300
+ # =========================================================================
1301
+
1302
+ def _extract_all_dependencies(self) -> list[dict]:
1303
+ """Extract all dependency definitions."""
1304
+ dependencies = []
1305
+
1306
+ for parsed_file in self._parsed_files.values():
1307
+ if not parsed_file.success:
1308
+ continue
1309
+
1310
+ for fw in self._detected_frameworks:
1311
+ plugin = FrameworkPluginRegistry.get_plugin(fw)
1312
+ if plugin and plugin.detect(parsed_file):
1313
+ deps = plugin.extract_dependencies(parsed_file)
1314
+ dependencies.extend(deps)
1315
+
1316
+ return dependencies
1317
+
1318
+ # =========================================================================
1319
+ # Auth Extraction
1320
+ # =========================================================================
1321
+
1322
def _extract_auth_data(self) -> AuthModel:
    """Collect auth schemes, auth dependencies and JWT settings across all files.

    Steps:
    1. Pre-pass over the whole project to gather every name used inside
       ``Depends(...)`` and every auth-scheme name, so that dependencies
       defined in one file and used in another can be correlated.
    2. Per file and detected framework: convert the plugin's schemes and auth
       dependencies into models (malformed records are logged and skipped) and
       record the plugin's JWT configuration.
    3. If no plugin reported JWT usage, scan imports for known JWT libraries.
    4. If JWT usage is known but no scheme was found, add a synthetic
       ``JWT_BEARER`` scheme.

    Returns:
        An ``AuthModel`` with the detected schemes, dependencies and JWT config.
    """
    import re as _re

    schemes: list[AuthSchemeModel] = []
    auth_deps: list[AuthDependencyModel] = []
    jwt_config = JwtConfigModel()

    # ── Pre-pass: project-wide Depends() names and scheme names ──────────
    # Bridges the pattern where auth deps live in one module (e.g. deps.py)
    # but are consumed via Depends() in other modules (e.g. routers/*.py).
    all_project_depends_names: set[str] = set()
    all_project_scheme_names: set[str] = set()
    for pf in self._parsed_files.values():
        if not pf.success:
            continue

        for call in pf.call_sites:
            if getattr(call, "callee_name", None) != "Depends":
                continue
            for arg in call.arguments:
                # Direct reference: Depends(get_current_user)
                if getattr(arg, "is_variable", False) and arg.variable_name:
                    all_project_depends_names.add(arg.variable_name)
                if getattr(arg, "literal_value", None):
                    all_project_depends_names.add(str(arg.literal_value))
                # Factory-call pattern: Depends(get_current_user_authorizer())
                expr_text = getattr(arg, "expression_text", None) or ""
                factory = _re.match(r"(\w+)\s*\(", expr_text)
                if factory:
                    all_project_depends_names.add(factory.group(1))

        # Depends(...) appearing in parameter default values.
        for func in pf.functions:
            for param in func.parameters:
                if not param.default_value:
                    continue
                for found in _re.finditer(r"Depends\s*\(\s*(\w+)", param.default_value):
                    all_project_depends_names.add(found.group(1))

        for fw in self._detected_frameworks:
            plugin = FrameworkPluginRegistry.get_plugin(fw)
            if not plugin or not plugin.detect(pf):
                continue
            for found_scheme in plugin.extract_auth_schemes(pf):
                if found_scheme.name:
                    all_project_scheme_names.add(found_scheme.name)

    # ── Main pass: build models per file / framework ─────────────────────
    for parsed_file in self._parsed_files.values():
        if not parsed_file.success:
            continue

        for fw in self._detected_frameworks:
            plugin = FrameworkPluginRegistry.get_plugin(fw)
            if not plugin or not plugin.detect(parsed_file):
                continue

            # Extract schemes and collect names for dependency correlation
            file_scheme_names: set[str] = set()
            for scheme in plugin.extract_auth_schemes(parsed_file):
                file_scheme_names.add(scheme.name)
                try:
                    scheme_model = AuthSchemeModel(
                        id=stable_id(
                            "scheme",
                            scheme.scheme_type.name,
                            str(scheme.location.file),
                            str(scheme.location.line),
                        ),
                        type=scheme.scheme_type.name,
                        framework=fw.name.lower(),
                        location=LocationModel(
                            file=str(scheme.location.file),
                            line=scheme.location.line,
                        ),
                        config=scheme.config,
                    )
                except ValidationError as exc:
                    # Malformed scheme record — skip it rather than abort the
                    # whole analysis because one plugin produced bad data.
                    logger.warning(
                        "Skipping malformed AuthSchemeModel for scheme=%r file=%s: %s",
                        scheme.name,
                        getattr(scheme.location, "file", "<unknown>"),
                        exc,
                    )
                    continue
                schemes.append(scheme_model)

            # Extract auth dependencies using the cross-file pre-pass sets so
            # that deps defined in one file and used in another are detected
            # in both directions.
            for dep in plugin.extract_auth_dependencies(
                parsed_file,
                known_scheme_names=file_scheme_names | all_project_scheme_names,
                all_project_depends_names=all_project_depends_names,
            ):
                try:
                    dep_model = AuthDependencyModel(
                        id=stable_id("dep", dep.name, dep.dependency_type.name),
                        name=dep.name,
                        type=dep.dependency_type.name,
                        location=LocationModel(
                            file=str(dep.location.file),
                            line=dep.location.line,
                        ),
                        uses_schemes=list(dep.uses_schemes),
                        extracts=dep.extracts_fields,
                        validates=dep.validates,
                        jwt_operations=dep.jwt_operations,
                    )
                except ValidationError as exc:
                    logger.warning(
                        "Skipping malformed AuthDependencyModel for dep=%r file=%s: %s",
                        dep.name,
                        getattr(dep.location, "file", "<unknown>"),
                        exc,
                    )
                    continue
                auth_deps.append(dep_model)

            # Extract JWT config (a later detection overwrites an earlier one)
            jwt = plugin.extract_jwt_config(parsed_file)
            if jwt and jwt.detected:
                jwt_config.detected = True
                jwt_config.library = jwt.library
                jwt_config.algorithms = jwt.algorithms

    # ── Framework-independent JWT scan ───────────────────────────────────
    # Detect JWT usage in files that framework plugins don't cover
    # (e.g. service modules without framework imports). Stops at the first
    # file importing a known JWT library.
    if not jwt_config.detected:
        jwt_libraries = {"jose", "python-jose", "pyjwt", "jwt", "authlib"}
        submodule_prefixes = tuple(f"{lib}." for lib in jwt_libraries)
        for parsed_file in self._parsed_files.values():
            if not parsed_file.success:
                continue
            for imp in parsed_file.imports:
                is_jwt_import = imp.module in jwt_libraries or imp.module.startswith(
                    submodule_prefixes
                )
                if not is_jwt_import:
                    continue
                jwt_config.detected = True
                jwt_config.library = imp.module.split(".")[0]
                # Record every encode/decode-like call in this file as a location.
                for call in parsed_file.call_sites:
                    callee = call.callee_name.lower()
                    if "decode" in callee or "encode" in callee:
                        jwt_config.locations.append(call.location)
                break
            if jwt_config.detected:
                break

    # ── Synthetic fallback ───────────────────────────────────────────────
    # If JWT usage is detected (via any library) but no framework-specific
    # security objects were found, create an inferred JWT_BEARER scheme so
    # downstream auth mapping has something to match.
    if jwt_config.detected and not schemes:
        # Recorded locations are not LocationModel instances, so a
        # LocationModel is always constructed explicitly here.
        if jwt_config.locations:
            first_loc = jwt_config.locations[0]
            loc = LocationModel(
                file=str(getattr(first_loc, "file", "unknown")),
                line=getattr(first_loc, "line", 0) or 0,
            )
        else:
            loc = LocationModel(file="unknown", line=0)
        try:
            synthetic = AuthSchemeModel(
                id=stable_id("scheme", "JWT_BEARER", jwt_config.library or "jwt"),
                type="JWT_BEARER",
                framework="generic",
                location=loc,
                config={
                    "library": jwt_config.library,
                    "algorithms": jwt_config.algorithms,
                    "synthetic": True,
                },
            )
        except ValidationError as exc:
            logger.warning("Skipping synthetic JWT_BEARER AuthSchemeModel: %s", exc)
        else:
            schemes.append(synthetic)

    return AuthModel(
        schemes_detected=schemes,
        auth_dependencies=auth_deps,
        jwt_config=jwt_config,
    )
1507
+
1508
+ def _propagate_file_auth_to_routes(
1509
+ self,
1510
+ routes: list[RouteModel],
1511
+ auth_model: AuthModel,
1512
+ ) -> None:
1513
+ """
1514
+ Propagate file-level auth dependencies to routes that have no per-route
1515
+ auth signal (router_name is None).
1516
+
1517
+ Middleware-based auth frameworks (Express, NestJS without per-route guards)
1518
+ express auth as router.use(middleware) at the sub-router level.
1519
+ extract_auth_dependencies() correctly detects these as MIDDLEWARE-type deps,
1520
+ but the routes themselves carry no router_name because the middleware is not
1521
+ an inline argument to the route registration call.
1522
+
1523
+ This pass closes that gap: for each route without router_name, if its
1524
+ handler file has a MIDDLEWARE auth dependency, set router_name to that
1525
+ dependency's name so the benchmark's auth signal is populated.
1526
+
1527
+ Only MIDDLEWARE-type deps are used — CLASS/DECORATOR/ANNOTATION deps are
1528
+ already captured directly on routes during extraction.
1529
+ """
1530
+ # Build file → first MIDDLEWARE auth dep name
1531
+ file_to_auth_dep: dict[str, str] = {}
1532
+ for dep in auth_model.auth_dependencies:
1533
+ if dep.type != "MIDDLEWARE":
1534
+ continue
1535
+ if not dep.location or not dep.location.file:
1536
+ continue
1537
+ file_key = dep.location.file
1538
+ if file_key not in file_to_auth_dep:
1539
+ file_to_auth_dep[file_key] = dep.name
1540
+
1541
+ if not file_to_auth_dep:
1542
+ return
1543
+
1544
+ for route in routes:
1545
+ if route.router_name is not None:
1546
+ continue # already has a per-route auth signal
1547
+ if not route.handler_location or not route.handler_location.file:
1548
+ continue
1549
+ dep_name = file_to_auth_dep.get(route.handler_location.file)
1550
+ if dep_name:
1551
+ route.router_name = dep_name
1552
+
1553
def _apply_spring_filter_chain_policy(self, routes: list[RouteModel]) -> None:
    """Mark Spring/GraphQL routes covered by the global SecurityFilterChain.

    The policy is obtained from the registered Spring Boot plugin using every
    parsed file. Nothing happens when that plugin is not a ``SpringBootPlugin``,
    when no policy is returned, or when the policy's ``any_request_auth`` flag
    is falsy.

    Otherwise each route with ``router_name is None`` whose framework is
    ``"spring_boot"`` or ``"graphql"`` and for which
    ``policy.requires_auth(route.path)`` is truthy gets
    ``router_name = "SecurityFilterChain"``. Routes that already have a
    ``router_name`` are never changed.
    """
    from ..frameworks.java.spring_plugin import FilterChainPolicy, SpringBootPlugin

    spring_plugin = FrameworkPluginRegistry.get_plugin(Framework.SPRING_BOOT)
    if not isinstance(spring_plugin, SpringBootPlugin):
        return

    parsed = list(self._parsed_files.values())
    chain_policy: FilterChainPolicy | None = spring_plugin.get_filter_chain_policy(parsed)
    if chain_policy is None or not chain_policy.any_request_auth:
        return

    covered_frameworks = ("spring_boot", "graphql")
    for endpoint in routes:
        if (
            endpoint.router_name is None
            and endpoint.framework in covered_frameworks
            and chain_policy.requires_auth(endpoint.path)
        ):
            endpoint.router_name = "SecurityFilterChain"
1579
+
1580
+ def _populate_route_auth_mapping(
1581
+ self,
1582
+ auth_model: AuthModel,
1583
+ routes: list[RouteModel],
1584
+ middleware_list: list[MiddlewareModel] | None = None,
1585
+ ) -> None:
1586
+ """Generate route_auth_mapping by correlating route dependencies with auth deps.
1587
+
1588
+ A route is considered authenticated if any of its ``dependencies``
1589
+ reference a known auth dependency (by name match). Auth dependencies
1590
+ are identified by their name appearing in ``auth_model.auth_dependencies``.
1591
+
1592
+ Heuristic fallback: if no formal auth dependencies were detected but
1593
+ routes have dependency names containing auth-related keywords
1594
+ (``auth``, ``user``, ``token``, ``permission``, ``login``, ``verify``),
1595
+ we still treat those as auth-related.
1596
+
1597
+ Middleware-based auth (e.g. ``app.add_middleware(AuthMiddleware, ...)``)
1598
+ is also propagated: if any globally-applied middleware declares
1599
+ ``operations=['auth']``, every route without route-level auth is marked
1600
+ authenticated with mechanism ``middleware:<name>`` so the engine's
1601
+ missing_auth rule will not fire for middleware-protected apps.
1602
+ """
1603
+ AUTH_KEYWORDS = {
1604
+ "auth",
1605
+ "user",
1606
+ "token",
1607
+ "permission",
1608
+ "login",
1609
+ "verify",
1610
+ "current_user",
1611
+ "principal",
1612
+ "credential",
1613
+ "identity",
1614
+ "session",
1615
+ "bearer",
1616
+ "api_key",
1617
+ "apikey",
1618
+ "security",
1619
+ "authorize",
1620
+ "authenticated",
1621
+ }
1622
+
1623
+ known_auth_names = {dep.name for dep in auth_model.auth_dependencies}
1624
+
1625
+ global_auth_middleware: list[MiddlewareModel] = []
1626
+ if middleware_list:
1627
+ for mw in middleware_list:
1628
+ if "auth" not in mw.operations:
1629
+ continue
1630
+ # Only applies_to=["all"] (set by explicit app.UseMiddleware<T>() registration)
1631
+ # indicates truly global scope. Empty applies_to means "not explicitly scoped",
1632
+ # which is how class-based definitions are recorded — they are NOT necessarily global.
1633
+ if mw.applies_to == ["all"]:
1634
+ global_auth_middleware.append(mw)
1635
+
1636
+ has_global_auth_middleware = bool(global_auth_middleware)
1637
+ middleware_mechanisms = [f"middleware:{mw.name}" for mw in global_auth_middleware]
1638
+
1639
+ mapping: list[RouteAuthModel] = []
1640
+ for route in routes:
1641
+ if not route.dependencies:
1642
+ # No route-level Depends(), but a global auth middleware may
1643
+ # still protect this endpoint.
1644
+ if has_global_auth_middleware:
1645
+ mapping.append(
1646
+ RouteAuthModel(
1647
+ route_id=route.id,
1648
+ auth_required=True,
1649
+ mechanisms=list(middleware_mechanisms),
1650
+ )
1651
+ )
1652
+ else:
1653
+ mapping.append(
1654
+ RouteAuthModel(
1655
+ route_id=route.id,
1656
+ auth_required=False,
1657
+ )
1658
+ )
1659
+ continue
1660
+
1661
+ matched_mechanisms: list[str] = []
1662
+ for dep_name in route.dependencies:
1663
+ dep_base = dep_name.rsplit(".", 1)[-1]
1664
+
1665
+ if dep_base in known_auth_names or dep_name in known_auth_names:
1666
+ matched_mechanisms.append(dep_name)
1667
+ continue
1668
+
1669
+ name_lower = dep_base.lower()
1670
+ if any(kw in name_lower for kw in AUTH_KEYWORDS):
1671
+ matched_mechanisms.append(dep_name)
1672
+
1673
+ # Even if route-level deps didn't match, middleware can still
1674
+ # provide auth.
1675
+ if not matched_mechanisms and has_global_auth_middleware:
1676
+ matched_mechanisms = list(middleware_mechanisms)
1677
+
1678
+ mapping.append(
1679
+ RouteAuthModel(
1680
+ route_id=route.id,
1681
+ auth_required=bool(matched_mechanisms),
1682
+ mechanisms=matched_mechanisms,
1683
+ )
1684
+ )
1685
+
1686
+ auth_model.route_auth_mapping = mapping
1687
+ auth_model.global_middleware_auth = has_global_auth_middleware
1688
+
1689
+ def _link_routes_to_auth_schemes(
1690
+ self,
1691
+ routes: list[RouteModel],
1692
+ auth_model: AuthModel,
1693
+ ) -> None:
1694
+ """Populate ``auth_config_ref`` on each route from the auth model.
1695
+
1696
+ Strategy:
1697
+ 1. Build a dep-name → scheme-id index from ``auth_dependencies.uses_schemes``.
1698
+ 2. For each authenticated route, look up its mechanisms (dep names) in
1699
+ that index and set ``auth_config_ref`` to the first matching scheme.
1700
+ 3. Fallback: if the index yields no match but auth *is* required and
1701
+ at least one scheme exists, assign the first scheme (common in
1702
+ single-scheme apps).
1703
+ """
1704
+ scheme_by_dep: dict[str, str] = {}
1705
+ for dep in auth_model.auth_dependencies:
1706
+ for scheme_id in dep.uses_schemes:
1707
+ scheme_by_dep[dep.name] = scheme_id
1708
+
1709
+ mapping_by_route: dict[str, RouteAuthModel] = {
1710
+ m.route_id: m for m in auth_model.route_auth_mapping
1711
+ }
1712
+ fallback_scheme = auth_model.schemes_detected[0].id if auth_model.schemes_detected else None
1713
+
1714
+ for route in routes:
1715
+ mapping = mapping_by_route.get(route.id)
1716
+ if not mapping or not mapping.auth_required:
1717
+ continue
1718
+ for mech in mapping.mechanisms:
1719
+ mech_base = mech.rsplit(".", 1)[-1]
1720
+ if mech_base in scheme_by_dep:
1721
+ route.auth_config_ref = scheme_by_dep[mech_base]
1722
+ break
1723
+ if mech in scheme_by_dep:
1724
+ route.auth_config_ref = scheme_by_dep[mech]
1725
+ break
1726
+ if not route.auth_config_ref and fallback_scheme:
1727
+ route.auth_config_ref = fallback_scheme
1728
+
1729
+ # =========================================================================
1730
+ # Middleware Extraction
1731
+ # =========================================================================
1732
+
1733
+ def _extract_middleware(self) -> list[MiddlewareModel]:
1734
+ """Extract middleware from all files."""
1735
+ middleware: list[MiddlewareModel] = []
1736
+
1737
+ for parsed_file in self._parsed_files.values():
1738
+ if not parsed_file.success:
1739
+ continue
1740
+
1741
+ for fw in self._detected_frameworks:
1742
+ plugin = FrameworkPluginRegistry.get_plugin(fw)
1743
+ if plugin and plugin.detect(parsed_file):
1744
+ for mw in plugin.extract_middleware(parsed_file):
1745
+ middleware.append(
1746
+ MiddlewareModel(
1747
+ id=stable_id("mw", mw.name, mw.middleware_type),
1748
+ name=mw.name,
1749
+ type=mw.middleware_type,
1750
+ location=LocationModel(
1751
+ file=str(mw.location.file),
1752
+ line=mw.location.line,
1753
+ ),
1754
+ order=mw.order,
1755
+ applies_to=mw.applies_to_patterns or ["all"]
1756
+ if mw.applies_to_all
1757
+ else [],
1758
+ operations=mw.operations,
1759
+ )
1760
+ )
1761
+
1762
+ return middleware
1763
+
1764
+ # =========================================================================
1765
+ # Call Site Extraction
1766
+ # =========================================================================
1767
+
1768
+ def _extract_all_calls(self) -> list[FunctionCallModel]:
1769
+ """Extract all function call sites from the call graph.
1770
+
1771
+ Assigns a ``sequence_index`` to each call so the reasoning engine
1772
+ can reason about intra-function ordering (e.g. mutation-before-authz).
1773
+ """
1774
+ if not self._call_graph:
1775
+ return []
1776
+
1777
+ raw_calls = list(self._call_graph.to_manifest_calls())
1778
+
1779
+ # Group by caller and sort by line number to assign sequence_index.
1780
+ from collections import defaultdict
1781
+
1782
+ by_caller: dict[str, list[dict]] = defaultdict(list)
1783
+ for cd in raw_calls:
1784
+ by_caller[cd["caller"]].append(cd)
1785
+ for group in by_caller.values():
1786
+ group.sort(key=lambda c: c["location"].get("line", 0))
1787
+ for idx, cd in enumerate(group):
1788
+ cd["_sequence_index"] = idx
1789
+
1790
+ calls: list[FunctionCallModel] = []
1791
+ for call_dict in raw_calls:
1792
+ try:
1793
+ ctx = call_dict.get("context", {})
1794
+ call = FunctionCallModel(
1795
+ id=call_dict["id"],
1796
+ caller=call_dict["caller"],
1797
+ callee=call_dict["callee"],
1798
+ callee_resolved=call_dict["callee_resolved"],
1799
+ location=LocationModel(
1800
+ file=call_dict["location"]["file"],
1801
+ line=call_dict["location"]["line"],
1802
+ column=call_dict["location"].get("column", 0),
1803
+ ),
1804
+ context=CallContextModel(
1805
+ in_try_block=ctx.get("in_try_block", False),
1806
+ in_conditional=ctx.get("in_conditional", False),
1807
+ in_loop=ctx.get("in_loop", False),
1808
+ ),
1809
+ possible_callees=call_dict.get("possible_callees", []),
1810
+ sequence_index=call_dict.get("_sequence_index"),
1811
+ )
1812
+ calls.append(call)
1813
+ except (KeyError, ValueError, TypeError) as e:
1814
+ logger.error("Failed to convert call: %s", e, exc_info=True)
1815
+ self._warnings.append(
1816
+ AnalysisWarning(
1817
+ phase="call_extraction",
1818
+ message=f"Failed to convert call: {e}",
1819
+ file=call_dict.get("location", {}).get("file"),
1820
+ )
1821
+ )
1822
+ continue
1823
+
1824
+ return calls
1825
+
1826
+ # =========================================================================
1827
+ # Data Flow Analysis
1828
+ # =========================================================================
1829
+
1830
+ def _analyze_data_flows(
1831
+ self, routes: list[RouteModel]
1832
+ ) -> tuple[list[DataFlowModel], dict[str, int]]:
1833
+ """
1834
+ Analyze data flows from entry points to sinks.
1835
+
1836
+ Uses inter-procedural taint tracking through the call graph.
1837
+ Returns (flows, stats) for manifest metadata.
1838
+ """
1839
+ flows: list[DataFlowModel] = []
1840
+ self._data_flow_stats = {
1841
+ "entry_points_analyzed": 0,
1842
+ "origins_identified": 0,
1843
+ "flows_discovered": 0,
1844
+ "truncated_flows": 0,
1845
+ }
1846
+
1847
+ if not self._call_graph:
1848
+ return flows, self._data_flow_stats
1849
+
1850
+ successful_files = [p for p in self._parsed_files.values() if p.success]
1851
+ if not successful_files:
1852
+ return flows, self._data_flow_stats
1853
+
1854
+ entry_points = [
1855
+ {
1856
+ "id": r.id,
1857
+ "path": r.path,
1858
+ "handler_qualified_name": r.handler_function,
1859
+ "framework": r.framework,
1860
+ "path_params": [{"name": p.name} for p in (r.path_params or [])],
1861
+ "query_params": [{"name": p.name} for p in (r.query_params or [])],
1862
+ "header_params": [{"name": p.name} for p in (r.header_params or [])],
1863
+ "cookie_params": [{"name": p.name} for p in (r.cookie_params or [])],
1864
+ "body": {
1865
+ "model_fields": [
1866
+ {"name": f.name} if hasattr(f, "name") else {"name": f}
1867
+ for f in (r.body.model_fields or [])
1868
+ ]
1869
+ }
1870
+ if r.body
1871
+ else None,
1872
+ }
1873
+ for r in routes
1874
+ ]
1875
+
1876
+ # Reuse shared flow-sensitive bindings (built once in _build_shared_flow_bindings)
1877
+ flow_bindings = self._flow_bindings
1878
+
1879
+ # Resolve handler names to call graph symbols (qualified name consistency)
1880
+ entry_points = self._resolve_entry_points(entry_points)
1881
+
1882
+ max_depth = self.config.analysis.data_flow.max_depth
1883
+
1884
+ try:
1885
+ df_analyzer = DataFlowAnalyzer(
1886
+ call_graph=self._call_graph,
1887
+ parsed_files=successful_files,
1888
+ entry_points=entry_points,
1889
+ flow_bindings=flow_bindings,
1890
+ max_depth=max_depth,
1891
+ )
1892
+ df_analyzer.analyze()
1893
+ self._data_flow_stats = df_analyzer.get_statistics()
1894
+
1895
+ # Convert to manifest format
1896
+ for flow_dict in df_analyzer.to_manifest_flows():
1897
+ try:
1898
+ flow_model = DataFlowModel(**flow_dict)
1899
+ flows.append(flow_model)
1900
+ except (KeyError, ValueError, TypeError) as e:
1901
+ logger.error("Failed to convert data flow to model: %s", e, exc_info=True)
1902
+ self._warnings.append(
1903
+ AnalysisWarning(
1904
+ phase="data_flow",
1905
+ message=f"Failed to convert data flow: {e}",
1906
+ )
1907
+ )
1908
+
1909
+ except Exception as e:
1910
+ logger.error("Data flow analysis failed: %s", e, exc_info=True)
1911
+ self._warnings.append(
1912
+ AnalysisWarning(
1913
+ phase="data_flow",
1914
+ message=f"Data flow analysis failed: {e}",
1915
+ )
1916
+ )
1917
+
1918
+ return flows, self._data_flow_stats
1919
+
1920
+ def _resolve_entry_points(self, entry_points: list[dict[str, Any]]) -> list[dict[str, Any]]:
1921
+ """
1922
+ Resolve handler qualified names to call graph symbols.
1923
+
1924
+ Handles mismatches between route extraction and call graph naming
1925
+ (e.g., module path variations).
1926
+ """
1927
+ if not self._call_graph:
1928
+ return entry_points
1929
+
1930
+ symbols = set(self._call_graph._symbols.keys())
1931
+ resolved = []
1932
+
1933
+ for ep in entry_points:
1934
+ handler = ep.get("handler_qualified_name")
1935
+ if not handler:
1936
+ resolved.append(ep)
1937
+ continue
1938
+
1939
+ if handler in symbols:
1940
+ resolved.append(ep)
1941
+ continue
1942
+
1943
+ # Try common variations
1944
+ candidates = [
1945
+ handler,
1946
+ handler.replace(".__init__", ""),
1947
+ ]
1948
+ # Try with/without package prefix (e.g., "app.main.get_user" -> "main.get_user")
1949
+ if "." in handler:
1950
+ parts = handler.split(".")
1951
+ for i in range(1, len(parts)):
1952
+ variant = ".".join(parts[i:])
1953
+ if variant not in candidates:
1954
+ candidates.append(variant)
1955
+
1956
+ for cand in candidates:
1957
+ if cand in symbols:
1958
+ ep = dict(ep)
1959
+ ep["handler_qualified_name"] = cand
1960
+ break
1961
+ resolved.append(ep)
1962
+
1963
+ return resolved
1964
+
1965
+ # =========================================================================
1966
+ # Integration Detection
1967
+ # =========================================================================
1968
+
1969
def _detect_integrations(self) -> list[IntegrationModel]:
    """Run ``IntegrationDetector`` over all parsed files and return its manifest models.

    Every entry of ``self._parsed_files`` is handed to the detector; no
    filtering on parse success is done here.
    """
    from .integration_detector import IntegrationDetector

    parsed = list(self._parsed_files.values())
    return IntegrationDetector(parsed).to_manifest_models()
1975
+
1976
def _scan_literals(self) -> LiteralsModel:
    """Run ``LiteralScanner`` over all parsed files and return its manifest models.

    Every entry of ``self._parsed_files`` is handed to the scanner; no
    filtering on parse success is done here.
    """
    from .literal_scanner import LiteralScanner

    parsed = list(self._parsed_files.values())
    return LiteralScanner(parsed).to_manifest_models()
1982
+
1983
def _classify_sensitivity(
    self,
    integrations: list[IntegrationModel],
    literals: LiteralsModel,
) -> None:
    """Pass integrations and SQL patterns to a ``SensitivityClassifier``.

    The integrations list and ``literals.sql_patterns`` are handed to the
    classifier's ``enrich_*`` methods, in that order. Nothing is returned;
    any enrichment happens on the objects passed in.
    """
    from .sensitivity_classifier import SensitivityClassifier

    engine = SensitivityClassifier()
    sql_patterns = literals.sql_patterns
    engine.enrich_integrations(integrations)
    engine.enrich_sql_patterns(sql_patterns)
1994
+
1995
def _tag_capabilities(
    self,
    integrations: list[IntegrationModel],
    routes: list[RouteModel],
    configuration: ConfigurationModel,
) -> list[CapabilityModel]:
    """Return the capabilities a ``CapabilityTagger`` derives from the inputs.

    A fresh tagger is created per call and given the integrations, routes
    and configuration unchanged.
    """
    from .capability_tagger import CapabilityTagger

    return CapabilityTagger().tag(integrations, routes, configuration)
2006
+
2007
+ # =========================================================================
2008
+ # Configuration Extraction
2009
+ # =========================================================================
2010
+
2011
def _extract_configuration(self) -> ConfigurationModel:
    """Build the configuration model from code references and project files.

    Three steps:

    1. In every successfully parsed file, collect calls to
       ``os.environ.get``, ``os.getenv`` or ``environ.get`` whose first
       positional argument is a truthy literal; group the call locations
       by variable name.
    2. Ask ``ConfigScanner`` (rooted at the project root) for env-var models
       and append those whose name was not already found in step 1.
    3. Ask the same scanner for config-file models.
    """
    from .config_scanner import ConfigScanner

    getter_names = {"os.environ.get", "os.getenv", "environ.get"}

    # Step 1: variable name -> every location that reads it in code.
    sightings: dict[str, list[LocationModel]] = {}
    for source_file in self._parsed_files.values():
        if not source_file.success:
            continue
        env_reads = (c for c in source_file.call_sites if c.callee_name in getter_names)
        for read in env_reads:
            for argument in read.arguments:
                if argument.position != 0 or not argument.literal_value:
                    continue
                sightings.setdefault(str(argument.literal_value), []).append(
                    LocationModel(
                        file=str(read.location.file),
                        line=read.location.line,
                    )
                )

    usages: list[EnvVarUsageModel] = []
    for var_name, places in sightings.items():
        usages.append(EnvVarUsageModel(name=var_name, locations=places))

    # Step 2: add scanner-reported variables not already referenced in code.
    file_scanner = ConfigScanner(self.config.project_root)
    from_files = file_scanner.to_env_var_models()
    already_seen = {usage.name for usage in usages}
    usages.extend(model for model in from_files if model.name not in already_seen)

    # Step 3: config files reported by the scanner.
    discovered_files = file_scanner.to_config_file_models()

    return ConfigurationModel(env_vars_used=usages, config_files=discovered_files)
2052
+
2053
+ # =========================================================================
2054
+ # Function/Class Collection
2055
+ # =========================================================================
2056
+
2057
+ def _collect_all_functions(self) -> tuple[list[FunctionModel], dict[str, str]]:
2058
+ """Collect all functions (free + class methods) from parsed files.
2059
+
2060
+ Returns the function list and a qualified_name -> func_id index
2061
+ so that _collect_all_classes can resolve method IDs.
2062
+ """
2063
+ functions: list[FunctionModel] = []
2064
+ qn_to_id: dict[str, str] = {}
2065
+
2066
+ def _normalize_body(source: str | None) -> str:
2067
+ """Normalize function body for hashing: strip comments, blank lines, and collapse whitespace."""
2068
+ if not source:
2069
+ return ""
2070
+ lines = []
2071
+ for line in source.splitlines():
2072
+ stripped = line.strip()
2073
+ if not stripped or stripped.startswith("#"):
2074
+ continue
2075
+ if "#" in stripped:
2076
+ stripped = stripped[: stripped.index("#")].rstrip()
2077
+ if stripped:
2078
+ lines.append(stripped)
2079
+ return "\n".join(lines)
2080
+
2081
+ def _content_hash(func: ParsedFunction) -> str: # noqa: F821
2082
+ parts = []
2083
+ for p in func.parameters:
2084
+ parts.append(f"{p.name}:{p.type_annotation or ''}")
2085
+ parts.append(f"ret:{func.return_type or ''}")
2086
+ parts.append(f"ann:{','.join(d.name for d in func.decorators)}")
2087
+ body = _normalize_body(func.body_source)
2088
+ parts.append(f"body:{hashlib.sha256(body.encode()).hexdigest()[:16]}")
2089
+ raw = "|".join(parts)
2090
+ return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
2091
+
2092
+ def _append(func: ParsedFunction) -> None: # noqa: F821
2093
+ func_id = stable_id("func", func.qualified_name.full)
2094
+ qn_to_id[func.qualified_name.full] = func_id
2095
+ functions.append(
2096
+ FunctionModel(
2097
+ id=func_id,
2098
+ qualified_name=func.qualified_name.full,
2099
+ name=func.name,
2100
+ location=LocationModel(
2101
+ file=str(func.location.file),
2102
+ line=func.location.line,
2103
+ column=func.location.column,
2104
+ end_line=func.location.end_line,
2105
+ ),
2106
+ is_async=func.is_async,
2107
+ binding=func.binding,
2108
+ visibility=self._infer_visibility(func.name),
2109
+ parameters=[
2110
+ ParameterModel(
2111
+ name=p.name,
2112
+ type_annotation=p.type_annotation,
2113
+ default_value=p.default_value,
2114
+ is_variadic=p.is_variadic,
2115
+ is_keyword_variadic=p.is_keyword_variadic,
2116
+ )
2117
+ for p in func.parameters
2118
+ ],
2119
+ return_type=func.return_type,
2120
+ annotations=[d.name for d in func.decorators],
2121
+ docstring=func.docstring,
2122
+ owner_type=func.owner_type,
2123
+ content_hash=_content_hash(func),
2124
+ )
2125
+ )
2126
+
2127
+ for parsed_file in self._parsed_files.values():
2128
+ if not parsed_file.success:
2129
+ continue
2130
+ for func in parsed_file.functions:
2131
+ _append(func)
2132
+ for cls in parsed_file.classes:
2133
+ for method in cls.methods:
2134
+ _append(method)
2135
+
2136
+ return functions, qn_to_id
2137
+
2138
+ @staticmethod
2139
+ def _infer_visibility(name: str) -> str | None:
2140
+ """Infer visibility from Python naming convention."""
2141
+ if name.startswith("__") and name.endswith("__"):
2142
+ return "public"
2143
+ if name.startswith("__") or name.startswith("_"):
2144
+ return "private"
2145
+ return "public"
2146
+
2147
+ def _collect_all_classes(self, func_id_index: dict[str, str]) -> list[ClassModel]:
2148
+ """Collect all classes from parsed files.
2149
+
2150
+ Args:
2151
+ func_id_index: qualified_name -> func_id mapping built by
2152
+ _collect_all_functions, used to resolve method IDs.
2153
+ """
2154
+ classes: list[ClassModel] = []
2155
+
2156
+ for parsed_file in self._parsed_files.values():
2157
+ if not parsed_file.success:
2158
+ continue
2159
+
2160
+ for cls in parsed_file.classes:
2161
+ method_ids = [func_id_index.get(m.qualified_name.full, m.name) for m in cls.methods]
2162
+ classes.append(
2163
+ ClassModel(
2164
+ id=stable_id("cls", cls.qualified_name.full),
2165
+ qualified_name=cls.qualified_name.full,
2166
+ name=cls.name,
2167
+ location=LocationModel(
2168
+ file=str(cls.location.file),
2169
+ line=cls.location.line,
2170
+ column=cls.location.column,
2171
+ end_line=cls.location.end_line,
2172
+ ),
2173
+ base_classes=cls.base_classes,
2174
+ annotations=[d.name for d in cls.decorators],
2175
+ methods=method_ids,
2176
+ docstring=cls.docstring,
2177
+ )
2178
+ )
2179
+
2180
+ return classes
2181
+
2182
+ # =========================================================================
2183
+ # Manifest Building
2184
+ # =========================================================================
2185
+
2186
def _build_analysis_metadata(self) -> AnalysisMetadataModel:
    """Assemble analysis metadata from configuration and collected statistics.

    ``truncated_flows`` comes from ``self._data_flow_stats`` (0 if absent).
    ``unresolved_calls`` is the ``"UNRESOLVED"`` count inside the call
    graph's ``"resolution_confidence"`` statistics, or 0 when there is no
    call graph or the key is missing. Mode and depth are read from the
    data-flow configuration.
    """
    unresolved = 0
    graph = self._call_graph
    if graph:
        confidence = graph.get_statistics().get("resolution_confidence", {})
        unresolved = confidence.get("UNRESOLVED", 0)

    flow_config = self.config.analysis.data_flow
    return AnalysisMetadataModel(
        data_flow_mode=flow_config.mode,
        data_flow_depth=flow_config.max_depth,
        truncated_flows=self._data_flow_stats.get("truncated_flows", 0),
        unresolved_calls=unresolved,
    )
2202
+
2203
def _build_manifest(
    self,
    routes: list[RouteModel],
    functions: list[FunctionModel],
    classes: list[ClassModel],
    calls: list[FunctionCallModel],
    data_flows: list[DataFlowModel],
    auth: AuthModel,
    middleware: list[MiddlewareModel],
    dependencies: DependenciesModel,  # noqa: F821
    integrations: list[IntegrationModel],
    capabilities: list[CapabilityModel],
    schemas: dict[str, SchemaModel],
    literals: LiteralsModel,
    configuration: ConfigurationModel,
    analysis_time_ms: int,
) -> Manifest:
    """Build the final manifest from the already-computed analysis parts.

    Besides passing the supplied collections through, this derives:

    * the project name: ``[project].name`` from ``pyproject.toml`` when the
      file exists and can be read, otherwise the project root's directory
      name;
    * the detected languages, in the fixed order python, java, csharp,
      javascript, from the discovery result (empty when discovery did not
      run);
    * lower-cased names of the detected frameworks;
    * counts of successfully and unsuccessfully parsed files.

    Args:
        analysis_time_ms: Value stored as ``analysis_duration_ms``.

    Returns:
        The assembled ``Manifest``.
    """
    from datetime import timezone

    # Project name: directory name unless pyproject.toml provides one.
    project_name = self.config.project_root.name
    pyproject = self.config.project_root / "pyproject.toml"
    if pyproject.exists():
        try:
            import tomllib

            # TOML files are UTF-8 by specification; do not depend on the
            # platform's locale encoding.
            data = tomllib.loads(pyproject.read_text(encoding="utf-8"))
            project_name = data.get("project", {}).get("name", project_name)
        except Exception as e:
            # Best effort only: keep the directory name on any failure.
            logger.debug("Could not read project name from pyproject.toml: %s", e)

    # Resolve the file list once, guarding against a missing discovery
    # result *before* dereferencing it. (A guard placed in a generator's
    # filter clause runs only after ``.files`` has already been evaluated.)
    discovered_files = self._discovery_result.files if self._discovery_result else []
    language_labels = (
        (Language.PYTHON, "python"),
        (Language.JAVA, "java"),
        (Language.CSHARP, "csharp"),
        (Language.JAVASCRIPT, "javascript"),
    )
    languages = [
        label
        for language, label in language_labels
        if any(f.language == language for f in discovered_files)
    ]

    frameworks = [fw.name.lower() for fw in self._detected_frameworks]

    parsed = list(self._parsed_files.values())
    files_analyzed = sum(1 for p in parsed if p.success)
    files_skipped = len(parsed) - files_analyzed

    # Same naive-UTC value datetime.utcnow() produced, obtained without the
    # deprecated call so the manifest's timestamp format is unchanged.
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None)

    return Manifest(
        manifest_version=MANIFEST_VERSION,
        generated_at=generated_at,
        probe_version=_get_probe_version(),
        project=ProjectMetadata(
            root=str(self.config.project_root),
            name=project_name,
            languages_detected=languages,
            frameworks_detected=frameworks,
            files_analyzed=files_analyzed,
            files_skipped=files_skipped,
            analysis_duration_ms=analysis_time_ms,
        ),
        entry_points=routes,
        functions=functions,
        classes=classes,
        calls=calls,
        data_flows=data_flows,
        auth=auth,
        middleware=middleware,
        dependencies=dependencies,
        integrations=integrations,
        capabilities=capabilities,
        schemas=schemas,
        literals=literals,
        configuration=configuration,
        analysis_metadata=self._build_analysis_metadata(),
    )
2291
+
2292
+
2293
+ # =============================================================================
2294
+ # Convenience Function
2295
+ # =============================================================================
2296
+
2297
+
2298
def analyze_project(config: CodeBoltConfig) -> AnalysisResult:
    """
    Run a full analysis for one configuration.

    Convenience wrapper that creates a ``ProjectAnalyzer`` for ``config``
    and returns the result of its ``analyze()`` call.

    Args:
        config: Configuration for the analysis

    Returns:
        The ``AnalysisResult`` produced by the analyzer
    """
    return ProjectAnalyzer(config).analyze()