apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File discovery module for traversing project directories.
|
|
3
|
+
|
|
4
|
+
This module handles:
|
|
5
|
+
- Recursive directory traversal
|
|
6
|
+
- .gitignore pattern matching
|
|
7
|
+
- File filtering by extension, size, and exclusion patterns
|
|
8
|
+
- Symlink handling
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
from collections.abc import Iterator
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
import pathspec
|
|
20
|
+
|
|
21
|
+
from .config import FileDiscoveryConfig
|
|
22
|
+
from .types import AnalysisNote, Language
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# =============================================================================
|
|
29
|
+
# File Discovery Result
|
|
30
|
+
# =============================================================================
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class DiscoveredFile:
    """A single file encountered during discovery, selected or skipped."""

    # Path of the file as produced by the directory walk.
    path: Path
    # The same file expressed relative to the discovery root.
    relative_path: Path
    # Language detected for the file; None when no language was assigned.
    language: Language | None
    # Size of the file in bytes.
    size_bytes: int

    # None for a selected file; otherwise a short tag explaining the skip.
    skip_reason: str | None = field(default=None)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class DiscoveryResult:
    """Outcome of one discovery run: selected files, skipped files and counters."""

    # Directory the discovery run was rooted at.
    root: Path
    # Files selected for analysis.
    files: list[DiscoveredFile] = field(default_factory=list)
    # Files that were seen but rejected (each carries a skip_reason).
    skipped_files: list[DiscoveredFile] = field(default_factory=list)

    # Counters maintained by the discovery loop.
    total_scanned: int = 0
    total_selected: int = 0
    total_skipped: int = 0
    total_size_bytes: int = 0

    # Warnings raised while discovering (e.g. file limit reached).
    notes: list[AnalysisNote] = field(default_factory=list)

    def files_by_language(self, language: Language) -> list[DiscoveredFile]:
        """Return the selected files whose language equals ``language``."""
        matching: list[DiscoveredFile] = []
        for candidate in self.files:
            if candidate.language == language:
                matching.append(candidate)
        return matching
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# =============================================================================
|
|
69
|
+
# Language Detection
|
|
70
|
+
# =============================================================================
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Extension to language mapping
|
|
74
|
+
# Lower-case file suffix (including the dot) -> language used to analyze it.
EXTENSION_TO_LANGUAGE: dict[str, Language] = {
    # Python sources; ".pyi" covers type stubs
    **dict.fromkeys((".py", ".pyw", ".pyi"), Language.PYTHON),
    # Java
    **dict.fromkeys((".java",), Language.JAVA),
    # C# / .NET
    **dict.fromkeys((".cs",), Language.CSHARP),
    # JavaScript and TypeScript are both mapped to the JAVASCRIPT language
    **dict.fromkeys((".js", ".mjs", ".cjs", ".ts", ".tsx"), Language.JAVASCRIPT),
}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def detect_language(file_path: Path) -> Language | None:
    """Return the language mapped to the file's suffix, or None if unmapped.

    The suffix is lower-cased before the lookup, so ``Foo.PY`` and ``foo.py``
    resolve to the same language.
    """
    return EXTENSION_TO_LANGUAGE.get(file_path.suffix.lower())
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# =============================================================================
|
|
99
|
+
# Gitignore Handling
|
|
100
|
+
# =============================================================================
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class GitignoreManager:
    """
    Manages .gitignore patterns for a project.

    Handles:
    - Loading .gitignore from project root
    - Loading nested .gitignore files in subdirectories
    - Matching paths (files and directories) against every loaded spec
    """

    def __init__(self, root: Path) -> None:
        """Create a manager for ``root`` and load its top-level .gitignore."""
        self.root = root
        # Directory containing a .gitignore -> compiled spec for that file.
        self._specs: dict[Path, pathspec.PathSpec] = {}
        self._load_gitignore(root)

    def _load_gitignore(self, directory: Path) -> None:
        """Load ``directory/.gitignore`` into the spec table if it exists."""
        gitignore_path = directory / ".gitignore"
        # is_file() is already False for a missing path.
        if not gitignore_path.is_file():
            return

        try:
            with open(gitignore_path, encoding="utf-8", errors="ignore") as f:
                raw_lines = f.read().splitlines()
            # Drop blank lines and comments; keep the stripped pattern text.
            patterns = [
                line.strip()
                for line in raw_lines
                if line.strip() and not line.strip().startswith("#")
            ]
            if patterns:
                self._specs[directory] = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
        except Exception:
            # Deliberately best-effort: an unreadable or unparsable .gitignore
            # must not abort discovery, so the file is simply not applied.
            pass

    def is_ignored(self, file_path: Path, is_dir: bool | None = None) -> bool:
        """
        Check if a path should be ignored.

        The path is tested against every loaded .gitignore whose directory
        is an ancestor of (or equal to) the path.

        Args:
            file_path: Path to test. Paths outside ``self.root`` are never ignored.
            is_dir: Whether the path is a directory. When omitted it is
                determined from the filesystem. Directories are matched with
                a trailing "/" so that directory-only patterns such as
                ``build/`` match the directory itself and not just the files
                below it.

        Returns:
            True if any applicable spec matches the path.
        """
        try:
            file_path.relative_to(self.root)
        except ValueError:
            return False

        if not self._specs:
            # Nothing can match; avoid touching the filesystem.
            return False

        if is_dir is None:
            try:
                is_dir = file_path.is_dir()
            except OSError:
                is_dir = False

        for spec_dir, spec in self._specs.items():
            try:
                # Path relative to the directory that owns this .gitignore.
                rel_to_spec = file_path.relative_to(spec_dir)
                candidate = str(rel_to_spec)
                # Skip the suffix when the path IS the spec directory ("." has no parts).
                if is_dir and rel_to_spec.parts:
                    candidate += "/"
                if spec.match_file(candidate):
                    return True
            except ValueError:
                # This .gitignore lives outside the path's ancestry.
                continue

        return False

    def load_nested_gitignore(self, directory: Path) -> None:
        """Load the .gitignore of a nested directory unless one is already loaded for it."""
        if directory not in self._specs:
            self._load_gitignore(directory)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# =============================================================================
|
|
165
|
+
# File Discovery
|
|
166
|
+
# =============================================================================
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class FileDiscoverer:
    """
    Discovers files for analysis in a project directory.

    Handles:
    - Recursive traversal
    - .gitignore patterns
    - File filtering (extension, size, exclusions)
    - Symlink handling (cycle-safe when ``follow_symlinks`` is enabled)
    """

    def __init__(
        self,
        root: Path,
        config: FileDiscoveryConfig | None = None,
    ) -> None:
        """
        Args:
            root: Project root; stored resolved.
            config: Discovery settings; a default ``FileDiscoveryConfig`` is
                used when omitted.
        """
        self.root = root.resolve()
        self.config = config or FileDiscoveryConfig()

        # Gitignore handling is optional and driven by the config.
        self._gitignore: GitignoreManager | None = None
        if self.config.respect_gitignore:
            self._gitignore = GitignoreManager(self.root)

        # Build exclusion spec from config patterns
        self._exclusion_spec = self._build_exclusion_spec()

        # Max file size in bytes
        self._max_size_bytes = int(self.config.max_file_size_mb * 1024 * 1024)

    def _build_exclusion_spec(self) -> pathspec.PathSpec:
        """Build pathspec from exclusion patterns."""
        patterns = list(self.config.exclude_patterns)
        return pathspec.PathSpec.from_lines("gitwildmatch", patterns)

    def _relative_to_root(self, path: Path) -> Path | None:
        """Return ``path`` relative to the root, or None if it lies outside it."""
        try:
            return path.relative_to(self.root)
        except ValueError:
            return None

    def _should_skip_directory(self, dir_path: Path) -> tuple[bool, str | None]:
        """
        Check if a directory should be skipped entirely.

        Returns (should_skip, reason).
        """
        dir_name = dir_path.name

        # Always-excluded directories (hardcoded for safety)
        if dir_name in self.config.ALWAYS_EXCLUDE:
            return True, f"always_excluded:{dir_name}"

        # Skip hidden directories (starting with .)
        if dir_name.startswith(".") and dir_name not in {"."}:
            return True, "hidden_directory"

        # A followed symlink may resolve outside the project. Files found
        # there can never be expressed relative to the root, so do not walk
        # such a tree at all.
        if self._relative_to_root(dir_path) is None:
            return True, "outside_root"

        # Check gitignore
        if self._gitignore and self._gitignore.is_ignored(dir_path):
            return True, "gitignore"

        return False, None

    def _should_skip_file(self, file_path: Path) -> tuple[bool, str | None]:
        """
        Check if a file should be skipped.

        Checks run in this order: hidden name, unsupported extension,
        gitignore, outside-root, exclusion patterns, test file (only when
        tests are not included), size limit.

        Returns (should_skip, reason).
        """
        # Skip hidden files
        if file_path.name.startswith("."):
            return True, "hidden_file"

        # Check file extension - only analyze known languages
        if detect_language(file_path) is None:
            return True, "unsupported_extension"

        # Check gitignore
        if self._gitignore and self._gitignore.is_ignored(file_path):
            return True, "gitignore"

        # A resolved symlink target can live outside the root; report it as
        # skipped instead of letting relative_to() raise further down.
        relative = self._relative_to_root(file_path)
        if relative is None:
            return True, "outside_root"
        relative_str = str(relative)

        # Check exclusion patterns
        if self._exclusion_spec.match_file(relative_str):
            return True, "exclusion_pattern"

        # Check test files if not including tests
        if not self.config.include_tests and self._is_test_file(file_path, relative_str):
            return True, "test_file"

        # Check file size
        try:
            size = file_path.stat().st_size
        except OSError:
            return True, "stat_error"
        if size > self._max_size_bytes:
            return True, f"file_too_large:{size}"

        return False, None

    def _is_test_file(self, file_path: Path, relative_str: str) -> bool:
        """
        Check if a file is a test file.

        A file counts as a test file when its lower-cased stem starts with
        ``test_``, ends with ``_test``, is one of ``conftest``/``test``/``tests``,
        or when any component of ``relative_str`` is a known test directory name.
        """
        name = file_path.stem.lower()

        # Common test file patterns
        if name.startswith("test_") or name.endswith("_test"):
            return True
        if name in {"conftest", "test", "tests"}:
            return True

        # Test directories
        parts = relative_str.lower().split(os.sep)
        return any(p in {"test", "tests", "testing", "__tests__"} for p in parts)

    def discover(self) -> DiscoveryResult:
        """
        Discover all files for analysis.

        Stops at the first file that would exceed ``config.max_files``; that
        file is recorded as skipped and a warning note is added.

        Returns DiscoveryResult with selected and skipped files.
        """
        result = DiscoveryResult(root=self.root)

        for file_info in self._walk_directory(self.root):
            result.total_scanned += 1

            if file_info.skip_reason:
                result.skipped_files.append(file_info)
                result.total_skipped += 1
                continue

            # Check max files limit
            if result.total_selected >= self.config.max_files:
                file_info.skip_reason = "max_files_reached"
                result.skipped_files.append(file_info)
                result.total_skipped += 1
                result.notes.append(
                    AnalysisNote(
                        level="warning",
                        message=f"Max files limit ({self.config.max_files}) reached, skipping remaining files",
                    )
                )
                break

            result.files.append(file_info)
            result.total_selected += 1
            result.total_size_bytes += file_info.size_bytes

        return result

    def _walk_directory(
        self,
        directory: Path,
        _visited: set[Path] | None = None,
    ) -> Iterator[DiscoveredFile]:
        """
        Walk a directory tree, yielding DiscoveredFile for each file.

        Symlinks are ignored unless ``config.follow_symlinks`` is set, in
        which case they are resolved before being classified.

        Args:
            directory: Directory to walk.
            _visited: Internal. Directories already walked in this traversal;
                used to break symlink cycles. Callers should not pass it.
        """
        # Cycles are only possible when symlinks are followed. Without this
        # guard a link pointing at an ancestor recurses without bound.
        if self.config.follow_symlinks:
            if _visited is None:
                _visited = set()
            if directory in _visited:
                return
            _visited.add(directory)

        try:
            entries = list(directory.iterdir())
        except OSError:
            # Covers PermissionError as well; unreadable directories are skipped.
            return

        # Sort for deterministic ordering
        entries.sort(key=lambda p: p.name.lower())

        # Process files first, then directories
        files: list[Path] = []
        dirs: list[Path] = []

        for entry in entries:
            try:
                if entry.is_symlink():
                    if not self.config.follow_symlinks:
                        continue
                    # Resolve symlink
                    entry = entry.resolve()

                if entry.is_file():
                    files.append(entry)
                elif entry.is_dir():
                    dirs.append(entry)
            except (OSError, RuntimeError):
                # RuntimeError: Path.resolve() reports symlink loops this way
                # on Python versions before 3.13.
                continue

        # Yield files
        for file_path in files:
            skip, reason = self._should_skip_file(file_path)

            try:
                size = file_path.stat().st_size
                relative = file_path.relative_to(self.root)
            except (OSError, ValueError):
                # Unreadable or outside the root: nothing meaningful to report.
                continue

            language = detect_language(file_path) if not skip else None

            yield DiscoveredFile(
                path=file_path,
                relative_path=relative,
                language=language,
                size_bytes=size,
                skip_reason=reason,
            )

        # Recurse into directories
        for dir_path in dirs:
            skip, reason = self._should_skip_directory(dir_path)
            if skip:
                continue

            # Load nested gitignore
            if self._gitignore:
                self._gitignore.load_nested_gitignore(dir_path)

            yield from self._walk_directory(dir_path, _visited)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# =============================================================================
|
|
388
|
+
# Convenience Functions
|
|
389
|
+
# =============================================================================
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def discover_files(
    root: Path,
    config: FileDiscoveryConfig | None = None,
) -> DiscoveryResult:
    """
    Run file discovery over a project directory.

    Thin wrapper that builds a ``FileDiscoverer`` and runs it once.

    Args:
        root: Project root directory
        config: File discovery configuration; passed through unchanged

    Returns:
        DiscoveryResult with files to analyze
    """
    return FileDiscoverer(root, config).discover()
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def discover_python_files(
    root: Path,
    config: FileDiscoveryConfig | None = None,
) -> list[Path]:
    """
    Return the paths of the Python files selected by discovery.

    Runs ``discover_files`` and keeps only the path of each selected file
    whose language is ``Language.PYTHON``.
    """
    selected = discover_files(root, config).files_by_language(Language.PYTHON)
    return [entry.path for entry in selected]
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def discover_java_files(
    root: Path,
    config: FileDiscoveryConfig | None = None,
) -> list[Path]:
    """
    Return the paths of the Java files selected by discovery.

    Runs ``discover_files`` and keeps only the path of each selected file
    whose language is ``Language.JAVA``.
    """
    selected = discover_files(root, config).files_by_language(Language.JAVA)
    return [entry.path for entry in selected]
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Structured JSON logging for the CLI.
|
|
3
|
+
|
|
4
|
+
When --log-format json is passed, all diagnostic output is emitted as
|
|
5
|
+
newline-delimited JSON (NDJSON) to stderr instead of Rich-formatted text.
|
|
6
|
+
Each line is a self-contained JSON object with a "level" and "event" key.
|
|
7
|
+
|
|
8
|
+
The analyzer_summary() function emits the machine-readable scan result that
|
|
9
|
+
Mohsin's deployment pipeline consumes to detect regressions and surface errors.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import sys
|
|
16
|
+
from datetime import UTC, datetime
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _now() -> str:
|
|
22
|
+
return datetime.now(UTC).isoformat(timespec="seconds")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _emit(obj: dict[str, Any]) -> None:
    """Serialize ``obj`` as one JSON line on stderr and flush immediately.

    Values json cannot encode natively are converted with ``str``.
    """
    line = json.dumps(obj, default=str)
    stream = sys.stderr
    # Single write so the record and its newline are emitted together.
    stream.write(f"{line}\n")
    stream.flush()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def log_info(event: str, **kwargs: Any) -> None:
    """Emit an ``info``-level NDJSON record for ``event`` with extra fields from ``kwargs``."""
    record: dict[str, Any] = {"ts": _now(), "level": "info", "event": event}
    # Extra fields are merged last, so they take precedence on key clashes.
    record.update(kwargs)
    _emit(record)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def log_warning(event: str, **kwargs: Any) -> None:
    """Emit a ``warning``-level NDJSON record for ``event`` with extra fields from ``kwargs``."""
    record: dict[str, Any] = {"ts": _now(), "level": "warning", "event": event}
    # Extra fields are merged last, so they take precedence on key clashes.
    record.update(kwargs)
    _emit(record)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def log_error(event: str, **kwargs: Any) -> None:
    """Emit an ``error``-level NDJSON record for ``event`` with extra fields from ``kwargs``."""
    record: dict[str, Any] = {"ts": _now(), "level": "error", "event": event}
    # Extra fields are merged last, so they take precedence on key clashes.
    record.update(kwargs)
    _emit(record)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def analyzer_summary(
    project_root: Path,
    result: Any,  # AnalysisResult — avoid circular import
    probe_version: str,
) -> dict[str, Any]:
    """
    Build the ``scan_complete`` summary record for a finished scan.

    The record is returned, not emitted. Keys, in order:

        ts, level ("info"), event ("scan_complete"), version, project,
        routes_found, routes_by_framework, files_analyzed, files_failed,
        files_skipped, frameworks, languages, parse_errors, stage_times_ms,
        extractor_times_ms, total_time_ms

    Example (abridged):
    {
        "ts": "2026-06-12T15:00:00+00:00",
        "level": "info",
        "event": "scan_complete",
        "version": "0.1.0",
        "project": "/path/to/project",
        "routes_found": 42,
        "files_analyzed": 120,
        "files_failed": 2,
        "frameworks": ["django", "celery"],
        "languages": {"python": 118, "javascript": 2},
        "parse_errors": [
            {"file": "src/foo.py", "line": 12, "error": "SyntaxError: invalid syntax"}
        ],
        "total_time_ms": 629
    }

    Args:
        project_root: Project that was scanned; reported as a string.
        result: Analysis result object. ``extractor_times_ms``,
            ``routes_by_framework`` and ``files_skipped`` are optional
            attributes and default to ``{}``, ``{}`` and ``0``.
        probe_version: Version string reported under ``version``.

    Returns:
        The summary record as a plain dict.
    """
    # Framework entries exposing ``name`` are reported by lower-cased name,
    # anything else by its string form.
    framework_names = []
    for detected in result.frameworks_detected:
        if hasattr(detected, "name"):
            framework_names.append(detected.name.lower())
        else:
            framework_names.append(str(detected))

    # Structured details (file + line + error) win when present and non-empty;
    # otherwise the legacy "path: message" strings are split on the first ": ".
    details = getattr(result, "parse_error_details", None)
    if details:
        error_records = details
    else:
        error_records = []
        for legacy in result.parse_errors:
            head, separator, tail = legacy.partition(": ")
            if separator:
                error_records.append({"file": str(head), "error": tail})
            else:
                # No separator: the whole string is the message.
                error_records.append({"file": "", "error": legacy})

    summary: dict[str, Any] = {
        "ts": _now(),
        "level": "info",
        "event": "scan_complete",
        "version": probe_version,
        "project": str(project_root),
        "routes_found": len(result.manifest.entry_points),
        "routes_by_framework": getattr(result, "routes_by_framework", {}),
        "files_analyzed": result.files_analyzed,
        "files_failed": result.files_failed,
        "files_skipped": getattr(result, "files_skipped", 0),
        "frameworks": framework_names,
        "languages": result.files_by_language,
        "parse_errors": error_records,
        "stage_times_ms": result.stage_times_ms,
        "extractor_times_ms": getattr(result, "extractor_times_ms", {}),
        "total_time_ms": result.total_time_ms,
    }
    return summary
|