apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Capability tagging.
|
|
3
|
+
|
|
4
|
+
Infers high-level business capabilities from detected integrations,
|
|
5
|
+
routes, and env-var patterns. Capabilities are factual labels like
|
|
6
|
+
``PAYMENT``, ``USER_AUTH``, ``FILE_STORAGE`` that help the cloud
|
|
7
|
+
understand **what the application does** at a business level.
|
|
8
|
+
|
|
9
|
+
Each capability includes evidence (integration IDs, route patterns)
|
|
10
|
+
so the cloud can audit the inference.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from ..core.manifest import (
|
|
20
|
+
CapabilityModel,
|
|
21
|
+
ConfigurationModel,
|
|
22
|
+
IntegrationModel,
|
|
23
|
+
RouteModel,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ============================================================================
|
|
28
|
+
# Capability constants
|
|
29
|
+
# ============================================================================
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Capability:
    """Namespace of capability tag strings.

    Every attribute is a plain ``str`` whose value is identical to the
    attribute name, so the tags can be compared and serialised directly.
    """

    # --- business features -------------------------------------------
    PAYMENT = "PAYMENT"
    USER_AUTH = "USER_AUTH"

    # --- storage, caching and messaging ------------------------------
    DATA_PERSISTENCE = "DATA_PERSISTENCE"
    CACHING = "CACHING"
    ASYNC_MESSAGING = "ASYNC_MESSAGING"
    FILE_STORAGE = "FILE_STORAGE"

    # --- outbound communication and platform -------------------------
    EMAIL_NOTIFICATION = "EMAIL_NOTIFICATION"
    EXTERNAL_API = "EXTERNAL_API"
    CLOUD_INFRASTRUCTURE = "CLOUD_INFRASTRUCTURE"

    # --- processing primitives ---------------------------------------
    CRYPTOGRAPHIC_OPERATIONS = "CRYPTOGRAPHIC_OPERATIONS"
    TEMPLATE_RENDERING = "TEMPLATE_RENDERING"
    MONITORING = "MONITORING"
    SEARCH = "SEARCH"
    XML_PROCESSING = "XML_PROCESSING"
    SERIALIZATION = "SERIALIZATION"
    OS_COMMAND_EXECUTION = "OS_COMMAND_EXECUTION"
    DIRECTORY_SERVICE = "DIRECTORY_SERVICE"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ============================================================================
|
|
53
|
+
# Rules: integration type → capabilities
|
|
54
|
+
# ============================================================================
|
|
55
|
+
|
|
56
|
+
# Exact-match lookup: an integration's ``type`` string -> the capability tag
# it implies. Types that are not listed contribute no type-based capability.
_INTEGRATION_TYPE_TO_CAPABILITY: dict[str, str] = {
    # data stores and transport
    "database": Capability.DATA_PERSISTENCE,
    "cache": Capability.CACHING,
    "message_queue": Capability.ASYNC_MESSAGING,
    "http_client": Capability.EXTERNAL_API,
    "cloud_service": Capability.CLOUD_INFRASTRUCTURE,
    # data handling
    "serialization": Capability.SERIALIZATION,
    "xml_parser": Capability.XML_PROCESSING,
    "template_engine": Capability.TEMPLATE_RENDERING,
    "crypto": Capability.CRYPTOGRAPHIC_OPERATIONS,
    # host / platform services
    "directory_service": Capability.DIRECTORY_SERVICE,
    "email": Capability.EMAIL_NOTIFICATION,
    "file_system": Capability.FILE_STORAGE,
    "os_command": Capability.OS_COMMAND_EXECUTION,
    # both observability types collapse onto one tag
    "monitoring": Capability.MONITORING,
    "logging": Capability.MONITORING,
    "auth_provider": Capability.USER_AUTH,
}

# Lower-case fragments; an integration whose lower-cased name contains any of
# them is tagged PAYMENT.
_PAYMENT_NAMES: set[str] = {"stripe", "braintree", "adyen", "paypal", "square"}

# Lower-case fragments; an integration whose lower-cased name contains any of
# them is tagged SEARCH.
_SEARCH_NAMES: set[str] = {
    "elasticsearch", "opensearch", "solr",
    "meilisearch", "typesense", "algolia",
}

# Fragments looked for in the lower-cased route path to suggest USER_AUTH.
_AUTH_ROUTE_PATTERNS: set[str] = {
    "login", "logout", "signin", "signup", "register",
    "auth", "token", "refresh", "password", "reset",
    "verify", "oauth", "callback", "session", "sso",
}

# Fragments looked for in the lower-cased route path to suggest PAYMENT.
_PAYMENT_ROUTE_PATTERNS: set[str] = {
    "payment", "checkout", "charge", "refund", "invoice",
    "subscription", "billing", "order", "cart", "purchase",
}

# Fragments looked for in the lower-cased route path to suggest FILE_STORAGE.
_FILE_ROUTE_PATTERNS: set[str] = {
    "upload", "download", "file", "attachment", "media",
    "image", "document", "asset", "blob", "storage",
}

# Upper-case fragments looked for inside the upper-cased env-var name.
# The tagger stops at the first fragment that matches, so the insertion
# order of this dict is significant and must be preserved.
_ENV_CAPABILITY_PATTERNS: dict[str, str] = {
    # payment providers
    "STRIPE": Capability.PAYMENT,
    "PAYPAL": Capability.PAYMENT,
    "BRAINTREE": Capability.PAYMENT,
    # mail delivery
    "SENDGRID": Capability.EMAIL_NOTIFICATION,
    "MAILGUN": Capability.EMAIL_NOTIFICATION,
    "SMTP": Capability.EMAIL_NOTIFICATION,
    # observability
    "SENTRY": Capability.MONITORING,
    "DATADOG": Capability.MONITORING,
    # search back-ends
    "ELASTICSEARCH": Capability.SEARCH,
    "OPENSEARCH": Capability.SEARCH,
    # identity providers
    "AUTH0": Capability.USER_AUTH,
    "OKTA": Capability.USER_AUTH,
    "COGNITO": Capability.USER_AUTH,
    "FIREBASE_AUTH": Capability.USER_AUTH,
    # object storage
    "S3_BUCKET": Capability.FILE_STORAGE,
    "GCS_BUCKET": Capability.FILE_STORAGE,
}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# ============================================================================
|
|
163
|
+
# Detected capability (internal)
|
|
164
|
+
# ============================================================================
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@dataclass
class DetectedCapability:
    """Mutable working record for a single capability tag.

    One instance is kept per tag while the tagging rules run; evidence
    strings are appended to it as rules fire.
    """

    # Capability tag string this record belongs to.
    tag: str
    # Evidence strings collected so far; each instance gets its own list.
    evidence: list[str] = field(default_factory=list)
    # Confidence label; starts out as "MEDIUM".
    confidence: str = "MEDIUM"
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# ============================================================================
|
|
175
|
+
# CapabilityTagger
|
|
176
|
+
# ============================================================================
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class CapabilityTagger:
    """
    Infers business capabilities from integrations, routes, and env vars.

    Every rule records its finding through :meth:`_add`, so each capability
    tag appears at most once in the result, carrying a de-duplicated list of
    evidence strings (``integration:<id>``, ``integration:<id>:<service>``,
    ``route:<id>`` or ``env:<name>``).
    """

    def tag(
        self,
        integrations: list[IntegrationModel],
        routes: list[RouteModel] | None = None,
        configuration: ConfigurationModel | None = None,
    ) -> list[CapabilityModel]:
        """Run all tagging rules and return deduplicated capabilities.

        Args:
            integrations: Detected integrations; always evaluated.
            routes: Detected routes; skipped when ``None`` or empty.
            configuration: Project configuration; skipped when falsy.

        Returns:
            One ``CapabilityModel`` per detected tag, sorted by tag.
        """
        caps: dict[str, DetectedCapability] = {}

        self._tag_from_integrations(integrations, caps)
        if routes:
            self._tag_from_routes(routes, caps)
        if configuration:
            self._tag_from_env_vars(configuration, caps)

        # Three or more distinct evidence strings promote a capability to
        # HIGH. Anything below that keeps the "MEDIUM" default that
        # DetectedCapability starts with, so no second branch is needed.
        # NOTE(review): a capability with a single piece of evidence is
        # therefore also MEDIUM - confirm that no lower level was intended.
        for cap in caps.values():
            if len(cap.evidence) >= 3:
                cap.confidence = "HIGH"

        return self._to_manifest_models(caps)

    # ------------------------------------------------------------------
    # Integration-based
    # ------------------------------------------------------------------

    def _tag_from_integrations(
        self,
        integrations: list[IntegrationModel],
        caps: dict[str, DetectedCapability],
    ) -> None:
        """Tag capabilities implied by each integration.

        Four independent rules are applied per integration: its ``type``,
        payment-provider fragments in its name, search-engine fragments in
        its name, and the cloud sub-services listed under
        ``metadata["services"]``.
        """
        # Lower-cased cloud sub-service name -> capability it implies.
        sub_service_caps: dict[str, str] = {
            "s3": Capability.FILE_STORAGE,
            "gcs": Capability.FILE_STORAGE,
            "blob": Capability.FILE_STORAGE,
            "sqs": Capability.ASYNC_MESSAGING,
            "sns": Capability.ASYNC_MESSAGING,
            "pubsub": Capability.ASYNC_MESSAGING,
            "eventbridge": Capability.ASYNC_MESSAGING,
            "ses": Capability.EMAIL_NOTIFICATION,
            "cognito": Capability.USER_AUTH,
            "iam": Capability.USER_AUTH,
            "dynamodb": Capability.DATA_PERSISTENCE,
            "rds": Capability.DATA_PERSISTENCE,
            "aurora": Capability.DATA_PERSISTENCE,
        }

        for integration in integrations:
            evidence = f"integration:{integration.id}"

            # Type-based capability
            cap_tag = _INTEGRATION_TYPE_TO_CAPABILITY.get(integration.type)
            if cap_tag:
                self._add(caps, cap_tag, evidence)

            # Name-based capabilities (substring match on the lower-cased name)
            name_lower = integration.name.lower()
            if any(pay_name in name_lower for pay_name in _PAYMENT_NAMES):
                self._add(caps, Capability.PAYMENT, evidence)
            if any(search_name in name_lower for search_name in _SEARCH_NAMES):
                self._add(caps, Capability.SEARCH, evidence)

            # Cloud sub-services can indicate specific capabilities.
            # ``or ()`` also covers a "services" key that is present but None.
            for sub in integration.metadata.get("services") or ():
                sub_cap = sub_service_caps.get(sub.lower())
                if sub_cap:
                    self._add(caps, sub_cap, f"{evidence}:{sub}")

    # ------------------------------------------------------------------
    # Route-based
    # ------------------------------------------------------------------

    def _tag_from_routes(
        self,
        routes: list[RouteModel],
        caps: dict[str, DetectedCapability],
    ) -> None:
        """Tag capabilities suggested by fragments of each route path."""
        route_rules = (
            (_AUTH_ROUTE_PATTERNS, Capability.USER_AUTH),
            (_PAYMENT_ROUTE_PATTERNS, Capability.PAYMENT),
            (_FILE_ROUTE_PATTERNS, Capability.FILE_STORAGE),
        )

        for route in routes:
            path_lower = route.path.lower()
            evidence = f"route:{route.id}"

            # A pattern that equals a whole path segment is necessarily a
            # substring of the path, so the substring test alone is enough.
            # NOTE(review): substring matching also fires on paths such as
            # "/authors" (contains "auth") - confirm this breadth is wanted.
            for patterns, cap_tag in route_rules:
                if any(pattern in path_lower for pattern in patterns):
                    self._add(caps, cap_tag, evidence)

    # ------------------------------------------------------------------
    # Env-var based
    # ------------------------------------------------------------------

    def _tag_from_env_vars(
        self,
        configuration: ConfigurationModel,
        caps: dict[str, DetectedCapability],
    ) -> None:
        """Tag capabilities suggested by env-var names.

        Each variable yields at most one capability: the one belonging to
        the first entry of ``_ENV_CAPABILITY_PATTERNS`` found in its
        upper-cased name.
        """
        for env_var in configuration.env_vars_used:
            name_upper = env_var.name.upper()
            cap_tag = next(
                (
                    candidate
                    for pattern, candidate in _ENV_CAPABILITY_PATTERNS.items()
                    if pattern in name_upper
                ),
                None,
            )
            if cap_tag is not None:
                self._add(caps, cap_tag, f"env:{env_var.name}")

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _add(caps: dict[str, DetectedCapability], tag: str, evidence: str) -> None:
        """Record ``evidence`` for ``tag``, creating the entry on first use.

        An evidence string already recorded for the tag is not added again.
        """
        entry = caps.get(tag)
        if entry is None:
            entry = caps[tag] = DetectedCapability(tag=tag)
        if evidence not in entry.evidence:
            entry.evidence.append(evidence)

    @staticmethod
    def _to_manifest_models(caps: dict[str, DetectedCapability]) -> list[CapabilityModel]:
        """Convert the working records into manifest models, sorted by tag.

        Only the first 20 evidence strings of each capability are emitted;
        the confidence was computed earlier from the full list.
        """
        # Imported lazily; at module level the name is only available to
        # type checkers.
        from ..core.manifest import CapabilityModel

        return [
            CapabilityModel(
                tag=cap.tag,
                evidence=cap.evidence[:20],
                confidence=cap.confidence,
            )
            for cap in sorted(caps.values(), key=lambda c: c.tag)
        ]
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration file scanner.
|
|
3
|
+
|
|
4
|
+
Discovers and parses configuration files within the project:
|
|
5
|
+
|
|
6
|
+
- **.env files** — Extracts variable names (never values) and detects
|
|
7
|
+
connection strings, secret patterns, and service references.
|
|
8
|
+
- **Known config files** — Identifies ``settings.py``, ``config.yaml``,
|
|
9
|
+
``application.yml``, ``pyproject.toml``, ``docker-compose.yml``, etc.
|
|
10
|
+
|
|
11
|
+
The scanner never includes sensitive values in its output. Variable names
|
|
12
|
+
and file paths are the only data emitted.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import re
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import TYPE_CHECKING
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from ..core.manifest import ConfigFileModel, EnvVarUsageModel
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ============================================================================
|
|
27
|
+
# Known config file patterns
|
|
28
|
+
# ============================================================================
|
|
29
|
+
|
|
30
|
+
# Exact file names treated as configuration, mapped to a coarse format label.
# The scanner iterates this dict in insertion order, so the order below is
# also the order in which discovered files are reported.
_CONFIG_FILE_PATTERNS: dict[str, str] = {
    # dotenv variants
    ".env": "env",
    ".env.local": "env",
    ".env.development": "env",
    ".env.production": "env",
    ".env.staging": "env",
    ".env.test": "env",
    ".env.example": "env",
    ".env.sample": "env",
    # Python settings modules
    "settings.py": "python",
    "config.py": "python",
    # YAML / properties application config
    "config.yaml": "yaml",
    "config.yml": "yaml",
    "application.yml": "yaml",
    "application.yaml": "yaml",
    "application.properties": "properties",
    # container tooling
    "docker-compose.yml": "yaml",
    "docker-compose.yaml": "yaml",
    "Dockerfile": "dockerfile",
    # packaging and tool configuration
    "pyproject.toml": "toml",
    "setup.cfg": "ini",
    ".flaskenv": "env",
    "alembic.ini": "ini",
}

# Case-insensitive pattern for variable names that indicate a connection
# string. It is unanchored, so a name merely containing one of the
# alternatives matches when used with ``search``.
_CONNECTION_VAR_PATTERNS = re.compile(
    r"(?:DATABASE_URL|DB_URL|REDIS_URL|MONGO_URI|BROKER_URL|AMQP_URL|"
    r"CELERY_BROKER_URL|CACHE_URL|ELASTICSEARCH_URL|SQLALCHEMY_DATABASE_URI)",
    flags=re.IGNORECASE,
)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ============================================================================
|
|
64
|
+
# Parsed env-file entry
|
|
65
|
+
# ============================================================================
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class EnvFileEntry:
    """Name-only record of a variable found in a .env file.

    The variable's value is never stored; only facts about it are kept.
    """

    # Variable name as written in the file.
    name: str
    # Whether a value was present for the variable.
    has_value: bool
    # Whether the name is flagged as a connection-string variable.
    is_connection_string: bool = False
    # Whether the name is flagged as a secret-looking variable.
    is_secret_name: bool = False
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ============================================================================
|
|
79
|
+
# ConfigScanner
|
|
80
|
+
# ============================================================================
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ConfigScanner:
    """
    Scans project root for configuration files and .env variable names.

    Only file paths and variable names are produced; variable values are
    inspected solely to decide whether one is present and are never stored.
    """

    # Subdirectories of the project root probed in addition to the root.
    _CONFIG_SUBDIRS = ("config", "conf", "deploy", ".docker")

    # Variable-name fragments that mark a variable as secret-looking.
    _SECRET_NAME_RE = re.compile(
        r"(?:PASSWORD|SECRET|PRIVATE_KEY|API_KEY|TOKEN|CREDENTIALS?)",
        re.IGNORECASE,
    )

    # Optional shell-style ``export`` prefix in front of an assignment.
    _EXPORT_PREFIX_RE = re.compile(r"^export\s+")

    def __init__(self, project_root: Path) -> None:
        """Remember ``project_root``; no file-system access happens here."""
        self._root = project_root

    def discover_config_files(self) -> list[tuple[str, str]]:
        """Return (relative_path, type) for known config files found.

        For every known file name the project root is checked first,
        followed by each of the common configuration subdirectories.
        """
        found: list[tuple[str, str]] = []

        for name, file_type in _CONFIG_FILE_PATTERNS.items():
            # Check root-level
            if (self._root / name).is_file():
                found.append((name, file_type))

            # Check common subdirectories
            found.extend(
                (f"{subdir}/{name}", file_type)
                for subdir in self._CONFIG_SUBDIRS
                if (self._root / subdir / name).is_file()
            )

        return found

    def parse_env_files(self) -> list[EnvFileEntry]:
        """Parse all .env* files and extract variable names (never values).

        Only env-type files located directly in the project root are read.
        A variable name that occurs more than once (within a file or across
        files) is reported once, using its first occurrence. Files that
        cannot be read are skipped.
        """
        entries: list[EnvFileEntry] = []
        seen: set[str] = set()

        for name, file_type in _CONFIG_FILE_PATTERNS.items():
            if file_type != "env":
                continue
            path = self._root / name
            if not path.is_file():
                continue

            try:
                # "utf-8-sig" drops a leading byte-order mark, which would
                # otherwise glue itself to the first variable name and get
                # that line rejected. Undecodable bytes are replaced.
                content = path.read_text(encoding="utf-8-sig", errors="replace")
            except OSError:
                continue

            for raw_line in content.splitlines():
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue

                # Accept shell-style "export NAME=value" lines as well, so
                # the reported name is "NAME" rather than "export NAME".
                line = self._EXPORT_PREFIX_RE.sub("", line, count=1)

                # Parse NAME=value or NAME=
                raw_name, separator, raw_value = line.partition("=")
                if not separator:
                    continue

                var_name = raw_name.strip()
                if not var_name or not var_name[0].isalpha():
                    continue

                if var_name in seen:
                    continue
                seen.add(var_name)

                entries.append(
                    EnvFileEntry(
                        name=var_name,
                        # Only the presence of a value is recorded.
                        has_value=bool(raw_value.strip()),
                        is_connection_string=bool(
                            _CONNECTION_VAR_PATTERNS.search(var_name)
                        ),
                        is_secret_name=bool(self._SECRET_NAME_RE.search(var_name)),
                    )
                )

        return entries

    # ------------------------------------------------------------------
    # Conversion to manifest models
    # ------------------------------------------------------------------

    def to_config_file_models(self) -> list[ConfigFileModel]:
        """Return ConfigFileModel list for all discovered config files."""
        # Imported lazily; at module level the name is only available to
        # type checkers.
        from ..core.manifest import ConfigFileModel

        return [
            ConfigFileModel(path=path, type=file_type)
            for path, file_type in self.discover_config_files()
        ]

    def to_env_var_models(self) -> list[EnvVarUsageModel]:
        """Return EnvVarUsageModel list from .env file variable names.

        ``default_value`` never carries the real value: it is the
        placeholder ``"(connection_string)"`` or ``"(secret)"`` when the
        name was flagged accordingly (connection string takes precedence),
        otherwise ``None``. ``locations`` is always empty.
        """
        from ..core.manifest import EnvVarUsageModel

        models: list[EnvVarUsageModel] = []
        for entry in self.parse_env_files():
            if entry.is_connection_string:
                placeholder = "(connection_string)"
            elif entry.is_secret_name:
                placeholder = "(secret)"
            else:
                placeholder = None

            models.append(
                EnvVarUsageModel(
                    name=entry.name,
                    locations=[],
                    default_value=placeholder,
                )
            )

        return models
|