apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Request access pattern registry for read-site origin detection.
|
|
3
|
+
|
|
4
|
+
Maps framework-specific request access patterns (request.path_params["id"],
|
|
5
|
+
request.args.get("page"), etc.) to generic OriginType. Used to identify
|
|
6
|
+
data origins when handlers read from the request object directly
|
|
7
|
+
(Flask, Django) rather than via parameters (FastAPI).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class RequestAccessPattern:
    """Maps framework-specific request access to origin type.

    One instance describes a single way of reading from the request object:
    the attribute chain that is followed, how the final attribute is accessed,
    and the origin type name that such a read is classified as.
    """

    # Framework this pattern belongs to: "fastapi", "flask", "django", "starlette"
    framework: str
    # Attribute chain, starting at the request object:
    # ("request", "path_params") or ("request", "args")
    attr_chain: tuple[str, ...]
    # Access method: "get", "__getitem__", or None for a direct attribute
    # read / call (e.g. request.body, request.get_json())
    access_method: str | None = None
    # Name of the origin type this access maps to (will map to OriginType).
    # For .get("key"), the param name is typically the first arg;
    # for ["key"], the param name is the subscript.
    origin_type: str = ""

    # NOTE(review): not read anywhere in this module; presumably tells the
    # consumer that the parameter name can be taken from the subscript / first
    # argument of the access — confirm against the read-site detector.
    param_name_from_subscript: bool = True
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# =============================================================================
|
|
33
|
+
# Pattern Registry
|
|
34
|
+
# =============================================================================
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Built-in patterns for each framework
|
|
38
|
+
# Format: (attr_chain, access_method, OriginType name)
|
|
39
|
+
# Mapping-like request attributes are registered twice each: once for
# subscript access (request.x["k"]) and once for request.x.get("k").
# Whole-body reads follow, with no access method.
FASTAPI_STARLETTE_PATTERNS: list[tuple[tuple[str, ...], str | None, str]] = [
    (("request", attribute), accessor, origin)
    for attribute, origin in (
        ("path_params", "HTTP_PATH_PARAM"),
        ("query_params", "HTTP_QUERY_PARAM"),
        ("headers", "HTTP_HEADER"),
        ("cookies", "HTTP_COOKIE"),
    )
    for accessor in ("__getitem__", "get")
] + [
    (("request", "body"), None, "HTTP_BODY"),
    (("request", "json"), None, "HTTP_BODY"),  # .json() returns body
]
|
|
51
|
+
|
|
52
|
+
# Flask request access patterns as (attr_chain, access_method, OriginType name).
# Every mapping-like attribute is registered for both ``.get("key")`` and
# subscript access, so ``request.headers["X"]`` is classified the same way as
# ``request.headers.get("X")``. Whole-body reads carry no access method.
FLASK_PATTERNS: list[tuple[tuple[str, ...], str | None, str]] = [
    (("request", "args"), "get", "HTTP_QUERY_PARAM"),
    (("request", "args"), "__getitem__", "HTTP_QUERY_PARAM"),
    (("request", "form"), "get", "HTTP_FORM"),
    (("request", "form"), "__getitem__", "HTTP_FORM"),
    (("request", "files"), "get", "HTTP_FILE"),
    (("request", "files"), "__getitem__", "HTTP_FILE"),
    (("request", "headers"), "get", "HTTP_HEADER"),
    (("request", "headers"), "__getitem__", "HTTP_HEADER"),
    (("request", "cookies"), "get", "HTTP_COOKIE"),
    (("request", "cookies"), "__getitem__", "HTTP_COOKIE"),
    (("request", "get_json"), None, "HTTP_BODY"),  # request.get_json()
    (("request", "json"), None, "HTTP_BODY"),  # request.json
    (("request", "data"), None, "HTTP_BODY"),
]
|
|
64
|
+
|
|
65
|
+
# Django request access patterns as (attr_chain, access_method, OriginType name).
# Every mapping-like attribute is registered for both ``.get("key")`` and
# subscript access, so ``request.GET["page"]`` is classified the same way as
# ``request.GET.get("page")``. ``request.body`` is a direct read with no
# access method.
DJANGO_PATTERNS: list[tuple[tuple[str, ...], str | None, str]] = [
    (("request", "GET"), "get", "HTTP_QUERY_PARAM"),
    (("request", "GET"), "__getitem__", "HTTP_QUERY_PARAM"),
    (("request", "POST"), "get", "HTTP_FORM"),
    (("request", "POST"), "__getitem__", "HTTP_FORM"),
    (("request", "FILES"), "get", "HTTP_FILE"),
    (("request", "FILES"), "__getitem__", "HTTP_FILE"),
    (("request", "COOKIES"), "get", "HTTP_COOKIE"),
    (("request", "COOKIES"), "__getitem__", "HTTP_COOKIE"),
    (("request", "headers"), "get", "HTTP_HEADER"),
    (("request", "headers"), "__getitem__", "HTTP_HEADER"),
    (("request", "body"), None, "HTTP_BODY"),
    # Django Rest Framework
    (("request", "data"), "__getitem__", "HTTP_BODY"),
    (("request", "data"), "get", "HTTP_BODY"),
    (("request", "query_params"), "get", "HTTP_QUERY_PARAM"),
    (("request", "query_params"), "__getitem__", "HTTP_QUERY_PARAM"),
]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class RequestPatternRegistry:
    """Registry of request access patterns per framework.

    Patterns are stored per lower-case framework name ("fastapi",
    "starlette", "flask", "django") and populated from the module-level
    built-in pattern tables when the registry is constructed.
    """

    def __init__(self) -> None:
        self._patterns: dict[str, list[RequestAccessPattern]] = {
            "fastapi": [],
            "starlette": [],
            "flask": [],
            "django": [],
        }
        self._init_builtin_patterns()

    def _register(
        self,
        frameworks: tuple[str, ...],
        specs: list[tuple[tuple[str, ...], str | None, str]],
    ) -> None:
        """Append one RequestAccessPattern per spec to each listed framework."""
        for attr_chain, method, origin_name in specs:
            for fw in frameworks:
                self._patterns[fw].append(
                    RequestAccessPattern(
                        framework=fw,
                        attr_chain=attr_chain,
                        access_method=method,
                        origin_type=origin_name,
                    )
                )

    def _init_builtin_patterns(self) -> None:
        """Initialize with built-in patterns."""
        # FastAPI and Starlette share one pattern table.
        self._register(("fastapi", "starlette"), FASTAPI_STARLETTE_PATTERNS)
        self._register(("flask",), FLASK_PATTERNS)
        self._register(("django",), DJANGO_PATTERNS)

    def get_origin_type(
        self,
        framework: str,
        attr_chain: tuple[str, ...],
        access_method: str | None = None,
    ) -> str | None:
        """
        Get the OriginType name for a request access pattern.

        Args:
            framework: "fastapi", "flask", "django", "starlette"
                (matched case-insensitively)
            attr_chain: e.g. ("request", "path_params") or ("request", "args")
            access_method: "get", "__getitem__", or None

        Returns:
            OriginType name string of the first pattern whose attribute chain
            and access method both match exactly, or None if no match (this
            includes unknown frameworks).
        """
        for pattern in self._patterns.get(framework.lower(), ()):
            if pattern.attr_chain == attr_chain and pattern.access_method == access_method:
                return pattern.origin_type
        return None

    def get_all_patterns(self, framework: str) -> list[RequestAccessPattern]:
        """Get all patterns for a framework (empty list if unknown).

        A new list is returned on every call so that callers cannot add or
        remove entries of the registry's internal storage by mutating the
        result; the pattern objects themselves are shared, not copied.
        """
        return list(self._patterns.get(framework.lower(), ()))
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Singleton registry
|
|
154
|
+
# Lazily created, module-level shared registry instance.
_default_registry: RequestPatternRegistry | None = None


def get_request_pattern_registry() -> RequestPatternRegistry:
    """Return the shared default registry, creating it on the first call."""
    global _default_registry
    if _default_registry is not None:
        return _default_registry
    _default_registry = RequestPatternRegistry()
    return _default_registry
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Sensitivity classification for integrations and data.
|
|
3
|
+
|
|
4
|
+
Assigns sensitivity labels to detected integrations based on:
|
|
5
|
+
1. **Integration type** — databases inherently store data, caches may hold
|
|
6
|
+
sessions, email services handle contact info.
|
|
7
|
+
2. **SQL table/column heuristics** — SQL patterns referencing ``users``,
|
|
8
|
+
``passwords``, ``payments`` suggest PII or financial data.
|
|
9
|
+
3. **Variable / env-var naming** — ``DB_PASSWORD``, ``STRIPE_SECRET_KEY``
|
|
10
|
+
indicate auth credentials or financial integrations.
|
|
11
|
+
|
|
12
|
+
Labels are purely factual tags (not severity scores) that the cloud uses
|
|
13
|
+
to prioritise and contextualise findings.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import re
|
|
19
|
+
from typing import TYPE_CHECKING, Any
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from ..core.manifest import IntegrationModel, LiteralPatternModel
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ============================================================================
|
|
26
|
+
# Sensitivity label constants
|
|
27
|
+
# ============================================================================
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SensitivityLabel:
    """String constants used as sensitivity tags.

    The values are plain strings (not an Enum), so label collections can be
    sorted and joined directly.
    """

    # Personal data, e.g. user/customer tables, e-mail integrations
    PII = "PII"
    # Payment-related data, e.g. payment providers, invoices
    FINANCIAL = "FINANCIAL"
    # Authentication material, e.g. auth providers, password/token tables
    AUTH_CREDENTIAL = "AUTH_CREDENTIAL"
    # Secret-looking variable names (PASSWORD, API_KEY, TOKEN, ...)
    SECRETS = "SECRETS"
    # Internal infrastructure such as caches and message queues
    INTERNAL = "INTERNAL"
    # Fallback used by classify_integration when no other label applies
    GENERAL = "GENERAL"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ============================================================================
|
|
40
|
+
# Rules: integration-type → default sensitivity
|
|
41
|
+
# ============================================================================
|
|
42
|
+
|
|
43
|
+
# Default labels implied by the integration type alone. An empty list means
# the type contributes no label by itself; types missing from this table are
# treated the same way by classify_integration (it looks them up with a
# default of []).
_INTEGRATION_TYPE_SENSITIVITY: dict[str, list[str]] = {
    "database": [SensitivityLabel.PII, SensitivityLabel.INTERNAL],
    "cache": [SensitivityLabel.INTERNAL],
    "message_queue": [SensitivityLabel.INTERNAL],
    "http_client": [],
    "cloud_service": [SensitivityLabel.INTERNAL],
    "serialization": [SensitivityLabel.INTERNAL],
    "xml_parser": [],
    "template_engine": [],
    "crypto": [SensitivityLabel.AUTH_CREDENTIAL],
    "directory_service": [SensitivityLabel.AUTH_CREDENTIAL, SensitivityLabel.PII],
    "email": [SensitivityLabel.PII],
    "file_system": [],
    "os_command": [],
    "monitoring": [],
    "logging": [],
    "auth_provider": [SensitivityLabel.AUTH_CREDENTIAL],
}
|
|
61
|
+
|
|
62
|
+
# Integration names that indicate financial data
|
|
63
|
+
# Entries are lower-case because classify_integration tests each one as a
# substring of the lower-cased integration name; a name that merely contains
# one of these words anywhere therefore also counts as financial.
_FINANCIAL_INTEGRATIONS: set[str] = {
    "stripe",
    "braintree",
    "adyen",
    "paypal",
    "square",
}
|
|
70
|
+
|
|
71
|
+
# ============================================================================
|
|
72
|
+
# Rules: SQL table/column patterns
|
|
73
|
+
# ============================================================================
|
|
74
|
+
|
|
75
|
+
# All three patterns are case-insensitive and match whole words only.
# Note that ``\b`` treats "_" as a word character, so a name such as
# ``user_accounts`` matches neither ``users?`` nor ``accounts?``.

# Table/column words that suggest personal data.
_PII_TABLE_PATTERNS = re.compile(
    r"\b(?:users?|customers?|patients?|employees?|persons?|contacts?|profiles?|"
    r"accounts?|members?|subscribers?)\b",
    re.IGNORECASE,
)

# Table/column words that suggest financial data.
# ``orders?`` carries a negative lookahead so that the SQL keyword in an
# ``ORDER BY`` clause is not mistaken for an ``order``/``orders`` table;
# without it every sorted query would be labelled as financial.
_FINANCIAL_TABLE_PATTERNS = re.compile(
    r"\b(?:payments?|transactions?|invoices?|orders?(?!\s+by\b)|billing|"
    r"subscriptions?|charges?|refunds?|credits?|wallets?|balances?)\b",
    re.IGNORECASE,
)

# Table/column words that suggest authentication or authorization data.
_AUTH_TABLE_PATTERNS = re.compile(
    r"\b(?:passwords?|credentials?|tokens?|sessions?|api_keys?|secrets?|"
    r"auth|oauth|permissions?|roles?|grants?|logins?)\b",
    re.IGNORECASE,
)
|
|
92
|
+
|
|
93
|
+
# ============================================================================
|
|
94
|
+
# Rules: env-var / variable name patterns
|
|
95
|
+
# ============================================================================
|
|
96
|
+
|
|
97
|
+
# Each pattern is a case-insensitive, unanchored alternation: a keyword
# matches anywhere inside the variable name, not only as a whole word.

# Names that look like secrets or credentials.
_SECRET_VAR_PATTERNS = re.compile(
    "(?:"
    + "|".join(
        (
            "PASSWORD",
            "SECRET",
            "PRIVATE_KEY",
            "API_KEY",
            "TOKEN",
            "CREDENTIALS?",
            "AUTH",
        )
    )
    + ")",
    flags=re.IGNORECASE,
)

# Names that mention a payment provider or payments in general.
_FINANCIAL_VAR_PATTERNS = re.compile(
    "(?:"
    + "|".join(("STRIPE", "PAYPAL", "BRAINTREE", "ADYEN", "SQUARE", "PAYMENT"))
    + ")",
    flags=re.IGNORECASE,
)

# Names that mention personal data fields.
_PII_VAR_PATTERNS = re.compile(
    "(?:"
    + "|".join(
        (
            "SSN",
            "SOCIAL_SECURITY",
            "EMAIL",
            "PHONE",
            "ADDRESS",
            "DATE_OF_BIRTH",
            "DOB",
            "FIRST_NAME",
            "LAST_NAME",
            "FULL_NAME",
        )
    )
    + ")",
    flags=re.IGNORECASE,
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ============================================================================
|
|
115
|
+
# SensitivityClassifier
|
|
116
|
+
# ============================================================================
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class SensitivityClassifier:
    """
    Assigns sensitivity labels to integrations, SQL patterns, and env vars.

    The ``classify_*`` methods keep no state and return label strings in
    sorted order. The ``enrich_*`` methods mutate the models passed to them.
    """

    def classify_integration(
        self,
        integration_type: str,
        integration_name: str,
        metadata: dict[str, Any] | None = None,
    ) -> list[str]:
        """Return sensitivity labels for an integration.

        Args:
            integration_type: Key into the type-default table; unknown types
                contribute no labels.
            integration_name: Name checked (case-insensitively, by substring)
                against the known financial integrations.
            metadata: Optional mapping; its ``"services"`` entry, when present,
                is scanned for payment-related service names.

        Returns:
            Sorted labels, or ``[SensitivityLabel.GENERAL]`` when nothing
            matched.
        """
        # Type-based defaults
        labels: set[str] = set(_INTEGRATION_TYPE_SENSITIVITY.get(integration_type, []))

        # Financial integration names; a missing name simply matches nothing.
        name_lower = (integration_name or "").lower()
        if any(fin_name in name_lower for fin_name in _FINANCIAL_INTEGRATIONS):
            labels.add(SensitivityLabel.FINANCIAL)

        # Sub-service names may reveal a financial purpose. A missing or
        # ``None`` "services" entry is treated as empty, and non-string
        # entries are skipped instead of raising.
        services = (metadata or {}).get("services") or []
        for svc in services:
            if not isinstance(svc, str):
                continue
            svc_lower = svc.lower()
            if any(hint in svc_lower for hint in ("payment", "billing", "invoice")):
                labels.add(SensitivityLabel.FINANCIAL)
                break  # the label is already set; further entries add nothing

        return sorted(labels) if labels else [SensitivityLabel.GENERAL]

    def classify_sql_pattern(self, sql_value: str) -> list[str]:
        """Return sensitivity labels based on SQL table/column names.

        Returns an empty list when none of the table patterns match.
        """
        checks = (
            (_PII_TABLE_PATTERNS, SensitivityLabel.PII),
            (_FINANCIAL_TABLE_PATTERNS, SensitivityLabel.FINANCIAL),
            (_AUTH_TABLE_PATTERNS, SensitivityLabel.AUTH_CREDENTIAL),
        )
        return sorted({label for pattern, label in checks if pattern.search(sql_value)})

    def classify_env_var(self, var_name: str) -> list[str]:
        """Return sensitivity labels based on env variable name.

        Returns an empty list when none of the variable patterns match.
        """
        checks = (
            (_SECRET_VAR_PATTERNS, SensitivityLabel.SECRETS),
            (_FINANCIAL_VAR_PATTERNS, SensitivityLabel.FINANCIAL),
            (_PII_VAR_PATTERNS, SensitivityLabel.PII),
        )
        return sorted({label for pattern, label in checks if pattern.search(var_name)})

    def classify_connection_string(self, scheme: str) -> list[str]:
        """Return sensitivity labels based on a connection string scheme.

        Every connection string is labelled AUTH_CREDENTIAL; database schemes
        additionally get PII, and cache / message-queue schemes get INTERNAL.
        Scheme keywords are matched case-insensitively as substrings.
        """
        scheme_lower = scheme.lower()

        # Added unconditionally, whatever the scheme is.
        labels: set[str] = {SensitivityLabel.AUTH_CREDENTIAL}

        if any(db in scheme_lower for db in ("postgres", "mysql", "sqlite", "mongodb")):
            labels.add(SensitivityLabel.PII)
        if "redis" in scheme_lower:
            labels.add(SensitivityLabel.INTERNAL)
        if any(mq in scheme_lower for mq in ("amqp", "kafka", "nats")):
            labels.add(SensitivityLabel.INTERNAL)

        return sorted(labels)

    # ------------------------------------------------------------------
    # Batch operations
    # ------------------------------------------------------------------

    def enrich_integrations(
        self,
        integrations: list[IntegrationModel],
    ) -> None:
        """Mutate integration models in-place, adding sensitivity_labels."""
        for integration in integrations:
            integration.sensitivity_labels = self.classify_integration(
                integration.type,
                integration.name,
                integration.metadata,
            )

    def enrich_sql_patterns(
        self,
        sql_patterns: list[LiteralPatternModel],
    ) -> None:
        """Annotate SQL literal patterns with sensitivity in notes.

        Patterns with an empty value, or whose value yields no labels, are
        left untouched. The annotation is appended to existing notes after
        "; ". Running the enrichment again on the same models does not append
        the same annotation a second time.
        """
        for pattern in sql_patterns:
            if not pattern.value:
                continue
            labels = self.classify_sql_pattern(pattern.value)
            if not labels:
                continue
            existing = pattern.notes or ""
            sensitivity_str = f"sensitivity: {', '.join(labels)}"
            if sensitivity_str in existing:
                # Already annotated by an earlier pass; keep notes stable.
                continue
            pattern.notes = f"{existing}; {sensitivity_str}" if existing else sensitivity_str
|