prooflayer-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. prooflayer/__init__.py +50 -0
  2. prooflayer/cli.py +362 -0
  3. prooflayer/config/__init__.py +6 -0
  4. prooflayer/config/allowlist.py +138 -0
  5. prooflayer/config/loader.py +29 -0
  6. prooflayer/detection/__init__.py +21 -0
  7. prooflayer/detection/engine.py +783 -0
  8. prooflayer/detection/models.py +49 -0
  9. prooflayer/detection/normalizer.py +245 -0
  10. prooflayer/detection/rules.py +104 -0
  11. prooflayer/detection/scanner.py +160 -0
  12. prooflayer/detection/scorer.py +65 -0
  13. prooflayer/detection/semantic.py +73 -0
  14. prooflayer/metrics.py +266 -0
  15. prooflayer/reporting/__init__.py +5 -0
  16. prooflayer/reporting/reporter.py +190 -0
  17. prooflayer/response/__init__.py +6 -0
  18. prooflayer/response/actions.py +152 -0
  19. prooflayer/response/killer.py +73 -0
  20. prooflayer/rules/command-injection.yaml +123 -0
  21. prooflayer/rules/data-exfiltration.yaml +83 -0
  22. prooflayer/rules/jailbreaks.yaml +67 -0
  23. prooflayer/rules/prompt-injection.yaml +99 -0
  24. prooflayer/rules/role-manipulation.yaml +60 -0
  25. prooflayer/rules/sql-injection.yaml +51 -0
  26. prooflayer/rules/ssrf-xxe.yaml +51 -0
  27. prooflayer/rules/tool-poisoning.yaml +46 -0
  28. prooflayer/runtime/__init__.py +21 -0
  29. prooflayer/runtime/interceptor.py +91 -0
  30. prooflayer/runtime/mcp_wrapper.py +395 -0
  31. prooflayer/runtime/middleware.py +86 -0
  32. prooflayer/runtime/transport.py +306 -0
  33. prooflayer/runtime/wrapper.py +265 -0
  34. prooflayer/utils/__init__.py +21 -0
  35. prooflayer/utils/encoding.py +87 -0
  36. prooflayer/utils/entropy.py +51 -0
  37. prooflayer/utils/logging.py +86 -0
  38. prooflayer/utils/masking.py +72 -0
  39. prooflayer/version.py +6 -0
  40. prooflayer_runtime-0.1.0.dist-info/METADATA +266 -0
  41. prooflayer_runtime-0.1.0.dist-info/RECORD +45 -0
  42. prooflayer_runtime-0.1.0.dist-info/WHEEL +5 -0
  43. prooflayer_runtime-0.1.0.dist-info/entry_points.txt +2 -0
  44. prooflayer_runtime-0.1.0.dist-info/licenses/LICENSE +4 -0
  45. prooflayer_runtime-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,306 @@
1
+ """
2
+ ProofLayer Transport Proxy
3
+ ===========================
4
+
5
+ HTTP reverse proxy that intercepts MCP JSON-RPC tool calls
6
+ for security scanning. Designed for Rick Spencer's simple-mcp
7
+ (Go-based MCP server that speaks HTTP).
8
+
9
+ Usage:
10
+ proxy = ProofLayerTransportProxy(listen_port=8080, backend_port=8081)
11
+ proxy.start() # blocking
12
+ # or
13
+ proxy.start_background() # returns Thread
14
+ """
15
+
16
+ import json
17
+ import logging
18
+ import threading
19
+ import time
20
+ from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
21
+ from typing import Optional, Dict, Any
22
+
23
+ import httpx
24
+
25
+ from ..detection.engine import DetectionEngine
26
+ from ..detection.models import ScanResult
27
+ from ..reporting.reporter import SecurityReporter
28
+ from ..response.actions import ResponseAction, ThreatAction
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ class ProofLayerTransportProxy:
34
+ """HTTP reverse proxy with MCP tool call security scanning."""
35
+
36
def __init__(
    self,
    listen_port: int = 8080,
    backend_port: int = 8081,
    backend_host: str = "127.0.0.1",
    detection_engine: Optional[DetectionEngine] = None,
    reporter: Optional[SecurityReporter] = None,
    response_action: Optional[ResponseAction] = None,
    rules_dir: Optional[str] = None,
    report_dir: str = "./security-reports",
    action_on_threat: str = "block",
    kill_on_threat: bool = False,
):
    """
    Configure the proxy and the components it scans with.

    Args:
        listen_port: Port the proxy binds to when started.
        backend_port: Port of the backend that requests are forwarded to.
        backend_host: Host of the backend that requests are forwarded to.
        detection_engine: Engine used to scan tool calls. When falsy, a
            ``DetectionEngine`` is built from ``rules_dir``.
        reporter: Reporter used for blocked calls. When falsy, a
            ``SecurityReporter`` writing to ``report_dir`` is built.
        response_action: When falsy, a ``ResponseAction`` is built from
            ``action_on_threat`` and the reporter.
        rules_dir: Only used when the engine is built here.
        report_dir: Only used when the reporter is built here.
        action_on_threat: Only used when the response action is built here.
        kill_on_threat: Stored on the instance as ``kill_on_threat``.
    """
    # Fall back to default components for anything the caller did not inject.
    if not detection_engine:
        detection_engine = DetectionEngine(rules_dir=rules_dir)
    if not reporter:
        reporter = SecurityReporter(report_dir=report_dir)
    if not response_action:
        response_action = ResponseAction(
            default_action=action_on_threat, reporter=reporter
        )

    self.engine = detection_engine
    self.reporter = reporter
    self.response_action = response_action

    self.listen_port = listen_port
    self.backend_host = backend_host
    self.backend_port = backend_port
    self.kill_on_threat = kill_on_threat

    # Populated by start() / start_background().
    self._server: Optional[ThreadingHTTPServer] = None
    self._thread: Optional[threading.Thread] = None

    # One shared client for all forwarded requests; closed again in stop().
    backend_url = f"http://{backend_host}:{backend_port}"
    self._client = httpx.Client(base_url=backend_url, timeout=30.0)
67
+
68
def start(self):
    """
    Run the proxy in the calling thread until it is interrupted.

    Binds on all interfaces at ``listen_port`` and serves requests until
    ``serve_forever()`` returns. A ``KeyboardInterrupt`` is turned into a
    call to ``stop()``.
    """
    bind_address = ("0.0.0.0", self.listen_port)
    self._server = ThreadingHTTPServer(bind_address, self._make_handler())
    logger.info(
        "ProofLayer proxy listening on :%d, forwarding to %s:%d",
        self.listen_port, self.backend_host, self.backend_port,
    )
    try:
        self._server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C: tear the server and client down before returning.
        self.stop()
80
+
81
def start_background(self) -> threading.Thread:
    """
    Run the proxy in a daemon thread.

    Returns:
        The started thread, which runs the server's ``serve_forever()``.
    """
    server = ThreadingHTTPServer(("0.0.0.0", self.listen_port), self._make_handler())
    worker = threading.Thread(target=server.serve_forever, daemon=True)

    # Keep references so stop() can shut the server down later.
    self._server = server
    self._thread = worker
    worker.start()

    logger.info(
        "ProofLayer proxy started in background on :%d -> %s:%d",
        self.listen_port, self.backend_host, self.backend_port,
    )
    return worker
92
+
93
def stop(self):
    """
    Stop the proxy server and release its resources.

    Ends the ``serve_forever()`` loop, closes the listening socket and
    closes the shared backend HTTP client. Calling it when no server is
    running only closes the client.
    """
    if self._server:
        server = self._server
        self._server = None
        try:
            server.shutdown()
        finally:
            # shutdown() only ends the serve loop; the listening socket stays
            # open (and the port stays bound) until server_close() is called.
            server.server_close()
    if self._client:
        self._client.close()
    logger.info("ProofLayer proxy stopped")
101
+
102
def _make_handler(self):
    """
    Create the request handler class with access to this proxy instance.

    The HTTP server instantiates the handler class itself, so the proxy is
    captured in a closure instead of being passed to a constructor.

    POST bodies are parsed as JSON-RPC. ``tools/call`` requests (single or
    inside a batch) are scanned via ``_check_tool_call``; blocked calls are
    answered locally with HTTP 200 and everything else is forwarded to the
    backend. Bodies that are not JSON objects or arrays are forwarded
    unscanned. GET requests are forwarded as-is.

    Returns:
        A ``BaseHTTPRequestHandler`` subclass bound to this proxy.
    """
    proxy = self

    def rpc_error(code, message, request=None):
        """Build a JSON-RPC 2.0 error object, echoing the request id if any."""
        return {
            "jsonrpc": "2.0",
            "error": {"code": code, "message": message},
            "id": request.get("id") if isinstance(request, dict) else None,
        }

    class ProxyHandler(BaseHTTPRequestHandler):
        def do_POST(self):
            body = self._read_body()
            if body is None:
                return  # an error response has already been sent

            # Try to parse as JSON-RPC
            try:
                payload = json.loads(body) if body else None
            except (json.JSONDecodeError, UnicodeDecodeError):
                payload = None

            # Handle batch JSON-RPC (array of requests)
            if isinstance(payload, list):
                self._send_json(200, self._process_batch(payload))
                return

            if isinstance(payload, dict):
                blocked, response = proxy._check_tool_call(payload)
                if blocked:
                    self._send_json(200, response)
                    return

            # Forward the original bytes to the backend
            try:
                backend_response = proxy._client.post(
                    self.path,
                    content=body,
                    headers=self._forward_headers("host", "content-length"),
                )
            except httpx.ConnectError:
                self._send_json(502, rpc_error(-32000, "Backend unavailable", payload))
            except httpx.TimeoutException:
                self._send_json(504, rpc_error(-32000, "Backend timeout", payload))
            except httpx.HTTPError:
                # Any other transport failure: answer instead of dropping
                # the client connection with an unhandled exception.
                self._send_json(502, rpc_error(-32000, "Backend request failed", payload))
            else:
                self._relay(backend_response)

        def do_GET(self):
            """Forward GET requests transparently."""
            try:
                backend_response = proxy._client.get(
                    self.path,
                    headers=self._forward_headers("host"),
                )
            except httpx.TimeoutException:
                self._send_empty(504)
            except httpx.HTTPError:
                # Covers connection failures and any other transport error.
                self._send_empty(502)
            else:
                self._relay(backend_response)

        def _read_body(self):
            """Read the request body; send a 400 and return None if Content-Length is unusable."""
            try:
                length = int(self.headers.get("Content-Length", 0))
            except (TypeError, ValueError):
                length = -1
            if length < 0:
                # A negative length would make rfile.read() block until EOF.
                self._send_json(400, rpc_error(-32600, "Invalid Content-Length header"))
                return None
            return self.rfile.read(length) if length else b""

        def _process_batch(self, items):
            """Scan every batch item, forward the allowed ones, return the ordered responses."""
            responses = [None] * len(items)
            to_forward = []  # (index, item) pairs

            for index, item in enumerate(items):
                if not isinstance(item, dict):
                    # Not a request object: answer it locally instead of
                    # crashing the whole batch or forwarding it unscanned.
                    responses[index] = rpc_error(-32600, "Invalid Request")
                    continue
                blocked, response = proxy._check_tool_call(item)
                if blocked:
                    responses[index] = response
                else:
                    to_forward.append((index, item))

            # Forward non-blocked items individually to backend
            for index, item in to_forward:
                responses[index] = self._forward_batch_item(item)
            return responses

        def _forward_batch_item(self, item):
            """Send one batch item to the backend and return its parsed reply or an error object."""
            try:
                backend_response = proxy._client.post(
                    self.path,
                    content=json.dumps(item).encode("utf-8"),
                    headers=self._forward_headers("host", "content-length"),
                )
            except httpx.ConnectError:
                return rpc_error(-32000, "Backend unavailable", item)
            except httpx.TimeoutException:
                return rpc_error(-32000, "Backend timeout", item)
            except httpx.HTTPError:
                return rpc_error(-32000, "Backend request failed", item)

            try:
                return json.loads(backend_response.content)
            except (json.JSONDecodeError, UnicodeDecodeError):
                return rpc_error(-32000, "Invalid backend response", item)

        def _forward_headers(self, *excluded):
            """Copy the incoming request headers, minus the lower-case names in *excluded*."""
            return {
                key: value for key, value in self.headers.items()
                if key.lower() not in excluded
            }

        def _relay(self, backend_response):
            """Copy the backend's status, headers and body to the client."""
            response_body = backend_response.content
            self.send_response(backend_response.status_code)
            for key, value in backend_response.headers.items():
                # Content-Length is recomputed below from the body actually sent.
                if key.lower() not in ("transfer-encoding", "content-encoding", "content-length"):
                    self.send_header(key, value)
            self.send_header("Content-Length", str(len(response_body)))
            self.end_headers()
            self.wfile.write(response_body)

        def _send_empty(self, status_code):
            """Send a status line with no headers and no body."""
            self.send_response(status_code)
            self.end_headers()

        def _send_json(self, status_code, data):
            """Serialize *data* as JSON and send it with the given status."""
            body = json.dumps(data).encode("utf-8")
            self.send_response(status_code)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format, *args):
            """Route access logs through Python logging."""
            logger.debug("Proxy: %s", format % args)

    return ProxyHandler
237
+
238
+ def _check_tool_call(self, payload: dict) -> tuple:
239
+ """
240
+ Check if a JSON-RPC payload is a tools/call and scan it.
241
+
242
+ Returns:
243
+ (blocked: bool, response: dict or None)
244
+ """
245
+ # Only intercept tools/call JSON-RPC method
246
+ method = payload.get("method", "")
247
+ if method != "tools/call":
248
+ return False, None
249
+
250
+ params = payload.get("params", {})
251
+ tool_name = params.get("name", "")
252
+ arguments = params.get("arguments", {})
253
+ request_id = payload.get("id")
254
+
255
+ if not tool_name:
256
+ return False, None
257
+
258
+ # Run detection
259
+ result = self.engine.scan(tool_name=tool_name, arguments=arguments)
260
+
261
+ if result.score >= self.engine.score_threshold["block"][0]:
262
+ # THREAT - block
263
+ threat_type = "unknown"
264
+ if result.matched_rules:
265
+ top_rule = max(result.matched_rules, key=lambda r: r.score)
266
+ threat_type = top_rule.category
267
+
268
+ self.reporter.generate_report(
269
+ threat_type=threat_type,
270
+ tool_name=tool_name,
271
+ arguments=arguments,
272
+ risk_score=result.score,
273
+ matched_rules=result.matched_rules,
274
+ action="BLOCK",
275
+ scan_result=result,
276
+ )
277
+
278
+ logger.warning(
279
+ "BLOCKED tool call via proxy: %s (score=%d, rules=%s)",
280
+ tool_name, result.score,
281
+ [r.id for r in result.matched_rules],
282
+ )
283
+
284
+ blocked_response = {
285
+ "jsonrpc": "2.0",
286
+ "result": {
287
+ "content": [
288
+ {
289
+ "type": "text",
290
+ "text": f"Tool call blocked by ProofLayer: {tool_name} "
291
+ f"(risk score: {result.score})",
292
+ }
293
+ ],
294
+ "isError": True,
295
+ },
296
+ "id": request_id,
297
+ }
298
+ return True, blocked_response
299
+
300
+ if result.score >= self.engine.score_threshold["warn"][0]:
301
+ logger.warning(
302
+ "SUSPICIOUS tool call via proxy: %s (score=%d)",
303
+ tool_name, result.score,
304
+ )
305
+
306
+ return False, None
@@ -0,0 +1,265 @@
1
+ """
2
+ ProofLayer Runtime Wrapper
3
+ ===========================
4
+
5
+ Wraps MCP servers with runtime security monitoring.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import json
11
+ import logging
12
+ from typing import Any, Dict, Optional, Callable, Tuple
13
+ from pathlib import Path
14
+
15
+ from ..detection.engine import DetectionEngine
16
+ from ..response.actions import ResponseAction, ThreatAction
17
+ from ..reporting.reporter import SecurityReporter
18
+ from ..config.loader import ConfigLoader
19
+ from ..utils.logging import configure_logging
20
+ from ..metrics import metrics, start_metrics_server
21
+
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ class ProofLayerRuntime:
27
+ """
28
+ Runtime security wrapper for MCP servers.
29
+
30
+ Usage:
31
+ runtime = ProofLayerRuntime(config_path="prooflayer.yaml")
32
+ protected_server = runtime.wrap(mcp_server)
33
+ protected_server.run()
34
+ """
35
+
36
def __init__(
    self,
    config_path: Optional[str] = None,
    detection_rules: Optional[str] = None,
    action_on_threat: str = "kill",
    report_dir: Optional[str] = None,
    score_threshold: Optional[Dict[str, Tuple[int, ...]]] = None
):
    """
    Initialize ProofLayer Runtime.

    Loads the configuration, applies the explicit overrides, builds the
    detection engine, reporter and response action, then configures
    logging and (if enabled in the config) the metrics server.

    Args:
        config_path: Path to YAML config file
        detection_rules: Rules to load ("prompt-injection", "all")
        action_on_threat: Action on threat detection ("allow", "warn", "block", "kill")
        report_dir: Directory for security reports
        score_threshold: Dict with allow/warn/block ranges
    """
    self.config = self._load_config(config_path)

    # A config file may omit a section or leave it empty; make sure both
    # sections are dicts so the overrides and lookups below cannot fail.
    for section in ("detection", "response"):
        if not isinstance(self.config.get(section), dict):
            self.config[section] = {}
    detection_cfg = self.config["detection"]
    response_cfg = self.config["response"]

    # Override config with explicit parameters
    if detection_rules:
        detection_cfg["rules"] = detection_rules
    # NOTE(review): the default "kill" is truthy, so unless the caller
    # passes a falsy value this always replaces the configured on_threat.
    if action_on_threat:
        response_cfg["on_threat"] = action_on_threat
    if report_dir:
        response_cfg["report_dir"] = report_dir
    if score_threshold:
        detection_cfg["score_threshold"] = score_threshold

    # Initialize components
    self.detection_engine = DetectionEngine(
        rules_dir=detection_cfg.get("rules_dir"),
        score_threshold=detection_cfg.get("score_threshold"),
        fail_closed=detection_cfg.get("fail_closed", True)
    )

    self.reporter = SecurityReporter(
        report_dir=response_cfg.get("report_dir", "./security-reports")
    )

    self.response_action = ResponseAction(
        default_action=response_cfg.get("on_threat", "warn"),
        reporter=self.reporter
    )

    # Apply structured logging from config
    log_cfg = self.config.get("logging", {})
    configure_logging(
        level=log_cfg.get("level", "INFO"),
        log_format=log_cfg.get("format", "text"),
    )

    # Enable metrics if configured
    metrics_cfg = self.config.get("metrics", {})
    if metrics_cfg.get("enabled", False):
        metrics.enabled = True
        metrics_port = metrics_cfg.get("port", 9090)
        start_metrics_server(port=metrics_port)

    logger.info("ProofLayer Runtime v0.1.0 initialized")
    logger.info(f"Detection rules loaded: {len(self.detection_engine.rules)}")
    logger.info(f"Default action on threat: {self.response_action.default_action}")
99
+
100
+ def _load_config(self, config_path: Optional[str]) -> Dict[str, Any]:
101
+ """Load configuration from file or use defaults."""
102
+ if config_path and os.path.exists(config_path):
103
+ return ConfigLoader.load(config_path)
104
+
105
+ # Default configuration
106
+ return {
107
+ "detection": {
108
+ "enabled": True,
109
+ "rules_dir": None, # Will use packaged rules
110
+ "score_threshold": {
111
+ "allow": (0, 29),
112
+ "warn": (30, 69),
113
+ "block": (70, 100)
114
+ }
115
+ },
116
+ "response": {
117
+ "on_threat": "warn", # Conservative default
118
+ "report_dir": "./security-reports",
119
+ "alert_webhook": None
120
+ },
121
+ "performance": {
122
+ "max_latency_ms": 10,
123
+ "cache_rules": True
124
+ },
125
+ "logging": {
126
+ "level": "INFO",
127
+ "format": "json"
128
+ }
129
+ }
130
+
131
def wrap(self, mcp_server: Any) -> "ProtectedMCPServer":
    """
    Put an MCP server behind this runtime's security components.

    Args:
        mcp_server: Original MCP server instance

    Returns:
        A ``ProtectedMCPServer`` built from the given server and this
        runtime's detection engine, response action and reporter.
    """
    protected = ProtectedMCPServer(
        mcp_server,
        self.detection_engine,
        self.response_action,
        self.reporter,
    )
    return protected
147
+
148
def scan_tool_call(
    self,
    tool_name: str,
    arguments: Dict[str, Any],
    context: Optional[Dict[str, Any]] = None
) -> Tuple[int, ThreatAction, Dict[str, Any]]:
    """
    Scan a single MCP tool call for threats.

    Args:
        tool_name: Name of the MCP tool
        arguments: Tool call arguments
        context: Additional context (message ID, timestamp, etc.).
            Accepted for API compatibility; this method does not read it.

    Returns:
        Tuple of (risk_score, action, detection_details). ``action`` is
        ALLOW up to the configured "allow" upper bound, WARN up to the
        "warn" upper bound and BLOCK above that.
    """
    # Run detection
    scan = self.detection_engine.scan(
        tool_name=tool_name,
        arguments=arguments
    )
    # The transport proxy in this package reads .score / .matched_rules from
    # the scan result, so accept that shape as well as a plain
    # (score, rules) pair.
    if hasattr(scan, "score") and hasattr(scan, "matched_rules"):
        risk_score, matched_rules = scan.score, scan.matched_rules
    else:
        risk_score, matched_rules = scan

    # Determine action based on score
    thresholds = self.config["detection"]["score_threshold"]
    if risk_score <= thresholds["allow"][1]:
        action = ThreatAction.ALLOW
    elif risk_score <= thresholds["warn"][1]:
        action = ThreatAction.WARN
    else:
        action = ThreatAction.BLOCK

    # The confidence label uses fixed cut-offs (70 / 30), independent of
    # the configured thresholds above.
    if risk_score > 70:
        confidence = "HIGH"
    elif risk_score > 30:
        confidence = "MEDIUM"
    else:
        confidence = "LOW"

    detection_details = {
        "risk_score": risk_score,
        "matched_rules": [rule.id for rule in matched_rules],
        "confidence": confidence
    }

    return risk_score, action, detection_details
186
+
187
+
188
+ class ProtectedMCPServer:
189
+ """
190
+ MCP server wrapped with ProofLayer runtime security.
191
+ """
192
+
193
def __init__(
    self,
    mcp_server: Any,
    detection_engine: DetectionEngine,
    response_action: ResponseAction,
    reporter: SecurityReporter
):
    """
    Keep references to the server and security components, then hook the
    server's tool-call handler.

    Args:
        mcp_server: The MCP server instance to protect.
        detection_engine: Engine used to scan each tool call.
        response_action: Decides the action for a score and performs kills.
        reporter: Generates the report for blocked or killed calls.
    """
    self.reporter = reporter
    self.response_action = response_action
    self.detection_engine = detection_engine
    self.mcp_server = mcp_server

    # Install the scanning wrapper once all collaborators are in place.
    self._wrap_tool_handlers()
207
+
208
def _wrap_tool_handlers(self):
    """
    Intercept MCP tool calls and inject security scanning.

    Replaces the server's ``call_tool`` attribute with a wrapper that scans
    the call first. BLOCK and KILL decisions generate a report and raise
    ``SecurityError`` (KILL also asks the response action to kill the
    server); WARN is logged and the call proceeds; everything else goes
    straight to the original handler.

    If the server has no ``call_tool`` attribute nothing can be hooked;
    this is logged as a warning because tool calls will then run unscanned.
    """
    original_call_tool = getattr(self.mcp_server, "call_tool", None)

    if not original_call_tool:
        logger.warning(
            "MCP server %s has no call_tool handler; tool calls will NOT be scanned",
            type(self.mcp_server).__name__,
        )
        return

    def wrapped_call_tool(tool_name: str, arguments: Dict[str, Any]):
        # Security scan before execution
        scan = self.detection_engine.scan(
            tool_name=tool_name,
            arguments=arguments
        )
        # The transport proxy in this package reads .score / .matched_rules
        # from the scan result, so accept that shape as well as a plain
        # (score, rules) pair.
        if hasattr(scan, "score") and hasattr(scan, "matched_rules"):
            risk_score, matched_rules = scan.score, scan.matched_rules
        else:
            risk_score, matched_rules = scan

        # Determine action
        action = self.response_action.decide_action(risk_score)

        if action == ThreatAction.BLOCK or action == ThreatAction.KILL:
            # Extract threat type from highest-scoring matched rule
            threat_type = "unknown"
            if matched_rules:
                top_rule = max(matched_rules, key=lambda r: r.score)
                threat_type = top_rule.category

            # Generate security report
            report = self.reporter.generate_report(
                threat_type=threat_type,
                tool_name=tool_name,
                arguments=arguments,
                risk_score=risk_score,
                matched_rules=matched_rules,
                action=action.value
            )

            logger.error(f"THREAT DETECTED: {tool_name} - Score: {risk_score} - Action: {action.value}")

            if action == ThreatAction.KILL:
                # Kill the MCP server
                self.response_action.kill_server(report)

            raise SecurityError(f"Tool call blocked: {tool_name} (risk score: {risk_score})")

        elif action == ThreatAction.WARN:
            logger.warning(f"SUSPICIOUS: {tool_name} - Score: {risk_score}")

        # Allow the call to proceed
        return original_call_tool(tool_name, arguments)

    # Replace the original method
    setattr(self.mcp_server, "call_tool", wrapped_call_tool)
256
+
257
def run(self):
    """Log the start-up and hand over to the wrapped server's ``run()``, returning its result."""
    logger.info("Starting ProofLayer-protected MCP server")
    server = self.mcp_server
    return server.run()
261
+
262
+
263
class SecurityError(Exception):
    """Signals that a tool call was refused because a security threat was detected."""
@@ -0,0 +1,21 @@
1
+ """Utility functions."""
2
+
3
+ from .entropy import calculate_shannon_entropy
4
+ from .masking import mask_sensitive_data
5
+ from .encoding import (
6
+ decode_hex_escapes,
7
+ decode_octal_escapes,
8
+ decode_unicode_escapes,
9
+ decode_url_encoding,
10
+ decode_base64_payloads,
11
+ )
12
+
13
+ __all__ = [
14
+ "calculate_shannon_entropy",
15
+ "mask_sensitive_data",
16
+ "decode_hex_escapes",
17
+ "decode_octal_escapes",
18
+ "decode_unicode_escapes",
19
+ "decode_url_encoding",
20
+ "decode_base64_payloads",
21
+ ]
@@ -0,0 +1,87 @@
1
+ """
2
+ Encoding Utilities
3
+ ==================
4
+
5
+ Decode various encoding evasion techniques used to bypass detection.
6
+ """
7
+
8
+ import re
9
+ import base64
10
+ import logging
11
+ from typing import List
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
# Pre-compiled regex patterns for encoding detection.
# The four escape patterns capture the digits of one escape in group 1.

# \xNN: exactly two hex digits
HEX_ESCAPE_RE = re.compile(r"\\x([0-9a-fA-F]{2})")
# \N, \NN or \NNN: one to three octal digits
OCTAL_ESCAPE_RE = re.compile(r"\\([0-7]{1,3})")
# \uNNNN: exactly four hex digits
UNICODE_ESCAPE_RE = re.compile(r"\\u([0-9a-fA-F]{4})")
# %NN: exactly two hex digits
URL_ENCODE_RE = re.compile(r"%([0-9a-fA-F]{2})")
# One or more 4-character base64 groups, optionally followed by a padded
# final group ("xx==" or "xxx=").
BASE64_RE = re.compile(r"(?:[A-Za-z0-9+/]{4}){1,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?")
21
+
22
+
23
def decode_hex_escapes(text: str) -> str:
    """
    Replace each two-digit hex escape with the character it names.

    For example \\x63\\x75\\x72\\x6c becomes curl. Text that does not
    match the escape pattern is returned unchanged.
    """
    # Two hex digits always give a value in 0-255, so chr() cannot fail here.
    return HEX_ESCAPE_RE.sub(lambda hit: chr(int(hit.group(1), 16)), text)
31
+
32
+
33
def decode_octal_escapes(text: str) -> str:
    """
    Replace each backslash-octal escape with the character it names.

    For example \\143\\165\\162\\154 becomes curl. Text that does not
    match the escape pattern is returned unchanged.
    """
    def _to_char(hit):
        # At most three octal digits, i.e. at most 0o777 (511), which is
        # always a valid code point for chr().
        return chr(int(hit.group(1), 8))

    return OCTAL_ESCAPE_RE.sub(_to_char, text)
44
+
45
+
46
# A UTF-16 surrogate pair written as two consecutive \uXXXX escapes
# (groups 1 and 2) or, failing that, a single \uXXXX escape (group 3).
_UNICODE_ESCAPE_OR_PAIR_RE = re.compile(
    r"\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})"
    r"|\\u([0-9a-fA-F]{4})"
)


def decode_unicode_escapes(text: str) -> str:
    """
    Decode unicode escape sequences like \\u0063\\u0075\\u0072\\u006c -> curl.

    A high surrogate escape immediately followed by a low surrogate escape
    (for example \\uD83D\\uDE00) is combined into the single character the
    pair encodes, instead of leaving two unpaired surrogates in the result.
    An escape naming a surrogate that is not part of such a pair is decoded
    on its own, as any other escape.

    Args:
        text: Text that may contain \\uXXXX escapes.

    Returns:
        The text with every escape replaced by its character.
    """
    def _replace(match):
        high, low, single = match.groups()
        if single is not None:
            return chr(int(single, 16))
        # Standard UTF-16 pair arithmetic.
        return chr(
            0x10000
            + ((int(high, 16) - 0xD800) << 10)
            + (int(low, 16) - 0xDC00)
        )

    return _UNICODE_ESCAPE_OR_PAIR_RE.sub(_replace, text)
54
+
55
+
56
# A run of one or more consecutive %XX escapes, decoded together so that
# multi-byte UTF-8 sequences can be recognised.
_URL_ENCODED_RUN_RE = re.compile(r"(?:%[0-9a-fA-F]{2})+")


def decode_url_encoding(text: str) -> str:
    """
    Decode URL-encoded sequences like %63%75%72%6c -> curl.

    Percent-escapes encode bytes, so each run of consecutive escapes is
    decoded as UTF-8: %C3%A9 becomes a single e-acute rather than two
    unrelated characters. Bytes that are not valid UTF-8 fall back to the
    character with the same code point (e.g. %FF -> U+00FF), so such input
    is still decoded instead of being left escaped.

    Args:
        text: Text that may contain %XX escapes.

    Returns:
        The text with every %XX escape decoded; anything else is unchanged.
    """
    def _replace(match):
        raw = bytes.fromhex(match.group(0).replace("%", ""))
        # surrogateescape maps each undecodable byte 0xNN to U+DCNN; map
        # those back to chr(0xNN) so invalid bytes keep a 1:1 decoding.
        decoded = raw.decode("utf-8", errors="surrogateescape")
        return "".join(
            chr(ord(ch) - 0xDC00) if 0xDC80 <= ord(ch) <= 0xDCFF else ch
            for ch in decoded
        )

    return _URL_ENCODED_RUN_RE.sub(_replace, text)
64
+
65
+
66
def decode_base64_payloads(text: str) -> str:
    """
    Detect and decode base64-encoded segments within text.

    Prepends decoded content before the original so both are scanned.
    Only decodes segments that produce valid UTF-8 text containing at
    least one alphabetic character; candidates shorter than 8 characters
    are ignored.

    Args:
        text: Text that may contain base64 segments.

    Returns:
        The decoded segments joined by spaces, a space, then the original
        text; or the original text unchanged when nothing was decoded.
    """
    decoded_parts: List[str] = []
    for match in BASE64_RE.finditer(text):
        candidate = match.group(0)
        if len(candidate) < 8:
            continue
        try:
            decoded = base64.b64decode(candidate).decode("utf-8", errors="strict")
        except ValueError:
            # Covers both bad base64 (binascii.Error) and non-UTF-8 output
            # (UnicodeDecodeError); either means "not a text payload".
            continue
        if any(c.isalpha() for c in decoded):
            decoded_parts.append(decoded)

    if not decoded_parts:
        return text
    return " ".join(decoded_parts) + " " + text