iflow-mcp_developermode-korea_reversecore-mcp 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/METADATA +543 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/RECORD +79 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/WHEEL +5 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/top_level.txt +1 -0
- reversecore_mcp/__init__.py +9 -0
- reversecore_mcp/core/__init__.py +78 -0
- reversecore_mcp/core/audit.py +101 -0
- reversecore_mcp/core/binary_cache.py +138 -0
- reversecore_mcp/core/command_spec.py +357 -0
- reversecore_mcp/core/config.py +432 -0
- reversecore_mcp/core/container.py +288 -0
- reversecore_mcp/core/decorators.py +152 -0
- reversecore_mcp/core/error_formatting.py +93 -0
- reversecore_mcp/core/error_handling.py +142 -0
- reversecore_mcp/core/evidence.py +229 -0
- reversecore_mcp/core/exceptions.py +296 -0
- reversecore_mcp/core/execution.py +240 -0
- reversecore_mcp/core/ghidra.py +642 -0
- reversecore_mcp/core/ghidra_helper.py +481 -0
- reversecore_mcp/core/ghidra_manager.py +234 -0
- reversecore_mcp/core/json_utils.py +131 -0
- reversecore_mcp/core/loader.py +73 -0
- reversecore_mcp/core/logging_config.py +206 -0
- reversecore_mcp/core/memory.py +721 -0
- reversecore_mcp/core/metrics.py +198 -0
- reversecore_mcp/core/mitre_mapper.py +365 -0
- reversecore_mcp/core/plugin.py +45 -0
- reversecore_mcp/core/r2_helpers.py +404 -0
- reversecore_mcp/core/r2_pool.py +403 -0
- reversecore_mcp/core/report_generator.py +268 -0
- reversecore_mcp/core/resilience.py +252 -0
- reversecore_mcp/core/resource_manager.py +169 -0
- reversecore_mcp/core/result.py +132 -0
- reversecore_mcp/core/security.py +213 -0
- reversecore_mcp/core/validators.py +238 -0
- reversecore_mcp/dashboard/__init__.py +221 -0
- reversecore_mcp/prompts/__init__.py +56 -0
- reversecore_mcp/prompts/common.py +24 -0
- reversecore_mcp/prompts/game.py +280 -0
- reversecore_mcp/prompts/malware.py +1219 -0
- reversecore_mcp/prompts/report.py +150 -0
- reversecore_mcp/prompts/security.py +136 -0
- reversecore_mcp/resources.py +329 -0
- reversecore_mcp/server.py +727 -0
- reversecore_mcp/tools/__init__.py +49 -0
- reversecore_mcp/tools/analysis/__init__.py +74 -0
- reversecore_mcp/tools/analysis/capa_tools.py +215 -0
- reversecore_mcp/tools/analysis/die_tools.py +180 -0
- reversecore_mcp/tools/analysis/diff_tools.py +643 -0
- reversecore_mcp/tools/analysis/lief_tools.py +272 -0
- reversecore_mcp/tools/analysis/signature_tools.py +591 -0
- reversecore_mcp/tools/analysis/static_analysis.py +479 -0
- reversecore_mcp/tools/common/__init__.py +58 -0
- reversecore_mcp/tools/common/file_operations.py +352 -0
- reversecore_mcp/tools/common/memory_tools.py +516 -0
- reversecore_mcp/tools/common/patch_explainer.py +230 -0
- reversecore_mcp/tools/common/server_tools.py +115 -0
- reversecore_mcp/tools/ghidra/__init__.py +19 -0
- reversecore_mcp/tools/ghidra/decompilation.py +975 -0
- reversecore_mcp/tools/ghidra/ghidra_tools.py +1052 -0
- reversecore_mcp/tools/malware/__init__.py +61 -0
- reversecore_mcp/tools/malware/adaptive_vaccine.py +579 -0
- reversecore_mcp/tools/malware/dormant_detector.py +756 -0
- reversecore_mcp/tools/malware/ioc_tools.py +228 -0
- reversecore_mcp/tools/malware/vulnerability_hunter.py +519 -0
- reversecore_mcp/tools/malware/yara_tools.py +214 -0
- reversecore_mcp/tools/patch_explainer.py +19 -0
- reversecore_mcp/tools/radare2/__init__.py +13 -0
- reversecore_mcp/tools/radare2/r2_analysis.py +972 -0
- reversecore_mcp/tools/radare2/r2_session.py +376 -0
- reversecore_mcp/tools/radare2/radare2_mcp_tools.py +1183 -0
- reversecore_mcp/tools/report/__init__.py +4 -0
- reversecore_mcp/tools/report/email.py +82 -0
- reversecore_mcp/tools/report/report_mcp_tools.py +344 -0
- reversecore_mcp/tools/report/report_tools.py +1076 -0
- reversecore_mcp/tools/report/session.py +194 -0
- reversecore_mcp/tools/report_tools.py +11 -0
@@ -0,0 +1,972 @@
"""Radare2-based analysis tools for binary analysis, cross-references, and execution tracing."""

import os
import re
from typing import Any

from async_lru import alru_cache
from fastmcp import Context

# Use high-performance JSON implementation (3-5x faster)
from reversecore_mcp.core import json_utils as json
from reversecore_mcp.core.command_spec import validate_r2_command
from reversecore_mcp.core.config import get_config
from reversecore_mcp.core.decorators import log_execution
from reversecore_mcp.core.error_handling import handle_tool_errors
from reversecore_mcp.core.execution import execute_subprocess_async  # For test compatibility
from reversecore_mcp.core.metrics import track_metrics
from reversecore_mcp.core.r2_helpers import (
    build_r2_cmd as _build_r2_cmd,
)

# Import shared R2 helper functions from core (avoids circular dependencies)
from reversecore_mcp.core.r2_helpers import (
    calculate_dynamic_timeout,
    remove_analysis_commands,
)
from reversecore_mcp.core.r2_helpers import (
    escape_mermaid_chars as _escape_mermaid_chars,
)
from reversecore_mcp.core.r2_helpers import (
    execute_r2_command as _execute_r2_command,
)
from reversecore_mcp.core.r2_helpers import (
    parse_json_output as _parse_json_output,
)
from reversecore_mcp.core.r2_helpers import (
    strip_address_prefixes as _strip_address_prefixes,
)
from reversecore_mcp.core.resilience import circuit_breaker
from reversecore_mcp.core.result import ToolResult, failure, success
from reversecore_mcp.core.security import validate_file_path
from reversecore_mcp.core.validators import (
    _ADDRESS_PATTERN,  # OPTIMIZATION: Import pre-compiled pattern instead of duplicating
    validate_tool_parameters,
)

# Load default timeout from configuration
DEFAULT_TIMEOUT = get_config().default_tool_timeout

@log_execution(tool_name="run_radare2")
@track_metrics("run_radare2")
@circuit_breaker("run_radare2", failure_threshold=5, recovery_timeout=60)
@handle_tool_errors
async def run_radare2(
    file_path: str,
    r2_command: str,
    max_output_size: int = 10_000_000,
    timeout: int = DEFAULT_TIMEOUT,
    ctx: Context = None,
) -> ToolResult:
    """Execute vetted radare2 commands for binary triage."""

    validate_tool_parameters("run_radare2", {"r2_command": r2_command})
    validated_path = validate_file_path(file_path)
    validated_command = validate_r2_command(r2_command)

    # Adaptive analysis logic based on command type and file size:
    # use 'aa' for basic commands, 'aaa' for analysis-heavy commands on small files
    analysis_level = "aa"

    # Simple information commands don't need analysis
    simple_commands = ["i", "iI", "iz", "izj", "il", "is", "isj", "ie", "it", "iS", "iSj"]
    if validated_command in simple_commands or validated_command.startswith("i "):
        analysis_level = "-n"

    # Function listing commands (afl, aflj) benefit from deeper analysis,
    # but only if the file is small enough
    function_commands = ["afl", "aflj", "afll", "afllj", "pdf", "pdr"]
    if any(cmd in validated_command for cmd in function_commands):
        try:
            file_size_mb = os.path.getsize(validated_path) / (1024 * 1024)
            if file_size_mb < 10:  # For files under 10MB, use deeper analysis
                analysis_level = "aaa"
        except OSError:
            pass

    # If the user explicitly requested analysis, handle it via caching.
    # (A single "aa" substring check suffices: any command containing "aaa" also contains "aa".)
    if "aa" in validated_command:
        # Remove explicit analysis commands as they are handled by _build_r2_cmd
        validated_command = remove_analysis_commands(validated_command)

    # Use helper function to execute the radare2 command
    try:
        output, bytes_read = await _execute_r2_command(
            validated_path,
            [validated_command],
            analysis_level=analysis_level,
            max_output_size=max_output_size,
            base_timeout=timeout,
        )
        return success(output, bytes_read=bytes_read, analysis_level=analysis_level)
    except Exception as e:
        # Log error to client if context is available
        if ctx:
            await ctx.error(f"radare2 command '{validated_command}' failed: {str(e)}")
        raise

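# A minimal usage sketch of run_radare2 (hypothetical workspace path; assumes the
# coroutine is awaited directly rather than through an MCP client):
#
#     result = await run_radare2("/app/workspace/sample.bin", "izj")
#     if result.status == "success":
#         strings = json.loads(result.data)  # 'izj' emits the string table as JSON
#
# The result.status / result.data field names follow the ToolResult usage seen in
# generate_function_graph below; treat them as an assumption, not a documented API.
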
# Note: R2AnalysisPlugin has been removed.
# All tools (run_radare2, trace_execution_path, generate_function_graph, analyze_xrefs)
# are now registered via Radare2ToolsPlugin in radare2_mcp_tools.py for unified management.


# Dangerous sink APIs for prioritized path tracing
_DANGEROUS_SINKS = frozenset(
    {
        # Command execution
        "system",
        "execve",
        "execl",
        "execlp",
        "execle",
        "execv",
        "execvp",
        "execvpe",
        "popen",
        "_popen",
        "ShellExecute",
        "ShellExecuteEx",
        "CreateProcess",
        "WinExec",
        "spawn",
        "fork",
        # Memory corruption
        "strcpy",
        "strcat",
        "sprintf",
        "vsprintf",
        "gets",
        "scanf",
        "memcpy",
        "memmove",
        "strncpy",
        # File operations
        "fopen",
        "open",
        "CreateFile",
        "DeleteFile",
        "WriteFile",
        # Network
        "connect",
        "send",
        "recv",
        "socket",
        "bind",
        "listen",
        # Registry (Windows)
        "RegSetValue",
        "RegCreateKey",
        "RegDeleteKey",
    }
)

# =============================================================================
# Symbol Alias Database for Enhanced Matching
# =============================================================================
# Maps common API names to their variants (Windows A/W suffixes, safety variants, etc.)
_SYMBOL_ALIASES: dict[str, list[str]] = {
    # Windows Process APIs
    "createprocess": ["CreateProcessA", "CreateProcessW", "CreateProcessAsUserA", "CreateProcessAsUserW"],
    "shellexecute": ["ShellExecuteA", "ShellExecuteW", "ShellExecuteExA", "ShellExecuteExW"],
    "winexec": ["WinExec"],
    # Windows File APIs
    "createfile": ["CreateFileA", "CreateFileW", "CreateFile2"],
    "deletefile": ["DeleteFileA", "DeleteFileW"],
    "writefile": ["WriteFile", "WriteFileEx"],
    "readfile": ["ReadFile", "ReadFileEx"],
    "copyfile": ["CopyFileA", "CopyFileW", "CopyFileExA", "CopyFileExW"],
    # Windows Registry APIs
    "regsetvalue": ["RegSetValueA", "RegSetValueW", "RegSetValueExA", "RegSetValueExW"],
    "regcreatekey": ["RegCreateKeyA", "RegCreateKeyW", "RegCreateKeyExA", "RegCreateKeyExW"],
    "regopenkey": ["RegOpenKeyA", "RegOpenKeyW", "RegOpenKeyExA", "RegOpenKeyExW"],
    "regdeletekey": ["RegDeleteKeyA", "RegDeleteKeyW", "RegDeleteKeyExA", "RegDeleteKeyExW"],
    # Windows Message APIs
    "messagebox": ["MessageBoxA", "MessageBoxW", "MessageBoxExA", "MessageBoxExW"],
    # Windows Service APIs
    "createservice": ["CreateServiceA", "CreateServiceW"],
    "openservice": ["OpenServiceA", "OpenServiceW"],
    "startservice": ["StartServiceA", "StartServiceW"],
    # Windows Network APIs
    "internetopen": ["InternetOpenA", "InternetOpenW"],
    "internetconnect": ["InternetConnectA", "InternetConnectW"],
    "httpopen": ["HttpOpenRequestA", "HttpOpenRequestW"],
    # C Runtime String Functions
    "strcpy": ["strcpy", "strcpy_s", "__strcpy_chk", "wcscpy", "lstrcpyA", "lstrcpyW"],
    "strcat": ["strcat", "strcat_s", "__strcat_chk", "wcscat", "lstrcatA", "lstrcatW"],
    "sprintf": ["sprintf", "sprintf_s", "swprintf", "wsprintfA", "wsprintfW", "_snprintf"],
    "printf": ["printf", "wprintf", "_printf_l"],
    "scanf": ["scanf", "scanf_s", "wscanf", "sscanf", "fscanf"],
    # C Runtime Memory Functions
    "malloc": ["malloc", "_malloc", "calloc", "realloc"],
    "free": ["free", "_free"],
    "memcpy": ["memcpy", "memcpy_s", "memmove", "memmove_s", "wmemcpy"],
    # System/Exec Functions
    "system": ["system", "_system", "msvcrt.system", "_wsystem"],
    "popen": ["popen", "_popen", "_wpopen"],
    "execve": ["execve", "execv", "execl", "execvp", "execlp"],
    # Network Functions
    "socket": ["socket", "WSASocket", "WSASocketA", "WSASocketW"],
    "connect": ["connect", "WSAConnect"],
    "send": ["send", "sendto", "WSASend", "WSASendTo"],
    "recv": ["recv", "recvfrom", "WSARecv", "WSARecvFrom"],
    # Crypto Functions
    "cryptencrypt": ["CryptEncrypt", "CryptDecrypt"],
    "cryptgenkey": ["CryptGenKey", "CryptDeriveKey"],
    "cryptacquirecontext": ["CryptAcquireContextA", "CryptAcquireContextW"],
}

# Pre-compute reverse lookup for O(1) alias checking
_ALIAS_REVERSE_LOOKUP: dict[str, str] = {}
for _base, _aliases in _SYMBOL_ALIASES.items():
    for _alias in _aliases:
        _ALIAS_REVERSE_LOOKUP[_alias.lower()] = _base

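# Worked example of the reverse lookup (values derived from the tables above):
#
#     _ALIAS_REVERSE_LOOKUP["createprocessw"]  -> "createprocess"
#     _ALIAS_REVERSE_LOOKUP["lstrcpya"]        -> "strcpy"
#
# A single dict probe therefore replaces a scan over every alias list.
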
def _clean_symbol_name(name: str) -> str:
    """Remove common prefixes and normalize symbol name."""
    if not name:
        return ""
    # Remove common radare2/binary prefixes
    clean = name
    for prefix in ["sym.imp.", "sym.", "imp.", "fcn.", "sub_", "loc_"]:
        if clean.lower().startswith(prefix):
            clean = clean[len(prefix):]
            break
    # Remove leading/trailing underscores
    return clean.strip("_").lower()


def _fuzzy_match_symbol(target: str, symbol: str) -> tuple[float, str]:
    """
    Calculate fuzzy match score between target and symbol.

    Returns:
        Tuple of (score, match_method) where score is 0.0-1.0
    """
    target_clean = _clean_symbol_name(target)
    symbol_clean = _clean_symbol_name(symbol)

    if not target_clean or not symbol_clean:
        return (0.0, "none")

    # Exact match (after cleaning)
    if target_clean == symbol_clean:
        return (1.0, "exact")

    # Check alias database
    if target_clean in _SYMBOL_ALIASES:
        for alias in _SYMBOL_ALIASES[target_clean]:
            if alias.lower() == symbol_clean or symbol_clean.endswith(alias.lower()):
                return (0.95, "alias")

    # Reverse alias check
    if symbol_clean in _ALIAS_REVERSE_LOOKUP:
        base = _ALIAS_REVERSE_LOOKUP[symbol_clean]
        if base == target_clean:
            return (0.95, "alias_reverse")

    # Suffix match (e.g., "system" matches "msvcrt.system")
    if symbol_clean.endswith(target_clean):
        return (0.85, "suffix")

    # Prefix match
    if symbol_clean.startswith(target_clean):
        return (0.75, "prefix")

    # Contains match
    if target_clean in symbol_clean:
        return (0.65, "contains")

    # Reverse contains (symbol in target)
    if symbol_clean in target_clean:
        return (0.55, "contains_reverse")

    return (0.0, "none")


def _find_best_symbol_match(target: str, symbols: list[dict]) -> tuple[dict | None, float, str]:
    """
    Find the best matching symbol from a list.

    Args:
        target: Target function name to find
        symbols: List of symbol dicts with 'name' field

    Returns:
        Tuple of (best_match_dict, score, match_method)
    """
    best_match = None
    best_score = 0.0
    best_method = "none"

    for sym in symbols:
        if not isinstance(sym, dict):
            continue

        # Check both 'name' and 'realname' fields
        for name_field in ["name", "realname"]:
            name = sym.get(name_field, "")
            if not name:
                continue

            score, method = _fuzzy_match_symbol(target, name)
            if score > best_score:
                best_score = score
                best_match = sym
                best_method = method

    return (best_match, best_score, best_method)

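# Illustrative scores, traced through the helpers above (symbol names are hypothetical):
#
#     _fuzzy_match_symbol("CreateFile", "sym.imp.CreateFileW")  -> (0.95, "alias")
#     _fuzzy_match_symbol("decrypt", "sym.my_decrypt")          -> (0.85, "suffix")
#     _fuzzy_match_symbol("reg", "RegSetValueExA")              -> (0.75, "prefix")
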
# OPTIMIZATION: Pre-define translation table for faster function name cleaning.
# Characters in the third maketrans argument are deleted (mapped to None).
_FUNC_NAME_CLEAN_TABLE = str.maketrans("", "", "_")


@log_execution(tool_name="trace_execution_path")
@track_metrics("trace_execution_path")
@handle_tool_errors
async def trace_execution_path(
    file_path: str,
    target_function: str,
    max_depth: int = 5,
    max_paths: int = 5,
    timeout: int | None = None,
    prioritize_sinks: bool = True,
) -> ToolResult:
    """
    Trace function calls backwards from a target function (Sink) to find potential execution paths.

    This tool helps identify "Exploit Paths" by finding which functions call a dangerous
    target function (like 'system', 'strcpy', 'execve'). It performs a recursive
    cross-reference analysis (backtrace) to map out how execution reaches the target.

    **Use Cases:**
    - **Vulnerability Analysis**: Check if user input (main/recv) reaches 'system'
    - **Reachability Analysis**: Verify if a vulnerable function is actually called
    - **Taint Analysis Helper**: Provide the path for AI to perform manual taint checking

    **Performance Optimizations (v3.0):**
    - Configurable trace depth (lower max_depth means faster analysis)
    - Sink-aware pruning: prioritizes paths through dangerous APIs
    - Dynamic timeout based on file size

    Args:
        file_path: Path to the binary file
        target_function: Name or address of the target function (e.g., 'sym.imp.system', '0x401000')
        max_depth: Maximum depth of backtrace (default: 5; reduce for speed)
        max_paths: Maximum number of paths to return (default: 5)
        timeout: Execution timeout in seconds (uses dynamic timeout if None)
        prioritize_sinks: Prioritize paths through dangerous sink APIs (default: True)

    Returns:
        ToolResult with a list of execution paths (call chains).
    """
    validated_path = validate_file_path(file_path)

    # Calculate dynamic timeout based on file size
    effective_timeout = (
        timeout if timeout else calculate_dynamic_timeout(str(validated_path), base_timeout=30)
    )

    # Helper to check if a function name is a dangerous sink
    def is_dangerous_sink(func_name: str) -> bool:
        """Check if function name matches any dangerous sink API."""
        if not func_name:
            return False
        # Remove common prefixes first
        clean_name = func_name.replace("sym.imp.", "").replace("sym.", "")
        # Then remove underscores using translate (faster than replace)
        clean_name = clean_name.translate(_FUNC_NAME_CLEAN_TABLE).lower()
        # Compare case-insensitively: _DANGEROUS_SINKS contains mixed-case Windows API
        # names (e.g., "CreateProcess") that would never match a lowercased name otherwise
        return any(sink.lower() in clean_name for sink in _DANGEROUS_SINKS)

    # Enhanced symbol resolution with fuzzy matching
    match_info = {"score": 0.0, "method": "none", "resolved_name": None}

    async def resolve_symbol_address(func_name: str) -> int | None:
        """Enhanced symbol resolution with fuzzy matching and multi-source lookup."""
        nonlocal match_info

        # If already an address, return as-is
        if func_name.startswith("0x"):
            try:
                match_info = {"score": 1.0, "method": "direct_address", "resolved_name": func_name}
                return int(func_name, 16)
            except ValueError:
                return None

        # Multi-source lookup: symbols, functions, imports
        cmd = _build_r2_cmd(str(validated_path), ["isj", "aflj", "iij"], "aaa")
        out, _ = await execute_subprocess_async(cmd, timeout=effective_timeout)

        lines = [line.strip() for line in out.strip().split("\n") if line.strip()]
        if not lines:
            return None

        all_symbols = []

        # Parse all sources
        for line in lines:
            try:
                parsed = _parse_json_output(line)
                if isinstance(parsed, list):
                    all_symbols.extend(parsed)
            except (json.JSONDecodeError, TypeError):
                continue

        if not all_symbols:
            return None

        # Use fuzzy matching to find best match
        best_match, score, method = _find_best_symbol_match(func_name, all_symbols)

        if best_match and score >= 0.5:  # Minimum confidence threshold
            match_info = {
                "score": score,
                "method": method,
                "resolved_name": best_match.get("name") or best_match.get("realname"),
            }
            # Get address from match (different fields for symbols vs functions)
            addr = best_match.get("vaddr") or best_match.get("offset") or best_match.get("plt")
            return addr

        return None

    # Resolve target address
    target_addr = target_function
    resolved_addr = await resolve_symbol_address(target_function)
    if resolved_addr:
        target_addr = hex(resolved_addr)
    # If we can't resolve it, use the original name (might work as an r2 symbol)

    paths = []
    visited = set()

    async def recursive_backtrace(current_addr, current_path, depth):
        if depth >= max_depth or len(paths) >= max_paths:
            return

        # OPTIMIZATION: Pre-compute addresses in current path to avoid repeated list comprehensions
        current_path_addrs = {p["addr"] for p in current_path}

        if current_addr in visited and current_addr not in current_path_addrs:
            # Allow revisiting if it's a different path, but prevent cycles in current path
            pass
        elif current_addr in current_path_addrs:
            return  # Cycle detected
        visited.add(current_addr)  # Record the visit; without this the check above is dead

        # Get xrefs TO this address
        cmd = _build_r2_cmd(str(validated_path), [f"axtj @ {current_addr}"], "aaa")
        out, _ = await execute_subprocess_async(cmd, timeout=effective_timeout)

        try:
            xrefs = _parse_json_output(out)
        except (json.JSONDecodeError, TypeError):
            xrefs = []

        if not xrefs:
            # End of chain (root caller found or no xrefs)
            if len(current_path) > 1:
                paths.append(current_path)
            return

        # OPTIMIZATION v3.0: Prioritize xrefs through dangerous sink APIs.
        # This implements "Sink-aware pruning" - explore high-value paths first.
        if prioritize_sinks and len(xrefs) > 1:
            xrefs = sorted(
                xrefs,
                key=lambda x: (
                    # Priority 1: main/entry functions (complete paths)
                    -2
                    if any(k in x.get("fcn_name", "").lower() for k in ["main", "entry"])
                    # Priority 2: Dangerous sink APIs
                    else -1
                    if is_dangerous_sink(x.get("fcn_name", ""))
                    # Priority 3: Everything else
                    else 0
                ),
            )

        for xref in xrefs:
            if len(paths) >= max_paths:
                break

            caller_addr = hex(xref.get("fcn_addr", 0))
            caller_name = xref.get("fcn_name", "unknown")
            type_ref = xref.get("type", "call")

            if type_ref not in ["call", "jump"]:
                continue

            new_node = {"addr": caller_addr, "name": caller_name, "type": type_ref}

            # If we reached main or entry, this is a complete path
            if "main" in caller_name or "entry" in caller_name:
                paths.append(current_path + [new_node])
            else:
                await recursive_backtrace(caller_addr, current_path + [new_node], depth + 1)

    # Start trace
    root_node = {"addr": target_addr, "name": target_function, "type": "target"}
    await recursive_backtrace(target_addr, [root_node], 0)

    # Format results
    # OPTIMIZATION: Use list comprehension with generator expression in join.
    # This reduces memory by avoiding intermediate list creation in the join.
    formatted_paths = [" -> ".join(f"{n['name']} ({n['addr']})" for n in p[::-1]) for p in paths]

    return success(
        {"paths": formatted_paths, "raw_paths": paths},
        path_count=len(paths),
        target=target_function,
        resolved_address=target_addr,
        match_confidence=match_info["score"],
        match_method=match_info["method"],
        resolved_name=match_info["resolved_name"],
        description=f"Found {len(paths)} execution paths to {target_function} (match: {match_info['method']}, confidence: {match_info['score']:.0%})",
    )

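# An invocation sketch (hypothetical binary and addresses; output shape follows the
# success() call above):
#
#     result = await trace_execution_path("/app/workspace/vuln.bin", "sym.imp.system", max_depth=3)
#     # data["paths"] might contain:
#     #   "main (0x401000) -> handle_input (0x4010a0) -> sym.imp.system (0x401050)"
#     # metadata carries match_confidence / match_method from the fuzzy resolver.
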
def _radare2_json_to_mermaid(json_str: str) -> str:
    """
    Convert Radare2 'agfj' JSON output to Mermaid Flowchart syntax.
    Optimized for LLM context efficiency.

    Args:
        json_str: JSON output from radare2 agfj command

    Returns:
        Mermaid flowchart syntax string
    """
    # Maximum nodes before switching to summary mode (browser Mermaid limit)
    MAX_MERMAID_NODES = 100

    try:
        graph_data = json.loads(json_str)
        if not graph_data:
            return "graph TD;\n Error[No graph data found]"

        # agfj returns list format for function graph
        blocks = (
            graph_data[0].get("blocks", [])
            if isinstance(graph_data, list)
            else graph_data.get("blocks", [])
        )

        # Check node count limit to prevent browser rendering crashes
        if len(blocks) > MAX_MERMAID_NODES:
            return (
                f"graph TD;\n"
                f" Warning[\"Graph too complex: {len(blocks)} nodes\"]"
                f"\n Warning --> Hint[\"Use PNG export or reduce scope\"]"
            )

        mermaid_lines = ["graph TD"]

        for block in blocks:
            # 1. Generate node ID from offset
            node_id = f"N_{hex(block.get('offset', 0))}"

            # 2. Generate node label from assembly opcodes.
            # OPTIMIZATION: Use enumerate with early break to avoid processing all ops.
            # For token efficiency, we limit to 5 instructions per block.
            ops = block.get("ops", [])
            op_codes = []
            has_more = False
            for i, op in enumerate(ops):
                if i < 5:
                    op_codes.append(op.get("opcode", ""))
                elif i == 5:
                    has_more = True
                    break

            if has_more:
                op_codes.append("...")

            # Escape Mermaid special characters using optimized function
            label_content = _escape_mermaid_chars("\\n".join(op_codes))

            # Define node
            mermaid_lines.append(f' {node_id}["{label_content}"]')

            # 3. Create edges
            # True branch (jump)
            if "jump" in block:
                target_id = f"N_{hex(block['jump'])}"
                mermaid_lines.append(f" {node_id} -->|True| {target_id}")

            # False branch (fail)
            if "fail" in block:
                target_id = f"N_{hex(block['fail'])}"
                mermaid_lines.append(f" {node_id} -.->|False| {target_id}")

        return "\n".join(mermaid_lines)

    except Exception as e:
        return f"graph TD;\n Error[Parse Error: {str(e)}]"

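# Tracing the converter on a two-block toy input (hypothetical agfj-shaped data):
#
#     _radare2_json_to_mermaid(
#         '[{"blocks": ['
#         '{"offset": 4096, "ops": [{"opcode": "cmp eax, 0"}], "jump": 4112, "fail": 4128},'
#         '{"offset": 4112, "ops": [{"opcode": "ret"}]}]}]'
#     )
#
# yields roughly:
#
#     graph TD
#      N_0x1000["cmp eax, 0"]
#      N_0x1000 -->|True| N_0x1010
#      N_0x1000 -.->|False| N_0x1020
#      N_0x1010["ret"]
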
@alru_cache(maxsize=32)
@log_execution(tool_name="generate_function_graph")
@track_metrics("generate_function_graph")
@handle_tool_errors
async def _generate_function_graph_impl(
    file_path: str,
    function_address: str,
    format: str = "mermaid",
    timeout: int = DEFAULT_TIMEOUT,
) -> ToolResult:
    """
    Internal implementation of generate_function_graph with caching.
    """
    # 1. Parameter validation
    validate_tool_parameters(
        "generate_function_graph",
        {"function_address": function_address, "format": format},
    )
    validated_path = validate_file_path(file_path)

    # 2. Security check for function address (prevent shell injection)
    from reversecore_mcp.core.exceptions import ValidationError
    from reversecore_mcp.core.validators import validate_address_format

    try:
        validate_address_format(function_address, "function_address")
    except ValidationError as e:
        return failure("VALIDATION_ERROR", str(e))

    # 3. Build radare2 command
    r2_cmd_str = f"agfj @ {function_address}"

    # 4. Execute subprocess asynchronously using helper.
    # Large graphs need a higher output limit.
    output, bytes_read = await _execute_r2_command(
        validated_path,
        [r2_cmd_str],
        analysis_level="aaa",
        max_output_size=50_000_000,
        base_timeout=timeout,
    )

    # Add timestamp for cache visibility
    import time

    timestamp = time.time()

    # 5. Format conversion and return
    if format.lower() == "json":
        return success(output, bytes_read=bytes_read, format="json", timestamp=timestamp)

    elif format.lower() == "mermaid":
        mermaid_code = _radare2_json_to_mermaid(output)
        return success(
            mermaid_code,
            bytes_read=bytes_read,
            format="mermaid",
            description="Render this using Mermaid to see the control flow.",
            timestamp=timestamp,
        )

    elif format.lower() == "dot":
        # For DOT format, call radare2 with agfd command.
        # NOTE: This is a separate call from agfj above, but this is optimal because:
        # - DOT format requires a different command (agfd vs agfj)
        # - Batching both would waste resources since we only need one format
        # - DOT format is rarely used (mermaid and json are preferred)
        dot_cmd_str = f"agfd @ {function_address}"

        dot_output, dot_bytes = await _execute_r2_command(
            validated_path,
            [dot_cmd_str],
            analysis_level="aaa",
            max_output_size=50_000_000,
            base_timeout=timeout,
        )
        return success(dot_output, bytes_read=dot_bytes, format="dot")

    return failure("INVALID_FORMAT", f"Unsupported format: {format}")

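# Because of @alru_cache(maxsize=32), repeated awaits with identical
# (file_path, function_address, format, timeout) arguments return the cached
# ToolResult without re-running radare2; the timestamp metadata set above is how
# the public wrapper below detects and flags such cache hits.
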
async def generate_function_graph(
    file_path: str,
    function_address: str,
    format: str = "mermaid",
    timeout: int = DEFAULT_TIMEOUT,
) -> ToolResult:
    """
    Generate a Control Flow Graph (CFG) for a specific function.

    This tool uses radare2 to analyze the function structure and returns
    visualization code (Mermaid by default) or a PNG image that helps AI understand
    the code flow without reading thousands of lines of assembly.

    Args:
        file_path: Path to the binary file (must be in workspace)
        function_address: Function address (e.g., 'main', '0x140001000', 'sym.foo')
        format: Output format ('mermaid', 'json', 'dot', or 'png'). Default is 'mermaid'.
        timeout: Execution timeout in seconds

    Returns:
        ToolResult with CFG visualization, JSON data, or PNG image
    """
    import time

    from fastmcp.utilities.types import Image

    # If PNG format requested, generate DOT first, then convert
    if format.lower() == "png":
        # Get DOT format first
        result = await _generate_function_graph_impl(file_path, function_address, "dot", timeout)

        if result.is_error:
            return result

        # Convert DOT to PNG using graphviz
        try:
            import tempfile
            from pathlib import Path as PathlibPath

            # Get DOT content from result
            dot_content = result.content[0].text if result.content else ""

            # Create temp files
            with tempfile.NamedTemporaryFile(mode="w", suffix=".dot", delete=False) as dot_file:
                dot_file.write(dot_content)
                dot_path = dot_file.name

            png_path = dot_path.replace(".dot", ".png")

            try:
                # Use async subprocess execution to avoid blocking the event loop.
                # This allows concurrent operations and better resource utilization.
                await execute_subprocess_async(
                    ["dot", "-Tpng", dot_path, "-o", png_path],
                    max_output_size=1_000_000,  # 1MB for error messages
                    timeout=30,
                )

                # Read PNG file
                png_data = PathlibPath(png_path).read_bytes()

                # Return Image object
                return Image(data=png_data, mime_type="image/png")

            finally:
                # Cleanup temp files
                try:
                    PathlibPath(dot_path).unlink()
                    if PathlibPath(png_path).exists():
                        PathlibPath(png_path).unlink()
                except OSError:  # FileNotFoundError is a subclass of OSError
                    pass

        except Exception as e:
            return failure(
                "IMAGE_GENERATION_ERROR",
                f"Failed to generate PNG image: {str(e)}",
                hint="Ensure graphviz is installed in the container",
            )

    # For other formats, use existing implementation
    result = await _generate_function_graph_impl(file_path, function_address, format, timeout)

    # Check for cache hit
    if result.status == "success" and result.metadata:
        ts = result.metadata.get("timestamp")
        if ts and (time.time() - ts > 1.0):
            result.metadata["cache_hit"] = True
            # Update description to indicate cached result?
            # Note: ToolSuccess has a 'data' field, not 'content'.

    return result

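# A usage sketch (hypothetical workspace path; the Mermaid snippet is illustrative):
#
#     result = await generate_function_graph("/app/workspace/sample.bin", "main")
#     # result carries Mermaid source such as:
#     #   graph TD
#     #    N_0x1000["push rbp\nmov rbp, rsp\n..."]
#     # With format="png", an Image object is returned instead of a ToolResult.
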
@log_execution(tool_name="analyze_xrefs")
@track_metrics("analyze_xrefs")
@handle_tool_errors
async def analyze_xrefs(
    file_path: str,
    address: str,
    xref_type: str = "all",
    timeout: int = DEFAULT_TIMEOUT,
    ctx: Context = None,
) -> ToolResult:
    """
    Analyze cross-references (xrefs) for a specific address using radare2.

    Cross-references show the relationships between code blocks - who calls this
    function (callers) and what it calls (callees). This is essential for:
    - Understanding program flow
    - Tracing data dependencies
    - Identifying attack surfaces
    - Reverse engineering malware C&C

    **xref_type Options:**
    - **"to"**: Show who references this address (callers/jumps TO here)
    - **"from"**: Show what this address references (calls/jumps FROM here)
    - **"all"**: Show both directions (complete relationship map)

    Args:
        file_path: Path to the binary file (must be in workspace)
        address: Function or address to analyze (e.g., 'main', '0x401000', 'sym.decrypt')
        xref_type: Type of cross-references to show: 'all', 'to', 'from' (default: 'all')
        timeout: Execution timeout in seconds (defaults to the configured tool timeout)
        ctx: FastMCP Context for progress reporting (auto-injected)

    Returns:
        ToolResult with structured JSON containing xrefs data:
        {
            "address": "main",
            "xref_type": "all",
            "xrefs_to": [{"from": "0x401050", "type": "call", "fcn_name": "entry0"}],
            "xrefs_from": [{"addr": "0x401100", "type": "call", "fcn_name": "printf"}],
            "summary": "2 reference(s) TO this address (callers), 1 reference(s) FROM this address (callees)",
            "total_refs_to": 2,
            "total_refs_from": 1
        }

    Example:
        # Find who calls the suspicious 'decrypt' function
        analyze_xrefs("/app/workspace/malware.exe", "sym.decrypt", "to")

        # Find what APIs a malware function uses
        analyze_xrefs("/app/workspace/malware.exe", "0x401000", "from")

        # Get complete relationship map
        analyze_xrefs("/app/workspace/malware.exe", "main", "all")
    """
    # 1. Validate parameters
    validated_path = validate_file_path(file_path)

    if xref_type not in ["all", "to", "from"]:
        return failure(
            "VALIDATION_ERROR",
            f"Invalid xref_type: {xref_type}",
            hint="Valid options are: 'all', 'to', 'from'",
        )

    # 2. Validate address format
    # OPTIMIZATION: Use pre-compiled pattern from validators module
    if not _ADDRESS_PATTERN.match(
        _strip_address_prefixes(address),
    ):
        return failure(
            "VALIDATION_ERROR",
            "Invalid address format",
            hint="Address must contain only alphanumeric characters, dots, underscores, and prefixes like '0x', 'sym.', 'fcn.'",
        )

    # 3. Build radare2 commands to get xrefs (the 'j' suffix requests JSON output)
    commands = []

    if xref_type in ["all", "to"]:
        # axtj = xrefs TO this address (callers)
        commands.append(f"axtj @ {address}")

    if xref_type in ["all", "from"]:
        # axfj = xrefs FROM this address (callees)
        commands.append(f"axfj @ {address}")

    # Build command string
    r2_commands_str = "; ".join(commands)

    if ctx:
        await ctx.report_progress(10, 100)
        await ctx.info(f"Analyzing xrefs for {address}...")

    # 4. Execute analysis using helper.
    # Use 'aa' (basic analysis) as default to prevent timeouts on large/obfuscated binaries;
    # 'aaa' is much slower but more accurate - only use it for small files.
    analysis_level = "aa"
    try:
        file_size_mb = os.path.getsize(validated_path) / (1024 * 1024)
        if file_size_mb < 5:
            analysis_level = "aaa"  # Full analysis for small files (<5MB)
        if ctx and file_size_mb > 5:
            await ctx.info(f"Large file ({file_size_mb:.1f}MB) detected, using basic analysis...")
    except OSError:
        pass

    output, bytes_read = await _execute_r2_command(
        validated_path,
        [r2_commands_str],
        analysis_level=analysis_level,
        max_output_size=10_000_000,
        base_timeout=timeout,
    )

    if ctx:
        await ctx.report_progress(90, 100)

    # 5. Parse JSON output
    try:
        # Output may contain multiple JSON arrays if both "to" and "from" were requested,
        # so split by lines and parse each JSON array
        lines = [line.strip() for line in output.strip().split("\n") if line.strip()]

        xrefs_to = []
        xrefs_from = []

        for line in lines:
            # Robust JSON extraction from line
            try:
                refs = _parse_json_output(line)
                if isinstance(refs, list) and refs:  # OPTIMIZATION: Direct bool check instead of len() comparison
                    # Determine if this is "to" or "from" based on field names
                    first_ref = refs[0]
                    if "from" in first_ref:
                        # This is xrefs TO (callers)
                        xrefs_to = refs
                    elif "addr" in first_ref or "fcn_addr" in first_ref:
                        # This is xrefs FROM (callees)
                        xrefs_from = refs
            except json.JSONDecodeError:
                # Skip lines that don't contain valid JSON
                continue

        # 6. Format results
        result = {
            "address": address,
            "xref_type": xref_type,
            "xrefs_to": xrefs_to,
            "xrefs_from": xrefs_from,
            "total_refs_to": len(xrefs_to),
            "total_refs_from": len(xrefs_from),
        }

        # Add human-readable summary
        summary_parts = []
        if xrefs_to:
            summary_parts.append(f"{len(xrefs_to)} reference(s) TO this address (callers)")
        if xrefs_from:
            summary_parts.append(f"{len(xrefs_from)} reference(s) FROM this address (callees)")

        if not summary_parts:
            summary = "No cross-references found"
        else:
            summary = ", ".join(summary_parts)

        result["summary"] = summary

        # 7. Return structured result
        return success(
            result,
            bytes_read=bytes_read,
            address=address,
            xref_type=xref_type,
            total_refs=len(xrefs_to) + len(xrefs_from),
            description=f"Cross-reference analysis for {address}: {summary}",
        )

    except Exception as e:
        return failure(
            "XREF_ANALYSIS_ERROR",
            f"Failed to parse cross-reference data: {str(e)}",
            hint="The address may not exist or the binary may not have been analyzed. Try running 'afl' first to see available functions.",
        )