iflow-mcp_developermode-korea_reversecore-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/METADATA +543 -0
  2. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/RECORD +79 -0
  3. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/WHEEL +5 -0
  4. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/entry_points.txt +2 -0
  5. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
  6. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/top_level.txt +1 -0
  7. reversecore_mcp/__init__.py +9 -0
  8. reversecore_mcp/core/__init__.py +78 -0
  9. reversecore_mcp/core/audit.py +101 -0
  10. reversecore_mcp/core/binary_cache.py +138 -0
  11. reversecore_mcp/core/command_spec.py +357 -0
  12. reversecore_mcp/core/config.py +432 -0
  13. reversecore_mcp/core/container.py +288 -0
  14. reversecore_mcp/core/decorators.py +152 -0
  15. reversecore_mcp/core/error_formatting.py +93 -0
  16. reversecore_mcp/core/error_handling.py +142 -0
  17. reversecore_mcp/core/evidence.py +229 -0
  18. reversecore_mcp/core/exceptions.py +296 -0
  19. reversecore_mcp/core/execution.py +240 -0
  20. reversecore_mcp/core/ghidra.py +642 -0
  21. reversecore_mcp/core/ghidra_helper.py +481 -0
  22. reversecore_mcp/core/ghidra_manager.py +234 -0
  23. reversecore_mcp/core/json_utils.py +131 -0
  24. reversecore_mcp/core/loader.py +73 -0
  25. reversecore_mcp/core/logging_config.py +206 -0
  26. reversecore_mcp/core/memory.py +721 -0
  27. reversecore_mcp/core/metrics.py +198 -0
  28. reversecore_mcp/core/mitre_mapper.py +365 -0
  29. reversecore_mcp/core/plugin.py +45 -0
  30. reversecore_mcp/core/r2_helpers.py +404 -0
  31. reversecore_mcp/core/r2_pool.py +403 -0
  32. reversecore_mcp/core/report_generator.py +268 -0
  33. reversecore_mcp/core/resilience.py +252 -0
  34. reversecore_mcp/core/resource_manager.py +169 -0
  35. reversecore_mcp/core/result.py +132 -0
  36. reversecore_mcp/core/security.py +213 -0
  37. reversecore_mcp/core/validators.py +238 -0
  38. reversecore_mcp/dashboard/__init__.py +221 -0
  39. reversecore_mcp/prompts/__init__.py +56 -0
  40. reversecore_mcp/prompts/common.py +24 -0
  41. reversecore_mcp/prompts/game.py +280 -0
  42. reversecore_mcp/prompts/malware.py +1219 -0
  43. reversecore_mcp/prompts/report.py +150 -0
  44. reversecore_mcp/prompts/security.py +136 -0
  45. reversecore_mcp/resources.py +329 -0
  46. reversecore_mcp/server.py +727 -0
  47. reversecore_mcp/tools/__init__.py +49 -0
  48. reversecore_mcp/tools/analysis/__init__.py +74 -0
  49. reversecore_mcp/tools/analysis/capa_tools.py +215 -0
  50. reversecore_mcp/tools/analysis/die_tools.py +180 -0
  51. reversecore_mcp/tools/analysis/diff_tools.py +643 -0
  52. reversecore_mcp/tools/analysis/lief_tools.py +272 -0
  53. reversecore_mcp/tools/analysis/signature_tools.py +591 -0
  54. reversecore_mcp/tools/analysis/static_analysis.py +479 -0
  55. reversecore_mcp/tools/common/__init__.py +58 -0
  56. reversecore_mcp/tools/common/file_operations.py +352 -0
  57. reversecore_mcp/tools/common/memory_tools.py +516 -0
  58. reversecore_mcp/tools/common/patch_explainer.py +230 -0
  59. reversecore_mcp/tools/common/server_tools.py +115 -0
  60. reversecore_mcp/tools/ghidra/__init__.py +19 -0
  61. reversecore_mcp/tools/ghidra/decompilation.py +975 -0
  62. reversecore_mcp/tools/ghidra/ghidra_tools.py +1052 -0
  63. reversecore_mcp/tools/malware/__init__.py +61 -0
  64. reversecore_mcp/tools/malware/adaptive_vaccine.py +579 -0
  65. reversecore_mcp/tools/malware/dormant_detector.py +756 -0
  66. reversecore_mcp/tools/malware/ioc_tools.py +228 -0
  67. reversecore_mcp/tools/malware/vulnerability_hunter.py +519 -0
  68. reversecore_mcp/tools/malware/yara_tools.py +214 -0
  69. reversecore_mcp/tools/patch_explainer.py +19 -0
  70. reversecore_mcp/tools/radare2/__init__.py +13 -0
  71. reversecore_mcp/tools/radare2/r2_analysis.py +972 -0
  72. reversecore_mcp/tools/radare2/r2_session.py +376 -0
  73. reversecore_mcp/tools/radare2/radare2_mcp_tools.py +1183 -0
  74. reversecore_mcp/tools/report/__init__.py +4 -0
  75. reversecore_mcp/tools/report/email.py +82 -0
  76. reversecore_mcp/tools/report/report_mcp_tools.py +344 -0
  77. reversecore_mcp/tools/report/report_tools.py +1076 -0
  78. reversecore_mcp/tools/report/session.py +194 -0
  79. reversecore_mcp/tools/report_tools.py +11 -0
@@ -0,0 +1,972 @@
+ """Radare2-based analysis tools for binary analysis, cross-references, and execution tracing."""
+ 
+ import os
+ import time
+ 
+ from async_lru import alru_cache
+ from fastmcp import Context
+ 
+ # Use high-performance JSON implementation (3-5x faster)
+ from reversecore_mcp.core import json_utils as json
+ from reversecore_mcp.core.command_spec import validate_r2_command
+ from reversecore_mcp.core.config import get_config
+ from reversecore_mcp.core.decorators import log_execution
+ from reversecore_mcp.core.error_handling import handle_tool_errors
+ from reversecore_mcp.core.execution import execute_subprocess_async  # For test compatibility
+ from reversecore_mcp.core.metrics import track_metrics
+ 
+ # Import shared R2 helper functions from core (avoids circular dependencies)
+ from reversecore_mcp.core.r2_helpers import (
+     build_r2_cmd as _build_r2_cmd,
+     calculate_dynamic_timeout,
+     escape_mermaid_chars as _escape_mermaid_chars,
+     execute_r2_command as _execute_r2_command,
+     parse_json_output as _parse_json_output,
+     remove_analysis_commands,
+     strip_address_prefixes as _strip_address_prefixes,
+ )
+ from reversecore_mcp.core.resilience import circuit_breaker
+ from reversecore_mcp.core.result import ToolResult, failure, success
+ from reversecore_mcp.core.security import validate_file_path
+ from reversecore_mcp.core.validators import (
+     _ADDRESS_PATTERN,  # OPTIMIZATION: Import pre-compiled pattern instead of duplicating
+     validate_tool_parameters,
+ )
+ 
+ # Load default timeout from configuration
+ DEFAULT_TIMEOUT = get_config().default_tool_timeout
+ 
+ 
+ @log_execution(tool_name="run_radare2")
+ @track_metrics("run_radare2")
+ @circuit_breaker("run_radare2", failure_threshold=5, recovery_timeout=60)
+ @handle_tool_errors
+ async def run_radare2(
+     file_path: str,
+     r2_command: str,
+     max_output_size: int = 10_000_000,
+     timeout: int = DEFAULT_TIMEOUT,
+     ctx: Context | None = None,
+ ) -> ToolResult:
+     """Execute vetted radare2 commands for binary triage."""
+     validate_tool_parameters("run_radare2", {"r2_command": r2_command})
+     validated_path = validate_file_path(file_path)
+     validated_command = validate_r2_command(r2_command)
+ 
+     # Adaptive analysis: use 'aa' by default, '-n' (no analysis) for pure
+     # information commands, and 'aaa' for analysis-heavy commands on small files.
+     analysis_level = "aa"
+ 
+     # Simple information commands don't need analysis
+     simple_commands = ["i", "iI", "iz", "izj", "il", "is", "isj", "ie", "it", "iS", "iSj"]
+     if validated_command in simple_commands or validated_command.startswith("i "):
+         analysis_level = "-n"
+ 
+     # Function listing and decompilation commands (afl, aflj, pdf, ...) benefit
+     # from deeper analysis, but only if the file is small enough.
+     function_commands = ["afl", "aflj", "afll", "afllj", "pdf", "pdr"]
+     if any(cmd in validated_command for cmd in function_commands):
+         try:
+             file_size_mb = os.path.getsize(validated_path) / (1024 * 1024)
+             if file_size_mb < 10:  # For files under 10MB, use deeper analysis
+                 analysis_level = "aaa"
+         except OSError:
+             pass
+ 
+     # If the user explicitly requested analysis ('aa' also covers 'aaa'),
+     # strip those commands: analysis is handled and cached by _build_r2_cmd.
+     if "aa" in validated_command:
+         validated_command = remove_analysis_commands(validated_command)
+ 
+     # Use the helper function to execute the radare2 command
+     try:
+         output, bytes_read = await _execute_r2_command(
+             validated_path,
+             [validated_command],
+             analysis_level=analysis_level,
+             max_output_size=max_output_size,
+             base_timeout=timeout,
+         )
+         return success(output, bytes_read=bytes_read, analysis_level=analysis_level)
+     except Exception as e:
+         # Log the error to the client if a context is available
+         if ctx:
+             await ctx.error(f"radare2 command '{validated_command}' failed: {str(e)}")
+         raise
+ 
+ 
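For illustration, a minimal usage sketch (the workspace path and binary name are hypothetical; the tool is async, so it must be driven by an event loop):

    import asyncio

    from reversecore_mcp.tools.radare2.r2_analysis import run_radare2

    async def main() -> None:
        # List strings in the data sections; 'iz' is a pure information
        # command, so the tool skips analysis ('-n' level) for speed.
        result = await run_radare2("/app/workspace/sample.bin", "iz")
        print(result)

    asyncio.run(main())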
+ # Note: R2AnalysisPlugin has been removed.
+ # All tools (run_radare2, trace_execution_path, generate_function_graph, analyze_xrefs)
+ # are now registered via Radare2ToolsPlugin in radare2_mcp_tools.py for unified management.
+ 
+ 
+ # Dangerous sink APIs for prioritized path tracing
+ _DANGEROUS_SINKS = frozenset(
+     {
+         # Command execution
+         "system", "execve", "execl", "execlp", "execle", "execv", "execvp",
+         "execvpe", "popen", "_popen", "ShellExecute", "ShellExecuteEx",
+         "CreateProcess", "WinExec", "spawn", "fork",
+         # Memory corruption
+         "strcpy", "strcat", "sprintf", "vsprintf", "gets", "scanf",
+         "memcpy", "memmove", "strncpy",
+         # File operations
+         "fopen", "open", "CreateFile", "DeleteFile", "WriteFile",
+         # Network
+         "connect", "send", "recv", "socket", "bind", "listen",
+         # Registry (Windows)
+         "RegSetValue", "RegCreateKey", "RegDeleteKey",
+     }
+ )
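As a sketch of how this set is consumed (matching is substring-based after prefix stripping, mirroring the `is_dangerous_sink` helper defined later in this file; the sample symbol names are hypothetical):

    from reversecore_mcp.tools.radare2.r2_analysis import _DANGEROUS_SINKS

    def looks_dangerous(func_name: str) -> bool:
        # Strip radare2 symbol prefixes, then test each known sink as a
        # case-insensitive substring of the cleaned name.
        clean = func_name.replace("sym.imp.", "").replace("sym.", "").replace("_", "")
        return any(sink.lower() in clean.lower() for sink in _DANGEROUS_SINKS)

    print(looks_dangerous("sym.imp.strcpy"))          # True
    print(looks_dangerous("sym.imp.CreateProcessW"))  # True ("CreateProcess" substring)
    print(looks_dangerous("fcn.00401000"))            # False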
+ 
+ # =============================================================================
+ # Symbol Alias Database for Enhanced Matching
+ # =============================================================================
+ # Maps common API names to their variants (Windows A/W suffixes, safety variants, etc.)
+ _SYMBOL_ALIASES: dict[str, list[str]] = {
+     # Windows Process APIs
+     "createprocess": ["CreateProcessA", "CreateProcessW", "CreateProcessAsUserA", "CreateProcessAsUserW"],
+     "shellexecute": ["ShellExecuteA", "ShellExecuteW", "ShellExecuteExA", "ShellExecuteExW"],
+     "winexec": ["WinExec"],
+     # Windows File APIs
+     "createfile": ["CreateFileA", "CreateFileW", "CreateFile2"],
+     "deletefile": ["DeleteFileA", "DeleteFileW"],
+     "writefile": ["WriteFile", "WriteFileEx"],
+     "readfile": ["ReadFile", "ReadFileEx"],
+     "copyfile": ["CopyFileA", "CopyFileW", "CopyFileExA", "CopyFileExW"],
+     # Windows Registry APIs
+     "regsetvalue": ["RegSetValueA", "RegSetValueW", "RegSetValueExA", "RegSetValueExW"],
+     "regcreatekey": ["RegCreateKeyA", "RegCreateKeyW", "RegCreateKeyExA", "RegCreateKeyExW"],
+     "regopenkey": ["RegOpenKeyA", "RegOpenKeyW", "RegOpenKeyExA", "RegOpenKeyExW"],
+     "regdeletekey": ["RegDeleteKeyA", "RegDeleteKeyW", "RegDeleteKeyExA", "RegDeleteKeyExW"],
+     # Windows Message APIs
+     "messagebox": ["MessageBoxA", "MessageBoxW", "MessageBoxExA", "MessageBoxExW"],
+     # Windows Service APIs
+     "createservice": ["CreateServiceA", "CreateServiceW"],
+     "openservice": ["OpenServiceA", "OpenServiceW"],
+     "startservice": ["StartServiceA", "StartServiceW"],
+     # Windows Network APIs
+     "internetopen": ["InternetOpenA", "InternetOpenW"],
+     "internetconnect": ["InternetConnectA", "InternetConnectW"],
+     "httpopen": ["HttpOpenRequestA", "HttpOpenRequestW"],
+     # C Runtime String Functions
+     "strcpy": ["strcpy", "strcpy_s", "__strcpy_chk", "wcscpy", "lstrcpyA", "lstrcpyW"],
+     "strcat": ["strcat", "strcat_s", "__strcat_chk", "wcscat", "lstrcatA", "lstrcatW"],
+     "sprintf": ["sprintf", "sprintf_s", "swprintf", "wsprintfA", "wsprintfW", "_snprintf"],
+     "printf": ["printf", "wprintf", "_printf_l"],
+     "scanf": ["scanf", "scanf_s", "wscanf", "sscanf", "fscanf"],
+     # C Runtime Memory Functions
+     "malloc": ["malloc", "_malloc", "calloc", "realloc"],
+     "free": ["free", "_free"],
+     "memcpy": ["memcpy", "memcpy_s", "memmove", "memmove_s", "wmemcpy"],
+     # System/Exec Functions
+     "system": ["system", "_system", "msvcrt.system", "_wsystem"],
+     "popen": ["popen", "_popen", "_wpopen"],
+     "execve": ["execve", "execv", "execl", "execvp", "execlp"],
+     # Network Functions
+     "socket": ["socket", "WSASocket", "WSASocketA", "WSASocketW"],
+     "connect": ["connect", "WSAConnect"],
+     "send": ["send", "sendto", "WSASend", "WSASendTo"],
+     "recv": ["recv", "recvfrom", "WSARecv", "WSARecvFrom"],
+     # Crypto Functions
+     "cryptencrypt": ["CryptEncrypt", "CryptDecrypt"],
+     "cryptgenkey": ["CryptGenKey", "CryptDeriveKey"],
+     "cryptacquirecontext": ["CryptAcquireContextA", "CryptAcquireContextW"],
+ }
+ 
+ # Pre-compute reverse lookup for O(1) alias checking
+ _ALIAS_REVERSE_LOOKUP: dict[str, str] = {}
+ for _base, _aliases in _SYMBOL_ALIASES.items():
+     for _alias in _aliases:
+         _ALIAS_REVERSE_LOOKUP[_alias.lower()] = _base
+ 
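A quick sketch of the reverse lookup in action (private helpers, shown for illustration; outputs follow from the table above):

    from reversecore_mcp.tools.radare2.r2_analysis import _ALIAS_REVERSE_LOOKUP

    # Any known variant maps back to its canonical base name in O(1).
    print(_ALIAS_REVERSE_LOOKUP["createprocessw"])  # "createprocess"
    print(_ALIAS_REVERSE_LOOKUP["wsasend"])         # "send"
    print(_ALIAS_REVERSE_LOOKUP.get("notanapi"))    # None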
+ 
+ def _clean_symbol_name(name: str) -> str:
+     """Remove common prefixes and normalize a symbol name."""
+     if not name:
+         return ""
+     # Remove common radare2/binary prefixes
+     clean = name
+     for prefix in ["sym.imp.", "sym.", "imp.", "fcn.", "sub_", "loc_"]:
+         if clean.lower().startswith(prefix):
+             clean = clean[len(prefix):]
+             break
+     # Remove leading/trailing underscores
+     return clean.strip("_").lower()
+ 
+ 
+ def _fuzzy_match_symbol(target: str, symbol: str) -> tuple[float, str]:
+     """
+     Calculate a fuzzy match score between a target name and a symbol.
+ 
+     Returns:
+         Tuple of (score, match_method) where score is 0.0-1.0.
+     """
+     target_clean = _clean_symbol_name(target)
+     symbol_clean = _clean_symbol_name(symbol)
+ 
+     if not target_clean or not symbol_clean:
+         return (0.0, "none")
+ 
+     # Exact match (after cleaning)
+     if target_clean == symbol_clean:
+         return (1.0, "exact")
+ 
+     # Check the alias database
+     if target_clean in _SYMBOL_ALIASES:
+         for alias in _SYMBOL_ALIASES[target_clean]:
+             if alias.lower() == symbol_clean or symbol_clean.endswith(alias.lower()):
+                 return (0.95, "alias")
+ 
+     # Reverse alias check
+     if symbol_clean in _ALIAS_REVERSE_LOOKUP:
+         base = _ALIAS_REVERSE_LOOKUP[symbol_clean]
+         if base == target_clean:
+             return (0.95, "alias_reverse")
+ 
+     # Suffix match (e.g., "decrypt" matches "my_decrypt")
+     if symbol_clean.endswith(target_clean):
+         return (0.85, "suffix")
+ 
+     # Prefix match
+     if symbol_clean.startswith(target_clean):
+         return (0.75, "prefix")
+ 
+     # Contains match
+     if target_clean in symbol_clean:
+         return (0.65, "contains")
+ 
+     # Reverse contains (symbol in target)
+     if symbol_clean in target_clean:
+         return (0.55, "contains_reverse")
+ 
+     return (0.0, "none")
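For illustration, expected scores for a few representative pairs, derived from the rules above (run as a quick sanity check):

    from reversecore_mcp.tools.radare2.r2_analysis import _fuzzy_match_symbol

    print(_fuzzy_match_symbol("system", "sym.imp.system"))                 # (1.0, "exact")
    print(_fuzzy_match_symbol("createprocess", "sym.imp.CreateProcessW"))  # (0.95, "alias")
    print(_fuzzy_match_symbol("decrypt", "fcn.my_decrypt"))                # (0.85, "suffix")
    print(_fuzzy_match_symbol("reg", "RegSetValue"))                       # (0.75, "prefix")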
+ 
+ 
+ def _find_best_symbol_match(target: str, symbols: list[dict]) -> tuple[dict | None, float, str]:
+     """
+     Find the best matching symbol from a list.
+ 
+     Args:
+         target: Target function name to find
+         symbols: List of symbol dicts with a 'name' field
+ 
+     Returns:
+         Tuple of (best_match_dict, score, match_method)
+     """
+     best_match = None
+     best_score = 0.0
+     best_method = "none"
+ 
+     for sym in symbols:
+         if not isinstance(sym, dict):
+             continue
+ 
+         # Check both 'name' and 'realname' fields
+         for name_field in ["name", "realname"]:
+             name = sym.get(name_field, "")
+             if not name:
+                 continue
+ 
+             score, method = _fuzzy_match_symbol(target, name)
+             if score > best_score:
+                 best_score = score
+                 best_match = sym
+                 best_method = method
+ 
+     return (best_match, best_score, best_method)
+ 
+ 
+ # OPTIMIZATION: Pre-defined translation table for faster function name cleaning
+ # (empty first two arguments; the third lists the characters to delete)
+ _FUNC_NAME_CLEAN_TABLE = str.maketrans("", "", "_")
+ 
+ 
+ @log_execution(tool_name="trace_execution_path")
+ @track_metrics("trace_execution_path")
+ @handle_tool_errors
+ async def trace_execution_path(
+     file_path: str,
+     target_function: str,
+     max_depth: int = 5,
+     max_paths: int = 5,
+     timeout: int | None = None,
+     prioritize_sinks: bool = True,
+ ) -> ToolResult:
+     """
+     Trace function calls backwards from a target function (sink) to find potential execution paths.
+ 
+     This tool helps identify "exploit paths" by finding which functions call a dangerous
+     target function (like 'system', 'strcpy', or 'execve'). It performs a recursive
+     cross-reference analysis (backtrace) to map out how execution reaches the target.
+ 
+     **Use Cases:**
+     - **Vulnerability Analysis**: Check whether user input (main/recv) reaches 'system'
+     - **Reachability Analysis**: Verify whether a vulnerable function is actually called
+     - **Taint Analysis Helper**: Provide the path for AI to perform manual taint checking
+ 
+     **Performance Optimizations (v3.0):**
+     - Depth-bounded backtrace (tune max_depth for speed)
+     - Sink-aware pruning: prioritizes paths through dangerous APIs
+     - Dynamic timeout based on file size
+ 
+     Args:
+         file_path: Path to the binary file
+         target_function: Name or address of the target function (e.g., 'sym.imp.system', '0x401000')
+         max_depth: Maximum depth of backtrace (default: 5; reduce for speed)
+         max_paths: Maximum number of paths to return (default: 5)
+         timeout: Execution timeout in seconds (uses a dynamic timeout if None)
+         prioritize_sinks: Prioritize paths through dangerous sink APIs (default: True)
+ 
+     Returns:
+         ToolResult with a list of execution paths (call chains).
+     """
+     validated_path = validate_file_path(file_path)
+ 
+     # Calculate a dynamic timeout based on file size
+     effective_timeout = (
+         timeout if timeout is not None else calculate_dynamic_timeout(str(validated_path), base_timeout=30)
+     )
+ 
+     # Helper to check whether a function name is a dangerous sink
+     def is_dangerous_sink(func_name: str) -> bool:
+         """Check whether a function name matches any dangerous sink API (case-insensitive)."""
+         if not func_name:
+             return False
+         # Remove common prefixes, then strip underscores via translate()
+         clean_name = func_name.replace("sym.imp.", "").replace("sym.", "")
+         clean_name = clean_name.translate(_FUNC_NAME_CLEAN_TABLE).lower()
+         return any(sink.lower() in clean_name for sink in _DANGEROUS_SINKS)
+ 
+     # Enhanced symbol resolution with fuzzy matching
+     match_info = {"score": 0.0, "method": "none", "resolved_name": None}
+ 
+     async def resolve_symbol_address(func_name: str) -> int | None:
+         """Enhanced symbol resolution with fuzzy matching and multi-source lookup."""
+         nonlocal match_info
+ 
+         # If already an address, return it as-is
+         if func_name.startswith("0x"):
+             try:
+                 match_info = {"score": 1.0, "method": "direct_address", "resolved_name": func_name}
+                 return int(func_name, 16)
+             except ValueError:
+                 return None
+ 
+         # Multi-source lookup: symbols (isj), functions (aflj), imports (iij)
+         cmd = _build_r2_cmd(str(validated_path), ["isj", "aflj", "iij"], "aaa")
+         out, _ = await execute_subprocess_async(cmd, timeout=effective_timeout)
+ 
+         lines = [line.strip() for line in out.strip().split("\n") if line.strip()]
+         if not lines:
+             return None
+ 
+         all_symbols = []
+ 
+         # Parse all sources
+         for line in lines:
+             try:
+                 parsed = _parse_json_output(line)
+                 if isinstance(parsed, list):
+                     all_symbols.extend(parsed)
+             except (json.JSONDecodeError, TypeError):
+                 continue
+ 
+         if not all_symbols:
+             return None
+ 
+         # Use fuzzy matching to find the best match
+         best_match, score, method = _find_best_symbol_match(func_name, all_symbols)
+ 
+         if best_match and score >= 0.5:  # Minimum confidence threshold
+             match_info = {
+                 "score": score,
+                 "method": method,
+                 "resolved_name": best_match.get("name") or best_match.get("realname"),
+             }
+             # Get the address from the match (fields differ between symbols and functions)
+             return best_match.get("vaddr") or best_match.get("offset") or best_match.get("plt")
+ 
+         return None
+ 
+     # Resolve the target address
+     target_addr = target_function
+     resolved_addr = await resolve_symbol_address(target_function)
+     if resolved_addr:
+         target_addr = hex(resolved_addr)
+     # If we can't resolve it, keep the original name (it may still work as an r2 symbol)
+ 
+     paths = []
+ 
+     async def recursive_backtrace(current_addr, current_path, depth):
+         if depth >= max_depth or len(paths) >= max_paths:
+             return
+ 
+         # Addresses already on this path, used for cycle detection below.
+         # Revisiting the same node on a *different* path is deliberately allowed.
+         current_path_addrs = {p["addr"] for p in current_path}
+ 
+         # Get xrefs TO this address
+         cmd = _build_r2_cmd(str(validated_path), [f"axtj @ {current_addr}"], "aaa")
+         out, _ = await execute_subprocess_async(cmd, timeout=effective_timeout)
+ 
+         try:
+             xrefs = _parse_json_output(out)
+         except (json.JSONDecodeError, TypeError):
+             xrefs = []
+ 
+         if not xrefs:
+             # End of chain (root caller found or no xrefs)
+             if len(current_path) > 1:
+                 paths.append(current_path)
+             return
+ 
+         # OPTIMIZATION v3.0: Sink-aware pruning - explore high-value paths first
+         if prioritize_sinks and len(xrefs) > 1:
+             def _xref_priority(x: dict) -> int:
+                 name = x.get("fcn_name", "")
+                 # Priority 1: main/entry functions (complete paths)
+                 if any(k in name.lower() for k in ["main", "entry"]):
+                     return -2
+                 # Priority 2: dangerous sink APIs
+                 if is_dangerous_sink(name):
+                     return -1
+                 # Priority 3: everything else
+                 return 0
+ 
+             xrefs = sorted(xrefs, key=_xref_priority)
+ 
+         for xref in xrefs:
+             if len(paths) >= max_paths:
+                 break
+ 
+             caller_addr = hex(xref.get("fcn_addr", 0))
+             caller_name = xref.get("fcn_name", "unknown")
+             type_ref = xref.get("type", "call")
+ 
+             if type_ref not in ["call", "jump"]:
+                 continue
+             if caller_addr in current_path_addrs:
+                 continue  # Cycle: this caller is already on the current path
+ 
+             new_node = {"addr": caller_addr, "name": caller_name, "type": type_ref}
+ 
+             # If we reached main or entry, this is a complete path
+             if "main" in caller_name or "entry" in caller_name:
+                 paths.append(current_path + [new_node])
+             else:
+                 await recursive_backtrace(caller_addr, current_path + [new_node], depth + 1)
+ 
+     # Start the trace
+     root_node = {"addr": target_addr, "name": target_function, "type": "target"}
+     await recursive_backtrace(target_addr, [root_node], 0)
+ 
+     # Format results: render each path from root caller down to the target
+     formatted_paths = [" -> ".join(f"{n['name']} ({n['addr']})" for n in p[::-1]) for p in paths]
+ 
+     return success(
+         {"paths": formatted_paths, "raw_paths": paths},
+         path_count=len(paths),
+         target=target_function,
+         resolved_address=target_addr,
+         match_confidence=match_info["score"],
+         match_method=match_info["method"],
+         resolved_name=match_info["resolved_name"],
+         description=f"Found {len(paths)} execution paths to {target_function} (match: {match_info['method']}, confidence: {match_info['score']:.0%})",
+     )
+ 
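A usage sketch (binary path hypothetical; run inside an event loop as with the other tools):

    result = await trace_execution_path(
        "/app/workspace/vuln.bin",  # hypothetical workspace binary
        "sym.imp.system",           # dangerous sink to backtrace from
        max_depth=3,                # shallow trace for a quick first pass
    )
    # Each formatted path reads root-caller -> ... -> target, e.g.:
    #   "main (0x401000) -> parse_input (0x4010f0) -> sym.imp.system (0x401050)"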
+ 
+ def _radare2_json_to_mermaid(json_str: str) -> str:
+     """
+     Convert radare2 'agfj' JSON output to Mermaid flowchart syntax.
+     Optimized for LLM context efficiency.
+ 
+     Args:
+         json_str: JSON output from the radare2 agfj command
+ 
+     Returns:
+         Mermaid flowchart syntax string
+     """
+     # Maximum nodes before switching to summary mode (browser Mermaid limit)
+     MAX_MERMAID_NODES = 100
+ 
+     try:
+         graph_data = json.loads(json_str)
+         if not graph_data:
+             return "graph TD;\n    Error[No graph data found]"
+ 
+         # agfj returns a list for a function graph
+         blocks = (
+             graph_data[0].get("blocks", [])
+             if isinstance(graph_data, list)
+             else graph_data.get("blocks", [])
+         )
+ 
+         # Check the node count limit to prevent browser rendering crashes
+         if len(blocks) > MAX_MERMAID_NODES:
+             return (
+                 f"graph TD;\n"
+                 f'    Warning["Graph too complex: {len(blocks)} nodes"]'
+                 f'\n    Warning --> Hint["Use PNG export or reduce scope"]'
+             )
+ 
+         mermaid_lines = ["graph TD"]
+ 
+         for block in blocks:
+             # 1. Generate a node ID from the block offset
+             node_id = f"N_{hex(block.get('offset', 0))}"
+ 
+             # 2. Generate the node label from assembly opcodes.
+             #    For token efficiency, limit to 5 instructions per block
+             #    and break out early instead of processing all ops.
+             ops = block.get("ops", [])
+             op_codes = []
+             has_more = False
+             for i, op in enumerate(ops):
+                 if i < 5:
+                     op_codes.append(op.get("opcode", ""))
+                 elif i == 5:
+                     has_more = True
+                     break
+ 
+             if has_more:
+                 op_codes.append("...")
+ 
+             # Escape Mermaid special characters using the shared helper
+             label_content = _escape_mermaid_chars("\\n".join(op_codes))
+ 
+             # Define the node
+             mermaid_lines.append(f'    {node_id}["{label_content}"]')
+ 
+             # 3. Create edges
+             # True branch (jump)
+             if "jump" in block:
+                 target_id = f"N_{hex(block['jump'])}"
+                 mermaid_lines.append(f"    {node_id} -->|True| {target_id}")
+ 
+             # False branch (fail)
+             if "fail" in block:
+                 target_id = f"N_{hex(block['fail'])}"
+                 mermaid_lines.append(f"    {node_id} -.->|False| {target_id}")
+ 
+         return "\n".join(mermaid_lines)
+ 
+     except Exception as e:
+         return f"graph TD;\n    Error[Parse Error: {str(e)}]"
+ 
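To see the conversion on a toy input (two basic blocks joined by a conditional branch; exact label text depends on `_escape_mermaid_chars`):

    from reversecore_mcp.tools.radare2.r2_analysis import _radare2_json_to_mermaid

    sample = (
        '[{"blocks": ['
        '{"offset": 4096, "ops": [{"opcode": "cmp eax, 0"}], "jump": 4112, "fail": 4128},'
        '{"offset": 4112, "ops": [{"opcode": "ret"}]}'
        ']}]'
    )
    print(_radare2_json_to_mermaid(sample))
    # Expected shape:
    #   graph TD
    #       N_0x1000["cmp eax, 0"]
    #       N_0x1000 -->|True| N_0x1010
    #       N_0x1000 -.->|False| N_0x1020
    #       N_0x1010["ret"]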
+ 
+ 
+ @alru_cache(maxsize=32)
+ @log_execution(tool_name="generate_function_graph")
+ @track_metrics("generate_function_graph")
+ @handle_tool_errors
+ async def _generate_function_graph_impl(
+     file_path: str,
+     function_address: str,
+     format: str = "mermaid",
+     timeout: int = DEFAULT_TIMEOUT,
+ ) -> ToolResult:
+     """Internal implementation of generate_function_graph, with result caching."""
+     # 1. Parameter validation
+     validate_tool_parameters(
+         "generate_function_graph",
+         {"function_address": function_address, "format": format},
+     )
+     validated_path = validate_file_path(file_path)
+ 
+     # 2. Security check for the function address (prevent shell injection)
+     from reversecore_mcp.core.exceptions import ValidationError
+     from reversecore_mcp.core.validators import validate_address_format
+ 
+     try:
+         validate_address_format(function_address, "function_address")
+     except ValidationError as e:
+         return failure("VALIDATION_ERROR", str(e))
+ 
+     # 3. Build the radare2 command
+     r2_cmd_str = f"agfj @ {function_address}"
+ 
+     # 4. Execute the subprocess asynchronously using the helper.
+     #    Large graphs need a higher output limit.
+     output, bytes_read = await _execute_r2_command(
+         validated_path,
+         [r2_cmd_str],
+         analysis_level="aaa",
+         max_output_size=50_000_000,
+         base_timeout=timeout,
+     )
+ 
+     # Add a timestamp so the wrapper can detect cache hits
+     timestamp = time.time()
+ 
+     # 5. Format conversion and return
+     if format.lower() == "json":
+         return success(output, bytes_read=bytes_read, format="json", timestamp=timestamp)
+ 
+     elif format.lower() == "mermaid":
+         mermaid_code = _radare2_json_to_mermaid(output)
+         return success(
+             mermaid_code,
+             bytes_read=bytes_read,
+             format="mermaid",
+             description="Render this using Mermaid to see the control flow.",
+             timestamp=timestamp,
+         )
+ 
+     elif format.lower() == "dot":
+         # For DOT format, call radare2 with the agfd command.
+         # NOTE: This is a separate call from agfj above, which is acceptable because:
+         # - DOT requires a different command (agfd vs agfj)
+         # - Batching both would waste resources since only one format is needed
+         # - DOT is rarely used (mermaid and json are preferred)
+         dot_cmd_str = f"agfd @ {function_address}"
+ 
+         dot_output, dot_bytes = await _execute_r2_command(
+             validated_path,
+             [dot_cmd_str],
+             analysis_level="aaa",
+             max_output_size=50_000_000,
+             base_timeout=timeout,
+         )
+         return success(dot_output, bytes_read=dot_bytes, format="dot")
+ 
+     return failure("INVALID_FORMAT", f"Unsupported format: {format}")
+ 
+ 
+ async def generate_function_graph(
+     file_path: str,
+     function_address: str,
+     format: str = "mermaid",
+     timeout: int = DEFAULT_TIMEOUT,
+ ) -> ToolResult:
+     """
+     Generate a Control Flow Graph (CFG) for a specific function.
+ 
+     This tool uses radare2 to analyze the function structure and returns
+     visualization code (Mermaid by default) or a PNG image that helps AI understand
+     the code flow without reading thousands of lines of assembly.
+ 
+     Args:
+         file_path: Path to the binary file (must be in workspace)
+         function_address: Function address (e.g., 'main', '0x140001000', 'sym.foo')
+         format: Output format ('mermaid', 'json', 'dot', or 'png'). Default is 'mermaid'.
+         timeout: Execution timeout in seconds
+ 
+     Returns:
+         ToolResult with CFG visualization or JSON data, or an Image for PNG output
+     """
+     from fastmcp.utilities.types import Image
+ 
+     # If PNG format is requested, generate DOT first, then convert
+     if format.lower() == "png":
+         # Get the DOT representation first
+         result = await _generate_function_graph_impl(file_path, function_address, "dot", timeout)
+ 
+         if result.is_error:
+             return result
+ 
+         # Convert DOT to PNG using graphviz
+         try:
+             import tempfile
+             from pathlib import Path as PathlibPath
+ 
+             # Get the DOT content from the result
+             dot_content = result.content[0].text if result.content else ""
+ 
+             # Create temp files
+             with tempfile.NamedTemporaryFile(mode="w", suffix=".dot", delete=False) as dot_file:
+                 dot_file.write(dot_content)
+                 dot_path = dot_file.name
+ 
+             png_path = dot_path.replace(".dot", ".png")
+ 
+             try:
+                 # Use async subprocess execution to avoid blocking the event loop;
+                 # this allows concurrent operations and better resource utilization.
+                 await execute_subprocess_async(
+                     ["dot", "-Tpng", dot_path, "-o", png_path],
+                     max_output_size=1_000_000,  # 1MB for error messages
+                     timeout=30,
+                 )
+ 
+                 # Read the PNG file
+                 png_data = PathlibPath(png_path).read_bytes()
+ 
+                 # Return an Image object
+                 return Image(data=png_data, mime_type="image/png")
+ 
+             finally:
+                 # Clean up temp files
+                 try:
+                     PathlibPath(dot_path).unlink()
+                     if PathlibPath(png_path).exists():
+                         PathlibPath(png_path).unlink()
+                 except (OSError, FileNotFoundError):
+                     pass
+ 
+         except Exception as e:
+             return failure(
+                 "IMAGE_GENERATION_ERROR",
+                 f"Failed to generate PNG image: {str(e)}",
+                 hint="Ensure graphviz is installed in the container",
+             )
+ 
+     # For other formats, use the cached implementation
+     result = await _generate_function_graph_impl(file_path, function_address, format, timeout)
+ 
+     # Check for a cache hit: a timestamp older than one second means alru_cache
+     # returned a stored result rather than running a fresh analysis.
+     if result.status == "success" and result.metadata:
+         ts = result.metadata.get("timestamp")
+         if ts and (time.time() - ts > 1.0):
+             result.metadata["cache_hit"] = True
+ 
+     return result
+ 
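A usage sketch (hypothetical path; PNG output additionally requires the graphviz `dot` binary on PATH):

    graph = await generate_function_graph(
        "/app/workspace/sample.bin",  # hypothetical workspace binary
        "main",
        format="mermaid",
    )
    # The Mermaid source can be pasted into any Mermaid renderer; pass
    # format="png" instead to receive a rendered image (needs graphviz).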
+ 
+ 
+ @log_execution(tool_name="analyze_xrefs")
+ @track_metrics("analyze_xrefs")
+ @handle_tool_errors
+ async def analyze_xrefs(
+     file_path: str,
+     address: str,
+     xref_type: str = "all",
+     timeout: int = DEFAULT_TIMEOUT,
+     ctx: Context | None = None,
+ ) -> ToolResult:
+     """
+     Analyze cross-references (xrefs) for a specific address using radare2.
+ 
+     Cross-references show the relationships between code blocks - who calls this
+     function (callers) and what it calls (callees). This is essential for:
+     - Understanding program flow
+     - Tracing data dependencies
+     - Identifying attack surfaces
+     - Reverse engineering malware C&C
+ 
+     **xref_type Options:**
+     - **"to"**: Show who references this address (callers/jumps TO here)
+     - **"from"**: Show what this address references (calls/jumps FROM here)
+     - **"all"**: Show both directions (complete relationship map)
+ 
+     Args:
+         file_path: Path to the binary file (must be in workspace)
+         address: Function or address to analyze (e.g., 'main', '0x401000', 'sym.decrypt')
+         xref_type: Type of cross-references to show: 'all', 'to', 'from' (default: 'all')
+         timeout: Execution timeout in seconds (defaults to the configured tool timeout)
+         ctx: FastMCP Context for progress reporting (auto-injected)
+ 
+     Returns:
+         ToolResult with structured JSON containing xrefs data:
+         {
+             "address": "main",
+             "xref_type": "all",
+             "xrefs_to": [{"from": "0x401050", "type": "call", "fcn_name": "entry0"}],
+             "xrefs_from": [{"addr": "0x401100", "type": "call", "fcn_name": "printf"}],
+             "summary": "2 reference(s) TO this address (callers), 1 reference(s) FROM this address (callees)",
+             "total_refs_to": 2,
+             "total_refs_from": 1
+         }
+ 
+     Example:
+         # Find who calls the suspicious 'decrypt' function
+         analyze_xrefs("/app/workspace/malware.exe", "sym.decrypt", "to")
+ 
+         # Find what APIs a malware function uses
+         analyze_xrefs("/app/workspace/malware.exe", "0x401000", "from")
+ 
+         # Get the complete relationship map
+         analyze_xrefs("/app/workspace/malware.exe", "main", "all")
+     """
+     # 1. Validate parameters
+     validated_path = validate_file_path(file_path)
+ 
+     if xref_type not in ["all", "to", "from"]:
+         return failure(
+             "VALIDATION_ERROR",
+             f"Invalid xref_type: {xref_type}",
+             hint="Valid options are: 'all', 'to', 'from'",
+         )
+ 
+     # 2. Validate the address format using the pre-compiled pattern
+     if not _ADDRESS_PATTERN.match(_strip_address_prefixes(address)):
+         return failure(
+             "VALIDATION_ERROR",
+             "Invalid address format",
+             hint="Address must contain only alphanumeric characters, dots, underscores, and prefixes like '0x', 'sym.', 'fcn.'",
+         )
+ 
+     # 3. Build the radare2 commands (JSON variants) for the requested directions
+     commands = []
+ 
+     if xref_type in ["all", "to"]:
+         # axtj = xrefs TO this address (callers)
+         commands.append(f"axtj @ {address}")
+ 
+     if xref_type in ["all", "from"]:
+         # axfj = xrefs FROM this address (callees)
+         commands.append(f"axfj @ {address}")
+ 
+     r2_commands_str = "; ".join(commands)
+ 
+     if ctx:
+         await ctx.report_progress(10, 100)
+         await ctx.info(f"Analyzing xrefs for {address}...")
+ 
+     # 4. Execute the analysis using the helper.
+     #    Use 'aa' (basic analysis) by default to prevent timeouts on large or
+     #    obfuscated binaries; 'aaa' is slower but more accurate, so reserve it
+     #    for small files.
+     analysis_level = "aa"
+     try:
+         file_size_mb = os.path.getsize(validated_path) / (1024 * 1024)
+         if file_size_mb < 5:
+             analysis_level = "aaa"  # Full analysis for small files (<5MB)
+         elif ctx:
+             await ctx.info(f"Large file ({file_size_mb:.1f}MB) detected, using basic analysis...")
+     except OSError:
+         pass
+ 
+     output, bytes_read = await _execute_r2_command(
+         validated_path,
+         [r2_commands_str],
+         analysis_level=analysis_level,
+         max_output_size=10_000_000,
+         base_timeout=timeout,
+     )
+ 
+     if ctx:
+         await ctx.report_progress(90, 100)
+ 
+     # 5. Parse the JSON output
+     try:
+         # The output may contain multiple JSON arrays if both "to" and "from"
+         # were requested, so split by lines and parse each array separately.
+         lines = [line.strip() for line in output.strip().split("\n") if line.strip()]
+ 
+         xrefs_to = []
+         xrefs_from = []
+ 
+         for line in lines:
+             # Robust JSON extraction from the line
+             try:
+                 refs = _parse_json_output(line)
+                 if isinstance(refs, list) and refs:
+                     # Determine direction based on field names: axtj entries
+                     # carry a "from" field, axfj entries an "addr"/"fcn_addr" field.
+                     first_ref = refs[0]
+                     if "from" in first_ref:
+                         xrefs_to = refs  # xrefs TO (callers)
+                     elif "addr" in first_ref or "fcn_addr" in first_ref:
+                         xrefs_from = refs  # xrefs FROM (callees)
+             except (json.JSONDecodeError, TypeError):
+                 # Skip lines that don't contain valid JSON
+                 continue
+ 
+         # 6. Format the results
+         result = {
+             "address": address,
+             "xref_type": xref_type,
+             "xrefs_to": xrefs_to,
+             "xrefs_from": xrefs_from,
+             "total_refs_to": len(xrefs_to),
+             "total_refs_from": len(xrefs_from),
+         }
+ 
+         # Add a human-readable summary
+         summary_parts = []
+         if xrefs_to:
+             summary_parts.append(f"{len(xrefs_to)} reference(s) TO this address (callers)")
+         if xrefs_from:
+             summary_parts.append(f"{len(xrefs_from)} reference(s) FROM this address (callees)")
+ 
+         summary = ", ".join(summary_parts) if summary_parts else "No cross-references found"
+         result["summary"] = summary
+ 
+         # 7. Return the structured result
+         return success(
+             result,
+             bytes_read=bytes_read,
+             address=address,
+             xref_type=xref_type,
+             total_refs=len(xrefs_to) + len(xrefs_from),
+             description=f"Cross-reference analysis for {address}: {summary}",
+         )
+ 
+     except Exception as e:
+         return failure(
+             "XREF_ANALYSIS_ERROR",
+             f"Failed to parse cross-reference data: {str(e)}",
+             hint="The address may not exist or the binary may not have been analyzed. Try running 'afl' first to see available functions.",
+         )
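Taken together, a plausible end-to-end triage sketch (all paths and symbol names hypothetical):

    import asyncio

    from reversecore_mcp.tools.radare2.r2_analysis import analyze_xrefs, trace_execution_path

    async def triage(binary: str) -> None:
        # 1. Who calls system()? (direct callers only)
        xrefs = await analyze_xrefs(binary, "sym.imp.system", "to")
        print(xrefs)
        # 2. Full call chains from root callers down to system()
        paths = await trace_execution_path(binary, "sym.imp.system", max_depth=3)
        print(paths)

    asyncio.run(triage("/app/workspace/malware.exe"))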
+ )