iflow-mcp_developermode-korea_reversecore-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/METADATA +543 -0
  2. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/RECORD +79 -0
  3. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/WHEEL +5 -0
  4. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/entry_points.txt +2 -0
  5. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
  6. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/top_level.txt +1 -0
  7. reversecore_mcp/__init__.py +9 -0
  8. reversecore_mcp/core/__init__.py +78 -0
  9. reversecore_mcp/core/audit.py +101 -0
  10. reversecore_mcp/core/binary_cache.py +138 -0
  11. reversecore_mcp/core/command_spec.py +357 -0
  12. reversecore_mcp/core/config.py +432 -0
  13. reversecore_mcp/core/container.py +288 -0
  14. reversecore_mcp/core/decorators.py +152 -0
  15. reversecore_mcp/core/error_formatting.py +93 -0
  16. reversecore_mcp/core/error_handling.py +142 -0
  17. reversecore_mcp/core/evidence.py +229 -0
  18. reversecore_mcp/core/exceptions.py +296 -0
  19. reversecore_mcp/core/execution.py +240 -0
  20. reversecore_mcp/core/ghidra.py +642 -0
  21. reversecore_mcp/core/ghidra_helper.py +481 -0
  22. reversecore_mcp/core/ghidra_manager.py +234 -0
  23. reversecore_mcp/core/json_utils.py +131 -0
  24. reversecore_mcp/core/loader.py +73 -0
  25. reversecore_mcp/core/logging_config.py +206 -0
  26. reversecore_mcp/core/memory.py +721 -0
  27. reversecore_mcp/core/metrics.py +198 -0
  28. reversecore_mcp/core/mitre_mapper.py +365 -0
  29. reversecore_mcp/core/plugin.py +45 -0
  30. reversecore_mcp/core/r2_helpers.py +404 -0
  31. reversecore_mcp/core/r2_pool.py +403 -0
  32. reversecore_mcp/core/report_generator.py +268 -0
  33. reversecore_mcp/core/resilience.py +252 -0
  34. reversecore_mcp/core/resource_manager.py +169 -0
  35. reversecore_mcp/core/result.py +132 -0
  36. reversecore_mcp/core/security.py +213 -0
  37. reversecore_mcp/core/validators.py +238 -0
  38. reversecore_mcp/dashboard/__init__.py +221 -0
  39. reversecore_mcp/prompts/__init__.py +56 -0
  40. reversecore_mcp/prompts/common.py +24 -0
  41. reversecore_mcp/prompts/game.py +280 -0
  42. reversecore_mcp/prompts/malware.py +1219 -0
  43. reversecore_mcp/prompts/report.py +150 -0
  44. reversecore_mcp/prompts/security.py +136 -0
  45. reversecore_mcp/resources.py +329 -0
  46. reversecore_mcp/server.py +727 -0
  47. reversecore_mcp/tools/__init__.py +49 -0
  48. reversecore_mcp/tools/analysis/__init__.py +74 -0
  49. reversecore_mcp/tools/analysis/capa_tools.py +215 -0
  50. reversecore_mcp/tools/analysis/die_tools.py +180 -0
  51. reversecore_mcp/tools/analysis/diff_tools.py +643 -0
  52. reversecore_mcp/tools/analysis/lief_tools.py +272 -0
  53. reversecore_mcp/tools/analysis/signature_tools.py +591 -0
  54. reversecore_mcp/tools/analysis/static_analysis.py +479 -0
  55. reversecore_mcp/tools/common/__init__.py +58 -0
  56. reversecore_mcp/tools/common/file_operations.py +352 -0
  57. reversecore_mcp/tools/common/memory_tools.py +516 -0
  58. reversecore_mcp/tools/common/patch_explainer.py +230 -0
  59. reversecore_mcp/tools/common/server_tools.py +115 -0
  60. reversecore_mcp/tools/ghidra/__init__.py +19 -0
  61. reversecore_mcp/tools/ghidra/decompilation.py +975 -0
  62. reversecore_mcp/tools/ghidra/ghidra_tools.py +1052 -0
  63. reversecore_mcp/tools/malware/__init__.py +61 -0
  64. reversecore_mcp/tools/malware/adaptive_vaccine.py +579 -0
  65. reversecore_mcp/tools/malware/dormant_detector.py +756 -0
  66. reversecore_mcp/tools/malware/ioc_tools.py +228 -0
  67. reversecore_mcp/tools/malware/vulnerability_hunter.py +519 -0
  68. reversecore_mcp/tools/malware/yara_tools.py +214 -0
  69. reversecore_mcp/tools/patch_explainer.py +19 -0
  70. reversecore_mcp/tools/radare2/__init__.py +13 -0
  71. reversecore_mcp/tools/radare2/r2_analysis.py +972 -0
  72. reversecore_mcp/tools/radare2/r2_session.py +376 -0
  73. reversecore_mcp/tools/radare2/radare2_mcp_tools.py +1183 -0
  74. reversecore_mcp/tools/report/__init__.py +4 -0
  75. reversecore_mcp/tools/report/email.py +82 -0
  76. reversecore_mcp/tools/report/report_mcp_tools.py +344 -0
  77. reversecore_mcp/tools/report/report_tools.py +1076 -0
  78. reversecore_mcp/tools/report/session.py +194 -0
  79. reversecore_mcp/tools/report_tools.py +11 -0
@@ -0,0 +1,975 @@
1
+ """Decompilation and code recovery tools for binary analysis."""
2
+
3
+ import os
4
+ import re
5
+
6
+ from async_lru import alru_cache
7
+ from fastmcp import Context
8
+
9
+ # Use high-performance JSON implementation (3-5x faster)
10
+ from reversecore_mcp.core import json_utils as json
11
+ from reversecore_mcp.core.config import get_config
12
+ from reversecore_mcp.core.decorators import log_execution
13
+ from reversecore_mcp.core.error_handling import handle_tool_errors
14
+ from reversecore_mcp.core.exceptions import ValidationError
15
+ from reversecore_mcp.core.logging_config import get_logger
16
+ from reversecore_mcp.core.metrics import track_metrics
17
+
18
+ # Import shared R2 helper functions from core (avoids circular dependencies)
19
+ from reversecore_mcp.core.r2_helpers import (
20
+ execute_r2_command as _execute_r2_command,
21
+ )
22
+ from reversecore_mcp.core.r2_helpers import (
23
+ strip_address_prefixes as _strip_address_prefixes,
24
+ )
25
+ from reversecore_mcp.core.resilience import circuit_breaker
26
+ from reversecore_mcp.core.result import ToolResult, failure, success
27
+ from reversecore_mcp.core.security import validate_file_path
28
+ from reversecore_mcp.core.validators import validate_tool_parameters
29
+
30
# Default per-tool timeout in seconds, read once from the central config
# so every tool in this module shares the same knob.
DEFAULT_TIMEOUT = get_config().default_tool_timeout

# Module-level logger following the project-wide logging setup.
logger = get_logger(__name__)

# OPTIMIZATION: Pre-compile regex patterns used in hot paths.
# Matches bare symbols/addresses after prefixes (e.g. "0x", "sym.") are
# stripped: alphanumerics plus '_', '.', ':', '<', '>'.
_FUNCTION_ADDRESS_PATTERN = re.compile(r"^[a-zA-Z0-9_.:<>]+$")
37
+
38
+
39
+ # =============================================================================
40
+ # Helper Functions for Structure Recovery
41
+ # =============================================================================
42
+
43
+ # OPTIMIZATION: Pre-defined type size mapping at module level
44
+ # Uses exact match for common types (O(1) dict lookup) and substring match for compound types.
45
+ # Note: Types appear in both collections intentionally - _TYPE_SIZES_EXACT for exact matches,
46
+ # _TYPE_SIZES_CONTAINS for substring matching in compound types like "unsigned int".
47
+ #
48
+ # LIMITATION: Pointer types (void*, size_t, intptr_t) assume 64-bit architecture (8 bytes).
49
+ # For 32-bit binaries, these should be 4 bytes. A future improvement would be to pass
50
+ # the binary's architecture (Bits field) and adjust pointer sizes dynamically.
51
+ _TYPE_SIZES_EXACT = {
52
+ "char": 1,
53
+ "byte": 1,
54
+ "uint8_t": 1,
55
+ "int8_t": 1,
56
+ "bool": 1,
57
+ "short": 2,
58
+ "uint16_t": 2,
59
+ "int16_t": 2,
60
+ "word": 2,
61
+ "wchar_t": 2,
62
+ "int": 4,
63
+ "uint32_t": 4,
64
+ "int32_t": 4,
65
+ "dword": 4,
66
+ "float": 4,
67
+ "long": 4,
68
+ "long long": 8,
69
+ "uint64_t": 8,
70
+ "int64_t": 8,
71
+ "qword": 8,
72
+ "double": 8,
73
+ # Pointer-sized types - assuming 64-bit (see LIMITATION note above)
74
+ "size_t": 8,
75
+ "void *": 8,
76
+ "intptr_t": 8,
77
+ }
78
+
79
+ # Types for substring match, ordered by:
80
+ # 1. Size (largest first) - ensures "uint64_t" matches before "int"
81
+ # 2. Specificity - longer/more specific types before shorter ones
82
+ # This ordering prevents "int" from matching before "uint32_t" in compound types
83
+ _TYPE_SIZES_CONTAINS = (
84
+ # 8-byte types first (larger size takes priority)
85
+ ("uint64_t", 8),
86
+ ("int64_t", 8),
87
+ ("qword", 8),
88
+ ("double", 8),
89
+ ("size_t", 8),
90
+ ("intptr_t", 8),
91
+ ("long long", 8),
92
+ # 4-byte types
93
+ ("uint32_t", 4),
94
+ ("int32_t", 4),
95
+ ("dword", 4),
96
+ ("float", 4),
97
+ # 2-byte types
98
+ ("uint16_t", 2),
99
+ ("int16_t", 2),
100
+ ("wchar_t", 2),
101
+ ("short", 2),
102
+ ("word", 2),
103
+ # 1-byte types (smallest size last)
104
+ ("uint8_t", 1),
105
+ ("int8_t", 1),
106
+ ("char", 1),
107
+ ("byte", 1),
108
+ ("bool", 1),
109
+ )
110
+
111
+
112
+ def _estimate_type_size(type_str: str) -> int:
113
+ """
114
+ Estimate the size of a C/C++ type in bytes.
115
+
116
+ Uses module-level pre-defined mappings for O(1) exact match lookup,
117
+ falling back to substring search for compound types.
118
+
119
+ Args:
120
+ type_str: Type string (e.g., "int", "char *", "float")
121
+
122
+ Returns:
123
+ Estimated size in bytes
124
+ """
125
+ type_str = type_str.lower().strip()
126
+
127
+ # Fast path: Pointer types (64-bit assumed)
128
+ if "*" in type_str or "ptr" in type_str:
129
+ return 8
130
+
131
+ # Fast path: Try exact match first (O(1) lookup)
132
+ if type_str in _TYPE_SIZES_EXACT:
133
+ return _TYPE_SIZES_EXACT[type_str]
134
+
135
+ # Slow path: Substring match for compound types (e.g., "unsigned int")
136
+ for type_name, size in _TYPE_SIZES_CONTAINS:
137
+ if type_name in type_str:
138
+ return size
139
+
140
+ # Default for unknown types
141
+ return 4
142
+
143
+
144
def _extract_structures_from_disasm(disasm_ops: list) -> dict:
    """
    Detect structure-like layouts from a function's disassembly.

    Scans every instruction for ``[reg+offset]`` style memory operands and
    groups the observed offsets by base register, treating each non-stack
    register as a candidate structure pointer.

    Args:
        disasm_ops: List of instruction dicts as produced by r2's pdfj

    Returns:
        Mapping of synthetic struct names to dicts with "name", "fields",
        and "source" keys
    """
    # Memory operand shape: [reg+offset] or [reg-offset]
    access_re = re.compile(r"\[([a-z0-9]+)\s*([+-])\s*(0x[0-9a-f]+|[0-9]+)\]", re.IGNORECASE)
    stack_regs = ("rsp", "esp", "rbp", "ebp", "sp", "bp")

    recovered = {}

    for instr in disasm_ops:
        if not isinstance(instr, dict):
            continue

        mnemonic = instr.get("opcode", "")
        text = instr.get("disasm", "")

        for base_reg, sign, raw_offset in access_re.findall(text):
            # Stack-relative accesses are local variables, not struct fields.
            if base_reg.lower() in stack_regs:
                continue

            try:
                offset = int(raw_offset, 16) if raw_offset.startswith("0x") else int(raw_offset)
            except ValueError:
                continue
            if sign == "-":
                offset = -offset

            # Structure fields live at non-negative offsets only.
            if offset < 0:
                continue

            field_type = _infer_type_from_instruction(mnemonic, text)

            # One synthetic struct per base register.
            struct_name = f"struct_ptr_{base_reg}"
            entry = recovered.setdefault(
                struct_name,
                {
                    "name": struct_name,
                    "fields": [],
                    "source": "memory_access_pattern",
                },
            )

            # De-duplicate fields by offset.
            offset_hex = f"0x{offset:x}"
            if all(field["offset"] != offset_hex for field in entry["fields"]):
                entry["fields"].append(
                    {
                        "offset": offset_hex,
                        "type": field_type,
                        "name": f"field_{offset:x}",
                        "size": _estimate_type_size(field_type),
                    }
                )

    return recovered
216
+
217
+
218
+ def _infer_type_from_instruction(opcode: str, disasm: str) -> str:
219
+ """
220
+ Infer the data type from the instruction.
221
+
222
+ Args:
223
+ opcode: Instruction opcode (e.g., "mov", "movss")
224
+ disasm: Full disassembly string
225
+
226
+ Returns:
227
+ Inferred type string
228
+ """
229
+ opcode_lower = opcode.lower()
230
+ disasm_lower = disasm.lower()
231
+
232
+ # Floating point operations
233
+ if any(x in opcode_lower for x in ("movss", "addss", "subss", "mulss", "divss", "comiss")):
234
+ return "float"
235
+ if any(x in opcode_lower for x in ("movsd", "addsd", "subsd", "mulsd", "divsd", "comisd")):
236
+ return "double"
237
+ if any(x in opcode_lower for x in ("movaps", "movups", "xmm")):
238
+ return "float[4]" # SSE vector
239
+
240
+ # Size hints from operand suffixes
241
+ if "byte" in disasm_lower or opcode_lower.endswith("b"):
242
+ return "uint8_t"
243
+ if "word" in disasm_lower and "dword" not in disasm_lower and "qword" not in disasm_lower:
244
+ return "uint16_t"
245
+ if "dword" in disasm_lower:
246
+ return "uint32_t"
247
+ if "qword" in disasm_lower:
248
+ return "uint64_t"
249
+
250
+ # Register-based inference
251
+ if any(r in disasm_lower for r in ("rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9")):
252
+ return "uint64_t"
253
+ if any(r in disasm_lower for r in ("eax", "ebx", "ecx", "edx", "esi", "edi")):
254
+ return "uint32_t"
255
+ if any(r in disasm_lower for r in ("ax", "bx", "cx", "dx")):
256
+ return "uint16_t"
257
+ if any(r in disasm_lower for r in ("al", "bl", "cl", "dl", "ah", "bh", "ch", "dh")):
258
+ return "uint8_t"
259
+
260
+ # Default
261
+ return "uint32_t"
262
+
263
+
264
def _validate_address_or_fail(address: str, param_name: str = "address"):
    """
    Validate an address string, converting any error into a ToolResult.

    Consolidates the repeated try/except-around-validation pattern used by
    the tools in this module.

    Args:
        address: Address string to validate
        param_name: Parameter name used in error messages

    Returns:
        None when the address is valid, otherwise a failure ToolResult

    Raises:
        Nothing - validation problems surface as ToolResult failures
    """
    from reversecore_mcp.core.validators import validate_address_format

    try:
        validate_address_format(address, param_name)
    except ValidationError as exc:
        return failure("VALIDATION_ERROR", str(exc))
    return None
288
+
289
+
290
+ def _parse_register_state(ar_output: str) -> dict:
291
+ """
292
+ Parse radare2 'ar' command output into structured register state.
293
+
294
+ Args:
295
+ ar_output: Raw output from 'ar' command
296
+
297
+ Returns:
298
+ Dictionary mapping register names to values
299
+
300
+ Example output from 'ar':
301
+ rax = 0x00000000
302
+ rbx = 0x00401000
303
+ ...
304
+ """
305
+ registers = {}
306
+
307
+ for line in ar_output.strip().split("\n"):
308
+ if "=" in line:
309
+ parts = line.split("=")
310
+ if len(parts) == 2:
311
+ reg_name = parts[0].strip()
312
+ reg_value = parts[1].strip()
313
+ registers[reg_name] = reg_value
314
+
315
+ return registers
316
+
317
+
318
@log_execution(tool_name="emulate_machine_code")
@track_metrics("emulate_machine_code")
@handle_tool_errors
async def emulate_machine_code(
    file_path: str,
    start_address: str,
    instructions: int = 50,
    timeout: int = DEFAULT_TIMEOUT,
) -> ToolResult:
    """
    Emulate machine code with radare2's ESIL virtual machine.

    Runs the code at ``start_address`` on a virtual CPU — nothing is ever
    actually executed — and reports the resulting register state. Useful
    for de-obfuscation (e.g. revealing XOR-decoded strings), predicting
    execution outcomes, and analyzing malicious code without risk.

    Safety: virtual CPU only, instruction count capped (max 1000) against
    infinite loops, and all memory changes stay inside the sandbox.

    Args:
        file_path: Path to the binary file (must be in workspace)
        start_address: Address to start emulation (e.g., 'main', '0x401000', 'sym.decrypt')
        instructions: Number of instructions to execute (default 50, max 1000)
        timeout: Execution timeout in seconds

    Returns:
        ToolResult with register states and emulation summary
    """
    # Validate user-supplied parameters before touching the binary.
    validate_tool_parameters(
        "emulate_machine_code",
        {"start_address": start_address, "instructions": instructions},
    )
    validated_path = validate_file_path(file_path)

    # Reject addresses that could smuggle shell metacharacters into r2.
    address_problem = _validate_address_or_fail(start_address, "start_address")
    if address_problem is not None:
        return address_problem

    # ESIL setup is order-sensitive: seek, init VM, init memory (stack),
    # init program counter, step N instructions, dump registers.
    command_sequence = [
        f"s {start_address}",
        "aei",
        "aeim",
        "aeip",
        f"aes {instructions}",
        "ar",
    ]

    try:
        raw_output, bytes_read = await _execute_r2_command(
            validated_path,
            command_sequence,
            analysis_level="aaa",
            max_output_size=10_000_000,
            base_timeout=timeout,
        )

        registers = _parse_register_state(raw_output)
        if not registers:
            return failure(
                "EMULATION_ERROR",
                "Failed to extract register state from emulation output",
                hint="The binary may not be compatible with ESIL emulation, or the start address is invalid",
            )

        return success(
            registers,
            bytes_read=bytes_read,
            format="register_state",
            instructions_executed=instructions,
            start_address=start_address,
            description=f"Emulated {instructions} instructions starting at {start_address}",
        )

    except Exception as e:
        return failure(
            "EMULATION_ERROR",
            f"ESIL emulation failed: {str(e)}",
            hint="Check that the binary architecture is supported and the start address is valid",
        )
412
+
413
+
414
@log_execution(tool_name="get_pseudo_code")
@track_metrics("get_pseudo_code")
@handle_tool_errors
async def get_pseudo_code(
    file_path: str,
    address: str = "main",
    timeout: int = 300,
) -> ToolResult:
    """
    Decompile one function to pseudo C via radare2's ``pdc`` command.

    Produces a C-like rendering of the function at *address* — far easier
    to follow than raw assembly and a good starting point for AI-assisted
    analysis, refactoring, or documentation. The result is "pseudo C":
    readable, but not guaranteed to be compilable C.

    Args:
        file_path: Path to the binary file (must be in workspace)
        address: Function address to decompile (e.g., 'main', '0x401000', 'sym.foo')
        timeout: Execution timeout in seconds (default 300)

    Returns:
        ToolResult with pseudo C code string

    Example:
        get_pseudo_code("/app/workspace/sample.exe", "main")
        # Returns C-like code representation of the main function
    """
    validated_path = validate_file_path(file_path)

    # Block addresses that could carry shell metacharacters into r2.
    address_problem = _validate_address_or_fail(address, "address")
    if address_problem is not None:
        return address_problem

    # Small binaries (< 5 MB) get the thorough 'aaa' pass; larger ones stay
    # on the cheaper 'aa' pass to avoid analysis timeouts.
    analysis_level = "aa"
    try:
        if os.path.getsize(validated_path) < 5 * 1024 * 1024:
            analysis_level = "aaa"
    except OSError:
        pass

    raw_output, bytes_read = await _execute_r2_command(
        validated_path,
        [f"pdc @ {address}"],
        analysis_level=analysis_level,
        max_output_size=10_000_000,
        base_timeout=timeout,
    )

    # An empty pdc result means the address did not resolve to a function.
    if not raw_output or not raw_output.strip():
        return failure(
            "DECOMPILATION_ERROR",
            f"No decompilation output for address: {address}",
            hint="Verify the address exists and points to a valid function. Try analyzing with 'afl' first.",
        )

    return success(
        raw_output,
        bytes_read=bytes_read,
        address=address,
        format="pseudo_c",
        analysis_level=analysis_level,
        description=f"Pseudo C code decompiled from address {address} (analysis: {analysis_level})",
    )
497
+
498
+
499
@alru_cache(maxsize=32)
@log_execution(tool_name="smart_decompile")
@track_metrics("smart_decompile")
@circuit_breaker("smart_decompile", failure_threshold=3, recovery_timeout=60)
@handle_tool_errors
async def _smart_decompile_impl(
    file_path: str,
    function_address: str,
    timeout: int = DEFAULT_TIMEOUT,
    use_ghidra: bool = True,
    _file_mtime: float = 0.0,  # Cache key includes mtime for invalidation on file change
) -> ToolResult:
    """
    Internal implementation of smart_decompile with caching.

    Tries Ghidra first (when requested and importable/available) and falls
    back to radare2's ``pdc`` on any Ghidra failure. Only the radare2 path
    attaches a ``timestamp`` to the result, which the public wrapper uses
    for cache-hit detection.

    Note: _file_mtime parameter is used for cache invalidation when the file
    is modified (e.g., after patching with adaptive_vaccine).

    NOTE(review): alru_cache is the outermost decorator, so whatever the
    wrapped stack returns — including failure ToolResults produced by
    @handle_tool_errors — is cached, and the *same object* is handed to
    every caller with identical arguments. Callers that mutate the result
    mutate the cached copy. Confirm both behaviors are intended.
    """
    # 1. Validate parameters
    validate_tool_parameters("smart_decompile", {"function_address": function_address})
    validated_path = validate_file_path(file_path)

    # 2. Security check for function address (prevent shell injection)
    validation_error = _validate_address_or_fail(function_address, "function_address")
    if validation_error:
        return validation_error

    # 3. Try Ghidra first if requested and available.
    # Imports are kept local so the module loads without PyGhidra installed.
    if use_ghidra:
        try:
            from reversecore_mcp.core.ghidra import (
                decompile_function_with_ghidra,
                ensure_ghidra_available,
            )

            if ensure_ghidra_available():
                logger.info(f"Using Ghidra decompiler for {function_address}")

                # Run Ghidra decompilation
                try:
                    c_code, metadata = decompile_function_with_ghidra(
                        validated_path, function_address, timeout
                    )

                    return success(
                        c_code,
                        function_address=function_address,
                        format="pseudo_c",
                        decompiler="ghidra",
                        **metadata,
                    )

                except Exception as ghidra_error:
                    # Any Ghidra runtime failure degrades gracefully to r2.
                    logger.warning(
                        f"Ghidra decompilation failed: {ghidra_error}. Falling back to radare2"
                    )
                    # Fall through to radare2
            else:
                logger.info("Ghidra not available, using radare2")

        except ImportError:
            logger.info("PyGhidra not installed, using radare2")

    # 4. Fallback to radare2 (original implementation)
    logger.info(f"Using radare2 decompiler for {function_address}")

    r2_cmds = [f"pdc @ {function_address}"]

    # 5. Execute decompilation using helper
    try:
        output, bytes_read = await _execute_r2_command(
            validated_path,
            r2_cmds,
            analysis_level="aa",
            max_output_size=10_000_000,
            base_timeout=timeout,
        )
    except Exception as e:
        # If 'aaa' fails, try lighter analysis 'aa' or just '-n' if desperate,
        # but pdc requires analysis.
        return failure(
            "DECOMPILATION_ERROR",
            f"Radare2 decompilation failed: {str(e)}",
            hint="Analysis failed. The binary might be packed or corrupted.",
        )

    # Add timestamp for cache visibility: the wrapper treats a stale
    # timestamp as evidence the result came from the alru_cache.
    import time

    timestamp = time.time()

    # 6. Return result
    return success(
        output,
        bytes_read=bytes_read,
        function_address=function_address,
        format="pseudo_c",
        decompiler="radare2",
        description=f"Decompiled code from function {function_address}",
        timestamp=timestamp,
    )
600
+
601
+
602
async def smart_decompile(
    file_path: str,
    function_address: str,
    timeout: int = DEFAULT_TIMEOUT,
    use_ghidra: bool = True,
    ctx: Context = None,
) -> ToolResult:
    """
    Decompile a function to pseudo C code using Ghidra or radare2.

    This tool provides decompilation for a specific function in a binary,
    making it easier to understand the logic without reading raw assembly.
    Results are served from an LRU cache keyed on the arguments plus the
    file's mtime, so re-runs on an unmodified binary are cheap.

    **Decompiler Selection:**
    - Ghidra (default): More accurate, better type recovery, industry-standard
    - radare2 (fallback): Faster, lighter weight, good for quick analysis

    Args:
        file_path: Path to the binary file (must be in workspace)
        function_address: Function address to decompile (e.g., 'main', '0x401000')
        timeout: Execution timeout in seconds (default 300)
        use_ghidra: Use Ghidra decompiler if available (default True)
        ctx: FastMCP Context (auto-injected)

    Returns:
        ToolResult with decompiled pseudo C code
    """
    import os
    import time

    # Get file mtime for cache invalidation (cache busts when file is modified).
    # A missing/unreadable file falls back to 0.0 and lets the impl's own
    # path validation produce the real error.
    try:
        file_mtime = os.path.getmtime(file_path)
    except OSError:
        file_mtime = 0.0

    result = await _smart_decompile_impl(
        file_path, function_address, timeout, use_ghidra, _file_mtime=file_mtime
    )

    # Cache-hit heuristic: a result whose embedded timestamp is more than
    # ~1s old must have come from the alru_cache rather than a fresh run.
    # NOTE(review): only the radare2 path sets "timestamp", so Ghidra
    # results are never flagged as cache hits; also this mutates the
    # metadata of the cached ToolResult instance shared with future
    # callers — confirm both are intended.
    if result.status == "success" and result.metadata:
        ts = result.metadata.get("timestamp")
        if ts and (time.time() - ts > 1.0):
            result.metadata["cache_hit"] = True

    return result
649
+
650
+
651
+ @log_execution(tool_name="recover_structures")
652
+ @track_metrics("recover_structures")
653
+ @handle_tool_errors
654
+ async def recover_structures(
655
+ file_path: str,
656
+ function_address: str,
657
+ use_ghidra: bool = True,
658
+ fast_mode: bool = True,
659
+ timeout: int = DEFAULT_TIMEOUT * 5,
660
+ ctx: Context = None,
661
+ ) -> ToolResult:
662
+ """
663
+ Recover C++ class structures and data types from binary code.
664
+
665
+ This is THE game-changer for C++ reverse engineering. Transforms cryptic
666
+ "this + 0x4" memory accesses into meaningful "Player.health" structure fields.
667
+ Uses Ghidra's powerful data type propagation and structure recovery algorithms.
668
+
669
+ **Why Structure Recovery Matters:**
670
+ - **C++ Analysis**: 99% of game clients and commercial apps are C++
671
+ - **Understanding**: "this + 0x4" means nothing, "Player.health = 100" tells a story
672
+ - **AI Comprehension**: AI can't understand raw offsets, but understands named fields
673
+ - **Scale**: One structure definition can clarify thousands of lines of code
674
+
675
+ **Performance Tips (for large binaries like game clients):**
676
+ - Use `fast_mode=True` (default) to skip full binary analysis
677
+ - Use `use_ghidra=False` for quick radare2-based analysis
678
+ - For best results on first run, set `fast_mode=False` but expect longer wait
679
+
680
+ **How It Works:**
681
+ 1. Analyze memory access patterns in the function
682
+ 2. Identify structure layouts from offset usage
683
+ 3. Use data type propagation to infer field types
684
+ 4. Generate C structure definitions with meaningful names
685
+
686
+ **Use Cases:**
687
+ - Game hacking: Recover Player, Entity, Weapon structures
688
+ - Malware analysis: Understand malware configuration structures
689
+ - Vulnerability research: Find buffer overflow candidates in structs
690
+ - Software auditing: Document undocumented data structures
691
+
692
+ **Ghidra vs Radare2:**
693
+ - Ghidra (default): Superior type recovery, structure propagation, C++ support
694
+ - Radare2 (fallback): Basic structure definition, faster but less intelligent
695
+
696
+ Args:
697
+ file_path: Path to the binary file (must be in workspace)
698
+ function_address: Function to analyze for structure usage (e.g., 'main', '0x401000')
699
+ use_ghidra: Use Ghidra for advanced recovery (default True), or radare2 for basic
700
+ fast_mode: Skip full binary analysis for faster startup (default True)
701
+ timeout: Execution timeout in seconds (default 300 seconds)
702
+ ctx: FastMCP Context (auto-injected)
703
+
704
+ Returns:
705
+ ToolResult with recovered structures in C format:
706
+ {
707
+ "structures": [
708
+ {
709
+ "name": "Player",
710
+ "size": 64,
711
+ "fields": [
712
+ {"offset": "0x0", "type": "int", "name": "health"},
713
+ {"offset": "0x4", "type": "int", "name": "armor"},
714
+ {"offset": "0x8", "type": "Vector3", "name": "position"}
715
+ ]
716
+ }
717
+ ],
718
+ "c_definitions": "struct Player { int health; int armor; Vector3 position; };"
719
+ }
720
+
721
+ Example:
722
+ # Fast structure recovery (recommended for large binaries)
723
+ recover_structures("/app/workspace/game.exe", "main")
724
+
725
+ # More thorough analysis (slower but more accurate)
726
+ recover_structures("/app/workspace/game.exe", "main", fast_mode=False)
727
+
728
+ # Use radare2 for quick analysis
729
+ recover_structures("/app/workspace/binary", "0x401000", use_ghidra=False)
730
+ """
731
+ from reversecore_mcp.core.ghidra import ensure_ghidra_available
732
+
733
+ # 1. Validate parameters
734
+ validated_path = validate_file_path(file_path)
735
+
736
+ # 2. Validate address format
737
+ # OPTIMIZATION: Use pre-compiled regex pattern (faster)
738
+ if not _FUNCTION_ADDRESS_PATTERN.match(_strip_address_prefixes(function_address)):
739
+ return failure(
740
+ "VALIDATION_ERROR",
741
+ "Invalid function address format",
742
+ hint="Address must contain only alphanumeric characters, dots, underscores, colons, angle brackets, and prefixes like '0x', 'sym.'",
743
+ )
744
+
745
+ # 3. Check if Ghidra is available when requested
746
+ if use_ghidra:
747
+ # Check availability and fallback if needed
748
+ if not ensure_ghidra_available():
749
+ # Instead of failing, let's fallback to radare2 with a warning in the description
750
+ # This improves UX when Ghidra is optional but requested by default
751
+ use_ghidra = False
752
+ # We will append a note to the result description later
753
+ fallback_note = " (Ghidra not available, fell back to radare2)"
754
+ else:
755
+ fallback_note = ""
756
+ # 4a. Use Ghidra for advanced structure recovery
757
+ try:
758
+ from reversecore_mcp.core.ghidra import (
759
+ recover_structures_with_ghidra,
760
+ )
761
+
762
+ # Pass fast_mode to skip full binary analysis
763
+ structures, metadata = recover_structures_with_ghidra(
764
+ validated_path, function_address, timeout, skip_full_analysis=fast_mode
765
+ )
766
+
767
+ mode_note = " (fast mode)" if fast_mode else " (full analysis)"
768
+ return success(
769
+ {"structures": structures},
770
+ **metadata,
771
+ function_address=function_address,
772
+ method="ghidra",
773
+ fast_mode=fast_mode,
774
+ description=f"Structures recovered from {function_address} using Ghidra{mode_note}",
775
+ )
776
+
777
+ except Exception as e:
778
+ # If Ghidra fails during execution, also fallback
779
+ use_ghidra = False
780
+ fallback_note = f" (Ghidra failed: {str(e)}, fell back to radare2)"
781
+
782
+ if not use_ghidra:
783
+ # 4b. Use radare2 for enhanced structure recovery
784
+ # Multi-pronged approach:
785
+ # 1. Function variables (afvj)
786
+ # 2. Data types from binary (tj)
787
+ # 3. Memory access patterns (axtj for structure field access)
788
+ # 4. RTTI-based class detection
789
+
790
+ import os
791
+
792
+ file_size_mb = os.path.getsize(validated_path) / (1024 * 1024)
793
+
794
+ # For structure recovery, we need deeper analysis than basic 'aa'
795
+ # Use 'aaa' for structure recovery even on large files, but with timeout protection
796
+ if fast_mode:
797
+ # Fast mode: minimal analysis, may miss structures
798
+ analysis_level = "aa"
799
+ analysis_note = " (fast mode - may miss some structures)"
800
+ else:
801
+ # Full mode: thorough analysis for structure recovery
802
+ # Even for large files, we need 'aaa' to detect types
803
+ analysis_level = "aaa"
804
+ analysis_note = " (full analysis)"
805
+
806
+ # Enhanced command set for structure recovery
807
+ r2_cmds = [
808
+ f"s {function_address}", # Seek to function
809
+ "af", # Analyze this function
810
+ "afvj", # Get function variables in JSON
811
+ "afij", # Get function info (size, type)
812
+ "pdfj", # Disassemble function - detect memory access patterns
813
+ ]
814
+
815
+ # Execute using helper
816
+ output, bytes_read = await _execute_r2_command(
817
+ validated_path,
818
+ r2_cmds,
819
+ analysis_level=analysis_level,
820
+ max_output_size=10_000_000,
821
+ base_timeout=timeout,
822
+ )
823
+
824
+ # 5. Parse radare2 output - enhanced parsing
825
+ try:
826
+ structures = {}
827
+ detected_classes = []
828
+ memory_accesses = []
829
+
830
+ # Parse multi-command output
831
+ outputs = output.strip().split("\n")
832
+
833
+ # Try to parse each line as JSON
834
+ variables = []
835
+ function_info = {}
836
+ disasm_ops = []
837
+
838
+ valid_json_parsed = False
839
+ for line in outputs:
840
+ line = line.strip()
841
+ if not line:
842
+ continue
843
+ try:
844
+ parsed = json.loads(line)
845
+ valid_json_parsed = True
846
+ if isinstance(parsed, list):
847
+ # Could be variables (afvj) or disasm ops
848
+ if parsed and isinstance(parsed[0], dict):
849
+ if "name" in parsed[0] and "type" in parsed[0]:
850
+ variables = parsed
851
+ elif "opcode" in parsed[0]:
852
+ disasm_ops = parsed
853
+ elif isinstance(parsed, dict):
854
+ if "ops" in parsed:
855
+ disasm_ops = parsed.get("ops", [])
856
+ elif "name" in parsed:
857
+ function_info = parsed
858
+ except json.JSONDecodeError:
859
+ continue
860
+
861
+ # If we had output but failed to parse any JSON, raise error
862
+ if output.strip() and not valid_json_parsed:
863
+ raise json.JSONDecodeError("No valid JSON found in output", output, 0)
864
+
865
+ # Extract structures from variables
866
+ for var in variables:
867
+ if isinstance(var, dict):
868
+ var_type = var.get("type", "unknown")
869
+ var_name = var.get("name", "unnamed")
870
+ offset = var.get("delta", 0)
871
+ kind = var.get("kind", "")
872
+
873
+ # Determine structure grouping
874
+ if "arg" in kind:
875
+ base = "args"
876
+ elif "var" in kind or "local" in kind:
877
+ base = "locals"
878
+ else:
879
+ base = (
880
+ var.get("ref", {}).get("base", "stack")
881
+ if isinstance(var.get("ref"), dict)
882
+ else "stack"
883
+ )
884
+
885
+ if base not in structures:
886
+ structures[base] = {
887
+ "name": f"struct_{base}",
888
+ "fields": [],
889
+ "source": "variables",
890
+ }
891
+
892
+ structures[base]["fields"].append(
893
+ {
894
+ "offset": f"0x{abs(offset):x}",
895
+ "type": var_type,
896
+ "name": var_name,
897
+ "size": _estimate_type_size(var_type),
898
+ }
899
+ )
900
+
901
+ # Analyze disassembly for memory access patterns (structure field detection)
902
+ struct_from_memory = _extract_structures_from_disasm(disasm_ops)
903
+ for struct_name, struct_data in struct_from_memory.items():
904
+ if struct_name not in structures:
905
+ structures[struct_name] = struct_data
906
+ else:
907
+ # Merge fields
908
+ existing_offsets = {f["offset"] for f in structures[struct_name]["fields"]}
909
+ for field in struct_data["fields"]:
910
+ if field["offset"] not in existing_offsets:
911
+ structures[struct_name]["fields"].append(field)
912
+
913
+ # Sort fields by offset within each structure
914
+ for struct_data in structures.values():
915
+ struct_data["fields"].sort(
916
+ key=lambda f: int(f["offset"], 16)
917
+ if f["offset"].startswith("0x")
918
+ else int(f["offset"])
919
+ )
920
+
921
+ # 6. Generate C structure definitions
922
+ c_definitions = []
923
+ for _struct_name, struct_data in structures.items():
924
+ if not struct_data["fields"]:
925
+ continue
926
+
927
+ field_strs = [
928
+ f" {field['type']} {field['name']}; // offset {field['offset']}, size ~{field.get('size', '?')} bytes"
929
+ for field in struct_data["fields"]
930
+ ]
931
+ fields_str = "\n".join(field_strs)
932
+
933
+ c_def = f"struct {struct_data['name']} {{\n{fields_str}\n}};"
934
+ c_definitions.append(c_def)
935
+
936
+ # Filter out empty structures
937
+ non_empty_structures = {k: v for k, v in structures.items() if v["fields"]}
938
+
939
+ result = {
940
+ "structures": list(non_empty_structures.values()),
941
+ "c_definitions": "\n\n".join(c_definitions),
942
+ "count": len(non_empty_structures),
943
+ "analysis_mode": "fast" if fast_mode else "full",
944
+ }
945
+
946
+ desc = f"Structure recovery from {function_address} using radare2{analysis_note} (found {len(non_empty_structures)} structure(s))"
947
+ if "fallback_note" in locals():
948
+ desc += fallback_note
949
+
950
+ # Add hint if no structures found
951
+ hint = None
952
+ if not non_empty_structures: # OPTIMIZATION: Direct bool check instead of len() comparison
953
+ hint = "No structures found. Try: 1) fast_mode=False for deeper analysis, 2) use_ghidra=True for C++ structures, 3) analyze a function that uses structures (not main/entry0)"
954
+
955
+ return success(
956
+ result,
957
+ bytes_read=bytes_read,
958
+ function_address=function_address,
959
+ method="radare2",
960
+ structure_count=len(non_empty_structures),
961
+ description=desc,
962
+ hint=hint,
963
+ )
964
+
965
+ except json.JSONDecodeError as e:
966
+ return failure(
967
+ "STRUCTURE_RECOVERY_ERROR",
968
+ f"Failed to parse structure data: {str(e)}",
969
+ hint="The function may not exist or may not use structures. Verify the address with 'afl' command.",
970
+ )
971
+
972
+ # Note: DecompilationPlugin has been removed.
973
+ # All tools (emulate_machine_code, get_pseudo_code, smart_decompile, recover_structures)
974
+ # are now registered via GhidraToolsPlugin in ghidra_tools.py for unified management.
975
+