iflow-mcp_developermode-korea_reversecore-mcp 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/METADATA +543 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/RECORD +79 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/WHEEL +5 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/top_level.txt +1 -0
- reversecore_mcp/__init__.py +9 -0
- reversecore_mcp/core/__init__.py +78 -0
- reversecore_mcp/core/audit.py +101 -0
- reversecore_mcp/core/binary_cache.py +138 -0
- reversecore_mcp/core/command_spec.py +357 -0
- reversecore_mcp/core/config.py +432 -0
- reversecore_mcp/core/container.py +288 -0
- reversecore_mcp/core/decorators.py +152 -0
- reversecore_mcp/core/error_formatting.py +93 -0
- reversecore_mcp/core/error_handling.py +142 -0
- reversecore_mcp/core/evidence.py +229 -0
- reversecore_mcp/core/exceptions.py +296 -0
- reversecore_mcp/core/execution.py +240 -0
- reversecore_mcp/core/ghidra.py +642 -0
- reversecore_mcp/core/ghidra_helper.py +481 -0
- reversecore_mcp/core/ghidra_manager.py +234 -0
- reversecore_mcp/core/json_utils.py +131 -0
- reversecore_mcp/core/loader.py +73 -0
- reversecore_mcp/core/logging_config.py +206 -0
- reversecore_mcp/core/memory.py +721 -0
- reversecore_mcp/core/metrics.py +198 -0
- reversecore_mcp/core/mitre_mapper.py +365 -0
- reversecore_mcp/core/plugin.py +45 -0
- reversecore_mcp/core/r2_helpers.py +404 -0
- reversecore_mcp/core/r2_pool.py +403 -0
- reversecore_mcp/core/report_generator.py +268 -0
- reversecore_mcp/core/resilience.py +252 -0
- reversecore_mcp/core/resource_manager.py +169 -0
- reversecore_mcp/core/result.py +132 -0
- reversecore_mcp/core/security.py +213 -0
- reversecore_mcp/core/validators.py +238 -0
- reversecore_mcp/dashboard/__init__.py +221 -0
- reversecore_mcp/prompts/__init__.py +56 -0
- reversecore_mcp/prompts/common.py +24 -0
- reversecore_mcp/prompts/game.py +280 -0
- reversecore_mcp/prompts/malware.py +1219 -0
- reversecore_mcp/prompts/report.py +150 -0
- reversecore_mcp/prompts/security.py +136 -0
- reversecore_mcp/resources.py +329 -0
- reversecore_mcp/server.py +727 -0
- reversecore_mcp/tools/__init__.py +49 -0
- reversecore_mcp/tools/analysis/__init__.py +74 -0
- reversecore_mcp/tools/analysis/capa_tools.py +215 -0
- reversecore_mcp/tools/analysis/die_tools.py +180 -0
- reversecore_mcp/tools/analysis/diff_tools.py +643 -0
- reversecore_mcp/tools/analysis/lief_tools.py +272 -0
- reversecore_mcp/tools/analysis/signature_tools.py +591 -0
- reversecore_mcp/tools/analysis/static_analysis.py +479 -0
- reversecore_mcp/tools/common/__init__.py +58 -0
- reversecore_mcp/tools/common/file_operations.py +352 -0
- reversecore_mcp/tools/common/memory_tools.py +516 -0
- reversecore_mcp/tools/common/patch_explainer.py +230 -0
- reversecore_mcp/tools/common/server_tools.py +115 -0
- reversecore_mcp/tools/ghidra/__init__.py +19 -0
- reversecore_mcp/tools/ghidra/decompilation.py +975 -0
- reversecore_mcp/tools/ghidra/ghidra_tools.py +1052 -0
- reversecore_mcp/tools/malware/__init__.py +61 -0
- reversecore_mcp/tools/malware/adaptive_vaccine.py +579 -0
- reversecore_mcp/tools/malware/dormant_detector.py +756 -0
- reversecore_mcp/tools/malware/ioc_tools.py +228 -0
- reversecore_mcp/tools/malware/vulnerability_hunter.py +519 -0
- reversecore_mcp/tools/malware/yara_tools.py +214 -0
- reversecore_mcp/tools/patch_explainer.py +19 -0
- reversecore_mcp/tools/radare2/__init__.py +13 -0
- reversecore_mcp/tools/radare2/r2_analysis.py +972 -0
- reversecore_mcp/tools/radare2/r2_session.py +376 -0
- reversecore_mcp/tools/radare2/radare2_mcp_tools.py +1183 -0
- reversecore_mcp/tools/report/__init__.py +4 -0
- reversecore_mcp/tools/report/email.py +82 -0
- reversecore_mcp/tools/report/report_mcp_tools.py +344 -0
- reversecore_mcp/tools/report/report_tools.py +1076 -0
- reversecore_mcp/tools/report/session.py +194 -0
- reversecore_mcp/tools/report_tools.py +11 -0
reversecore_mcp/tools/ghidra/decompilation.py
@@ -0,0 +1,975 @@
"""Decompilation and code recovery tools for binary analysis."""

import os
import re

from async_lru import alru_cache
from fastmcp import Context

# Use high-performance JSON implementation (3-5x faster)
from reversecore_mcp.core import json_utils as json
from reversecore_mcp.core.config import get_config
from reversecore_mcp.core.decorators import log_execution
from reversecore_mcp.core.error_handling import handle_tool_errors
from reversecore_mcp.core.exceptions import ValidationError
from reversecore_mcp.core.logging_config import get_logger
from reversecore_mcp.core.metrics import track_metrics

# Import shared R2 helper functions from core (avoids circular dependencies)
from reversecore_mcp.core.r2_helpers import (
    execute_r2_command as _execute_r2_command,
)
from reversecore_mcp.core.r2_helpers import (
    strip_address_prefixes as _strip_address_prefixes,
)
from reversecore_mcp.core.resilience import circuit_breaker
from reversecore_mcp.core.result import ToolResult, failure, success
from reversecore_mcp.core.security import validate_file_path
from reversecore_mcp.core.validators import validate_tool_parameters

# Load default timeout from configuration
DEFAULT_TIMEOUT = get_config().default_tool_timeout

logger = get_logger(__name__)

# OPTIMIZATION: Pre-compile regex patterns used in hot paths
_FUNCTION_ADDRESS_PATTERN = re.compile(r"^[a-zA-Z0-9_.:<>]+$")


# =============================================================================
# Helper Functions for Structure Recovery
# =============================================================================

# OPTIMIZATION: Pre-defined type size mapping at module level
# Uses exact match for common types (O(1) dict lookup) and substring match for compound types.
# Note: Types appear in both collections intentionally - _TYPE_SIZES_EXACT for exact matches,
# _TYPE_SIZES_CONTAINS for substring matching in compound types like "unsigned int".
#
# LIMITATION: Pointer types (void*, size_t, intptr_t) assume 64-bit architecture (8 bytes).
# For 32-bit binaries, these should be 4 bytes. A future improvement would be to pass
# the binary's architecture (Bits field) and adjust pointer sizes dynamically.
_TYPE_SIZES_EXACT = {
    "char": 1,
    "byte": 1,
    "uint8_t": 1,
    "int8_t": 1,
    "bool": 1,
    "short": 2,
    "uint16_t": 2,
    "int16_t": 2,
    "word": 2,
    "wchar_t": 2,
    "int": 4,
    "uint32_t": 4,
    "int32_t": 4,
    "dword": 4,
    "float": 4,
    "long": 4,
    "long long": 8,
    "uint64_t": 8,
    "int64_t": 8,
    "qword": 8,
    "double": 8,
    # Pointer-sized types - assuming 64-bit (see LIMITATION note above)
    "size_t": 8,
    "void *": 8,
    "intptr_t": 8,
}

# Types for substring match, ordered by:
# 1. Size (largest first) - ensures "uint64_t" matches before "int"
# 2. Specificity - longer/more specific types before shorter ones
# This ordering prevents "int" from matching before "uint32_t" in compound types
_TYPE_SIZES_CONTAINS = (
    # 8-byte types first (larger size takes priority)
    ("uint64_t", 8),
    ("int64_t", 8),
    ("qword", 8),
    ("double", 8),
    ("size_t", 8),
    ("intptr_t", 8),
    ("long long", 8),
    # 4-byte types
    ("uint32_t", 4),
    ("int32_t", 4),
    ("dword", 4),
    ("float", 4),
    # 2-byte types
    ("uint16_t", 2),
    ("int16_t", 2),
    ("wchar_t", 2),
    ("short", 2),
    ("word", 2),
    # 1-byte types (smallest size last)
    ("uint8_t", 1),
    ("int8_t", 1),
    ("char", 1),
    ("byte", 1),
    ("bool", 1),
)


def _estimate_type_size(type_str: str) -> int:
    """
    Estimate the size of a C/C++ type in bytes.

    Uses module-level pre-defined mappings for O(1) exact match lookup,
    falling back to substring search for compound types.

    Args:
        type_str: Type string (e.g., "int", "char *", "float")

    Returns:
        Estimated size in bytes
    """
    type_str = type_str.lower().strip()

    # Fast path: Pointer types (64-bit assumed)
    if "*" in type_str or "ptr" in type_str:
        return 8

    # Fast path: Try exact match first (O(1) lookup)
    if type_str in _TYPE_SIZES_EXACT:
        return _TYPE_SIZES_EXACT[type_str]

    # Slow path: Substring match for compound types (e.g., "unsigned int")
    for type_name, size in _TYPE_SIZES_CONTAINS:
        if type_name in type_str:
            return size

    # Default for unknown types
    return 4
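
# Illustrative behaviour of the estimator above, traced from the tables as defined
# (a sketch, not an exhaustive test):
#   _estimate_type_size("int")            -> 4   (exact match)
#   _estimate_type_size("char *")         -> 8   (pointer fast path, 64-bit assumption)
#   _estimate_type_size("unsigned short") -> 2   (substring match on "short")
#   _estimate_type_size("unsigned int")   -> 4   (no substring entry matches; default of 4)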


def _extract_structures_from_disasm(disasm_ops: list) -> dict:
    """
    Extract structure-like patterns from disassembly.

    Analyzes memory access patterns to detect structure field accesses.
    For example: [rbx+0x4c], [rax+0x60], etc.

    Args:
        disasm_ops: List of disassembly operations from pdfj

    Returns:
        Dictionary of detected structures with fields
    """
    structures = {}

    # Pattern for memory accesses: [reg+offset] or [reg-offset]
    mem_pattern = re.compile(r"\[([a-z0-9]+)\s*([+-])\s*(0x[0-9a-f]+|[0-9]+)\]", re.IGNORECASE)

    for op in disasm_ops:
        if not isinstance(op, dict):
            continue

        opcode = op.get("opcode", "")
        disasm = op.get("disasm", "")

        # Look for memory access patterns
        matches = mem_pattern.findall(disasm)

        for reg, sign, offset_str in matches:
            # Skip stack-based accesses (usually local variables, not structures)
            if reg.lower() in ("rsp", "esp", "rbp", "ebp", "sp", "bp"):
                continue

            # Calculate offset
            try:
                offset = int(offset_str, 16) if offset_str.startswith("0x") else int(offset_str)
                if sign == "-":
                    offset = -offset
            except ValueError:
                continue

            # Only consider positive offsets (structure fields)
            if offset < 0:
                continue

            # Infer type from instruction
            field_type = _infer_type_from_instruction(opcode, disasm)

            # Group by register (potential structure pointer)
            struct_name = f"struct_ptr_{reg}"
            if struct_name not in structures:
                structures[struct_name] = {
                    "name": struct_name,
                    "fields": [],
                    "source": "memory_access_pattern",
                }

            # Check if we already have this offset
            existing_offsets = {f["offset"] for f in structures[struct_name]["fields"]}
            offset_hex = f"0x{offset:x}"

            if offset_hex not in existing_offsets:
                structures[struct_name]["fields"].append(
                    {
                        "offset": offset_hex,
                        "type": field_type,
                        "name": f"field_{offset:x}",
                        "size": _estimate_type_size(field_type),
                    }
                )

    return structures
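
# Example input/output for the pattern extraction above, using the pdfj-style keys
# the function actually reads ("opcode" and "disasm"); a sketch with hypothetical values:
#   ops = [{"opcode": "mov", "disasm": "mov eax, dword [rbx + 0x10]"}]
#   _extract_structures_from_disasm(ops)
#   -> {"struct_ptr_rbx": {"name": "struct_ptr_rbx",
#                          "fields": [{"offset": "0x10", "type": "uint32_t",
#                                      "name": "field_10", "size": 4}],
#                          "source": "memory_access_pattern"}}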


def _infer_type_from_instruction(opcode: str, disasm: str) -> str:
    """
    Infer the data type from the instruction.

    Args:
        opcode: Instruction opcode (e.g., "mov", "movss")
        disasm: Full disassembly string

    Returns:
        Inferred type string
    """
    opcode_lower = opcode.lower()
    disasm_lower = disasm.lower()

    # Floating point operations
    if any(x in opcode_lower for x in ("movss", "addss", "subss", "mulss", "divss", "comiss")):
        return "float"
    if any(x in opcode_lower for x in ("movsd", "addsd", "subsd", "mulsd", "divsd", "comisd")):
        return "double"
    if any(x in opcode_lower for x in ("movaps", "movups", "xmm")):
        return "float[4]"  # SSE vector

    # Size hints from operand suffixes
    if "byte" in disasm_lower or opcode_lower.endswith("b"):
        return "uint8_t"
    if "word" in disasm_lower and "dword" not in disasm_lower and "qword" not in disasm_lower:
        return "uint16_t"
    if "dword" in disasm_lower:
        return "uint32_t"
    if "qword" in disasm_lower:
        return "uint64_t"

    # Register-based inference
    if any(r in disasm_lower for r in ("rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9")):
        return "uint64_t"
    if any(r in disasm_lower for r in ("eax", "ebx", "ecx", "edx", "esi", "edi")):
        return "uint32_t"
    if any(r in disasm_lower for r in ("ax", "bx", "cx", "dx")):
        return "uint16_t"
    if any(r in disasm_lower for r in ("al", "bl", "cl", "dl", "ah", "bh", "ch", "dh")):
        return "uint8_t"

    # Default
    return "uint32_t"


def _validate_address_or_fail(address: str, param_name: str = "address"):
    """
    Validate address format and return failure ToolResult if invalid.

    This helper consolidates the repeated pattern of address validation
    with try-except and failure return.

    Args:
        address: Address string to validate
        param_name: Parameter name for error messages

    Returns:
        None if validation passes, or ToolResult failure if invalid

    Raises:
        No exceptions - all validation errors are converted to ToolResult failures
    """
    from reversecore_mcp.core.validators import validate_address_format

    try:
        validate_address_format(address, param_name)
        return None  # Validation passed
    except ValidationError as e:
        return failure("VALIDATION_ERROR", str(e))


def _parse_register_state(ar_output: str) -> dict:
    """
    Parse radare2 'ar' command output into structured register state.

    Args:
        ar_output: Raw output from 'ar' command

    Returns:
        Dictionary mapping register names to values

    Example output from 'ar':
        rax = 0x00000000
        rbx = 0x00401000
        ...
    """
    registers = {}

    for line in ar_output.strip().split("\n"):
        if "=" in line:
            parts = line.split("=")
            if len(parts) == 2:
                reg_name = parts[0].strip()
                reg_value = parts[1].strip()
                registers[reg_name] = reg_value

    return registers
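
# Round-trip sketch for the parser above:
#   _parse_register_state("rax = 0x00000000\nrbx = 0x00401000")
#   -> {"rax": "0x00000000", "rbx": "0x00401000"}
# Values are kept as the strings radare2 prints; lines that do not contain exactly
# one '=' are skipped.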


@log_execution(tool_name="emulate_machine_code")
@track_metrics("emulate_machine_code")
@handle_tool_errors
async def emulate_machine_code(
    file_path: str,
    start_address: str,
    instructions: int = 50,
    timeout: int = DEFAULT_TIMEOUT,
) -> ToolResult:
    """
    Emulate machine code execution using radare2 ESIL (Evaluable Strings Intermediate Language).

    This tool provides safe, sandboxed emulation of binary code without actual execution.
    Perfect for analyzing obfuscated code, understanding register states, and predicting
    execution outcomes without security risks.

    **Key Use Cases:**
    - De-obfuscation: Reveal hidden strings by emulating XOR/shift operations
    - Register Analysis: See final register values after code execution
    - Safe Malware Analysis: Predict behavior without running malicious code

    **Safety Features:**
    - Virtual CPU simulation (no real execution)
    - Instruction count limit (max 1000) prevents infinite loops
    - Memory sandboxing (changes don't affect host system)

    Args:
        file_path: Path to the binary file (must be in workspace)
        start_address: Address to start emulation (e.g., 'main', '0x401000', 'sym.decrypt')
        instructions: Number of instructions to execute (default 50, max 1000)
        timeout: Execution timeout in seconds

    Returns:
        ToolResult with register states and emulation summary
    """
    # 1. Parameter validation
    validate_tool_parameters(
        "emulate_machine_code",
        {"start_address": start_address, "instructions": instructions},
    )
    validated_path = validate_file_path(file_path)

    # 2. Security check for start address (prevent shell injection)
    validation_error = _validate_address_or_fail(start_address, "start_address")
    if validation_error:
        return validation_error

    # 3. Build radare2 ESIL emulation command chain
    # Note: Commands must be executed in specific order for ESIL to work correctly
    esil_cmds = [
        f"s {start_address}",  # Seek to start address
        "aei",  # Initialize ESIL VM
        "aeim",  # Initialize ESIL memory (stack)
        "aeip",  # Initialize program counter to current seek
        f"aes {instructions}",  # Step through N instructions
        "ar",  # Show all registers
    ]

    # 4. Execute emulation using helper
    try:
        output, bytes_read = await _execute_r2_command(
            validated_path,
            esil_cmds,
            analysis_level="aaa",
            max_output_size=10_000_000,
            base_timeout=timeout,
        )

        # 5. Parse register state
        register_state = _parse_register_state(output)

        if not register_state:
            return failure(
                "EMULATION_ERROR",
                "Failed to extract register state from emulation output",
                hint="The binary may not be compatible with ESIL emulation, or the start address is invalid",
            )

        # 6. Build result with metadata
        return success(
            register_state,
            bytes_read=bytes_read,
            format="register_state",
            instructions_executed=instructions,
            start_address=start_address,
            description=f"Emulated {instructions} instructions starting at {start_address}",
        )

    except Exception as e:
        return failure(
            "EMULATION_ERROR",
            f"ESIL emulation failed: {str(e)}",
            hint="Check that the binary architecture is supported and the start address is valid",
        )
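
# Illustrative call for the tool above (hypothetical workspace path and symbol):
#   result = await emulate_machine_code("/app/workspace/sample.bin", "sym.decrypt",
#                                       instructions=100)
# On success, the payload is the dict produced by _parse_register_state (e.g.
# {"rax": "0x0", "rip": "0x401234", ...}) and the metadata records the requested
# instruction count and the start address.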


@log_execution(tool_name="get_pseudo_code")
@track_metrics("get_pseudo_code")
@handle_tool_errors
async def get_pseudo_code(
    file_path: str,
    address: str = "main",
    timeout: int = 300,
) -> ToolResult:
    """
    Generate pseudo C code (decompilation) for a function using radare2's pdc command.

    This tool decompiles binary code into C-like pseudocode, making it much easier
    to understand program logic compared to raw assembly. The output can be further
    refined by AI for better readability.

    **Use Cases:**
    - Quick function understanding without reading assembly
    - AI-assisted code analysis and refactoring
    - Documentation generation from binaries
    - Reverse engineering workflow optimization

    **Note:** The output is "pseudo C" - it may not be syntactically perfect C,
    but provides a high-level representation of the function logic.

    Args:
        file_path: Path to the binary file (must be in workspace)
        address: Function address to decompile (e.g., 'main', '0x401000', 'sym.foo')
        timeout: Execution timeout in seconds (default 300)

    Returns:
        ToolResult with pseudo C code string

    Example:
        get_pseudo_code("/app/workspace/sample.exe", "main")
        # Returns C-like code representation of the main function
    """
    # 1. Validate file path
    validated_path = validate_file_path(file_path)

    # 2. Security check for address (prevent shell injection)
    validation_error = _validate_address_or_fail(address, "address")
    if validation_error:
        return validation_error

    # 3. Build the radare2 decompilation command
    r2_cmd = f"pdc @ {address}"

    # 4. Determine analysis level based on file size
    # Use 'aa' (basic) for large files to prevent timeouts
    analysis_level = "aa"
    try:
        file_size_mb = os.path.getsize(validated_path) / (1024 * 1024)
        if file_size_mb < 5:
            analysis_level = "aaa"  # Full analysis for small files
    except OSError:
        pass

    # 5. Execute decompilation using helper
    output, bytes_read = await _execute_r2_command(
        validated_path,
        [r2_cmd],
        analysis_level=analysis_level,
        max_output_size=10_000_000,
        base_timeout=timeout,
    )

    # 6. Check if output is valid
    if not output or output.strip() == "":
        return failure(
            "DECOMPILATION_ERROR",
            f"No decompilation output for address: {address}",
            hint="Verify the address exists and points to a valid function. Try analyzing with 'afl' first.",
        )

    # 7. Return pseudo C code
    return success(
        output,
        bytes_read=bytes_read,
        address=address,
        format="pseudo_c",
        analysis_level=analysis_level,
        description=f"Pseudo C code decompiled from address {address} (analysis: {analysis_level})",
    )
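
# Analysis-level selection above in practice: binaries smaller than 5 MB get a full
# 'aaa' pass before pdc; larger files (or files whose size cannot be read) stay on the
# lighter 'aa' pass, and the chosen level is echoed back in the result's
# analysis_level metadata.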


@alru_cache(maxsize=32)
@log_execution(tool_name="smart_decompile")
@track_metrics("smart_decompile")
@circuit_breaker("smart_decompile", failure_threshold=3, recovery_timeout=60)
@handle_tool_errors
async def _smart_decompile_impl(
    file_path: str,
    function_address: str,
    timeout: int = DEFAULT_TIMEOUT,
    use_ghidra: bool = True,
    _file_mtime: float = 0.0,  # Cache key includes mtime for invalidation on file change
) -> ToolResult:
    """
    Internal implementation of smart_decompile with caching.

    Note: _file_mtime parameter is used for cache invalidation when the file
    is modified (e.g., after patching with adaptive_vaccine).
    """
    # 1. Validate parameters
    validate_tool_parameters("smart_decompile", {"function_address": function_address})
    validated_path = validate_file_path(file_path)

    # 2. Security check for function address (prevent shell injection)
    validation_error = _validate_address_or_fail(function_address, "function_address")
    if validation_error:
        return validation_error

    # 3. Try Ghidra first if requested and available
    if use_ghidra:
        try:
            from reversecore_mcp.core.ghidra import (
                decompile_function_with_ghidra,
                ensure_ghidra_available,
            )

            if ensure_ghidra_available():
                logger.info(f"Using Ghidra decompiler for {function_address}")

                # Run Ghidra decompilation
                try:
                    c_code, metadata = decompile_function_with_ghidra(
                        validated_path, function_address, timeout
                    )

                    return success(
                        c_code,
                        function_address=function_address,
                        format="pseudo_c",
                        decompiler="ghidra",
                        **metadata,
                    )

                except Exception as ghidra_error:
                    logger.warning(
                        f"Ghidra decompilation failed: {ghidra_error}. Falling back to radare2"
                    )
                    # Fall through to radare2
            else:
                logger.info("Ghidra not available, using radare2")

        except ImportError:
            logger.info("PyGhidra not installed, using radare2")

    # 4. Fallback to radare2 (original implementation)
    logger.info(f"Using radare2 decompiler for {function_address}")

    r2_cmds = [f"pdc @ {function_address}"]

    # 5. Execute decompilation using helper
    try:
        output, bytes_read = await _execute_r2_command(
            validated_path,
            r2_cmds,
            analysis_level="aa",
            max_output_size=10_000_000,
            base_timeout=timeout,
        )
    except Exception as e:
        # If the 'aa' analysis pass fails there is no lighter fallback:
        # pdc requires at least basic analysis to produce output.
        return failure(
            "DECOMPILATION_ERROR",
            f"Radare2 decompilation failed: {str(e)}",
            hint="Analysis failed. The binary might be packed or corrupted.",
        )

    # Add timestamp for cache visibility
    import time

    timestamp = time.time()

    # 6. Return result
    return success(
        output,
        bytes_read=bytes_read,
        function_address=function_address,
        format="pseudo_c",
        decompiler="radare2",
        description=f"Decompiled code from function {function_address}",
        timestamp=timestamp,
    )


async def smart_decompile(
    file_path: str,
    function_address: str,
    timeout: int = DEFAULT_TIMEOUT,
    use_ghidra: bool = True,
    ctx: Context = None,
) -> ToolResult:
    """
    Decompile a function to pseudo C code using Ghidra or radare2.

    This tool provides decompilation for a specific function in a binary,
    making it easier to understand the logic without reading raw assembly.

    **Decompiler Selection:**
    - Ghidra (default): More accurate, better type recovery, industry-standard
    - radare2 (fallback): Faster, lighter weight, good for quick analysis

    Args:
        file_path: Path to the binary file (must be in workspace)
        function_address: Function address to decompile (e.g., 'main', '0x401000')
        timeout: Execution timeout in seconds (default: the configured default tool timeout)
        use_ghidra: Use Ghidra decompiler if available (default True)
        ctx: FastMCP Context (auto-injected)

    Returns:
        ToolResult with decompiled pseudo C code
    """
    import os
    import time

    # Get file mtime for cache invalidation (cache busts when file is modified)
    try:
        file_mtime = os.path.getmtime(file_path)
    except OSError:
        file_mtime = 0.0

    result = await _smart_decompile_impl(
        file_path, function_address, timeout, use_ghidra, _file_mtime=file_mtime
    )

    # Check for cache hit
    if result.status == "success" and result.metadata:
        ts = result.metadata.get("timestamp")
        if ts and (time.time() - ts > 1.0):
            result.metadata["cache_hit"] = True

    return result
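
# Cache behaviour sketch for the wrapper above (hypothetical path): because
# _smart_decompile_impl is wrapped in alru_cache and the file's mtime is part of the
# cache key, a repeated call with identical arguments returns the stored ToolResult:
#   first = await smart_decompile("/app/workspace/a.bin", "main")    # computed
#   again = await smart_decompile("/app/workspace/a.bin", "main")    # served from cache
# On the radare2 path the stored result carries a "timestamp" metadata entry, so once
# that timestamp is more than one second old the wrapper sets metadata["cache_hit"] = True.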


@log_execution(tool_name="recover_structures")
@track_metrics("recover_structures")
@handle_tool_errors
async def recover_structures(
    file_path: str,
    function_address: str,
    use_ghidra: bool = True,
    fast_mode: bool = True,
    timeout: int = DEFAULT_TIMEOUT * 5,
    ctx: Context = None,
) -> ToolResult:
    """
    Recover C++ class structures and data types from binary code.

    This is THE game-changer for C++ reverse engineering. Transforms cryptic
    "this + 0x4" memory accesses into meaningful "Player.health" structure fields.
    Uses Ghidra's powerful data type propagation and structure recovery algorithms.

    **Why Structure Recovery Matters:**
    - **C++ Analysis**: 99% of game clients and commercial apps are C++
    - **Understanding**: "this + 0x4" means nothing, "Player.health = 100" tells a story
    - **AI Comprehension**: AI can't understand raw offsets, but understands named fields
    - **Scale**: One structure definition can clarify thousands of lines of code

    **Performance Tips (for large binaries like game clients):**
    - Use `fast_mode=True` (default) to skip full binary analysis
    - Use `use_ghidra=False` for quick radare2-based analysis
    - For best results on first run, set `fast_mode=False` but expect a longer wait

    **How It Works:**
    1. Analyze memory access patterns in the function
    2. Identify structure layouts from offset usage
    3. Use data type propagation to infer field types
    4. Generate C structure definitions with meaningful names

    **Use Cases:**
    - Game hacking: Recover Player, Entity, Weapon structures
    - Malware analysis: Understand malware configuration structures
    - Vulnerability research: Find buffer overflow candidates in structs
    - Software auditing: Document undocumented data structures

    **Ghidra vs Radare2:**
    - Ghidra (default): Superior type recovery, structure propagation, C++ support
    - Radare2 (fallback): Basic structure definition, faster but less intelligent

    Args:
        file_path: Path to the binary file (must be in workspace)
        function_address: Function to analyze for structure usage (e.g., 'main', '0x401000')
        use_ghidra: Use Ghidra for advanced recovery (default True), or radare2 for basic
        fast_mode: Skip full binary analysis for faster startup (default True)
        timeout: Execution timeout in seconds (default: 5x the configured default tool timeout)
        ctx: FastMCP Context (auto-injected)

    Returns:
        ToolResult with recovered structures in C format:
        {
            "structures": [
                {
                    "name": "Player",
                    "size": 64,
                    "fields": [
                        {"offset": "0x0", "type": "int", "name": "health"},
                        {"offset": "0x4", "type": "int", "name": "armor"},
                        {"offset": "0x8", "type": "Vector3", "name": "position"}
                    ]
                }
            ],
            "c_definitions": "struct Player { int health; int armor; Vector3 position; };"
        }

    Example:
        # Fast structure recovery (recommended for large binaries)
        recover_structures("/app/workspace/game.exe", "main")

        # More thorough analysis (slower but more accurate)
        recover_structures("/app/workspace/game.exe", "main", fast_mode=False)

        # Use radare2 for quick analysis
        recover_structures("/app/workspace/binary", "0x401000", use_ghidra=False)
    """
    from reversecore_mcp.core.ghidra import ensure_ghidra_available

    # 1. Validate parameters
    validated_path = validate_file_path(file_path)

    # 2. Validate address format
    # OPTIMIZATION: Use pre-compiled regex pattern (faster)
    if not _FUNCTION_ADDRESS_PATTERN.match(_strip_address_prefixes(function_address)):
        return failure(
            "VALIDATION_ERROR",
            "Invalid function address format",
            hint="Address must contain only alphanumeric characters, dots, underscores, colons, angle brackets, and prefixes like '0x', 'sym.'",
        )

    # 3. Check if Ghidra is available when requested
    if use_ghidra:
        # Check availability and fall back if needed
        if not ensure_ghidra_available():
            # Instead of failing, fall back to radare2 with a warning in the description
            # This improves UX when Ghidra is optional but requested by default
            use_ghidra = False
            # We will append a note to the result description later
            fallback_note = " (Ghidra not available, fell back to radare2)"
        else:
            fallback_note = ""
            # 4a. Use Ghidra for advanced structure recovery
            try:
                from reversecore_mcp.core.ghidra import (
                    recover_structures_with_ghidra,
                )

                # Pass fast_mode to skip full binary analysis
                structures, metadata = recover_structures_with_ghidra(
                    validated_path, function_address, timeout, skip_full_analysis=fast_mode
                )

                mode_note = " (fast mode)" if fast_mode else " (full analysis)"
                return success(
                    {"structures": structures},
                    **metadata,
                    function_address=function_address,
                    method="ghidra",
                    fast_mode=fast_mode,
                    description=f"Structures recovered from {function_address} using Ghidra{mode_note}",
                )

            except Exception as e:
                # If Ghidra fails during execution, also fall back
                use_ghidra = False
                fallback_note = f" (Ghidra failed: {str(e)}, fell back to radare2)"

    if not use_ghidra:
        # 4b. Use radare2 for enhanced structure recovery
        # Multi-pronged approach:
        # 1. Function variables (afvj)
        # 2. Data types from binary (tj)
        # 3. Memory access patterns (axtj for structure field access)
        # 4. RTTI-based class detection

        import os

        file_size_mb = os.path.getsize(validated_path) / (1024 * 1024)

        # For structure recovery, we need deeper analysis than basic 'aa'
        # Use 'aaa' for structure recovery even on large files, but with timeout protection
        if fast_mode:
            # Fast mode: minimal analysis, may miss structures
            analysis_level = "aa"
            analysis_note = " (fast mode - may miss some structures)"
        else:
            # Full mode: thorough analysis for structure recovery
            # Even for large files, we need 'aaa' to detect types
            analysis_level = "aaa"
            analysis_note = " (full analysis)"

        # Enhanced command set for structure recovery
        r2_cmds = [
            f"s {function_address}",  # Seek to function
            "af",  # Analyze this function
            "afvj",  # Get function variables in JSON
            "afij",  # Get function info (size, type)
            "pdfj",  # Disassemble function - detect memory access patterns
        ]

        # Execute using helper
        output, bytes_read = await _execute_r2_command(
            validated_path,
            r2_cmds,
            analysis_level=analysis_level,
            max_output_size=10_000_000,
            base_timeout=timeout,
        )

        # 5. Parse radare2 output - enhanced parsing
        try:
            structures = {}
            detected_classes = []
            memory_accesses = []

            # Parse multi-command output
            outputs = output.strip().split("\n")

            # Try to parse each line as JSON
            variables = []
            function_info = {}
            disasm_ops = []

            valid_json_parsed = False
            for line in outputs:
                line = line.strip()
                if not line:
                    continue
                try:
                    parsed = json.loads(line)
                    valid_json_parsed = True
                    if isinstance(parsed, list):
                        # Could be variables (afvj) or disasm ops
                        if parsed and isinstance(parsed[0], dict):
                            if "name" in parsed[0] and "type" in parsed[0]:
                                variables = parsed
                            elif "opcode" in parsed[0]:
                                disasm_ops = parsed
                    elif isinstance(parsed, dict):
                        if "ops" in parsed:
                            disasm_ops = parsed.get("ops", [])
                        elif "name" in parsed:
                            function_info = parsed
                except json.JSONDecodeError:
                    continue

            # If we had output but failed to parse any JSON, raise error
            if output.strip() and not valid_json_parsed:
                raise json.JSONDecodeError("No valid JSON found in output", output, 0)

            # Extract structures from variables
            for var in variables:
                if isinstance(var, dict):
                    var_type = var.get("type", "unknown")
                    var_name = var.get("name", "unnamed")
                    offset = var.get("delta", 0)
                    kind = var.get("kind", "")

                    # Determine structure grouping
                    if "arg" in kind:
                        base = "args"
                    elif "var" in kind or "local" in kind:
                        base = "locals"
                    else:
                        base = (
                            var.get("ref", {}).get("base", "stack")
                            if isinstance(var.get("ref"), dict)
                            else "stack"
                        )

                    if base not in structures:
                        structures[base] = {
                            "name": f"struct_{base}",
                            "fields": [],
                            "source": "variables",
                        }

                    structures[base]["fields"].append(
                        {
                            "offset": f"0x{abs(offset):x}",
                            "type": var_type,
                            "name": var_name,
                            "size": _estimate_type_size(var_type),
                        }
                    )

            # Analyze disassembly for memory access patterns (structure field detection)
            struct_from_memory = _extract_structures_from_disasm(disasm_ops)
            for struct_name, struct_data in struct_from_memory.items():
                if struct_name not in structures:
                    structures[struct_name] = struct_data
                else:
                    # Merge fields
                    existing_offsets = {f["offset"] for f in structures[struct_name]["fields"]}
                    for field in struct_data["fields"]:
                        if field["offset"] not in existing_offsets:
                            structures[struct_name]["fields"].append(field)

            # Sort fields by offset within each structure
            for struct_data in structures.values():
                struct_data["fields"].sort(
                    key=lambda f: int(f["offset"], 16)
                    if f["offset"].startswith("0x")
                    else int(f["offset"])
                )

            # 6. Generate C structure definitions
            c_definitions = []
            for _struct_name, struct_data in structures.items():
                if not struct_data["fields"]:
                    continue

                field_strs = [
                    f" {field['type']} {field['name']}; // offset {field['offset']}, size ~{field.get('size', '?')} bytes"
                    for field in struct_data["fields"]
                ]
                fields_str = "\n".join(field_strs)

                c_def = f"struct {struct_data['name']} {{\n{fields_str}\n}};"
                c_definitions.append(c_def)

            # Filter out empty structures
            non_empty_structures = {k: v for k, v in structures.items() if v["fields"]}

            result = {
                "structures": list(non_empty_structures.values()),
                "c_definitions": "\n\n".join(c_definitions),
                "count": len(non_empty_structures),
                "analysis_mode": "fast" if fast_mode else "full",
            }

            desc = f"Structure recovery from {function_address} using radare2{analysis_note} (found {len(non_empty_structures)} structure(s))"
            if "fallback_note" in locals():
                desc += fallback_note

            # Add hint if no structures found
            hint = None
            if not non_empty_structures:  # OPTIMIZATION: Direct bool check instead of len() comparison
                hint = "No structures found. Try: 1) fast_mode=False for deeper analysis, 2) use_ghidra=True for C++ structures, 3) analyze a function that uses structures (not main/entry0)"

            return success(
                result,
                bytes_read=bytes_read,
                function_address=function_address,
                method="radare2",
                structure_count=len(non_empty_structures),
                description=desc,
                hint=hint,
            )

        except json.JSONDecodeError as e:
            return failure(
                "STRUCTURE_RECOVERY_ERROR",
                f"Failed to parse structure data: {str(e)}",
                hint="The function may not exist or may not use structures. Verify the address with 'afl' command.",
            )
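
# Shape of the radare2-path payload returned above (a sketch; the actual content
# depends on what afvj/pdfj report for the target function):
#   {"structures": [{"name": "struct_ptr_rbx",
#                    "fields": [{"offset": "0x10", "type": "uint32_t",
#                                "name": "field_10", "size": 4}],
#                    "source": "memory_access_pattern"}],
#    "c_definitions": "struct struct_ptr_rbx { ... };",
#    "count": 1,
#    "analysis_mode": "fast"}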

# Note: DecompilationPlugin has been removed.
# All tools (emulate_machine_code, get_pseudo_code, smart_decompile, recover_structures)
# are now registered via GhidraToolsPlugin in ghidra_tools.py for unified management.