iflow-mcp_developermode-korea_reversecore-mcp 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/METADATA +543 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/RECORD +79 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/WHEEL +5 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/top_level.txt +1 -0
- reversecore_mcp/__init__.py +9 -0
- reversecore_mcp/core/__init__.py +78 -0
- reversecore_mcp/core/audit.py +101 -0
- reversecore_mcp/core/binary_cache.py +138 -0
- reversecore_mcp/core/command_spec.py +357 -0
- reversecore_mcp/core/config.py +432 -0
- reversecore_mcp/core/container.py +288 -0
- reversecore_mcp/core/decorators.py +152 -0
- reversecore_mcp/core/error_formatting.py +93 -0
- reversecore_mcp/core/error_handling.py +142 -0
- reversecore_mcp/core/evidence.py +229 -0
- reversecore_mcp/core/exceptions.py +296 -0
- reversecore_mcp/core/execution.py +240 -0
- reversecore_mcp/core/ghidra.py +642 -0
- reversecore_mcp/core/ghidra_helper.py +481 -0
- reversecore_mcp/core/ghidra_manager.py +234 -0
- reversecore_mcp/core/json_utils.py +131 -0
- reversecore_mcp/core/loader.py +73 -0
- reversecore_mcp/core/logging_config.py +206 -0
- reversecore_mcp/core/memory.py +721 -0
- reversecore_mcp/core/metrics.py +198 -0
- reversecore_mcp/core/mitre_mapper.py +365 -0
- reversecore_mcp/core/plugin.py +45 -0
- reversecore_mcp/core/r2_helpers.py +404 -0
- reversecore_mcp/core/r2_pool.py +403 -0
- reversecore_mcp/core/report_generator.py +268 -0
- reversecore_mcp/core/resilience.py +252 -0
- reversecore_mcp/core/resource_manager.py +169 -0
- reversecore_mcp/core/result.py +132 -0
- reversecore_mcp/core/security.py +213 -0
- reversecore_mcp/core/validators.py +238 -0
- reversecore_mcp/dashboard/__init__.py +221 -0
- reversecore_mcp/prompts/__init__.py +56 -0
- reversecore_mcp/prompts/common.py +24 -0
- reversecore_mcp/prompts/game.py +280 -0
- reversecore_mcp/prompts/malware.py +1219 -0
- reversecore_mcp/prompts/report.py +150 -0
- reversecore_mcp/prompts/security.py +136 -0
- reversecore_mcp/resources.py +329 -0
- reversecore_mcp/server.py +727 -0
- reversecore_mcp/tools/__init__.py +49 -0
- reversecore_mcp/tools/analysis/__init__.py +74 -0
- reversecore_mcp/tools/analysis/capa_tools.py +215 -0
- reversecore_mcp/tools/analysis/die_tools.py +180 -0
- reversecore_mcp/tools/analysis/diff_tools.py +643 -0
- reversecore_mcp/tools/analysis/lief_tools.py +272 -0
- reversecore_mcp/tools/analysis/signature_tools.py +591 -0
- reversecore_mcp/tools/analysis/static_analysis.py +479 -0
- reversecore_mcp/tools/common/__init__.py +58 -0
- reversecore_mcp/tools/common/file_operations.py +352 -0
- reversecore_mcp/tools/common/memory_tools.py +516 -0
- reversecore_mcp/tools/common/patch_explainer.py +230 -0
- reversecore_mcp/tools/common/server_tools.py +115 -0
- reversecore_mcp/tools/ghidra/__init__.py +19 -0
- reversecore_mcp/tools/ghidra/decompilation.py +975 -0
- reversecore_mcp/tools/ghidra/ghidra_tools.py +1052 -0
- reversecore_mcp/tools/malware/__init__.py +61 -0
- reversecore_mcp/tools/malware/adaptive_vaccine.py +579 -0
- reversecore_mcp/tools/malware/dormant_detector.py +756 -0
- reversecore_mcp/tools/malware/ioc_tools.py +228 -0
- reversecore_mcp/tools/malware/vulnerability_hunter.py +519 -0
- reversecore_mcp/tools/malware/yara_tools.py +214 -0
- reversecore_mcp/tools/patch_explainer.py +19 -0
- reversecore_mcp/tools/radare2/__init__.py +13 -0
- reversecore_mcp/tools/radare2/r2_analysis.py +972 -0
- reversecore_mcp/tools/radare2/r2_session.py +376 -0
- reversecore_mcp/tools/radare2/radare2_mcp_tools.py +1183 -0
- reversecore_mcp/tools/report/__init__.py +4 -0
- reversecore_mcp/tools/report/email.py +82 -0
- reversecore_mcp/tools/report/report_mcp_tools.py +344 -0
- reversecore_mcp/tools/report/report_tools.py +1076 -0
- reversecore_mcp/tools/report/session.py +194 -0
- reversecore_mcp/tools/report_tools.py +11 -0
reversecore_mcp/core/ghidra.py
@@ -0,0 +1,642 @@
"""
Ghidra Integration Module

This module provides a unified interface for Ghidra decompilation and analysis.
It consolidates functionality from the previous ghidra_helper.py and ghidra_manager.py
to provide:
- JVM lifecycle management (singleton pattern)
- Project caching for performance
- Decompilation and structure recovery APIs
- Thread-safe operations

Usage:
    from reversecore_mcp.core.ghidra import ghidra_service

    # Check availability
    if ghidra_service.is_available():
        code = await ghidra_service.decompile_async(file_path, function_address)
"""

import asyncio
import os
import re
import shutil
import tempfile
import threading
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional

from reversecore_mcp.core.exceptions import ValidationError
from reversecore_mcp.core.logging_config import get_logger
from reversecore_mcp.core.r2_helpers import calculate_dynamic_timeout

if TYPE_CHECKING:
    from ghidra.program.flatapi import FlatProgramAPI
    from ghidra.program.model.listing import Function

logger = get_logger(__name__)

# Pre-compiled pattern for hex prefix removal
_HEX_PREFIX_PATTERN = re.compile(r"^0[xX]")


class GhidraService:
    """
    Unified Ghidra service for decompilation and analysis.

    This singleton class manages:
    - JVM lifecycle (started once, reused)
    - Project caching (LRU eviction)
    - Thread-safe operations
    - Async wrapper methods
    """

    _instance: Optional["GhidraService"] = None
    _lock = threading.RLock()

    def __new__(cls) -> "GhidraService":
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self) -> None:
        if self._initialized:
            return

        self._jvm_started = False
        self._projects: dict[str, Any] = {}
        self._project_lock = threading.RLock()
        self._max_projects: int | None = None  # Lazy-loaded from config
        self._pyghidra = None
        self._flat_program_api = None
        self._initialized = True

    @property
    def max_projects(self) -> int:
        """Get max projects from config (lazy-loaded)."""
        if self._max_projects is None:
            try:
                from reversecore_mcp.core.config import get_config
                self._max_projects = get_config().ghidra_max_projects
            except Exception:
                self._max_projects = 3  # Default fallback
        return self._max_projects

    def is_available(self) -> bool:
        """Check if Ghidra and PyGhidra are available."""
        try:
            import pyghidra  # noqa: F401

            return True
        except ImportError:
            return False

    def _configure_environment(self) -> None:
        """Configure environment variables for Ghidra (JAVA_HOME, etc.)."""
        if os.environ.get("JAVA_HOME"):
            return

        java_path = shutil.which("java")
        if java_path:
            try:
                real_path = Path(java_path).resolve()
                if real_path.name == "java" and real_path.parent.name == "bin":
                    java_home = real_path.parent.parent
                    os.environ["JAVA_HOME"] = str(java_home)
                    logger.info(f"Set JAVA_HOME to {java_home}")
            except Exception as e:
                logger.warning(f"Failed to resolve JAVA_HOME: {e}")

    def _ensure_jvm_started(self) -> None:
        """Start the JVM if not already started."""
        if self._jvm_started:
            return

        with self._lock:
            if self._jvm_started:
                return

            try:
                import pyghidra

                self._configure_environment()

                # OPTIMIZATION: Configure JVM for large binary analysis
                # Memory settings optimized for modern systems (24-32GB RAM)
                jvm_args = [
                    "-Xms2g",  # Initial heap size (2GB)
                    "-Xmx16g",  # Maximum heap size (16GB) - handles very large binaries
                    "-XX:+UseG1GC",  # G1 garbage collector (better for large heaps)
                    "-XX:MaxGCPauseMillis=200",  # Limit GC pause time
                    "-XX:+ParallelRefProcEnabled",  # Parallel reference processing
                    "-XX:G1HeapRegionSize=32m",  # Larger regions for big objects
                    "-XX:InitiatingHeapOccupancyPercent=35",  # Start GC earlier
                ]

                logger.info("Starting Ghidra JVM...")
                try:
                    pyghidra.start(jvm_args=jvm_args)
                except Exception as e:
                    logger.debug(f"pyghidra.start() result: {e}")

                self._pyghidra = pyghidra
                self._jvm_started = True
                logger.info("Ghidra JVM started successfully")

            except ImportError:
                logger.error("pyghidra not installed")
                raise ImportError("pyghidra not installed. Install with: pip install pyghidra")
            except Exception as e:
                logger.error(f"Failed to start Ghidra JVM: {e}")
                raise

    def _get_project(self, file_path: str) -> tuple[Any, Any, Any]:
        """Get or load a cached project for the given file."""
        with self._project_lock:
            if file_path in self._projects:
                # Re-insert to mark as most recently used
                val = self._projects.pop(file_path)
                self._projects[file_path] = val
                return val

            # Evict the least recently used project if the cache is full
            # (plain dicts preserve insertion order, so the first key is the oldest)
            while len(self._projects) >= self.max_projects:
                oldest_path = next(iter(self._projects))
                _, _, old_ctx = self._projects.pop(oldest_path)
                logger.info(f"Evicting Ghidra project: {oldest_path}")
                try:
                    old_ctx.__exit__(None, None, None)
                except Exception as e:
                    logger.warning(f"Error closing evicted project: {e}")

            logger.info(f"Loading Ghidra project: {file_path}")
            ctx = self._pyghidra.open_program(file_path)
            flat_api = ctx.__enter__()
            program = flat_api.getCurrentProgram()

            self._projects[file_path] = (program, flat_api, ctx)
            return program, flat_api, ctx

    def _invalidate_project(self, file_path: str) -> None:
        """Remove a project from cache on error."""
        with self._project_lock:
            if file_path in self._projects:
                del self._projects[file_path]

    def _resolve_function(
        self, flat_api: "FlatProgramAPI", address_str: str, create_if_missing: bool = True
    ) -> Optional["Function"]:
        """
        Resolve a function from address string or symbol name.

        Args:
            flat_api: Ghidra FlatProgramAPI instance
            address_str: Function address (hex) or symbol name
            create_if_missing: If True, create a function at the address if none exists
        """
        program = flat_api.getCurrentProgram()
        function_manager = program.getFunctionManager()

        # Try as symbol name first
        symbol_table = program.getSymbolTable()
        symbols = symbol_table.getSymbols(address_str)

        if symbols.hasNext():
            symbol = symbols.next()
            address = symbol.getAddress()
            func = function_manager.getFunctionAt(address)
            if func is not None:
                return func

        # Try as hex address
        addr_str = _HEX_PREFIX_PATTERN.sub("", address_str)
        address = None

        try:
            address = flat_api.toAddr(int(addr_str, 16))
            func = function_manager.getFunctionAt(address)
            if func is not None:
                return func
        except Exception:
            pass

        # Try to find the function containing this address
        if address is not None:
            try:
                func = function_manager.getFunctionContaining(address)
                if func is not None:
                    return func
            except Exception:
                pass

        # If no function was found and create_if_missing is set, create one at the address.
        # This is needed when analyze=False (no auto function detection).
        if create_if_missing and address is not None:
            try:
                logger.info(f"Creating function at address: {address}")
                func = flat_api.createFunction(address, None)  # Auto-generate name
                if func is not None:
                    return func
            except Exception as e:
                logger.debug(f"Failed to create function: {e}")

        return None

    def _extract_structure_fields(self, data_type) -> list:
        """Extract fields from a Ghidra data type structure."""
        fields = []

        if not hasattr(data_type, "getNumComponents"):
            return fields

        num_components = data_type.getNumComponents()

        for j in range(num_components):
            component = data_type.getComponent(j)
            field_name = component.getFieldName()
            field_type = component.getDataType().getName()
            field_offset = component.getOffset()
            field_size = component.getLength()

            fields.append(
                {
                    "offset": f"0x{field_offset:x}",
                    "type": field_type,
                    "name": field_name if field_name else f"field_{field_offset:x}",
                    "size": field_size,
                }
            )

        return fields

    def decompile(
        self,
        file_path: str,
        function_address: str | None = None,
        timeout: int | None = None,
    ) -> tuple[str, dict[str, Any]]:
        """
        Decompile a function using Ghidra.

        Args:
            file_path: Path to the binary file
            function_address: Function address (hex string or symbol name)
            timeout: Maximum execution time in seconds (uses dynamic timeout if None)

        Returns:
            Tuple of (decompiled_code, metadata)

        Raises:
            ValidationError: If decompilation fails
            ImportError: If PyGhidra is not available
        """
        # Calculate dynamic timeout based on file size
        effective_timeout = (
            timeout if timeout else calculate_dynamic_timeout(file_path, base_timeout=300)
        )

        self._ensure_jvm_started()

        with self._lock:
            try:
                program, flat_api, _ = self._get_project(file_path)

                from ghidra.app.decompiler import DecompInterface
                from ghidra.util.task import ConsoleTaskMonitor

                decompiler = DecompInterface()
                decompiler.openProgram(program)
                monitor = ConsoleTaskMonitor()

                try:
                    if not function_address:
                        return "// Please specify a function address", {}

                    function = self._resolve_function(flat_api, function_address)

                    if function is None:
                        raise ValidationError(
                            f"Could not find function at address: {function_address}",
                            details={"address": function_address},
                        )

                    logger.info(f"Decompiling function: {function.getName()}")
                    results = decompiler.decompileFunction(function, effective_timeout, monitor)

                    if not results.decompileCompleted():
                        error_msg = results.getErrorMessage()
                        raise ValidationError(
                            f"Decompilation failed: {error_msg}",
                            details={
                                "function": function.getName(),
                                "address": function_address,
                            },
                        )

                    decompiled_function = results.getDecompiledFunction()
                    c_code = decompiled_function.getC()

                    high_function = results.getHighFunction()
                    metadata = {
                        "function_name": function.getName(),
                        "entry_point": str(function.getEntryPoint()),
                        "parameter_count": (
                            high_function.getFunctionPrototype().getNumParams()
                            if high_function
                            else 0
                        ),
                        "local_symbol_count": (
                            high_function.getLocalSymbolMap().getNumSymbols()
                            if high_function
                            else 0
                        ),
                        "signature": function.getSignature().getPrototypeString(),
                        "body_size": function.getBody().getNumAddresses(),
                        "decompiler": "ghidra",
                    }

                    logger.info(f"Successfully decompiled {function.getName()}")
                    return c_code, metadata

                finally:
                    decompiler.dispose()

            except Exception as e:
                logger.error(f"Ghidra decompilation failed: {e}")
                self._invalidate_project(file_path)
                raise

    async def decompile_async(
        self,
        file_path: str,
        function_address: str | None = None,
        timeout: int | None = None,
    ) -> tuple[str, dict[str, Any]]:
        """Execute decompilation asynchronously with dynamic timeout."""
        return await asyncio.to_thread(self.decompile, file_path, function_address, timeout)

    def recover_structures(
        self,
        file_path: str,
        function_address: str,
        timeout: int | None = None,
        skip_full_analysis: bool = True,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """
        Recover structure definitions from a function.

        Args:
            file_path: Path to the binary file
            function_address: Function address (hex string or symbol name)
            timeout: Maximum execution time in seconds (uses dynamic timeout if None)
            skip_full_analysis: Skip full binary analysis for faster startup (default True).
                The decompiler performs targeted analysis on the function.

        Returns:
            Tuple of (structures_dict, metadata_dict)

        Performance Notes:
            - skip_full_analysis=True (default): ~30-60 seconds for large binaries
            - skip_full_analysis=False: Can take 10+ minutes, uses more memory
            - For repeated analysis of the same binary, results are cached in the JVM
        """
        # Calculate dynamic timeout based on file size
        effective_timeout = (
            timeout if timeout else calculate_dynamic_timeout(file_path, base_timeout=300)
        )

        self._ensure_jvm_started()

        with tempfile.TemporaryDirectory() as temp_dir:
            project_location = Path(temp_dir) / "ghidra_project"
            project_name = "struct_analysis"

            try:
                logger.info(
                    f"Analyzing structures in: {file_path} (skip_full_analysis={skip_full_analysis})"
                )

                # OPTIMIZATION: Always skip full analysis for structure recovery.
                # The decompiler performs targeted analysis on the specific function;
                # full analysis is rarely needed and extremely slow on large binaries.
                analyze = False  # Force skip - decompiler handles function analysis

                with self._pyghidra.open_program(
                    str(file_path),
                    project_location=str(project_location),
                    project_name=project_name,
                    analyze=analyze,
                ) as flat_api:
                    from ghidra.app.decompiler import DecompInterface

                    program = flat_api.getCurrentProgram()
                    function = self._resolve_function(flat_api, function_address)

                    if function is None:
                        raise ValidationError(
                            f"Could not find function at address: {function_address}",
                            details={"address": function_address},
                        )

                    decompiler = DecompInterface()
                    decompiler.openProgram(program)

                    try:
                        results = decompiler.decompileFunction(function, effective_timeout, None)

                        if not results.decompileCompleted():
                            error_msg = results.getErrorMessage()
                            raise ValidationError(
                                f"Structure analysis failed: {error_msg}",
                                details={
                                    "function": function.getName(),
                                    "address": function_address,
                                },
                            )

                        high_function = results.getHighFunction()
                        if high_function is None:
                            raise ValidationError(
                                "Could not get high-level function representation",
                                details={"function": function.getName()},
                            )

                        structures_found = {}
                        local_symbols = high_function.getLocalSymbolMap()

                        # Analyze local variables
                        for i in range(local_symbols.getNumSymbols()):
                            symbol = local_symbols.getSymbol(i)
                            high_var = symbol.getHighVariable()

                            if high_var is not None:
                                data_type = high_var.getDataType()

                                if data_type is not None:
                                    type_name = data_type.getName()

                                    if "struct" in type_name.lower() or data_type.getLength() > 8:
                                        actual_type = data_type
                                        if hasattr(data_type, "getDataType"):
                                            actual_type = data_type.getDataType()

                                        struct_name = actual_type.getName()
                                        if struct_name not in structures_found:
                                            fields = self._extract_structure_fields(actual_type)
                                            structures_found[struct_name] = {
                                                "name": struct_name,
                                                "size": actual_type.getLength(),
                                                "fields": fields,
                                            }

                        # Analyze function parameters
                        for param in function.getParameters():
                            param_type = param.getDataType()
                            if param_type is not None:
                                type_name = param_type.getName()
                                if (
                                    "struct" in type_name.lower()
                                    and type_name not in structures_found
                                ):
                                    fields = self._extract_structure_fields(param_type)
                                    structures_found[type_name] = {
                                        "name": type_name,
                                        "size": param_type.getLength(),
                                        "fields": fields,
                                    }

                        # Generate C definitions
                        c_definitions = []
                        for struct_name, struct_data in structures_found.items():
                            if struct_data["fields"]:
                                field_strs = [
                                    f"{f['type']} {f['name']}; // offset {f['offset']}, size {f['size']}"
                                    for f in struct_data["fields"]
                                ]
                                fields_str = "\n ".join(field_strs)
                                c_def = f"struct {struct_name} {{\n {fields_str}\n}};"
                            else:
                                c_def = f"struct {struct_name} {{ /* size: {struct_data['size']} bytes */ }};"
                            c_definitions.append(c_def)

                        result = {
                            "structures": list(structures_found.values()),
                            "c_definitions": "\n\n".join(c_definitions)
                            if c_definitions
                            else "// No structures found",
                            "count": len(structures_found),
                        }

                        metadata = {
                            "function_name": function.getName(),
                            "entry_point": str(function.getEntryPoint()),
                            "structure_count": len(structures_found),
                            "analyzed_variables": local_symbols.getNumSymbols(),
                            "decompiler": "ghidra",
                        }

                        logger.info(
                            f"Recovered {len(structures_found)} structure(s) from {function.getName()}"
                        )
                        return result, metadata

                    finally:
                        decompiler.dispose()

            except Exception as e:
                logger.error(f"Ghidra structure recovery failed: {e}", exc_info=True)
                raise

    async def recover_structures_async(
        self,
        file_path: str,
        function_address: str,
        timeout: int | None = None,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Execute structure recovery asynchronously with dynamic timeout."""
        return await asyncio.to_thread(
            self.recover_structures, file_path, function_address, timeout
        )

    def get_version(self) -> str | None:
        """Get the installed Ghidra version."""
        try:
            self._ensure_jvm_started()
            from ghidra import framework

            version = framework.Application.getApplicationVersion()
            return str(version)
        except Exception:
            return None

    def close_all(self) -> None:
        """Close all cached projects and clean up resources."""
        with self._project_lock:
            for file_path, (_program, _flat_api, ctx) in list(self._projects.items()):
                try:
                    ctx.__exit__(None, None, None)
                except Exception as e:
                    logger.warning(f"Error closing project {file_path}: {e}")
            self._projects.clear()
            logger.info("All Ghidra projects closed")


# Global singleton instance
ghidra_service = GhidraService()


# Legacy compatibility aliases (deprecated - will be removed in a future version)
def ensure_ghidra_available() -> bool:
    """Check if Ghidra is available. (Deprecated: use ghidra_service.is_available())"""
    return ghidra_service.is_available()


def decompile_function_with_ghidra(
    file_path: Path, function_address: str, timeout: int = 300
) -> tuple[str, dict[str, Any]]:
    """
    Decompile a function using Ghidra.
    (Deprecated: use ghidra_service.decompile())
    """
    return ghidra_service.decompile(str(file_path), function_address, timeout)


def recover_structures_with_ghidra(
    file_path: Path, function_address: str, timeout: int = 600, skip_full_analysis: bool = True
) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    Recover structures using Ghidra.

    Args:
        file_path: Path to the binary file
        function_address: Function address or name
        timeout: Timeout in seconds
        skip_full_analysis: Skip full binary analysis for faster startup (default True)

    (Deprecated: use ghidra_service.recover_structures())
    """
    return ghidra_service.recover_structures(
        str(file_path), function_address, timeout, skip_full_analysis
    )


def get_ghidra_version() -> str | None:
    """
    Get Ghidra version.
    (Deprecated: use ghidra_service.get_version())
    """
    return ghidra_service.get_version()


# Also provide GhidraManager alias for backward compatibility
class GhidraManager(GhidraService):
    """
    Legacy alias for GhidraService.
    (Deprecated: use GhidraService or ghidra_service directly)
    """

    pass


ghidra_manager = ghidra_service