iflow-mcp_developermode-korea_reversecore-mcp 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/METADATA +543 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/RECORD +79 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/WHEEL +5 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/top_level.txt +1 -0
- reversecore_mcp/__init__.py +9 -0
- reversecore_mcp/core/__init__.py +78 -0
- reversecore_mcp/core/audit.py +101 -0
- reversecore_mcp/core/binary_cache.py +138 -0
- reversecore_mcp/core/command_spec.py +357 -0
- reversecore_mcp/core/config.py +432 -0
- reversecore_mcp/core/container.py +288 -0
- reversecore_mcp/core/decorators.py +152 -0
- reversecore_mcp/core/error_formatting.py +93 -0
- reversecore_mcp/core/error_handling.py +142 -0
- reversecore_mcp/core/evidence.py +229 -0
- reversecore_mcp/core/exceptions.py +296 -0
- reversecore_mcp/core/execution.py +240 -0
- reversecore_mcp/core/ghidra.py +642 -0
- reversecore_mcp/core/ghidra_helper.py +481 -0
- reversecore_mcp/core/ghidra_manager.py +234 -0
- reversecore_mcp/core/json_utils.py +131 -0
- reversecore_mcp/core/loader.py +73 -0
- reversecore_mcp/core/logging_config.py +206 -0
- reversecore_mcp/core/memory.py +721 -0
- reversecore_mcp/core/metrics.py +198 -0
- reversecore_mcp/core/mitre_mapper.py +365 -0
- reversecore_mcp/core/plugin.py +45 -0
- reversecore_mcp/core/r2_helpers.py +404 -0
- reversecore_mcp/core/r2_pool.py +403 -0
- reversecore_mcp/core/report_generator.py +268 -0
- reversecore_mcp/core/resilience.py +252 -0
- reversecore_mcp/core/resource_manager.py +169 -0
- reversecore_mcp/core/result.py +132 -0
- reversecore_mcp/core/security.py +213 -0
- reversecore_mcp/core/validators.py +238 -0
- reversecore_mcp/dashboard/__init__.py +221 -0
- reversecore_mcp/prompts/__init__.py +56 -0
- reversecore_mcp/prompts/common.py +24 -0
- reversecore_mcp/prompts/game.py +280 -0
- reversecore_mcp/prompts/malware.py +1219 -0
- reversecore_mcp/prompts/report.py +150 -0
- reversecore_mcp/prompts/security.py +136 -0
- reversecore_mcp/resources.py +329 -0
- reversecore_mcp/server.py +727 -0
- reversecore_mcp/tools/__init__.py +49 -0
- reversecore_mcp/tools/analysis/__init__.py +74 -0
- reversecore_mcp/tools/analysis/capa_tools.py +215 -0
- reversecore_mcp/tools/analysis/die_tools.py +180 -0
- reversecore_mcp/tools/analysis/diff_tools.py +643 -0
- reversecore_mcp/tools/analysis/lief_tools.py +272 -0
- reversecore_mcp/tools/analysis/signature_tools.py +591 -0
- reversecore_mcp/tools/analysis/static_analysis.py +479 -0
- reversecore_mcp/tools/common/__init__.py +58 -0
- reversecore_mcp/tools/common/file_operations.py +352 -0
- reversecore_mcp/tools/common/memory_tools.py +516 -0
- reversecore_mcp/tools/common/patch_explainer.py +230 -0
- reversecore_mcp/tools/common/server_tools.py +115 -0
- reversecore_mcp/tools/ghidra/__init__.py +19 -0
- reversecore_mcp/tools/ghidra/decompilation.py +975 -0
- reversecore_mcp/tools/ghidra/ghidra_tools.py +1052 -0
- reversecore_mcp/tools/malware/__init__.py +61 -0
- reversecore_mcp/tools/malware/adaptive_vaccine.py +579 -0
- reversecore_mcp/tools/malware/dormant_detector.py +756 -0
- reversecore_mcp/tools/malware/ioc_tools.py +228 -0
- reversecore_mcp/tools/malware/vulnerability_hunter.py +519 -0
- reversecore_mcp/tools/malware/yara_tools.py +214 -0
- reversecore_mcp/tools/patch_explainer.py +19 -0
- reversecore_mcp/tools/radare2/__init__.py +13 -0
- reversecore_mcp/tools/radare2/r2_analysis.py +972 -0
- reversecore_mcp/tools/radare2/r2_session.py +376 -0
- reversecore_mcp/tools/radare2/radare2_mcp_tools.py +1183 -0
- reversecore_mcp/tools/report/__init__.py +4 -0
- reversecore_mcp/tools/report/email.py +82 -0
- reversecore_mcp/tools/report/report_mcp_tools.py +344 -0
- reversecore_mcp/tools/report/report_tools.py +1076 -0
- reversecore_mcp/tools/report/session.py +194 -0
- reversecore_mcp/tools/report_tools.py +11 -0
|
@@ -0,0 +1,756 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dormant Detector: Hybrid Reverse Engineering Tool.
|
|
3
|
+
|
|
4
|
+
This tool combines static analysis and partial emulation to detect hidden malicious behaviors
|
|
5
|
+
(Logic Bombs, Dormant Malware) that are often missed by traditional dynamic analysis.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from async_lru import alru_cache
|
|
14
|
+
from fastmcp import Context, FastMCP
|
|
15
|
+
|
|
16
|
+
from reversecore_mcp.core import json_utils as json # Use optimized JSON (3-5x faster)
|
|
17
|
+
try:
|
|
18
|
+
import lief
|
|
19
|
+
except ImportError:
|
|
20
|
+
lief = None
|
|
21
|
+
|
|
22
|
+
from reversecore_mcp.core.decorators import log_execution
|
|
23
|
+
from reversecore_mcp.core.error_handling import handle_tool_errors
|
|
24
|
+
from reversecore_mcp.core.exceptions import ValidationError
|
|
25
|
+
from reversecore_mcp.core.execution import execute_subprocess_async
|
|
26
|
+
from reversecore_mcp.core.logging_config import get_logger
|
|
27
|
+
from reversecore_mcp.core.metrics import track_metrics
|
|
28
|
+
from reversecore_mcp.core.r2_helpers import calculate_dynamic_timeout
|
|
29
|
+
from reversecore_mcp.core.result import ToolResult, failure, success
|
|
30
|
+
from reversecore_mcp.core.security import validate_file_path
|
|
31
|
+
|
|
32
|
+
logger = get_logger(__name__)
|
|
33
|
+
|
|
34
|
+
# OPTIMIZATION: Pre-compile regex patterns for identifier validation
# (compiled once at import time so hot validation paths avoid repeated
# pattern-cache lookups).
_HEX_ADDRESS_PATTERN = re.compile(r"^0x[0-9a-fA-F]+$")  # hex address, e.g. "0x401000"
_SYMBOL_PATTERN = re.compile(r"^sym\.[a-zA-Z0-9_\.]+$")  # radare2 symbol, e.g. "sym.imp.printf"
_FUNCTION_NAME_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")  # bare C-style function name
_REG_PATTERN = re.compile(r"^[a-z0-9]+$")  # CPU register name, e.g. "eax", "r9"
_VALUE_PATTERN = re.compile(r"^(0x[0-9a-fA-F]+|\d+)$")  # immediate value (hex or decimal)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _extract_json_safely(output: str) -> Any | None:
|
|
43
|
+
"""Extract JSON from radare2 output using robust state-machine parser.
|
|
44
|
+
|
|
45
|
+
Delegates to r2_helpers.parse_json_output which uses O(n) algorithm
|
|
46
|
+
instead of fragile regex patterns.
|
|
47
|
+
"""
|
|
48
|
+
if not output or not output.strip():
|
|
49
|
+
return None
|
|
50
|
+
|
|
51
|
+
try:
|
|
52
|
+
from reversecore_mcp.core.r2_helpers import parse_json_output
|
|
53
|
+
return parse_json_output(output)
|
|
54
|
+
except (json.JSONDecodeError, Exception) as e:
|
|
55
|
+
logger.warning(f"Failed to extract valid JSON from radare2 output: {e}")
|
|
56
|
+
return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _validate_r2_identifier(identifier: str) -> str:
    """Validate and sanitize a radare2 function/address identifier.

    Accepted forms: hex addresses (0x...), radare2 symbols (sym.*), and
    plain C-style function names. Anything else is rejected so the value
    is safe to interpolate into an r2 command string.

    Raises:
        ValidationError: when the identifier matches none of the forms.
    """
    # Try each pre-compiled accepted form in turn; first match wins.
    for accepted in (_HEX_ADDRESS_PATTERN, _SYMBOL_PATTERN, _FUNCTION_NAME_PATTERN):
        if accepted.match(identifier):
            return identifier

    raise ValidationError(
        f"Invalid radare2 identifier: '{identifier}'. "
        "Must be hex address (0x...) or valid symbol name."
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _check_game_context(file_path: Path) -> dict[str, Any]:
    """
    Check if the file is likely a game client to reduce false positives.

    Checks for:
    1. Game-related imported libraries (D3D, OpenGL, FMOD, Unity, etc.)
    2. Digital signatures from known game publishers.
    3. Game-engine sections (Unity "resS", IL2CPP).

    Returns a dict with keys:
        is_game (bool): True once the accumulated score reaches 20.
        score (int): +10 per game library/section hit, +50 per trusted signer.
        indicators (list[str]): human-readable evidence strings.
    """
    context = {"is_game": False, "score": 0, "indicators": []}

    # LIEF is optional (import guarded at module top); without it we cannot
    # inspect the binary, so report "not a game" rather than failing.
    if not lief:
        return context

    try:
        binary = lief.parse(str(file_path))
        if not binary:
            return context

        # 1. Check Libraries
        game_libs = {
            "d3d", "dxgi", "opengl", "vulkan", "fmod", "wwise",
            "unity", "unreal", "physx", "steam_api", "galaxy",
            "battleye", "easyanticheat", "mono", "discord"
        }

        # Binary.libraries is available for PE, ELF, MachO in LIEF
        if hasattr(binary, "libraries"):
            for lib in binary.libraries:
                lib_lower = lib.lower()
                # Substring match: "d3d9.dll", "UnityPlayer.dll", etc.
                if any(g in lib_lower for g in game_libs):
                    context["indicators"].append(f"Imported Game Lib: {lib}")
                    context["score"] += 10

        # 2. Check Signature (PE specific)
        if hasattr(binary, "signatures") and binary.signatures:
            for sig in binary.signatures:
                # Basic check for trusted publishers in signer info.
                # LIEF structure varies by version, handling broadly.
                try:
                    if hasattr(sig, "signers"):
                        for signer in sig.signers:
                            # Verify issuer string
                            issuer = str(signer.issuer).lower()
                            trusted = ["blizzard", "electronic arts", "ubisoft", "valve",
                                       "unity", "epic games", "riot games", "nexon", "ncsoft"]
                            if any(t in issuer for t in trusted):
                                context["indicators"].append(f"Trusted Publisher: {signer.issuer}")
                                context["score"] += 50
                except Exception:
                    # Swallowed deliberately: signature object layout differs
                    # across LIEF versions; a parse failure here must not
                    # abort the whole context check.
                    pass

        # 3. Check for specific sections (e.g. Unity resS)
        if hasattr(binary, "sections"):
            for section in binary.sections:
                # "resS" (Unity resources) / "il2cpp" sections mark game engines.
                if "ress" in section.name.lower() or "il2cpp" in section.name.lower():
                    context["indicators"].append(f"Game Section: {section.name}")
                    context["score"] += 10

    except Exception as e:
        # Best-effort: any LIEF failure leaves the default "not a game".
        logger.debug(f"Game context check failed: {e}")

    # Threshold: two library/section hits, or one trusted signature, suffices.
    if context["score"] >= 20:
        context["is_game"] = True

    return context
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def register_dormant_detector(mcp: FastMCP) -> None:
    """Attach the Dormant Detector tool to the given FastMCP server instance."""
    # Registration is a single call; the tool's metadata comes from the
    # decorated `dormant_detector` coroutine itself.
    mcp.tool(dormant_detector)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _get_file_cache_key(file_path: str) -> str:
|
|
148
|
+
"""Generate a cache key based on file path and modification time.
|
|
149
|
+
|
|
150
|
+
This ensures cache invalidation when the file is modified.
|
|
151
|
+
"""
|
|
152
|
+
try:
|
|
153
|
+
stat = os.stat(file_path)
|
|
154
|
+
return f"{file_path}:{stat.st_mtime}:{stat.st_size}"
|
|
155
|
+
except OSError:
|
|
156
|
+
# If file doesn't exist or can't be accessed, use path only
|
|
157
|
+
return file_path
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@alru_cache(maxsize=64, ttl=300)  # keep up to 64 results for 5 minutes
async def _run_r2_cmd_cached(
    cache_key: str, file_path: str, cmd: str, timeout: int | None = None
) -> str:
    """Run a single radare2 command, memoizing the result.

    ``cache_key`` embeds the file's mtime/size (see ``_get_file_cache_key``),
    so edits to the binary automatically miss the cache. When ``timeout``
    is falsy, one is derived from the file size.
    """
    if timeout:
        effective_timeout = timeout
    else:
        # Scale the timeout with file size so large binaries get more time.
        effective_timeout = calculate_dynamic_timeout(file_path, base_timeout=30)

    argv = ["radare2", "-q", "-c", cmd, str(file_path)]
    stdout, _ = await execute_subprocess_async(argv, timeout=effective_timeout)
    return stdout
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
async def _run_r2_cmd(
    file_path: str, cmd: str, timeout: int | None = None, use_cache: bool = True
) -> str:
    """Run a single radare2 command, optionally through the result cache.

    Args:
        file_path: Path to the binary file.
        cmd: Radare2 command to execute.
        timeout: Command timeout in seconds; derived from file size when
            not given.
        use_cache: Route through the mtime-aware cache (default True).
            Pass False for commands whose output must always be fresh.

    Returns:
        The command's stdout as a string.
    """
    if timeout:
        effective_timeout = timeout
    else:
        effective_timeout = calculate_dynamic_timeout(file_path, base_timeout=30)

    if use_cache:
        return await _run_r2_cmd_cached(
            _get_file_cache_key(file_path), file_path, cmd, effective_timeout
        )

    # Uncached path: used for commands with side effects or where a stale
    # transcript would be wrong.
    argv = ["radare2", "-q", "-c", cmd, str(file_path)]
    stdout, _ = await execute_subprocess_async(argv, timeout=effective_timeout)
    return stdout
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
@log_execution(tool_name="dormant_detector")
@track_metrics("dormant_detector")
@handle_tool_errors
async def dormant_detector(
    file_path: str,
    focus_function: str | None = None,
    hypothesis: dict[str, Any] | None = None,
    timeout: int = 300,
    ctx: Context | None = None,
) -> ToolResult:
    """
    Detect hidden malicious behaviors using static analysis + emulation.

    This tool performs a hybrid analysis:
    1. **Scan**: Finds "Orphan Functions" (not called by main) and "Suspicious Logic" (magic value checks).
    2. **Hypothesize**: (Optional) If `hypothesis` is provided, it sets up emulation conditions.
    3. **Emulate**: (Optional) If `focus_function` is provided, it emulates that specific function
       to verify the hypothesis (e.g., "If register eax=0x1234, does it call system()?").

    Args:
        file_path: Path to the binary.
        focus_function: (Optional) Name or address of a specific function to emulate.
        hypothesis: (Optional) Dictionary defining emulation parameters:
            {
                "registers": {"eax": "0x1234", "zf": "1"},
                "args": ["arg1", "arg2"],
                "max_steps": 100
            }
        timeout: Execution timeout.
        ctx: (Optional) FastMCP request context used for progress/info reporting.

    Returns:
        ToolResult containing suspicious candidates or emulation results.
    """
    validated_path = validate_file_path(file_path)

    # 1. Verification Phase: BOTH focus_function and hypothesis are required
    # to enter this branch; a focus_function alone falls through to the
    # discovery scan below.
    if focus_function and hypothesis:
        if ctx:
            await ctx.info(f" Dormant Detector: Emulating {focus_function} with hypothesis...")
        return await _verify_hypothesis_with_emulation(
            validated_path, focus_function, hypothesis, timeout
        )

    # 2. Otherwise, run full scan (Discovery Phase)
    if ctx:
        await ctx.info(" Dormant Detector: Scanning for suspicious logic...")

    # Chain commands: analyze, then list functions as JSON ('aflj').
    # 'aaa' is thorough but slow on large binaries, so anything over 5MB
    # gets the lighter 'aa' pass instead.
    file_size = os.path.getsize(validated_path)
    analysis_cmd = "aa" if file_size > 5_000_000 else "aaa"

    cmd = f"{analysis_cmd}; aflj"
    # use_cache=False: we want a fresh analysis run, not a cached transcript.
    output = await _run_r2_cmd(validated_path, cmd, timeout=timeout, use_cache=False)

    # Debug logging for troubleshooting
    logger.debug(f"r2 output length: {len(output)}, first 500 chars: {output[:500]}")

    # Parse functions with safe JSON extraction
    functions = _extract_json_safely(output)

    # Handle failed JSON extraction (not an empty list, which is valid)
    if functions is None:
        logger.warning(f"Could not extract JSON from r2 output. Output preview: {output[:300]}...")
        # Fallback: run 'aflj' alone with a longer window.
        # NOTE(review): this spawns a NEW r2 process without the preceding
        # analysis pass, so it relies on r2's default auto-analysis finding
        # functions at all — confirm this fallback actually recovers output.
        fallback_output = await _run_r2_cmd(validated_path, "aflj", timeout=60, use_cache=False)
        functions = _extract_json_safely(fallback_output)

        # If still None after fallback, return error
        if functions is None:
            logger.error(
                f"Failed to parse JSON after fallback. Fallback output: {fallback_output[:200]}..."
            )
            return failure(
                "PARSE_ERROR",
                "Failed to parse function list from radare2. "
                "Output may be corrupted or analysis failed.",
                hint="Try increasing timeout or using a simpler analysis mode.",
            )

    # Validate that functions is a list (empty list is valid for stripped binaries)
    if not isinstance(functions, list):
        logger.error(
            f"Invalid function list format (type: {type(functions)}). Output preview: {output[:200]}..."
        )
        return failure(
            "PARSE_ERROR",
            "Failed to parse function list from radare2. "
            "Output may be corrupted or analysis failed.",
            hint="Try increasing timeout or using a simpler analysis mode.",
        )

    if not functions:
        logger.info("No functions found in binary (possibly stripped or small)")

    # Find orphans and suspicious logic
    orphans = []
    suspicious_logic = []

    # Heuristic note: 'aflj' does not expose xref counts reliably across r2
    # versions, so orphan detection works from each function's 'codexrefs'
    # field, and magic-value scanning is capped to a function subset below
    # to keep runtime bounded.

    if ctx:
        await ctx.report_progress(30, 100)
        await ctx.info(" Dormant Detector: Identifying orphan functions...")

    orphans = await _find_orphan_functions(validated_path, functions)

    if ctx:
        await ctx.report_progress(60, 100)
        await ctx.info(" Dormant Detector: Analyzing conditional logic...")

    suspicious_logic = await _identify_conditional_paths(
        validated_path, functions[:20], ctx
    )  # Limit to top 20 for speed in MVP

    if ctx:
        await ctx.report_progress(100, 100)

    # --- Context Awareness: Game Client Check ---
    game_ctx = _check_game_context(validated_path)
    if game_ctx["is_game"]:
        if ctx:
            await ctx.info(f"🎮 Context: Detected Game Client ({', '.join(game_ctx['indicators'][:2])}). Adjusting sensitivity.")

        # Suppress false positives for games:
        # 1. Game main loops poll timers constantly -> downgrade time checks.
        # 2. Anti-cheat debugger checks are expected -> mark informational.
        for item in suspicious_logic:
            reason = item.get("reason", "").lower()
            if "time-based" in reason:
                # Games loop forever; time checks are normal.
                item["confidence"] = "low"
                item["note"] = "Common in game loops (suppressed)"

            if "environment-based" in reason and "debugger" in reason:
                # Anti-cheat behavior
                item["confidence"] = "info"
                item["reason"] += " (Likely Anti-Cheat)"

    return success(
        {
            "scan_type": "discovery",
            "context": game_ctx,
            "orphan_functions": orphans,
            "suspicious_logic": suspicious_logic,
            "description": "Found potential logic bombs. Use 'focus_function' and 'hypothesis' to verify.",
        }
    )
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _functions_to_tuple(functions: list[dict[str, Any]]) -> tuple:
|
|
381
|
+
"""Convert functions list to hashable tuple for caching."""
|
|
382
|
+
return tuple(
|
|
383
|
+
(
|
|
384
|
+
f.get("name", ""),
|
|
385
|
+
f.get("offset", 0),
|
|
386
|
+
f.get("size", 0),
|
|
387
|
+
tuple(f.get("codexrefs", []) or []),
|
|
388
|
+
)
|
|
389
|
+
for f in functions
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
@alru_cache(maxsize=32, ttl=300)
async def _find_orphan_functions_cached(
    file_path_str: str, functions_tuple: tuple
) -> tuple[dict[str, Any], ...]:
    """Cached core of orphan-function detection.

    A function counts as an orphan when it has no code cross-references and
    a non-trivial size. Imports and anything whose name contains "main" or
    "entry" are excluded, since entry points are legitimately uncalled.
    """
    found: list[dict[str, Any]] = []

    for name, offset, size, codexrefs in functions_tuple:
        if name.startswith("sym.imp"):
            continue  # imports are referenced by the loader, not calls
        if "main" in name or "entry" in name:
            continue  # entry points (substring match, e.g. "wmain", "_entry")
        if codexrefs or size <= 50:
            continue  # referenced somewhere, or too small to be interesting

        found.append(
            {
                "name": name,
                "address": hex(offset),
                "size": size,
                "reason": "No code cross-references found (potential dormant code)",
            }
        )

    return tuple(found)
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
async def _find_orphan_functions(
    file_path: Path, functions: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Identify functions with no direct XREFs (potential dead code/backdoors).

    Thin wrapper that converts the function list into a hashable tuple so
    the heavy lifting can be memoized per (file, function-set) pair.
    """
    cached = await _find_orphan_functions_cached(
        str(file_path), _functions_to_tuple(functions)
    )
    return list(cached)
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
async def _identify_conditional_paths(
    file_path: Path, functions: list[dict[str, Any]], ctx: Context | None = None
) -> list[dict[str, Any]]:
    """Identify functions with suspicious conditional logic (Magic Values).

    Enhanced heuristics (v3.1):
    - Entropy filter: Skip simple integers (0-100, powers of 2)
    - Sequence detection: Flag time/env checks before cmp
    - Known magic values: Prioritize known malware signatures

    Returns a list of finding dicts (function, address, instruction, reason,
    confidence, value, verification), sorted most-confident first.
    """
    suspicious = []

    # Known magic values used by malware (high confidence)
    KNOWN_MAGIC_VALUES = {
        0xCAFEBABE, 0xDEADBEEF, 0xBAADF00D, 0xFEEDFACE,
        0x8BADF00D, 0xDEADC0DE, 0xC0DEBABE, 0xFACEFEED,
        0x1BADB002,  # Multiboot magic
    }

    # Time-related API calls that often precede logic bombs
    TIME_APIS = {
        "gettickcount", "queryperformancecounter", "time", "getsystemtime",
        "getlocaltime", "gettimeofday", "clock", "difftime", "mktime",
    }

    # Environment check APIs
    ENV_APIS = {
        "getenv", "getenvironmentvariable", "getcomputername",
        "getusername", "gethostname", "isdebuggerpresent",
    }

    def _is_simple_value(value: int) -> bool:
        """Check if value is a simple integer unlikely to be a magic value."""
        # Skip small values (loop counters, array indices)
        if 0 <= value <= 100:
            return True
        # Skip powers of 2 (common buffer sizes, flags)
        if value > 0 and (value & (value - 1)) == 0:
            return True
        # Skip common bitmasks
        if value in {0xFF, 0xFFFF, 0xFFFFFFFF, 0x7FFFFFFF}:
            return True
        return False

    def _calculate_entropy_score(value: int) -> float:
        """Calculate entropy-like score for a hex value. Higher = more suspicious.

        Not true Shannon entropy: it is the ratio of distinct hex digits to
        total digits (1.0 means every digit differs).
        """
        if value <= 0:
            return 0.0
        hex_str = f"{value:x}"
        # Count unique hex digits
        unique_chars = len(set(hex_str))
        # Normalize by length
        return unique_chars / max(len(hex_str), 1)

    def _extract_hex_value(val_str: str) -> int | None:
        """Extract integer value from a hex ("0x...") or decimal string."""
        try:
            val_str = val_str.strip()
            if val_str.startswith("0x"):
                return int(val_str, 16)
            elif val_str.isdigit():
                return int(val_str)
        except ValueError:
            pass
        return None

    async def _verify_reachability_with_esil(
        start_addr: int | None, target_addr: str, max_steps: int = 100
    ) -> str:
        """Verify if target address is reachable from start address using ESIL emulation."""
        if not start_addr:
            return "unconfirmed (no start address)"

        target_int = int(target_addr, 16)

        try:
            # aei = init ESIL VM, aeim = init stack, aesu = step until address.
            # NOTE(review): every _run_r2_cmd call spawns a fresh radare2
            # process, so the 'aer PC' readback below runs in a NEW session
            # whose ESIL state is unrelated to the 'aesu' run — the final PC
            # likely never equals the target. Confirm and consider merging
            # both commands into one invocation.
            cmd = f"aei; aeim; s {start_addr}; aesu {target_addr} {max_steps}"
            out = await _run_r2_cmd(file_path, cmd, timeout=10)

            # Check PC
            regs = await _run_r2_cmd(file_path, "aer PC", timeout=5)
            try:
                final_pc = int(regs.strip(), 16)
                if final_pc == target_int:
                    return "verified (reachable)"
                else:
                    return f"unconfirmed (stopped at {hex(final_pc)})"
            except ValueError:
                return "unconfirmed (register parse error)"

        except Exception as e:
            return f"unconfirmed (error: {str(e)})"

    # Batch process functions for better performance (one r2 spawn per batch)
    batch_size = 10
    total_functions = len(functions)
    for i in range(0, total_functions, batch_size):
        if ctx:
            # This stage occupies the 60-90% band of overall progress.
            await ctx.report_progress(60 + int((i / total_functions) * 30), 100)

        batch = functions[i : i + batch_size]

        # Create batch command: one 'pdfj' (disassemble-function-as-JSON)
        # per function, seeked to its offset.
        cmds = []
        for func in batch:
            addr = func.get("offset")
            if addr:
                cmds.append(f"pdfj @ {addr}")

        if not cmds:
            continue

        # Execute batch command
        batch_cmd = "; ".join(cmds)
        try:
            out = await _run_r2_cmd(file_path, batch_cmd, timeout=60)

            # Split the combined output back into one JSON object per function.
            # NOTE(review): _JSON_OBJECT_PATTERN is not defined anywhere in the
            # visible portion of this module — unless it is declared elsewhere
            # in the file, this raises NameError, which the broad except below
            # swallows, silently skipping EVERY batch. Verify.
            json_outputs = _JSON_OBJECT_PATTERN.findall(out)

            # strict=False: a truncated batch simply analyzes fewer functions.
            for func, json_str in zip(batch, json_outputs, strict=False):
                try:
                    func_data = json.loads(json_str)
                    ops = func_data.get("ops", [])
                    name = func.get("name")
                    func_start = func.get("offset")

                    # Track recent API calls for sequence detection
                    recent_api_calls: list[str] = []

                    for op in ops:
                        disasm = op.get("disasm", "").lower()

                        # Track call instructions for sequence detection
                        if "call" in disasm:
                            for api in TIME_APIS | ENV_APIS:
                                if api in disasm:
                                    recent_api_calls.append(api)
                                    # Keep only last 5 calls
                                    if len(recent_api_calls) > 5:
                                        recent_api_calls.pop(0)

                        # Detect cmp instructions with hex values
                        if "cmp" in disasm and "0x" in disasm:
                            args = disasm.split(",")
                            if len(args) > 1:
                                val_str = args[1].strip()
                                value = _extract_hex_value(val_str)

                                if value is None:
                                    continue

                                # Skip simple values (false positive filter)
                                if _is_simple_value(value):
                                    continue

                                # Determine suspicion level
                                reason = None
                                confidence = "low"

                                # High confidence: Known magic values
                                if value in KNOWN_MAGIC_VALUES:
                                    reason = f"Known magic value {hex(value)} detected (high confidence)"
                                    confidence = "high"

                                # Medium confidence: Time/Env API before cmp (sequence pattern)
                                elif any(api in TIME_APIS for api in recent_api_calls):
                                    reason = f"Time-based trigger pattern: API call followed by cmp {hex(value)}"
                                    confidence = "medium"
                                elif any(api in ENV_APIS for api in recent_api_calls):
                                    reason = f"Environment-based trigger pattern: API call followed by cmp {hex(value)}"
                                    confidence = "medium"

                                # Low confidence: High entropy value (8+ hex digits, varied chars)
                                elif len(val_str) >= 10:  # "0x" + 8 hex digits
                                    entropy = _calculate_entropy_score(value)
                                    if entropy > 0.5:
                                        reason = f"High-entropy magic value {hex(value)} (entropy: {entropy:.2f})"
                                        confidence = "low"

                                if reason:
                                    # Perform ESIL verification for high/medium confidence
                                    verification = "unconfirmed"
                                    if confidence in ("high", "medium"):
                                        target_offset = hex(op.get("offset", 0))
                                        verification = await _verify_reachability_with_esil(
                                            func_start, target_offset
                                        )
                                        if "reachable" in verification:
                                            confidence = "very_high"

                                    suspicious.append({
                                        "function": name,
                                        "address": hex(op.get("offset", 0)),
                                        "instruction": op.get("disasm", ""),
                                        "reason": reason,
                                        "confidence": confidence,
                                        "value": hex(value),
                                        "verification": verification
                                    })

                except json.JSONDecodeError as e:
                    logger.warning(f"Failed to parse function disassembly: {e}")
                    continue
        except Exception as e:
            # Best-effort: a failed batch is logged and skipped, not fatal.
            logger.warning(f"Failed to analyze batch: {e}")
            continue

    # Sort by confidence (high first); unknown labels sort last.
    confidence_order = {"very_high": 0, "high": 1, "medium": 2, "low": 3}
    suspicious.sort(key=lambda x: confidence_order.get(x.get("confidence", "low"), 3))

    return suspicious
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
async def _verify_hypothesis_with_emulation(
    file_path: Path, function_name: str, hypothesis: dict[str, Any], timeout: int
) -> ToolResult:
    """
    Verify a hypothesis using partial emulation (ESIL).

    Opens the binary in radare2, seeds the ESIL VM with the caller-supplied
    register values, steps the target function for a bounded number of
    instructions, and returns the final register snapshot so the caller can
    check whether the expected state was reached.

    Hypothesis format:
        {
            "registers": {"eax": "0xCAFEBABE"},
            "max_steps": 100
        }

    Args:
        file_path: Path to the binary to open in radare2.
        function_name: Function name (or address) to seek to. Validated via
            _validate_r2_identifier to prevent command injection into the
            semicolon-joined r2 command string.
        hypothesis: Initial register values and step budget (see above).
            Invalid register names/values are skipped with a warning rather
            than aborting the run (best-effort seeding).
        timeout: Seconds allotted to the whole radare2 invocation.

    Returns:
        success(...) containing the final register state and a raw output
        preview, or failure(...) on validation or output-parsing errors.
    """
    # Validate function name to prevent command injection
    try:
        validated_func = _validate_r2_identifier(function_name)
    except ValidationError as e:
        return failure("VALIDATION_ERROR", str(e))

    regs = hypothesis.get("registers", {})

    # FIX: the original only capped the upper bound (min(..., 1000)), so a
    # zero/negative "max_steps" produced a nonsensical "aes 0"/"aes -N"
    # command, and a non-numeric value raised TypeError before any error
    # could be reported. Coerce to int and clamp to [1, 1000].
    try:
        max_steps = max(1, min(int(hypothesis.get("max_steps", 50)), 1000))
    except (TypeError, ValueError):
        max_steps = 50  # fall back to the documented default on bad input

    # ESIL script outline:
    #   1. Analyze (aaa)        2. Init ESIL VM (aei)
    #   3. Init stack (aeim)    4. Seek to function (s <func>)
    #   5. Seed registers (aer) 6. Step (aes N) and dump registers (aerj)
    cmds = [
        "aaa",                  # Analyze
        "aei",                  # Init ESIL
        "aeim",                 # Init Stack
        f"s {validated_func}",  # Seek to function (validated)
    ]

    # Seed registers, skipping anything that fails name/value validation.
    # OPTIMIZATION: pre-compiled patterns (_REG_PATTERN / _VALUE_PATTERN).
    for reg, val in regs.items():
        if not _REG_PATTERN.match(reg.lower()):
            logger.warning(f"Skipping invalid register name: {reg}")
            continue
        if not _VALUE_PATTERN.match(str(val)):
            logger.warning(f"Skipping invalid register value: {val}")
            continue
        cmds.append(f"aer {reg}={val}")

    # Run up to max_steps instructions, then print registers as JSON.
    cmds.append(f"aes {max_steps}")
    cmds.append("aerj")  # Get registers

    full_cmd = "; ".join(cmds)
    output = await _run_r2_cmd(file_path, full_cmd, timeout=timeout)

    # Parse result (last json is registers) with safe extraction
    final_regs = _extract_json_safely(output)

    if final_regs is None:
        logger.error(f"Failed to parse emulation results. Output: {output[:500]}")
        return failure(
            "EMULATION_ERROR",
            "Emulation completed but failed to parse register state. "
            "The function may have crashed or radare2 output was corrupted.",
        )

    return success(
        {
            "status": "emulation_complete",
            # NOTE(review): this is the step *budget*; emulation may have
            # halted earlier (e.g. on ret) — r2 does not report actual count.
            "steps_executed": max_steps,
            "final_registers": final_regs,
            "hypothesis_verification": "Check final_registers to see if expected state was reached.",
            "raw_output_preview": output[:200] + "..." if len(output) > 200 else output,
        }
    )
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
# NOTE: Legacy aliases (ghost_trace, DormantDetectorPlugin) were removed in v1.0.0
|
|
755
|
+
# Use dormant_detector and MalwareToolsPlugin instead
|
|
756
|
+
|