iflow-mcp_developermode-korea_reversecore-mcp 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/METADATA +543 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/RECORD +79 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/WHEEL +5 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
- iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/top_level.txt +1 -0
- reversecore_mcp/__init__.py +9 -0
- reversecore_mcp/core/__init__.py +78 -0
- reversecore_mcp/core/audit.py +101 -0
- reversecore_mcp/core/binary_cache.py +138 -0
- reversecore_mcp/core/command_spec.py +357 -0
- reversecore_mcp/core/config.py +432 -0
- reversecore_mcp/core/container.py +288 -0
- reversecore_mcp/core/decorators.py +152 -0
- reversecore_mcp/core/error_formatting.py +93 -0
- reversecore_mcp/core/error_handling.py +142 -0
- reversecore_mcp/core/evidence.py +229 -0
- reversecore_mcp/core/exceptions.py +296 -0
- reversecore_mcp/core/execution.py +240 -0
- reversecore_mcp/core/ghidra.py +642 -0
- reversecore_mcp/core/ghidra_helper.py +481 -0
- reversecore_mcp/core/ghidra_manager.py +234 -0
- reversecore_mcp/core/json_utils.py +131 -0
- reversecore_mcp/core/loader.py +73 -0
- reversecore_mcp/core/logging_config.py +206 -0
- reversecore_mcp/core/memory.py +721 -0
- reversecore_mcp/core/metrics.py +198 -0
- reversecore_mcp/core/mitre_mapper.py +365 -0
- reversecore_mcp/core/plugin.py +45 -0
- reversecore_mcp/core/r2_helpers.py +404 -0
- reversecore_mcp/core/r2_pool.py +403 -0
- reversecore_mcp/core/report_generator.py +268 -0
- reversecore_mcp/core/resilience.py +252 -0
- reversecore_mcp/core/resource_manager.py +169 -0
- reversecore_mcp/core/result.py +132 -0
- reversecore_mcp/core/security.py +213 -0
- reversecore_mcp/core/validators.py +238 -0
- reversecore_mcp/dashboard/__init__.py +221 -0
- reversecore_mcp/prompts/__init__.py +56 -0
- reversecore_mcp/prompts/common.py +24 -0
- reversecore_mcp/prompts/game.py +280 -0
- reversecore_mcp/prompts/malware.py +1219 -0
- reversecore_mcp/prompts/report.py +150 -0
- reversecore_mcp/prompts/security.py +136 -0
- reversecore_mcp/resources.py +329 -0
- reversecore_mcp/server.py +727 -0
- reversecore_mcp/tools/__init__.py +49 -0
- reversecore_mcp/tools/analysis/__init__.py +74 -0
- reversecore_mcp/tools/analysis/capa_tools.py +215 -0
- reversecore_mcp/tools/analysis/die_tools.py +180 -0
- reversecore_mcp/tools/analysis/diff_tools.py +643 -0
- reversecore_mcp/tools/analysis/lief_tools.py +272 -0
- reversecore_mcp/tools/analysis/signature_tools.py +591 -0
- reversecore_mcp/tools/analysis/static_analysis.py +479 -0
- reversecore_mcp/tools/common/__init__.py +58 -0
- reversecore_mcp/tools/common/file_operations.py +352 -0
- reversecore_mcp/tools/common/memory_tools.py +516 -0
- reversecore_mcp/tools/common/patch_explainer.py +230 -0
- reversecore_mcp/tools/common/server_tools.py +115 -0
- reversecore_mcp/tools/ghidra/__init__.py +19 -0
- reversecore_mcp/tools/ghidra/decompilation.py +975 -0
- reversecore_mcp/tools/ghidra/ghidra_tools.py +1052 -0
- reversecore_mcp/tools/malware/__init__.py +61 -0
- reversecore_mcp/tools/malware/adaptive_vaccine.py +579 -0
- reversecore_mcp/tools/malware/dormant_detector.py +756 -0
- reversecore_mcp/tools/malware/ioc_tools.py +228 -0
- reversecore_mcp/tools/malware/vulnerability_hunter.py +519 -0
- reversecore_mcp/tools/malware/yara_tools.py +214 -0
- reversecore_mcp/tools/patch_explainer.py +19 -0
- reversecore_mcp/tools/radare2/__init__.py +13 -0
- reversecore_mcp/tools/radare2/r2_analysis.py +972 -0
- reversecore_mcp/tools/radare2/r2_session.py +376 -0
- reversecore_mcp/tools/radare2/radare2_mcp_tools.py +1183 -0
- reversecore_mcp/tools/report/__init__.py +4 -0
- reversecore_mcp/tools/report/email.py +82 -0
- reversecore_mcp/tools/report/report_mcp_tools.py +344 -0
- reversecore_mcp/tools/report/report_tools.py +1076 -0
- reversecore_mcp/tools/report/session.py +194 -0
- reversecore_mcp/tools/report_tools.py +11 -0

reversecore_mcp/tools/analysis/diff_tools.py
@@ -0,0 +1,643 @@
"""Binary diffing and library matching tools for comparing binaries and identifying library code."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
|
|
7
|
+
from fastmcp import Context
|
|
8
|
+
|
|
9
|
+
# Use high-performance JSON implementation (3-5x faster)
|
|
10
|
+
from reversecore_mcp.core import json_utils as json
|
|
11
|
+
from reversecore_mcp.core.config import get_config
|
|
12
|
+
from reversecore_mcp.core.decorators import log_execution
|
|
13
|
+
from reversecore_mcp.core.error_handling import handle_tool_errors
|
|
14
|
+
from reversecore_mcp.core.execution import execute_subprocess_async
|
|
15
|
+
from reversecore_mcp.core.metrics import track_metrics
|
|
16
|
+
from reversecore_mcp.core.r2_helpers import (
|
|
17
|
+
build_r2_cmd as _build_r2_cmd,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Import shared R2 helper functions from core (avoids circular dependencies)
|
|
21
|
+
from reversecore_mcp.core.r2_helpers import (
|
|
22
|
+
execute_r2_command as _execute_r2_command,
|
|
23
|
+
)
|
|
24
|
+
from reversecore_mcp.core.r2_helpers import (
|
|
25
|
+
parse_json_output as _parse_json_output,
|
|
26
|
+
)
|
|
27
|
+
from reversecore_mcp.core.result import ToolResult, failure, success
|
|
28
|
+
from reversecore_mcp.core.security import validate_file_path
|
|
29
|
+
from reversecore_mcp.core.validators import validate_tool_parameters
|
|
30
|
+
|
|
31
|
+
# Load default timeout from configuration
|
|
32
|
+
DEFAULT_TIMEOUT = get_config().default_tool_timeout
|
|
33
|
+
|
|
34
|
+
# OPTIMIZATION: Pre-compile regex patterns used in hot paths
|
|
35
|
+
_SIMILARITY_PATTERN = re.compile(r"similarity:\s*(\d+\.?\d*)")
|
|
36
|
+
_ADDRESS_PATTERN = re.compile(r"(0x[0-9a-fA-F]+)")
|
|
37
|
+
_HEX_PATTERN = re.compile(r"(?:0x)?([0-9a-fA-F]{4,})")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@lru_cache(maxsize=256)
|
|
41
|
+
def _extract_library_name(function_name: str) -> str:
|
|
42
|
+
"""
|
|
43
|
+
Extract library name from function name.
|
|
44
|
+
|
|
45
|
+
Cached to avoid repeated string comparisons for common function names.
|
|
46
|
+
Optimized to call lower() only once.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
function_name: Function name (e.g., "sym.imp.strcpy")
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
Extracted library name or "unknown"
|
|
53
|
+
"""
|
|
54
|
+
# Convert to lowercase once for efficient comparison
|
|
55
|
+
name_lower = function_name.lower()
|
|
56
|
+
|
|
57
|
+
# Simple heuristic extraction
|
|
58
|
+
if "kernel32" in name_lower:
|
|
59
|
+
return "kernel32"
|
|
60
|
+
if "msvcrt" in name_lower or "libc" in name_lower:
|
|
61
|
+
return "libc/msvcrt"
|
|
62
|
+
if "std::" in name_lower:
|
|
63
|
+
return "libstdc++"
|
|
64
|
+
if "imp." in name_lower:
|
|
65
|
+
return "import"
|
|
66
|
+
return "unknown"
|
|
67
|
+
|
|
68
|
+
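# Illustrative examples of the heuristics above (values chosen only for illustration):
#   _extract_library_name("sym.imp.KERNEL32.dll_CreateFileA")  -> "kernel32"
#   _extract_library_name("sym.imp.strcpy")                     -> "import"  (falls through to the "imp." check)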

@log_execution(tool_name="diff_binaries")
@track_metrics("diff_binaries")
@handle_tool_errors
async def diff_binaries(
    file_path_a: str,
    file_path_b: str,
    function_name: str = None,
    max_output_size: int = 10_000_000,
    timeout: int = DEFAULT_TIMEOUT,
) -> ToolResult:
    """
    Compare two binary files to identify code changes and modifications.

    This tool uses radiff2 to perform binary diffing, which is essential for:
    - **Patch Analysis (1-day Exploits)**: Compare pre-patch and post-patch binaries
      to identify security vulnerabilities fixed in updates
    - **Game Hacking**: Find offset changes after game updates to maintain functionality
    - **Malware Variant Analysis**: Identify code differences between malware variants
      (e.g., "90% similar to Lazarus malware, but C2 address generation changed")

    The tool provides:
    - Similarity score (0.0-1.0) between binaries
    - List of code changes with addresses and descriptions
    - Optional function-level comparison for targeted analysis

    Args:
        file_path_a: Path to the first binary file (e.g., pre-patch version)
        file_path_b: Path to the second binary file (e.g., post-patch version)
        function_name: Optional function name to compare (e.g., "main", "sym.decrypt").
            If None, performs whole-binary comparison.
        max_output_size: Maximum output size in bytes (default: 10MB)
        timeout: Timeout in seconds (default: 300s)

    Returns:
        ToolResult with structured JSON containing:
        - similarity: Float between 0.0 and 1.0 indicating code similarity
        - changes: List of detected changes with addresses and descriptions
        - function_specific: Boolean indicating if function-level diff was performed

    Example:
        # Compare two versions of a patched binary
        diff_binaries("/app/workspace/app_v1.0.exe", "/app/workspace/app_v1.1.exe")

        # Compare specific function between versions
        diff_binaries("/app/workspace/malware_old.exe", "/app/workspace/malware_new.exe", "main")

    Output Format:
        {
            "similarity": 0.95,
            "function_specific": false,
            "changes": [
                {
                    "address": "0x401050",
                    "type": "code_change",
                    "description": "Instruction changed from JNZ to JZ"
                },
                {
                    "address": "0x401080",
                    "type": "new_block",
                    "description": "Added security check"
                }
            ],
            "total_changes": 2
        }
    """
    # Validate both file paths
    validated_path_a = validate_file_path(file_path_a)
    validated_path_b = validate_file_path(file_path_b)

    # Validate tool parameters
    validate_tool_parameters(
        "diff_binaries",
        {
            "function_name": function_name,
            "max_output_size": max_output_size,
            "timeout": timeout,
        },
    )

    try:
        # Build radiff2 command
        # -s: similarity score
        # -C: code comparison
        # -g: graph diff (if function specified)

        if function_name:
            # Function-specific comparison using graph diff
            cmd = [
                "radiff2",
                "-g",
                function_name,
                str(validated_path_a),
                str(validated_path_b),
            ]
        else:
            # Whole-binary comparison with similarity scoring
            cmd = [
                "radiff2",
                "-C",
                str(validated_path_a),
                str(validated_path_b),
            ]

        output, bytes_read = await execute_subprocess_async(
            cmd,
            max_output_size=max_output_size,
            timeout=timeout,
        )

        # Also get similarity score (format: "similarity: 0.95")
        similarity_cmd = ["radiff2", "-s", str(validated_path_a), str(validated_path_b)]
        similarity_output, _ = await execute_subprocess_async(
            similarity_cmd,
            max_output_size=1_000_000,
            timeout=60,
        )

        # Parse similarity score (format: "similarity: 0.95")
        similarity = 0.0
        # OPTIMIZATION: Use pre-compiled pattern (faster)
        similarity_match = _SIMILARITY_PATTERN.search(similarity_output)
        if similarity_match:
            similarity = float(similarity_match.group(1))

        # Parse changes from output
        changes = []

        # Parse the diff output to extract meaningful changes
        # radiff2 output varies, so we'll capture the raw output and structure it
        lines = output.strip().split("\n")

        for line in lines:
            if not line.strip():
                continue

            # Look for common patterns in radiff2 output
            # OPTIMIZATION: Use pre-compiled pattern (faster)
            addr_match = _ADDRESS_PATTERN.search(line)

            if addr_match:
                address = addr_match.group(1)

                # Determine change type based on line content
                change_type = "unknown"
                description = line.strip()

                if "new" in line.lower():
                    change_type = "new_block"
                elif "removed" in line.lower() or "deleted" in line.lower():
                    change_type = "removed_block"
                elif "modified" in line.lower() or "changed" in line.lower():
                    change_type = "code_change"
                elif "jmp" in line.lower() or "call" in line.lower() or "jnz" in line.lower():
                    change_type = "control_flow_change"

                changes.append(
                    {
                        "address": address,
                        "type": change_type,
                        "description": description,
                    }
                )

        # If no structured changes found, include summary info
        if not changes and output.strip():
            changes.append(
                {
                    "type": "summary",
                    "description": "Binary comparison completed. See raw output for details.",
                }
            )

        # Build result
        result_data = {
            "similarity": similarity,
            "function_specific": bool(function_name),
            "changes": changes,
            "total_changes": len(changes),
            "raw_output": (output if len(output) < 5000 else output[:5000] + "... (truncated)"),
        }

        return success(
            json.dumps(result_data, indent=2),
            bytes_read=bytes_read,
            similarity=similarity,
            total_changes=len(changes),
            function_specific=bool(function_name),
        )

    except Exception as e:
        return failure(
            "DIFF_ERROR",
            f"Binary diff failed: {str(e)}",
            hint="Ensure both files are valid binaries and radiff2 is available. For function-level diff, verify function name exists in both binaries.",
        )

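# For reference, the score parsing above expects the "radiff2 -s" invocation to emit a line
# of the form "similarity: 0.95" (as the inline comments state); _SIMILARITY_PATTERN pulls
# out that float, and any other output from that invocation is ignored for the score.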

@log_execution(tool_name="analyze_variant_changes")
@track_metrics("analyze_variant_changes")
@handle_tool_errors
async def analyze_variant_changes(
    file_path_a: str,
    file_path_b: str,
    top_n: int = 3,
    timeout: int = DEFAULT_TIMEOUT,
) -> ToolResult:
    """
    Analyze structural changes between two binary variants (Lineage Mapper).

    This tool combines binary diffing with control flow analysis to understand
    *how* a binary has evolved. It identifies the most modified functions and
    generates their Control Flow Graphs (CFG) for comparison.

    **Use Cases:**
    - **Malware Lineage**: "How did Lazarus Group modify their backdoor?"
    - **Patch Diffing**: "What logic changed in the vulnerable function?"
    - **Variant Analysis**: "Is this a new version of the same malware?"

    Args:
        file_path_a: Path to the original binary
        file_path_b: Path to the variant binary
        top_n: Number of top changed functions to analyze in detail (default: 3)
        timeout: Execution timeout in seconds

    Returns:
        ToolResult with diff summary and CFG data for top changed functions.
    """
    # Import here to avoid circular dependency
    from reversecore_mcp.tools.radare2.r2_analysis import generate_function_graph

    # 1. Run diff_binaries
    diff_result = await diff_binaries(file_path_a, file_path_b, timeout=timeout)

    if diff_result.status == "error":
        return diff_result

    diff_data = (
        json.loads(diff_result.data) if isinstance(diff_result.data, str) else diff_result.data
    )
    changes = diff_data.get("changes", [])

    # 2. Identify changed functions (heuristic: group changes by address proximity or use explicit function diff if available)
    # Since diff_binaries returns a flat list of changes, we'll try to map them to functions.
    # For this advanced tool, we'll assume we want to analyze the functions where changes occurred.

    # Get function list for file B (variant) to map addresses to names
    # We use a simple r2 command to get functions
    validated_path_b = validate_file_path(file_path_b)
    cmd = _build_r2_cmd(str(validated_path_b), ["aflj"], "aaa")
    out, _ = await execute_subprocess_async(cmd, timeout=60)

    try:
        funcs_b = _parse_json_output(out)
    except (json.JSONDecodeError, TypeError):
        funcs_b = []

    # OPTIMIZATION: Pre-sort functions by offset for binary search
    # This reduces O(n*m) to O(n*log(m)) complexity
    # Further optimized to minimize redundant dict.get() calls
    sorted_funcs = []
    for f in funcs_b:
        offset = f.get("offset")
        size = f.get("size")
        name = f.get("name", "unknown")
        if offset is not None and size is not None:
            sorted_funcs.append((offset, offset + size, name))
    sorted_funcs.sort(key=lambda x: x[0])

    # Map changes to functions using binary search
    changed_funcs = {}  # {func_name: count}

    for change in changes:
        addr_str = change.get("address")
        if not addr_str:
            continue
        try:
            addr = int(addr_str, 16)
            # Binary search to find the function containing this address
            left, right = 0, len(sorted_funcs) - 1
            found_func = None

            while left <= right:
                mid = (left + right) // 2
                func_start, func_end, func_name = sorted_funcs[mid]

                if func_start <= addr < func_end:
                    found_func = func_name
                    break
                elif addr < func_start:
                    right = mid - 1
                else:
                    left = mid + 1

            if found_func:
                changed_funcs[found_func] = changed_funcs.get(found_func, 0) + 1
        except ValueError:
            # Invalid hex address format
            pass

    # Sort by number of changes
    sorted_funcs = sorted(changed_funcs.items(), key=lambda x: x[1], reverse=True)[:top_n]

    detailed_analysis = []

    # 3. Generate CFG for top changed functions
    for func_name, count in sorted_funcs:
        # Get CFG for variant
        cfg_result = await generate_function_graph(file_path_b, func_name, format="mermaid")
        cfg_mermaid = cfg_result.data if cfg_result.status == "success" else "Error generating CFG"

        detailed_analysis.append(
            {
                "function": func_name,
                "change_count": count,
                "cfg_mermaid": cfg_mermaid,
                "analysis_hint": f"Function {func_name} has {count} modifications. Compare its logic with the original.",
            }
        )

    return success(
        {
            "similarity": diff_data.get("similarity"),
            "total_changes": diff_data.get("total_changes"),
            "top_modified_functions": detailed_analysis,
        },
        description=f"Analyzed variants. Similarity: {diff_data.get('similarity')}. Detailed analysis for {len(detailed_analysis)} functions.",
    )

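# Worked example of the address-to-function mapping above (hypothetical values): with
# sorted_funcs = [(0x401000, 0x401080, "sym.init"), (0x401080, 0x401200, "sym.main")],
# a change reported at 0x4010f0 falls inside [0x401080, 0x401200) and is counted against
# "sym.main"; addresses that land in no function's range are simply skipped.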

@log_execution(tool_name="match_libraries")
@track_metrics("match_libraries")
@handle_tool_errors
async def match_libraries(
    file_path: str,
    signature_db: str = None,
    max_output_size: int = 10_000_000,
    timeout: int = 600,
    ctx: Context = None,
) -> ToolResult:
    """
    Match and filter known library functions to focus on user code.

    This tool uses radare2's zignatures (FLIRT-compatible signature matching) to:
    - **Reduce Analysis Noise**: Skip analysis of known library functions (strcpy, malloc, etc.)
    - **Focus on User Code**: Identify which functions are original vs library code
    - **Save Time & Tokens**: Reduce analysis scope by 80% by filtering out standard libraries
    - **Improve Accuracy**: Focus AI analysis on the actual malicious/interesting code

    Common use cases:
    - Analyzing large binaries (>25MB) where most code is OpenSSL, zlib, MFC, etc.
    - Game client reverse engineering (filter out Unreal Engine / Unity standard library)
    - Malware analysis (focus on custom malware code, skip Windows API wrappers)

    The tool automatically uses built-in signature databases for common libraries
    and can optionally use custom signature databases for specialized analysis.

    Args:
        file_path: Path to the binary file to analyze
        signature_db: Optional path to custom signature database file (.sig format).
            If None, uses radare2's built-in signature databases.
        max_output_size: Maximum output size in bytes (default: 10MB)
        timeout: Timeout in seconds (default: 600s)
        ctx: FastMCP Context (auto-injected)

    Returns:
        ToolResult with structured JSON containing:
        - total_functions: Total number of functions found
        - library_functions: Number of matched library functions
        - user_functions: Number of unmatched (user) functions to analyze
        - library_matches: List of matched library functions with details
        - user_function_list: List of user function addresses/names for further analysis
        - noise_reduction_percentage: Percentage of functions filtered out

    Example:
        # Auto-detect standard libraries
        match_libraries("/app/workspace/large_app.exe")

        # Use custom signature database
        match_libraries("/app/workspace/game.exe", "/app/rules/game_engine.sig")

    Output Format:
        {
            "total_functions": 1250,
            "library_functions": 1000,
            "user_functions": 250,
            "noise_reduction_percentage": 80.0,
            "library_matches": [
                {
                    "address": "0x401000",
                    "name": "strcpy",
                    "library": "msvcrt"
                },
                {
                    "address": "0x401050",
                    "name": "malloc",
                    "library": "msvcrt"
                }
            ],
            "user_function_list": [
                "0x402000",
                "0x402100",
                "sym.custom_decrypt"
            ]
        }
    """
    # Validate file path
    validated_path = validate_file_path(file_path)

    # Validate optional signature database path
    if signature_db:
        validated_sig_path = validate_file_path(signature_db)

    # Validate tool parameters
    validate_tool_parameters(
        "match_libraries",
        {
            "max_output_size": max_output_size,
            "timeout": timeout,
        },
    )

    try:
        # Step 1: Determine analysis level based on file size
        analysis_level = "aa"  # Default to basic analysis
        try:
            file_size_mb = os.path.getsize(validated_path) / (1024 * 1024)
            if file_size_mb < 10:  # Full analysis for files under 10MB
                analysis_level = "aaa"
            if ctx:
                await ctx.info(
                    f"File size: {file_size_mb:.1f}MB, using '{analysis_level}' analysis..."
                )
        except OSError:
            pass

        # Step 2: Build commands for signature matching
        # Use radare2 to get function list with signature matching
        if signature_db:
            # Load custom signature database
            r2_commands = [f"zg {validated_sig_path}", "aflj"]
        else:
            # Use built-in signatures
            r2_commands = ["zg", "aflj"]

        if ctx:
            await ctx.report_progress(10, 100)
            await ctx.info("Analyzing binary and matching signatures (this may take a while)...")

        # Step 3: Execute radare2 command
        output, bytes_read = await _execute_r2_command(
            validated_path,
            r2_commands,
            analysis_level=analysis_level,
            max_output_size=max_output_size,
            base_timeout=timeout,
        )

        if ctx:
            await ctx.report_progress(60, 100)
            await ctx.info("Parsing function list...")

        # Parse JSON output from aflj (function list JSON)
        try:
            # Attempt to find JSON array in output if direct parse fails
            # This handles cases where 'zg' or 'aaa' might produce non-JSON output before the JSON result
            functions = _parse_json_output(output)
        except json.JSONDecodeError:
            # If JSON parsing fails, fall back to text parsing
            return failure(
                "PARSE_ERROR",
                "Failed to parse function list from radare2",
                hint="The binary may not be analyzable or may be packed/obfuscated. Try running 'aaa' analysis first.",
            )

        # Categorize functions into library vs user code
        library_functions = []
        user_functions = []

        total_functions = len(functions)
        for idx, func in enumerate(functions):
            # Report progress
            if ctx and idx % 10 == 0:  # Report every 10 functions to avoid spam
                await ctx.report_progress(idx, total_functions)

            name = func.get("name", "")
            # Support both 'offset' (aflj) and 'vaddr' (isj) keys
            # Fallback to 'realname' or other identifiers if needed
            offset = func.get("offset", func.get("vaddr", 0))

            # If offset is 0, try to parse it from the name if it looks like sym.func.0x...
            if offset == 0 and name:
                # OPTIMIZATION: Use pre-compiled pattern (faster)
                hex_match = _HEX_PATTERN.search(name)
                if hex_match:
                    try:
                        offset = int(hex_match.group(1), 16)
                    except ValueError:
                        pass

            # Heuristic: library functions typically have names like:
            # - sym.imp.* (imports)
            # - sym.std::* (C++ standard library)
            # - Known library prefixes
            is_library = (
                name.startswith("sym.imp.")
                or name.startswith("sym.std::")
                or name.startswith("fcn.imp.")
                or "libc" in name.lower()
                or "msvcrt" in name.lower()
                or "kernel32" in name.lower()
            )

            if is_library:
                library_functions.append(
                    {
                        "address": f"0x{offset:x}",
                        "name": name,
                        "library": _extract_library_name(name),
                    }
                )
            else:
                user_functions.append({"address": f"0x{offset:x}", "name": name})

        # Final progress report
        if ctx:
            await ctx.report_progress(total_functions, total_functions)

        total_functions = len(functions)
        library_count = len(library_functions)
        user_count = len(user_functions)

        # Calculate noise reduction percentage
        noise_reduction = (library_count / total_functions * 100) if total_functions > 0 else 0.0

        # Build result
        result_data = {
            "total_functions": total_functions,
            "library_functions": library_count,
            "user_functions": user_count,
            "noise_reduction_percentage": round(noise_reduction, 2),
            "library_matches": library_functions[:50],  # Limit to first 50 for readability
            "user_function_list": [
                f["address"] for f in user_functions[:100]
            ],  # First 100 user functions
            "summary": f"Filtered out {library_count} library functions ({noise_reduction:.1f}% noise reduction). Focus analysis on {user_count} user functions.",
            "signature_db_used": signature_db if signature_db else "built-in",
        }

        if library_count == 0:
            result_data["hint"] = (
                "No library functions matched. This could mean: "
                "1. No signatures loaded (check signature_db). "
                "2. Binary uses statically linked libraries not in DB. "
                "3. Binary is fully custom."
            )

        return success(
            json.dumps(result_data, indent=2),
            bytes_read=bytes_read,
            total_functions=total_functions,
            library_functions=library_count,
            user_functions=user_count,
            noise_reduction=round(noise_reduction, 2),
        )

    except Exception as e:
        return failure(
            "LIBRARY_MATCH_ERROR",
            f"Library signature matching failed: {str(e)}",
            hint="Ensure the binary is valid and radare2 signature databases are available. For custom databases, verify the .sig file format.",
        )


# Note: DiffToolsPlugin has been removed.
# The diff tools are now registered via AnalysisToolsPlugin in analysis/__init__.py.
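
For orientation, a minimal sketch of how these coroutines might be driven outside the MCP server, assuming the package is installed and radare2/radiff2 are on PATH; the workspace paths are the hypothetical examples from the docstrings, and the printed fields (status, data) are the ToolResult attributes the module itself reads:

import asyncio

from reversecore_mcp.tools.analysis.diff_tools import diff_binaries, match_libraries


async def main() -> None:
    # Compare two versions of the same program (hypothetical paths).
    diff = await diff_binaries("/app/workspace/app_v1.0.exe", "/app/workspace/app_v1.1.exe")
    print(diff.status, diff.data)

    # Filter known library functions out of the newer build.
    libs = await match_libraries("/app/workspace/app_v1.1.exe")
    print(libs.status, libs.data)


asyncio.run(main())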