iflow-mcp_developermode-korea_reversecore-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/METADATA +543 -0
  2. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/RECORD +79 -0
  3. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/WHEEL +5 -0
  4. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/entry_points.txt +2 -0
  5. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/licenses/LICENSE +21 -0
  6. iflow_mcp_developermode_korea_reversecore_mcp-1.0.0.dist-info/top_level.txt +1 -0
  7. reversecore_mcp/__init__.py +9 -0
  8. reversecore_mcp/core/__init__.py +78 -0
  9. reversecore_mcp/core/audit.py +101 -0
  10. reversecore_mcp/core/binary_cache.py +138 -0
  11. reversecore_mcp/core/command_spec.py +357 -0
  12. reversecore_mcp/core/config.py +432 -0
  13. reversecore_mcp/core/container.py +288 -0
  14. reversecore_mcp/core/decorators.py +152 -0
  15. reversecore_mcp/core/error_formatting.py +93 -0
  16. reversecore_mcp/core/error_handling.py +142 -0
  17. reversecore_mcp/core/evidence.py +229 -0
  18. reversecore_mcp/core/exceptions.py +296 -0
  19. reversecore_mcp/core/execution.py +240 -0
  20. reversecore_mcp/core/ghidra.py +642 -0
  21. reversecore_mcp/core/ghidra_helper.py +481 -0
  22. reversecore_mcp/core/ghidra_manager.py +234 -0
  23. reversecore_mcp/core/json_utils.py +131 -0
  24. reversecore_mcp/core/loader.py +73 -0
  25. reversecore_mcp/core/logging_config.py +206 -0
  26. reversecore_mcp/core/memory.py +721 -0
  27. reversecore_mcp/core/metrics.py +198 -0
  28. reversecore_mcp/core/mitre_mapper.py +365 -0
  29. reversecore_mcp/core/plugin.py +45 -0
  30. reversecore_mcp/core/r2_helpers.py +404 -0
  31. reversecore_mcp/core/r2_pool.py +403 -0
  32. reversecore_mcp/core/report_generator.py +268 -0
  33. reversecore_mcp/core/resilience.py +252 -0
  34. reversecore_mcp/core/resource_manager.py +169 -0
  35. reversecore_mcp/core/result.py +132 -0
  36. reversecore_mcp/core/security.py +213 -0
  37. reversecore_mcp/core/validators.py +238 -0
  38. reversecore_mcp/dashboard/__init__.py +221 -0
  39. reversecore_mcp/prompts/__init__.py +56 -0
  40. reversecore_mcp/prompts/common.py +24 -0
  41. reversecore_mcp/prompts/game.py +280 -0
  42. reversecore_mcp/prompts/malware.py +1219 -0
  43. reversecore_mcp/prompts/report.py +150 -0
  44. reversecore_mcp/prompts/security.py +136 -0
  45. reversecore_mcp/resources.py +329 -0
  46. reversecore_mcp/server.py +727 -0
  47. reversecore_mcp/tools/__init__.py +49 -0
  48. reversecore_mcp/tools/analysis/__init__.py +74 -0
  49. reversecore_mcp/tools/analysis/capa_tools.py +215 -0
  50. reversecore_mcp/tools/analysis/die_tools.py +180 -0
  51. reversecore_mcp/tools/analysis/diff_tools.py +643 -0
  52. reversecore_mcp/tools/analysis/lief_tools.py +272 -0
  53. reversecore_mcp/tools/analysis/signature_tools.py +591 -0
  54. reversecore_mcp/tools/analysis/static_analysis.py +479 -0
  55. reversecore_mcp/tools/common/__init__.py +58 -0
  56. reversecore_mcp/tools/common/file_operations.py +352 -0
  57. reversecore_mcp/tools/common/memory_tools.py +516 -0
  58. reversecore_mcp/tools/common/patch_explainer.py +230 -0
  59. reversecore_mcp/tools/common/server_tools.py +115 -0
  60. reversecore_mcp/tools/ghidra/__init__.py +19 -0
  61. reversecore_mcp/tools/ghidra/decompilation.py +975 -0
  62. reversecore_mcp/tools/ghidra/ghidra_tools.py +1052 -0
  63. reversecore_mcp/tools/malware/__init__.py +61 -0
  64. reversecore_mcp/tools/malware/adaptive_vaccine.py +579 -0
  65. reversecore_mcp/tools/malware/dormant_detector.py +756 -0
  66. reversecore_mcp/tools/malware/ioc_tools.py +228 -0
  67. reversecore_mcp/tools/malware/vulnerability_hunter.py +519 -0
  68. reversecore_mcp/tools/malware/yara_tools.py +214 -0
  69. reversecore_mcp/tools/patch_explainer.py +19 -0
  70. reversecore_mcp/tools/radare2/__init__.py +13 -0
  71. reversecore_mcp/tools/radare2/r2_analysis.py +972 -0
  72. reversecore_mcp/tools/radare2/r2_session.py +376 -0
  73. reversecore_mcp/tools/radare2/radare2_mcp_tools.py +1183 -0
  74. reversecore_mcp/tools/report/__init__.py +4 -0
  75. reversecore_mcp/tools/report/email.py +82 -0
  76. reversecore_mcp/tools/report/report_mcp_tools.py +344 -0
  77. reversecore_mcp/tools/report/report_tools.py +1076 -0
  78. reversecore_mcp/tools/report/session.py +194 -0
  79. reversecore_mcp/tools/report_tools.py +11 -0
@@ -0,0 +1,756 @@
1
+ """
2
+ Dormant Detector: Hybrid Reverse Engineering Tool.
3
+
4
+ This tool combines static analysis and partial emulation to detect hidden malicious behaviors
5
+ (Logic Bombs, Dormant Malware) that are often missed by traditional dynamic analysis.
6
+ """
7
+
8
+ import os
9
+ import re
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from async_lru import alru_cache
14
+ from fastmcp import Context, FastMCP
15
+
16
+ from reversecore_mcp.core import json_utils as json # Use optimized JSON (3-5x faster)
17
+ try:
18
+ import lief
19
+ except ImportError:
20
+ lief = None
21
+
22
+ from reversecore_mcp.core.decorators import log_execution
23
+ from reversecore_mcp.core.error_handling import handle_tool_errors
24
+ from reversecore_mcp.core.exceptions import ValidationError
25
+ from reversecore_mcp.core.execution import execute_subprocess_async
26
+ from reversecore_mcp.core.logging_config import get_logger
27
+ from reversecore_mcp.core.metrics import track_metrics
28
+ from reversecore_mcp.core.r2_helpers import calculate_dynamic_timeout
29
+ from reversecore_mcp.core.result import ToolResult, failure, success
30
+ from reversecore_mcp.core.security import validate_file_path
31
+
32
+ logger = get_logger(__name__)
33
+
34
# Pre-compiled regex patterns (compiled once at import time) used to validate
# strings before they are interpolated into radare2 command lines.
_HEX_ADDRESS_PATTERN = re.compile(r"^0x[0-9a-fA-F]+$")  # hex address, e.g. 0x401000
_SYMBOL_PATTERN = re.compile(r"^sym\.[a-zA-Z0-9_\.]+$")  # r2 symbol, e.g. sym.main
_FUNCTION_NAME_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")  # plain C identifier
_REG_PATTERN = re.compile(r"^[a-z0-9]+$")  # register name, e.g. eax, r10
_VALUE_PATTERN = re.compile(r"^(0x[0-9a-fA-F]+|\d+)$")  # hex or decimal immediate
40
+
41
+
42
+ def _extract_json_safely(output: str) -> Any | None:
43
+ """Extract JSON from radare2 output using robust state-machine parser.
44
+
45
+ Delegates to r2_helpers.parse_json_output which uses O(n) algorithm
46
+ instead of fragile regex patterns.
47
+ """
48
+ if not output or not output.strip():
49
+ return None
50
+
51
+ try:
52
+ from reversecore_mcp.core.r2_helpers import parse_json_output
53
+ return parse_json_output(output)
54
+ except (json.JSONDecodeError, Exception) as e:
55
+ logger.warning(f"Failed to extract valid JSON from radare2 output: {e}")
56
+ return None
57
+
58
+
59
def _validate_r2_identifier(identifier: str) -> str:
    """Validate a radare2 function/address identifier before command use.

    Accepted forms: hex addresses (``0x...``), radare2 symbols (``sym.*``),
    and plain C-style function names.

    Args:
        identifier: Candidate function name or address.

    Returns:
        The identifier unchanged when it matches an allowed form.

    Raises:
        ValidationError: If no allowed pattern matches (guards against
            command injection into the radare2 command line).
    """
    allowed = (_HEX_ADDRESS_PATTERN, _SYMBOL_PATTERN, _FUNCTION_NAME_PATTERN)
    if any(pattern.match(identifier) for pattern in allowed):
        return identifier

    raise ValidationError(
        f"Invalid radare2 identifier: '{identifier}'. "
        "Must be hex address (0x...) or valid symbol name."
    )
72
+
73
+
74
+
75
def _check_game_context(file_path: Path) -> dict[str, Any]:
    """Heuristically decide whether a binary looks like a game client.

    Game clients trip many generic malware heuristics (timer polling,
    anti-debug checks), so this context is used downstream to reduce
    false positives. Signals checked (only when LIEF is importable):

    1. Imported game-related libraries (D3D, OpenGL, FMOD, Unity, ...).
    2. Digital signatures issued to known game publishers.
    3. Game-engine-specific section names (Unity resS, il2cpp).

    Args:
        file_path: Path to the binary to inspect.

    Returns:
        Dict with ``is_game`` (bool), ``score`` (int) and ``indicators``
        (list of human-readable indicator strings).
    """
    result: dict[str, Any] = {"is_game": False, "score": 0, "indicators": []}

    # LIEF is optional; without it we simply report "not a game".
    if not lief:
        return result

    try:
        parsed = lief.parse(str(file_path))
        if not parsed:
            return result

        # --- Signal 1: imported libraries associated with game engines ---
        game_libs = {
            "d3d", "dxgi", "opengl", "vulkan", "fmod", "wwise",
            "unity", "unreal", "physx", "steam_api", "galaxy",
            "battleye", "easyanticheat", "mono", "discord",
        }
        # Binary.libraries exists for PE, ELF and MachO in LIEF.
        for lib in getattr(parsed, "libraries", ()):
            lowered = lib.lower()
            if any(marker in lowered for marker in game_libs):
                result["indicators"].append(f"Imported Game Lib: {lib}")
                result["score"] += 10

        # --- Signal 2: Authenticode signers from known publishers (PE) ---
        trusted = ["blizzard", "electronic arts", "ubisoft", "valve",
                   "unity", "epic games", "riot games", "nexon", "ncsoft"]
        for sig in getattr(parsed, "signatures", None) or []:
            # LIEF's signature object layout varies across versions;
            # tolerate any shape mismatch on a per-signature basis.
            try:
                for signer in getattr(sig, "signers", ()):
                    issuer = str(signer.issuer).lower()
                    if any(publisher in issuer for publisher in trusted):
                        result["indicators"].append(f"Trusted Publisher: {signer.issuer}")
                        result["score"] += 50
            except Exception:
                pass

        # --- Signal 3: engine-specific section names (Unity resS / il2cpp) ---
        for section in getattr(parsed, "sections", ()):
            lowered_name = section.name.lower()
            if "ress" in lowered_name or "il2cpp" in lowered_name:
                result["indicators"].append(f"Game Section: {section.name}")
                result["score"] += 10

    except Exception as e:
        logger.debug(f"Game context check failed: {e}")

    # Two weak signals (or a single publisher signature) flag a game.
    if result["score"] >= 20:
        result["is_game"] = True

    return result
140
+
141
+
142
def register_dormant_detector(mcp: FastMCP) -> None:
    """Register the Dormant Detector tool with the FastMCP server.

    Args:
        mcp: FastMCP server instance to attach the ``dormant_detector``
            tool to.
    """
    mcp.tool(dormant_detector)
145
+
146
+
147
+ def _get_file_cache_key(file_path: str) -> str:
148
+ """Generate a cache key based on file path and modification time.
149
+
150
+ This ensures cache invalidation when the file is modified.
151
+ """
152
+ try:
153
+ stat = os.stat(file_path)
154
+ return f"{file_path}:{stat.st_mtime}:{stat.st_size}"
155
+ except OSError:
156
+ # If file doesn't exist or can't be accessed, use path only
157
+ return file_path
158
+
159
+
160
@alru_cache(maxsize=64, ttl=300)  # 5-minute TTL, at most 64 entries
async def _run_r2_cmd_cached(
    cache_key: str, file_path: str, cmd: str, timeout: int | None = None
) -> str:
    """Run a single radare2 command with result caching.

    Args:
        cache_key: Key embedding the file's mtime/size so that edits to
            the binary automatically invalidate stale cache entries.
        file_path: Path to the binary to open in radare2.
        cmd: Radare2 command string to execute.
        timeout: Timeout in seconds; when falsy, a dynamic timeout
            derived from the file size is used.

    Returns:
        The command's stdout.
    """
    if timeout:
        effective_timeout = timeout
    else:
        # Scale timeout with file size so large binaries do not time out.
        effective_timeout = calculate_dynamic_timeout(file_path, base_timeout=30)

    argv = ["radare2", "-q", "-c", cmd, str(file_path)]
    stdout, _ = await execute_subprocess_async(argv, timeout=effective_timeout)
    return stdout
176
+
177
+
178
async def _run_r2_cmd(
    file_path: str, cmd: str, timeout: int | None = None, use_cache: bool = True
) -> str:
    """Run a single radare2 command, optionally through the result cache.

    Args:
        file_path: Path to the binary file.
        cmd: Radare2 command to execute.
        timeout: Command timeout in seconds (a dynamic, size-based
            timeout is used when ``None``).
        use_cache: Route through the TTL cache (default). Disable for
            commands with side effects such as analysis passes.

    Returns:
        Command output as a string.
    """
    effective_timeout = timeout or calculate_dynamic_timeout(file_path, base_timeout=30)

    if use_cache:
        # The cache key embeds mtime/size, so modified files miss the cache.
        return await _run_r2_cmd_cached(
            _get_file_cache_key(file_path), file_path, cmd, effective_timeout
        )

    # Direct execution without caching (for commands with side effects).
    argv = ["radare2", "-q", "-c", cmd, str(file_path)]
    stdout, _ = await execute_subprocess_async(argv, timeout=effective_timeout)
    return stdout
205
+
206
+
207
@log_execution(tool_name="dormant_detector")
@track_metrics("dormant_detector")
@handle_tool_errors
async def dormant_detector(
    file_path: str,
    focus_function: str | None = None,
    hypothesis: dict[str, Any] | None = None,
    timeout: int = 300,
    ctx: Context | None = None,
) -> ToolResult:
    """
    Detect hidden malicious behaviors using static analysis + emulation.

    This tool performs a hybrid analysis:
    1. **Scan**: Finds "Orphan Functions" (not called by main) and "Suspicious Logic" (magic value checks).
    2. **Hypothesize**: (Optional) If `hypothesis` is provided, it sets up emulation conditions.
    3. **Emulate**: (Optional) If `focus_function` is provided, it emulates that specific function
       to verify the hypothesis (e.g., "If register eax=0x1234, does it call system()?").

    Args:
        file_path: Path to the binary.
        focus_function: (Optional) Name or address of a specific function to emulate.
        hypothesis: (Optional) Dictionary defining emulation parameters:
            {
                "registers": {"eax": "0x1234", "zf": "1"},
                "args": ["arg1", "arg2"],
                "max_steps": 100
            }
        timeout: Execution timeout in seconds for the radare2 subprocess.
        ctx: (Optional) FastMCP request context used for progress/info messages.

    Returns:
        ToolResult containing suspicious candidates (discovery mode) or
        emulation results (verification mode).
    """
    validated_path = validate_file_path(file_path)

    # 1. If focus_function is provided, run emulation (Verification Phase)
    if focus_function and hypothesis:
        if ctx:
            await ctx.info(f" Dormant Detector: Emulating {focus_function} with hypothesis...")
        return await _verify_hypothesis_with_emulation(
            validated_path, focus_function, hypothesis, timeout
        )

    # 2. Otherwise, run full scan (Discovery Phase)
    if ctx:
        await ctx.info(" Dormant Detector: Scanning for suspicious logic...")

    # Run analysis then list functions as JSON in one radare2 invocation:
    # 'aaa' (deep analyze) + 'aflj' (function list, JSON). 'aaa' can be very
    # slow on large binaries, so binaries over 5MB get the lighter 'aa' pass
    # instead (faster but less complete).
    file_size = os.path.getsize(validated_path)
    analysis_cmd = "aa" if file_size > 5_000_000 else "aaa"

    cmd = f"{analysis_cmd}; aflj"
    output = await _run_r2_cmd(validated_path, cmd, timeout=timeout, use_cache=False)

    # Debug logging for troubleshooting
    logger.debug(f"r2 output length: {len(output)}, first 500 chars: {output[:500]}")

    # Parse functions with safe JSON extraction
    functions = _extract_json_safely(output)

    # Handle failed JSON extraction (None; an empty list is a valid result)
    if functions is None:
        logger.warning(f"Could not extract JSON from r2 output. Output preview: {output[:300]}...")
        # Try a fallback: run aflj separately with more time
        fallback_output = await _run_r2_cmd(validated_path, "aflj", timeout=60, use_cache=False)
        functions = _extract_json_safely(fallback_output)

        # If still None after fallback, return error
        if functions is None:
            logger.error(
                f"Failed to parse JSON after fallback. Fallback output: {fallback_output[:200]}..."
            )
            return failure(
                "PARSE_ERROR",
                "Failed to parse function list from radare2. "
                "Output may be corrupted or analysis failed.",
                hint="Try increasing timeout or using a simpler analysis mode.",
            )

    # Validate that functions is a list (empty list is valid for stripped binaries)
    if not isinstance(functions, list):
        logger.error(
            f"Invalid function list format (type: {type(functions)}). Output preview: {output[:200]}..."
        )
        return failure(
            "PARSE_ERROR",
            "Failed to parse function list from radare2. "
            "Output may be corrupted or analysis failed.",
            hint="Try increasing timeout or using a simpler analysis mode.",
        )

    if not functions:
        logger.info("No functions found in binary (possibly stripped or small)")

    # Find orphans and suspicious logic
    orphans = []
    suspicious_logic = []

    # NOTE(design): 'aflj' does not reliably expose xref counts across r2
    # versions, and querying xrefs ('ax') for every function is slow. So
    # orphan detection relies on the 'codexrefs' field when present (see
    # _find_orphan_functions), and suspicious-logic detection scans 'cmp'
    # instructions with immediate operands (see _identify_conditional_paths).

    if ctx:
        await ctx.report_progress(30, 100)
        await ctx.info(" Dormant Detector: Identifying orphan functions...")

    orphans = await _find_orphan_functions(validated_path, functions)

    if ctx:
        await ctx.report_progress(60, 100)
        await ctx.info(" Dormant Detector: Analyzing conditional logic...")

    suspicious_logic = await _identify_conditional_paths(
        validated_path, functions[:20], ctx
    )  # Limit to top 20 for speed in MVP

    if ctx:
        await ctx.report_progress(100, 100)

    # --- Context Awareness: Game Client Check ---
    game_ctx = _check_game_context(validated_path)
    if game_ctx["is_game"]:
        if ctx:
            await ctx.info(f"🎮 Context: Detected Game Client ({', '.join(game_ctx['indicators'][:2])}). Adjusting sensitivity.")

        # Suppress false positives for games:
        # 1. Game main loops poll timers constantly (GetTickCount), so
        #    time checks are normal -> downgrade confidence.
        # 2. Anti-cheat checks (IsDebuggerPresent) are expected in games
        #    -> mark as defensive rather than malicious.
        for item in suspicious_logic:
            reason = item.get("reason", "").lower()
            if "time-based" in reason:
                # Games loop forever; time checks are normal.
                item["confidence"] = "low"
                item["note"] = "Common in game loops (suppressed)"

            if "environment-based" in reason and "debugger" in reason:
                # Anti-cheat behavior
                item["confidence"] = "info"
                item["reason"] += " (Likely Anti-Cheat)"

    return success(
        {
            "scan_type": "discovery",
            "context": game_ctx,
            "orphan_functions": orphans,
            "suspicious_logic": suspicious_logic,
            "description": "Found potential logic bombs. Use 'focus_function' and 'hypothesis' to verify.",
        }
    )
378
+
379
+
380
+ def _functions_to_tuple(functions: list[dict[str, Any]]) -> tuple:
381
+ """Convert functions list to hashable tuple for caching."""
382
+ return tuple(
383
+ (
384
+ f.get("name", ""),
385
+ f.get("offset", 0),
386
+ f.get("size", 0),
387
+ tuple(f.get("codexrefs", []) or []),
388
+ )
389
+ for f in functions
390
+ )
391
+
392
+
393
@alru_cache(maxsize=32, ttl=300)
async def _find_orphan_functions_cached(
    file_path_str: str, functions_tuple: tuple
) -> tuple[dict[str, Any], ...]:
    """Cached implementation of orphan-function detection.

    A function is flagged as an orphan when it has no inbound code
    cross-references and is large enough to be non-trivial — a common
    shape for dormant/backdoor code.

    Args:
        file_path_str: Binary path (participates in the cache key only).
        functions_tuple: Hashable ``(name, offset, size, codexrefs)``
            tuples produced by ``_functions_to_tuple``.

    Returns:
        Tuple of orphan descriptor dicts (name, address, size, reason).
    """
    flagged: list[dict[str, Any]] = []

    for name, offset, size, codexrefs in functions_tuple:
        # Imports and program entry points are expected to look "uncalled".
        if name.startswith("sym.imp"):
            continue
        if "main" in name or "entry" in name:
            continue

        # No inbound xrefs + non-trivial size => potential dormant code.
        if size > 50 and not codexrefs:
            flagged.append(
                {
                    "name": name,
                    "address": hex(offset),
                    "size": size,
                    "reason": "No code cross-references found (potential dormant code)",
                }
            )

    return tuple(flagged)
420
+
421
+
422
async def _find_orphan_functions(
    file_path: Path, functions: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Identify functions with no direct XREFs (potential dead code/backdoors).

    Thin wrapper that converts the function list into a hashable form
    and delegates to the cached implementation, so repeated scans of the
    same binary avoid recomputation.
    """
    hashable = _functions_to_tuple(functions)
    cached = await _find_orphan_functions_cached(str(file_path), hashable)
    return list(cached)
433
+
434
+
435
+ async def _identify_conditional_paths(
436
+ file_path: Path, functions: list[dict[str, Any]], ctx: Context = None
437
+ ) -> list[dict[str, Any]]:
438
+ """Identify functions with suspicious conditional logic (Magic Values).
439
+
440
+ Enhanced heuristics (v3.1):
441
+ - Entropy filter: Skip simple integers (0-100, powers of 2)
442
+ - Sequence detection: Flag time/env checks before cmp
443
+ - Known magic values: Prioritize known malware signatures
444
+ """
445
+ suspicious = []
446
+
447
+ # Known magic values used by malware (high confidence)
448
+ KNOWN_MAGIC_VALUES = {
449
+ 0xCAFEBABE, 0xDEADBEEF, 0xBAADF00D, 0xFEEDFACE,
450
+ 0x8BADF00D, 0xDEADC0DE, 0xC0DEBABE, 0xFACEFEED,
451
+ 0x1BADB002, # Multiboot magic
452
+ }
453
+
454
+ # Time-related API calls that often precede logic bombs
455
+ TIME_APIS = {
456
+ "gettickcount", "queryperformancecounter", "time", "getsystemtime",
457
+ "getlocaltime", "gettimeofday", "clock", "difftime", "mktime",
458
+ }
459
+
460
+ # Environment check APIs
461
+ ENV_APIS = {
462
+ "getenv", "getenvironmentvariable", "getcomputername",
463
+ "getusername", "gethostname", "isdebuggerpresent",
464
+ }
465
+
466
+ def _is_simple_value(value: int) -> bool:
467
+ """Check if value is a simple integer unlikely to be a magic value."""
468
+ # Skip small values (loop counters, array indices)
469
+ if 0 <= value <= 100:
470
+ return True
471
+ # Skip powers of 2 (common buffer sizes, flags)
472
+ if value > 0 and (value & (value - 1)) == 0:
473
+ return True
474
+ # Skip common bitmasks
475
+ if value in {0xFF, 0xFFFF, 0xFFFFFFFF, 0x7FFFFFFF}:
476
+ return True
477
+ return False
478
+
479
+ def _calculate_entropy_score(value: int) -> float:
480
+ """Calculate entropy-like score for a hex value. Higher = more suspicious."""
481
+ if value <= 0:
482
+ return 0.0
483
+ hex_str = f"{value:x}"
484
+ # Count unique hex digits
485
+ unique_chars = len(set(hex_str))
486
+ # Normalize by length
487
+ return unique_chars / max(len(hex_str), 1)
488
+
489
+ def _extract_hex_value(val_str: str) -> int | None:
490
+ """Extract integer value from hex string."""
491
+ try:
492
+ val_str = val_str.strip()
493
+ if val_str.startswith("0x"):
494
+ return int(val_str, 16)
495
+ elif val_str.isdigit():
496
+ return int(val_str)
497
+ except ValueError:
498
+ pass
499
+ return None
500
+
501
+ async def _verify_reachability_with_esil(
502
+ start_addr: int | None, target_addr: str, max_steps: int = 100
503
+ ) -> str:
504
+ """Verify if target address is reachable from start address using ESIL emulation."""
505
+ if not start_addr:
506
+ return "unconfirmed (no start address)"
507
+
508
+ # Initialize ESIL, set PC, step loops
509
+ # aei: init, aeim: init memory stack, aes: step
510
+ target_int = int(target_addr, 16)
511
+
512
+ # Emulation command:
513
+ # aei; aeim; s {start}; loop: aes; ?rip=={target} ?BREAK; ?rip==0 ?BREAK;
514
+ # This is hard to script purely in one-liner without pipes.
515
+ # Instead we use 'aetr' (trace) or simple stepping.
516
+ # Using 'aes {steps}' and checking trace is safer.
517
+
518
+ try:
519
+ # cmd = f"aei; aeim; s {start_addr}; aes {max_steps}; aer rip" # x86
520
+ # Need arch-agnostic PC check. 'aer PC' works in r2.
521
+
522
+ # Simple approach: Run N steps and check if we hit the address in trace
523
+ # 'aedt' (ESIL data trace) is too heavy.
524
+ # We'll rely on 'aes' and then check range? No.
525
+
526
+ # Let's try 'aec' (ESIL continue) until addr? 'aec' is continue until user data.
527
+ # Better: 'aesu {target}' (Step until address).
528
+
529
+ cmd = f"aei; aeim; s {start_addr}; aesu {target_addr} {max_steps}"
530
+ out = await _run_r2_cmd(file_path, cmd, timeout=10)
531
+
532
+ # Check PC
533
+ regs = await _run_r2_cmd(file_path, "aer PC", timeout=5)
534
+ try:
535
+ final_pc = int(regs.strip(), 16)
536
+ if final_pc == target_int:
537
+ return "verified (reachable)"
538
+ else:
539
+ return f"unconfirmed (stopped at {hex(final_pc)})"
540
+ except ValueError:
541
+ return "unconfirmed (register parse error)"
542
+
543
+ except Exception as e:
544
+ return f"unconfirmed (error: {str(e)})"
545
+
546
+ # Batch process functions for better performance
547
+ batch_size = 10
548
+ total_functions = len(functions)
549
+ for i in range(0, total_functions, batch_size):
550
+ if ctx:
551
+ await ctx.report_progress(60 + int((i / total_functions) * 30), 100)
552
+
553
+ batch = functions[i : i + batch_size]
554
+
555
+ # Create batch command
556
+ cmds = []
557
+ for func in batch:
558
+ addr = func.get("offset")
559
+ if addr:
560
+ cmds.append(f"pdfj @ {addr}")
561
+
562
+ if not cmds:
563
+ continue
564
+
565
+ # Execute batch command
566
+ batch_cmd = "; ".join(cmds)
567
+ try:
568
+ out = await _run_r2_cmd(file_path, batch_cmd, timeout=60)
569
+
570
+ # Parse each function's output
571
+ json_outputs = _JSON_OBJECT_PATTERN.findall(out)
572
+
573
+ for func, json_str in zip(batch, json_outputs, strict=False):
574
+ try:
575
+ func_data = json.loads(json_str)
576
+ ops = func_data.get("ops", [])
577
+ name = func.get("name")
578
+ func_start = func.get("offset")
579
+
580
+ # Track recent API calls for sequence detection
581
+ recent_api_calls: list[str] = []
582
+
583
+ for op in ops:
584
+ disasm = op.get("disasm", "").lower()
585
+
586
+ # Track call instructions for sequence detection
587
+ if "call" in disasm:
588
+ for api in TIME_APIS | ENV_APIS:
589
+ if api in disasm:
590
+ recent_api_calls.append(api)
591
+ # Keep only last 5 calls
592
+ if len(recent_api_calls) > 5:
593
+ recent_api_calls.pop(0)
594
+
595
+ # Detect cmp instructions with hex values
596
+ if "cmp" in disasm and "0x" in disasm:
597
+ args = disasm.split(",")
598
+ if len(args) > 1:
599
+ val_str = args[1].strip()
600
+ value = _extract_hex_value(val_str)
601
+
602
+ if value is None:
603
+ continue
604
+
605
+ # Skip simple values (false positive filter)
606
+ if _is_simple_value(value):
607
+ continue
608
+
609
+ # Determine suspicion level
610
+ reason = None
611
+ confidence = "low"
612
+
613
+ # High confidence: Known magic values
614
+ if value in KNOWN_MAGIC_VALUES:
615
+ reason = f"Known magic value {hex(value)} detected (high confidence)"
616
+ confidence = "high"
617
+
618
+ # Medium confidence: Time/Env API before cmp (sequence pattern)
619
+ elif any(api in TIME_APIS for api in recent_api_calls):
620
+ reason = f"Time-based trigger pattern: API call followed by cmp {hex(value)}"
621
+ confidence = "medium"
622
+ elif any(api in ENV_APIS for api in recent_api_calls):
623
+ reason = f"Environment-based trigger pattern: API call followed by cmp {hex(value)}"
624
+ confidence = "medium"
625
+
626
+ # Low confidence: High entropy value (8+ hex digits, varied chars)
627
+ elif len(val_str) >= 10: # 0x + 8 hex digits
628
+ entropy = _calculate_entropy_score(value)
629
+ if entropy > 0.5:
630
+ reason = f"High-entropy magic value {hex(value)} (entropy: {entropy:.2f})"
631
+ confidence = "low"
632
+
633
+ if reason:
634
+ # Perform ESIL verification for high/medium confidence
635
+ verification = "unconfirmed"
636
+ if confidence in ("high", "medium"):
637
+ target_offset = hex(op.get("offset", 0))
638
+ verification = await _verify_reachability_with_esil(
639
+ func_start, target_offset
640
+ )
641
+ if "reachable" in verification:
642
+ confidence = "very_high"
643
+
644
+ suspicious.append({
645
+ "function": name,
646
+ "address": hex(op.get("offset", 0)),
647
+ "instruction": op.get("disasm", ""),
648
+ "reason": reason,
649
+ "confidence": confidence,
650
+ "value": hex(value),
651
+ "verification": verification
652
+ })
653
+
654
+ except json.JSONDecodeError as e:
655
+ logger.warning(f"Failed to parse function disassembly: {e}")
656
+ continue
657
+ except Exception as e:
658
+ logger.warning(f"Failed to analyze batch: {e}")
659
+ continue
660
+
661
+ # Sort by confidence (high first)
662
+ confidence_order = {"very_high": 0, "high": 1, "medium": 2, "low": 3}
663
+ suspicious.sort(key=lambda x: confidence_order.get(x.get("confidence", "low"), 3))
664
+
665
+ return suspicious
666
+
667
+
668
+
669
+
670
+
671
async def _verify_hypothesis_with_emulation(
    file_path: Path, function_name: str, hypothesis: dict[str, Any], timeout: int
) -> ToolResult:
    """Verify a dormant-logic hypothesis via partial ESIL emulation.

    The hypothesis supplies initial register values; the target function
    is then emulated for a bounded number of steps and the final register
    state is returned for the caller to inspect.

    Hypothesis format::

        {"registers": {"eax": "0xCAFEBABE"}, "max_steps": 100}

    Args:
        file_path: Validated path to the binary.
        function_name: Function name or address to emulate.
        hypothesis: Emulation parameters (see format above).
        timeout: Subprocess timeout in seconds.

    Returns:
        Success result with the final register state, or a failure result
        on validation/parse errors.
    """
    # Reject identifiers that could smuggle extra r2 commands (injection guard).
    try:
        target = _validate_r2_identifier(function_name)
    except ValidationError as e:
        return failure("VALIDATION_ERROR", str(e))

    register_overrides = hypothesis.get("registers", {})
    max_steps = min(hypothesis.get("max_steps", 50), 1000)  # Cap at 1000 for safety

    # Script: analyze, init ESIL VM + stack, seek to the target function.
    script = [
        "aaa",
        "aei",
        "aeim",
        f"s {target}",
    ]

    # Apply hypothesis registers, dropping anything that fails validation.
    for reg, val in register_overrides.items():
        if not _REG_PATTERN.match(reg.lower()):
            logger.warning(f"Skipping invalid register name: {reg}")
            continue
        if not _VALUE_PATTERN.match(str(val)):
            logger.warning(f"Skipping invalid register value: {val}")
            continue
        script.append(f"aer {reg}={val}")

    # Step the emulator N times, then dump the register file as JSON.
    script.append(f"aes {max_steps}")
    script.append("aerj")

    output = await _run_r2_cmd(file_path, "; ".join(script), timeout=timeout)

    # The last JSON object in the output is the 'aerj' register dump.
    final_regs = _extract_json_safely(output)
    if final_regs is None:
        logger.error(f"Failed to parse emulation results. Output: {output[:500]}")
        return failure(
            "EMULATION_ERROR",
            "Emulation completed but failed to parse register state. "
            "The function may have crashed or radare2 output was corrupted.",
        )

    return success(
        {
            "status": "emulation_complete",
            "steps_executed": max_steps,
            "final_registers": final_regs,
            "hypothesis_verification": "Check final_registers to see if expected state was reached.",
            "raw_output_preview": output[:200] + "..." if len(output) > 200 else output,
        }
    )
752
+
753
+
754
+ # NOTE: Legacy aliases (ghost_trace, DormantDetectorPlugin) were removed in v1.0.0
755
+ # Use dormant_detector and MalwareToolsPlugin instead
756
+