angr 9.2.78__py3-none-win_amd64.whl → 9.2.80__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +59 -0
- angr/analyses/cfg/cfg_fast.py +140 -3
- angr/analyses/decompiler/ail_simplifier.py +8 -0
- angr/analyses/decompiler/block_simplifier.py +25 -5
- angr/analyses/decompiler/clinic.py +33 -19
- angr/analyses/decompiler/decompilation_options.py +9 -0
- angr/analyses/decompiler/optimization_passes/__init__.py +6 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +2 -2
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +2 -2
- angr/analyses/decompiler/optimization_passes/multi_simplifier.py +0 -12
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +8 -5
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +82 -12
- angr/analyses/decompiler/peephole_optimizations/__init__.py +11 -2
- angr/analyses/decompiler/peephole_optimizations/base.py +29 -2
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +14 -2
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +83 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +103 -0
- angr/analyses/decompiler/region_simplifiers/ifelse.py +19 -10
- angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -2
- angr/analyses/decompiler/structured_codegen/c.py +20 -4
- angr/analyses/decompiler/utils.py +131 -2
- angr/analyses/propagator/engine_ail.py +3 -1
- angr/analyses/propagator/engine_vex.py +45 -0
- angr/analyses/propagator/propagator.py +24 -15
- angr/analyses/proximity_graph.py +30 -0
- angr/analyses/reaching_definitions/engine_ail.py +1 -1
- angr/analyses/stack_pointer_tracker.py +55 -0
- angr/callable.py +4 -4
- angr/engines/light/engine.py +30 -18
- angr/knowledge_plugins/__init__.py +1 -0
- angr/knowledge_plugins/custom_strings.py +40 -0
- angr/knowledge_plugins/functions/function.py +29 -0
- angr/knowledge_plugins/propagations/propagation_model.py +4 -0
- angr/knowledge_plugins/propagations/states.py +54 -4
- angr/lib/angr_native.dll +0 -0
- angr/procedures/definitions/__init__.py +2 -1
- angr/procedures/definitions/msvcr.py +0 -3
- angr/procedures/definitions/ntoskrnl.py +9 -0
- angr/procedures/win32_kernel/ExAllocatePool.py +12 -0
- angr/procedures/win32_kernel/ExFreePoolWithTag.py +7 -0
- angr/procedures/win32_kernel/__init__.py +3 -0
- angr/sim_type.py +3 -0
- angr/storage/memory_mixins/__init__.py +1 -1
- angr/utils/funcid.py +128 -0
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/METADATA +6 -6
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/RECORD +55 -45
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/WHEEL +1 -1
- angr-9.2.80.dist-info/entry_points.txt +2 -0
- tests/analyses/cfg/test_cfgfast.py +24 -0
- tests/analyses/decompiler/test_decompiler.py +128 -0
- tests/analyses/test_constantpropagation.py +34 -0
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/LICENSE +0 -0
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/__main__.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
|
|
3
|
+
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES
|
|
4
|
+
from angr.analyses.decompiler.utils import decompile_functions
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class COMMANDS:
    """
    The commands that the angr CLI supports.

    Each command is a plain string constant; ALL_COMMANDS collects every supported command and is what the argument
    parser uses as the set of valid choices for the positional "command" argument.
    """

    # Decompile one or more functions of a binary and print the result.
    DECOMPILE = "decompile"
    # Every command the CLI accepts. Extend this list when a new command constant is added above.
    ALL_COMMANDS = [DECOMPILE]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def main(argv=None):
    """
    Entry point of the angr command-line interface.

    Parses the command line, runs the requested analysis on the given binary, and prints the result to stdout.

    :param argv: Optional list of command-line arguments (without the program name). When None (the default),
                 argparse reads sys.argv[1:], so calling ``main()`` with no arguments behaves exactly as before.
    :return:     None
    """
    parser = argparse.ArgumentParser(description="The angr CLI allows you to decompile and analyze binaries.")
    parser.add_argument(
        "command",
        help="""
        The analysis type to run on the binary. All analysis is output to stdout.""",
        choices=COMMANDS.ALL_COMMANDS,
    )
    parser.add_argument("binary", help="The path to the binary to analyze.")
    parser.add_argument(
        "--functions",
        help="""
        The functions to analyze under the current command. Functions can either be expressed as names found in the
        symbols of the binary or as addresses like: 0x401000.""",
        nargs="+",
    )
    parser.add_argument(
        "--catch-exceptions",
        help="""
        Catch exceptions during analysis. The scope of error handling may depend on the command used for analysis.
        If multiple functions are specified for analysis, each function will be handled individually.""",
        action="store_true",
        default=False,
    )
    # decompilation-specific arguments
    parser.add_argument(
        "--structurer",
        help="The structuring algorithm to use for decompilation.",
        # snapshot the registered structurer names as a list so argparse gets a plain container
        choices=list(STRUCTURER_CLASSES),
        default="phoenix",
    )

    args = parser.parse_args(argv)
    if args.command == COMMANDS.DECOMPILE:
        decompilation = decompile_functions(
            args.binary, functions=args.functions, structurer=args.structurer, catch_errors=args.catch_exceptions
        )
        print(decompilation)
    else:
        # argparse already rejects commands outside COMMANDS.ALL_COMMANDS; this branch only matters if a command is
        # added to ALL_COMMANDS without a handler above.
        parser.print_help()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
if __name__ == "__main__":
|
|
59
|
+
main()
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -40,6 +40,12 @@ from angr.errors import (
|
|
|
40
40
|
SimIRSBNoDecodeError,
|
|
41
41
|
)
|
|
42
42
|
from angr.utils.constants import DEFAULT_STATEMENT
|
|
43
|
+
from angr.utils.funcid import (
|
|
44
|
+
is_function_security_check_cookie,
|
|
45
|
+
is_function_security_init_cookie,
|
|
46
|
+
is_function_security_init_cookie_win8,
|
|
47
|
+
is_function_likely_security_init_cookie,
|
|
48
|
+
)
|
|
43
49
|
from angr.analyses import ForwardAnalysis
|
|
44
50
|
from .cfg_arch_options import CFGArchOptions
|
|
45
51
|
from .cfg_base import CFGBase
|
|
@@ -1617,6 +1623,8 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1617
1623
|
if self._collect_data_ref:
|
|
1618
1624
|
self._post_process_string_references()
|
|
1619
1625
|
|
|
1626
|
+
self._rename_common_functions_and_symbols()
|
|
1627
|
+
|
|
1620
1628
|
CFGBase._post_analysis(self)
|
|
1621
1629
|
|
|
1622
1630
|
# Clean up
|
|
@@ -1653,6 +1661,69 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1653
1661
|
else:
|
|
1654
1662
|
l.exception("Error collecting XRefs for function %#x.", f_addr, exc_info=True)
|
|
1655
1663
|
|
|
1664
|
+
def _rename_common_functions_and_symbols(self):
    """
    This function implements logic for renaming some commonly seen functions in an architecture- and OS-specific
    way.

    Currently only AMD64 Win32 PE binaries are handled: the address stored in the "SecurityCookie" entry of the
    main object's load config is labeled ``_security_cookie``, and functions referencing it are tested and, on a
    match, renamed to ``_security_check_cookie`` / ``_security_init_cookie``. If no init-cookie function was found
    that way, the nodes of the entry function's transition graph are checked with a heuristic instead.

    NOTE(review): the nesting below was reconstructed from a flattened diff view; confirm it against the released
    source before relying on it.
    """

    if (
        self.project.simos is not None
        and self.project.arch.name == "AMD64"
        and self.project.simos.name == "Win32"
        and isinstance(self.project.loader.main_object, cle.PE)
    ):
        security_cookie_addr = self.project.loader.main_object.load_config.get("SecurityCookie", None)
        security_check_cookie_found = False
        security_init_cookie_found = False
        if security_cookie_addr is not None:
            # do not overwrite a label that already exists at that address
            if security_cookie_addr not in self.kb.labels:
                self.kb.labels[security_cookie_addr] = "_security_cookie"
            # identify _security_init_cookie and _security_check_cookie
            xrefs = self.kb.xrefs.get_xrefs_by_dst(security_cookie_addr)
            tested_func_addrs = set()
            for xref in xrefs:
                cfg_node = self.model.get_any_node(xref.block_addr)
                if cfg_node is None:
                    continue
                func_addr = cfg_node.function_address
                if func_addr not in tested_func_addrs:
                    func = self.kb.functions.get_by_addr(func_addr)
                    if not security_check_cookie_found and is_function_security_check_cookie(
                        func, self.project, security_cookie_addr
                    ):
                        security_check_cookie_found = True
                        func.is_default_name = False
                        func.name = "_security_check_cookie"
                    elif not security_init_cookie_found and (
                        # the regular matcher is tried first, then the Windows 8 variant (same order as before)
                        is_function_security_init_cookie(func, self.project, security_cookie_addr)
                        or is_function_security_init_cookie_win8(func, self.project, security_cookie_addr)
                    ):
                        security_init_cookie_found = True
                        func.is_default_name = False
                        func.name = "_security_init_cookie"
                    # each function is tested at most once, no matter how many xrefs it contains
                    tested_func_addrs.add(func_addr)
                if security_init_cookie_found and security_check_cookie_found:
                    # both are found. exit from the loop
                    break

        # special handling: some binaries do not have SecurityCookie set, but still contain _security_init_cookie
        if security_init_cookie_found is False:
            start_func = self.functions.get_by_addr(self.project.entry)
            if start_func is not None:
                for callee in start_func.transition_graph:
                    # only Function nodes of the transition graph are candidates
                    if isinstance(callee, Function):
                        if not security_init_cookie_found and is_function_likely_security_init_cookie(callee):
                            security_init_cookie_found = True
                            callee.is_default_name = False
                            callee.name = "_security_init_cookie"
                            break
|
|
1726
|
+
|
|
1656
1727
|
def _post_process_string_references(self) -> None:
|
|
1657
1728
|
"""
|
|
1658
1729
|
Finds overlapping string references and retrofit them so that we see full strings in memory data.
|
|
@@ -2008,9 +2079,10 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2008
2079
|
ins_addr = addr
|
|
2009
2080
|
for i, stmt in enumerate(irsb.statements):
|
|
2010
2081
|
if isinstance(stmt, pyvex.IRStmt.Exit):
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2082
|
+
branch_ins_addr = last_ins_addr if self.project.arch.branch_delay_slot else ins_addr
|
|
2083
|
+
if self._is_branch_vex_artifact_only(irsb, branch_ins_addr, stmt):
|
|
2084
|
+
continue
|
|
2085
|
+
successors.append((i, branch_ins_addr, stmt.dst, stmt.jumpkind))
|
|
2014
2086
|
elif isinstance(stmt, pyvex.IRStmt.IMark):
|
|
2015
2087
|
last_ins_addr = ins_addr
|
|
2016
2088
|
ins_addr = stmt.addr + stmt.delta
|
|
@@ -2025,6 +2097,8 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2025
2097
|
idx_ = irsb.instruction_addresses.index(ins_addr)
|
|
2026
2098
|
if idx_ > 0:
|
|
2027
2099
|
branch_ins_addr = irsb.instruction_addresses[idx_ - 1]
|
|
2100
|
+
elif self._is_branch_vex_artifact_only(irsb, branch_ins_addr, exit_stmt):
|
|
2101
|
+
continue
|
|
2028
2102
|
successors.append((stmt_idx, branch_ins_addr, exit_stmt.dst, exit_stmt.jumpkind))
|
|
2029
2103
|
|
|
2030
2104
|
# default statement
|
|
@@ -4586,6 +4660,18 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4586
4660
|
):
|
|
4587
4661
|
rbp_as_gpr = False
|
|
4588
4662
|
break
|
|
4663
|
+
elif (
|
|
4664
|
+
insn.mnemonic == "lea"
|
|
4665
|
+
and len(insn.operands) == 2
|
|
4666
|
+
and insn.operands[0].type == capstone.x86.X86_OP_REG
|
|
4667
|
+
and insn.operands[1].type == capstone.x86.X86_OP_MEM
|
|
4668
|
+
):
|
|
4669
|
+
if (
|
|
4670
|
+
insn.operands[0].reg == capstone.x86.X86_REG_RBP
|
|
4671
|
+
and insn.operands[1].mem.base == capstone.x86.X86_REG_RSP
|
|
4672
|
+
):
|
|
4673
|
+
rbp_as_gpr = False
|
|
4674
|
+
break
|
|
4589
4675
|
func = self.kb.functions.get_by_addr(func_addr)
|
|
4590
4676
|
func.info["bp_as_gpr"] = rbp_as_gpr
|
|
4591
4677
|
|
|
@@ -4608,6 +4694,57 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4608
4694
|
queue.append(succ_addr)
|
|
4609
4695
|
return to_remove
|
|
4610
4696
|
|
|
4697
|
+
def _is_branch_vex_artifact_only(self, irsb, branch_ins_addr: int, exit_stmt) -> bool:
    """
    Check if an exit is merely the result of VEX lifting. We should drop these exits.
    These exits point to the same instruction and do not terminate the block.

    Example block:

        1400061c2  lock or byte ptr [rsp], 0x0
        1400061c7  mov r9, r8
        1400061ca  shr r9, 0x5
        1400061ce  jne 0x1400060dc

    VEX block:

        00 | ------ IMark(0x1400061c2, 5, 0) ------
        01 | t3 = GET:I64(rsp)
        02 | t2 = LDle:I8(t3)
        03 | t(4,4294967295) = CASle(t3 :: (t2,None)->(t2,None))
        04 | t13 = CasCmpNE8(t4,t2)
        05 | if (t13) { PUT(rip) = 0x1400061c2; Ijk_Boring }
        06 | ------ IMark(0x1400061c7, 3, 0) ------
        07 | t15 = GET:I64(r8)
        08 | ------ IMark(0x1400061ca, 4, 0) ------
        09 | t9 = Shr64(t15,0x05)
        10 | t16 = Shr64(t15,0x04)
        11 | PUT(cc_op) = 0x0000000000000024
        12 | PUT(cc_dep1) = t9
        13 | PUT(cc_dep2) = t16
        14 | PUT(r9) = t9
        15 | PUT(rip) = 0x00000001400061ce
        16 | ------ IMark(0x1400061ce, 6, 0) ------
        17 | t29 = GET:I64(cc_ndep)
        18 | t30 = amd64g_calculate_condition(0x0000000000000004,0x0000000000000024,t9,t16,t29):Ity_I64
        19 | t25 = 64to1(t30)
        20 | if (t25) { PUT(rip) = 0x1400061d4; Ijk_Boring }
        NEXT: PUT(rip) = 0x00000001400060dc; Ijk_Boring

    Statement 5 should not introduce a new exit in the CFG.

    :param irsb:            The lifted block; only its ``instruction_addresses`` attribute is read.
    :param branch_ins_addr: Address of the instruction that the exit statement belongs to.
    :param exit_stmt:       The exit statement to test; its ``dst`` and ``jumpkind`` attributes are read.
    :return:                True if the exit should be dropped, False otherwise.
    """

    if (
        # never drop exits on architectures with branch delay slots
        not self.project.arch.branch_delay_slot
        and irsb.instruction_addresses
        # the exit must not belong to the last instruction, i.e., it does not terminate the block
        and branch_ins_addr != irsb.instruction_addresses[-1]
        # the exit must jump to a constant address that is the very instruction it belongs to
        and isinstance(exit_stmt.dst, pyvex.const.IRConst)
        and exit_stmt.dst.value == branch_ins_addr
        and exit_stmt.jumpkind == "Ijk_Boring"
    ):
        return True
    return False
|
|
4747
|
+
|
|
4611
4748
|
def _remove_jobs_by_source_node_addr(self, addr: int):
|
|
4612
4749
|
self._remove_job(lambda j: j.src_node is not None and j.src_node.addr == addr)
|
|
4613
4750
|
|
|
@@ -1044,6 +1044,14 @@ class AILSimplifier(Analysis):
|
|
|
1044
1044
|
if u.block_addr not in {b.addr for b in super_node_blocks}:
|
|
1045
1045
|
continue
|
|
1046
1046
|
|
|
1047
|
+
# check if the register has been overwritten by statements in between the def site and the use site
|
|
1048
|
+
usesite_atom_defs = set(rd.get_defs(the_def.atom, u, OP_BEFORE))
|
|
1049
|
+
if len(usesite_atom_defs) != 1:
|
|
1050
|
+
continue
|
|
1051
|
+
usesite_atom_def = next(iter(usesite_atom_defs))
|
|
1052
|
+
if usesite_atom_def != the_def:
|
|
1053
|
+
continue
|
|
1054
|
+
|
|
1047
1055
|
# check if any atoms that the call relies on has been overwritten by statements in between the def site
|
|
1048
1056
|
# and the use site.
|
|
1049
1057
|
defsite_all_expr_uses = set(rd.all_uses.get_uses_by_location(the_def.codeloc))
|
|
@@ -15,8 +15,15 @@ from ...analyses.propagator import PropagatorAnalysis
|
|
|
15
15
|
from ...analyses.reaching_definitions import ReachingDefinitionsAnalysis
|
|
16
16
|
from ...errors import SimMemoryMissingError
|
|
17
17
|
from .. import Analysis, register_analysis
|
|
18
|
-
from .peephole_optimizations import
|
|
19
|
-
|
|
18
|
+
from .peephole_optimizations import (
|
|
19
|
+
MULTI_STMT_OPTS,
|
|
20
|
+
STMT_OPTS,
|
|
21
|
+
EXPR_OPTS,
|
|
22
|
+
PeepholeOptimizationStmtBase,
|
|
23
|
+
PeepholeOptimizationExprBase,
|
|
24
|
+
PeepholeOptimizationMultiStmtBase,
|
|
25
|
+
)
|
|
26
|
+
from .utils import peephole_optimize_exprs, peephole_optimize_stmts, peephole_optimize_multistmts
|
|
20
27
|
|
|
21
28
|
if TYPE_CHECKING:
|
|
22
29
|
from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
|
|
@@ -78,6 +85,7 @@ class BlockSimplifier(Analysis):
|
|
|
78
85
|
if peephole_optimizations is None:
|
|
79
86
|
self._expr_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in EXPR_OPTS]
|
|
80
87
|
self._stmt_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in STMT_OPTS]
|
|
88
|
+
self._multistmt_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in MULTI_STMT_OPTS]
|
|
81
89
|
else:
|
|
82
90
|
self._expr_peephole_opts = [
|
|
83
91
|
cls(self.project, self.kb, self.func_addr)
|
|
@@ -89,6 +97,11 @@ class BlockSimplifier(Analysis):
|
|
|
89
97
|
for cls in peephole_optimizations
|
|
90
98
|
if issubclass(cls, PeepholeOptimizationStmtBase)
|
|
91
99
|
]
|
|
100
|
+
self._multistmt_peephole_opts = [
|
|
101
|
+
cls(self.project, self.kb, self.func_addr)
|
|
102
|
+
for cls in peephole_optimizations
|
|
103
|
+
if issubclass(cls, PeepholeOptimizationMultiStmtBase)
|
|
104
|
+
]
|
|
92
105
|
|
|
93
106
|
self.result_block = None
|
|
94
107
|
|
|
@@ -404,9 +417,16 @@ class BlockSimplifier(Analysis):
|
|
|
404
417
|
# run statement-level optimizations
|
|
405
418
|
statements, stmts_updated = peephole_optimize_stmts(block, self._stmt_peephole_opts)
|
|
406
419
|
|
|
407
|
-
if
|
|
408
|
-
|
|
409
|
-
|
|
420
|
+
if stmts_updated:
|
|
421
|
+
new_block = block.copy(statements=statements)
|
|
422
|
+
else:
|
|
423
|
+
new_block = block
|
|
424
|
+
|
|
425
|
+
statements, multi_stmts_updated = peephole_optimize_multistmts(new_block, self._multistmt_peephole_opts)
|
|
426
|
+
|
|
427
|
+
if not multi_stmts_updated:
|
|
428
|
+
return new_block
|
|
429
|
+
new_block = new_block.copy(statements=statements)
|
|
410
430
|
return new_block
|
|
411
431
|
|
|
412
432
|
|
|
@@ -20,6 +20,7 @@ from ...sim_type import (
|
|
|
20
20
|
SimTypeFunction,
|
|
21
21
|
SimTypeBottom,
|
|
22
22
|
SimTypeFloat,
|
|
23
|
+
SimTypePointer,
|
|
23
24
|
)
|
|
24
25
|
from ...sim_variable import SimVariable, SimStackVariable, SimRegisterVariable, SimMemoryVariable
|
|
25
26
|
from ...knowledge_plugins.key_definitions.constants import OP_BEFORE
|
|
@@ -259,6 +260,10 @@ class Clinic(Analysis):
|
|
|
259
260
|
self._update_progress(50.0, text="Making callsites")
|
|
260
261
|
_, stackarg_offsets = self._make_callsites(ail_graph, stack_pointer_tracker=spt)
|
|
261
262
|
|
|
263
|
+
# Run simplification passes
|
|
264
|
+
self._update_progress(65.0, text="Running simplifications 2")
|
|
265
|
+
ail_graph = self._run_simplification_passes(ail_graph, stage=OptimizationPassStage.AFTER_MAKING_CALLSITES)
|
|
266
|
+
|
|
262
267
|
# Simplify the entire function for the second time
|
|
263
268
|
self._update_progress(55.0, text="Simplifying function 2")
|
|
264
269
|
self._simplify_function(
|
|
@@ -281,7 +286,7 @@ class Clinic(Analysis):
|
|
|
281
286
|
)
|
|
282
287
|
|
|
283
288
|
# Run simplification passes
|
|
284
|
-
self._update_progress(65.0, text="Running simplifications 2")
|
|
289
|
+
self._update_progress(65.0, text="Running simplifications 3 ")
|
|
285
290
|
ail_graph = self._run_simplification_passes(ail_graph, stage=OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION)
|
|
286
291
|
|
|
287
292
|
# Simplify the entire function for the third time
|
|
@@ -316,7 +321,7 @@ class Clinic(Analysis):
|
|
|
316
321
|
self._make_function_prototype(arg_list, variable_kb)
|
|
317
322
|
|
|
318
323
|
# Run simplification passes
|
|
319
|
-
self._update_progress(95.0, text="Running simplifications 3")
|
|
324
|
+
self._update_progress(95.0, text="Running simplifications 4")
|
|
320
325
|
ail_graph = self._run_simplification_passes(
|
|
321
326
|
ail_graph, stage=OptimizationPassStage.AFTER_VARIABLE_RECOVERY, variable_kb=variable_kb
|
|
322
327
|
)
|
|
@@ -1247,23 +1252,32 @@ class Clinic(Analysis):
|
|
|
1247
1252
|
expr.variable_offset = offset
|
|
1248
1253
|
|
|
1249
1254
|
elif isinstance(expr, ailment.Expr.Const):
|
|
1250
|
-
#
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1255
|
+
# custom string?
|
|
1256
|
+
if hasattr(expr, "custom_string") and expr.custom_string is True:
|
|
1257
|
+
s = self.kb.custom_strings[expr.value]
|
|
1258
|
+
expr.tags["reference_values"] = {
|
|
1259
|
+
SimTypePointer(SimTypeChar().with_arch(self.project.arch)).with_arch(self.project.arch): s.decode(
|
|
1260
|
+
"ascii"
|
|
1261
|
+
),
|
|
1262
|
+
}
|
|
1263
|
+
else:
|
|
1264
|
+
# global variable?
|
|
1265
|
+
global_vars = global_variables.get_global_variables(expr.value)
|
|
1266
|
+
if not global_vars:
|
|
1267
|
+
# detect if there is a related symbol
|
|
1268
|
+
if self.project.loader.find_object_containing(expr.value):
|
|
1269
|
+
symbol = self.project.loader.find_symbol(expr.value)
|
|
1270
|
+
if symbol is not None:
|
|
1271
|
+
# Create a new global variable if there isn't one already
|
|
1272
|
+
global_vars = global_variables.get_global_variables(symbol.rebased_addr)
|
|
1273
|
+
if not global_vars:
|
|
1274
|
+
global_var = SimMemoryVariable(symbol.rebased_addr, symbol.size, name=symbol.name)
|
|
1275
|
+
global_variables.add_variable("global", global_var.addr, global_var)
|
|
1276
|
+
global_vars = {global_var}
|
|
1277
|
+
if global_vars:
|
|
1278
|
+
global_var = next(iter(global_vars))
|
|
1279
|
+
expr.tags["reference_variable"] = global_var
|
|
1280
|
+
expr.tags["reference_variable_offset"] = 0
|
|
1267
1281
|
|
|
1268
1282
|
elif isinstance(expr, ailment.Stmt.Call):
|
|
1269
1283
|
self._link_variables_on_call(variable_manager, global_variables, block, stmt_idx, expr, is_expr=True)
|
|
@@ -97,6 +97,15 @@ options = [
|
|
|
97
97
|
category="Graph",
|
|
98
98
|
default_value=True,
|
|
99
99
|
),
|
|
100
|
+
O(
|
|
101
|
+
"Simplify if-else to remove terminating else scopes",
|
|
102
|
+
"Removes terminating else scopes to make the code appear more flat.",
|
|
103
|
+
bool,
|
|
104
|
+
"region_simplifier",
|
|
105
|
+
"simplify_ifelse",
|
|
106
|
+
category="Graph",
|
|
107
|
+
default_value=True,
|
|
108
|
+
),
|
|
100
109
|
O(
|
|
101
110
|
"Show casts",
|
|
102
111
|
"Disabling this option will blindly remove all C typecast constructs from pseudocode output.",
|
|
@@ -50,6 +50,8 @@ def get_optimization_passes(arch, platform):
|
|
|
50
50
|
|
|
51
51
|
if platform is not None:
|
|
52
52
|
platform = platform.lower()
|
|
53
|
+
if platform == "win32":
|
|
54
|
+
platform = "windows" # sigh
|
|
53
55
|
|
|
54
56
|
passes = []
|
|
55
57
|
for pass_, _ in _all_optimization_passes:
|
|
@@ -80,3 +82,7 @@ def get_default_optimization_passes(arch: Union[Arch, str], platform: Optional[s
|
|
|
80
82
|
passes.append(pass_)
|
|
81
83
|
|
|
82
84
|
return passes
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def register_optimization_pass(opt_pass, enable_by_default: bool) -> None:
    """
    Register an additional optimization pass.

    The pass is appended to the module-level list of all optimization passes as an
    ``(opt_pass, enable_by_default)`` tuple. No check for duplicates is performed, so registering the same pass
    twice adds it twice.

    :param opt_pass:          The optimization pass to register.
    :param enable_by_default: The flag stored next to the pass in the registry.
    :return:                  None
    """
    _all_optimization_passes.append((opt_pass, enable_by_default))
|
|
@@ -89,7 +89,7 @@ class SimplifierAILEngine(
|
|
|
89
89
|
if hasattr(self, handler):
|
|
90
90
|
return getattr(self, handler)(stmt)
|
|
91
91
|
else:
|
|
92
|
-
_l.
|
|
92
|
+
_l.debug("Unsupported statement type %s.", type(stmt).__name__)
|
|
93
93
|
return stmt
|
|
94
94
|
|
|
95
95
|
def _ail_handle_Assignment(self, stmt):
|
|
@@ -176,7 +176,7 @@ class SimplifierAILEngine(
|
|
|
176
176
|
if v is None:
|
|
177
177
|
return expr
|
|
178
178
|
return v
|
|
179
|
-
_l.
|
|
179
|
+
_l.debug("Unsupported expression type %s.", type(expr).__name__)
|
|
180
180
|
return expr
|
|
181
181
|
|
|
182
182
|
def _ail_handle_StackBaseOffset(self, expr): # pylint:disable=no-self-use
|
|
@@ -84,9 +84,9 @@ class ITERegionConverter(OptimizationPass):
|
|
|
84
84
|
|
|
85
85
|
true_child, false_child = None, None
|
|
86
86
|
for child in children:
|
|
87
|
-
if child.addr == if_stmt.true_target.value:
|
|
87
|
+
if if_stmt.true_target is not None and child.addr == if_stmt.true_target.value:
|
|
88
88
|
true_child = child
|
|
89
|
-
elif child.addr == if_stmt.false_target.value:
|
|
89
|
+
elif if_stmt.false_target is not None and child.addr == if_stmt.false_target.value:
|
|
90
90
|
false_child = child
|
|
91
91
|
|
|
92
92
|
if (
|
|
@@ -184,18 +184,6 @@ class MultiSimplifierAILEngine(SimplifierAILEngine):
|
|
|
184
184
|
new_const = Expr.Const(const_.idx, None, const_.value * const_x0.value, const_.bits)
|
|
185
185
|
new_expr = Expr.BinaryOp(expr.idx, "Mul", [x, new_const], expr.signed, **expr.tags)
|
|
186
186
|
return new_expr
|
|
187
|
-
elif (
|
|
188
|
-
isinstance(operand_0, Expr.Convert)
|
|
189
|
-
and isinstance(operand_0.operand, Expr.BinaryOp)
|
|
190
|
-
and operand_0.operand.op == "Mul"
|
|
191
|
-
and isinstance(operand_0.operand.operands[1], Expr.Const)
|
|
192
|
-
):
|
|
193
|
-
x = operand_0.operand.operands[0]
|
|
194
|
-
new_const = Expr.Const(
|
|
195
|
-
operand_1.idx, None, operand_1.value * operand_0.operand.operands[1].value, operand_1.bits
|
|
196
|
-
)
|
|
197
|
-
new_expr = Expr.BinaryOp(expr.idx, "Mul", [x, new_const], expr.signed, **expr.tags)
|
|
198
|
-
return new_expr
|
|
199
187
|
|
|
200
188
|
if (operand_0, operand_1) != (expr.operands[0], expr.operands[1]):
|
|
201
189
|
return Expr.BinaryOp(expr.idx, "Mul", [operand_0, operand_1], expr.signed, **expr.tags)
|
|
@@ -35,11 +35,12 @@ class OptimizationPassStage(Enum):
|
|
|
35
35
|
|
|
36
36
|
AFTER_AIL_GRAPH_CREATION = 0
|
|
37
37
|
AFTER_SINGLE_BLOCK_SIMPLIFICATION = 1
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
38
|
+
AFTER_MAKING_CALLSITES = 2
|
|
39
|
+
AFTER_GLOBAL_SIMPLIFICATION = 3
|
|
40
|
+
AFTER_VARIABLE_RECOVERY = 4
|
|
41
|
+
BEFORE_REGION_IDENTIFICATION = 5
|
|
42
|
+
DURING_REGION_IDENTIFICATION = 6
|
|
43
|
+
AFTER_STRUCTURING = 7
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
class BaseOptimizationPass:
|
|
@@ -53,6 +54,8 @@ class BaseOptimizationPass:
|
|
|
53
54
|
STRUCTURING: Optional[
|
|
54
55
|
str
|
|
55
56
|
] = None # specifies if this optimization pass is specific to a certain structuring algorithm
|
|
57
|
+
NAME = "N/A"
|
|
58
|
+
DESCRIPTION = "N/A"
|
|
56
59
|
|
|
57
60
|
def __init__(self, func):
|
|
58
61
|
self._func: "Function" = func
|