angr 9.2.97__py3-none-macosx_10_9_x86_64.whl → 9.2.99__py3-none-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_base.py +14 -1
- angr/analyses/cfg/cfg_fast.py +3 -3
- angr/analyses/cfg/indirect_jump_resolvers/propagator_utils.py +10 -6
- angr/analyses/decompiler/clinic.py +2 -40
- angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +380 -0
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +10 -2
- angr/analyses/decompiler/optimization_passes/x86_gcc_getpc_simplifier.py +4 -1
- angr/analyses/decompiler/peephole_optimizations/__init__.py +1 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +71 -3
- angr/analyses/decompiler/peephole_optimizations/inlined_wstrcpy.py +162 -0
- angr/analyses/decompiler/region_simplifiers/expr_folding.py +5 -3
- angr/analyses/decompiler/return_maker.py +71 -0
- angr/analyses/decompiler/structured_codegen/__init__.py +1 -1
- angr/analyses/decompiler/structured_codegen/c.py +72 -99
- angr/analyses/decompiler/utils.py +5 -1
- angr/analyses/propagator/engine_vex.py +15 -0
- angr/analyses/reaching_definitions/engine_vex.py +6 -0
- angr/analyses/variable_recovery/engine_vex.py +6 -0
- angr/analyses/variable_recovery/irsb_scanner.py +12 -0
- angr/engines/light/engine.py +126 -15
- angr/knowledge_plugins/functions/function.py +4 -0
- angr/lib/angr_native.dylib +0 -0
- angr/storage/memory_mixins/paged_memory/pages/list_page.py +20 -5
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -1
- angr/storage/memory_mixins/simple_interface_mixin.py +4 -0
- {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/METADATA +6 -6
- {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/RECORD +33 -30
- {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/LICENSE +0 -0
- {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/WHEEL +0 -0
- {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/entry_points.txt +0 -0
- {angr-9.2.97.dist-info → angr-9.2.99.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/analyses/cfg/cfg_base.py
CHANGED
|
@@ -8,7 +8,7 @@ from sortedcontainers import SortedDict
|
|
|
8
8
|
|
|
9
9
|
import pyvex
|
|
10
10
|
from claripy.utils.orderedset import OrderedSet
|
|
11
|
-
from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec
|
|
11
|
+
from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec, XBE
|
|
12
12
|
from cle.backends import NamedRegion
|
|
13
13
|
import archinfo
|
|
14
14
|
from archinfo.arch_soot import SootAddressDescriptor
|
|
@@ -778,6 +778,17 @@ class CFGBase(Analysis):
|
|
|
778
778
|
tpl = (section.min_addr, section.max_addr + 1)
|
|
779
779
|
memory_regions.append(tpl)
|
|
780
780
|
|
|
781
|
+
elif isinstance(b, XBE):
|
|
782
|
+
# some XBE files will mark the data sections as executable
|
|
783
|
+
for section in b.sections:
|
|
784
|
+
if (
|
|
785
|
+
section.is_executable
|
|
786
|
+
and not section.is_writable
|
|
787
|
+
and section.name not in {".data", ".rdata", ".rodata"}
|
|
788
|
+
):
|
|
789
|
+
tpl = (section.min_addr, section.max_addr + 1)
|
|
790
|
+
memory_regions.append(tpl)
|
|
791
|
+
|
|
781
792
|
elif isinstance(b, MachO):
|
|
782
793
|
if b.segments:
|
|
783
794
|
# Get all executable segments
|
|
@@ -797,9 +808,11 @@ class CFGBase(Analysis):
|
|
|
797
808
|
# a blob is entirely executable
|
|
798
809
|
tpl = (b.min_addr, b.max_addr + 1)
|
|
799
810
|
memory_regions.append(tpl)
|
|
811
|
+
|
|
800
812
|
elif isinstance(b, NamedRegion):
|
|
801
813
|
# NamedRegions have no content! Ignore
|
|
802
814
|
pass
|
|
815
|
+
|
|
803
816
|
elif isinstance(b, self._cle_pseudo_objects):
|
|
804
817
|
pass
|
|
805
818
|
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -3287,7 +3287,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
3287
3287
|
|
|
3288
3288
|
removed_nodes = set()
|
|
3289
3289
|
|
|
3290
|
-
a = None # it always
|
|
3290
|
+
a = None # it always holds the very recent non-removed node
|
|
3291
3291
|
is_arm = is_arm_arch(self.project.arch)
|
|
3292
3292
|
|
|
3293
3293
|
for i in range(len(sorted_nodes)): # pylint:disable=consider-using-enumerate
|
|
@@ -3341,7 +3341,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
3341
3341
|
# but somehow we thought b is the beginning
|
|
3342
3342
|
if a.addr + a.size == b.addr + b.size:
|
|
3343
3343
|
in_edges = len([_ for _, _, data in self.graph.in_edges([b], data=True)])
|
|
3344
|
-
if in_edges == 0:
|
|
3344
|
+
if in_edges == 0 and b in self.graph:
|
|
3345
3345
|
# we use node a to replace node b
|
|
3346
3346
|
# link all successors of b to a
|
|
3347
3347
|
for _, dst, data in self.graph.out_edges([b], data=True):
|
|
@@ -3360,7 +3360,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
3360
3360
|
|
|
3361
3361
|
# next case - if b is directly from function prologue detection, or a basic block that is a successor of
|
|
3362
3362
|
# a wrongly identified basic block, we might be totally misdecoding b
|
|
3363
|
-
if b.instruction_addrs[0] not in a.instruction_addrs:
|
|
3363
|
+
if b.instruction_addrs[0] not in a.instruction_addrs and b in self.graph:
|
|
3364
3364
|
# use a, truncate b
|
|
3365
3365
|
|
|
3366
3366
|
new_b_addr = a.addr + a.size # b starts right after a terminates
|
|
@@ -13,10 +13,14 @@ class PropagatorLoadCallback:
|
|
|
13
13
|
# only allow loading if the address falls into a read-only region
|
|
14
14
|
if isinstance(addr, claripy.ast.BV) and addr.op == "BVV":
|
|
15
15
|
addr_v = addr.args[0]
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
16
|
+
elif isinstance(addr, int):
|
|
17
|
+
addr_v = addr
|
|
18
|
+
else:
|
|
19
|
+
return False
|
|
20
|
+
section = self.project.loader.find_section_containing(addr_v)
|
|
21
|
+
if section is not None:
|
|
22
|
+
return section.is_readable and not section.is_writable
|
|
23
|
+
segment = self.project.loader.find_segment_containing(addr_v)
|
|
24
|
+
if segment is not None:
|
|
25
|
+
return segment.is_readable and not segment.is_writable
|
|
22
26
|
return False
|
|
@@ -33,6 +33,7 @@ from ...procedures.stubs.UnresolvableJumpTarget import UnresolvableJumpTarget
|
|
|
33
33
|
from .. import Analysis, register_analysis
|
|
34
34
|
from ..cfg.cfg_base import CFGBase
|
|
35
35
|
from ..reaching_definitions import ReachingDefinitionsAnalysis
|
|
36
|
+
from .return_maker import ReturnMaker
|
|
36
37
|
from .ailgraph_walker import AILGraphWalker, RemoveNodeNotice
|
|
37
38
|
from .optimization_passes import (
|
|
38
39
|
get_default_optimization_passes,
|
|
@@ -1054,46 +1055,7 @@ class Clinic(Analysis):
|
|
|
1054
1055
|
# unknown calling convention. cannot do much about return expressions.
|
|
1055
1056
|
return ail_graph
|
|
1056
1057
|
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
def _handle_Return(
|
|
1060
|
-
stmt_idx: int, stmt: ailment.Stmt.Return, block: Optional[ailment.Block]
|
|
1061
|
-
): # pylint:disable=unused-argument
|
|
1062
|
-
if (
|
|
1063
|
-
block is not None
|
|
1064
|
-
and not stmt.ret_exprs
|
|
1065
|
-
and self.function.prototype is not None
|
|
1066
|
-
and self.function.prototype.returnty is not None
|
|
1067
|
-
and type(self.function.prototype.returnty) is not SimTypeBottom
|
|
1068
|
-
):
|
|
1069
|
-
new_stmt = stmt.copy()
|
|
1070
|
-
ret_val = self.function.calling_convention.return_val(self.function.prototype.returnty)
|
|
1071
|
-
if isinstance(ret_val, SimRegArg):
|
|
1072
|
-
reg = self.project.arch.registers[ret_val.reg_name]
|
|
1073
|
-
new_stmt.ret_exprs.append(
|
|
1074
|
-
ailment.Expr.Register(
|
|
1075
|
-
self._next_atom(),
|
|
1076
|
-
None,
|
|
1077
|
-
reg[0],
|
|
1078
|
-
ret_val.size * self.project.arch.byte_width,
|
|
1079
|
-
reg_name=self.project.arch.translate_register_name(reg[0], ret_val.size),
|
|
1080
|
-
)
|
|
1081
|
-
)
|
|
1082
|
-
else:
|
|
1083
|
-
l.warning("Unsupported type of return expression %s.", type(ret_val))
|
|
1084
|
-
block.statements[stmt_idx] = new_stmt
|
|
1085
|
-
|
|
1086
|
-
def _handler(block):
|
|
1087
|
-
walker = ailment.AILBlockWalker()
|
|
1088
|
-
# we don't need to handle any statement besides Returns
|
|
1089
|
-
walker.stmt_handlers.clear()
|
|
1090
|
-
walker.expr_handlers.clear()
|
|
1091
|
-
walker.stmt_handlers[ailment.Stmt.Return] = _handle_Return
|
|
1092
|
-
walker.walk(block)
|
|
1093
|
-
|
|
1094
|
-
# Graph walker
|
|
1095
|
-
|
|
1096
|
-
AILGraphWalker(ail_graph, _handler, replace_nodes=True).walk()
|
|
1058
|
+
ReturnMaker(self._ail_manager, self.project.arch, self.function, ail_graph)
|
|
1097
1059
|
|
|
1098
1060
|
return ail_graph
|
|
1099
1061
|
|
|
@@ -25,6 +25,7 @@ from .win_stack_canary_simplifier import WinStackCanarySimplifier
|
|
|
25
25
|
from .cross_jump_reverter import CrossJumpReverter
|
|
26
26
|
from .code_motion import CodeMotionOptimization
|
|
27
27
|
from .switch_default_case_duplicator import SwitchDefaultCaseDuplicator
|
|
28
|
+
from .inlined_string_transformation_simplifier import InlinedStringTransformationSimplifier
|
|
28
29
|
|
|
29
30
|
# order matters!
|
|
30
31
|
_all_optimization_passes = [
|
|
@@ -49,6 +50,7 @@ _all_optimization_passes = [
|
|
|
49
50
|
(CodeMotionOptimization, True),
|
|
50
51
|
(CrossJumpReverter, True),
|
|
51
52
|
(FlipBooleanCmp, True),
|
|
53
|
+
(InlinedStringTransformationSimplifier, True),
|
|
52
54
|
]
|
|
53
55
|
|
|
54
56
|
# these passes may duplicate code to remove gotos or improve the structure of the graph
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
# pylint:disable=arguments-renamed,too-many-boolean-expressions,no-self-use
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from typing import Any, DefaultDict
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
|
|
6
|
+
from archinfo import Endness
|
|
7
|
+
from ailment.expression import Const, Register, Load, StackBaseOffset, Convert, BinaryOp
|
|
8
|
+
from ailment.statement import Store, ConditionalJump, Jump
|
|
9
|
+
import claripy
|
|
10
|
+
|
|
11
|
+
from angr.engines.light import SimEngineLightAILMixin
|
|
12
|
+
from angr.storage.memory_mixins import (
|
|
13
|
+
SimpleInterfaceMixin,
|
|
14
|
+
DefaultFillerMixin,
|
|
15
|
+
PagedMemoryMixin,
|
|
16
|
+
UltraPagesMixin,
|
|
17
|
+
)
|
|
18
|
+
from angr.code_location import CodeLocation
|
|
19
|
+
from angr.errors import SimMemoryMissingError
|
|
20
|
+
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FasterMemory(
|
|
24
|
+
SimpleInterfaceMixin,
|
|
25
|
+
DefaultFillerMixin,
|
|
26
|
+
UltraPagesMixin,
|
|
27
|
+
PagedMemoryMixin,
|
|
28
|
+
):
|
|
29
|
+
"""
|
|
30
|
+
A fast memory model used in InlinedStringTransformationState.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class InlinedStringTransformationState:
|
|
35
|
+
"""
|
|
36
|
+
The abstract state used in InlinedStringTransformationAILEngine.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
def __init__(self, project):
|
|
40
|
+
self.arch = project.arch
|
|
41
|
+
self.project = project
|
|
42
|
+
|
|
43
|
+
self.registers = FasterMemory(memory_id="reg")
|
|
44
|
+
self.memory = FasterMemory(memory_id="mem")
|
|
45
|
+
|
|
46
|
+
self.registers.set_state(self)
|
|
47
|
+
self.memory.set_state(self)
|
|
48
|
+
|
|
49
|
+
def _get_weakref(self):
|
|
50
|
+
return self
|
|
51
|
+
|
|
52
|
+
def reg_store(self, reg: Register, value: claripy.Bits) -> None:
|
|
53
|
+
self.registers.store(
|
|
54
|
+
reg.reg_offset, value, size=value.size() // self.arch.byte_width, endness=str(self.arch.register_endness)
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
def reg_load(self, reg: Register) -> claripy.Bits | None:
|
|
58
|
+
try:
|
|
59
|
+
return self.registers.load(
|
|
60
|
+
reg.reg_offset, size=reg.size, endness=self.arch.register_endness, fill_missing=False
|
|
61
|
+
)
|
|
62
|
+
except SimMemoryMissingError:
|
|
63
|
+
return None
|
|
64
|
+
|
|
65
|
+
def mem_store(self, addr: int, value: claripy.Bits, endness: str) -> None:
|
|
66
|
+
self.memory.store(addr, value, size=value.size() // self.arch.byte_width, endness=endness)
|
|
67
|
+
|
|
68
|
+
def mem_load(self, addr: int, size: int, endness) -> claripy.Bits | None:
|
|
69
|
+
try:
|
|
70
|
+
return self.memory.load(addr, size=size, endness=str(endness), fill_missing=False)
|
|
71
|
+
except SimMemoryMissingError:
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class InlinedStringTransformationAILEngine(SimEngineLightAILMixin):
|
|
76
|
+
"""
|
|
77
|
+
A simple AIL execution engine
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
def __init__(self, project, nodes: dict[int, Any], start: int, end: int, step_limit: int):
|
|
81
|
+
super().__init__()
|
|
82
|
+
|
|
83
|
+
self.arch = project.arch
|
|
84
|
+
self.nodes: dict[int, Any] = nodes
|
|
85
|
+
self.start: int = start
|
|
86
|
+
self.end: int = end
|
|
87
|
+
self.step_limit: int = step_limit
|
|
88
|
+
|
|
89
|
+
self.STACK_BASE = 0x7FFF_FFF0 if self.arch.bits == 32 else 0x7FFF_FFFF_F000
|
|
90
|
+
self.MASK = 0xFFFF_FFFF if self.arch.bits == 32 else 0xFFFF_FFFF_FFFF_FFFF
|
|
91
|
+
|
|
92
|
+
state = InlinedStringTransformationState(project)
|
|
93
|
+
self.stack_accesses: DefaultDict[int, list[tuple[str, CodeLocation, claripy.Bits]]] = defaultdict(list)
|
|
94
|
+
self.finished: bool = False
|
|
95
|
+
|
|
96
|
+
i = 0
|
|
97
|
+
self.pc = self.start
|
|
98
|
+
while i < self.step_limit:
|
|
99
|
+
if self.pc not in self.nodes:
|
|
100
|
+
# jumped to a node that we do not know about
|
|
101
|
+
break
|
|
102
|
+
block = self.nodes[self.pc]
|
|
103
|
+
self._process(state, None, block=block)
|
|
104
|
+
if self.pc is None:
|
|
105
|
+
# not sure where to jump...
|
|
106
|
+
break
|
|
107
|
+
if self.pc == self.end:
|
|
108
|
+
# we reach the end of execution!
|
|
109
|
+
self.finished = True
|
|
110
|
+
break
|
|
111
|
+
i += 1
|
|
112
|
+
|
|
113
|
+
def _process_address(self, addr: Const | StackBaseOffset) -> tuple[int, str] | None:
|
|
114
|
+
if isinstance(addr, Const):
|
|
115
|
+
return addr.value, "mem"
|
|
116
|
+
if isinstance(addr, StackBaseOffset):
|
|
117
|
+
return (addr.offset + self.STACK_BASE) & self.MASK, "stack"
|
|
118
|
+
if isinstance(addr, BinaryOp) and isinstance(addr.operands[0], StackBaseOffset):
|
|
119
|
+
v0_and_type = self._process_address(addr.operands[0])
|
|
120
|
+
if v0_and_type is not None:
|
|
121
|
+
v0 = v0_and_type[0]
|
|
122
|
+
v1 = self._expr(addr.operands[1])
|
|
123
|
+
if isinstance(v1, claripy.Bits) and v1.concrete:
|
|
124
|
+
return (v0 + v1.concrete_value) & self.MASK, "stack"
|
|
125
|
+
return None
|
|
126
|
+
|
|
127
|
+
def _handle_Assignment(self, stmt):
|
|
128
|
+
if isinstance(stmt.dst, Register):
|
|
129
|
+
val = self._expr(stmt.src)
|
|
130
|
+
if isinstance(val, claripy.Bits):
|
|
131
|
+
self.state.reg_store(stmt.dst, val)
|
|
132
|
+
|
|
133
|
+
def _handle_Store(self, stmt):
|
|
134
|
+
addr_and_type = self._process_address(stmt.addr)
|
|
135
|
+
if addr_and_type is not None:
|
|
136
|
+
addr, addr_type = addr_and_type
|
|
137
|
+
val = self._expr(stmt.data)
|
|
138
|
+
if isinstance(val, claripy.ast.BV):
|
|
139
|
+
self.state.mem_store(addr, val, stmt.endness)
|
|
140
|
+
# log it
|
|
141
|
+
if addr_type == "stack":
|
|
142
|
+
for i in range(0, val.size() // self.arch.byte_width):
|
|
143
|
+
byte_off = i
|
|
144
|
+
if self.arch.memory_endness == Endness.LE:
|
|
145
|
+
byte_off = val.size() // self.arch.byte_width - i - 1
|
|
146
|
+
self.stack_accesses[addr + i].append(("store", self._codeloc(), val.get_byte(byte_off)))
|
|
147
|
+
|
|
148
|
+
def _handle_Jump(self, stmt):
|
|
149
|
+
if isinstance(stmt.target, Const):
|
|
150
|
+
self.pc = stmt.target.value
|
|
151
|
+
else:
|
|
152
|
+
self.pc = None
|
|
153
|
+
|
|
154
|
+
def _handle_ConditionalJump(self, stmt):
|
|
155
|
+
self.pc = None
|
|
156
|
+
if isinstance(stmt.true_target, Const) and isinstance(stmt.false_target, Const):
|
|
157
|
+
cond = self._expr(stmt.condition)
|
|
158
|
+
if cond is not None:
|
|
159
|
+
if isinstance(cond, claripy.Bits) and cond.concrete_value == 1:
|
|
160
|
+
self.pc = stmt.true_target.value
|
|
161
|
+
elif isinstance(cond, claripy.Bits) and cond.concrete_value == 0:
|
|
162
|
+
self.pc = stmt.false_target.value
|
|
163
|
+
|
|
164
|
+
def _handle_Const(self, expr):
|
|
165
|
+
return claripy.BVV(expr.value, expr.bits)
|
|
166
|
+
|
|
167
|
+
def _handle_Load(self, expr: Load):
|
|
168
|
+
addr_and_type = self._process_address(expr.addr)
|
|
169
|
+
if addr_and_type is not None:
|
|
170
|
+
addr, addr_type = addr_and_type
|
|
171
|
+
v = self.state.mem_load(addr, expr.size, expr.endness)
|
|
172
|
+
# log it
|
|
173
|
+
if addr_type == "stack" and isinstance(v, claripy.ast.BV):
|
|
174
|
+
for i in range(0, expr.size):
|
|
175
|
+
byte_off = i
|
|
176
|
+
if self.arch.memory_endness == Endness.LE:
|
|
177
|
+
byte_off = expr.size - i - 1
|
|
178
|
+
self.stack_accesses[addr + i].append(("load", self._codeloc(), v.get_byte(byte_off)))
|
|
179
|
+
return v
|
|
180
|
+
return None
|
|
181
|
+
|
|
182
|
+
def _handle_Register(self, expr: Register):
|
|
183
|
+
return self.state.reg_load(expr)
|
|
184
|
+
|
|
185
|
+
def _handle_Convert(self, expr: Convert):
|
|
186
|
+
v = self._expr(expr.operand)
|
|
187
|
+
if isinstance(v, claripy.Bits):
|
|
188
|
+
if expr.to_bits > expr.from_bits:
|
|
189
|
+
if not expr.is_signed:
|
|
190
|
+
return claripy.ZeroExt(expr.to_bits - expr.from_bits, v)
|
|
191
|
+
return claripy.SignExt(expr.to_bits - expr.from_bits, v)
|
|
192
|
+
elif expr.to_bits < expr.from_bits:
|
|
193
|
+
return claripy.Extract(expr.to_bits - 1, 0, v)
|
|
194
|
+
else:
|
|
195
|
+
return v
|
|
196
|
+
return None
|
|
197
|
+
|
|
198
|
+
def _handle_CmpEQ(self, expr):
|
|
199
|
+
op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
|
|
200
|
+
if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
|
|
201
|
+
return claripy.BVV(1, 1) if op0.concrete_value == op1.concrete_value else claripy.BVV(0, 1)
|
|
202
|
+
return None
|
|
203
|
+
|
|
204
|
+
def _handle_CmpNE(self, expr):
|
|
205
|
+
op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
|
|
206
|
+
if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
|
|
207
|
+
return claripy.BVV(1, 1) if op0.concrete_value != op1.concrete_value else claripy.BVV(0, 1)
|
|
208
|
+
return None
|
|
209
|
+
|
|
210
|
+
def _handle_CmpLT(self, expr):
|
|
211
|
+
op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
|
|
212
|
+
if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
|
|
213
|
+
return claripy.BVV(1, 1) if op0.concrete_value < op1.concrete_value else claripy.BVV(0, 1)
|
|
214
|
+
return None
|
|
215
|
+
|
|
216
|
+
def _handle_CmpLE(self, expr):
|
|
217
|
+
op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
|
|
218
|
+
if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
|
|
219
|
+
return claripy.BVV(1, 1) if op0.concrete_value <= op1.concrete_value else claripy.BVV(0, 1)
|
|
220
|
+
return None
|
|
221
|
+
|
|
222
|
+
def _handle_CmpGT(self, expr):
|
|
223
|
+
op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
|
|
224
|
+
if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
|
|
225
|
+
return claripy.BVV(1, 1) if op0.concrete_value > op1.concrete_value else claripy.BVV(0, 1)
|
|
226
|
+
return None
|
|
227
|
+
|
|
228
|
+
def _handle_CmpGE(self, expr):
|
|
229
|
+
op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
|
|
230
|
+
if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
|
|
231
|
+
return claripy.BVV(1, 1) if op0.concrete_value >= op1.concrete_value else claripy.BVV(0, 1)
|
|
232
|
+
return None
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class InlineStringTransformationDescriptor:
|
|
236
|
+
"""
|
|
237
|
+
Describes an instance of inline string transformation.
|
|
238
|
+
"""
|
|
239
|
+
|
|
240
|
+
def __init__(self, store_block, loop_body, stack_accesses, beginning_stack_offset):
|
|
241
|
+
self.store_block = store_block
|
|
242
|
+
self.loop_body = loop_body
|
|
243
|
+
self.stack_accesses = stack_accesses
|
|
244
|
+
self.beginning_stack_offset = beginning_stack_offset
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class InlinedStringTransformationSimplifier(OptimizationPass):
|
|
248
|
+
"""
|
|
249
|
+
Simplifies inlined string transformation routines.
|
|
250
|
+
"""
|
|
251
|
+
|
|
252
|
+
ARCHES = None
|
|
253
|
+
PLATFORMS = None
|
|
254
|
+
STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
|
|
255
|
+
NAME = "Simplify string transformations"
|
|
256
|
+
DESCRIPTION = "Simplify string transformations that are commonly used in obfuscated functions."
|
|
257
|
+
|
|
258
|
+
def __init__(self, func, **kwargs):
|
|
259
|
+
super().__init__(func, **kwargs)
|
|
260
|
+
self.analyze()
|
|
261
|
+
|
|
262
|
+
def _check(self):
|
|
263
|
+
string_transformation_descs = self._find_string_transformation_loops()
|
|
264
|
+
|
|
265
|
+
return bool(string_transformation_descs), {"descs": string_transformation_descs}
|
|
266
|
+
|
|
267
|
+
def _analyze(self, cache=None):
|
|
268
|
+
if not cache or "descs" not in cache:
|
|
269
|
+
return
|
|
270
|
+
|
|
271
|
+
for desc in cache["descs"]:
|
|
272
|
+
desc: InlineStringTransformationDescriptor
|
|
273
|
+
|
|
274
|
+
# remove the original statements
|
|
275
|
+
skip_stmt_indices = set()
|
|
276
|
+
for stack_accesses in desc.stack_accesses:
|
|
277
|
+
# the first element is the initial storing statement
|
|
278
|
+
codeloc = stack_accesses[0][1]
|
|
279
|
+
assert codeloc.block_addr == desc.store_block.addr
|
|
280
|
+
skip_stmt_indices.add(codeloc.stmt_idx)
|
|
281
|
+
new_statements = [
|
|
282
|
+
stmt for idx, stmt in enumerate(desc.store_block.statements) if idx not in skip_stmt_indices
|
|
283
|
+
]
|
|
284
|
+
|
|
285
|
+
# add new statements
|
|
286
|
+
store_statements = []
|
|
287
|
+
for off, stack_accesses in enumerate(desc.stack_accesses):
|
|
288
|
+
# the last element is the final storing statement
|
|
289
|
+
stack_addr = StackBaseOffset(None, self.project.arch.bits, desc.beginning_stack_offset + off)
|
|
290
|
+
new_value_ast = stack_accesses[-1][2]
|
|
291
|
+
new_value = Const(None, None, new_value_ast.concrete_value, self.project.arch.byte_width)
|
|
292
|
+
stmt = Store(
|
|
293
|
+
None,
|
|
294
|
+
stack_addr,
|
|
295
|
+
new_value,
|
|
296
|
+
1,
|
|
297
|
+
"Iend_LE",
|
|
298
|
+
ins_addr=desc.store_block.addr + desc.store_block.original_size - 1,
|
|
299
|
+
)
|
|
300
|
+
store_statements.append(stmt)
|
|
301
|
+
if new_statements and isinstance(new_statements[-1], (ConditionalJump, Jump)):
|
|
302
|
+
new_statements = new_statements[:-1] + store_statements + new_statements[-1:]
|
|
303
|
+
else:
|
|
304
|
+
new_statements += store_statements
|
|
305
|
+
|
|
306
|
+
new_store_block = desc.store_block.copy(statements=new_statements)
|
|
307
|
+
self._update_block(desc.store_block, new_store_block)
|
|
308
|
+
|
|
309
|
+
# remove the loop node
|
|
310
|
+
# since the loop node has exactly one external predecessor and one external successor, we can get rid of it
|
|
311
|
+
pred = next(iter(nn for nn in self.out_graph.predecessors(desc.loop_body) if nn is not desc.loop_body))
|
|
312
|
+
succ = next(iter(nn for nn in self.out_graph.successors(desc.loop_body) if nn is not desc.loop_body))
|
|
313
|
+
|
|
314
|
+
self.out_graph.remove_node(desc.loop_body)
|
|
315
|
+
self.out_graph.add_edge(pred, succ)
|
|
316
|
+
|
|
317
|
+
if pred.statements and isinstance(pred.statements[-1], ConditionalJump):
|
|
318
|
+
pred.statements[-1] = Jump(
|
|
319
|
+
None,
|
|
320
|
+
Const(None, None, succ.addr, self.project.arch.bits),
|
|
321
|
+
succ.idx,
|
|
322
|
+
**pred.statements[-1].tags,
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
def _find_string_transformation_loops(self):
|
|
326
|
+
# find self loops
|
|
327
|
+
self_loops = []
|
|
328
|
+
for node in self._graph.nodes:
|
|
329
|
+
preds = list(self._graph.predecessors(node))
|
|
330
|
+
succs = list(self._graph.successors(node))
|
|
331
|
+
if len(preds) == 2 and len(succs) == 2 and node in preds and node in succs:
|
|
332
|
+
pred = next(iter(nn for nn in preds if nn is not node))
|
|
333
|
+
succ = next(iter(nn for nn in succs if nn is not node))
|
|
334
|
+
if (
|
|
335
|
+
self._graph.out_degree[pred] == 1
|
|
336
|
+
and self._graph.in_degree[succ] == 1
|
|
337
|
+
or self._graph.out_degree[pred] == 2
|
|
338
|
+
and self._graph.in_degree[succ] == 2
|
|
339
|
+
and self._graph.has_edge(pred, succ)
|
|
340
|
+
):
|
|
341
|
+
# found it
|
|
342
|
+
self_loops.append(node)
|
|
343
|
+
|
|
344
|
+
if not self_loops:
|
|
345
|
+
return []
|
|
346
|
+
|
|
347
|
+
descs = []
|
|
348
|
+
for loop_node in self_loops:
|
|
349
|
+
pred = next(iter(nn for nn in self._graph.predecessors(loop_node) if nn is not loop_node))
|
|
350
|
+
succ = next(iter(nn for nn in self._graph.successors(loop_node) if nn is not loop_node))
|
|
351
|
+
engine = InlinedStringTransformationAILEngine(
|
|
352
|
+
self.project, {pred.addr: pred, loop_node.addr: loop_node}, pred.addr, succ.addr, 1024
|
|
353
|
+
)
|
|
354
|
+
if engine.finished:
|
|
355
|
+
# find the longest slide where the stack accesses are like the following:
|
|
356
|
+
# "store", code_location_a, value_a
|
|
357
|
+
# "load", code_location_b, value_a
|
|
358
|
+
# "store", code_location_b, value_b
|
|
359
|
+
# where value_a and value_b may be the same
|
|
360
|
+
candidate_stack_addrs = []
|
|
361
|
+
for stack_addr in sorted(engine.stack_accesses.keys()):
|
|
362
|
+
stack_accesses = engine.stack_accesses[stack_addr]
|
|
363
|
+
if len(stack_accesses) == 3:
|
|
364
|
+
item0, item1, item2 = stack_accesses
|
|
365
|
+
if item0[0] == "store" and item1[0] == "load" and item2[0] == "store":
|
|
366
|
+
if item0[1] != item1[1] and item1[1] == item2[1]:
|
|
367
|
+
if item0[2] is item1[2]:
|
|
368
|
+
# found one!
|
|
369
|
+
candidate_stack_addrs.append(stack_addr)
|
|
370
|
+
|
|
371
|
+
if (
|
|
372
|
+
len(candidate_stack_addrs) >= 2
|
|
373
|
+
and candidate_stack_addrs[-1] == candidate_stack_addrs[0] + len(candidate_stack_addrs) - 1
|
|
374
|
+
):
|
|
375
|
+
filtered_stack_accesses = [engine.stack_accesses[a] for a in candidate_stack_addrs]
|
|
376
|
+
stack_offset = candidate_stack_addrs[0] - engine.STACK_BASE
|
|
377
|
+
info = InlineStringTransformationDescriptor(pred, loop_node, filtered_stack_accesses, stack_offset)
|
|
378
|
+
descs.append(info)
|
|
379
|
+
|
|
380
|
+
return descs
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import logging
|
|
3
3
|
|
|
4
4
|
from ailment.statement import ConditionalJump, Assignment, Jump
|
|
5
|
-
from ailment.expression import ITE
|
|
5
|
+
from ailment.expression import ITE, Const
|
|
6
6
|
|
|
7
7
|
from ....utils.graph import subgraph_between_nodes
|
|
8
8
|
from ..utils import remove_labels, to_ail_supergraph
|
|
@@ -146,10 +146,11 @@ class ITERegionConverter(OptimizationPass):
|
|
|
146
146
|
#
|
|
147
147
|
|
|
148
148
|
new_region_head = region_head.copy()
|
|
149
|
+
conditional_jump: ConditionalJump = region_head.statements[-1]
|
|
149
150
|
addr_obj = true_stmt.src if "ins_addr" in true_stmt.src.tags else true_stmt
|
|
150
151
|
ternary_expr = ITE(
|
|
151
152
|
None,
|
|
152
|
-
|
|
153
|
+
conditional_jump.condition,
|
|
153
154
|
true_stmt.src,
|
|
154
155
|
false_stmt.src,
|
|
155
156
|
ins_addr=addr_obj.ins_addr,
|
|
@@ -160,6 +161,13 @@ class ITERegionConverter(OptimizationPass):
|
|
|
160
161
|
new_assignment.src = ternary_expr
|
|
161
162
|
new_region_head.statements[-1] = new_assignment
|
|
162
163
|
|
|
164
|
+
# add a goto statement to the region tail so it can be transformed into a break or other types of control-flow
|
|
165
|
+
# transitioning statement in the future
|
|
166
|
+
goto_stmt = Jump(
|
|
167
|
+
None, Const(None, None, region_tail.addr, self.project.arch.bits), region_tail.idx, **conditional_jump.tags
|
|
168
|
+
)
|
|
169
|
+
new_region_head.statements.append(goto_stmt)
|
|
170
|
+
|
|
163
171
|
#
|
|
164
172
|
# destroy all the old region blocks
|
|
165
173
|
#
|
|
@@ -76,7 +76,10 @@ class X86GccGetPcSimplifier(OptimizationPass):
|
|
|
76
76
|
and isinstance(block.statements[-1].target, ailment.Expr.Const)
|
|
77
77
|
):
|
|
78
78
|
call_func_addr = block.statements[-1].target.value
|
|
79
|
-
|
|
79
|
+
try:
|
|
80
|
+
call_func = self.kb.functions.get_by_addr(call_func_addr)
|
|
81
|
+
except KeyError:
|
|
82
|
+
continue
|
|
80
83
|
if "get_pc" in call_func.info:
|
|
81
84
|
results.append(
|
|
82
85
|
(key, len(block.statements) - 1, call_func.info["get_pc"], block.addr + block.original_size),
|
|
@@ -42,6 +42,7 @@ from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalC
|
|
|
42
42
|
from .rol_ror import RolRorRewriter
|
|
43
43
|
from .inlined_strcpy import InlinedStrcpy
|
|
44
44
|
from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
|
|
45
|
+
from .inlined_wstrcpy import InlinedWstrcpy
|
|
45
46
|
|
|
46
47
|
from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase, PeepholeOptimizationMultiStmtBase
|
|
47
48
|
|