angr 9.2.97__py3-none-manylinux2014_x86_64.whl → 9.2.98__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of angr has been flagged as potentially problematic by the registry diff service.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_base.py +14 -1
- angr/analyses/cfg/indirect_jump_resolvers/propagator_utils.py +10 -6
- angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +380 -0
- angr/analyses/decompiler/optimization_passes/x86_gcc_getpc_simplifier.py +4 -1
- angr/analyses/decompiler/peephole_optimizations/__init__.py +1 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +71 -3
- angr/analyses/decompiler/peephole_optimizations/inlined_wstrcpy.py +162 -0
- angr/analyses/decompiler/structured_codegen/__init__.py +1 -1
- angr/analyses/decompiler/structured_codegen/c.py +72 -99
- angr/analyses/decompiler/utils.py +5 -1
- angr/analyses/propagator/engine_vex.py +15 -0
- angr/analyses/reaching_definitions/engine_vex.py +6 -0
- angr/analyses/variable_recovery/engine_vex.py +6 -0
- angr/analyses/variable_recovery/irsb_scanner.py +12 -0
- angr/engines/light/engine.py +126 -15
- angr/knowledge_plugins/functions/function.py +4 -0
- angr/storage/memory_mixins/paged_memory/pages/list_page.py +20 -5
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -1
- angr/storage/memory_mixins/simple_interface_mixin.py +4 -0
- {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/METADATA +6 -6
- {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/RECORD +27 -25
- {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/LICENSE +0 -0
- {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/WHEEL +0 -0
- {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/entry_points.txt +0 -0
- {angr-9.2.97.dist-info → angr-9.2.98.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/analyses/cfg/cfg_base.py
CHANGED
@@ -8,7 +8,7 @@ from sortedcontainers import SortedDict
 
 import pyvex
 from claripy.utils.orderedset import OrderedSet
-from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec
+from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec, XBE
 from cle.backends import NamedRegion
 import archinfo
 from archinfo.arch_soot import SootAddressDescriptor
@@ -778,6 +778,17 @@ class CFGBase(Analysis):
                        tpl = (section.min_addr, section.max_addr + 1)
                        memory_regions.append(tpl)
 
+            elif isinstance(b, XBE):
+                # some XBE files will mark the data sections as executable
+                for section in b.sections:
+                    if (
+                        section.is_executable
+                        and not section.is_writable
+                        and section.name not in {".data", ".rdata", ".rodata"}
+                    ):
+                        tpl = (section.min_addr, section.max_addr + 1)
+                        memory_regions.append(tpl)
+
             elif isinstance(b, MachO):
                 if b.segments:
                     # Get all executable segments
@@ -797,9 +808,11 @@ class CFGBase(Analysis):
                 # a blob is entirely executable
                 tpl = (b.min_addr, b.max_addr + 1)
                 memory_regions.append(tpl)
+
             elif isinstance(b, NamedRegion):
                 # NamedRegions have no content! Ignore
                 pass
+
             elif isinstance(b, self._cle_pseudo_objects):
                 pass
 
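The XBE branch above keeps CFG recovery from scanning writable data sections that original-Xbox executables mark as executable. A minimal sketch of how the affected flags can be inspected, assuming cle recognizes the input as an XBE image; the path is a placeholder:

import angr

# "game.xbe" is a hypothetical path; any XBE image recognized by cle works.
proj = angr.Project("game.xbe", auto_load_libs=False)

# These are the flags the new branch filters on: XBE images sometimes mark
# data sections executable, which previously made CFG recovery treat them as code.
for sec in proj.loader.main_object.sections:
    print(sec.name, "X" if sec.is_executable else "-", "W" if sec.is_writable else "-")

cfg = proj.analyses.CFGFast()  # executable-region computation now skips such data sections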
angr/analyses/cfg/indirect_jump_resolvers/propagator_utils.py
CHANGED
@@ -13,10 +13,14 @@ class PropagatorLoadCallback:
         # only allow loading if the address falls into a read-only region
         if isinstance(addr, claripy.ast.BV) and addr.op == "BVV":
             addr_v = addr.args[0]
-
-
-
-
-
-
+        elif isinstance(addr, int):
+            addr_v = addr
+        else:
+            return False
+        section = self.project.loader.find_section_containing(addr_v)
+        if section is not None:
+            return section.is_readable and not section.is_writable
+        segment = self.project.loader.find_segment_containing(addr_v)
+        if segment is not None:
+            return segment.is_readable and not segment.is_writable
         return False
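The rewritten callback accepts both claripy bitvectors and plain integers and now consults section permissions first, then falls back to segments. The same read-only test can be reproduced standalone; a small sketch, assuming `proj` is an existing angr.Project:

def is_read_only(proj, addr: int) -> bool:
    # Mirrors the check above: prefer section permissions, fall back to segments.
    section = proj.loader.find_section_containing(addr)
    if section is not None:
        return section.is_readable and not section.is_writable
    segment = proj.loader.find_segment_containing(addr)
    if segment is not None:
        return segment.is_readable and not segment.is_writable
    return False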
angr/analyses/decompiler/optimization_passes/__init__.py
CHANGED
@@ -25,6 +25,7 @@ from .win_stack_canary_simplifier import WinStackCanarySimplifier
 from .cross_jump_reverter import CrossJumpReverter
 from .code_motion import CodeMotionOptimization
 from .switch_default_case_duplicator import SwitchDefaultCaseDuplicator
+from .inlined_string_transformation_simplifier import InlinedStringTransformationSimplifier
 
 # order matters!
 _all_optimization_passes = [
@@ -49,6 +50,7 @@ _all_optimization_passes = [
     (CodeMotionOptimization, True),
     (CrossJumpReverter, True),
     (FlipBooleanCmp, True),
+    (InlinedStringTransformationSimplifier, True),
 ]
 
 # these passes may duplicate code to remove gotos or improve the structure of the graph
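With these two lines the new pass is imported and appended to `_all_optimization_passes`, enabled by default. A hedged sketch of decompiling a function with the default pass list; the binary path, the presence of a recovered `main` function, and the exact platform string passed to `get_default_optimization_passes` are assumptions of this example:

import angr
from angr.analyses.decompiler.optimization_passes import get_default_optimization_passes

proj = angr.Project("/bin/true", auto_load_libs=False)  # illustrative binary path
cfg = proj.analyses.CFGFast(normalize=True)
func = proj.kb.functions["main"]  # assumes a "main" function was recovered

# Default passes for this architecture/OS; after this change the list includes
# InlinedStringTransformationSimplifier where it applies.
passes = get_default_optimization_passes(proj.arch, proj.simos.name)
dec = proj.analyses.Decompiler(func, cfg=cfg.model, optimization_passes=passes)
print(dec.codegen.text)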
angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py
ADDED
@@ -0,0 +1,380 @@
+# pylint:disable=arguments-renamed,too-many-boolean-expressions,no-self-use
+from __future__ import annotations
+from typing import Any, DefaultDict
+from collections import defaultdict
+
+from archinfo import Endness
+from ailment.expression import Const, Register, Load, StackBaseOffset, Convert, BinaryOp
+from ailment.statement import Store, ConditionalJump, Jump
+import claripy
+
+from angr.engines.light import SimEngineLightAILMixin
+from angr.storage.memory_mixins import (
+    SimpleInterfaceMixin,
+    DefaultFillerMixin,
+    PagedMemoryMixin,
+    UltraPagesMixin,
+)
+from angr.code_location import CodeLocation
+from angr.errors import SimMemoryMissingError
+from .optimization_pass import OptimizationPass, OptimizationPassStage
+
+
+class FasterMemory(
+    SimpleInterfaceMixin,
+    DefaultFillerMixin,
+    UltraPagesMixin,
+    PagedMemoryMixin,
+):
+    """
+    A fast memory model used in InlinedStringTransformationState.
+    """
+
+
+class InlinedStringTransformationState:
+    """
+    The abstract state used in InlinedStringTransformationAILEngine.
+    """
+
+    def __init__(self, project):
+        self.arch = project.arch
+        self.project = project
+
+        self.registers = FasterMemory(memory_id="reg")
+        self.memory = FasterMemory(memory_id="mem")
+
+        self.registers.set_state(self)
+        self.memory.set_state(self)
+
+    def _get_weakref(self):
+        return self
+
+    def reg_store(self, reg: Register, value: claripy.Bits) -> None:
+        self.registers.store(
+            reg.reg_offset, value, size=value.size() // self.arch.byte_width, endness=str(self.arch.register_endness)
+        )
+
+    def reg_load(self, reg: Register) -> claripy.Bits | None:
+        try:
+            return self.registers.load(
+                reg.reg_offset, size=reg.size, endness=self.arch.register_endness, fill_missing=False
+            )
+        except SimMemoryMissingError:
+            return None
+
+    def mem_store(self, addr: int, value: claripy.Bits, endness: str) -> None:
+        self.memory.store(addr, value, size=value.size() // self.arch.byte_width, endness=endness)
+
+    def mem_load(self, addr: int, size: int, endness) -> claripy.Bits | None:
+        try:
+            return self.memory.load(addr, size=size, endness=str(endness), fill_missing=False)
+        except SimMemoryMissingError:
+            return None
+
+
+class InlinedStringTransformationAILEngine(SimEngineLightAILMixin):
+    """
+    A simple AIL execution engine
+    """
+
+    def __init__(self, project, nodes: dict[int, Any], start: int, end: int, step_limit: int):
+        super().__init__()
+
+        self.arch = project.arch
+        self.nodes: dict[int, Any] = nodes
+        self.start: int = start
+        self.end: int = end
+        self.step_limit: int = step_limit
+
+        self.STACK_BASE = 0x7FFF_FFF0 if self.arch.bits == 32 else 0x7FFF_FFFF_F000
+        self.MASK = 0xFFFF_FFFF if self.arch.bits == 32 else 0xFFFF_FFFF_FFFF_FFFF
+
+        state = InlinedStringTransformationState(project)
+        self.stack_accesses: DefaultDict[int, list[tuple[str, CodeLocation, claripy.Bits]]] = defaultdict(list)
+        self.finished: bool = False
+
+        i = 0
+        self.pc = self.start
+        while i < self.step_limit:
+            if self.pc not in self.nodes:
+                # jumped to a node that we do not know about
+                break
+            block = self.nodes[self.pc]
+            self._process(state, None, block=block)
+            if self.pc is None:
+                # not sure where to jump...
+                break
+            if self.pc == self.end:
+                # we reach the end of execution!
+                self.finished = True
+                break
+            i += 1
+
+    def _process_address(self, addr: Const | StackBaseOffset) -> tuple[int, str] | None:
+        if isinstance(addr, Const):
+            return addr.value, "mem"
+        if isinstance(addr, StackBaseOffset):
+            return (addr.offset + self.STACK_BASE) & self.MASK, "stack"
+        if isinstance(addr, BinaryOp) and isinstance(addr.operands[0], StackBaseOffset):
+            v0_and_type = self._process_address(addr.operands[0])
+            if v0_and_type is not None:
+                v0 = v0_and_type[0]
+                v1 = self._expr(addr.operands[1])
+                if isinstance(v1, claripy.Bits) and v1.concrete:
+                    return (v0 + v1.concrete_value) & self.MASK, "stack"
+        return None
+
+    def _handle_Assignment(self, stmt):
+        if isinstance(stmt.dst, Register):
+            val = self._expr(stmt.src)
+            if isinstance(val, claripy.Bits):
+                self.state.reg_store(stmt.dst, val)
+
+    def _handle_Store(self, stmt):
+        addr_and_type = self._process_address(stmt.addr)
+        if addr_and_type is not None:
+            addr, addr_type = addr_and_type
+            val = self._expr(stmt.data)
+            if isinstance(val, claripy.ast.BV):
+                self.state.mem_store(addr, val, stmt.endness)
+                # log it
+                if addr_type == "stack":
+                    for i in range(0, val.size() // self.arch.byte_width):
+                        byte_off = i
+                        if self.arch.memory_endness == Endness.LE:
+                            byte_off = val.size() // self.arch.byte_width - i - 1
+                        self.stack_accesses[addr + i].append(("store", self._codeloc(), val.get_byte(byte_off)))
+
+    def _handle_Jump(self, stmt):
+        if isinstance(stmt.target, Const):
+            self.pc = stmt.target.value
+        else:
+            self.pc = None
+
+    def _handle_ConditionalJump(self, stmt):
+        self.pc = None
+        if isinstance(stmt.true_target, Const) and isinstance(stmt.false_target, Const):
+            cond = self._expr(stmt.condition)
+            if cond is not None:
+                if isinstance(cond, claripy.Bits) and cond.concrete_value == 1:
+                    self.pc = stmt.true_target.value
+                elif isinstance(cond, claripy.Bits) and cond.concrete_value == 0:
+                    self.pc = stmt.false_target.value
+
+    def _handle_Const(self, expr):
+        return claripy.BVV(expr.value, expr.bits)
+
+    def _handle_Load(self, expr: Load):
+        addr_and_type = self._process_address(expr.addr)
+        if addr_and_type is not None:
+            addr, addr_type = addr_and_type
+            v = self.state.mem_load(addr, expr.size, expr.endness)
+            # log it
+            if addr_type == "stack" and isinstance(v, claripy.ast.BV):
+                for i in range(0, expr.size):
+                    byte_off = i
+                    if self.arch.memory_endness == Endness.LE:
+                        byte_off = expr.size - i - 1
+                    self.stack_accesses[addr + i].append(("load", self._codeloc(), v.get_byte(byte_off)))
+            return v
+        return None
+
+    def _handle_Register(self, expr: Register):
+        return self.state.reg_load(expr)
+
+    def _handle_Convert(self, expr: Convert):
+        v = self._expr(expr.operand)
+        if isinstance(v, claripy.Bits):
+            if expr.to_bits > expr.from_bits:
+                if not expr.is_signed:
+                    return claripy.ZeroExt(expr.to_bits - expr.from_bits, v)
+                return claripy.SignExt(expr.to_bits - expr.from_bits, v)
+            elif expr.to_bits < expr.from_bits:
+                return claripy.Extract(expr.to_bits - 1, 0, v)
+            else:
+                return v
+        return None
+
+    def _handle_CmpEQ(self, expr):
+        op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+        if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+            return claripy.BVV(1, 1) if op0.concrete_value == op1.concrete_value else claripy.BVV(0, 1)
+        return None
+
+    def _handle_CmpNE(self, expr):
+        op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+        if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+            return claripy.BVV(1, 1) if op0.concrete_value != op1.concrete_value else claripy.BVV(0, 1)
+        return None
+
+    def _handle_CmpLT(self, expr):
+        op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+        if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+            return claripy.BVV(1, 1) if op0.concrete_value < op1.concrete_value else claripy.BVV(0, 1)
+        return None
+
+    def _handle_CmpLE(self, expr):
+        op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+        if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+            return claripy.BVV(1, 1) if op0.concrete_value <= op1.concrete_value else claripy.BVV(0, 1)
+        return None
+
+    def _handle_CmpGT(self, expr):
+        op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+        if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+            return claripy.BVV(1, 1) if op0.concrete_value > op1.concrete_value else claripy.BVV(0, 1)
+        return None
+
+    def _handle_CmpGE(self, expr):
+        op0, op1 = self._expr(expr.operands[0]), self._expr(expr.operands[1])
+        if isinstance(op0, claripy.Bits) and isinstance(op1, claripy.Bits) and op0.concrete and op1.concrete:
+            return claripy.BVV(1, 1) if op0.concrete_value >= op1.concrete_value else claripy.BVV(0, 1)
+        return None
+
+
+class InlineStringTransformationDescriptor:
+    """
+    Describes an instance of inline string transformation.
+    """
+
+    def __init__(self, store_block, loop_body, stack_accesses, beginning_stack_offset):
+        self.store_block = store_block
+        self.loop_body = loop_body
+        self.stack_accesses = stack_accesses
+        self.beginning_stack_offset = beginning_stack_offset
+
+
+class InlinedStringTransformationSimplifier(OptimizationPass):
+    """
+    Simplifies inlined string transformation routines.
+    """
+
+    ARCHES = None
+    PLATFORMS = None
+    STAGE = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
+    NAME = "Simplify string transformations"
+    DESCRIPTION = "Simplify string transformations that are commonly used in obfuscated functions."
+
+    def __init__(self, func, **kwargs):
+        super().__init__(func, **kwargs)
+        self.analyze()
+
+    def _check(self):
+        string_transformation_descs = self._find_string_transformation_loops()
+
+        return bool(string_transformation_descs), {"descs": string_transformation_descs}
+
+    def _analyze(self, cache=None):
+        if not cache or "descs" not in cache:
+            return
+
+        for desc in cache["descs"]:
+            desc: InlineStringTransformationDescriptor
+
+            # remove the original statements
+            skip_stmt_indices = set()
+            for stack_accesses in desc.stack_accesses:
+                # the first element is the initial storing statement
+                codeloc = stack_accesses[0][1]
+                assert codeloc.block_addr == desc.store_block.addr
+                skip_stmt_indices.add(codeloc.stmt_idx)
+            new_statements = [
+                stmt for idx, stmt in enumerate(desc.store_block.statements) if idx not in skip_stmt_indices
+            ]
+
+            # add new statements
+            store_statements = []
+            for off, stack_accesses in enumerate(desc.stack_accesses):
+                # the last element is the final storing statement
+                stack_addr = StackBaseOffset(None, self.project.arch.bits, desc.beginning_stack_offset + off)
+                new_value_ast = stack_accesses[-1][2]
+                new_value = Const(None, None, new_value_ast.concrete_value, self.project.arch.byte_width)
+                stmt = Store(
+                    None,
+                    stack_addr,
+                    new_value,
+                    1,
+                    "Iend_LE",
+                    ins_addr=desc.store_block.addr + desc.store_block.original_size - 1,
+                )
+                store_statements.append(stmt)
+            if new_statements and isinstance(new_statements[-1], (ConditionalJump, Jump)):
+                new_statements = new_statements[:-1] + store_statements + new_statements[-1:]
+            else:
+                new_statements += store_statements
+
+            new_store_block = desc.store_block.copy(statements=new_statements)
+            self._update_block(desc.store_block, new_store_block)
+
+            # remote the loop node
+            # since the loop node has exactly one external predecessor and one external successor, we can get rid of it
+            pred = next(iter(nn for nn in self.out_graph.predecessors(desc.loop_body) if nn is not desc.loop_body))
+            succ = next(iter(nn for nn in self.out_graph.successors(desc.loop_body) if nn is not desc.loop_body))
+
+            self.out_graph.remove_node(desc.loop_body)
+            self.out_graph.add_edge(pred, succ)
+
+            if pred.statements and isinstance(pred.statements[-1], ConditionalJump):
+                pred.statements[-1] = Jump(
+                    None,
+                    Const(None, None, succ.addr, self.project.arch.bits),
+                    succ.idx,
+                    **pred.statements[-1].tags,
+                )
+
+    def _find_string_transformation_loops(self):
+        # find self loops
+        self_loops = []
+        for node in self._graph.nodes:
+            preds = list(self._graph.predecessors(node))
+            succs = list(self._graph.successors(node))
+            if len(preds) == 2 and len(succs) == 2 and node in preds and node in succs:
+                pred = next(iter(nn for nn in preds if nn is not node))
+                succ = next(iter(nn for nn in succs if nn is not node))
+                if (
+                    self._graph.out_degree[pred] == 1
+                    and self._graph.in_degree[succ] == 1
+                    or self._graph.out_degree[pred] == 2
+                    and self._graph.in_degree[succ] == 2
+                    and self._graph.has_edge(pred, succ)
+                ):
+                    # found it
+                    self_loops.append(node)
+
+        if not self_loops:
+            return []
+
+        descs = []
+        for loop_node in self_loops:
+            pred = next(iter(nn for nn in self._graph.predecessors(loop_node) if nn is not loop_node))
+            succ = next(iter(nn for nn in self._graph.successors(loop_node) if nn is not loop_node))
+            engine = InlinedStringTransformationAILEngine(
+                self.project, {pred.addr: pred, loop_node.addr: loop_node}, pred.addr, succ.addr, 1024
+            )
+            if engine.finished:
+                # find the longest slide where the stack accesses are like the following:
+                # "store", code_location_a, value_a
+                # "load", code_location_b, value_a
+                # "store", code_location_b, value_b
+                # where value_a and value_b may be the same
+                candidate_stack_addrs = []
+                for stack_addr in sorted(engine.stack_accesses.keys()):
+                    stack_accesses = engine.stack_accesses[stack_addr]
+                    if len(stack_accesses) == 3:
+                        item0, item1, item2 = stack_accesses
+                        if item0[0] == "store" and item1[0] == "load" and item2[0] == "store":
+                            if item0[1] != item1[1] and item1[1] == item2[1]:
+                                if item0[2] is item1[2]:
+                                    # found one!
+                                    candidate_stack_addrs.append(stack_addr)
+
+                if (
+                    len(candidate_stack_addrs) >= 2
+                    and candidate_stack_addrs[-1] == candidate_stack_addrs[0] + len(candidate_stack_addrs) - 1
+                ):
+                    filtered_stack_accesses = [engine.stack_accesses[a] for a in candidate_stack_addrs]
+                    stack_offset = candidate_stack_addrs[0] - engine.STACK_BASE
+                    info = InlineStringTransformationDescriptor(pred, loop_node, filtered_stack_accesses, stack_offset)
+                    descs.append(info)
+
+        return descs
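The pass looks for a setup block that stores constant bytes to the stack followed by a self-loop that loads each byte and stores a transformed byte back into the same slot, then rewrites the setup block with direct stores of the decoded bytes and removes the loop. A self-contained illustration of the store/load/store access pattern that _find_string_transformation_loops matches; the byte values, the XOR key, and the block labels below are made up for this sketch and are not angr API:

from collections import defaultdict

# Hypothetical encoded bytes and XOR key; SETUP/LOOP stand in for CodeLocation objects.
encoded = b"\x0f\x02\x0b\x0b\x08"
SETUP, LOOP = "setup_block", "loop_body"

stack_accesses = defaultdict(list)
for off, byte in enumerate(encoded):            # the setup block stores the constants
    stack_accesses[off].append(("store", SETUP, byte))
for off in range(len(encoded)):                 # the self-loop decodes each byte in place
    loaded = stack_accesses[off][-1][2]
    stack_accesses[off].append(("load", LOOP, loaded))
    stack_accesses[off].append(("store", LOOP, loaded ^ 0x67))

# Every consecutive stack slot now shows the store/load/store triple the pass
# recognizes; the final stored values are the decoded string.
decoded = bytes(acc[-1][2] for _, acc in sorted(stack_accesses.items()))
print(decoded)  # b'hello'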
angr/analyses/decompiler/optimization_passes/x86_gcc_getpc_simplifier.py
CHANGED
@@ -76,7 +76,10 @@ class X86GccGetPcSimplifier(OptimizationPass):
                 and isinstance(block.statements[-1].target, ailment.Expr.Const)
             ):
                 call_func_addr = block.statements[-1].target.value
-
+                try:
+                    call_func = self.kb.functions.get_by_addr(call_func_addr)
+                except KeyError:
+                    continue
                 if "get_pc" in call_func.info:
                     results.append(
                         (key, len(block.statements) - 1, call_func.info["get_pc"], block.addr + block.original_size),
angr/analyses/decompiler/peephole_optimizations/__init__.py
CHANGED
@@ -42,6 +42,7 @@ from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalC
 from .rol_ror import RolRorRewriter
 from .inlined_strcpy import InlinedStrcpy
 from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
+from .inlined_wstrcpy import InlinedWstrcpy
 
 from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase, PeepholeOptimizationMultiStmtBase
 
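The newly imported InlinedWstrcpy is the wide-string counterpart of InlinedStrcpy, and the hunk that follows extends InlinedStrcpy itself to fold a run of consecutive constant stack stores into one strncpy call. A rough, self-contained illustration of that folding check; the helper names and values below are local to this sketch rather than angr API:

import string

def stride_to_int(stride):  # [(offset, stmt_idx, (value, size_bytes)), ...]
    # Pack consecutive constant stores, lowest stack offset first, into one integer.
    n, size = 0, 0
    for _, _, (value, size_bytes) in sorted(stride, key=lambda x: x[0]):
        size += size_bytes
        n = (n << (size_bytes * 8)) | value
    return n, size

def looks_like_string(v, size, min_length=4):
    # Interpret the packed integer big-endian and test whether it reads as ASCII text.
    chars = v.to_bytes(size, "big").rstrip(b"\x00")
    ok = len(chars) >= min_length and all(chr(c) in string.printable for c in chars)
    return ok, chars.decode("ascii", errors="replace")

# e.g. two 4-byte constant stores at stack offsets -0x20 and -0x1c spelling "password"
stride = [(-0x20, 3, (0x70617373, 4)), (-0x1C, 4, (0x776F7264, 4))]
print(looks_like_string(*stride_to_int(stride)))  # (True, 'password')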
angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py
CHANGED
@@ -1,10 +1,10 @@
 # pylint:disable=arguments-differ
-from typing import Tuple, Optional
+from typing import Tuple, Optional, Dict, List
 import string
 
 from archinfo import Endness
 
-from ailment.expression import Const
+from ailment.expression import Const, StackBaseOffset
 from ailment.statement import Call, Store
 
 from .base import PeepholeOptimizationStmtBase
@@ -24,7 +24,7 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
     NAME = "Simplifying inlined strcpy"
     stmt_classes = (Store,)
 
-    def optimize(self, stmt: Store, **kwargs):
+    def optimize(self, stmt: Store, stmt_idx: int = None, block=None, **kwargs):
         if isinstance(stmt.data, Const):
             r, s = self.is_integer_likely_a_string(stmt.data.value, stmt.data.size, stmt.endness)
             if r:
@@ -41,8 +41,76 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
                     **stmt.tags,
                 )
 
+        # scan forward in the current block to find all consecutive constant stores
+        if block is not None and stmt_idx is not None:
+            all_constant_stores: Dict[int, Tuple[int, Optional[Const]]] = self.collect_constant_stores(
+                block, stmt_idx
+            )
+            if all_constant_stores:
+                offsets = sorted(all_constant_stores.keys())
+                next_offset = min(offsets)
+                stride = []
+                for offset in offsets:
+                    if next_offset is not None and offset != next_offset:
+                        next_offset = None
+                        stride = []
+                    stmt_idx_, v = all_constant_stores[offset]
+                    if v is not None:
+                        stride.append((offset, stmt_idx_, v))
+                        next_offset = offset + v.size
+                    else:
+                        next_offset = None
+                        stride = []
+
+                integer, size = self.stride_to_int(stride)
+                r, s = self.is_integer_likely_a_string(integer, size, Endness.BE)
+                if r:
+                    # we remove all involved statements whose statement IDs are greater than the current one
+                    for _, stmt_idx_, _ in reversed(stride):
+                        if stmt_idx_ <= stmt_idx:
+                            continue
+                        block.statements[stmt_idx_] = None
+                    block.statements = [ss for ss in block.statements if ss is not None]
+
+                    str_id = self.kb.custom_strings.allocate(s.encode("ascii"))
+                    return Call(
+                        stmt.idx,
+                        "strncpy",
+                        args=[
+                            stmt.addr,
+                            Const(None, None, str_id, stmt.addr.bits, custom_string=True),
+                            Const(None, None, len(s), self.project.arch.bits),
+                        ],
+                        **stmt.tags,
+                    )
+
         return None
 
+    @staticmethod
+    def stride_to_int(stride: List[Tuple[int, int, Const]]) -> Tuple[int, int]:
+        stride = sorted(stride, key=lambda x: x[0])
+        n = 0
+        size = 0
+        for _, _, v in stride:
+            size += v.size
+            n <<= v.bits
+            n |= v.value
+        return n, size
+
+    @staticmethod
+    def collect_constant_stores(block, starting_stmt_idx: int) -> Dict[int, Tuple[int, Optional[Const]]]:
+        r = {}
+        for idx, stmt in enumerate(block.statements):
+            if idx < starting_stmt_idx:
+                continue
+            if isinstance(stmt, Store) and isinstance(stmt.addr, StackBaseOffset) and isinstance(stmt.addr.offset, int):
+                if isinstance(stmt.data, Const):
+                    r[stmt.addr.offset] = idx, stmt.data
+                else:
+                    r[stmt.addr.offset] = idx, None
+
+        return r
+
     @staticmethod
     def is_integer_likely_a_string(
         v: int, size: int, endness: Endness, min_length: int = 4