angr 9.2.78__py3-none-win_amd64.whl → 9.2.80__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +59 -0
- angr/analyses/cfg/cfg_fast.py +140 -3
- angr/analyses/decompiler/ail_simplifier.py +8 -0
- angr/analyses/decompiler/block_simplifier.py +25 -5
- angr/analyses/decompiler/clinic.py +33 -19
- angr/analyses/decompiler/decompilation_options.py +9 -0
- angr/analyses/decompiler/optimization_passes/__init__.py +6 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +2 -2
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +2 -2
- angr/analyses/decompiler/optimization_passes/multi_simplifier.py +0 -12
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +8 -5
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +82 -12
- angr/analyses/decompiler/peephole_optimizations/__init__.py +11 -2
- angr/analyses/decompiler/peephole_optimizations/base.py +29 -2
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +14 -2
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +83 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +103 -0
- angr/analyses/decompiler/region_simplifiers/ifelse.py +19 -10
- angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -2
- angr/analyses/decompiler/structured_codegen/c.py +20 -4
- angr/analyses/decompiler/utils.py +131 -2
- angr/analyses/propagator/engine_ail.py +3 -1
- angr/analyses/propagator/engine_vex.py +45 -0
- angr/analyses/propagator/propagator.py +24 -15
- angr/analyses/proximity_graph.py +30 -0
- angr/analyses/reaching_definitions/engine_ail.py +1 -1
- angr/analyses/stack_pointer_tracker.py +55 -0
- angr/callable.py +4 -4
- angr/engines/light/engine.py +30 -18
- angr/knowledge_plugins/__init__.py +1 -0
- angr/knowledge_plugins/custom_strings.py +40 -0
- angr/knowledge_plugins/functions/function.py +29 -0
- angr/knowledge_plugins/propagations/propagation_model.py +4 -0
- angr/knowledge_plugins/propagations/states.py +54 -4
- angr/lib/angr_native.dll +0 -0
- angr/procedures/definitions/__init__.py +2 -1
- angr/procedures/definitions/msvcr.py +0 -3
- angr/procedures/definitions/ntoskrnl.py +9 -0
- angr/procedures/win32_kernel/ExAllocatePool.py +12 -0
- angr/procedures/win32_kernel/ExFreePoolWithTag.py +7 -0
- angr/procedures/win32_kernel/__init__.py +3 -0
- angr/sim_type.py +3 -0
- angr/storage/memory_mixins/__init__.py +1 -1
- angr/utils/funcid.py +128 -0
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/METADATA +6 -6
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/RECORD +55 -45
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/WHEEL +1 -1
- angr-9.2.80.dist-info/entry_points.txt +2 -0
- tests/analyses/cfg/test_cfgfast.py +24 -0
- tests/analyses/decompiler/test_decompiler.py +128 -0
- tests/analyses/test_constantpropagation.py +34 -0
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/LICENSE +0 -0
- {angr-9.2.78.dist-info → angr-9.2.80.dist-info}/top_level.txt +0 -0
|
@@ -6,6 +6,7 @@ import logging
|
|
|
6
6
|
import ailment
|
|
7
7
|
import cle
|
|
8
8
|
|
|
9
|
+
from angr.utils.funcid import is_function_security_check_cookie
|
|
9
10
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
10
11
|
|
|
11
12
|
|
|
@@ -48,24 +49,24 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
48
49
|
return False, None
|
|
49
50
|
|
|
50
51
|
# Check the first block and see if there is any statement reading data from _security_cookie
|
|
51
|
-
|
|
52
|
+
init_stmts = self._find_canary_init_stmt()
|
|
52
53
|
|
|
53
|
-
return
|
|
54
|
+
return init_stmts is not None, {"init_stmts": init_stmts}
|
|
54
55
|
|
|
55
56
|
def _analyze(self, cache=None):
|
|
56
|
-
|
|
57
|
+
init_stmts = None
|
|
57
58
|
if cache is not None:
|
|
58
|
-
|
|
59
|
+
init_stmts = cache.get("init_stmts", None)
|
|
59
60
|
|
|
60
|
-
if
|
|
61
|
-
|
|
61
|
+
if init_stmts is None:
|
|
62
|
+
init_stmts = self._find_canary_init_stmt()
|
|
62
63
|
|
|
63
|
-
if
|
|
64
|
+
if init_stmts is None:
|
|
64
65
|
return
|
|
65
66
|
|
|
66
67
|
# Look for the statement that loads back canary value from the stack
|
|
67
|
-
first_block,
|
|
68
|
-
canary_init_stmt = first_block.statements[
|
|
68
|
+
first_block, canary_init_stmt_ids = init_stmts
|
|
69
|
+
canary_init_stmt = first_block.statements[canary_init_stmt_ids[-1]]
|
|
69
70
|
# where is the stack canary stored?
|
|
70
71
|
if not isinstance(canary_init_stmt.addr, ailment.Expr.StackBaseOffset):
|
|
71
72
|
_l.debug(
|
|
@@ -142,7 +143,8 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
142
143
|
if found_endpoints:
|
|
143
144
|
# Remove the statement that loads the stack canary from fs
|
|
144
145
|
first_block_copy = first_block.copy()
|
|
145
|
-
|
|
146
|
+
for stmt_idx in sorted(canary_init_stmt_ids, reverse=True):
|
|
147
|
+
first_block_copy.statements.pop(stmt_idx)
|
|
146
148
|
self._update_block(first_block, first_block_copy)
|
|
147
149
|
|
|
148
150
|
def _find_canary_init_stmt(self):
|
|
@@ -150,7 +152,13 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
150
152
|
if first_block is None:
|
|
151
153
|
return None
|
|
152
154
|
|
|
155
|
+
load_stmt_idx = None
|
|
156
|
+
load_reg = None
|
|
157
|
+
xor_stmt_idx = None
|
|
158
|
+
xored_reg = None
|
|
159
|
+
|
|
153
160
|
for idx, stmt in enumerate(first_block.statements):
|
|
161
|
+
# if we are lucky and things get folded into one statement:
|
|
154
162
|
if (
|
|
155
163
|
isinstance(stmt, ailment.Stmt.Store)
|
|
156
164
|
and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
|
|
@@ -163,13 +171,51 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
163
171
|
# Check addr: must be __security_cookie
|
|
164
172
|
load_addr = stmt.data.operands[0].addr.value
|
|
165
173
|
if load_addr == self._security_cookie_addr:
|
|
166
|
-
return first_block, idx
|
|
174
|
+
return first_block, [idx]
|
|
175
|
+
# or if we are unlucky and the load and the xor are two different statements
|
|
176
|
+
if (
|
|
177
|
+
isinstance(stmt, ailment.Stmt.Assignment)
|
|
178
|
+
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
179
|
+
and isinstance(stmt.src, ailment.Expr.Load)
|
|
180
|
+
and isinstance(stmt.src.addr, ailment.Expr.Const)
|
|
181
|
+
):
|
|
182
|
+
load_addr = stmt.src.addr.value
|
|
183
|
+
if load_addr == self._security_cookie_addr:
|
|
184
|
+
load_stmt_idx = idx
|
|
185
|
+
load_reg = stmt.dst.reg_offset
|
|
186
|
+
if load_stmt_idx is not None and idx == load_stmt_idx + 1:
|
|
187
|
+
if (
|
|
188
|
+
isinstance(stmt, ailment.Stmt.Assignment)
|
|
189
|
+
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
190
|
+
and isinstance(stmt.src, ailment.Expr.BinaryOp)
|
|
191
|
+
and stmt.src.op == "Xor"
|
|
192
|
+
and isinstance(stmt.src.operands[0], ailment.Expr.Register)
|
|
193
|
+
and stmt.src.operands[0].reg_offset == load_reg
|
|
194
|
+
and isinstance(stmt.src.operands[1], ailment.Expr.StackBaseOffset)
|
|
195
|
+
):
|
|
196
|
+
xor_stmt_idx = idx
|
|
197
|
+
xored_reg = stmt.dst.reg_offset
|
|
198
|
+
else:
|
|
199
|
+
break
|
|
200
|
+
if xor_stmt_idx is not None and idx == xor_stmt_idx + 1:
|
|
201
|
+
if (
|
|
202
|
+
isinstance(stmt, ailment.Stmt.Store)
|
|
203
|
+
and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
|
|
204
|
+
and isinstance(stmt.data, ailment.Expr.Register)
|
|
205
|
+
and stmt.data.reg_offset == xored_reg
|
|
206
|
+
):
|
|
207
|
+
return first_block, [load_stmt_idx, xor_stmt_idx, idx]
|
|
208
|
+
else:
|
|
209
|
+
break
|
|
167
210
|
|
|
168
211
|
return None
|
|
169
212
|
|
|
170
213
|
@staticmethod
|
|
171
214
|
def _find_amd64_canary_storing_stmt(block, canary_value_stack_offset):
|
|
215
|
+
load_stmt_idx = None
|
|
216
|
+
|
|
172
217
|
for idx, stmt in enumerate(block.statements):
|
|
218
|
+
# when we are lucky, we have one instruction
|
|
173
219
|
if (
|
|
174
220
|
isinstance(stmt, ailment.Stmt.Assignment)
|
|
175
221
|
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
@@ -185,7 +231,29 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
185
231
|
if isinstance(op1, ailment.Expr.StackBaseOffset):
|
|
186
232
|
# found it
|
|
187
233
|
return idx
|
|
188
|
-
|
|
234
|
+
# or when we are unlucky, we have two instructions...
|
|
235
|
+
if (
|
|
236
|
+
isinstance(stmt, ailment.Stmt.Assignment)
|
|
237
|
+
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
238
|
+
and stmt.dst.reg_name == "rcx"
|
|
239
|
+
and isinstance(stmt.src, ailment.Expr.Load)
|
|
240
|
+
and isinstance(stmt.src.addr, ailment.Expr.StackBaseOffset)
|
|
241
|
+
and stmt.src.addr.offset == canary_value_stack_offset
|
|
242
|
+
):
|
|
243
|
+
load_stmt_idx = idx
|
|
244
|
+
if load_stmt_idx is not None and idx == load_stmt_idx + 1:
|
|
245
|
+
if (
|
|
246
|
+
isinstance(stmt, ailment.Stmt.Assignment)
|
|
247
|
+
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
248
|
+
and isinstance(stmt.src, ailment.Expr.BinaryOp)
|
|
249
|
+
and stmt.src.op == "Xor"
|
|
250
|
+
):
|
|
251
|
+
if (
|
|
252
|
+
isinstance(stmt.src.operands[0], ailment.Expr.Register)
|
|
253
|
+
and stmt.src.operands[0].reg_name == "rcx"
|
|
254
|
+
and isinstance(stmt.src.operands[1], ailment.Expr.StackBaseOffset)
|
|
255
|
+
):
|
|
256
|
+
return idx
|
|
189
257
|
return None
|
|
190
258
|
|
|
191
259
|
@staticmethod
|
|
@@ -208,5 +276,7 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
208
276
|
func = self.kb.functions.function(addr=const_target)
|
|
209
277
|
if func.name == "_security_check_cookie":
|
|
210
278
|
return idx
|
|
279
|
+
elif is_function_security_check_cookie(func, self.project, self._security_cookie_addr):
|
|
280
|
+
return idx
|
|
211
281
|
|
|
212
282
|
return None
|
|
@@ -40,10 +40,12 @@ from .sar_to_signed_div import SarToSignedDiv
|
|
|
40
40
|
from .tidy_stack_addr import TidyStackAddr
|
|
41
41
|
from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalConjunctionsAndDisjunctions
|
|
42
42
|
from .rol_ror import RolRorRewriter
|
|
43
|
+
from .inlined_strcpy import InlinedStrcpy
|
|
44
|
+
from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
|
|
43
45
|
|
|
44
|
-
from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase
|
|
45
|
-
|
|
46
|
+
from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase, PeepholeOptimizationMultiStmtBase
|
|
46
47
|
|
|
48
|
+
MULTI_STMT_OPTS: List[Type[PeepholeOptimizationMultiStmtBase]] = []
|
|
47
49
|
STMT_OPTS: List[Type[PeepholeOptimizationStmtBase]] = []
|
|
48
50
|
EXPR_OPTS: List[Type[PeepholeOptimizationExprBase]] = []
|
|
49
51
|
|
|
@@ -55,4 +57,11 @@ for v in _g.values():
|
|
|
55
57
|
if isinstance(v, type) and issubclass(v, PeepholeOptimizationStmtBase) and v is not PeepholeOptimizationStmtBase:
|
|
56
58
|
STMT_OPTS.append(v)
|
|
57
59
|
|
|
60
|
+
if (
|
|
61
|
+
isinstance(v, type)
|
|
62
|
+
and issubclass(v, PeepholeOptimizationMultiStmtBase)
|
|
63
|
+
and v is not PeepholeOptimizationMultiStmtBase
|
|
64
|
+
):
|
|
65
|
+
MULTI_STMT_OPTS.append(v)
|
|
66
|
+
|
|
58
67
|
_g = None
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import List, Optional
|
|
2
2
|
|
|
3
3
|
from ailment.expression import BinaryOp, UnaryOp, Expression
|
|
4
|
-
from ailment.statement import Assignment
|
|
4
|
+
from ailment.statement import Statement, Assignment
|
|
5
5
|
from ailment import Block
|
|
6
6
|
from angr.project import Project
|
|
7
7
|
from angr.knowledge_base import KnowledgeBase
|
|
@@ -34,6 +34,33 @@ class PeepholeOptimizationStmtBase:
|
|
|
34
34
|
raise NotImplementedError("_optimize() is not implemented.")
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
class PeepholeOptimizationMultiStmtBase:
    """
    The base class for all peephole optimizations that are applied on multiple AIL statements at once.

    Concrete optimizations subclass this type, override :meth:`optimize`, and replace the class-level
    ``stmt_classes`` placeholder.
    """

    __slots__ = ("project", "kb", "func_addr")

    project: Optional[Project]
    kb: Optional[KnowledgeBase]
    func_addr: Optional[int]

    NAME = "Peephole Optimization - Multi-statement"
    DESCRIPTION = "Peephole Optimization - Multi-statement"
    # No statement pattern is declared on the base class.
    # NOTE(review): subclasses presumably set this to the statement-class sequences they match - confirm
    # against the code that dispatches multi-statement optimizations.
    stmt_classes = None

    def __init__(self, project: Optional[Project], kb: Optional[KnowledgeBase], func_addr: Optional[int] = None):
        """
        :param project:     The project this optimization runs against; may be None.
        :param kb:          The knowledge base this optimization may consult; may be None.
        :param func_addr:   Address of the function being optimized, if known.
        """
        self.project, self.kb, self.func_addr = project, kb, func_addr

    def optimize(self, stmts: List[Statement], stmt_idx: Optional[int] = None, block=None, **kwargs):
        """
        Optimize a run of statements. The base implementation always raises; subclasses must override it.

        :param stmts:       The statements to examine together.
        :param stmt_idx:    Optional statement index supplied by the caller.
        :param block:       Optional block supplied by the caller.
        :raises NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError("_optimize() is not implemented.")
|
|
62
|
+
|
|
63
|
+
|
|
37
64
|
class PeepholeOptimizationExprBase:
|
|
38
65
|
"""
|
|
39
66
|
The base class for all peephole optimizations that are applied on AIL expressions.
|
|
@@ -20,7 +20,7 @@ class ConstantDereferences(PeepholeOptimizationExprBase):
|
|
|
20
20
|
if sec is not None and sec.is_readable and (not sec.is_writable or "got" in sec.name):
|
|
21
21
|
# do we know the value that it's reading?
|
|
22
22
|
try:
|
|
23
|
-
val = self.project.loader.memory.unpack_word(expr.addr.value, size=
|
|
23
|
+
val = self.project.loader.memory.unpack_word(expr.addr.value, size=expr.size)
|
|
24
24
|
except KeyError:
|
|
25
25
|
return expr
|
|
26
26
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from math import gcd
|
|
2
2
|
|
|
3
|
-
from ailment.expression import BinaryOp, UnaryOp, Const
|
|
3
|
+
from ailment.expression import BinaryOp, UnaryOp, Const, Convert
|
|
4
4
|
|
|
5
5
|
from .base import PeepholeOptimizationExprBase
|
|
6
6
|
|
|
@@ -13,11 +13,13 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
13
13
|
__slots__ = ()
|
|
14
14
|
|
|
15
15
|
NAME = "Eager expression evaluation"
|
|
16
|
-
expr_classes = (BinaryOp, UnaryOp)
|
|
16
|
+
expr_classes = (BinaryOp, UnaryOp, Convert)
|
|
17
17
|
|
|
18
18
|
def optimize(self, expr, **kwargs):
|
|
19
19
|
if isinstance(expr, BinaryOp):
|
|
20
20
|
return self._optimize_binaryop(expr)
|
|
21
|
+
elif isinstance(expr, Convert):
|
|
22
|
+
return self._optimize_convert(expr)
|
|
21
23
|
elif isinstance(expr, UnaryOp):
|
|
22
24
|
return self._optimize_unaryop(expr)
|
|
23
25
|
return None
|
|
@@ -192,3 +194,13 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
|
|
|
192
194
|
return new_expr
|
|
193
195
|
|
|
194
196
|
return None
|
|
197
|
+
|
|
198
|
+
@staticmethod
def _optimize_convert(expr: Convert):
    """
    Fold a truncating conversion of an integer constant into a narrower constant.

    :param expr:    The Convert expression to examine.
    :return:        A new Const that is ``to_bits`` wide and holds the operand value masked to that width,
                    or None when the operand is not a constant, its value is not an integer, or the
                    conversion does not narrow the value.
    """
    operand = expr.operand
    if not isinstance(operand, Const):
        return None

    # Masking uses the bitwise "&" operator, which raises TypeError for non-integer values such as floats.
    # Leave such constants untouched instead of crashing the optimization pass.
    if not isinstance(operand.value, int):
        return None

    # Only truncations are folded; same-width conversions and extensions are left as they are.
    if expr.from_bits <= expr.to_bits:
        return None

    mask = (1 << expr.to_bits) - 1
    return Const(expr.idx, operand.variable, operand.value & mask, expr.to_bits, **operand.tags)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# pylint:disable=arguments-differ
|
|
2
|
+
from typing import Tuple, Optional
|
|
3
|
+
import string
|
|
4
|
+
|
|
5
|
+
from archinfo import Endness
|
|
6
|
+
|
|
7
|
+
from ailment.expression import Const
|
|
8
|
+
from ailment.statement import Call, Store
|
|
9
|
+
|
|
10
|
+
from .base import PeepholeOptimizationStmtBase
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
ASCII_PRINTABLES = set(string.printable)
|
|
14
|
+
ASCII_DIGITS = set(string.digits)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class InlinedStrcpy(PeepholeOptimizationStmtBase):
    """
    Simplifies inlined string copying logic into calls to strcpy.

    A Store of a constant whose bytes all look like printable ASCII is replaced by a ``strncpy`` call whose
    source argument refers to a string registered through ``self.kb.custom_strings``.
    """

    __slots__ = ()

    NAME = "Simplifying inlined strcpy"
    stmt_classes = (Store,)

    def optimize(self, stmt: Store, **kwargs):
        """
        Rewrite a constant store into a ``strncpy`` call when the constant is likely a string.

        :param stmt:    The Store statement to examine.
        :return:        The replacement Call statement, or None if the store is left alone.
        """
        if not isinstance(stmt.data, Const):
            return None

        is_str, text = self.is_integer_likely_a_string(stmt.data.value, stmt.data.size, stmt.endness)
        if not is_str:
            return None

        # replace it with a call to strncpy
        str_id = self.kb.custom_strings.allocate(text.encode("ascii"))
        dst = stmt.addr
        src = Const(None, None, str_id, stmt.addr.bits, custom_string=True)
        length = Const(None, None, len(text), self.project.arch.bits)
        return Call(stmt.idx, "strncpy", args=[dst, src, length], **stmt.tags)

    @staticmethod
    def is_integer_likely_a_string(
        v: int, size: int, endness: Endness, min_length: int = 4
    ) -> Tuple[bool, Optional[str]]:
        """
        Decide whether an integer constant is likely a short printable string.

        For little-endian values, bytes are taken from the least significant end until the remaining value is
        zero, so ``size`` is not consulted. For big-endian values, exactly ``size`` bytes are examined from the
        least significant end: zero bytes seen before the first non-zero byte are skipped, and a zero byte seen
        after it rejects the value.

        :param v:           The integer value to decode.
        :param size:        Number of bytes to examine (used for big-endian values only).
        :param endness:     Endness of the value.
        :param min_length:  Minimum number of characters required (four by default).
        :return:            (True, the decoded string) on success, (False, None) otherwise.
        """
        chars = []
        remaining = v

        if endness == Endness.LE:
            while remaining != 0:
                ch = chr(remaining & 0xFF)
                if ch not in ASCII_PRINTABLES:
                    return False, None
                chars.append(ch)
                remaining >>= 8

        elif endness == Endness.BE:
            seen_non_zero = False
            for _ in range(size):
                low = remaining & 0xFF
                remaining >>= 8
                if low == 0:
                    if seen_non_zero:
                        return False, None
                    continue
                seen_non_zero = True  # from here on, a zero byte rejects the value
                ch = chr(low)
                if ch not in ASCII_PRINTABLES:
                    return False, None
                chars.append(ch)
            # characters were collected from the least significant byte upwards; flip them
            chars.reverse()

        else:
            # unsupported endness
            return False, None

        too_short = len(chars) < min_length
        # up to four characters that are all digits are rejected
        digits_only = len(chars) <= 4 and all(ch in ASCII_DIGITS for ch in chars)
        if too_short or digits_only:
            return False, None
        return True, "".join(chars)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# pylint:disable=arguments-differ
|
|
2
|
+
from typing import List, Tuple, Optional
|
|
3
|
+
|
|
4
|
+
from ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset
|
|
5
|
+
from ailment.statement import Call, Store
|
|
6
|
+
|
|
7
|
+
from .base import PeepholeOptimizationMultiStmtBase
|
|
8
|
+
from .inlined_strcpy import InlinedStrcpy
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):
    """
    Consolidate multiple inlined strcpy calls.

    Works on pairs of statements: an inlined ``strncpy`` call followed by either another inlined ``strncpy``
    call or a constant Store whose destination starts right after the first string ends.
    """

    __slots__ = ()

    NAME = "Consolidate multiple inlined strcpy calls"
    stmt_classes = ((Call, Call), (Call, Store))

    def optimize(self, stmts: List[Call], **kwargs):
        """
        Merge two adjacent statements into a single string-copy call when possible.

        :param stmts:   A two-element list: the earlier statement and the statement that follows it.
        :return:        A one-element list holding the merged Call, or None if nothing was merged.
        """
        prev_stmt, cur_stmt = stmts
        if not InlinedStrcpyConsolidation._is_inlined_strcpy(prev_stmt):
            return None

        prev_bytes: bytes = self.kb.custom_strings[prev_stmt.args[1].value]
        dst = prev_stmt.args[0]
        merged = None  # stays None unless consolidation should happen

        if isinstance(cur_stmt, Call) and InlinedStrcpyConsolidation._is_inlined_strcpy(cur_stmt):
            # consolidating two calls
            cur_bytes: bytes = self.kb.custom_strings[cur_stmt.args[1].value]
            # the second destination must begin exactly where the first string ends
            if self._get_delta(dst, cur_stmt.args[0]) == len(prev_bytes):
                merged = prev_bytes + cur_bytes
        elif isinstance(cur_stmt, Store) and isinstance(cur_stmt.data, Const):
            # consolidating a call and a store, in case the store statement is storing the suffix of a string
            # (but the suffix is too short to qualify an inlined strcpy optimization)
            if self._get_delta(dst, cur_stmt.addr) == len(prev_bytes):
                if cur_stmt.size == 1 and cur_stmt.data.value == 0:
                    # it's probably the terminating null byte
                    suffix = "\x00"
                else:
                    is_str, text = InlinedStrcpy.is_integer_likely_a_string(
                        cur_stmt.data.value, cur_stmt.size, cur_stmt.endness, min_length=1
                    )
                    suffix = text if is_str else None
                if suffix is not None:
                    merged = prev_bytes + suffix.encode("ascii")

        if merged is None:
            return None

        args = [dst]
        if merged.endswith(b"\x00"):
            # a terminating null byte is present: emit strcpy and leave the terminator out of the literal
            call_name = "strcpy"
            str_id = self.kb.custom_strings.allocate(merged[:-1])
            args.append(Const(None, None, str_id, dst.bits, custom_string=True))
        else:
            call_name = "strncpy"
            str_id = self.kb.custom_strings.allocate(merged)
            args.append(Const(None, None, str_id, dst.bits, custom_string=True))
            args.append(Const(None, None, len(merged), self.project.arch.bits))

        return [Call(cur_stmt.idx, call_name, args=args, **cur_stmt.tags)]

    @staticmethod
    def _is_inlined_strcpy(stmt: Call):
        """
        Tell whether a call is a three-argument ``strncpy`` whose target is the plain string "strncpy" and
        whose second argument is a Const carrying a ``custom_string`` attribute.
        """
        if not (isinstance(stmt.target, str) and stmt.target == "strncpy"):
            return False
        return len(stmt.args) == 3 and isinstance(stmt.args[1], Const) and hasattr(stmt.args[1], "custom_string")

    @staticmethod
    def _parse_addr(addr: Expression) -> Tuple[Expression, int]:
        """
        Split an address expression into a base expression and a constant offset.

        :param addr:    The address expression to split.
        :return:        (base, offset). Expressions that cannot be split are returned as-is with offset 0.
        """
        if isinstance(addr, Register):
            return addr, 0
        if isinstance(addr, StackBaseOffset):
            # normalize to a zero-offset stack base so that two stack addresses can be compared
            return StackBaseOffset(None, addr.bits, 0), addr.offset
        if isinstance(addr, BinaryOp) and addr.op in ("Add", "Sub") and isinstance(addr.operands[1], Const):
            inner_base, inner_offset = InlinedStrcpyConsolidation._parse_addr(addr.operands[0])
            const_value = addr.operands[1].value
            if addr.op == "Add":
                return inner_base, inner_offset + const_value
            return inner_base, inner_offset - const_value

        return addr, 0

    @staticmethod
    def _get_delta(addr_0: Expression, addr_1: Expression) -> Optional[int]:
        """
        Compute how far ``addr_1`` lies past ``addr_0``.

        :return:    The offset difference when both addresses share a base (per ``likes()``), else None.
        """
        base_0, offset_0 = InlinedStrcpyConsolidation._parse_addr(addr_0)
        base_1, offset_1 = InlinedStrcpyConsolidation._parse_addr(addr_1)
        return offset_1 - offset_0 if base_0.likes(base_1) else None
|
|
@@ -42,20 +42,29 @@ class IfElseFlattener(SequenceWalker):
|
|
|
42
42
|
|
|
43
43
|
if node.true_node is not None and node.false_node is not None:
|
|
44
44
|
try:
|
|
45
|
-
|
|
45
|
+
true_last_stmts = ConditionProcessor.get_last_statements(node.true_node)
|
|
46
46
|
except EmptyBlockNotice:
|
|
47
|
-
|
|
47
|
+
true_last_stmts = None
|
|
48
48
|
if (
|
|
49
|
-
|
|
50
|
-
and None not in
|
|
51
|
-
and all(is_statement_terminating(stmt, self.functions) for stmt in
|
|
49
|
+
true_last_stmts is not None
|
|
50
|
+
and None not in true_last_stmts
|
|
51
|
+
and all(is_statement_terminating(stmt, self.functions) for stmt in true_last_stmts)
|
|
52
52
|
):
|
|
53
53
|
# all end points in the true node are returning
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
54
|
+
try:
|
|
55
|
+
false_last_stmts = ConditionProcessor.get_last_statements(node.false_node)
|
|
56
|
+
except EmptyBlockNotice:
|
|
57
|
+
false_last_stmts = None
|
|
58
|
+
if (
|
|
59
|
+
false_last_stmts is not None
|
|
60
|
+
and None not in false_last_stmts
|
|
61
|
+
and not all(is_statement_terminating(stmt, self.functions) for stmt in false_last_stmts)
|
|
62
|
+
):
|
|
63
|
+
# not all end points in the false node are returning. in this case, we remove the else node and
|
|
64
|
+
# make it a new node following node
|
|
65
|
+
else_node = node.false_node
|
|
66
|
+
node.false_node = None
|
|
67
|
+
insert_node(parent, "after", else_node, index, **kwargs)
|
|
59
68
|
|
|
60
69
|
def _handle_CascadingCondition(self, node: CascadingConditionNode, parent=None, index=None, **kwargs):
|
|
61
70
|
super()._handle_CascadingCondition(node, parent=parent, index=index, **kwargs)
|
|
@@ -24,11 +24,12 @@ class RegionSimplifier(Analysis):
|
|
|
24
24
|
Simplifies a given region.
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
|
-
def __init__(self, func, region, variable_kb=None, simplify_switches: bool = True):
|
|
27
|
+
def __init__(self, func, region, variable_kb=None, simplify_switches: bool = True, simplify_ifelse: bool = True):
|
|
28
28
|
self.func = func
|
|
29
29
|
self.region = region
|
|
30
30
|
self.variable_kb = variable_kb
|
|
31
31
|
self._simplify_switches = simplify_switches
|
|
32
|
+
self._should_simplify_ifelses = simplify_ifelse
|
|
32
33
|
|
|
33
34
|
self.goto_manager: Optional[GotoManager] = None
|
|
34
35
|
self.result = None
|
|
@@ -70,7 +71,8 @@ class RegionSimplifier(Analysis):
|
|
|
70
71
|
# Remove empty nodes
|
|
71
72
|
r = self._remove_empty_nodes(r)
|
|
72
73
|
# Remove unnecessary else branches if the if branch will always return
|
|
73
|
-
|
|
74
|
+
if self._should_simplify_ifelses:
|
|
75
|
+
r = self._simplify_ifelses(r)
|
|
74
76
|
#
|
|
75
77
|
r = self._simplify_cascading_ifs(r)
|
|
76
78
|
#
|
|
@@ -2037,11 +2037,22 @@ class CConstant(CExpression):
|
|
|
2037
2037
|
return
|
|
2038
2038
|
yield hex(self.reference_values[self._type]), self
|
|
2039
2039
|
elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeChar):
|
|
2040
|
-
refval = self.reference_values[self._type]
|
|
2041
|
-
|
|
2040
|
+
refval = self.reference_values[self._type]
|
|
2041
|
+
if isinstance(refval, MemoryData):
|
|
2042
|
+
v = refval.content.decode("utf-8")
|
|
2043
|
+
else:
|
|
2044
|
+
# it's a string
|
|
2045
|
+
assert isinstance(v, str)
|
|
2046
|
+
v = refval
|
|
2047
|
+
yield CConstant.str_to_c_str(v), self
|
|
2042
2048
|
elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeWideChar):
|
|
2043
|
-
refval = self.reference_values[self._type]
|
|
2044
|
-
|
|
2049
|
+
refval = self.reference_values[self._type]
|
|
2050
|
+
if isinstance(refval, MemoryData):
|
|
2051
|
+
v = refval.content.decode("utf_16_le")
|
|
2052
|
+
else:
|
|
2053
|
+
# it's a string
|
|
2054
|
+
v = refval
|
|
2055
|
+
yield CConstant.str_to_c_str(v, prefix="L"), self
|
|
2045
2056
|
else:
|
|
2046
2057
|
if isinstance(self.reference_values[self._type], int):
|
|
2047
2058
|
yield self.fmt_int(self.reference_values[self._type]), self
|
|
@@ -3199,6 +3210,11 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3199
3210
|
inline_string = False
|
|
3200
3211
|
function_pointer = False
|
|
3201
3212
|
|
|
3213
|
+
if reference_values is None and hasattr(expr, "reference_values"):
|
|
3214
|
+
reference_values = expr.reference_values.copy()
|
|
3215
|
+
if reference_values:
|
|
3216
|
+
type_ = next(iter(reference_values))
|
|
3217
|
+
|
|
3202
3218
|
if reference_values is None:
|
|
3203
3219
|
reference_values = {}
|
|
3204
3220
|
type_ = unpack_typeref(type_)
|