angr 9.2.148__py3-none-manylinux2014_aarch64.whl → 9.2.149__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +42 -2
- angr/analyses/cfg/cfg_emulated.py +5 -2
- angr/analyses/cfg/cfg_fast.py +48 -46
- angr/analyses/decompiler/ail_simplifier.py +65 -32
- angr/analyses/decompiler/block_simplifier.py +20 -6
- angr/analyses/decompiler/clinic.py +80 -13
- angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
- angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
- angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
- angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
- angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
- angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
- angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
- angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
- angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
- angr/analyses/decompiler/presets/fast.py +2 -0
- angr/analyses/decompiler/presets/full.py +2 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
- angr/analyses/decompiler/ssailification/ssailification.py +23 -3
- angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
- angr/analyses/decompiler/structured_codegen/c.py +141 -10
- angr/analyses/decompiler/utils.py +6 -1
- angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
- angr/analyses/typehoon/lifter.py +20 -0
- angr/analyses/typehoon/simple_solver.py +42 -9
- angr/analyses/typehoon/translator.py +4 -1
- angr/analyses/typehoon/typeconsts.py +17 -6
- angr/analyses/typehoon/typehoon.py +21 -5
- angr/analyses/variable_recovery/engine_ail.py +44 -5
- angr/analyses/variable_recovery/engine_base.py +35 -12
- angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
- angr/calling_conventions.py +23 -5
- angr/engines/light/engine.py +7 -0
- angr/knowledge_plugins/functions/function.py +68 -0
- angr/knowledge_plugins/propagations/states.py +5 -2
- angr/knowledge_plugins/variables/variable_manager.py +3 -3
- angr/procedures/definitions/__init__.py +1 -1
- angr/procedures/definitions/types_stl.py +22 -0
- angr/sim_type.py +251 -130
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/METADATA +7 -7
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/RECORD +55 -49
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/licenses/LICENSE +3 -0
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from ailment.expression import BinaryOp, Const, Convert
|
|
3
|
+
|
|
4
|
+
from .base import PeepholeOptimizationExprBase
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RewriteConvMul(PeepholeOptimizationExprBase):
    """
    Moves a multiplication by a 32-bit constant underneath a narrowing conversion.

    Shape of the rewrite, following the original author's note::

        Conv(64->32, Conv(32->64, expr) * N<64>) * N<32>
        => Conv(64->32, (Conv(32->64, expr) * N<64>) * Conv(32->64, N<32>))
    """

    __slots__ = ()

    NAME = "Rewrite Conv Mul"
    expr_classes = (BinaryOp,)

    def optimize(self, expr: BinaryOp, **kwargs):
        """
        Rewrite ``Conv(wide->narrow, X * C<64>) * C<32>`` so that both multiplications happen before the
        conversion.

        The pattern that must hold:

        - ``expr`` is a ``Mul`` whose second operand is a 32-bit ``Const``;
        - its first operand is a ``Convert`` with ``from_bits > to_bits``;
        - the converted operand is itself a ``Mul`` whose second operand is a 64-bit ``Const``.

        :param expr:    The binary operation to inspect.
        :return:        The replacement ``Convert`` expression, or None if the pattern does not match.
        """
        if expr.op != "Mul":
            return None

        rhs = expr.operands[1]
        if not isinstance(rhs, Const) or rhs.bits != 32:
            return None

        lhs = expr.operands[0]
        if not isinstance(lhs, Convert) or not lhs.from_bits > lhs.to_bits:
            return None

        narrowing, small_const = expr.operands
        inner_mul = narrowing.operand
        if not isinstance(inner_mul, BinaryOp) or inner_mul.op != "Mul":
            return None

        inner_rhs = inner_mul.operands[1]
        if not isinstance(inner_rhs, Const) or inner_rhs.bits != 64:
            return None

        # widen the small constant (unsigned) to the width the conversion starts from
        widened_const = Convert(
            small_const.idx, small_const.bits, narrowing.from_bits, False, small_const, **small_const.tags
        )
        # multiply in the wide domain, then apply the original narrowing on top of the product
        wide_mul = BinaryOp(expr.idx, "Mul", [inner_mul, widened_const], expr.signed, **expr.tags)
        return Convert(wide_mul.idx, narrowing.from_bits, narrowing.to_bits, False, wide_mul, **expr.tags)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# pylint:disable=arguments-differ,too-many-boolean-expressions,no-self-use
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from archinfo import Endness
|
|
5
|
+
from ailment.constant import UNDETERMINED_SIZE
|
|
6
|
+
from ailment.expression import Const, VirtualVariable, BinaryOp, UnaryOp, Load
|
|
7
|
+
from ailment.statement import Call, WeakAssignment
|
|
8
|
+
|
|
9
|
+
from angr.sim_type import SimTypeReference, SimCppClass
|
|
10
|
+
from angr.knowledge_plugins.key_definitions import atoms
|
|
11
|
+
from .base import PeepholeOptimizationStmtBase
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RewriteCxxOperatorCalls(PeepholeOptimizationStmtBase):
    """
    Rewrite C++ operator function calls into operations.

    Calls to functions whose demangled name denotes ``operator=`` or ``operator+`` are turned into
    ``WeakAssignment`` statements. Longer operators that merely start with the same characters (``operator==``,
    ``operator+=``, ``operator++``) are deliberately left untouched.
    """

    __slots__ = ()

    NAME = "Rewrite C++ operator function calls into operations"
    stmt_classes = (Call,)

    def optimize(self, stmt: Call, block=None, **kwargs):  # type: ignore
        """
        Rewrite a call to an overloaded C++ operator, if it is one that this optimization supports.

        :param stmt:    The call statement to inspect. Only calls with a constant target are considered.
        :param block:   Unused.
        :return:        A WeakAssignment replacing the call, or None if the call is left as it is.
        """
        assert self.project is not None

        # are we calling a function that we deem as an overridden operator function?
        if isinstance(stmt.target, Const):
            func_addr = stmt.target.value
            if not self.project.kb.functions.contains_addr(func_addr):
                return None
            func = self.project.kb.functions[func_addr]
            demangled_name = func.demangled_name
            # "operator=" must not be followed by another "=" (that would be operator==)
            if self._names_operator(demangled_name, "=", "=") and stmt.args is not None:
                return self._optimize_operator_equal(stmt)
            # "operator+" must not be followed by "+" or "=" (operator++ / operator+=)
            if self._names_operator(demangled_name, "+", "+=") and stmt.args is not None:
                return self._optimize_operator_add(stmt)
            # TODO: Support other types of C++ operator functions

        return None

    @staticmethod
    def _names_operator(demangled_name: str, op: str, extending_chars: str) -> bool:
        """
        Test whether a demangled name mentions exactly ``operator<op>`` and not a longer operator.

        :param demangled_name:  The demangled function name to search.
        :param op:              The operator symbol, e.g. ``"="``.
        :param extending_chars: Characters that, when directly following ``operator<op>``, turn it into a
                                different operator (e.g. ``"="`` for ``operator=`` -> ``operator==``).
        :return:                True if at least one occurrence of ``operator<op>`` is not extended.
        """
        token = "operator" + op
        pos = demangled_name.find(token)
        while pos != -1:
            end = pos + len(token)
            if end >= len(demangled_name) or demangled_name[end] not in extending_chars:
                return True
            pos = demangled_name.find(token, end)
        return False

    def _optimize_operator_equal(self, stmt: Call) -> WeakAssignment | None:
        """
        Turn ``operator=(&dst, src)`` into the weak assignment ``dst = src``.

        The call must have exactly two arguments and the first must be a ``Reference`` unary operation. A constant
        second argument is wrapped into a ``Load`` of undetermined size.

        :param stmt:    The call statement.
        :return:        The WeakAssignment, or None if the call does not have the expected shape.
        """
        if stmt.args and len(stmt.args) == 2 and isinstance(stmt.args[0], UnaryOp) and stmt.args[0].op == "Reference":
            dst = stmt.args[0].operand
            if isinstance(dst, VirtualVariable):
                # NOTE(review): preserve_vvar_ids and type_hints are presumably collected by the caller through the
                # base class - confirm against PeepholeOptimizationStmtBase
                self.preserve_vvar_ids.add(dst.varid)
                atom = atoms.VirtualVariable(dst.varid, dst.size, dst.category, dst.oident)
                if stmt.prototype is not None and isinstance(stmt.prototype.returnty, SimTypeReference):
                    type_hint = self._type_hint_from_typeref(stmt.prototype.returnty)
                    if type_hint is not None:
                        self.type_hints.append((atom, type_hint))
            arg1 = (
                Load(None, stmt.args[1], UNDETERMINED_SIZE, Endness.BE, **stmt.tags)
                if isinstance(stmt.args[1], Const)
                else stmt.args[1]
            )
            type_ = None
            if stmt.prototype is not None:
                dst_ty = stmt.prototype.returnty
                if isinstance(dst_ty, SimTypeReference):
                    # the destination type is the referenced type, not the reference itself
                    dst_ty = dst_ty.refs
                type_ = {"dst": dst_ty, "src": stmt.prototype.args[1]}
            return WeakAssignment(stmt.idx, stmt.args[0].operand, arg1, type=type_, **stmt.tags)  # type:ignore
        return None

    def _optimize_operator_add(self, stmt: Call) -> WeakAssignment | None:
        """
        Turn a three-argument ``operator+`` call into the weak assignment ``ret = lhs + Load(rhs)``.

        The call must have exactly three arguments, where the second is a ``Reference`` to a virtual variable and
        the third is a constant, and the call's return expression must be a virtual variable. The first argument
        is not inspected.

        :param stmt:    The call statement.
        :return:        The WeakAssignment, or None if the call does not have the expected shape.
        """
        if (
            stmt.args
            and len(stmt.args) == 3
            and isinstance(stmt.args[1], UnaryOp)
            and stmt.args[1].op == "Reference"
            and isinstance(stmt.args[1].operand, VirtualVariable)
            and isinstance(stmt.args[2], Const)
            and isinstance(stmt.ret_expr, VirtualVariable)
        ):
            arg2 = Load(None, stmt.args[2], UNDETERMINED_SIZE, Endness.BE, **stmt.tags)
            addition = BinaryOp(None, "Add", [stmt.args[1].operand, arg2], **stmt.tags)
            type_ = None
            if stmt.prototype is not None:
                dst_ty = stmt.prototype.returnty
                if isinstance(dst_ty, SimTypeReference):
                    dst_ty = dst_ty.refs
                type_ = {"dst": dst_ty, "src": stmt.prototype.args[1]}
            return WeakAssignment(stmt.idx, stmt.ret_expr, addition, type=type_, **stmt.tags)
        return None

    @staticmethod
    def _type_hint_from_typeref(typeref: SimTypeReference) -> str | None:
        """
        Derive a type hint from a reference type.

        :param typeref: The reference type to inspect.
        :return:        The unique name of the referenced C++ class, or None if the referenced type is not a
                        SimCppClass or has no unique name.
        """
        if isinstance(typeref.refs, SimCppClass) and typeref.refs.unique_name:
            return typeref.refs.unique_name
        return None
|
|
@@ -22,6 +22,7 @@ from angr.analyses.decompiler.optimization_passes import (
|
|
|
22
22
|
DeadblockRemover,
|
|
23
23
|
SwitchReusedEntryRewriter,
|
|
24
24
|
ConditionConstantPropagation,
|
|
25
|
+
DetermineLoadSizes,
|
|
25
26
|
)
|
|
26
27
|
|
|
27
28
|
|
|
@@ -49,6 +50,7 @@ preset_fast = DecompilationPreset(
|
|
|
49
50
|
InlinedStringTransformationSimplifier,
|
|
50
51
|
CallStatementRewriter,
|
|
51
52
|
ConditionConstantPropagation,
|
|
53
|
+
DetermineLoadSizes,
|
|
52
54
|
],
|
|
53
55
|
)
|
|
54
56
|
|
|
@@ -27,6 +27,7 @@ from angr.analyses.decompiler.optimization_passes import (
|
|
|
27
27
|
CallStatementRewriter,
|
|
28
28
|
SwitchReusedEntryRewriter,
|
|
29
29
|
ConditionConstantPropagation,
|
|
30
|
+
DetermineLoadSizes,
|
|
30
31
|
)
|
|
31
32
|
|
|
32
33
|
|
|
@@ -59,6 +60,7 @@ preset_full = DecompilationPreset(
|
|
|
59
60
|
CallStatementRewriter,
|
|
60
61
|
SwitchReusedEntryRewriter,
|
|
61
62
|
ConditionConstantPropagation,
|
|
63
|
+
DetermineLoadSizes,
|
|
62
64
|
],
|
|
63
65
|
)
|
|
64
66
|
|
|
@@ -6,7 +6,17 @@ import logging
|
|
|
6
6
|
from archinfo import Endness
|
|
7
7
|
from ailment.block import Block
|
|
8
8
|
from ailment.manager import Manager
|
|
9
|
-
from ailment.statement import
|
|
9
|
+
from ailment.statement import (
|
|
10
|
+
Statement,
|
|
11
|
+
Assignment,
|
|
12
|
+
Store,
|
|
13
|
+
Call,
|
|
14
|
+
Return,
|
|
15
|
+
ConditionalJump,
|
|
16
|
+
DirtyStatement,
|
|
17
|
+
Jump,
|
|
18
|
+
WeakAssignment,
|
|
19
|
+
)
|
|
10
20
|
from ailment.expression import (
|
|
11
21
|
Expression,
|
|
12
22
|
Register,
|
|
@@ -181,6 +191,19 @@ class SimEngineSSARewriting(
|
|
|
181
191
|
return new_stmt
|
|
182
192
|
return None
|
|
183
193
|
|
|
194
|
+
def _handle_stmt_WeakAssignment(self, stmt) -> WeakAssignment | None:
    """
    Rewrite both sides of a weak assignment.

    :param stmt:    The weak assignment statement to process.
    :return:        A new WeakAssignment carrying the rewritten side(s) and the original tags, or None if
                    neither side was rewritten.
    """
    # the source is processed before the destination
    rewritten_src = self._expr(stmt.src)
    rewritten_dst = self._expr(stmt.dst)

    if rewritten_dst is None and rewritten_src is None:
        return None

    # fall back to the original expression for whichever side was left untouched
    final_dst = stmt.dst if rewritten_dst is None else rewritten_dst
    final_src = stmt.src if rewritten_src is None else rewritten_src
    return WeakAssignment(stmt.idx, final_dst, final_src, **stmt.tags)  # type: ignore
|
|
206
|
+
|
|
184
207
|
def _handle_stmt_Store(self, stmt: Store) -> Store | Assignment | tuple[Assignment, ...] | None:
|
|
185
208
|
new_data = self._expr(stmt.data)
|
|
186
209
|
if stmt.guard is None:
|
|
@@ -505,7 +528,28 @@ class SimEngineSSARewriting(
|
|
|
505
528
|
return None
|
|
506
529
|
|
|
507
530
|
def _handle_expr_StackBaseOffset(self, expr):
|
|
508
|
-
|
|
531
|
+
if expr.offset not in self.state.stackvars:
|
|
532
|
+
# create it on the fly
|
|
533
|
+
vvar_id = self.get_vvid_by_def(
|
|
534
|
+
self.block.addr,
|
|
535
|
+
self.block.idx,
|
|
536
|
+
self.stmt_idx,
|
|
537
|
+
atoms.MemoryLocation(expr.offset, 1, self.project.arch.memory_endness),
|
|
538
|
+
"l",
|
|
539
|
+
)
|
|
540
|
+
vvar = VirtualVariable(
|
|
541
|
+
self.ail_manager.next_atom(),
|
|
542
|
+
vvar_id,
|
|
543
|
+
1 * self.arch.byte_width,
|
|
544
|
+
category=VirtualVariableCategory.STACK,
|
|
545
|
+
oident=expr.offset,
|
|
546
|
+
**expr.tags,
|
|
547
|
+
)
|
|
548
|
+
self.state.stackvars[expr.offset][1] = vvar
|
|
549
|
+
else:
|
|
550
|
+
sz = 1 if 1 in self.state.stackvars[expr.offset] else max(self.state.stackvars[expr.offset])
|
|
551
|
+
vvar = self.state.stackvars[expr.offset][sz]
|
|
552
|
+
return UnaryOp(expr.idx, "Reference", vvar, bits=expr.bits, **expr.tags)
|
|
509
553
|
|
|
510
554
|
def _handle_expr_VirtualVariable(self, expr):
|
|
511
555
|
return None
|
|
@@ -807,7 +851,8 @@ class SimEngineSSARewriting(
|
|
|
807
851
|
and expr.size in self.stackvar_locs[expr.addr.offset]
|
|
808
852
|
):
|
|
809
853
|
if expr.size not in self.state.stackvars[expr.addr.offset]:
|
|
810
|
-
#
|
|
854
|
+
# we have not seen its use before (which does not necessarily mean it's never created!), so we create
|
|
855
|
+
# it on the fly and record it in self.state.stackvars
|
|
811
856
|
vvar_id = self.get_vvid_by_def(
|
|
812
857
|
self.block.addr,
|
|
813
858
|
self.block.idx,
|
|
@@ -815,7 +860,7 @@ class SimEngineSSARewriting(
|
|
|
815
860
|
atoms.MemoryLocation(expr.addr.offset, expr.size, Endness(expr.endness)),
|
|
816
861
|
"l",
|
|
817
862
|
)
|
|
818
|
-
|
|
863
|
+
var = VirtualVariable(
|
|
819
864
|
self.ail_manager.next_atom(),
|
|
820
865
|
vvar_id,
|
|
821
866
|
expr.size * self.arch.byte_width,
|
|
@@ -823,6 +868,8 @@ class SimEngineSSARewriting(
|
|
|
823
868
|
oident=expr.addr.offset,
|
|
824
869
|
**expr.tags,
|
|
825
870
|
)
|
|
871
|
+
self.state.stackvars[expr.addr.offset][expr.size] = var
|
|
872
|
+
return var
|
|
826
873
|
|
|
827
874
|
# TODO: Support truncation
|
|
828
875
|
# TODO: Maybe also support concatenation
|
|
@@ -134,7 +134,9 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
|
|
|
134
134
|
if self._ssa_stackvars:
|
|
135
135
|
# for stack variables, we collect all definitions and identify stack variable locations using heuristics
|
|
136
136
|
|
|
137
|
-
stackvar_locs = self._synthesize_stackvar_locs(
|
|
137
|
+
stackvar_locs = self._synthesize_stackvar_locs(
|
|
138
|
+
[def_ for def_, _ in def_to_loc if isinstance(def_, (Store, StackBaseOffset))]
|
|
139
|
+
)
|
|
138
140
|
# handle function arguments
|
|
139
141
|
if self._func_args:
|
|
140
142
|
for func_arg in self._func_args:
|
|
@@ -173,6 +175,20 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
|
|
|
173
175
|
if def_.size in stackvar_locs[off] and def_.size < full_sz:
|
|
174
176
|
udef_to_defs[("stack", off, def_.size)].add(def_)
|
|
175
177
|
udef_to_blockkeys[("stack", off, def_.size)].add((loc.block_addr, loc.block_idx))
|
|
178
|
+
elif isinstance(def_, StackBaseOffset):
|
|
179
|
+
sz = 1
|
|
180
|
+
idx_begin = bisect_left(sorted_stackvar_offs, def_.offset)
|
|
181
|
+
for i in range(idx_begin, len(sorted_stackvar_offs)):
|
|
182
|
+
off = sorted_stackvar_offs[i]
|
|
183
|
+
if off >= def_.offset + sz:
|
|
184
|
+
break
|
|
185
|
+
full_sz = max(stackvar_locs[off])
|
|
186
|
+
udef_to_defs[("stack", off, full_sz)].add(def_)
|
|
187
|
+
udef_to_blockkeys[("stack", off, full_sz)].add((loc.block_addr, loc.block_idx))
|
|
188
|
+
# add a definition for the partial stack variable
|
|
189
|
+
if sz in stackvar_locs[off] and sz < full_sz:
|
|
190
|
+
udef_to_defs[("stack", off, sz)].add(def_)
|
|
191
|
+
udef_to_blockkeys[("stack", off, sz)].add((loc.block_addr, loc.block_idx))
|
|
176
192
|
elif isinstance(def_, Tmp):
|
|
177
193
|
# Tmps are local to each block and do not need phi nodes
|
|
178
194
|
pass
|
|
@@ -211,7 +227,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
|
|
|
211
227
|
return last_frontier
|
|
212
228
|
|
|
213
229
|
@staticmethod
|
|
214
|
-
def _synthesize_stackvar_locs(defs: list[Store]) -> dict[int, set[int]]:
|
|
230
|
+
def _synthesize_stackvar_locs(defs: list[Store | StackBaseOffset]) -> dict[int, set[int]]:
|
|
215
231
|
"""
|
|
216
232
|
Derive potential locations (in terms of offsets and sizes) for stack variables based on all stack variable
|
|
217
233
|
definitions provided.
|
|
@@ -224,7 +240,11 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
|
|
|
224
240
|
offs: set[int] = set()
|
|
225
241
|
|
|
226
242
|
for def_ in defs:
|
|
227
|
-
if isinstance(def_
|
|
243
|
+
if isinstance(def_, StackBaseOffset):
|
|
244
|
+
stack_off = def_.offset
|
|
245
|
+
accesses[stack_off].add(1)
|
|
246
|
+
offs.add(stack_off)
|
|
247
|
+
elif isinstance(def_, Store) and isinstance(def_.addr, StackBaseOffset):
|
|
228
248
|
stack_off = def_.addr.offset
|
|
229
249
|
accesses[stack_off].add(def_.size)
|
|
230
250
|
offs.add(stack_off)
|
|
@@ -60,6 +60,10 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
|
|
|
60
60
|
|
|
61
61
|
self._expr(stmt.src)
|
|
62
62
|
|
|
63
|
+
def _handle_stmt_WeakAssignment(self, stmt):
    """
    Traverse both sides of a weak assignment: the source first, then the destination.
    """
    for side in (stmt.src, stmt.dst):
        self._expr(side)
|
|
66
|
+
|
|
63
67
|
def _handle_stmt_Store(self, stmt: Store):
|
|
64
68
|
self._expr(stmt.addr)
|
|
65
69
|
self._expr(stmt.data)
|
|
@@ -149,6 +153,17 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
|
|
|
149
153
|
self.loc_to_defs[codeloc].add(expr)
|
|
150
154
|
self.state.live_stackvars.add((expr.addr.offset, expr.size))
|
|
151
155
|
|
|
156
|
+
def _handle_expr_StackBaseOffset(self, expr: StackBaseOffset):
    """
    Record a stack base offset expression as a definition at the current code location.

    Nothing is recorded when the offset is not an integer or when the (offset, size) pair is already in the
    set of live stack variables.
    """
    # the real size is unknown here, so a size of 1 is assumed for now
    assumed_size = 1

    if not isinstance(expr.offset, int):
        return
    stackvar_key = (expr.offset, assumed_size)
    if stackvar_key in self.state.live_stackvars:
        return

    loc = self._codeloc()
    self.def_to_loc.append((expr, loc))
    if loc not in self.loc_to_defs:
        self.loc_to_defs[loc] = OrderedSet()
    self.loc_to_defs[loc].add(expr)
    self.state.live_stackvars.add(stackvar_key)
|
|
166
|
+
|
|
152
167
|
def _handle_expr_Tmp(self, expr: Tmp):
|
|
153
168
|
if self.use_tmps:
|
|
154
169
|
codeloc = self._codeloc()
|
|
@@ -269,6 +284,5 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
|
|
|
269
284
|
_handle_expr_Phi = _handle_Dummy
|
|
270
285
|
_handle_expr_Const = _handle_Dummy
|
|
271
286
|
_handle_expr_MultiStatementExpression = _handle_Dummy
|
|
272
|
-
_handle_expr_StackBaseOffset = _handle_Dummy
|
|
273
287
|
_handle_expr_BasePointerOffset = _handle_Dummy
|
|
274
288
|
_handle_expr_Call = _handle_Dummy
|
|
@@ -5,8 +5,10 @@ from collections.abc import Callable
|
|
|
5
5
|
from collections import defaultdict, Counter
|
|
6
6
|
import logging
|
|
7
7
|
import struct
|
|
8
|
+
import re
|
|
8
9
|
|
|
9
10
|
from ailment import Block, Expr, Stmt, Tmp
|
|
11
|
+
from ailment.constant import UNDETERMINED_SIZE
|
|
10
12
|
from ailment.expression import StackBaseOffset, BinaryOp
|
|
11
13
|
from unique_log_filter import UniqueLogFilter
|
|
12
14
|
|
|
@@ -34,6 +36,7 @@ from angr.sim_type import (
|
|
|
34
36
|
SimTypeInt128,
|
|
35
37
|
SimTypeInt256,
|
|
36
38
|
SimTypeInt512,
|
|
39
|
+
SimCppClass,
|
|
37
40
|
)
|
|
38
41
|
from angr.knowledge_plugins.functions import Function
|
|
39
42
|
from angr.sim_variable import SimVariable, SimTemporaryVariable, SimStackVariable, SimMemoryVariable
|
|
@@ -156,6 +159,18 @@ def guess_value_type(value: int, project: angr.Project) -> SimType | None:
|
|
|
156
159
|
return None
|
|
157
160
|
|
|
158
161
|
|
|
162
|
+
def type_equals(t0: SimType, t1: SimType) -> bool:
    """
    Compare two types for equality, treating the two known spellings of std::string as the same C++ class.

    :param t0:  The first type.
    :param t1:  The second type.
    :return:    True if both are C++ classes named with the two std::string spellings (one each), otherwise the
                result of ``t0 == t1``.
    """
    # TODO: Use the information (class names, etc.) in types_stl
    std_string_spellings = {
        "std::string",
        "class std::basic_string<char, struct std::char_traits<char>, class std::allocator<char>>",
    }

    # special logic for C++ classes
    both_cxx_classes = isinstance(t0, SimCppClass) and isinstance(t1, SimCppClass)
    if both_cxx_classes and {t1.name, t0.name} == std_string_spellings:
        return True

    return t0 == t1
|
|
172
|
+
|
|
173
|
+
|
|
159
174
|
def type_to_c_repr_chunks(ty: SimType, name=None, name_type=None, full=False, indent_str=""):
|
|
160
175
|
"""
|
|
161
176
|
Helper generator function to turn a SimType into generated tuples of (C-string, AST node).
|
|
@@ -164,7 +179,10 @@ def type_to_c_repr_chunks(ty: SimType, name=None, name_type=None, full=False, in
|
|
|
164
179
|
if full:
|
|
165
180
|
# struct def preamble
|
|
166
181
|
yield indent_str, None
|
|
167
|
-
|
|
182
|
+
if isinstance(ty, SimCppClass):
|
|
183
|
+
yield "class ", None
|
|
184
|
+
else:
|
|
185
|
+
yield "typedef struct ", None
|
|
168
186
|
yield ty.name, ty
|
|
169
187
|
yield " {\n", None
|
|
170
188
|
|
|
@@ -1242,6 +1260,7 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1242
1260
|
"callee_func",
|
|
1243
1261
|
"callee_target",
|
|
1244
1262
|
"is_expr",
|
|
1263
|
+
"prettify_thiscall",
|
|
1245
1264
|
"ret_expr",
|
|
1246
1265
|
"returning",
|
|
1247
1266
|
"show_demangled_name",
|
|
@@ -1258,6 +1277,7 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1258
1277
|
is_expr: bool = False,
|
|
1259
1278
|
show_demangled_name=True,
|
|
1260
1279
|
show_disambiguated_name: bool = True,
|
|
1280
|
+
prettify_thiscall: bool = True,
|
|
1261
1281
|
tags=None,
|
|
1262
1282
|
codegen=None,
|
|
1263
1283
|
**kwargs,
|
|
@@ -1273,6 +1293,7 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1273
1293
|
self.is_expr = is_expr
|
|
1274
1294
|
self.show_demangled_name = show_demangled_name
|
|
1275
1295
|
self.show_disambiguated_name = show_disambiguated_name
|
|
1296
|
+
self.prettify_thiscall = prettify_thiscall
|
|
1276
1297
|
|
|
1277
1298
|
@property
|
|
1278
1299
|
def prototype(self) -> SimTypeFunction | None: # TODO there should be a prototype for each callsite!
|
|
@@ -1313,6 +1334,13 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1313
1334
|
|
|
1314
1335
|
return False
|
|
1315
1336
|
|
|
1337
|
+
@staticmethod
|
|
1338
|
+
def _is_func_likely_cxx_class_method(func_name: str) -> bool:
|
|
1339
|
+
if "::" not in func_name:
|
|
1340
|
+
return False
|
|
1341
|
+
chunks = func_name.split("::")
|
|
1342
|
+
return re.match(r"[a-zA-Z_][a-zA-Z0-9_]*", chunks[-1]) is not None
|
|
1343
|
+
|
|
1316
1344
|
def c_repr_chunks(self, indent=0, asexpr: bool = False):
|
|
1317
1345
|
"""
|
|
1318
1346
|
|
|
@@ -1332,8 +1360,13 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1332
1360
|
func_name = get_cpp_function_name(self.callee_func.demangled_name, specialized=False, qualified=True)
|
|
1333
1361
|
else:
|
|
1334
1362
|
func_name = self.callee_func.name
|
|
1363
|
+
if self.prettify_thiscall and self.args and self._is_func_likely_cxx_class_method(func_name):
|
|
1364
|
+
func_name = self.callee_func.short_name
|
|
1365
|
+
yield from self._c_repr_chunks_thiscall(func_name, asexpr=asexpr)
|
|
1366
|
+
return
|
|
1335
1367
|
if self.show_disambiguated_name and self._is_target_ambiguous(func_name):
|
|
1336
1368
|
func_name = self.callee_func.get_unambiguous_name(display_name=func_name)
|
|
1369
|
+
|
|
1337
1370
|
yield func_name, self
|
|
1338
1371
|
elif isinstance(self.callee_target, str):
|
|
1339
1372
|
yield self.callee_target, self
|
|
@@ -1356,6 +1389,37 @@ class CFunctionCall(CStatement, CExpression):
|
|
|
1356
1389
|
yield " /* do not return */", None
|
|
1357
1390
|
yield "\n", None
|
|
1358
1391
|
|
|
1392
|
+
def _c_repr_chunks_thiscall(self, func_name: str, asexpr: bool = False):
    """
    Generate the chunks for a call printed in method-call form, i.e. ``obj.func(args...)``.

    The first argument of the call is treated as ``this``. If it is written as ``&obj``, the reference is
    stripped and ``obj.func(...)`` is printed. If it is any other expression whose type is a pointer,
    ``expr->func(...)`` is printed, since ``.`` cannot be applied to a pointer in C++. In every other case ``.``
    is used.

    :param func_name:   The name of the callee to print.
    :param asexpr:      If True, the trailing ";" and newline are omitted.
    :return:            A generator of (string, object) chunks.
    """
    # The first argument is the `this` pointer
    assert self.args
    this_ref = self.args[0]
    if isinstance(this_ref, CUnaryOp) and this_ref.op == "Reference":
        yield from CExpression._try_c_repr_chunks(this_ref.operand)
        accessor = "."
    else:
        yield from CExpression._try_c_repr_chunks(this_ref)
        # only switch to "->" when the expression is known to be a pointer; keep "." in all other cases
        accessor = "->" if isinstance(getattr(this_ref, "type", None), SimTypePointer) else "."

    yield accessor, None
    yield func_name, self

    # the remaining arguments
    paren = CClosingObject("(")
    yield "(", paren

    for i, arg in enumerate(self.args[1:]):
        if i > 0:
            yield ", ", None
        yield from CExpression._try_c_repr_chunks(arg)

    yield ")", paren

    if not self.is_expr and not asexpr:
        yield ";", None
        if not self.returning:
            yield " /* do not return */", None
        yield "\n", None
|
|
1422
|
+
|
|
1359
1423
|
|
|
1360
1424
|
class CReturn(CStatement):
|
|
1361
1425
|
__slots__ = ("retval",)
|
|
@@ -1761,6 +1825,13 @@ class CBinaryOp(CExpression):
|
|
|
1761
1825
|
# C spec https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2596.pdf 6.3.1.8 Usual arithmetic conversions
|
|
1762
1826
|
rhs_ptr = isinstance(rhs_ty, SimTypePointer)
|
|
1763
1827
|
lhs_ptr = isinstance(lhs_ty, SimTypePointer)
|
|
1828
|
+
rhs_cls = isinstance(unpack_typeref(rhs_ty), SimCppClass)
|
|
1829
|
+
lhs_cls = isinstance(unpack_typeref(lhs_ty), SimCppClass)
|
|
1830
|
+
|
|
1831
|
+
if lhs_cls:
|
|
1832
|
+
return lhs_ty
|
|
1833
|
+
if rhs_cls:
|
|
1834
|
+
return rhs_ty
|
|
1764
1835
|
|
|
1765
1836
|
if op in ("Add", "Sub"):
|
|
1766
1837
|
if lhs_ptr and rhs_ptr:
|
|
@@ -2462,6 +2533,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2462
2533
|
# AIL statements
|
|
2463
2534
|
Stmt.Store: self._handle_Stmt_Store,
|
|
2464
2535
|
Stmt.Assignment: self._handle_Stmt_Assignment,
|
|
2536
|
+
Stmt.WeakAssignment: self._handle_Stmt_WeakAssignment,
|
|
2465
2537
|
Stmt.Call: self._handle_Stmt_Call,
|
|
2466
2538
|
Stmt.Jump: self._handle_Stmt_Jump,
|
|
2467
2539
|
Stmt.ConditionalJump: self._handle_Stmt_ConditionalJump,
|
|
@@ -2798,17 +2870,17 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2798
2870
|
|
|
2799
2871
|
if offset == 0:
|
|
2800
2872
|
data_type = renegotiate_type(data_type, base_type)
|
|
2801
|
-
if base_type
|
|
2873
|
+
if type_equals(base_type, data_type) or (
|
|
2802
2874
|
base_type.size is not None and data_type.size is not None and base_type.size < data_type.size
|
|
2803
2875
|
):
|
|
2804
2876
|
# case 1: we're done because we found it
|
|
2805
2877
|
# case 2: we're done because we can never find it and we might as well stop early
|
|
2806
2878
|
if base_expr:
|
|
2807
|
-
if base_type
|
|
2879
|
+
if not type_equals(base_type, data_type):
|
|
2808
2880
|
return _force_type_cast(base_type, data_type, base_expr)
|
|
2809
2881
|
return base_expr
|
|
2810
2882
|
|
|
2811
|
-
if base_type
|
|
2883
|
+
if not type_equals(base_type, data_type):
|
|
2812
2884
|
return _force_type_cast(base_type, data_type, expr)
|
|
2813
2885
|
return CUnaryOp("Dereference", expr, codegen=self)
|
|
2814
2886
|
|
|
@@ -3265,13 +3337,58 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3265
3337
|
csrc = self._handle(stmt.src, lvalue=False)
|
|
3266
3338
|
cdst = None
|
|
3267
3339
|
|
|
3340
|
+
src_type = csrc.type
|
|
3341
|
+
dst_type = src_type
|
|
3342
|
+
if hasattr(stmt, "type"):
|
|
3343
|
+
src_type = stmt.type.get("src", None)
|
|
3344
|
+
dst_type = stmt.type.get("dst", None)
|
|
3345
|
+
|
|
3346
|
+
if isinstance(stmt.dst, Expr.VirtualVariable) and stmt.dst.was_stack:
|
|
3347
|
+
|
|
3348
|
+
def negotiate(old_ty, proposed_ty):
|
|
3349
|
+
# transfer casts from the dst to the src if possible
|
|
3350
|
+
# if we see something like *(size_t*)&v4 = x; where v4 is a pointer, change to v4 = (void*)x;
|
|
3351
|
+
nonlocal csrc
|
|
3352
|
+
if not type_equals(old_ty, proposed_ty) and qualifies_for_simple_cast(old_ty, proposed_ty):
|
|
3353
|
+
csrc = CTypeCast(csrc.type, proposed_ty, csrc, codegen=self)
|
|
3354
|
+
return proposed_ty
|
|
3355
|
+
return old_ty
|
|
3356
|
+
|
|
3357
|
+
if stmt.dst.variable is not None:
|
|
3358
|
+
if "struct_member_info" in stmt.dst.tags:
|
|
3359
|
+
offset, var, _ = stmt.dst.struct_member_info
|
|
3360
|
+
cvar = self._variable(var, stmt.dst.size, vvar_id=stmt.dst.varid)
|
|
3361
|
+
else:
|
|
3362
|
+
cvar = self._variable(stmt.dst.variable, stmt.dst.size, vvar_id=stmt.dst.varid)
|
|
3363
|
+
offset = stmt.dst.variable_offset or 0
|
|
3364
|
+
assert type(offset) is int # I refuse to deal with the alternative
|
|
3365
|
+
|
|
3366
|
+
cdst = self._access_constant_offset(
|
|
3367
|
+
self._get_variable_reference(cvar), offset, dst_type, True, negotiate
|
|
3368
|
+
)
|
|
3369
|
+
|
|
3370
|
+
if cdst is None:
|
|
3371
|
+
cdst = self._handle(stmt.dst, lvalue=True)
|
|
3372
|
+
|
|
3373
|
+
return CAssignment(cdst, csrc, tags=stmt.tags, codegen=self)
|
|
3374
|
+
|
|
3375
|
+
def _handle_Stmt_WeakAssignment(self, stmt, **kwargs):
|
|
3376
|
+
csrc = self._handle(stmt.src, lvalue=False)
|
|
3377
|
+
cdst = None
|
|
3378
|
+
|
|
3379
|
+
src_type = csrc.type
|
|
3380
|
+
dst_type = src_type
|
|
3381
|
+
if hasattr(stmt, "type"):
|
|
3382
|
+
src_type = stmt.type.get("src", None)
|
|
3383
|
+
dst_type = stmt.type.get("dst", None)
|
|
3384
|
+
|
|
3268
3385
|
if isinstance(stmt.dst, Expr.VirtualVariable) and stmt.dst.was_stack:
|
|
3269
3386
|
|
|
3270
3387
|
def negotiate(old_ty, proposed_ty):
|
|
3271
3388
|
# transfer casts from the dst to the src if possible
|
|
3272
3389
|
# if we see something like *(size_t*)&v4 = x; where v4 is a pointer, change to v4 = (void*)x;
|
|
3273
3390
|
nonlocal csrc
|
|
3274
|
-
if old_ty
|
|
3391
|
+
if not type_equals(old_ty, proposed_ty) and qualifies_for_simple_cast(old_ty, proposed_ty):
|
|
3275
3392
|
csrc = CTypeCast(csrc.type, proposed_ty, csrc, codegen=self)
|
|
3276
3393
|
return proposed_ty
|
|
3277
3394
|
return old_ty
|
|
@@ -3286,7 +3403,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3286
3403
|
assert type(offset) is int # I refuse to deal with the alternative
|
|
3287
3404
|
|
|
3288
3405
|
cdst = self._access_constant_offset(
|
|
3289
|
-
self._get_variable_reference(cvar), offset,
|
|
3406
|
+
self._get_variable_reference(cvar), offset, dst_type, True, negotiate
|
|
3290
3407
|
)
|
|
3291
3408
|
|
|
3292
3409
|
if cdst is None:
|
|
@@ -3413,7 +3530,18 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3413
3530
|
return CRegister(expr, tags=expr.tags, codegen=self)
|
|
3414
3531
|
|
|
3415
3532
|
def _handle_Expr_Load(self, expr: Expr.Load, **kwargs):
|
|
3416
|
-
|
|
3533
|
+
if expr.size == UNDETERMINED_SIZE:
|
|
3534
|
+
# the size is undetermined; we force it to 1
|
|
3535
|
+
expr_size = 1
|
|
3536
|
+
expr_bits = 8
|
|
3537
|
+
else:
|
|
3538
|
+
expr_size = expr.size
|
|
3539
|
+
expr_bits = expr.bits
|
|
3540
|
+
|
|
3541
|
+
if expr.size > 100 and isinstance(expr.addr, Expr.Const):
|
|
3542
|
+
return self._handle_Expr_Const(expr.addr, type_=SimTypePointer(SimTypeChar()).with_arch(self.project.arch))
|
|
3543
|
+
|
|
3544
|
+
ty = self.default_simtype_from_bits(expr_bits)
|
|
3417
3545
|
|
|
3418
3546
|
def negotiate(old_ty: SimType, proposed_ty: SimType) -> SimType:
|
|
3419
3547
|
# we do not allow returning a struct for a primitive type
|
|
@@ -3430,7 +3558,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3430
3558
|
offset, var, _ = expr.struct_member_info
|
|
3431
3559
|
cvar = self._variable(var, var.size)
|
|
3432
3560
|
else:
|
|
3433
|
-
cvar = self._variable(expr.variable,
|
|
3561
|
+
cvar = self._variable(expr.variable, expr_size)
|
|
3434
3562
|
offset = expr.variable_offset or 0
|
|
3435
3563
|
|
|
3436
3564
|
assert type(offset) is int # I refuse to deal with the alternative
|
|
@@ -3449,7 +3577,10 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3449
3577
|
inline_string = False
|
|
3450
3578
|
function_pointer = False
|
|
3451
3579
|
|
|
3452
|
-
if
|
|
3580
|
+
if type_ is None and hasattr(expr, "type"):
|
|
3581
|
+
type_ = expr.type
|
|
3582
|
+
|
|
3583
|
+
if type_ is None and reference_values is None and hasattr(expr, "reference_values"):
|
|
3453
3584
|
reference_values = expr.reference_values.copy()
|
|
3454
3585
|
if reference_values:
|
|
3455
3586
|
type_ = next(iter(reference_values))
|
|
@@ -3665,7 +3796,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3665
3796
|
if expr.variable is not None:
|
|
3666
3797
|
if "struct_member_info" in expr.tags:
|
|
3667
3798
|
offset, var, _ = expr.struct_member_info
|
|
3668
|
-
cbasevar = self._variable(var, expr.size)
|
|
3799
|
+
cbasevar = self._variable(var, expr.size, vvar_id=expr.varid)
|
|
3669
3800
|
cvar = self._access_constant_offset(
|
|
3670
3801
|
self._get_variable_reference(cbasevar), offset, cbasevar.type, False, negotiate
|
|
3671
3802
|
)
|
|
@@ -851,11 +851,16 @@ def peephole_optimize_stmts(block, stmt_opts):
|
|
|
851
851
|
r = opt.optimize(stmt, stmt_idx=stmt_idx, block=block)
|
|
852
852
|
if r is not None and r is not stmt:
|
|
853
853
|
stmt = r
|
|
854
|
+
if r == ():
|
|
855
|
+
# the statement is gone; no more redo
|
|
856
|
+
redo = False
|
|
857
|
+
break
|
|
854
858
|
redo = True
|
|
855
859
|
break
|
|
856
860
|
|
|
857
861
|
if stmt is not None and stmt is not old_stmt:
|
|
858
|
-
|
|
862
|
+
if stmt != ():
|
|
863
|
+
statements.append(stmt)
|
|
859
864
|
any_update = True
|
|
860
865
|
else:
|
|
861
866
|
statements.append(old_stmt)
|