angr 9.2.142__py3-none-manylinux2014_aarch64.whl → 9.2.144__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +22 -10
- angr/analyses/calling_convention/fact_collector.py +72 -14
- angr/analyses/cfg/cfg_base.py +7 -2
- angr/analyses/cfg/cfg_emulated.py +13 -4
- angr/analyses/cfg/cfg_fast.py +21 -60
- angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
- angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
- angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
- angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -102
- angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
- angr/analyses/complete_calling_conventions.py +18 -5
- angr/analyses/decompiler/ail_simplifier.py +95 -65
- angr/analyses/decompiler/clinic.py +162 -68
- angr/analyses/decompiler/decompiler.py +4 -4
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +49 -14
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
- angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
- angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
- angr/analyses/decompiler/sequence_walker.py +8 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
- angr/analyses/decompiler/ssailification/ssailification.py +10 -2
- angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
- angr/analyses/decompiler/structured_codegen/c.py +25 -4
- angr/analyses/decompiler/utils.py +13 -0
- angr/analyses/disassembly.py +3 -3
- angr/analyses/fcp/fcp.py +1 -4
- angr/analyses/s_propagator.py +40 -29
- angr/analyses/s_reaching_definitions/s_rda_model.py +45 -36
- angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
- angr/analyses/s_reaching_definitions/s_reaching_definitions.py +41 -42
- angr/analyses/typehoon/dfa.py +13 -3
- angr/analyses/typehoon/typehoon.py +60 -18
- angr/analyses/typehoon/typevars.py +11 -7
- angr/analyses/variable_recovery/engine_ail.py +19 -23
- angr/analyses/variable_recovery/engine_base.py +26 -30
- angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
- angr/calling_conventions.py +18 -8
- angr/knowledge_plugins/functions/function.py +29 -15
- angr/knowledge_plugins/key_definitions/constants.py +2 -2
- angr/knowledge_plugins/key_definitions/liveness.py +4 -4
- angr/lib/angr_native.so +0 -0
- angr/procedures/definitions/linux_kernel.py +5 -0
- angr/state_plugins/unicorn_engine.py +24 -8
- angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
- angr/utils/doms.py +40 -33
- angr/utils/graph.py +26 -20
- angr/utils/ssa/__init__.py +21 -14
- angr/utils/ssa/vvar_uses_collector.py +2 -2
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/METADATA +11 -8
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/RECORD +61 -58
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/WHEEL +1 -1
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/LICENSE +0 -0
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/entry_points.txt +0 -0
- {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# pylint:disable=no-self-use,too-many-boolean-expressions
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from ailment.expression import BinaryOp, Const
|
|
4
|
+
|
|
5
|
+
from .base import PeepholeOptimizationExprBase
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ASubAShrConstShrConst(PeepholeOptimizationExprBase):
    """
    Convert `cdq; sub eax, edx; sar eax, 1` to `eax /= 2`.

    The matched expression shape is ``(a - (a >> (bits - 1))) >> 1`` where both shifts are arithmetic ("Sar").
    ``a >> (bits - 1)`` is -1 for a negative ``a`` and 0 otherwise, so the subtraction adds 1 to negative values
    before the final shift. That bias turns the round-toward-negative-infinity behaviour of an arithmetic shift
    into the round-toward-zero behaviour of a signed division by 2.

    The rewrite is applied only when the final shift amount is exactly 1. For a larger shift N the bias would have
    to be ``2 ** N - 1`` instead of 1, so the pattern is not a division: ``(-2 - (-2 >> 31)) >> 2`` is -1 while
    ``-2 / 4`` is 0.
    """

    __slots__ = ()

    # Kept verbatim because other code may refer to this optimization by its NAME; note that the rewrite itself
    # only fires for N == 1 and for a sign shift of (operand width - 1), see the class docstring.
    NAME = "(a - (a >> 31)) >> N => a / 2 ** N (signed)"
    expr_classes = (BinaryOp,)

    def optimize(self, expr: BinaryOp, **kwargs):
        """
        Rewrite ``(a - (a >> (bits - 1))) >> 1`` into a "Div" of ``a`` by the constant 2.

        :param expr:    The binary operation to inspect.
        :param kwargs:  Accepted for interface compatibility; not used here.
        :return:        A new "Div" BinaryOp carrying ``expr``'s tags when the pattern matches, None otherwise.
        """
        # outer shape: Sar(Sub(..., ...), <integer constant>)
        if not (
            expr.op == "Sar"
            and len(expr.operands) == 2
            and isinstance(expr.operands[1], Const)
            and expr.operands[1].is_int
            and isinstance(expr.operands[0], BinaryOp)
            and expr.operands[0].op == "Sub"
        ):
            return None

        shift_amount = expr.operands[1].value
        if shift_amount != 1:
            # the +1 bias applied to negative values is only the correct rounding fix-up for a division by 2
            return None

        a0, a1 = expr.operands[0].operands
        if (
            isinstance(a1, BinaryOp)
            and a1.op == "Sar"
            and isinstance(a1.operands[1], Const)
            # the subtrahend must be the sign mask of `a`, i.e. `a` shifted right by (its width - 1)
            and a1.operands[1].value == a1.bits - 1
            and a0.likes(a1.operands[0])
        ):
            divisor = 2**shift_amount
            # NOTE(review): the fourth positional argument (True) is presumably the "signed" flag - confirm
            return BinaryOp(a0.idx, "Div", [a0, Const(None, None, divisor, expr.bits)], True, **expr.tags)
        return None
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# pylint:disable=too-many-boolean-expressions
|
|
1
2
|
from __future__ import annotations
|
|
2
3
|
from ailment.expression import BinaryOp, Const, Load
|
|
3
4
|
|
|
@@ -20,10 +21,23 @@ class SimplifyPcRelativeLoads(PeepholeOptimizationExprBase):
|
|
|
20
21
|
if expr.op == "Add" and len(expr.operands) == 2 and isinstance(expr.operands[0], Load):
|
|
21
22
|
op0, op1 = expr.operands
|
|
22
23
|
|
|
24
|
+
assert self.project is not None
|
|
25
|
+
if not hasattr(expr, "ins_addr"):
|
|
26
|
+
return expr
|
|
27
|
+
assert expr.ins_addr is not None
|
|
28
|
+
|
|
23
29
|
# check if op1 is PC
|
|
24
|
-
if
|
|
30
|
+
if (
|
|
31
|
+
isinstance(op1, Const)
|
|
32
|
+
and op1.is_int
|
|
33
|
+
and hasattr(expr, "ins_addr")
|
|
34
|
+
and is_pc(self.project, expr.ins_addr, op1.value) # type: ignore
|
|
35
|
+
and isinstance(op0.addr, Const)
|
|
36
|
+
and op0.addr.is_int
|
|
37
|
+
):
|
|
25
38
|
# check if op0.addr points to a read-only section
|
|
26
39
|
addr = op0.addr.value
|
|
40
|
+
assert isinstance(addr, int)
|
|
27
41
|
if is_in_readonly_section(self.project, addr) or is_in_readonly_segment(self.project, addr):
|
|
28
42
|
# found it!
|
|
29
43
|
# do the load first
|
|
@@ -186,6 +186,14 @@ class SequenceWalker:
|
|
|
186
186
|
new_condition = (
|
|
187
187
|
self._handle(node.condition, parent=node, label="condition") if node.condition is not None else None
|
|
188
188
|
)
|
|
189
|
+
|
|
190
|
+
# note that initializer and iterator are both statements, so they can return empty tuples
|
|
191
|
+
# TODO: Handle the case where multiple statements are returned
|
|
192
|
+
if new_initializer == ():
|
|
193
|
+
new_initializer = None
|
|
194
|
+
if new_iterator == ():
|
|
195
|
+
new_iterator = None
|
|
196
|
+
|
|
189
197
|
seq_node = self._handle(node.sequence_node, parent=node, label="body", index=0)
|
|
190
198
|
if seq_node is not None or new_initializer is not None or new_iterator is not None or new_condition is not None:
|
|
191
199
|
return LoopNode(
|
|
@@ -698,6 +698,8 @@ class SimEngineSSARewriting(
|
|
|
698
698
|
raise NotImplementedError("Store expressions are not supported in _replace_use_expr.")
|
|
699
699
|
if isinstance(thing, Tmp) and self.rewrite_tmps:
|
|
700
700
|
return self._replace_use_tmp(self.block.addr, self.block.idx, self.stmt_idx, thing)
|
|
701
|
+
if isinstance(thing, Load):
|
|
702
|
+
return self._replace_use_load(thing)
|
|
701
703
|
return None
|
|
702
704
|
|
|
703
705
|
def _replace_use_reg(self, reg_expr: Register) -> VirtualVariable | Expression:
|
|
@@ -5,7 +5,15 @@ from collections import defaultdict
|
|
|
5
5
|
from itertools import count
|
|
6
6
|
from bisect import bisect_left
|
|
7
7
|
|
|
8
|
-
from ailment.expression import
|
|
8
|
+
from ailment.expression import (
|
|
9
|
+
Expression,
|
|
10
|
+
Register,
|
|
11
|
+
StackBaseOffset,
|
|
12
|
+
Tmp,
|
|
13
|
+
VirtualVariable,
|
|
14
|
+
VirtualVariableCategory,
|
|
15
|
+
Load,
|
|
16
|
+
)
|
|
9
17
|
from ailment.statement import Statement, Store
|
|
10
18
|
|
|
11
19
|
from angr.knowledge_plugins.functions import Function
|
|
@@ -151,7 +159,7 @@ class Ssailification(Analysis): # pylint:disable=abstract-method
|
|
|
151
159
|
reg_bits = def_.size * self.project.arch.byte_width
|
|
152
160
|
udef_to_defs[("reg", def_.reg_offset, reg_bits)].add(def_)
|
|
153
161
|
udef_to_blockkeys[("reg", def_.reg_offset, reg_bits)].add((loc.block_addr, loc.block_idx))
|
|
154
|
-
elif isinstance(def_, Store):
|
|
162
|
+
elif isinstance(def_, (Store, Load)):
|
|
155
163
|
if isinstance(def_.addr, StackBaseOffset) and isinstance(def_.addr.offset, int):
|
|
156
164
|
idx_begin = bisect_left(sorted_stackvar_offs, def_.addr.offset)
|
|
157
165
|
for i in range(idx_begin, len(sorted_stackvar_offs)):
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
from collections import OrderedDict
|
|
3
3
|
|
|
4
4
|
from ailment.statement import Call, Store, ConditionalJump
|
|
5
|
-
from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression, Tmp, DirtyExpression
|
|
5
|
+
from ailment.expression import Register, BinaryOp, StackBaseOffset, ITE, VEXCCallExpression, Tmp, DirtyExpression, Load
|
|
6
6
|
|
|
7
7
|
from angr.engines.light import SimEngineLightAIL
|
|
8
8
|
from angr.project import Project
|
|
@@ -133,6 +133,22 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
|
|
|
133
133
|
|
|
134
134
|
self.state.live_registers.add(base_offset)
|
|
135
135
|
|
|
136
|
+
def _handle_expr_Load(self, expr: Load):
    """
    Traverse a Load expression and record a stack variable that is read before any definition of it was seen.

    The address expression is always traversed. When stack-variable handling is enabled (``self.stackvars``) and
    the load address is a StackBaseOffset with an integer offset whose ``(offset, size)`` pair is not yet in
    ``self.state.live_stackvars``, the Load itself is registered as a definition at the current code location.

    :param expr:    The Load expression being visited.
    """
    self._expr(expr.addr)

    load_addr = expr.addr
    if not (self.stackvars and isinstance(load_addr, StackBaseOffset) and isinstance(load_addr.offset, int)):
        return

    stackvar_key = (load_addr.offset, expr.size)
    if stackvar_key in self.state.live_stackvars:
        # already known; nothing to create
        return

    # we must create this stack variable on the fly; we did not see its creation before it is first used
    codeloc = self._codeloc()
    self.def_to_loc.append((expr, codeloc))
    if codeloc not in self.loc_to_defs:
        self.loc_to_defs[codeloc] = OrderedSet()
    self.loc_to_defs[codeloc].add(expr)
    self.state.live_stackvars.add(stackvar_key)
|
|
151
|
+
|
|
136
152
|
def _handle_expr_Tmp(self, expr: Tmp):
|
|
137
153
|
if self.use_tmps:
|
|
138
154
|
codeloc = self._codeloc()
|
|
@@ -251,7 +267,6 @@ class SimEngineSSATraversal(SimEngineLightAIL[TraversalState, None, None, None])
|
|
|
251
267
|
|
|
252
268
|
_handle_expr_VirtualVariable = _handle_Dummy
|
|
253
269
|
_handle_expr_Phi = _handle_Dummy
|
|
254
|
-
_handle_expr_Load = _handle_Dummy
|
|
255
270
|
_handle_expr_Const = _handle_Dummy
|
|
256
271
|
_handle_expr_MultiStatementExpression = _handle_Dummy
|
|
257
272
|
_handle_expr_StackBaseOffset = _handle_Dummy
|
|
@@ -3426,8 +3426,13 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3426
3426
|
return old_ty
|
|
3427
3427
|
|
|
3428
3428
|
if expr.variable is not None:
|
|
3429
|
-
|
|
3430
|
-
|
|
3429
|
+
if "struct_member_info" in expr.tags:
|
|
3430
|
+
offset, var, _ = expr.struct_member_info
|
|
3431
|
+
cvar = self._variable(var, var.size)
|
|
3432
|
+
else:
|
|
3433
|
+
cvar = self._variable(expr.variable, expr.size)
|
|
3434
|
+
offset = expr.variable_offset or 0
|
|
3435
|
+
|
|
3431
3436
|
assert type(offset) is int # I refuse to deal with the alternative
|
|
3432
3437
|
return self._access_constant_offset(CUnaryOp("Reference", cvar, codegen=self), offset, ty, False, negotiate)
|
|
3433
3438
|
|
|
@@ -3649,8 +3654,24 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3649
3654
|
return CMultiStatementExpression(cstmts, cexpr, tags=expr.tags, codegen=self)
|
|
3650
3655
|
|
|
3651
3656
|
def _handle_VirtualVariable(self, expr: Expr.VirtualVariable, **kwargs):
|
|
3652
|
-
|
|
3653
|
-
|
|
3657
|
+
def negotiate(old_ty: SimType, proposed_ty: SimType) -> SimType:
|
|
3658
|
+
# we do not allow returning a struct for a primitive type
|
|
3659
|
+
if old_ty.size == proposed_ty.size and (
|
|
3660
|
+
not isinstance(proposed_ty, SimStruct) or isinstance(old_ty, SimStruct)
|
|
3661
|
+
):
|
|
3662
|
+
return proposed_ty
|
|
3663
|
+
return old_ty
|
|
3664
|
+
|
|
3665
|
+
if expr.variable is not None:
|
|
3666
|
+
if "struct_member_info" in expr.tags:
|
|
3667
|
+
offset, var, _ = expr.struct_member_info
|
|
3668
|
+
cbasevar = self._variable(var, expr.size)
|
|
3669
|
+
cvar = self._access_constant_offset(
|
|
3670
|
+
self._get_variable_reference(cbasevar), offset, cbasevar.type, False, negotiate
|
|
3671
|
+
)
|
|
3672
|
+
else:
|
|
3673
|
+
cvar = self._variable(expr.variable, None, vvar_id=expr.varid)
|
|
3674
|
+
|
|
3654
3675
|
if expr.variable.size != expr.size:
|
|
3655
3676
|
l.warning(
|
|
3656
3677
|
"VirtualVariable size (%d) and variable size (%d) do not match. Force a type cast.",
|
|
@@ -214,6 +214,19 @@ def switch_extract_switch_expr_from_jump_target(target: ailment.Expr.Expression)
|
|
|
214
214
|
target = target.operands[0]
|
|
215
215
|
else:
|
|
216
216
|
return None
|
|
217
|
+
elif target.op == "And":
|
|
218
|
+
# it must be and-ing the target expr with a constant
|
|
219
|
+
if (
|
|
220
|
+
isinstance(target.operands[1], ailment.Expr.VirtualVariable)
|
|
221
|
+
and isinstance(target.operands[0], ailment.Expr.Const)
|
|
222
|
+
) or (
|
|
223
|
+
isinstance(target.operands[0], ailment.Expr.VirtualVariable)
|
|
224
|
+
and isinstance(target.operands[1], ailment.Expr.Const)
|
|
225
|
+
):
|
|
226
|
+
break
|
|
227
|
+
return None
|
|
228
|
+
else:
|
|
229
|
+
return None
|
|
217
230
|
elif isinstance(target, ailment.Expr.Load):
|
|
218
231
|
# we want the address!
|
|
219
232
|
found_load = True
|
angr/analyses/disassembly.py
CHANGED
|
@@ -4,7 +4,7 @@ import contextlib
|
|
|
4
4
|
import logging
|
|
5
5
|
from collections import defaultdict
|
|
6
6
|
from collections.abc import Sequence
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import pyvex
|
|
10
10
|
import archinfo
|
|
@@ -24,8 +24,8 @@ try:
|
|
|
24
24
|
from angr.engines import pcode
|
|
25
25
|
import pypcode
|
|
26
26
|
|
|
27
|
-
IRSBType =
|
|
28
|
-
IROpObjType =
|
|
27
|
+
IRSBType = pyvex.IRSB | pcode.lifter.IRSB
|
|
28
|
+
IROpObjType = pyvex.stmt.IRStmt | pypcode.PcodeOp
|
|
29
29
|
except ImportError:
|
|
30
30
|
pcode = None
|
|
31
31
|
IRSBType = pyvex.IRSB
|
angr/analyses/fcp/fcp.py
CHANGED
|
@@ -407,10 +407,7 @@ class FastConstantPropagation(Analysis):
|
|
|
407
407
|
except (TypeError, ValueError):
|
|
408
408
|
arg_locs = None
|
|
409
409
|
|
|
410
|
-
if None in arg_locs:
|
|
411
|
-
arg_locs = None
|
|
412
|
-
|
|
413
|
-
if arg_locs is not None:
|
|
410
|
+
if arg_locs is not None and None not in arg_locs:
|
|
414
411
|
for arg_loc in arg_locs:
|
|
415
412
|
for loc in arg_loc.get_footprint():
|
|
416
413
|
if isinstance(loc, SimStackArg):
|
angr/analyses/s_propagator.py
CHANGED
|
@@ -26,6 +26,7 @@ from angr.utils.ssa import (
|
|
|
26
26
|
get_vvar_deflocs,
|
|
27
27
|
has_ite_expr,
|
|
28
28
|
has_ite_stmt,
|
|
29
|
+
has_tmp_expr,
|
|
29
30
|
is_phi_assignment,
|
|
30
31
|
is_const_assignment,
|
|
31
32
|
is_const_and_vvar_assignment,
|
|
@@ -126,7 +127,7 @@ class SPropagatorAnalysis(Analysis):
|
|
|
126
127
|
# update vvar_deflocs using function arguments
|
|
127
128
|
if self.func_args:
|
|
128
129
|
for func_arg in self.func_args:
|
|
129
|
-
vvar_deflocs[func_arg] = ExternalCodeLocation()
|
|
130
|
+
vvar_deflocs[func_arg.varid] = func_arg, ExternalCodeLocation()
|
|
130
131
|
|
|
131
132
|
# find all ret sites and indirect jump sites
|
|
132
133
|
retsites: set[tuple[int, int | None, int]] = set()
|
|
@@ -143,11 +144,11 @@ class SPropagatorAnalysis(Analysis):
|
|
|
143
144
|
# find constant and other propagatable assignments
|
|
144
145
|
vvarid_to_vvar = {}
|
|
145
146
|
const_vvars: dict[int, Const] = {}
|
|
146
|
-
for vvar, defloc in vvar_deflocs.items():
|
|
147
|
+
for vvar_id, (vvar, defloc) in vvar_deflocs.items():
|
|
147
148
|
if not vvar.was_reg and not vvar.was_parameter:
|
|
148
149
|
continue
|
|
149
150
|
|
|
150
|
-
vvarid_to_vvar[
|
|
151
|
+
vvarid_to_vvar[vvar_id] = vvar
|
|
151
152
|
if isinstance(defloc, ExternalCodeLocation):
|
|
152
153
|
continue
|
|
153
154
|
|
|
@@ -160,8 +161,8 @@ class SPropagatorAnalysis(Analysis):
|
|
|
160
161
|
if r:
|
|
161
162
|
# replace wherever it's used
|
|
162
163
|
assert v is not None
|
|
163
|
-
const_vvars[
|
|
164
|
-
for vvar_at_use, useloc in vvar_uselocs[
|
|
164
|
+
const_vvars[vvar_id] = v
|
|
165
|
+
for vvar_at_use, useloc in vvar_uselocs[vvar_id]:
|
|
165
166
|
replacements[useloc][vvar_at_use] = v
|
|
166
167
|
continue
|
|
167
168
|
|
|
@@ -189,10 +190,10 @@ class SPropagatorAnalysis(Analysis):
|
|
|
189
190
|
if self.mode == "function":
|
|
190
191
|
assert self.func_graph is not None
|
|
191
192
|
|
|
192
|
-
for vvar, defloc in vvar_deflocs.items():
|
|
193
|
-
if
|
|
193
|
+
for vvar_id, (vvar, defloc) in vvar_deflocs.items():
|
|
194
|
+
if vvar_id not in vvar_uselocs:
|
|
194
195
|
continue
|
|
195
|
-
if
|
|
196
|
+
if vvar_id in const_vvars:
|
|
196
197
|
continue
|
|
197
198
|
if isinstance(defloc, ExternalCodeLocation):
|
|
198
199
|
continue
|
|
@@ -200,11 +201,13 @@ class SPropagatorAnalysis(Analysis):
|
|
|
200
201
|
assert defloc.block_addr is not None
|
|
201
202
|
assert defloc.stmt_idx is not None
|
|
202
203
|
|
|
204
|
+
vvar_uselocs_set = set(vvar_uselocs[vvar_id]) # deduplicate
|
|
205
|
+
|
|
203
206
|
block = blocks[(defloc.block_addr, defloc.block_idx)]
|
|
204
207
|
stmt = block.statements[defloc.stmt_idx]
|
|
205
208
|
if (
|
|
206
209
|
(vvar.was_reg or vvar.was_parameter)
|
|
207
|
-
and len(
|
|
210
|
+
and len(vvar_uselocs_set) <= 2
|
|
208
211
|
and isinstance(stmt, Assignment)
|
|
209
212
|
and isinstance(stmt.src, Load)
|
|
210
213
|
):
|
|
@@ -215,43 +218,46 @@ class SPropagatorAnalysis(Analysis):
|
|
|
215
218
|
# v1 = v0 + 1;
|
|
216
219
|
# }
|
|
217
220
|
can_replace = True
|
|
218
|
-
for _, vvar_useloc in
|
|
221
|
+
for _, vvar_useloc in vvar_uselocs_set:
|
|
219
222
|
if has_store_stmt_in_between_stmts(self.func_graph, blocks, defloc, vvar_useloc):
|
|
220
223
|
can_replace = False
|
|
221
224
|
|
|
222
225
|
if can_replace:
|
|
223
226
|
# we can propagate this load because there is no store between its def and use
|
|
224
|
-
for vvar_used, vvar_useloc in
|
|
227
|
+
for vvar_used, vvar_useloc in vvar_uselocs_set:
|
|
225
228
|
replacements[vvar_useloc][vvar_used] = stmt.src
|
|
226
229
|
continue
|
|
227
230
|
|
|
228
231
|
if (
|
|
229
232
|
(vvar.was_reg or vvar.was_stack)
|
|
230
|
-
and len(
|
|
233
|
+
and len(vvar_uselocs_set) == 2
|
|
234
|
+
and isinstance(stmt, Assignment)
|
|
231
235
|
and not is_phi_assignment(stmt)
|
|
232
236
|
):
|
|
233
237
|
# a special case: in a typical switch-case construct, a variable may be used once for comparison
|
|
234
238
|
# for the default case and then used again for constructing the jump target. we can propagate this
|
|
235
239
|
# variable for such cases.
|
|
236
|
-
uselocs = {loc for _, loc in
|
|
237
|
-
if self.is_vvar_used_for_addr_loading_switch_case(uselocs, blocks):
|
|
238
|
-
for vvar_used, vvar_useloc in
|
|
240
|
+
uselocs = {loc for _, loc in vvar_uselocs_set}
|
|
241
|
+
if self.is_vvar_used_for_addr_loading_switch_case(uselocs, blocks) and not has_tmp_expr(stmt.src):
|
|
242
|
+
for vvar_used, vvar_useloc in vvar_uselocs_set:
|
|
239
243
|
replacements[vvar_useloc][vvar_used] = stmt.src
|
|
240
244
|
# mark the vvar as dead and should be removed
|
|
241
245
|
self.model.dead_vvar_ids.add(vvar.varid)
|
|
242
246
|
continue
|
|
243
247
|
|
|
244
248
|
if vvar.was_reg or vvar.was_parameter:
|
|
245
|
-
if len(
|
|
246
|
-
vvar_used, vvar_useloc = next(iter(
|
|
247
|
-
if
|
|
248
|
-
|
|
249
|
+
if len(vvar_uselocs_set) == 1:
|
|
250
|
+
vvar_used, vvar_useloc = next(iter(vvar_uselocs_set))
|
|
251
|
+
if (
|
|
252
|
+
is_const_vvar_load_assignment(stmt)
|
|
253
|
+
and not has_store_stmt_in_between_stmts(self.func_graph, blocks, defloc, vvar_useloc)
|
|
254
|
+
and not has_tmp_expr(stmt.src)
|
|
249
255
|
):
|
|
250
256
|
# we can propagate this load because there is no store between its def and use
|
|
251
257
|
replacements[vvar_useloc][vvar_used] = stmt.src
|
|
252
258
|
continue
|
|
253
259
|
|
|
254
|
-
if is_const_and_vvar_assignment(stmt):
|
|
260
|
+
if is_const_and_vvar_assignment(stmt) and not has_tmp_expr(stmt.src):
|
|
255
261
|
# if the useloc is a phi assignment statement, ensure that stmt.src is the same as the phi
|
|
256
262
|
# variable
|
|
257
263
|
assert vvar_useloc.block_addr is not None
|
|
@@ -273,18 +279,22 @@ class SPropagatorAnalysis(Analysis):
|
|
|
273
279
|
else:
|
|
274
280
|
non_exitsite_uselocs = [
|
|
275
281
|
loc
|
|
276
|
-
for _, loc in
|
|
282
|
+
for _, loc in vvar_uselocs_set
|
|
277
283
|
if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
|
|
278
284
|
]
|
|
279
285
|
if is_const_and_vvar_assignment(stmt):
|
|
280
286
|
if len(non_exitsite_uselocs) == 1:
|
|
281
287
|
# this vvar is used once if we exclude its uses at ret sites or jump sites. we can
|
|
282
288
|
# propagate it
|
|
283
|
-
for vvar_used, vvar_useloc in
|
|
289
|
+
for vvar_used, vvar_useloc in vvar_uselocs_set:
|
|
284
290
|
replacements[vvar_useloc][vvar_used] = stmt.src
|
|
285
291
|
continue
|
|
286
292
|
|
|
287
|
-
if
|
|
293
|
+
if (
|
|
294
|
+
len(set(non_exitsite_uselocs)) == 1
|
|
295
|
+
and not has_ite_expr(stmt.src)
|
|
296
|
+
and not has_tmp_expr(stmt.src)
|
|
297
|
+
):
|
|
288
298
|
useloc = non_exitsite_uselocs[0]
|
|
289
299
|
assert useloc.block_addr is not None
|
|
290
300
|
assert useloc.stmt_idx is not None
|
|
@@ -292,13 +302,13 @@ class SPropagatorAnalysis(Analysis):
|
|
|
292
302
|
if stmt.src.depth <= 3 and not has_ite_stmt(useloc_stmt):
|
|
293
303
|
# remove duplicate use locs (e.g., if the variable is used multiple times by the
|
|
294
304
|
# same statement) - but ensure stmt is simple enough
|
|
295
|
-
for vvar_used, vvar_useloc in
|
|
305
|
+
for vvar_used, vvar_useloc in vvar_uselocs_set:
|
|
296
306
|
replacements[vvar_useloc][vvar_used] = stmt.src
|
|
297
307
|
continue
|
|
298
308
|
|
|
299
309
|
# special logic for global variables: if it's used once or multiple times, and the variable is never
|
|
300
310
|
# updated before it's used, we will propagate the load
|
|
301
|
-
if (vvar.was_reg or vvar.was_parameter) and isinstance(stmt, Assignment):
|
|
311
|
+
if (vvar.was_reg or vvar.was_parameter) and isinstance(stmt, Assignment) and not has_tmp_expr(stmt.src):
|
|
302
312
|
stmt_src = stmt.src
|
|
303
313
|
# unpack conversions
|
|
304
314
|
while isinstance(stmt_src, Convert):
|
|
@@ -309,7 +319,7 @@ class SPropagatorAnalysis(Analysis):
|
|
|
309
319
|
and isinstance(stmt_src.addr.value, int)
|
|
310
320
|
):
|
|
311
321
|
gv_updated = False
|
|
312
|
-
for _vvar_used, vvar_useloc in
|
|
322
|
+
for _vvar_used, vvar_useloc in vvar_uselocs_set:
|
|
313
323
|
gv_updated |= self.is_global_variable_updated(
|
|
314
324
|
self.func_graph,
|
|
315
325
|
blocks,
|
|
@@ -320,12 +330,13 @@ class SPropagatorAnalysis(Analysis):
|
|
|
320
330
|
vvar_useloc,
|
|
321
331
|
)
|
|
322
332
|
if not gv_updated:
|
|
323
|
-
for vvar_used, vvar_useloc in
|
|
333
|
+
for vvar_used, vvar_useloc in vvar_uselocs_set:
|
|
324
334
|
replacements[vvar_useloc][vvar_used] = stmt.src
|
|
325
335
|
continue
|
|
326
336
|
|
|
327
337
|
for vvar_id, uselocs in vvar_uselocs.items():
|
|
328
338
|
vvar = next(iter(uselocs))[0] if vvar_id not in vvarid_to_vvar else vvarid_to_vvar[vvar_id]
|
|
339
|
+
vvar_uselocs_set = set(uselocs) # deduplicate
|
|
329
340
|
|
|
330
341
|
if self._sp_tracker is not None and vvar.category == VirtualVariableCategory.REGISTER:
|
|
331
342
|
if vvar.oident == self.project.arch.sp_offset:
|
|
@@ -334,7 +345,7 @@ class SPropagatorAnalysis(Analysis):
|
|
|
334
345
|
if "sp" in self.project.arch.registers
|
|
335
346
|
else None
|
|
336
347
|
)
|
|
337
|
-
for vvar_at_use, useloc in
|
|
348
|
+
for vvar_at_use, useloc in vvar_uselocs_set:
|
|
338
349
|
sb_offset = self._sp_tracker.offset_before(useloc.ins_addr, self.project.arch.sp_offset)
|
|
339
350
|
if sb_offset is not None:
|
|
340
351
|
v = StackBaseOffset(None, self.project.arch.bits, sb_offset)
|
|
@@ -349,7 +360,7 @@ class SPropagatorAnalysis(Analysis):
|
|
|
349
360
|
if "bp" in self.project.arch.registers
|
|
350
361
|
else None
|
|
351
362
|
)
|
|
352
|
-
for vvar_at_use, useloc in
|
|
363
|
+
for vvar_at_use, useloc in vvar_uselocs_set:
|
|
353
364
|
sb_offset = self._sp_tracker.offset_before(useloc.ins_addr, self.project.arch.bp_offset)
|
|
354
365
|
if sb_offset is not None:
|
|
355
366
|
v = StackBaseOffset(None, self.project.arch.bits, sb_offset)
|
|
@@ -20,26 +20,35 @@ class SRDAModel:
|
|
|
20
20
|
self.func_args = func_args
|
|
21
21
|
self.arch = arch
|
|
22
22
|
self.varid_to_vvar: dict[int, VirtualVariable] = {}
|
|
23
|
-
self.all_vvar_definitions: dict[
|
|
24
|
-
self.all_vvar_uses: dict[
|
|
23
|
+
self.all_vvar_definitions: dict[int, CodeLocation] = {}
|
|
24
|
+
self.all_vvar_uses: dict[int, list[tuple[VirtualVariable | None, CodeLocation]]] = defaultdict(list)
|
|
25
25
|
self.all_tmp_definitions: dict[CodeLocation, dict[atoms.Tmp, int]] = defaultdict(dict)
|
|
26
26
|
self.all_tmp_uses: dict[CodeLocation, dict[atoms.Tmp, set[tuple[Tmp, int]]]] = defaultdict(dict)
|
|
27
27
|
self.phi_vvar_ids: set[int] = set()
|
|
28
28
|
self.phivarid_to_varids: dict[int, set[int]] = {}
|
|
29
|
+
self.vvar_uses_by_loc: dict[CodeLocation, list[int]] = {}
|
|
30
|
+
|
|
31
|
+
def add_vvar_use(self, vvar_id: int, expr: VirtualVariable | None, loc: CodeLocation) -> None:
    """
    Record one use of a virtual variable in both use indexes.

    :param vvar_id: ID of the virtual variable that is used.
    :param expr:    The expression through which the variable is used, or None.
    :param loc:     The code location of the use.
    """
    # index by variable ID
    self.all_vvar_uses[vvar_id].append((expr, loc))
    # index by location; the per-location list is created on first use
    self.vvar_uses_by_loc.setdefault(loc, []).append(vvar_id)
|
|
29
36
|
|
|
30
37
|
@property
def all_definitions(self) -> Generator[Definition]:
    """
    Yield one Definition per entry of ``self.all_vvar_definitions``.

    Each definition pairs a virtual-variable atom, built from the ID and from the size, category and oident of
    the matching entry in ``self.varid_to_vvar``, with the code location stored for that ID.
    """
    for vvar_id, defloc in self.all_vvar_definitions.items():
        vvar = self.varid_to_vvar[vvar_id]
        atom = atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident)
        yield Definition(atom, defloc)
|
|
34
42
|
|
|
35
43
|
def is_phi_vvar_id(self, idx: int) -> bool:
    """
    Tell whether a virtual variable ID is recorded in ``self.phi_vvar_ids``.

    :param idx: The virtual variable ID to look up.
    :return:    True if the ID is in ``self.phi_vvar_ids``, False otherwise.
    """
    phi_ids = self.phi_vvar_ids
    return idx in phi_ids
|
|
37
45
|
|
|
38
46
|
def get_all_definitions(self, block_loc: CodeLocation) -> set[Definition]:
    """
    Collect the definitions that belong to one block.

    :param block_loc:   A code location; only its ``block_addr`` and ``block_idx`` are compared.
    :return:            The virtual-variable definitions whose location has the same block address and block
                        index, united with the result of ``get_all_tmp_definitions`` for the same location.
    """
    vvar_defs = set()
    for vvar_id, codeloc in self.all_vvar_definitions.items():
        vvar = self.varid_to_vvar[vvar_id]
        if codeloc.block_addr != block_loc.block_addr or codeloc.block_idx != block_loc.block_idx:
            continue
        atom = atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident)
        vvar_defs.add(Definition(atom, codeloc))
    return vvar_defs | self.get_all_tmp_definitions(block_loc)
|
|
44
53
|
|
|
45
54
|
def get_all_tmp_definitions(self, block_loc: CodeLocation) -> set[Definition]:
|
|
@@ -64,45 +73,45 @@ class SRDAModel:
|
|
|
64
73
|
:return: A set of definitions that are used at the given location.
|
|
65
74
|
"""
|
|
66
75
|
if exprs:
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
atoms.VirtualVariable(vvar.varid, vvar.size, vvar.category, vvar.oident),
|
|
75
|
-
self.all_vvar_definitions[vvar],
|
|
76
|
-
),
|
|
77
|
-
expr,
|
|
78
|
-
)
|
|
79
|
-
)
|
|
80
|
-
return defs
|
|
81
|
-
|
|
82
|
-
defs: set[Definition] = set()
|
|
83
|
-
for vvar, uses in self.all_vvar_uses.items():
|
|
84
|
-
for _, loc_ in uses:
|
|
85
|
-
if loc_ == loc:
|
|
86
|
-
defs.add(
|
|
76
|
+
def_with_exprs: set[tuple[Definition, Any]] = set()
|
|
77
|
+
if loc not in self.vvar_uses_by_loc:
|
|
78
|
+
return def_with_exprs
|
|
79
|
+
for vvar_id in self.vvar_uses_by_loc[loc]:
|
|
80
|
+
vvar = self.varid_to_vvar[vvar_id]
|
|
81
|
+
def_with_exprs.add(
|
|
82
|
+
(
|
|
87
83
|
Definition(
|
|
88
|
-
atoms.VirtualVariable(
|
|
89
|
-
self.all_vvar_definitions[
|
|
90
|
-
)
|
|
84
|
+
atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident),
|
|
85
|
+
self.all_vvar_definitions[vvar_id],
|
|
86
|
+
),
|
|
87
|
+
vvar,
|
|
91
88
|
)
|
|
89
|
+
)
|
|
90
|
+
return def_with_exprs
|
|
91
|
+
|
|
92
|
+
defs: set[Definition] = set()
|
|
93
|
+
if loc not in self.vvar_uses_by_loc:
|
|
94
|
+
return defs
|
|
95
|
+
for vvar_id in self.vvar_uses_by_loc[loc]:
|
|
96
|
+
vvar = self.varid_to_vvar[vvar_id]
|
|
97
|
+
defs.add(
|
|
98
|
+
Definition(
|
|
99
|
+
atoms.VirtualVariable(vvar_id, vvar.size, vvar.category, vvar.oident),
|
|
100
|
+
self.all_vvar_definitions[vvar_id],
|
|
101
|
+
)
|
|
102
|
+
)
|
|
92
103
|
return defs
|
|
93
104
|
|
|
94
105
|
def get_vvar_uses(self, obj: VirtualVariable | atoms.VirtualVariable) -> set[CodeLocation]:
    """
    Return the code locations at which a virtual variable is used.

    :param obj: The virtual variable (expression or atom); only its ``varid`` is read.
    :return:    The set of use locations recorded for that ID, or an empty set when none are recorded.
    """
    # .get() rather than indexing, so that no entry is created for an unknown ID
    recorded_uses = self.all_vvar_uses.get(obj.varid)
    if recorded_uses is None:
        return set()
    return {use_loc for _, use_loc in recorded_uses}
|
|
99
109
|
|
|
100
110
|
def get_vvar_uses_with_expr(
    self, obj: VirtualVariable | atoms.VirtualVariable
) -> set[tuple[VirtualVariable | None, CodeLocation]]:
    """
    Return the recorded uses of a virtual variable together with the using expressions.

    :param obj: The virtual variable (expression or atom); only its ``varid`` is read.
    :return:    The set of ``(expression, location)`` pairs recorded for that ID, or an empty set when none
                are recorded.
    """
    # .get() rather than indexing, so that no entry is created for an unknown ID
    recorded_uses = self.all_vvar_uses.get(obj.varid)
    return set() if recorded_uses is None else set(recorded_uses)
|
|
107
116
|
|
|
108
117
|
def get_tmp_uses(self, obj: atoms.Tmp, block_loc: CodeLocation) -> set[CodeLocation]:
|
|
@@ -185,7 +185,10 @@ class SRDAView:
|
|
|
185
185
|
vvars.append(func_arg)
|
|
186
186
|
# there might be multiple vvars; we prioritize the one whose size fits the best
|
|
187
187
|
for v in vvars:
|
|
188
|
-
if
|
|
188
|
+
if (
|
|
189
|
+
(v.was_stack and v.stack_offset == stack_offset)
|
|
190
|
+
or (v.was_parameter and v.parameter_stack_offset == stack_offset)
|
|
191
|
+
) and v.size == size:
|
|
189
192
|
return v
|
|
190
193
|
return vvars[0] if vvars else None
|
|
191
194
|
|
|
@@ -239,9 +242,9 @@ class SRDAView:
|
|
|
239
242
|
return vvars[0] if vvars else None
|
|
240
243
|
|
|
241
244
|
def get_vvar_value(self, vvar: VirtualVariable) -> Expression | None:
|
|
242
|
-
if vvar not in self.model.all_vvar_definitions:
|
|
245
|
+
if vvar.varid not in self.model.all_vvar_definitions:
|
|
243
246
|
return None
|
|
244
|
-
codeloc = self.model.all_vvar_definitions[vvar]
|
|
247
|
+
codeloc = self.model.all_vvar_definitions[vvar.varid]
|
|
245
248
|
|
|
246
249
|
for block in self.model.func_graph:
|
|
247
250
|
if block.addr == codeloc.block_addr and block.idx == codeloc.block_idx:
|