angr 9.2.163__cp310-abi3-manylinux2014_aarch64.whl → 9.2.164__cp310-abi3-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/ailment/converter_vex.py +1 -1
- angr/ailment/expression.py +5 -1
- angr/analyses/cfg/cfg_base.py +16 -13
- angr/analyses/cfg/cfg_emulated.py +5 -1
- angr/analyses/cfg/cfg_fast.py +27 -4
- angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +11 -1
- angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +194 -41
- angr/analyses/decompiler/ail_simplifier.py +19 -5
- angr/analyses/decompiler/callsite_maker.py +33 -17
- angr/analyses/decompiler/graph_region.py +19 -0
- angr/analyses/decompiler/optimization_passes/deadblock_remover.py +1 -1
- angr/analyses/decompiler/region_identifier.py +22 -1
- angr/analyses/decompiler/structuring/phoenix.py +72 -20
- angr/analyses/decompiler/structuring/recursive_structurer.py +3 -4
- angr/analyses/decompiler/structuring/structurer_nodes.py +3 -0
- angr/analyses/decompiler/utils.py +17 -5
- angr/analyses/s_reaching_definitions/s_rda_view.py +2 -1
- angr/analyses/typehoon/typeconsts.py +3 -1
- angr/blade.py +20 -15
- angr/engines/icicle.py +7 -2
- angr/knowledge_plugins/propagations/propagation_model.py +7 -0
- angr/rustylib.abi3.so +0 -0
- angr/sim_type.py +16 -1
- angr/utils/constants.py +1 -1
- angr/utils/graph.py +1 -1
- angr/utils/vex.py +11 -0
- {angr-9.2.163.dist-info → angr-9.2.164.dist-info}/METADATA +5 -5
- {angr-9.2.163.dist-info → angr-9.2.164.dist-info}/RECORD +33 -32
- {angr-9.2.163.dist-info → angr-9.2.164.dist-info}/WHEEL +0 -0
- {angr-9.2.163.dist-info → angr-9.2.164.dist-info}/entry_points.txt +0 -0
- {angr-9.2.163.dist-info → angr-9.2.164.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.163.dist-info → angr-9.2.164.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/ailment/converter_vex.py
CHANGED
|
@@ -606,7 +606,7 @@ class VEXStmtConverter(Converter):
|
|
|
606
606
|
expd_hi = VEXExprConverter.convert(stmt.expdHi, manager) if stmt.expdHi is not None else None
|
|
607
607
|
old_lo = VEXExprConverter.tmp(stmt.oldLo, manager.tyenv.sizeof(stmt.oldLo), manager)
|
|
608
608
|
old_hi = (
|
|
609
|
-
VEXExprConverter.tmp(stmt.oldHi,
|
|
609
|
+
VEXExprConverter.tmp(stmt.oldHi, manager.tyenv.sizeof(stmt.oldHi), manager)
|
|
610
610
|
if stmt.oldHi != 0xFFFFFFFF
|
|
611
611
|
else None
|
|
612
612
|
)
|
angr/ailment/expression.py
CHANGED
|
@@ -616,7 +616,11 @@ class Convert(UnaryOp):
|
|
|
616
616
|
self.rounding_mode = rounding_mode
|
|
617
617
|
|
|
618
618
|
def __str__(self):
|
|
619
|
-
|
|
619
|
+
from_type = "I" if self.from_type == Convert.TYPE_INT else "F"
|
|
620
|
+
to_type = "I" if self.to_type == Convert.TYPE_INT else "F"
|
|
621
|
+
return (
|
|
622
|
+
f"Conv({self.from_bits}{from_type}->{'s' if self.is_signed else ''}{self.to_bits}{to_type}, {self.operand})"
|
|
623
|
+
)
|
|
620
624
|
|
|
621
625
|
def __repr__(self):
|
|
622
626
|
return str(self)
|
angr/analyses/cfg/cfg_base.py
CHANGED
|
@@ -1952,11 +1952,11 @@ class CFGBase(Analysis):
|
|
|
1952
1952
|
# skip empty blocks (that are usually caused by lifting failures)
|
|
1953
1953
|
continue
|
|
1954
1954
|
block = func_0.get_block(block_node.addr, block_node.size)
|
|
1955
|
-
if block.vex_nostmt.jumpkind not in ("Ijk_Boring", "Ijk_InvalICache"):
|
|
1956
|
-
continue
|
|
1957
1955
|
# Skip alignment blocks
|
|
1958
1956
|
if self._is_noop_block(self.project.arch, block):
|
|
1959
1957
|
continue
|
|
1958
|
+
if block.vex_nostmt.jumpkind not in ("Ijk_Boring", "Ijk_InvalICache"):
|
|
1959
|
+
continue
|
|
1960
1960
|
|
|
1961
1961
|
# does the first block transition to the next function?
|
|
1962
1962
|
transition_found = False
|
|
@@ -2001,17 +2001,20 @@ class CFGBase(Analysis):
|
|
|
2001
2001
|
|
|
2002
2002
|
cfgnode_1_merged = False
|
|
2003
2003
|
# we only merge two CFG nodes if the first one does not end with a branch instruction
|
|
2004
|
-
if (
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2004
|
+
if len(func_0.block_addrs_set) == 1 and len(out_edges) == 1:
|
|
2005
|
+
outedge_src, outedge_dst, outedge_data = out_edges[0]
|
|
2006
|
+
if (
|
|
2007
|
+
outedge_src.addr == cfgnode_0.addr
|
|
2008
|
+
and outedge_src.size == cfgnode_0.size
|
|
2009
|
+
and outedge_dst.addr == cfgnode_1.addr
|
|
2010
|
+
and cfgnode_0.addr + cfgnode_0.size == cfgnode_1.addr
|
|
2011
|
+
and outedge_data.get("type", None) == "transition"
|
|
2012
|
+
and outedge_data.get("stmt_idx", None) == DEFAULT_STATEMENT
|
|
2013
|
+
):
|
|
2014
|
+
cfgnode_1_merged = True
|
|
2015
|
+
self._merge_cfgnodes(cfgnode_0, cfgnode_1)
|
|
2016
|
+
adjusted_cfgnodes.add(cfgnode_0)
|
|
2017
|
+
adjusted_cfgnodes.add(cfgnode_1)
|
|
2015
2018
|
|
|
2016
2019
|
# Merge it
|
|
2017
2020
|
func_1 = functions[addr_1]
|
|
angr/analyses/cfg/cfg_emulated.py
CHANGED
|
@@ -28,6 +28,7 @@ from angr.errors import (
|
|
|
28
28
|
AngrCFGError,
|
|
29
29
|
AngrError,
|
|
30
30
|
AngrSkipJobNotice,
|
|
31
|
+
AngrSyscallError,
|
|
31
32
|
SimError,
|
|
32
33
|
SimValueError,
|
|
33
34
|
SimSolverModeError,
|
|
@@ -1806,7 +1807,10 @@ class CFGEmulated(ForwardAnalysis, CFGBase): # pylint: disable=abstract-method
|
|
|
1806
1807
|
|
|
1807
1808
|
# Fix target_addr for syscalls
|
|
1808
1809
|
if suc_jumpkind.startswith("Ijk_Sys"):
|
|
1809
|
-
|
|
1810
|
+
try:
|
|
1811
|
+
syscall_proc = self.project.simos.syscall(new_state)
|
|
1812
|
+
except AngrSyscallError:
|
|
1813
|
+
syscall_proc = None
|
|
1810
1814
|
if syscall_proc is not None:
|
|
1811
1815
|
target_addr = syscall_proc.addr
|
|
1812
1816
|
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -1077,12 +1077,12 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1077
1077
|
# no wide string is found
|
|
1078
1078
|
return 0
|
|
1079
1079
|
|
|
1080
|
-
def _scan_for_repeating_bytes(self, start_addr: int, repeating_byte: int, threshold: int = 2) -> int:
|
|
1080
|
+
def _scan_for_repeating_bytes(self, start_addr: int, repeating_byte: int | None, threshold: int = 2) -> int:
|
|
1081
1081
|
"""
|
|
1082
1082
|
Scan from a given address and determine the occurrences of a given byte.
|
|
1083
1083
|
|
|
1084
1084
|
:param start_addr: The address in memory to start scanning.
|
|
1085
|
-
:param repeating_byte: The repeating byte to scan for.
|
|
1085
|
+
:param repeating_byte: The repeating byte to scan for; None for *any* repeating byte.
|
|
1086
1086
|
:param threshold: The minimum occurrences.
|
|
1087
1087
|
:return: The occurrences of a given byte.
|
|
1088
1088
|
"""
|
|
@@ -1090,12 +1090,15 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1090
1090
|
addr = start_addr
|
|
1091
1091
|
|
|
1092
1092
|
repeating_length = 0
|
|
1093
|
+
last_byte = repeating_byte
|
|
1093
1094
|
|
|
1094
1095
|
while self._inside_regions(addr):
|
|
1095
1096
|
val = self._load_a_byte_as_int(addr)
|
|
1096
1097
|
if val is None:
|
|
1097
1098
|
break
|
|
1098
|
-
if
|
|
1099
|
+
if last_byte is None:
|
|
1100
|
+
last_byte = val
|
|
1101
|
+
elif val == last_byte:
|
|
1099
1102
|
repeating_length += 1
|
|
1100
1103
|
else:
|
|
1101
1104
|
break
|
|
@@ -1249,6 +1252,16 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1249
1252
|
self.model.memory_data[start_addr] = MemoryData(start_addr, zeros_length, MemoryDataSort.Alignment)
|
|
1250
1253
|
start_addr += zeros_length
|
|
1251
1254
|
|
|
1255
|
+
# we consider over 16 bytes of any repeated bytes to be bad
|
|
1256
|
+
repeating_byte_length = self._scan_for_repeating_bytes(start_addr, None, threshold=16)
|
|
1257
|
+
if repeating_byte_length:
|
|
1258
|
+
matched_something = True
|
|
1259
|
+
self._seg_list.occupy(start_addr, repeating_byte_length, "nodecode")
|
|
1260
|
+
self.model.memory_data[start_addr] = MemoryData(
|
|
1261
|
+
start_addr, repeating_byte_length, MemoryDataSort.Unknown
|
|
1262
|
+
)
|
|
1263
|
+
start_addr += repeating_byte_length
|
|
1264
|
+
|
|
1252
1265
|
if not matched_something:
|
|
1253
1266
|
# umm now it's probably code
|
|
1254
1267
|
break
|
|
@@ -1259,7 +1272,16 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1259
1272
|
if start_addr % instr_alignment > 0:
|
|
1260
1273
|
# occupy those few bytes
|
|
1261
1274
|
size = instr_alignment - (start_addr % instr_alignment)
|
|
1262
|
-
|
|
1275
|
+
|
|
1276
|
+
# to avoid extremely fragmented segmentation, we mark the current segment as the same type as the previous
|
|
1277
|
+
# adjacent segment if its type is nodecode
|
|
1278
|
+
segment_sort = "alignment"
|
|
1279
|
+
if start_addr >= 1:
|
|
1280
|
+
previous_segment_sort = self._seg_list.occupied_by_sort(start_addr - 1)
|
|
1281
|
+
if previous_segment_sort == "nodecode":
|
|
1282
|
+
segment_sort = "nodecode"
|
|
1283
|
+
|
|
1284
|
+
self._seg_list.occupy(start_addr, size, segment_sort)
|
|
1263
1285
|
self.model.memory_data[start_addr] = MemoryData(start_addr, size, MemoryDataSort.Unknown)
|
|
1264
1286
|
start_addr = start_addr - start_addr % instr_alignment + instr_alignment
|
|
1265
1287
|
# trickiness: aligning the start_addr may create a new address that is outside any mapped region.
|
|
@@ -4504,6 +4526,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4504
4526
|
|
|
4505
4527
|
if not self._arch_options.has_arm_code and addr % 2 == 0:
|
|
4506
4528
|
# No ARM code for this architecture!
|
|
4529
|
+
self._seg_list.occupy(real_addr, 2, "nodecode")
|
|
4507
4530
|
return None, None, None, None
|
|
4508
4531
|
|
|
4509
4532
|
initial_regs = self._get_initial_registers(addr, cfg_job, current_function_addr)
|
|
angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py
CHANGED
|
@@ -125,7 +125,17 @@ class ArmElfFastResolver(IndirectJumpResolver):
|
|
|
125
125
|
# Note that this function assumes the IRSB is optimized (opt_level > 0)
|
|
126
126
|
# the logic will be vastly different if the IRSB is not optimized (opt_level == 0)
|
|
127
127
|
|
|
128
|
-
b = Blade(
|
|
128
|
+
b = Blade(
|
|
129
|
+
cfg.graph,
|
|
130
|
+
addr,
|
|
131
|
+
-1,
|
|
132
|
+
cfg=cfg,
|
|
133
|
+
project=self.project,
|
|
134
|
+
ignore_sp=True,
|
|
135
|
+
ignore_bp=True,
|
|
136
|
+
max_level=2,
|
|
137
|
+
control_dependence=False,
|
|
138
|
+
)
|
|
129
139
|
sources = [n for n in b.slice.nodes() if b.slice.in_degree(n) == 0]
|
|
130
140
|
if not sources:
|
|
131
141
|
return False, []
|
|
angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py
CHANGED
|
@@ -5,10 +5,12 @@ import logging
|
|
|
5
5
|
import claripy
|
|
6
6
|
import pyvex
|
|
7
7
|
|
|
8
|
+
from angr.knowledge_plugins.propagations import PropagationModel
|
|
8
9
|
from angr.utils.constants import DEFAULT_STATEMENT
|
|
9
10
|
from angr.code_location import CodeLocation
|
|
10
11
|
from angr.blade import Blade
|
|
11
12
|
from angr.analyses.propagator import vex_vars
|
|
13
|
+
from angr.utils.vex import get_tmp_def_stmt
|
|
12
14
|
from .resolver import IndirectJumpResolver
|
|
13
15
|
from .propagator_utils import PropagatorLoadCallback
|
|
14
16
|
|
|
@@ -47,6 +49,12 @@ class ConstantResolver(IndirectJumpResolver):
|
|
|
47
49
|
super().__init__(project, timeless=False)
|
|
48
50
|
self.max_func_nodes = max_func_nodes
|
|
49
51
|
|
|
52
|
+
# stats
|
|
53
|
+
self._resolved = 0
|
|
54
|
+
self._unresolved = 0
|
|
55
|
+
self._cache_hits = 0
|
|
56
|
+
self._props_saved = 0
|
|
57
|
+
|
|
50
58
|
def filter(self, cfg, addr, func_addr, block, jumpkind):
|
|
51
59
|
if not cfg.functions.contains_addr(func_addr):
|
|
52
60
|
# the function does not exist
|
|
@@ -122,58 +130,203 @@ class ConstantResolver(IndirectJumpResolver):
|
|
|
122
130
|
max_level=3,
|
|
123
131
|
stop_at_calls=True,
|
|
124
132
|
cross_insn_opt=True,
|
|
133
|
+
control_dependence=False,
|
|
125
134
|
)
|
|
126
135
|
stmt_loc = addr, DEFAULT_STATEMENT
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
if isinstance(block.statements[stmt_idx], pyvex.IRStmt.IMark):
|
|
135
|
-
preds = list(b.slice.predecessors(preds[0]))
|
|
136
|
-
continue
|
|
136
|
+
if self._check_jump_target_is_loaded_from_dynamic_addr(b, stmt_loc):
|
|
137
|
+
# loading from memory - unsupported
|
|
138
|
+
return False, []
|
|
139
|
+
if self._check_jump_target_is_compared_against(b, stmt_loc):
|
|
140
|
+
# the jump/call target is compared against another value, which means it's not deterministic
|
|
141
|
+
# ConstantResolver does not support such cases by design
|
|
142
|
+
return False, []
|
|
137
143
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
144
|
+
# first check the replacements cache
|
|
145
|
+
resolved_tmp = None
|
|
146
|
+
is_full_func_prop = None
|
|
147
|
+
block_loc = CodeLocation(block.addr, tmp_stmt_idx, ins_addr=tmp_ins_addr)
|
|
148
|
+
tmp_var = vex_vars.VEXTmp(vex_block.next.tmp)
|
|
149
|
+
prop_key = "FCP", func_addr
|
|
150
|
+
cached_prop = cfg.kb.propagations.get(prop_key)
|
|
151
|
+
if cached_prop is not None:
|
|
152
|
+
is_full_func_prop = len(func.block_addrs_set) == cached_prop.function_block_count
|
|
153
|
+
replacements = cached_prop.replacements
|
|
154
|
+
if exists_in_replacements(replacements, block_loc, tmp_var):
|
|
155
|
+
self._cache_hits += 1
|
|
156
|
+
resolved_tmp = replacements[block_loc][tmp_var]
|
|
150
157
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
func,
|
|
154
|
-
vex_cross_insn_opt=False,
|
|
155
|
-
load_callback=PropagatorLoadCallback(self.project).propagator_load_callback,
|
|
156
|
-
)
|
|
158
|
+
if resolved_tmp is None and is_full_func_prop:
|
|
159
|
+
self._props_saved += 1
|
|
157
160
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
161
|
+
if resolved_tmp is None and not is_full_func_prop:
|
|
162
|
+
_l.debug("ConstantResolver: Propagating for %r at %#x.", func, addr)
|
|
163
|
+
prop = self.project.analyses.FastConstantPropagation(
|
|
164
|
+
func,
|
|
165
|
+
vex_cross_insn_opt=False,
|
|
166
|
+
load_callback=PropagatorLoadCallback(self.project).propagator_load_callback,
|
|
167
|
+
)
|
|
168
|
+
# update the cache
|
|
169
|
+
model = PropagationModel(
|
|
170
|
+
prop_key, replacements=prop.replacements, function_block_count=len(func.block_addrs_set)
|
|
171
|
+
)
|
|
172
|
+
cfg.kb.propagations.update(prop_key, model)
|
|
162
173
|
|
|
163
|
-
|
|
174
|
+
replacements = prop.replacements
|
|
175
|
+
if replacements and exists_in_replacements(replacements, block_loc, tmp_var):
|
|
164
176
|
resolved_tmp = replacements[block_loc][tmp_var]
|
|
165
177
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
)
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
178
|
+
if resolved_tmp is not None:
|
|
179
|
+
if (
|
|
180
|
+
isinstance(resolved_tmp, claripy.ast.Base)
|
|
181
|
+
and resolved_tmp.op == "BVV"
|
|
182
|
+
and self._is_target_valid(cfg, resolved_tmp.args[0])
|
|
183
|
+
):
|
|
184
|
+
self._resolved += 1
|
|
185
|
+
# print(f"{self._resolved} ({self._props_saved} saved, {self._cache_hits} cached) / "
|
|
186
|
+
# f"{self._resolved + self._unresolved}")
|
|
187
|
+
# print(f"+ Function: {func_addr:#x}, block {addr:#x}, target {resolved_tmp.args[0]:#x}")
|
|
188
|
+
return True, [resolved_tmp.args[0]]
|
|
189
|
+
if isinstance(resolved_tmp, int) and self._is_target_valid(cfg, resolved_tmp):
|
|
190
|
+
self._resolved += 1
|
|
191
|
+
# print(f"{self._resolved} ({self._props_saved} saved, {self._cache_hits} cached) / "
|
|
192
|
+
# f"{self._resolved + self._unresolved}")
|
|
193
|
+
# print(f"+ Function: {func_addr:#x}, block {addr:#x}, target {resolved_tmp:#x}")
|
|
194
|
+
return True, [resolved_tmp]
|
|
174
195
|
|
|
196
|
+
self._unresolved += 1
|
|
197
|
+
# print(f"{RESOLVED} ({SAVED_PROPS} saved, {HIT_CACHE} cached) / {RESOLVED + UNRESOLVED}")
|
|
198
|
+
# print(f"- Function: {func_addr:#x}, block {addr:#x}, FAILED")
|
|
175
199
|
return False, []
|
|
176
200
|
|
|
201
|
+
def _check_jump_target_is_loaded_from_dynamic_addr(self, b, stmt_loc) -> bool:
|
|
202
|
+
queue: list[tuple[int, int, int]] = [] # depth, block_addr, stmt_idx
|
|
203
|
+
seen_locs: set[tuple[int, int]] = set()
|
|
204
|
+
for block_addr, stmt_idx in b.slice.predecessors(stmt_loc):
|
|
205
|
+
if (block_addr, stmt_idx) in seen_locs:
|
|
206
|
+
continue
|
|
207
|
+
seen_locs.add((block_addr, stmt_idx))
|
|
208
|
+
queue.append((0, block_addr, stmt_idx))
|
|
209
|
+
while queue:
|
|
210
|
+
depth, pred_addr, stmt_idx = queue.pop(0)
|
|
211
|
+
if depth >= 3:
|
|
212
|
+
break
|
|
213
|
+
|
|
214
|
+
# skip all IMarks
|
|
215
|
+
if stmt_idx != DEFAULT_STATEMENT:
|
|
216
|
+
block = self.project.factory.block(pred_addr, cross_insn_opt=True).vex
|
|
217
|
+
stmt = block.statements[stmt_idx]
|
|
218
|
+
if isinstance(stmt, pyvex.IRStmt.IMark):
|
|
219
|
+
for succ_addr, succ_stmt_idx in b.slice.predecessors((pred_addr, stmt_idx)):
|
|
220
|
+
if (succ_addr, succ_stmt_idx) in seen_locs:
|
|
221
|
+
continue
|
|
222
|
+
seen_locs.add((succ_addr, succ_stmt_idx))
|
|
223
|
+
queue.append((depth + 1 if succ_addr != pred_addr else depth, succ_addr, succ_stmt_idx))
|
|
224
|
+
continue
|
|
225
|
+
|
|
226
|
+
if (
|
|
227
|
+
isinstance(stmt, pyvex.IRStmt.WrTmp)
|
|
228
|
+
and isinstance(stmt.data, pyvex.IRExpr.Load)
|
|
229
|
+
and not isinstance(stmt.data.addr, pyvex.IRExpr.Const)
|
|
230
|
+
):
|
|
231
|
+
# loading from memory
|
|
232
|
+
return True
|
|
233
|
+
|
|
234
|
+
for succ_addr, succ_stmt_idx in b.slice.predecessors((pred_addr, stmt_idx)):
|
|
235
|
+
if (succ_addr, succ_stmt_idx) in seen_locs:
|
|
236
|
+
continue
|
|
237
|
+
seen_locs.add((succ_addr, succ_stmt_idx))
|
|
238
|
+
queue.append((depth + 1 if succ_addr != pred_addr else depth, succ_addr, succ_stmt_idx))
|
|
239
|
+
|
|
240
|
+
return False
|
|
241
|
+
|
|
242
|
+
def _check_jump_target_is_compared_against(self, b, stmt_loc) -> bool:
|
|
243
|
+
# let's find which register the jump uses
|
|
244
|
+
jump_site = self.project.factory.block(stmt_loc[0], cross_insn_opt=True).vex
|
|
245
|
+
if not isinstance(jump_site.next, pyvex.IRExpr.RdTmp):
|
|
246
|
+
return False
|
|
247
|
+
next_tmp = jump_site.next.tmp
|
|
248
|
+
# find its definition
|
|
249
|
+
next_tmp_def = get_tmp_def_stmt(jump_site, next_tmp)
|
|
250
|
+
if next_tmp_def is None:
|
|
251
|
+
return False
|
|
252
|
+
next_tmp_def_stmt = jump_site.statements[next_tmp_def]
|
|
253
|
+
if not (
|
|
254
|
+
isinstance(next_tmp_def_stmt, pyvex.IRStmt.WrTmp) and isinstance(next_tmp_def_stmt.data, pyvex.IRExpr.Get)
|
|
255
|
+
):
|
|
256
|
+
return False
|
|
257
|
+
next_reg = next_tmp_def_stmt.data.offset
|
|
258
|
+
|
|
259
|
+
# traverse back at most one level and check:
|
|
260
|
+
# - this register has never been updated
|
|
261
|
+
# - a comparison is conducted on this register (via a tmp, most likely)
|
|
262
|
+
queue = []
|
|
263
|
+
seen = set()
|
|
264
|
+
for block_addr, stmt_idx in b.slice.predecessors(stmt_loc):
|
|
265
|
+
if (block_addr, stmt_idx) in seen:
|
|
266
|
+
continue
|
|
267
|
+
seen.add((block_addr, stmt_idx))
|
|
268
|
+
queue.append((0, block_addr, stmt_idx))
|
|
269
|
+
while queue:
|
|
270
|
+
depth, pred_addr, stmt_idx = queue.pop(0)
|
|
271
|
+
if depth > 1:
|
|
272
|
+
continue
|
|
273
|
+
|
|
274
|
+
# skip all IMarks
|
|
275
|
+
pred = pred_addr, stmt_idx
|
|
276
|
+
if stmt_idx != DEFAULT_STATEMENT:
|
|
277
|
+
block = self.project.factory.block(pred_addr, cross_insn_opt=True).vex
|
|
278
|
+
stmt = block.statements[stmt_idx]
|
|
279
|
+
if isinstance(stmt, pyvex.IRStmt.IMark):
|
|
280
|
+
for succ_addr, succ_stmt_idx in b.slice.predecessors(pred):
|
|
281
|
+
if (succ_addr, succ_stmt_idx) in seen:
|
|
282
|
+
continue
|
|
283
|
+
seen.add((succ_addr, succ_stmt_idx))
|
|
284
|
+
queue.append((depth + 1 if succ_addr != pred_addr else depth, succ_addr, succ_stmt_idx))
|
|
285
|
+
continue
|
|
286
|
+
|
|
287
|
+
if isinstance(stmt, pyvex.IRStmt.Put) and stmt.offset == next_reg:
|
|
288
|
+
# this register has been updated before we find a comparison; do not continue along this path
|
|
289
|
+
continue
|
|
290
|
+
|
|
291
|
+
if (
|
|
292
|
+
isinstance(stmt, pyvex.IRStmt.WrTmp)
|
|
293
|
+
and isinstance(stmt.data, pyvex.IRExpr.Binop)
|
|
294
|
+
and stmt.data.op.startswith("Iop_Cmp")
|
|
295
|
+
):
|
|
296
|
+
# what is it comparing against?
|
|
297
|
+
for arg in stmt.data.args:
|
|
298
|
+
if isinstance(arg, pyvex.IRExpr.RdTmp):
|
|
299
|
+
arg_tmp_def = get_tmp_def_stmt(block, arg.tmp)
|
|
300
|
+
if arg_tmp_def is not None:
|
|
301
|
+
arg_tmp_def_stmt = block.statements[arg_tmp_def]
|
|
302
|
+
if (
|
|
303
|
+
isinstance(arg_tmp_def_stmt, pyvex.IRStmt.WrTmp)
|
|
304
|
+
and isinstance(arg_tmp_def_stmt.data, pyvex.IRExpr.Get)
|
|
305
|
+
and arg_tmp_def_stmt.data.offset == next_reg
|
|
306
|
+
):
|
|
307
|
+
# the jump target is compared against this register
|
|
308
|
+
return True
|
|
309
|
+
# another case: VEX optimization may have caused the tmp to be stored in the target
|
|
310
|
+
# register. we need handle this case as well.
|
|
311
|
+
if any(
|
|
312
|
+
isinstance(stmt_, pyvex.IRStmt.Put)
|
|
313
|
+
and stmt_.offset == next_reg
|
|
314
|
+
and isinstance(stmt_.data, pyvex.IRExpr.RdTmp)
|
|
315
|
+
and stmt_.data.tmp == arg.tmp
|
|
316
|
+
for stmt_ in block.statements[arg_tmp_def + 1 : stmt_idx]
|
|
317
|
+
):
|
|
318
|
+
# the jump target is compared against this register
|
|
319
|
+
return True
|
|
320
|
+
|
|
321
|
+
# continue traversing predecessors
|
|
322
|
+
for succ_addr, succ_stmt_idx in b.slice.predecessors(pred):
|
|
323
|
+
if (succ_addr, succ_stmt_idx) in seen:
|
|
324
|
+
continue
|
|
325
|
+
seen.add((succ_addr, succ_stmt_idx))
|
|
326
|
+
queue.append((depth + 1 if succ_addr != pred_addr else depth, succ_addr, succ_stmt_idx))
|
|
327
|
+
|
|
328
|
+
return False
|
|
329
|
+
|
|
177
330
|
@staticmethod
|
|
178
331
|
def _find_tmp_write_stmt_and_ins(vex_block, tmp: int) -> tuple[int | None, int | None]:
|
|
179
332
|
stmt_idx = None
|
|
angr/analyses/decompiler/ail_simplifier.py
CHANGED
|
@@ -397,9 +397,11 @@ class AILSimplifier(Analysis):
|
|
|
397
397
|
if isinstance(def_.atom, atoms.VirtualVariable) and (def_.atom.was_reg or def_.atom.was_parameter):
|
|
398
398
|
# only do this for general purpose register
|
|
399
399
|
skip_def = False
|
|
400
|
+
reg = None
|
|
400
401
|
for reg in self.project.arch.register_list:
|
|
401
|
-
if
|
|
402
|
-
|
|
402
|
+
if reg.vex_offset == def_.atom.reg_offset:
|
|
403
|
+
if not reg.artificial and not reg.general_purpose and not reg.vector:
|
|
404
|
+
skip_def = True
|
|
403
405
|
break
|
|
404
406
|
|
|
405
407
|
if skip_def:
|
|
@@ -659,6 +661,16 @@ class AILSimplifier(Analysis):
|
|
|
659
661
|
first_op = ops[0]
|
|
660
662
|
if isinstance(first_op, Convert) and first_op.to_bits >= self.project.arch.byte_width:
|
|
661
663
|
# we need at least one byte!
|
|
664
|
+
if (
|
|
665
|
+
len({(op.from_bits, op.to_bits) for op in ops if isinstance(op, Convert) and op.operand.likes(expr)})
|
|
666
|
+
> 1
|
|
667
|
+
):
|
|
668
|
+
# there are more Convert operations; it's probably because there are multiple expressions involving the
|
|
669
|
+
# same core expr. just give up (for now)
|
|
670
|
+
return None, None
|
|
671
|
+
if any(op for op in ops if isinstance(op, BinaryOp) and op.op == "Shr" and op.operands[0].likes(expr)):
|
|
672
|
+
# the expression is right-shifted, which means higher bits might be used.
|
|
673
|
+
return None, None
|
|
662
674
|
return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
|
|
663
675
|
if isinstance(first_op, BinaryOp):
|
|
664
676
|
second_op = None
|
|
@@ -1816,13 +1828,11 @@ class AILSimplifier(Analysis):
|
|
|
1816
1828
|
if codeloc in self._assignments_to_remove:
|
|
1817
1829
|
# it should be removed
|
|
1818
1830
|
simplified = True
|
|
1819
|
-
self._assignments_to_remove.discard(codeloc)
|
|
1820
1831
|
continue
|
|
1821
1832
|
|
|
1822
1833
|
if self._statement_has_call_exprs(stmt):
|
|
1823
1834
|
if codeloc in self._calls_to_remove:
|
|
1824
1835
|
# it has a call and must be removed
|
|
1825
|
-
self._calls_to_remove.discard(codeloc)
|
|
1826
1836
|
simplified = True
|
|
1827
1837
|
continue
|
|
1828
1838
|
if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
|
|
@@ -1845,7 +1855,6 @@ class AILSimplifier(Analysis):
|
|
|
1845
1855
|
codeloc = CodeLocation(block.addr, idx, ins_addr=stmt.ins_addr, block_idx=block.idx)
|
|
1846
1856
|
if codeloc in self._calls_to_remove:
|
|
1847
1857
|
# this call can be removed
|
|
1848
|
-
self._calls_to_remove.discard(codeloc)
|
|
1849
1858
|
simplified = True
|
|
1850
1859
|
continue
|
|
1851
1860
|
|
|
@@ -1865,6 +1874,11 @@ class AILSimplifier(Analysis):
|
|
|
1865
1874
|
new_block.statements = new_statements
|
|
1866
1875
|
self.blocks[old_block] = new_block
|
|
1867
1876
|
|
|
1877
|
+
# we can only use calls_to_remove and assignments_to_remove once; if any statements in blocks are removed, then
|
|
1878
|
+
# the statement IDs in calls_to_remove and assignments_to_remove no longer match!
|
|
1879
|
+
self._calls_to_remove.clear()
|
|
1880
|
+
self._assignments_to_remove.clear()
|
|
1881
|
+
|
|
1868
1882
|
return simplified
|
|
1869
1883
|
|
|
1870
1884
|
@staticmethod
|
|
angr/analyses/decompiler/callsite_maker.py
CHANGED
|
@@ -17,7 +17,15 @@ from angr.sim_type import (
|
|
|
17
17
|
SimTypeFunction,
|
|
18
18
|
SimTypeLongLong,
|
|
19
19
|
)
|
|
20
|
-
from angr.calling_conventions import
|
|
20
|
+
from angr.calling_conventions import (
|
|
21
|
+
SimReferenceArgument,
|
|
22
|
+
SimRegArg,
|
|
23
|
+
SimStackArg,
|
|
24
|
+
SimCC,
|
|
25
|
+
SimStructArg,
|
|
26
|
+
SimComboArg,
|
|
27
|
+
SimFunctionArgument,
|
|
28
|
+
)
|
|
21
29
|
from angr.knowledge_plugins.key_definitions.constants import OP_BEFORE
|
|
22
30
|
from angr.analyses import Analysis, register_analysis
|
|
23
31
|
from angr.analyses.s_reaching_definitions import SRDAView
|
|
@@ -137,22 +145,7 @@ class CallSiteMaker(Analysis):
|
|
|
137
145
|
arg_locs = cc.arg_locs(callsite_ty)
|
|
138
146
|
|
|
139
147
|
if arg_locs is not None and cc is not None:
|
|
140
|
-
expanded_arg_locs
|
|
141
|
-
for arg_loc in arg_locs:
|
|
142
|
-
if isinstance(arg_loc, SimComboArg):
|
|
143
|
-
# a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
|
|
144
|
-
# across registers). most importantly, a ComboArg represents one variable, not multiple, but we
|
|
145
|
-
# have no way to know that until later down the pipeline.
|
|
146
|
-
expanded_arg_locs += arg_loc.locations
|
|
147
|
-
elif isinstance(arg_loc, SimStructArg):
|
|
148
|
-
expanded_arg_locs += [ # type: ignore
|
|
149
|
-
arg_loc.locs[field_name] for field_name in arg_loc.struct.fields if field_name in arg_loc.locs
|
|
150
|
-
]
|
|
151
|
-
elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
|
|
152
|
-
expanded_arg_locs.append(arg_loc)
|
|
153
|
-
else:
|
|
154
|
-
raise NotImplementedError("Not implemented yet.")
|
|
155
|
-
|
|
148
|
+
expanded_arg_locs = self._expand_arglocs(arg_locs)
|
|
156
149
|
for arg_loc in expanded_arg_locs:
|
|
157
150
|
if isinstance(arg_loc, SimReferenceArgument):
|
|
158
151
|
if not isinstance(arg_loc.ptr_loc, (SimRegArg, SimStackArg)):
|
|
@@ -548,6 +541,29 @@ class CallSiteMaker(Analysis):
|
|
|
548
541
|
return None
|
|
549
542
|
return len(specifiers)
|
|
550
543
|
|
|
544
|
+
def _expand_arglocs(
|
|
545
|
+
self, arg_locs: list[SimFunctionArgument]
|
|
546
|
+
) -> list[SimStackArg | SimRegArg | SimReferenceArgument]:
|
|
547
|
+
expanded_arg_locs: list[SimStackArg | SimRegArg | SimReferenceArgument] = []
|
|
548
|
+
|
|
549
|
+
for arg_loc in arg_locs:
|
|
550
|
+
if isinstance(arg_loc, SimComboArg):
|
|
551
|
+
# a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
|
|
552
|
+
# across registers). most importantly, a ComboArg represents one variable, not multiple, but we
|
|
553
|
+
# have no way to know that until later down the pipeline.
|
|
554
|
+
expanded_arg_locs += arg_loc.locations
|
|
555
|
+
elif isinstance(arg_loc, SimStructArg):
|
|
556
|
+
for field_name in arg_loc.struct.fields:
|
|
557
|
+
if field_name not in arg_loc.locs:
|
|
558
|
+
continue
|
|
559
|
+
expanded_arg_locs += self._expand_arglocs([arg_loc.locs[field_name]])
|
|
560
|
+
elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
|
|
561
|
+
expanded_arg_locs.append(arg_loc)
|
|
562
|
+
else:
|
|
563
|
+
raise NotImplementedError("Not implemented yet.")
|
|
564
|
+
|
|
565
|
+
return expanded_arg_locs
|
|
566
|
+
|
|
551
567
|
def _atom_idx(self) -> int | None:
|
|
552
568
|
return self._ail_manager.next_atom() if self._ail_manager is not None else None
|
|
553
569
|
|
|
angr/analyses/decompiler/graph_region.py
CHANGED
|
@@ -271,6 +271,13 @@ class GraphRegion:
|
|
|
271
271
|
else:
|
|
272
272
|
replace_with_graph_with_successors = replace_with.graph_with_successors
|
|
273
273
|
|
|
274
|
+
# if complete_successors is True for RegionIdentifier, replace_with.graph_with_successors may include nodes
|
|
275
|
+
# and edges that are *only* reachable from immediate successors. we will want to remove these nodes and edges,
|
|
276
|
+
# otherwise we may end up structuring the same region twice!
|
|
277
|
+
replace_with_graph_with_successors = self._cleanup_graph_with_successors(
|
|
278
|
+
replace_with.graph, replace_with_graph_with_successors
|
|
279
|
+
)
|
|
280
|
+
|
|
274
281
|
self._replace_node_in_graph_with_subgraph(
|
|
275
282
|
self.graph,
|
|
276
283
|
self.successors,
|
|
@@ -289,6 +296,18 @@ class GraphRegion:
|
|
|
289
296
|
replace_with.head,
|
|
290
297
|
)
|
|
291
298
|
|
|
299
|
+
@staticmethod
|
|
300
|
+
def _cleanup_graph_with_successors(
|
|
301
|
+
graph: networkx.DiGraph, graph_with_successors: networkx.DiGraph
|
|
302
|
+
) -> networkx.DiGraph:
|
|
303
|
+
expected_nodes = set(graph)
|
|
304
|
+
for n in list(expected_nodes):
|
|
305
|
+
for succ in graph_with_successors.successors(n):
|
|
306
|
+
expected_nodes.add(succ)
|
|
307
|
+
if all(n in expected_nodes for n in graph_with_successors):
|
|
308
|
+
return graph_with_successors
|
|
309
|
+
return graph_with_successors.subgraph(expected_nodes).to_directed()
|
|
310
|
+
|
|
292
311
|
@staticmethod
|
|
293
312
|
def _replace_node_in_graph(graph: networkx.DiGraph, node, replace_with, removed_edges: set):
|
|
294
313
|
in_edges = [(src, dst) for src, dst in graph.in_edges(node) if (src, dst) not in removed_edges]
|
|
angr/analyses/decompiler/optimization_passes/deadblock_remover.py
CHANGED
|
@@ -60,7 +60,7 @@ class DeadblockRemover(OptimizationPass):
|
|
|
60
60
|
blk
|
|
61
61
|
for blk in self._graph.nodes()
|
|
62
62
|
if (blk.addr != self._func.addr and self._graph.in_degree(blk) == 0)
|
|
63
|
-
or claripy.is_false(cond_proc.reaching_conditions
|
|
63
|
+
or claripy.is_false(cond_proc.reaching_conditions.get(blk, claripy.true()))
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
# fix up predecessors
|