angr 9.2.162__cp310-abi3-macosx_11_0_arm64.whl → 9.2.164__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic; see the registry's release advisory for more details.

Files changed (44):
  1. angr/__init__.py +1 -1
  2. angr/ailment/converter_vex.py +1 -1
  3. angr/ailment/expression.py +17 -1
  4. angr/analyses/cfg/cfg_base.py +17 -14
  5. angr/analyses/cfg/cfg_emulated.py +5 -1
  6. angr/analyses/cfg/cfg_fast.py +27 -4
  7. angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +11 -1
  8. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +194 -41
  9. angr/analyses/decompiler/ail_simplifier.py +19 -5
  10. angr/analyses/decompiler/callsite_maker.py +33 -17
  11. angr/analyses/decompiler/clinic.py +6 -6
  12. angr/analyses/decompiler/graph_region.py +19 -0
  13. angr/analyses/decompiler/optimization_passes/deadblock_remover.py +1 -1
  14. angr/analyses/decompiler/region_identifier.py +22 -1
  15. angr/analyses/decompiler/structuring/phoenix.py +72 -20
  16. angr/analyses/decompiler/structuring/recursive_structurer.py +3 -4
  17. angr/analyses/decompiler/structuring/structurer_nodes.py +3 -0
  18. angr/analyses/decompiler/utils.py +17 -5
  19. angr/analyses/fcp/fcp.py +11 -10
  20. angr/analyses/flirt/flirt_sig.py +5 -2
  21. angr/analyses/reaching_definitions/function_handler.py +1 -1
  22. angr/analyses/reaching_definitions/function_handler_library/stdio.py +7 -6
  23. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +10 -4
  24. angr/analyses/reaching_definitions/function_handler_library/string.py +13 -2
  25. angr/analyses/reaching_definitions/function_handler_library/unistd.py +7 -0
  26. angr/analyses/s_reaching_definitions/s_rda_view.py +2 -1
  27. angr/analyses/typehoon/typeconsts.py +3 -1
  28. angr/analyses/variable_recovery/engine_base.py +6 -10
  29. angr/blade.py +20 -15
  30. angr/engines/icicle.py +7 -2
  31. angr/knowledge_plugins/propagations/propagation_model.py +7 -0
  32. angr/project.py +5 -2
  33. angr/rustylib.abi3.so +0 -0
  34. angr/sim_type.py +18 -3
  35. angr/unicornlib.dylib +0 -0
  36. angr/utils/constants.py +1 -1
  37. angr/utils/graph.py +1 -1
  38. angr/utils/vex.py +11 -0
  39. {angr-9.2.162.dist-info → angr-9.2.164.dist-info}/METADATA +5 -5
  40. {angr-9.2.162.dist-info → angr-9.2.164.dist-info}/RECORD +44 -43
  41. {angr-9.2.162.dist-info → angr-9.2.164.dist-info}/WHEEL +0 -0
  42. {angr-9.2.162.dist-info → angr-9.2.164.dist-info}/entry_points.txt +0 -0
  43. {angr-9.2.162.dist-info → angr-9.2.164.dist-info}/licenses/LICENSE +0 -0
  44. {angr-9.2.162.dist-info → angr-9.2.164.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.162"
5
+ __version__ = "9.2.164"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
@@ -606,7 +606,7 @@ class VEXStmtConverter(Converter):
606
606
  expd_hi = VEXExprConverter.convert(stmt.expdHi, manager) if stmt.expdHi is not None else None
607
607
  old_lo = VEXExprConverter.tmp(stmt.oldLo, manager.tyenv.sizeof(stmt.oldLo), manager)
608
608
  old_hi = (
609
- VEXExprConverter.tmp(stmt.oldHi, stmt.oldHi.result_size(manager.tyenv), manager)
609
+ VEXExprConverter.tmp(stmt.oldHi, manager.tyenv.sizeof(stmt.oldHi), manager)
610
610
  if stmt.oldHi != 0xFFFFFFFF
611
611
  else None
612
612
  )
@@ -102,6 +102,18 @@ class Const(Atom):
102
102
  self.value = value
103
103
  self.bits = bits
104
104
 
105
+ @property
106
+ def value_int(self) -> int:
107
+ if isinstance(self.value, int):
108
+ return self.value
109
+ raise TypeError(f"Incorrect value type; expect int, got {type(self.value)}")
110
+
111
+ @property
112
+ def value_float(self) -> float:
113
+ if isinstance(self.value, float):
114
+ return self.value
115
+ raise TypeError(f"Incorrect value type; expect float, got {type(self.value)}")
116
+
105
117
  @property
106
118
  def size(self):
107
119
  return self.bits // 8
@@ -604,7 +616,11 @@ class Convert(UnaryOp):
604
616
  self.rounding_mode = rounding_mode
605
617
 
606
618
  def __str__(self):
607
- return f"Conv({self.from_bits}->{'s' if self.is_signed else ''}{self.to_bits}, {self.operand})"
619
+ from_type = "I" if self.from_type == Convert.TYPE_INT else "F"
620
+ to_type = "I" if self.to_type == Convert.TYPE_INT else "F"
621
+ return (
622
+ f"Conv({self.from_bits}{from_type}->{'s' if self.is_signed else ''}{self.to_bits}{to_type}, {self.operand})"
623
+ )
608
624
 
609
625
  def __repr__(self):
610
626
  return str(self)
@@ -1868,7 +1868,7 @@ class CFGBase(Analysis):
1868
1868
  should_merge = True
1869
1869
  functions_to_merge = set()
1870
1870
  i = func_pos + 1
1871
- while i < len(all_func_addrs):
1871
+ while i < len(all_func_addrs) and all_func_addrs[i] < endpoint_addr:
1872
1872
  f_addr = all_func_addrs[i]
1873
1873
  i += 1
1874
1874
  f = functions[f_addr]
@@ -1952,11 +1952,11 @@ class CFGBase(Analysis):
1952
1952
  # skip empty blocks (that are usually caused by lifting failures)
1953
1953
  continue
1954
1954
  block = func_0.get_block(block_node.addr, block_node.size)
1955
- if block.vex_nostmt.jumpkind not in ("Ijk_Boring", "Ijk_InvalICache"):
1956
- continue
1957
1955
  # Skip alignment blocks
1958
1956
  if self._is_noop_block(self.project.arch, block):
1959
1957
  continue
1958
+ if block.vex_nostmt.jumpkind not in ("Ijk_Boring", "Ijk_InvalICache"):
1959
+ continue
1960
1960
 
1961
1961
  # does the first block transition to the next function?
1962
1962
  transition_found = False
@@ -2001,17 +2001,20 @@ class CFGBase(Analysis):
2001
2001
 
2002
2002
  cfgnode_1_merged = False
2003
2003
  # we only merge two CFG nodes if the first one does not end with a branch instruction
2004
- if (
2005
- len(func_0.block_addrs_set) == 1
2006
- and len(out_edges) == 1
2007
- and out_edges[0][0].addr == cfgnode_0.addr
2008
- and out_edges[0][0].size == cfgnode_0.size
2009
- and self.project.factory.block(cfgnode_0.addr, strict_block_end=True).size > cfgnode_0.size
2010
- ):
2011
- cfgnode_1_merged = True
2012
- self._merge_cfgnodes(cfgnode_0, cfgnode_1)
2013
- adjusted_cfgnodes.add(cfgnode_0)
2014
- adjusted_cfgnodes.add(cfgnode_1)
2004
+ if len(func_0.block_addrs_set) == 1 and len(out_edges) == 1:
2005
+ outedge_src, outedge_dst, outedge_data = out_edges[0]
2006
+ if (
2007
+ outedge_src.addr == cfgnode_0.addr
2008
+ and outedge_src.size == cfgnode_0.size
2009
+ and outedge_dst.addr == cfgnode_1.addr
2010
+ and cfgnode_0.addr + cfgnode_0.size == cfgnode_1.addr
2011
+ and outedge_data.get("type", None) == "transition"
2012
+ and outedge_data.get("stmt_idx", None) == DEFAULT_STATEMENT
2013
+ ):
2014
+ cfgnode_1_merged = True
2015
+ self._merge_cfgnodes(cfgnode_0, cfgnode_1)
2016
+ adjusted_cfgnodes.add(cfgnode_0)
2017
+ adjusted_cfgnodes.add(cfgnode_1)
2015
2018
 
2016
2019
  # Merge it
2017
2020
  func_1 = functions[addr_1]
@@ -28,6 +28,7 @@ from angr.errors import (
28
28
  AngrCFGError,
29
29
  AngrError,
30
30
  AngrSkipJobNotice,
31
+ AngrSyscallError,
31
32
  SimError,
32
33
  SimValueError,
33
34
  SimSolverModeError,
@@ -1806,7 +1807,10 @@ class CFGEmulated(ForwardAnalysis, CFGBase): # pylint: disable=abstract-method
1806
1807
 
1807
1808
  # Fix target_addr for syscalls
1808
1809
  if suc_jumpkind.startswith("Ijk_Sys"):
1809
- syscall_proc = self.project.simos.syscall(new_state)
1810
+ try:
1811
+ syscall_proc = self.project.simos.syscall(new_state)
1812
+ except AngrSyscallError:
1813
+ syscall_proc = None
1810
1814
  if syscall_proc is not None:
1811
1815
  target_addr = syscall_proc.addr
1812
1816
 
@@ -1077,12 +1077,12 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1077
1077
  # no wide string is found
1078
1078
  return 0
1079
1079
 
1080
- def _scan_for_repeating_bytes(self, start_addr: int, repeating_byte: int, threshold: int = 2) -> int:
1080
+ def _scan_for_repeating_bytes(self, start_addr: int, repeating_byte: int | None, threshold: int = 2) -> int:
1081
1081
  """
1082
1082
  Scan from a given address and determine the occurrences of a given byte.
1083
1083
 
1084
1084
  :param start_addr: The address in memory to start scanning.
1085
- :param repeating_byte: The repeating byte to scan for.
1085
+ :param repeating_byte: The repeating byte to scan for; None for *any* repeating byte.
1086
1086
  :param threshold: The minimum occurrences.
1087
1087
  :return: The occurrences of a given byte.
1088
1088
  """
@@ -1090,12 +1090,15 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1090
1090
  addr = start_addr
1091
1091
 
1092
1092
  repeating_length = 0
1093
+ last_byte = repeating_byte
1093
1094
 
1094
1095
  while self._inside_regions(addr):
1095
1096
  val = self._load_a_byte_as_int(addr)
1096
1097
  if val is None:
1097
1098
  break
1098
- if val == repeating_byte:
1099
+ if last_byte is None:
1100
+ last_byte = val
1101
+ elif val == last_byte:
1099
1102
  repeating_length += 1
1100
1103
  else:
1101
1104
  break
@@ -1249,6 +1252,16 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1249
1252
  self.model.memory_data[start_addr] = MemoryData(start_addr, zeros_length, MemoryDataSort.Alignment)
1250
1253
  start_addr += zeros_length
1251
1254
 
1255
+ # we consider over 16 bytes of any repeated bytes to be bad
1256
+ repeating_byte_length = self._scan_for_repeating_bytes(start_addr, None, threshold=16)
1257
+ if repeating_byte_length:
1258
+ matched_something = True
1259
+ self._seg_list.occupy(start_addr, repeating_byte_length, "nodecode")
1260
+ self.model.memory_data[start_addr] = MemoryData(
1261
+ start_addr, repeating_byte_length, MemoryDataSort.Unknown
1262
+ )
1263
+ start_addr += repeating_byte_length
1264
+
1252
1265
  if not matched_something:
1253
1266
  # umm now it's probably code
1254
1267
  break
@@ -1259,7 +1272,16 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1259
1272
  if start_addr % instr_alignment > 0:
1260
1273
  # occupy those few bytes
1261
1274
  size = instr_alignment - (start_addr % instr_alignment)
1262
- self._seg_list.occupy(start_addr, size, "alignment")
1275
+
1276
+ # to avoid extremely fragmented segmentation, we mark the current segment as the same type as the previous
1277
+ # adjacent segment if its type is nodecode
1278
+ segment_sort = "alignment"
1279
+ if start_addr >= 1:
1280
+ previous_segment_sort = self._seg_list.occupied_by_sort(start_addr - 1)
1281
+ if previous_segment_sort == "nodecode":
1282
+ segment_sort = "nodecode"
1283
+
1284
+ self._seg_list.occupy(start_addr, size, segment_sort)
1263
1285
  self.model.memory_data[start_addr] = MemoryData(start_addr, size, MemoryDataSort.Unknown)
1264
1286
  start_addr = start_addr - start_addr % instr_alignment + instr_alignment
1265
1287
  # trickiness: aligning the start_addr may create a new address that is outside any mapped region.
@@ -4504,6 +4526,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4504
4526
 
4505
4527
  if not self._arch_options.has_arm_code and addr % 2 == 0:
4506
4528
  # No ARM code for this architecture!
4529
+ self._seg_list.occupy(real_addr, 2, "nodecode")
4507
4530
  return None, None, None, None
4508
4531
 
4509
4532
  initial_regs = self._get_initial_registers(addr, cfg_job, current_function_addr)
@@ -125,7 +125,17 @@ class ArmElfFastResolver(IndirectJumpResolver):
125
125
  # Note that this function assumes the IRSB is optimized (opt_level > 0)
126
126
  # the logic will be vastly different if the IRSB is not optimized (opt_level == 0)
127
127
 
128
- b = Blade(cfg.graph, addr, -1, cfg=cfg, project=self.project, ignore_sp=True, ignore_bp=True, max_level=2)
128
+ b = Blade(
129
+ cfg.graph,
130
+ addr,
131
+ -1,
132
+ cfg=cfg,
133
+ project=self.project,
134
+ ignore_sp=True,
135
+ ignore_bp=True,
136
+ max_level=2,
137
+ control_dependence=False,
138
+ )
129
139
  sources = [n for n in b.slice.nodes() if b.slice.in_degree(n) == 0]
130
140
  if not sources:
131
141
  return False, []
@@ -5,10 +5,12 @@ import logging
5
5
  import claripy
6
6
  import pyvex
7
7
 
8
+ from angr.knowledge_plugins.propagations import PropagationModel
8
9
  from angr.utils.constants import DEFAULT_STATEMENT
9
10
  from angr.code_location import CodeLocation
10
11
  from angr.blade import Blade
11
12
  from angr.analyses.propagator import vex_vars
13
+ from angr.utils.vex import get_tmp_def_stmt
12
14
  from .resolver import IndirectJumpResolver
13
15
  from .propagator_utils import PropagatorLoadCallback
14
16
 
@@ -47,6 +49,12 @@ class ConstantResolver(IndirectJumpResolver):
47
49
  super().__init__(project, timeless=False)
48
50
  self.max_func_nodes = max_func_nodes
49
51
 
52
+ # stats
53
+ self._resolved = 0
54
+ self._unresolved = 0
55
+ self._cache_hits = 0
56
+ self._props_saved = 0
57
+
50
58
  def filter(self, cfg, addr, func_addr, block, jumpkind):
51
59
  if not cfg.functions.contains_addr(func_addr):
52
60
  # the function does not exist
@@ -122,58 +130,203 @@ class ConstantResolver(IndirectJumpResolver):
122
130
  max_level=3,
123
131
  stop_at_calls=True,
124
132
  cross_insn_opt=True,
133
+ control_dependence=False,
125
134
  )
126
135
  stmt_loc = addr, DEFAULT_STATEMENT
127
- preds = list(b.slice.predecessors(stmt_loc))
128
- while preds:
129
- if len(preds) == 1:
130
- # skip all IMarks
131
- pred_addr, stmt_idx = preds[0]
132
- if stmt_idx != DEFAULT_STATEMENT:
133
- block = self.project.factory.block(pred_addr, cross_insn_opt=True).vex
134
- if isinstance(block.statements[stmt_idx], pyvex.IRStmt.IMark):
135
- preds = list(b.slice.predecessors(preds[0]))
136
- continue
136
+ if self._check_jump_target_is_loaded_from_dynamic_addr(b, stmt_loc):
137
+ # loading from memory - unsupported
138
+ return False, []
139
+ if self._check_jump_target_is_compared_against(b, stmt_loc):
140
+ # the jump/call target is compared against another value, which means it's not deterministic
141
+ # ConstantResolver does not support such cases by design
142
+ return False, []
137
143
 
138
- for pred_addr, stmt_idx in preds:
139
- block = self.project.factory.block(pred_addr, cross_insn_opt=True).vex
140
- if stmt_idx != DEFAULT_STATEMENT:
141
- stmt = block.statements[stmt_idx]
142
- if (
143
- isinstance(stmt, pyvex.IRStmt.WrTmp)
144
- and isinstance(stmt.data, pyvex.IRExpr.Load)
145
- and not isinstance(stmt.data.addr, pyvex.IRExpr.Const)
146
- ):
147
- # loading from memory - unsupported
148
- return False, []
149
- break
144
+ # first check the replacements cache
145
+ resolved_tmp = None
146
+ is_full_func_prop = None
147
+ block_loc = CodeLocation(block.addr, tmp_stmt_idx, ins_addr=tmp_ins_addr)
148
+ tmp_var = vex_vars.VEXTmp(vex_block.next.tmp)
149
+ prop_key = "FCP", func_addr
150
+ cached_prop = cfg.kb.propagations.get(prop_key)
151
+ if cached_prop is not None:
152
+ is_full_func_prop = len(func.block_addrs_set) == cached_prop.function_block_count
153
+ replacements = cached_prop.replacements
154
+ if exists_in_replacements(replacements, block_loc, tmp_var):
155
+ self._cache_hits += 1
156
+ resolved_tmp = replacements[block_loc][tmp_var]
150
157
 
151
- _l.debug("ConstantResolver: Propagating for %r at %#x.", func, addr)
152
- prop = self.project.analyses.FastConstantPropagation(
153
- func,
154
- vex_cross_insn_opt=False,
155
- load_callback=PropagatorLoadCallback(self.project).propagator_load_callback,
156
- )
158
+ if resolved_tmp is None and is_full_func_prop:
159
+ self._props_saved += 1
157
160
 
158
- replacements = prop.replacements
159
- if replacements:
160
- block_loc = CodeLocation(block.addr, tmp_stmt_idx, ins_addr=tmp_ins_addr)
161
- tmp_var = vex_vars.VEXTmp(vex_block.next.tmp)
161
+ if resolved_tmp is None and not is_full_func_prop:
162
+ _l.debug("ConstantResolver: Propagating for %r at %#x.", func, addr)
163
+ prop = self.project.analyses.FastConstantPropagation(
164
+ func,
165
+ vex_cross_insn_opt=False,
166
+ load_callback=PropagatorLoadCallback(self.project).propagator_load_callback,
167
+ )
168
+ # update the cache
169
+ model = PropagationModel(
170
+ prop_key, replacements=prop.replacements, function_block_count=len(func.block_addrs_set)
171
+ )
172
+ cfg.kb.propagations.update(prop_key, model)
162
173
 
163
- if exists_in_replacements(replacements, block_loc, tmp_var):
174
+ replacements = prop.replacements
175
+ if replacements and exists_in_replacements(replacements, block_loc, tmp_var):
164
176
  resolved_tmp = replacements[block_loc][tmp_var]
165
177
 
166
- if (
167
- isinstance(resolved_tmp, claripy.ast.Base)
168
- and resolved_tmp.op == "BVV"
169
- and self._is_target_valid(cfg, resolved_tmp.args[0])
170
- ):
171
- return True, [resolved_tmp.args[0]]
172
- if isinstance(resolved_tmp, int) and self._is_target_valid(cfg, resolved_tmp):
173
- return True, [resolved_tmp]
178
+ if resolved_tmp is not None:
179
+ if (
180
+ isinstance(resolved_tmp, claripy.ast.Base)
181
+ and resolved_tmp.op == "BVV"
182
+ and self._is_target_valid(cfg, resolved_tmp.args[0])
183
+ ):
184
+ self._resolved += 1
185
+ # print(f"{self._resolved} ({self._props_saved} saved, {self._cache_hits} cached) / "
186
+ # f"{self._resolved + self._unresolved}")
187
+ # print(f"+ Function: {func_addr:#x}, block {addr:#x}, target {resolved_tmp.args[0]:#x}")
188
+ return True, [resolved_tmp.args[0]]
189
+ if isinstance(resolved_tmp, int) and self._is_target_valid(cfg, resolved_tmp):
190
+ self._resolved += 1
191
+ # print(f"{self._resolved} ({self._props_saved} saved, {self._cache_hits} cached) / "
192
+ # f"{self._resolved + self._unresolved}")
193
+ # print(f"+ Function: {func_addr:#x}, block {addr:#x}, target {resolved_tmp:#x}")
194
+ return True, [resolved_tmp]
174
195
 
196
+ self._unresolved += 1
197
+ # print(f"{RESOLVED} ({SAVED_PROPS} saved, {HIT_CACHE} cached) / {RESOLVED + UNRESOLVED}")
198
+ # print(f"- Function: {func_addr:#x}, block {addr:#x}, FAILED")
175
199
  return False, []
176
200
 
201
+ def _check_jump_target_is_loaded_from_dynamic_addr(self, b, stmt_loc) -> bool:
202
+ queue: list[tuple[int, int, int]] = [] # depth, block_addr, stmt_idx
203
+ seen_locs: set[tuple[int, int]] = set()
204
+ for block_addr, stmt_idx in b.slice.predecessors(stmt_loc):
205
+ if (block_addr, stmt_idx) in seen_locs:
206
+ continue
207
+ seen_locs.add((block_addr, stmt_idx))
208
+ queue.append((0, block_addr, stmt_idx))
209
+ while queue:
210
+ depth, pred_addr, stmt_idx = queue.pop(0)
211
+ if depth >= 3:
212
+ break
213
+
214
+ # skip all IMarks
215
+ if stmt_idx != DEFAULT_STATEMENT:
216
+ block = self.project.factory.block(pred_addr, cross_insn_opt=True).vex
217
+ stmt = block.statements[stmt_idx]
218
+ if isinstance(stmt, pyvex.IRStmt.IMark):
219
+ for succ_addr, succ_stmt_idx in b.slice.predecessors((pred_addr, stmt_idx)):
220
+ if (succ_addr, succ_stmt_idx) in seen_locs:
221
+ continue
222
+ seen_locs.add((succ_addr, succ_stmt_idx))
223
+ queue.append((depth + 1 if succ_addr != pred_addr else depth, succ_addr, succ_stmt_idx))
224
+ continue
225
+
226
+ if (
227
+ isinstance(stmt, pyvex.IRStmt.WrTmp)
228
+ and isinstance(stmt.data, pyvex.IRExpr.Load)
229
+ and not isinstance(stmt.data.addr, pyvex.IRExpr.Const)
230
+ ):
231
+ # loading from memory
232
+ return True
233
+
234
+ for succ_addr, succ_stmt_idx in b.slice.predecessors((pred_addr, stmt_idx)):
235
+ if (succ_addr, succ_stmt_idx) in seen_locs:
236
+ continue
237
+ seen_locs.add((succ_addr, succ_stmt_idx))
238
+ queue.append((depth + 1 if succ_addr != pred_addr else depth, succ_addr, succ_stmt_idx))
239
+
240
+ return False
241
+
242
+ def _check_jump_target_is_compared_against(self, b, stmt_loc) -> bool:
243
+ # let's find which register the jump uses
244
+ jump_site = self.project.factory.block(stmt_loc[0], cross_insn_opt=True).vex
245
+ if not isinstance(jump_site.next, pyvex.IRExpr.RdTmp):
246
+ return False
247
+ next_tmp = jump_site.next.tmp
248
+ # find its definition
249
+ next_tmp_def = get_tmp_def_stmt(jump_site, next_tmp)
250
+ if next_tmp_def is None:
251
+ return False
252
+ next_tmp_def_stmt = jump_site.statements[next_tmp_def]
253
+ if not (
254
+ isinstance(next_tmp_def_stmt, pyvex.IRStmt.WrTmp) and isinstance(next_tmp_def_stmt.data, pyvex.IRExpr.Get)
255
+ ):
256
+ return False
257
+ next_reg = next_tmp_def_stmt.data.offset
258
+
259
+ # traverse back at most one level and check:
260
+ # - this register has never been updated
261
+ # - a comparison is conducted on this register (via a tmp, most likely)
262
+ queue = []
263
+ seen = set()
264
+ for block_addr, stmt_idx in b.slice.predecessors(stmt_loc):
265
+ if (block_addr, stmt_idx) in seen:
266
+ continue
267
+ seen.add((block_addr, stmt_idx))
268
+ queue.append((0, block_addr, stmt_idx))
269
+ while queue:
270
+ depth, pred_addr, stmt_idx = queue.pop(0)
271
+ if depth > 1:
272
+ continue
273
+
274
+ # skip all IMarks
275
+ pred = pred_addr, stmt_idx
276
+ if stmt_idx != DEFAULT_STATEMENT:
277
+ block = self.project.factory.block(pred_addr, cross_insn_opt=True).vex
278
+ stmt = block.statements[stmt_idx]
279
+ if isinstance(stmt, pyvex.IRStmt.IMark):
280
+ for succ_addr, succ_stmt_idx in b.slice.predecessors(pred):
281
+ if (succ_addr, succ_stmt_idx) in seen:
282
+ continue
283
+ seen.add((succ_addr, succ_stmt_idx))
284
+ queue.append((depth + 1 if succ_addr != pred_addr else depth, succ_addr, succ_stmt_idx))
285
+ continue
286
+
287
+ if isinstance(stmt, pyvex.IRStmt.Put) and stmt.offset == next_reg:
288
+ # this register has been updated before we find a comparison; do not continue along this path
289
+ continue
290
+
291
+ if (
292
+ isinstance(stmt, pyvex.IRStmt.WrTmp)
293
+ and isinstance(stmt.data, pyvex.IRExpr.Binop)
294
+ and stmt.data.op.startswith("Iop_Cmp")
295
+ ):
296
+ # what is it comparing against?
297
+ for arg in stmt.data.args:
298
+ if isinstance(arg, pyvex.IRExpr.RdTmp):
299
+ arg_tmp_def = get_tmp_def_stmt(block, arg.tmp)
300
+ if arg_tmp_def is not None:
301
+ arg_tmp_def_stmt = block.statements[arg_tmp_def]
302
+ if (
303
+ isinstance(arg_tmp_def_stmt, pyvex.IRStmt.WrTmp)
304
+ and isinstance(arg_tmp_def_stmt.data, pyvex.IRExpr.Get)
305
+ and arg_tmp_def_stmt.data.offset == next_reg
306
+ ):
307
+ # the jump target is compared against this register
308
+ return True
309
+ # another case: VEX optimization may have caused the tmp to be stored in the target
310
+ # register. we need handle this case as well.
311
+ if any(
312
+ isinstance(stmt_, pyvex.IRStmt.Put)
313
+ and stmt_.offset == next_reg
314
+ and isinstance(stmt_.data, pyvex.IRExpr.RdTmp)
315
+ and stmt_.data.tmp == arg.tmp
316
+ for stmt_ in block.statements[arg_tmp_def + 1 : stmt_idx]
317
+ ):
318
+ # the jump target is compared against this register
319
+ return True
320
+
321
+ # continue traversing predecessors
322
+ for succ_addr, succ_stmt_idx in b.slice.predecessors(pred):
323
+ if (succ_addr, succ_stmt_idx) in seen:
324
+ continue
325
+ seen.add((succ_addr, succ_stmt_idx))
326
+ queue.append((depth + 1 if succ_addr != pred_addr else depth, succ_addr, succ_stmt_idx))
327
+
328
+ return False
329
+
177
330
  @staticmethod
178
331
  def _find_tmp_write_stmt_and_ins(vex_block, tmp: int) -> tuple[int | None, int | None]:
179
332
  stmt_idx = None
@@ -397,9 +397,11 @@ class AILSimplifier(Analysis):
397
397
  if isinstance(def_.atom, atoms.VirtualVariable) and (def_.atom.was_reg or def_.atom.was_parameter):
398
398
  # only do this for general purpose register
399
399
  skip_def = False
400
+ reg = None
400
401
  for reg in self.project.arch.register_list:
401
- if not reg.artificial and reg.vex_offset == def_.atom.reg_offset and not reg.general_purpose:
402
- skip_def = True
402
+ if reg.vex_offset == def_.atom.reg_offset:
403
+ if not reg.artificial and not reg.general_purpose and not reg.vector:
404
+ skip_def = True
403
405
  break
404
406
 
405
407
  if skip_def:
@@ -659,6 +661,16 @@ class AILSimplifier(Analysis):
659
661
  first_op = ops[0]
660
662
  if isinstance(first_op, Convert) and first_op.to_bits >= self.project.arch.byte_width:
661
663
  # we need at least one byte!
664
+ if (
665
+ len({(op.from_bits, op.to_bits) for op in ops if isinstance(op, Convert) and op.operand.likes(expr)})
666
+ > 1
667
+ ):
668
+ # there are more Convert operations; it's probably because there are multiple expressions involving the
669
+ # same core expr. just give up (for now)
670
+ return None, None
671
+ if any(op for op in ops if isinstance(op, BinaryOp) and op.op == "Shr" and op.operands[0].likes(expr)):
672
+ # the expression is right-shifted, which means higher bits might be used.
673
+ return None, None
662
674
  return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
663
675
  if isinstance(first_op, BinaryOp):
664
676
  second_op = None
@@ -1816,13 +1828,11 @@ class AILSimplifier(Analysis):
1816
1828
  if codeloc in self._assignments_to_remove:
1817
1829
  # it should be removed
1818
1830
  simplified = True
1819
- self._assignments_to_remove.discard(codeloc)
1820
1831
  continue
1821
1832
 
1822
1833
  if self._statement_has_call_exprs(stmt):
1823
1834
  if codeloc in self._calls_to_remove:
1824
1835
  # it has a call and must be removed
1825
- self._calls_to_remove.discard(codeloc)
1826
1836
  simplified = True
1827
1837
  continue
1828
1838
  if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
@@ -1845,7 +1855,6 @@ class AILSimplifier(Analysis):
1845
1855
  codeloc = CodeLocation(block.addr, idx, ins_addr=stmt.ins_addr, block_idx=block.idx)
1846
1856
  if codeloc in self._calls_to_remove:
1847
1857
  # this call can be removed
1848
- self._calls_to_remove.discard(codeloc)
1849
1858
  simplified = True
1850
1859
  continue
1851
1860
 
@@ -1865,6 +1874,11 @@ class AILSimplifier(Analysis):
1865
1874
  new_block.statements = new_statements
1866
1875
  self.blocks[old_block] = new_block
1867
1876
 
1877
+ # we can only use calls_to_remove and assignments_to_remove once; if any statements in blocks are removed, then
1878
+ # the statement IDs in calls_to_remove and assignments_to_remove no longer match!
1879
+ self._calls_to_remove.clear()
1880
+ self._assignments_to_remove.clear()
1881
+
1868
1882
  return simplified
1869
1883
 
1870
1884
  @staticmethod
@@ -17,7 +17,15 @@ from angr.sim_type import (
17
17
  SimTypeFunction,
18
18
  SimTypeLongLong,
19
19
  )
20
- from angr.calling_conventions import SimReferenceArgument, SimRegArg, SimStackArg, SimCC, SimStructArg, SimComboArg
20
+ from angr.calling_conventions import (
21
+ SimReferenceArgument,
22
+ SimRegArg,
23
+ SimStackArg,
24
+ SimCC,
25
+ SimStructArg,
26
+ SimComboArg,
27
+ SimFunctionArgument,
28
+ )
21
29
  from angr.knowledge_plugins.key_definitions.constants import OP_BEFORE
22
30
  from angr.analyses import Analysis, register_analysis
23
31
  from angr.analyses.s_reaching_definitions import SRDAView
@@ -137,22 +145,7 @@ class CallSiteMaker(Analysis):
137
145
  arg_locs = cc.arg_locs(callsite_ty)
138
146
 
139
147
  if arg_locs is not None and cc is not None:
140
- expanded_arg_locs: list[SimStackArg | SimRegArg | SimReferenceArgument] = []
141
- for arg_loc in arg_locs:
142
- if isinstance(arg_loc, SimComboArg):
143
- # a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
144
- # across registers). most importantly, a ComboArg represents one variable, not multiple, but we
145
- # have no way to know that until later down the pipeline.
146
- expanded_arg_locs += arg_loc.locations
147
- elif isinstance(arg_loc, SimStructArg):
148
- expanded_arg_locs += [ # type: ignore
149
- arg_loc.locs[field_name] for field_name in arg_loc.struct.fields if field_name in arg_loc.locs
150
- ]
151
- elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
152
- expanded_arg_locs.append(arg_loc)
153
- else:
154
- raise NotImplementedError("Not implemented yet.")
155
-
148
+ expanded_arg_locs = self._expand_arglocs(arg_locs)
156
149
  for arg_loc in expanded_arg_locs:
157
150
  if isinstance(arg_loc, SimReferenceArgument):
158
151
  if not isinstance(arg_loc.ptr_loc, (SimRegArg, SimStackArg)):
@@ -548,6 +541,29 @@ class CallSiteMaker(Analysis):
548
541
  return None
549
542
  return len(specifiers)
550
543
 
544
+ def _expand_arglocs(
545
+ self, arg_locs: list[SimFunctionArgument]
546
+ ) -> list[SimStackArg | SimRegArg | SimReferenceArgument]:
547
+ expanded_arg_locs: list[SimStackArg | SimRegArg | SimReferenceArgument] = []
548
+
549
+ for arg_loc in arg_locs:
550
+ if isinstance(arg_loc, SimComboArg):
551
+ # a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
552
+ # across registers). most importantly, a ComboArg represents one variable, not multiple, but we
553
+ # have no way to know that until later down the pipeline.
554
+ expanded_arg_locs += arg_loc.locations
555
+ elif isinstance(arg_loc, SimStructArg):
556
+ for field_name in arg_loc.struct.fields:
557
+ if field_name not in arg_loc.locs:
558
+ continue
559
+ expanded_arg_locs += self._expand_arglocs([arg_loc.locs[field_name]])
560
+ elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
561
+ expanded_arg_locs.append(arg_loc)
562
+ else:
563
+ raise NotImplementedError("Not implemented yet.")
564
+
565
+ return expanded_arg_locs
566
+
551
567
  def _atom_idx(self) -> int | None:
552
568
  return self._ail_manager.next_atom() if self._ail_manager is not None else None
553
569