angr 9.2.139__py3-none-macosx_10_9_x86_64.whl → 9.2.140__py3-none-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (69)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +48 -21
  3. angr/analyses/cfg/cfg_base.py +13 -0
  4. angr/analyses/cfg/cfg_fast.py +11 -0
  5. angr/analyses/decompiler/ail_simplifier.py +67 -52
  6. angr/analyses/decompiler/clinic.py +68 -43
  7. angr/analyses/decompiler/decompiler.py +17 -7
  8. angr/analyses/decompiler/expression_narrower.py +1 -1
  9. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
  10. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
  11. angr/analyses/decompiler/optimization_passes/optimization_pass.py +16 -10
  12. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +2 -2
  13. angr/analyses/decompiler/region_simplifiers/expr_folding.py +259 -108
  14. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +27 -12
  15. angr/analyses/decompiler/structuring/dream.py +21 -17
  16. angr/analyses/decompiler/structuring/phoenix.py +152 -40
  17. angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
  18. angr/analyses/decompiler/structuring/structurer_base.py +36 -10
  19. angr/analyses/decompiler/structuring/structurer_nodes.py +4 -1
  20. angr/analyses/decompiler/utils.py +60 -1
  21. angr/analyses/deobfuscator/api_obf_finder.py +8 -5
  22. angr/analyses/deobfuscator/api_obf_type2_finder.py +18 -10
  23. angr/analyses/deobfuscator/string_obf_finder.py +105 -18
  24. angr/analyses/forward_analysis/forward_analysis.py +1 -1
  25. angr/analyses/propagator/top_checker_mixin.py +6 -6
  26. angr/analyses/reaching_definitions/__init__.py +2 -1
  27. angr/analyses/reaching_definitions/dep_graph.py +1 -12
  28. angr/analyses/reaching_definitions/engine_vex.py +36 -31
  29. angr/analyses/reaching_definitions/function_handler.py +15 -2
  30. angr/analyses/reaching_definitions/rd_state.py +1 -37
  31. angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
  32. angr/analyses/s_propagator.py +6 -41
  33. angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
  34. angr/analyses/stack_pointer_tracker.py +36 -22
  35. angr/analyses/typehoon/simple_solver.py +45 -7
  36. angr/analyses/typehoon/typeconsts.py +18 -5
  37. angr/analyses/variable_recovery/engine_base.py +7 -5
  38. angr/block.py +69 -107
  39. angr/callable.py +14 -7
  40. angr/calling_conventions.py +15 -1
  41. angr/distributed/__init__.py +1 -1
  42. angr/engines/__init__.py +7 -8
  43. angr/engines/engine.py +1 -120
  44. angr/engines/failure.py +2 -2
  45. angr/engines/hook.py +2 -2
  46. angr/engines/light/engine.py +2 -2
  47. angr/engines/pcode/engine.py +2 -14
  48. angr/engines/procedure.py +2 -2
  49. angr/engines/soot/engine.py +2 -2
  50. angr/engines/soot/statements/switch.py +1 -1
  51. angr/engines/successors.py +124 -11
  52. angr/engines/syscall.py +2 -2
  53. angr/engines/unicorn.py +3 -3
  54. angr/engines/vex/heavy/heavy.py +3 -15
  55. angr/factory.py +4 -19
  56. angr/knowledge_plugins/key_definitions/atoms.py +8 -4
  57. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
  58. angr/lib/angr_native.dylib +0 -0
  59. angr/sim_type.py +19 -17
  60. angr/state_plugins/plugin.py +19 -4
  61. angr/storage/memory_mixins/memory_mixin.py +1 -1
  62. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
  63. angr/utils/ssa/__init__.py +119 -4
  64. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/METADATA +6 -6
  65. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/RECORD +69 -69
  66. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/LICENSE +0 -0
  67. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/WHEEL +0 -0
  68. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/entry_points.txt +0 -0
  69. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.139"
5
+ __version__ = "9.2.140"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
@@ -150,6 +150,8 @@ class CallingConventionAnalysis(Analysis):
150
150
  The major analysis routine.
151
151
  """
152
152
 
153
+ assert self._function is not None
154
+
153
155
  if self._function.is_simprocedure:
154
156
  hooker = self.project.hooked_by(self._function.addr)
155
157
  if isinstance(
@@ -200,8 +202,8 @@ class CallingConventionAnalysis(Analysis):
200
202
  )
201
203
  if prototype.args:
202
204
  break
203
- self.cc = cc
204
- self.prototype = prototype
205
+ self.cc = cc # type: ignore
206
+ self.prototype = prototype # type: ignore
205
207
  return
206
208
  if self._function.is_plt:
207
209
  r = self._analyze_plt()
@@ -218,23 +220,33 @@ class CallingConventionAnalysis(Analysis):
218
220
  if self.analyze_callsites:
219
221
  # only take the first 3 because running reaching definition analysis on all functions is costly
220
222
  callsite_facts = self._extract_and_analyze_callsites(max_analyzing_callsites=3)
221
- prototype = self._adjust_prototype(
222
- prototype, callsite_facts, update_arguments=UpdateArgumentsOption.UpdateWhenCCHasNoArgs
223
+ prototype = (
224
+ self._adjust_prototype(
225
+ prototype, callsite_facts, update_arguments=UpdateArgumentsOption.UpdateWhenCCHasNoArgs
226
+ )
227
+ if prototype is not None
228
+ else None
223
229
  )
224
230
 
225
231
  self.cc = cc
226
232
  self.prototype = prototype
227
233
 
228
234
  def _analyze_callsite_only(self):
235
+ assert self.caller_func_addr is not None
236
+ assert self.callsite_block_addr is not None
237
+ assert self.callsite_insn_addr is not None
238
+ cc, prototype = None, None
239
+
229
240
  for include_callsite_preds in [False, True]:
230
- callsite_facts = [
231
- self._analyze_callsite(
232
- self.caller_func_addr,
233
- self.callsite_block_addr,
234
- self.callsite_insn_addr,
235
- include_preds=include_callsite_preds,
236
- )
237
- ]
241
+ fact = self._analyze_callsite(
242
+ self.caller_func_addr,
243
+ self.callsite_block_addr,
244
+ self.callsite_insn_addr,
245
+ include_preds=include_callsite_preds,
246
+ )
247
+ if fact is None:
248
+ continue
249
+ callsite_facts = [fact]
238
250
  cc_cls = default_cc(
239
251
  self.project.arch.name,
240
252
  platform=(
@@ -258,6 +270,7 @@ class CallingConventionAnalysis(Analysis):
258
270
 
259
271
  :return: A calling convention.
260
272
  """
273
+ assert self._function is not None
261
274
 
262
275
  if len(self._function.jumpout_sites) != 1:
263
276
  l.warning(
@@ -314,6 +327,7 @@ class CallingConventionAnalysis(Analysis):
314
327
  Go over the variable information in variable manager for this function, and return all uninitialized
315
328
  register/stack variables.
316
329
  """
330
+ assert self._function is not None
317
331
 
318
332
  if self._function.is_simprocedure or self._function.is_plt:
319
333
  # we do not analyze SimProcedures or PLT stubs
@@ -403,6 +417,8 @@ class CallingConventionAnalysis(Analysis):
403
417
  returns anything or not.
404
418
  """
405
419
 
420
+ assert self._function is not None
421
+
406
422
  if self._cfg is None:
407
423
  l.warning("CFG is not provided. Skip calling convention analysis at call sites.")
408
424
  return []
@@ -656,13 +672,10 @@ class CallingConventionAnalysis(Analysis):
656
672
 
657
673
  def _adjust_prototype(
658
674
  self,
659
- proto: SimTypeFunction | None,
675
+ proto: SimTypeFunction,
660
676
  facts: list[CallSiteFact],
661
677
  update_arguments: int = UpdateArgumentsOption.DoNotUpdate,
662
- ) -> SimTypeFunction | None:
663
- if proto is None:
664
- return None
665
-
678
+ ) -> SimTypeFunction:
666
679
  # is the return value used anywhere?
667
680
  if facts:
668
681
  if all(fact.return_value_used is False for fact in facts):
@@ -691,6 +704,8 @@ class CallingConventionAnalysis(Analysis):
691
704
  :return:
692
705
  """
693
706
 
707
+ assert self._function is not None
708
+
694
709
  args = set()
695
710
  ret_addr_offset = 0 if not self.project.arch.call_pushes_ret else self.project.arch.bytes
696
711
 
@@ -839,17 +854,27 @@ class CallingConventionAnalysis(Analysis):
839
854
  return SimTypeBottom()
840
855
 
841
856
  def _guess_retval_type(self, cc: SimCC, ret_val_size: int | None) -> SimType:
857
+ assert self._function is not None
858
+
842
859
  if cc.FP_RETURN_VAL and self._function.ret_sites:
843
860
  # examine the last block of the function and see which registers are assigned to
844
861
  for ret_block in self._function.ret_sites:
862
+ fpretval_updated, retval_updated = False, False
863
+ fp_reg_size = 0
845
864
  irsb = self.project.factory.block(ret_block.addr, size=ret_block.size).vex
846
865
  for stmt in irsb.statements:
847
866
  if isinstance(stmt, Put) and isinstance(stmt.data, RdTmp):
848
- reg_size = irsb.tyenv.sizeof(stmt.data.tmp) // self.project.arch.byte_width
867
+ reg_size = irsb.tyenv.sizeof(stmt.data.tmp) // self.project.arch.byte_width # type: ignore
849
868
  reg_name = self.project.arch.translate_register_name(stmt.offset, size=reg_size)
850
- if reg_name == cc.FP_RETURN_VAL.reg_name:
851
- # possibly float
852
- return SimTypeFloat() if reg_size == 4 else SimTypeDouble()
869
+ if isinstance(cc.FP_RETURN_VAL, SimRegArg) and reg_name == cc.FP_RETURN_VAL.reg_name:
870
+ fpretval_updated = True
871
+ fp_reg_size = reg_size
872
+ elif isinstance(cc.RETURN_VAL, SimRegArg) and reg_name == cc.RETURN_VAL.reg_name:
873
+ retval_updated = True
874
+
875
+ if fpretval_updated and not retval_updated:
876
+ # possibly float
877
+ return SimTypeFloat() if fp_reg_size == 4 else SimTypeDouble()
853
878
 
854
879
  if ret_val_size is not None:
855
880
  if ret_val_size == 1:
@@ -884,6 +909,8 @@ class CallingConventionAnalysis(Analysis):
884
909
  # TODO: Use a better pattern matching approach
885
910
  if len(func.block_addrs_set) < 3:
886
911
  return False, None
912
+ if func.startpoint is None:
913
+ return False, None
887
914
 
888
915
  head = func.startpoint
889
916
  out_edges = list(func.transition_graph.out_edges(head, data=True))
@@ -1421,6 +1421,19 @@ class CFGBase(Analysis):
1421
1421
  # We gotta create a new one
1422
1422
  l.error("normalize(): Please report it to Fish.")
1423
1423
 
1424
+ # update the jump tables dict and the indirect jumps dict
1425
+ if smallest_node.addr not in self.model.jump_tables:
1426
+ for n in other_nodes:
1427
+ if n.addr in self.model.jump_tables:
1428
+ self.model.jump_tables[n.addr].addr = smallest_node.addr
1429
+ self.model.jump_tables[smallest_node.addr] = self.model.jump_tables[n.addr]
1430
+ break
1431
+ if smallest_node.addr not in self.indirect_jumps:
1432
+ for n in other_nodes:
1433
+ if n.addr in self.indirect_jumps:
1434
+ self.indirect_jumps[n.addr].addr = smallest_node.addr
1435
+ self.indirect_jumps[smallest_node.addr] = self.indirect_jumps[n.addr]
1436
+ break
1424
1437
  # deal with duplicated entries in self.jump_tables and self.indirect_jumps
1425
1438
  if smallest_node.addr in self.model.jump_tables:
1426
1439
  for n in other_nodes:
@@ -1524,6 +1524,17 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1524
1524
  }:
1525
1525
  func.info["is_rust_probestack"] = True
1526
1526
 
1527
+ # determine if the function is __alloca_probe
1528
+ if func is not None and len(func.block_addrs_set) == 4:
1529
+ block_bytes = {func.get_block(block_addr).bytes for block_addr in func.block_addrs_set}
1530
+ if block_bytes == {
1531
+ b"H\x83\xec\x10L\x89\x14$L\x89\\$\x08M3\xdbL\x8dT$\x18L+\xd0M\x0fB\xd3eL\x8b\x1c%\x10\x00\x00\x00M;\xd3s\x16",
1532
+ b"fA\x81\xe2\x00\xf0M\x8d\x9b\x00\xf0\xff\xffA\xc6\x03\x00M;\xd3u\xf0",
1533
+ b"M\x8d\x9b\x00\xf0\xff\xffA\xc6\x03\x00M;\xd3u\xf0",
1534
+ b"L\x8b\x14$L\x8b\\$\x08H\x83\xc4\x10\xc3",
1535
+ }:
1536
+ func.info["is_alloca_probe"] = True
1537
+
1527
1538
  if self._collect_data_ref and self.project is not None and ":" in self.project.arch.name:
1528
1539
  # this is a pcode arch - use Clinic to recover data references
1529
1540
 
@@ -27,6 +27,7 @@ from ailment.expression import (
27
27
  from angr.analyses.s_propagator import SPropagatorAnalysis
28
28
  from angr.analyses.s_reaching_definitions import SRDAModel
29
29
  from angr.utils.ail import is_phi_assignment, HasExprWalker
30
+ from angr.utils.ssa import has_call_in_between_stmts, has_store_stmt_in_between_stmts, has_load_expr_in_between_stmts
30
31
  from angr.code_location import CodeLocation, ExternalCodeLocation
31
32
  from angr.sim_variable import SimStackVariable, SimMemoryVariable, SimVariable
32
33
  from angr.knowledge_plugins.propagations.states import Equivalence
@@ -292,7 +293,7 @@ class AILSimplifier(Analysis):
292
293
 
293
294
  narrowed = False
294
295
 
295
- addr_and_idx_to_block: dict[tuple[int, int], Block] = {}
296
+ addr_and_idx_to_block: dict[tuple[int, int | None], Block] = {}
296
297
  for block in self.func_graph.nodes():
297
298
  addr_and_idx_to_block[(block.addr, block.idx)] = block
298
299
 
@@ -424,6 +425,7 @@ class AILSimplifier(Analysis):
424
425
  return ExprNarrowingInfo(False)
425
426
 
426
427
  block = self.blocks.get(old_block, old_block)
428
+ assert loc.stmt_idx is not None
427
429
  if loc.stmt_idx >= len(block.statements):
428
430
  # missing a statement for whatever reason
429
431
  return ExprNarrowingInfo(False)
@@ -551,14 +553,23 @@ class AILSimplifier(Analysis):
551
553
  return None, None
552
554
  return expr.size, ("expr", (expr,))
553
555
 
554
- first_op = walker.operations[0]
556
+ ops = walker.operations
557
+ first_op = ops[0]
558
+ if isinstance(first_op, BinaryOp) and first_op.op in {"Add", "Sub"}:
559
+ # expr + x
560
+ ops = ops[1:]
561
+ if not ops:
562
+ if expr is None:
563
+ return None, None
564
+ return expr.size, ("expr", (expr,))
565
+ first_op = ops[0]
555
566
  if isinstance(first_op, Convert) and first_op.to_bits >= self.project.arch.byte_width:
556
567
  # we need at least one byte!
557
568
  return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
558
569
  if isinstance(first_op, BinaryOp):
559
570
  second_op = None
560
- if len(walker.operations) >= 2:
561
- second_op = walker.operations[1]
571
+ if len(ops) >= 2:
572
+ second_op = ops[1]
562
573
  if (
563
574
  first_op.op == "And"
564
575
  and isinstance(first_op.operands[1], Const)
@@ -623,9 +634,9 @@ class AILSimplifier(Analysis):
623
634
  block = blocks_by_addr_and_idx[(block_addr, block_idx)]
624
635
 
625
636
  # only replace loads if there are stack arguments in this block
626
- replace_loads = insn_addrs_using_stack_args is not None and {
627
- stmt.ins_addr for stmt in block.statements
628
- }.intersection(insn_addrs_using_stack_args)
637
+ replace_loads: bool = insn_addrs_using_stack_args is not None and bool(
638
+ {stmt.ins_addr for stmt in block.statements}.intersection(insn_addrs_using_stack_args)
639
+ )
629
640
 
630
641
  # remove virtual variables in the avoid list
631
642
  if self._avoid_vvar_ids:
@@ -662,7 +673,7 @@ class AILSimplifier(Analysis):
662
673
  if not equivalence:
663
674
  return simplified
664
675
 
665
- addr_and_idx_to_block: dict[tuple[int, int], Block] = {}
676
+ addr_and_idx_to_block: dict[tuple[int, int | None], Block] = {}
666
677
  for block in self.func_graph.nodes():
667
678
  addr_and_idx_to_block[(block.addr, block.idx)] = block
668
679
 
@@ -943,6 +954,8 @@ class AILSimplifier(Analysis):
943
954
  for use_loc in all_use_locs:
944
955
  if use_loc == eq.codeloc:
945
956
  continue
957
+ assert use_loc.block_addr is not None
958
+ assert use_loc.stmt_idx is not None
946
959
  block = addr_and_idx_to_block[(use_loc.block_addr, use_loc.block_idx)]
947
960
  stmt = block.statements[use_loc.stmt_idx]
948
961
  if isinstance(stmt, Assignment) or (isinstance(replace_with, Load) and isinstance(stmt, Store)):
@@ -954,11 +967,15 @@ class AILSimplifier(Analysis):
954
967
 
955
968
  remove_initial_assignment = False # expression folding will take care of it
956
969
 
970
+ assert replace_with is not None
971
+
957
972
  if any(not isinstance(use_and_expr[1], VirtualVariable) for _, use_and_expr in all_uses_with_def):
958
973
  # if any of the uses are phi assignments, we skip
959
974
  used_in_phi_assignment = False
960
975
  for _, use_and_expr in all_uses_with_def:
961
976
  u = use_and_expr[0]
977
+ assert u.block_addr is not None
978
+ assert u.stmt_idx is not None
962
979
  block = addr_and_idx_to_block[(u.block_addr, u.block_idx)]
963
980
  stmt = block.statements[u.stmt_idx]
964
981
  if is_phi_assignment(stmt):
@@ -1120,8 +1137,6 @@ class AILSimplifier(Analysis):
1120
1137
  than once after simplification and graph structuring where conditions might be duplicated (e.g., in Dream).
1121
1138
  In such cases, the one-use expression folder in RegionSimplifier will perform this transformation.
1122
1139
  """
1123
- # Disabled until https://github.com/angr/angr/issues/5112 and related folding issues are fixed
1124
- return False
1125
1140
 
1126
1141
  # pylint:disable=unreachable
1127
1142
  simplified = False
@@ -1130,7 +1145,7 @@ class AILSimplifier(Analysis):
1130
1145
  if not equivalence:
1131
1146
  return simplified
1132
1147
 
1133
- addr_and_idx_to_block: dict[tuple[int, int], Block] = {}
1148
+ addr_and_idx_to_block: dict[tuple[int, int | None], Block] = {}
1134
1149
  for block in self.func_graph.nodes():
1135
1150
  addr_and_idx_to_block[(block.addr, block.idx)] = block
1136
1151
 
@@ -1168,6 +1183,8 @@ class AILSimplifier(Analysis):
1168
1183
  ),
1169
1184
  eq.codeloc,
1170
1185
  )
1186
+ assert the_def.codeloc.block_addr is not None
1187
+ assert the_def.codeloc.stmt_idx is not None
1171
1188
 
1172
1189
  all_uses: set[tuple[CodeLocation, Any]] = set(rd.get_vvar_uses_with_expr(the_def.atom))
1173
1190
 
@@ -1176,6 +1193,8 @@ class AILSimplifier(Analysis):
1176
1193
  u, used_expr = next(iter(all_uses))
1177
1194
  if used_expr is None:
1178
1195
  continue
1196
+ assert u.block_addr is not None
1197
+ assert u.stmt_idx is not None
1179
1198
 
1180
1199
  if u in def_locations_to_remove:
1181
1200
  # this use site has been altered by previous folding attempts. the corresponding statement will be
@@ -1196,41 +1215,28 @@ class AILSimplifier(Analysis):
1196
1215
  if u.block_addr not in {b.addr for b in super_node_blocks}:
1197
1216
  continue
1198
1217
 
1199
- # check if the register has been overwritten by statements in between the def site and the use site
1200
- # usesite_atom_defs = set(rd.get_defs(the_def.atom, u, OP_BEFORE))
1201
- # if len(usesite_atom_defs) != 1:
1202
- # continue
1203
- # usesite_atom_def = next(iter(usesite_atom_defs))
1204
- # if usesite_atom_def != the_def:
1205
- # continue
1206
-
1207
- # check if any atoms that the call relies on has been overwritten by statements in between the def site
1208
- # and the use site.
1209
- # TODO: Prove non-interference
1210
- # defsite_all_expr_uses = set(rd.all_uses.get_uses_by_location(the_def.codeloc))
1211
- # defsite_used_atoms = set()
1212
- # for dd in defsite_all_expr_uses:
1213
- # defsite_used_atoms.add(dd.atom)
1214
- # usesite_expr_def_outdated = False
1215
- # for defsite_expr_atom in defsite_used_atoms:
1216
- # usesite_expr_uses = set(rd.get_defs(defsite_expr_atom, u, OP_BEFORE))
1217
- # if not usesite_expr_uses:
1218
- # # the atom is not defined at the use site - it's fine
1219
- # continue
1220
- # defsite_expr_uses = set(rd.get_defs(defsite_expr_atom, the_def.codeloc, OP_BEFORE))
1221
- # if usesite_expr_uses != defsite_expr_uses:
1222
- # # special case: ok if this atom is assigned to at the def site and has not been overwritten
1223
- # if len(usesite_expr_uses) == 1:
1224
- # usesite_expr_use = next(iter(usesite_expr_uses))
1225
- # if usesite_expr_use.atom == defsite_expr_atom and (
1226
- # usesite_expr_use.codeloc == the_def.codeloc
1227
- # or usesite_expr_use.codeloc.block_addr == call_addr
1228
- # ):
1229
- # continue
1230
- # usesite_expr_def_outdated = True
1231
- # break
1232
- # if usesite_expr_def_outdated:
1233
- # continue
1218
+ # ensure there are no other calls between the def site and the use site.
1219
+ # this is because we do not want to alter the order of calls.
1220
+ u_inclusive = CodeLocation(u.block_addr, u.stmt_idx + 1, block_idx=u.block_idx)
1221
+ # note that the target statement being a store is fine
1222
+ if (
1223
+ has_call_in_between_stmts(
1224
+ self.func_graph,
1225
+ addr_and_idx_to_block,
1226
+ the_def.codeloc,
1227
+ u_inclusive,
1228
+ skip_if_contains_vvar=the_def.atom.varid,
1229
+ )
1230
+ or has_store_stmt_in_between_stmts(self.func_graph, addr_and_idx_to_block, the_def.codeloc, u)
1231
+ or has_load_expr_in_between_stmts(
1232
+ self.func_graph,
1233
+ addr_and_idx_to_block,
1234
+ the_def.codeloc,
1235
+ u_inclusive,
1236
+ skip_if_contains_vvar=the_def.atom.varid,
1237
+ )
1238
+ ):
1239
+ continue
1234
1240
 
1235
1241
  # check if there are any calls in between the def site and the use site
1236
1242
  if self._count_calls_in_supernodeblocks(super_node_blocks, the_def.codeloc, u) > 0:
@@ -1316,8 +1322,8 @@ class AILSimplifier(Analysis):
1316
1322
  # keeping tracking of statements to remove and statements (as well as dead vvars) to keep allows us to handle
1317
1323
  # cases where a statement defines more than one atoms, e.g., a call statement that defines both the return
1318
1324
  # value and the floating-point return value.
1319
- stmts_to_remove_per_block: dict[tuple[int, int], set[int]] = defaultdict(set)
1320
- stmts_to_keep_per_block: dict[tuple[int, int], set[int]] = defaultdict(set)
1325
+ stmts_to_remove_per_block: dict[tuple[int, int | None], set[int]] = defaultdict(set)
1326
+ stmts_to_keep_per_block: dict[tuple[int, int | None], set[int]] = defaultdict(set)
1321
1327
  dead_vvar_ids: set[int] = set()
1322
1328
 
1323
1329
  # Find all statements that should be removed
@@ -1344,6 +1350,7 @@ class AILSimplifier(Analysis):
1344
1350
  pass
1345
1351
  elif stackarg_offsets is not None:
1346
1352
  # we always remove definitions for stack arguments
1353
+ assert def_.atom.stack_offset is not None
1347
1354
  if (def_.atom.stack_offset & mask) not in stackarg_offsets:
1348
1355
  continue
1349
1356
  else:
@@ -1364,10 +1371,15 @@ class AILSimplifier(Analysis):
1364
1371
  dead_vvar_ids.add(def_.atom.varid)
1365
1372
 
1366
1373
  if not isinstance(def_.codeloc, ExternalCodeLocation):
1374
+ assert def_.codeloc.block_addr is not None
1375
+ assert def_.codeloc.stmt_idx is not None
1367
1376
  stmts_to_remove_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(
1368
1377
  def_.codeloc.stmt_idx
1369
1378
  )
1370
1379
  else:
1380
+ if not isinstance(def_.codeloc, ExternalCodeLocation):
1381
+ assert def_.codeloc.block_addr is not None
1382
+ assert def_.codeloc.stmt_idx is not None
1371
1383
  stmts_to_keep_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(def_.codeloc.stmt_idx)
1372
1384
 
1373
1385
  # find all phi variables that rely on variables that no longer exist
@@ -1380,6 +1392,7 @@ class AILSimplifier(Analysis):
1380
1392
  vvarid in removed_vvar_ids for vvarid in phi_use_varids
1381
1393
  ):
1382
1394
  loc = rd.all_vvar_definitions[rd.varid_to_vvar[phi_varid]]
1395
+ assert loc.block_addr is not None and loc.stmt_idx is not None
1383
1396
  stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
1384
1397
  new_removed_vvar_ids.add(phi_varid)
1385
1398
  all_removed_var_ids.add(phi_varid)
@@ -1391,11 +1404,13 @@ class AILSimplifier(Analysis):
1391
1404
  redundant_phi_and_dirty_varids = self._find_cyclic_dependent_phis_and_dirty_vvars(rd)
1392
1405
  for varid in redundant_phi_and_dirty_varids:
1393
1406
  loc = rd.all_vvar_definitions[rd.varid_to_vvar[varid]]
1407
+ assert loc.block_addr is not None and loc.stmt_idx is not None
1394
1408
  stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
1395
1409
  stmts_to_keep_per_block[(loc.block_addr, loc.block_idx)].discard(loc.stmt_idx)
1396
1410
 
1397
1411
  for codeloc in self._calls_to_remove | self._assignments_to_remove:
1398
1412
  # this call can be removed. make sure it exists in stmts_to_remove_per_block
1413
+ assert codeloc.block_addr is not None and codeloc.stmt_idx is not None
1399
1414
  stmts_to_remove_per_block[codeloc.block_addr, codeloc.block_idx].add(codeloc.stmt_idx)
1400
1415
 
1401
1416
  simplified = False
@@ -1565,7 +1580,7 @@ class AILSimplifier(Analysis):
1565
1580
  if rewriter_cls is None:
1566
1581
  return False
1567
1582
 
1568
- walker = None
1583
+ walker = AILBlockWalker()
1569
1584
 
1570
1585
  class _any_update:
1571
1586
  """
@@ -1574,7 +1589,9 @@ class AILSimplifier(Analysis):
1574
1589
 
1575
1590
  v = False
1576
1591
 
1577
- def _handle_expr(expr_idx: int, expr: Expression, stmt_idx: int, stmt: Statement, block) -> Expression | None:
1592
+ def _handle_expr(
1593
+ expr_idx: int, expr: Expression, stmt_idx: int, stmt: Statement | None, block: Block | None
1594
+ ) -> Expression | None:
1578
1595
  if isinstance(expr, VEXCCallExpression):
1579
1596
  rewriter = rewriter_cls(expr, self.project.arch)
1580
1597
  if rewriter.result is not None:
@@ -1585,8 +1602,6 @@ class AILSimplifier(Analysis):
1585
1602
  return AILBlockWalker._handle_expr(walker, expr_idx, expr, stmt_idx, stmt, block)
1586
1603
 
1587
1604
  blocks_by_addr_and_idx = {(node.addr, node.idx): node for node in self.func_graph.nodes()}
1588
-
1589
- walker = AILBlockWalker()
1590
1605
  walker._handle_expr = _handle_expr
1591
1606
 
1592
1607
  updated = False