angr 9.2.141__py3-none-manylinux2014_x86_64.whl → 9.2.143__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. See the linked details on the original page.

Files changed (71)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +26 -12
  3. angr/analyses/calling_convention/fact_collector.py +31 -9
  4. angr/analyses/cfg/cfg_base.py +38 -4
  5. angr/analyses/cfg/cfg_fast.py +23 -7
  6. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -1
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +19 -6
  10. angr/analyses/decompiler/ail_simplifier.py +138 -98
  11. angr/analyses/decompiler/clinic.py +73 -5
  12. angr/analyses/decompiler/condition_processor.py +7 -7
  13. angr/analyses/decompiler/decompilation_cache.py +2 -1
  14. angr/analyses/decompiler/decompiler.py +10 -2
  15. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  16. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  17. angr/analyses/decompiler/optimization_passes/condition_constprop.py +110 -46
  18. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
  19. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +2 -0
  21. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  22. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  23. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  24. angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
  25. angr/analyses/decompiler/region_identifier.py +70 -47
  26. angr/analyses/decompiler/sequence_walker.py +8 -0
  27. angr/analyses/decompiler/ssailification/rewriting.py +47 -17
  28. angr/analyses/decompiler/ssailification/rewriting_engine.py +13 -0
  29. angr/analyses/decompiler/stack_item.py +36 -0
  30. angr/analyses/decompiler/structured_codegen/c.py +14 -9
  31. angr/analyses/decompiler/structuring/phoenix.py +3 -3
  32. angr/analyses/decompiler/utils.py +13 -0
  33. angr/analyses/find_objects_static.py +2 -1
  34. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  35. angr/analyses/reaching_definitions/function_handler.py +24 -10
  36. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  37. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  38. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  39. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  40. angr/analyses/reaching_definitions/rd_state.py +11 -7
  41. angr/analyses/s_liveness.py +44 -6
  42. angr/analyses/s_propagator.py +40 -29
  43. angr/analyses/s_reaching_definitions/s_rda_model.py +48 -37
  44. angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
  45. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +21 -21
  46. angr/analyses/typehoon/simple_solver.py +35 -8
  47. angr/analyses/typehoon/typehoon.py +3 -1
  48. angr/analyses/variable_recovery/engine_ail.py +6 -6
  49. angr/calling_conventions.py +20 -10
  50. angr/knowledge_plugins/functions/function.py +5 -10
  51. angr/knowledge_plugins/variables/variable_manager.py +27 -0
  52. angr/procedures/definitions/__init__.py +3 -10
  53. angr/procedures/definitions/linux_kernel.py +5 -0
  54. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  55. angr/procedures/win32_kernel/__fastfail.py +15 -0
  56. angr/sim_procedure.py +2 -2
  57. angr/simos/simos.py +14 -10
  58. angr/simos/windows.py +42 -1
  59. angr/utils/ail.py +41 -1
  60. angr/utils/cpp.py +17 -0
  61. angr/utils/doms.py +149 -0
  62. angr/utils/library.py +1 -1
  63. angr/utils/ssa/__init__.py +21 -14
  64. angr/utils/ssa/vvar_uses_collector.py +2 -2
  65. angr/utils/types.py +12 -1
  66. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/METADATA +7 -7
  67. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/RECORD +71 -67
  68. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/LICENSE +0 -0
  69. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/WHEEL +0 -0
  70. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/entry_points.txt +0 -0
  71. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/top_level.txt +0 -0
@@ -36,6 +36,7 @@ from angr.knowledge_plugins.key_definitions.definition import Definition
36
36
  from angr.knowledge_plugins.key_definitions.constants import OP_BEFORE
37
37
  from angr.errors import AngrRuntimeError
38
38
  from angr.analyses import Analysis, AnalysesHub
39
+ from angr.utils.timing import timethis
39
40
  from .ailgraph_walker import AILGraphWalker
40
41
  from .expression_narrower import ExprNarrowingInfo, NarrowingInfoExtractor, ExpressionNarrower
41
42
  from .block_simplifier import BlockSimplifier
@@ -118,7 +119,7 @@ class AILSimplifier(Analysis):
118
119
  self._should_rewrite_ccalls = rewrite_ccalls
119
120
  self._removed_vvar_ids = removed_vvar_ids if removed_vvar_ids is not None else set()
120
121
  self._arg_vvars = arg_vvars
121
- self._avoid_vvar_ids = avoid_vvar_ids
122
+ self._avoid_vvar_ids = avoid_vvar_ids if avoid_vvar_ids is not None else set()
122
123
  self._propagator_dead_vvar_ids: set[int] = set()
123
124
  self._secondary_stackvars: set[int] = secondary_stackvars if secondary_stackvars is not None else set()
124
125
 
@@ -132,12 +133,10 @@ class AILSimplifier(Analysis):
132
133
  def _simplify(self):
133
134
  if self._narrow_expressions:
134
135
  _l.debug("Removing dead assignments before narrowing expressions")
135
- r = self._remove_dead_assignments()
136
+ r = self._iteratively_remove_dead_assignments()
136
137
  if r:
137
138
  _l.debug("... dead assignments removed")
138
139
  self.simplified = True
139
- self._rebuild_func_graph()
140
- self._clear_cache()
141
140
 
142
141
  _l.debug("Narrowing expressions")
143
142
  narrowed_exprs = self._narrow_exprs()
@@ -170,12 +169,10 @@ class AILSimplifier(Analysis):
170
169
 
171
170
  if self._unify_vars:
172
171
  _l.debug("Removing dead assignments")
173
- r = self._remove_dead_assignments()
172
+ r = self._iteratively_remove_dead_assignments()
174
173
  if r:
175
174
  _l.debug("... dead assignments removed")
176
175
  self.simplified = True
177
- self._rebuild_func_graph()
178
- self._clear_cache()
179
176
 
180
177
  _l.debug("Unifying local variables")
181
178
  r = self._unify_local_variables()
@@ -194,11 +191,10 @@ class AILSimplifier(Analysis):
194
191
  self._clear_cache()
195
192
 
196
193
  _l.debug("Removing dead assignments")
197
- r = self._remove_dead_assignments()
194
+ r = self._iteratively_remove_dead_assignments()
198
195
  if r:
199
196
  _l.debug("... dead assignments removed")
200
197
  self.simplified = True
201
- self._rebuild_func_graph()
202
198
 
203
199
  def _rebuild_func_graph(self):
204
200
  def _handler(node):
@@ -207,6 +203,7 @@ class AILSimplifier(Analysis):
207
203
  AILGraphWalker(self.func_graph, _handler, replace_nodes=True).walk()
208
204
  self.blocks = {}
209
205
 
206
+ @timethis
210
207
  def _compute_reaching_definitions(self) -> SRDAModel:
211
208
  # Computing reaching definitions or return the cached one
212
209
  if self._reaching_definitions is not None:
@@ -222,6 +219,7 @@ class AILSimplifier(Analysis):
222
219
  self._reaching_definitions = rd
223
220
  return rd
224
221
 
222
+ @timethis
225
223
  def _compute_propagation(self) -> SPropagatorAnalysis:
226
224
  # Propagate expressions or return the existing result
227
225
  if self._propagator is not None:
@@ -238,6 +236,7 @@ class AILSimplifier(Analysis):
238
236
  self._propagator_dead_vvar_ids = prop.dead_vvar_ids
239
237
  return prop
240
238
 
239
+ @timethis
241
240
  def _compute_equivalence(self) -> set[Equivalence]:
242
241
  equivalence = set()
243
242
  for block in self.func_graph:
@@ -286,6 +285,7 @@ class AILSimplifier(Analysis):
286
285
  # Expression narrowing
287
286
  #
288
287
 
288
+ @timethis
289
289
  def _narrow_exprs(self) -> bool:
290
290
  """
291
291
  A register may be used with full width even when only the lower bytes are really needed. This results in the
@@ -516,9 +516,9 @@ class AILSimplifier(Analysis):
516
516
  atom = atom_queue.pop(0)
517
517
  seen.add(atom)
518
518
 
519
- use_and_exprs = rd.get_vvar_uses_with_expr(atom)
519
+ expr_and_uses = rd.all_vvar_uses[atom.varid]
520
520
 
521
- for loc, expr in use_and_exprs:
521
+ for expr, loc in set(expr_and_uses):
522
522
  old_block = block_dict.get((loc.block_addr, loc.block_idx), None)
523
523
  if old_block is None:
524
524
  # missing a block for whatever reason
@@ -537,6 +537,7 @@ class AILSimplifier(Analysis):
537
537
  )
538
538
  if new_atom not in seen:
539
539
  atom_queue.append(new_atom)
540
+ seen.add(new_atom)
540
541
  else:
541
542
  result.append((atom, loc, expr))
542
543
  return result, phi_vars
@@ -664,6 +665,7 @@ class AILSimplifier(Analysis):
664
665
  # Unifying local variables
665
666
  #
666
667
 
668
+ @timethis
667
669
  def _unify_local_variables(self) -> bool:
668
670
  """
669
671
  Find variables that are definitely equivalent and then eliminate unnecessary copies.
@@ -827,14 +829,14 @@ class AILSimplifier(Analysis):
827
829
  continue
828
830
 
829
831
  # find all its uses
830
- all_arg_copy_var_uses: set[tuple[CodeLocation, Any]] = set(
831
- rd.get_vvar_uses_with_expr(arg_copy_def.atom)
832
+ all_arg_copy_var_uses: set[tuple[Any, CodeLocation]] = rd.get_vvar_uses_with_expr(
833
+ arg_copy_def.atom
832
834
  )
833
835
  all_uses_with_def = set()
834
836
 
835
837
  should_abort = False
836
838
  for use in all_arg_copy_var_uses:
837
- used_expr = use[1]
839
+ used_expr = use[0]
838
840
  if used_expr is not None and used_expr.size != arg_copy_def.size:
839
841
  should_abort = True
840
842
  break
@@ -929,15 +931,19 @@ class AILSimplifier(Analysis):
929
931
 
930
932
  # find all uses of this definition
931
933
  # we make a copy of the set since we may touch the set (uses) when replacing expressions
932
- all_uses: set[tuple[CodeLocation, Any]] = set(rd.get_vvar_uses_with_expr(to_replace_def.atom))
934
+ all_uses: set[tuple[Any, CodeLocation]] = set(rd.all_vvar_uses[to_replace_def.atom.varid])
933
935
  # make sure none of these uses are phi nodes (depends on more than one def)
934
936
  all_uses_with_unique_def = set()
935
- for use_and_expr in all_uses:
936
- use_loc, used_expr = use_and_expr
937
+ for expr_and_use in all_uses:
938
+ used_expr, use_loc = expr_and_use
937
939
  defs_and_exprs = rd.get_uses_by_location(use_loc, exprs=True)
938
- filtered_defs = {def_ for def_, expr_ in defs_and_exprs if expr_ == used_expr}
940
+ filtered_defs = {
941
+ def_
942
+ for def_, expr_ in defs_and_exprs
943
+ if expr_ is not None and used_expr is not None and expr_.varid == used_expr.varid
944
+ }
939
945
  if len(filtered_defs) == 1:
940
- all_uses_with_unique_def.add(use_and_expr)
946
+ all_uses_with_unique_def.add(expr_and_use)
941
947
  else:
942
948
  # optimization: break early
943
949
  break
@@ -952,7 +958,7 @@ class AILSimplifier(Analysis):
952
958
 
953
959
  if not (isinstance(replace_with, VirtualVariable) and replace_with.was_parameter):
954
960
  assignment_ctr = 0
955
- all_use_locs = {use_loc for use_loc, _ in all_uses}
961
+ all_use_locs = {use_loc for _, use_loc in all_uses}
956
962
  for use_loc in all_use_locs:
957
963
  if use_loc == eq.codeloc:
958
964
  continue
@@ -965,17 +971,17 @@ class AILSimplifier(Analysis):
965
971
  if assignment_ctr > 1:
966
972
  continue
967
973
 
968
- all_uses_with_def = {(to_replace_def, use_and_expr) for use_and_expr in all_uses}
974
+ all_uses_with_def = {(to_replace_def, expr_and_use) for expr_and_use in all_uses}
969
975
 
970
976
  remove_initial_assignment = False # expression folding will take care of it
971
977
 
972
978
  assert replace_with is not None
973
979
 
974
- if any(not isinstance(use_and_expr[1], VirtualVariable) for _, use_and_expr in all_uses_with_def):
980
+ if any(not isinstance(expr_and_use[0], VirtualVariable) for _, expr_and_use in all_uses_with_def):
975
981
  # if any of the uses are phi assignments, we skip
976
982
  used_in_phi_assignment = False
977
- for _, use_and_expr in all_uses_with_def:
978
- u = use_and_expr[0]
983
+ for _, expr_and_use in all_uses_with_def:
984
+ u = expr_and_use[1]
979
985
  assert u.block_addr is not None
980
986
  assert u.stmt_idx is not None
981
987
  block = addr_and_idx_to_block[(u.block_addr, u.block_idx)]
@@ -988,8 +994,8 @@ class AILSimplifier(Analysis):
988
994
 
989
995
  # ensure the uses we consider are all after the eq location
990
996
  filtered_all_uses_with_def = []
991
- for def_, use_and_expr in all_uses_with_def:
992
- u = use_and_expr[0]
997
+ for def_, expr_and_use in all_uses_with_def:
998
+ u = expr_and_use[1]
993
999
  if (
994
1000
  u.block_addr == eq.codeloc.block_addr
995
1001
  and u.block_idx == eq.codeloc.block_idx
@@ -997,7 +1003,7 @@ class AILSimplifier(Analysis):
997
1003
  ):
998
1004
  # this use happens before the assignment - ignore it
999
1005
  continue
1000
- filtered_all_uses_with_def.append((def_, use_and_expr))
1006
+ filtered_all_uses_with_def.append((def_, expr_and_use))
1001
1007
  all_uses_with_def = filtered_all_uses_with_def
1002
1008
 
1003
1009
  if not all_uses_with_def:
@@ -1009,8 +1015,8 @@ class AILSimplifier(Analysis):
1009
1015
 
1010
1016
  # replace all uses
1011
1017
  all_uses_replaced = True
1012
- for def_, use_and_expr in all_uses_with_def:
1013
- u, used_expr = use_and_expr
1018
+ for def_, expr_and_use in all_uses_with_def:
1019
+ used_expr, u = expr_and_use
1014
1020
 
1015
1021
  use_expr_defns = []
1016
1022
  for d in rd.get_uses_by_location(u):
@@ -1115,6 +1121,7 @@ class AILSimplifier(Analysis):
1115
1121
  walker.walk_statement(stmt)
1116
1122
  return len(walker.temps) > 0
1117
1123
 
1124
+ @timethis
1118
1125
  def _fold_call_exprs(self) -> bool:
1119
1126
  """
1120
1127
  Fold a call expression (statement) into other statements if the return value of the call expression (statement)
@@ -1188,11 +1195,11 @@ class AILSimplifier(Analysis):
1188
1195
  assert the_def.codeloc.block_addr is not None
1189
1196
  assert the_def.codeloc.stmt_idx is not None
1190
1197
 
1191
- all_uses: set[tuple[CodeLocation, Any]] = set(rd.get_vvar_uses_with_expr(the_def.atom))
1198
+ all_uses: set[tuple[Any, CodeLocation]] = rd.get_vvar_uses_with_expr(the_def.atom)
1192
1199
 
1193
1200
  if len(all_uses) != 1:
1194
1201
  continue
1195
- u, used_expr = next(iter(all_uses))
1202
+ used_expr, u = next(iter(all_uses))
1196
1203
  if used_expr is None:
1197
1204
  continue
1198
1205
  assert u.block_addr is not None
@@ -1319,14 +1326,29 @@ class AILSimplifier(Analysis):
1319
1326
 
1320
1327
  return False, None
1321
1328
 
1329
+ @timethis
1330
+ def _iteratively_remove_dead_assignments(self) -> bool:
1331
+ anything_removed = False
1332
+ while True:
1333
+ r = self._remove_dead_assignments()
1334
+ if not r:
1335
+ return anything_removed
1336
+ self._rebuild_func_graph()
1337
+ self._clear_cache()
1338
+
1339
+ @timethis
1322
1340
  def _remove_dead_assignments(self) -> bool:
1323
1341
 
1324
1342
  # keeping tracking of statements to remove and statements (as well as dead vvars) to keep allows us to handle
1325
- # cases where a statement defines more than one atoms, e.g., a call statement that defines both the return
1343
+ # cases where a statement defines more than one atom, e.g., a call statement that defines both the return
1326
1344
  # value and the floating-point return value.
1327
1345
  stmts_to_remove_per_block: dict[tuple[int, int | None], set[int]] = defaultdict(set)
1328
1346
  stmts_to_keep_per_block: dict[tuple[int, int | None], set[int]] = defaultdict(set)
1329
- dead_vvar_ids: set[int] = set()
1347
+ dead_vvar_ids: set[int] = self._removed_vvar_ids.copy()
1348
+ dead_vvar_codelocs: set[CodeLocation] = set()
1349
+ blocks: dict[tuple[int, int | None], Block] = {
1350
+ (node.addr, node.idx): self.blocks.get(node, node) for node in self.func_graph.nodes()
1351
+ }
1330
1352
 
1331
1353
  # Find all statements that should be removed
1332
1354
  mask = (1 << self.project.arch.bits) - 1
@@ -1335,85 +1357,96 @@ class AILSimplifier(Analysis):
1335
1357
  stackarg_offsets = (
1336
1358
  {(tpl[1] & mask) for tpl in self._stack_arg_offsets} if self._stack_arg_offsets is not None else None
1337
1359
  )
1338
- for def_ in rd.all_definitions:
1339
- if def_.dummy:
1340
- continue
1341
- # we do not remove references to global memory regions no matter what
1342
- if isinstance(def_.atom, atoms.MemoryLocation) and isinstance(def_.atom.addr, int):
1343
- continue
1344
- if isinstance(def_.atom, atoms.VirtualVariable):
1345
- if def_.atom.varid in self._propagator_dead_vvar_ids:
1360
+
1361
+ while True:
1362
+ new_dead_vars_found = False
1363
+
1364
+ # traverse all virtual variable definitions
1365
+ for vvar_id, codeloc in rd.all_vvar_definitions.items():
1366
+ if vvar_id in dead_vvar_ids:
1367
+ continue
1368
+ uses = None
1369
+ if vvar_id in self._propagator_dead_vvar_ids:
1346
1370
  # we are definitely removing this variable if it has no uses
1347
- uses = rd.get_vvar_uses(def_.atom)
1348
- elif def_.atom.was_stack:
1349
- if not self._remove_dead_memdefs:
1350
- if rd.is_phi_vvar_id(def_.atom.varid):
1351
- # we always remove unused phi variables
1352
- pass
1353
- elif def_.atom.varid in self._secondary_stackvars:
1354
- # secondary stack variables are potentially removable
1355
- pass
1356
- elif stackarg_offsets is not None:
1357
- # we always remove definitions for stack arguments
1358
- assert def_.atom.stack_offset is not None
1359
- if (def_.atom.stack_offset & mask) not in stackarg_offsets:
1371
+ uses = rd.all_vvar_uses[vvar_id]
1372
+
1373
+ if uses is None:
1374
+ vvar = rd.varid_to_vvar[vvar_id]
1375
+ if vvar.was_stack:
1376
+ if not self._remove_dead_memdefs:
1377
+ if rd.is_phi_vvar_id(vvar_id):
1378
+ # we always remove unused phi variables
1379
+ pass
1380
+ elif vvar_id in self._secondary_stackvars:
1381
+ # secondary stack variables are potentially removable
1382
+ pass
1383
+ elif stackarg_offsets is not None:
1384
+ # we always remove definitions for stack arguments
1385
+ assert vvar.stack_offset is not None
1386
+ if (vvar.stack_offset & mask) not in stackarg_offsets:
1387
+ continue
1388
+ else:
1360
1389
  continue
1361
- else:
1362
- continue
1363
- uses = rd.get_vvar_uses(def_.atom)
1390
+ uses = rd.all_vvar_uses[vvar_id]
1364
1391
 
1365
- elif def_.atom.was_tmp or def_.atom.was_reg or def_.atom.was_parameter:
1366
- uses = rd.get_vvar_uses(def_.atom)
1392
+ elif vvar.was_tmp or vvar.was_reg or vvar.was_parameter:
1393
+ uses = rd.all_vvar_uses[vvar_id]
1367
1394
 
1395
+ else:
1396
+ uses = set()
1397
+
1398
+ # remove uses where vvars are going to be removed
1399
+ filtered_uses_count = 0
1400
+ for _, loc in uses:
1401
+ if loc in dead_vvar_codelocs and loc.block_addr is not None and loc.stmt_idx is not None:
1402
+ stmt = blocks[(loc.block_addr, loc.block_idx)].statements[loc.stmt_idx]
1403
+ if not self._statement_has_call_exprs(stmt) and not isinstance(stmt, (DirtyStatement, Call)):
1404
+ continue
1405
+ filtered_uses_count += 1
1406
+
1407
+ if filtered_uses_count == 0:
1408
+ new_dead_vars_found = True
1409
+ dead_vvar_ids.add(vvar_id)
1410
+ dead_vvar_codelocs.add(codeloc)
1411
+ if not isinstance(codeloc, ExternalCodeLocation):
1412
+ assert codeloc.block_addr is not None
1413
+ assert codeloc.stmt_idx is not None
1414
+ stmts_to_remove_per_block[(codeloc.block_addr, codeloc.block_idx)].add(codeloc.stmt_idx)
1415
+ stmts_to_keep_per_block[(codeloc.block_addr, codeloc.block_idx)].discard(codeloc.stmt_idx)
1368
1416
  else:
1369
- uses = set()
1370
-
1371
- else:
1372
- continue
1373
-
1374
- if not uses:
1375
- if isinstance(def_.atom, atoms.VirtualVariable):
1376
- dead_vvar_ids.add(def_.atom.varid)
1417
+ if not isinstance(codeloc, ExternalCodeLocation):
1418
+ assert codeloc.block_addr is not None
1419
+ assert codeloc.stmt_idx is not None
1420
+ stmts_to_keep_per_block[(codeloc.block_addr, codeloc.block_idx)].add(codeloc.stmt_idx)
1377
1421
 
1378
- if not isinstance(def_.codeloc, ExternalCodeLocation):
1379
- assert def_.codeloc.block_addr is not None
1380
- assert def_.codeloc.stmt_idx is not None
1381
- stmts_to_remove_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(
1382
- def_.codeloc.stmt_idx
1383
- )
1384
- else:
1385
- if not isinstance(def_.codeloc, ExternalCodeLocation):
1386
- assert def_.codeloc.block_addr is not None
1387
- assert def_.codeloc.stmt_idx is not None
1388
- stmts_to_keep_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(
1389
- def_.codeloc.stmt_idx
1390
- )
1422
+ if not new_dead_vars_found:
1423
+ # nothing more is found. let's end the loop
1424
+ break
1391
1425
 
1392
1426
  # find all phi variables that rely on variables that no longer exist
1393
- all_removed_var_ids = self._removed_vvar_ids.copy()
1394
1427
  removed_vvar_ids = self._removed_vvar_ids
1395
1428
  while True:
1396
1429
  new_removed_vvar_ids = set()
1397
1430
  for phi_varid, phi_use_varids in rd.phivarid_to_varids.items():
1398
- if phi_varid not in all_removed_var_ids and any(
1399
- vvarid in removed_vvar_ids for vvarid in phi_use_varids
1400
- ):
1401
- loc = rd.all_vvar_definitions[rd.varid_to_vvar[phi_varid]]
1431
+ if phi_varid not in dead_vvar_ids and any(vvarid in removed_vvar_ids for vvarid in phi_use_varids):
1432
+ loc = rd.all_vvar_definitions[phi_varid]
1402
1433
  assert loc.block_addr is not None and loc.stmt_idx is not None
1403
- stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
1404
- new_removed_vvar_ids.add(phi_varid)
1405
- all_removed_var_ids.add(phi_varid)
1434
+ if loc.stmt_idx not in stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)]:
1435
+ stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
1436
+ new_removed_vvar_ids.add(phi_varid)
1437
+ dead_vvar_ids.add(phi_varid)
1406
1438
  if not new_removed_vvar_ids:
1407
1439
  break
1408
1440
  removed_vvar_ids = new_removed_vvar_ids
1409
1441
 
1410
1442
  # find all phi variables that are only ever used by other phi variables
1411
- redundant_phi_and_dirty_varids = self._find_cyclic_dependent_phis_and_dirty_vvars(rd)
1443
+ redundant_phi_and_dirty_varids = self._find_cyclic_dependent_phis_and_dirty_vvars(rd, dead_vvar_ids)
1412
1444
  for varid in redundant_phi_and_dirty_varids:
1413
- loc = rd.all_vvar_definitions[rd.varid_to_vvar[varid]]
1445
+ loc = rd.all_vvar_definitions[varid]
1414
1446
  assert loc.block_addr is not None and loc.stmt_idx is not None
1415
- stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
1416
- stmts_to_keep_per_block[(loc.block_addr, loc.block_idx)].discard(loc.stmt_idx)
1447
+ if loc.stmt_idx not in stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)]:
1448
+ stmts_to_remove_per_block[(loc.block_addr, loc.block_idx)].add(loc.stmt_idx)
1449
+ stmts_to_keep_per_block[(loc.block_addr, loc.block_idx)].discard(loc.stmt_idx)
1417
1450
 
1418
1451
  for codeloc in self._calls_to_remove | self._assignments_to_remove:
1419
1452
  # this call can be removed. make sure it exists in stmts_to_remove_per_block
@@ -1462,11 +1495,13 @@ class AILSimplifier(Analysis):
1462
1495
  if codeloc in self._assignments_to_remove:
1463
1496
  # it should be removed
1464
1497
  simplified = True
1498
+ self._assignments_to_remove.discard(codeloc)
1465
1499
  continue
1466
1500
 
1467
1501
  if self._statement_has_call_exprs(stmt):
1468
1502
  if codeloc in self._calls_to_remove:
1469
1503
  # it has a call and must be removed
1504
+ self._calls_to_remove.discard(codeloc)
1470
1505
  simplified = True
1471
1506
  continue
1472
1507
  if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
@@ -1489,6 +1524,7 @@ class AILSimplifier(Analysis):
1489
1524
  codeloc = CodeLocation(block.addr, idx, ins_addr=stmt.ins_addr, block_idx=block.idx)
1490
1525
  if codeloc in self._calls_to_remove:
1491
1526
  # this call can be removed
1527
+ self._calls_to_remove.discard(codeloc)
1492
1528
  simplified = True
1493
1529
  continue
1494
1530
 
@@ -1523,9 +1559,8 @@ class AILSimplifier(Analysis):
1523
1559
  :return: The set of vvar use atoms.
1524
1560
  """
1525
1561
 
1526
- vvar = rd.varid_to_vvar[vvar_id]
1527
1562
  used_by: set[int | None] = set()
1528
- for used_vvar, loc in rd.all_vvar_uses[vvar]:
1563
+ for used_vvar, loc in rd.all_vvar_uses[vvar_id]:
1529
1564
  if used_vvar is None:
1530
1565
  # no explicit reference
1531
1566
  used_by.add(None)
@@ -1538,7 +1573,7 @@ class AILSimplifier(Analysis):
1538
1573
  used_by.add(None)
1539
1574
  return used_by
1540
1575
 
1541
- def _find_cyclic_dependent_phis_and_dirty_vvars(self, rd: SRDAModel) -> set[int]:
1576
+ def _find_cyclic_dependent_phis_and_dirty_vvars(self, rd: SRDAModel, dead_vvar_ids: set[int]) -> set[int]:
1542
1577
  blocks_dict: dict[tuple[int, int | None], Block] = {(bb.addr, bb.idx): bb for bb in self.func_graph}
1543
1578
 
1544
1579
  # find dirty vvars and vexccall vvars
@@ -1553,16 +1588,21 @@ class AILSimplifier(Analysis):
1553
1588
  ):
1554
1589
  dirty_vvar_ids.add(stmt.dst.varid)
1555
1590
 
1556
- phi_and_dirty_vvar_ids = rd.phi_vvar_ids | dirty_vvar_ids
1591
+ phi_and_dirty_vvar_ids = (rd.phi_vvar_ids | dirty_vvar_ids).difference(dead_vvar_ids)
1557
1592
 
1558
1593
  vvar_used_by: dict[int, set[int | None]] = defaultdict(set)
1559
1594
  for var_id in phi_and_dirty_vvar_ids:
1560
1595
  if var_id in rd.phivarid_to_varids:
1561
1596
  for used_by_varid in rd.phivarid_to_varids[var_id]:
1597
+ if used_by_varid in dead_vvar_ids:
1598
+ # this variable no longer exists
1599
+ continue
1562
1600
  if used_by_varid not in vvar_used_by:
1563
- vvar_used_by[used_by_varid] |= self._get_vvar_used_by(used_by_varid, rd, blocks_dict)
1601
+ vvar_used_by[used_by_varid] |= self._get_vvar_used_by(
1602
+ used_by_varid, rd, blocks_dict
1603
+ ).difference(dead_vvar_ids)
1564
1604
  vvar_used_by[used_by_varid].add(var_id) # probably unnecessary
1565
- vvar_used_by[var_id] |= self._get_vvar_used_by(var_id, rd, blocks_dict)
1605
+ vvar_used_by[var_id] |= self._get_vvar_used_by(var_id, rd, blocks_dict).difference(dead_vvar_ids)
1566
1606
 
1567
1607
  g = networkx.DiGraph()
1568
1608
  dummy_vvar_id = -1