angr 9.2.140__py3-none-manylinux2014_x86_64.whl → 9.2.141__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (40) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +88 -32
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +9 -8
  6. angr/analyses/decompiler/ail_simplifier.py +48 -20
  7. angr/analyses/decompiler/callsite_maker.py +24 -11
  8. angr/analyses/decompiler/clinic.py +10 -0
  9. angr/analyses/decompiler/decompiler.py +1 -0
  10. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  11. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  12. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +84 -15
  13. angr/analyses/decompiler/optimization_passes/optimization_pass.py +76 -1
  14. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
  15. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  16. angr/analyses/decompiler/region_identifier.py +6 -4
  17. angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
  18. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
  19. angr/analyses/decompiler/ssailification/rewriting.py +23 -15
  20. angr/analyses/decompiler/ssailification/rewriting_engine.py +105 -24
  21. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  22. angr/analyses/decompiler/structured_codegen/c.py +73 -137
  23. angr/analyses/decompiler/structuring/dream.py +1 -1
  24. angr/analyses/decompiler/structuring/phoenix.py +6 -1
  25. angr/analyses/decompiler/structuring/structurer_base.py +2 -1
  26. angr/analyses/decompiler/utils.py +46 -20
  27. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  28. angr/analyses/variable_recovery/engine_ail.py +1 -1
  29. angr/analyses/variable_recovery/engine_vex.py +20 -4
  30. angr/calling_conventions.py +15 -10
  31. angr/factory.py +8 -3
  32. angr/knowledge_plugins/variables/variable_manager.py +7 -5
  33. angr/simos/simos.py +3 -1
  34. angr/utils/types.py +48 -0
  35. {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/METADATA +6 -6
  36. {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/RECORD +40 -39
  37. {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/LICENSE +0 -0
  38. {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/WHEEL +0 -0
  39. {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/entry_points.txt +0 -0
  40. {angr-9.2.140.dist-info → angr-9.2.141.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.140"
5
+ __version__ = "9.2.141"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
@@ -33,6 +33,7 @@ from angr.knowledge_plugins.key_definitions.rd_model import ReachingDefinitionsM
33
33
  from angr.knowledge_plugins.variables.variable_access import VariableAccessSort
34
34
  from angr.knowledge_plugins.functions import Function
35
35
  from angr.utils.constants import DEFAULT_STATEMENT
36
+ from angr.utils.ssa import get_reg_offset_base_and_size, get_reg_offset_base
36
37
  from angr import SIM_PROCEDURES
37
38
  from angr.analyses import Analysis, register_analysis, ReachingDefinitionsAnalysis
38
39
  from angr.analyses.reaching_definitions import get_all_definitions
@@ -264,7 +265,7 @@ class CallingConventionAnalysis(Analysis):
264
265
  self.cc = cc
265
266
  self.prototype = prototype
266
267
 
267
- def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction] | None:
268
+ def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
268
269
  """
269
270
  Get the calling convention for a PLT stub.
270
271
 
@@ -296,6 +297,14 @@ class CallingConventionAnalysis(Analysis):
296
297
  real_func = None
297
298
 
298
299
  if real_func is not None:
300
+ if real_func.calling_convention is None:
301
+ cc_cls = default_cc(self.project.arch.name)
302
+ if cc_cls is None:
303
+ # can't determine the default calling convention for this architecture
304
+ return None
305
+ cc = cc_cls(self.project.arch)
306
+ else:
307
+ cc = real_func.calling_convention
299
308
  if real_func.is_simprocedure:
300
309
  if self.project.is_hooked(real_func.addr):
301
310
  # prioritize the hooker
@@ -303,17 +312,20 @@ class CallingConventionAnalysis(Analysis):
303
312
  if hooker is not None and (
304
313
  not hooker.is_stub or (hooker.is_function and not hooker.guessed_prototype)
305
314
  ):
306
- return real_func.calling_convention, hooker.prototype
307
- if real_func.calling_convention and real_func.prototype:
308
- return real_func.calling_convention, real_func.prototype
315
+ return cc, hooker.prototype
316
+ if real_func.prototype is not None:
317
+ return cc, real_func.prototype
309
318
  else:
310
- return real_func.calling_convention, real_func.prototype
319
+ return cc, real_func.prototype
311
320
 
312
321
  if self.analyze_callsites:
313
322
  # determine the calling convention by analyzing its callsites
314
323
  callsite_facts = self._extract_and_analyze_callsites(max_analyzing_callsites=1)
315
324
  cc_cls = default_cc(self.project.arch.name)
316
- cc = cc_cls(self.project.arch) if cc_cls is not None else None
325
+ if cc_cls is None:
326
+ # can't determine the default calling convention for this architecture
327
+ return None
328
+ cc = cc_cls(self.project.arch)
317
329
  prototype = SimTypeFunction([], None)
318
330
  prototype = self._adjust_prototype(
319
331
  prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
@@ -342,7 +354,7 @@ class CallingConventionAnalysis(Analysis):
342
354
  input_variables = vm.input_variables()
343
355
  input_args = self._args_from_vars(input_variables, vm)
344
356
  else:
345
- input_args = self._input_args
357
+ input_args = set(self._input_args)
346
358
  retval_size = self._retval_size
347
359
 
348
360
  # check if this function is a variadic function
@@ -355,8 +367,14 @@ class CallingConventionAnalysis(Analysis):
355
367
  # TODO: properly determine sp_delta
356
368
  sp_delta = self.project.arch.bytes if self.project.arch.call_pushes_ret else 0
357
369
 
358
- input_args = list(input_args) # input_args might be modified by find_cc()
359
- cc = SimCC.find_cc(self.project.arch, input_args, sp_delta, platform=self.project.simos.name)
370
+ full_input_args = self._consolidate_input_args(input_args)
371
+ full_input_args_copy = list(full_input_args) # input_args might be modified by find_cc()
372
+ cc = SimCC.find_cc(self.project.arch, full_input_args_copy, sp_delta, platform=self.project.simos.name)
373
+
374
+ # update input_args according to the difference between full_input_args and full_input_args_copy
375
+ for a in full_input_args:
376
+ if a not in full_input_args_copy and a in input_args:
377
+ input_args.remove(a)
360
378
 
361
379
  if cc is None:
362
380
  l.warning(
@@ -657,12 +675,6 @@ class CallingConventionAnalysis(Analysis):
657
675
  else:
658
676
  break
659
677
 
660
- if None in temp_args:
661
- first_none_idx = temp_args.index(None)
662
- # test if there is at least one argument set after None; if so, we ignore the first None
663
- if any(arg is not None for arg in temp_args[first_none_idx:]):
664
- temp_args[first_none_idx] = expected_args[first_none_idx]
665
-
666
678
  if None in temp_args:
667
679
  # we are very conservative here and ignore all arguments starting from the first missing one
668
680
  first_none_idx = temp_args.index(None)
@@ -681,17 +693,18 @@ class CallingConventionAnalysis(Analysis):
681
693
  if all(fact.return_value_used is False for fact in facts):
682
694
  proto.returnty = SimTypeBottom(label="void")
683
695
  else:
684
- proto.returnty = SimTypeInt().with_arch(self.project.arch)
696
+ if proto.returnty is None or isinstance(proto.returnty, SimTypeBottom):
697
+ proto.returnty = SimTypeInt().with_arch(self.project.arch)
685
698
 
686
699
  if (
687
700
  update_arguments == UpdateArgumentsOption.AlwaysUpdate
688
701
  or (update_arguments == UpdateArgumentsOption.UpdateWhenCCHasNoArgs and not proto.args)
689
702
  ) and len({len(fact.args) for fact in facts}) == 1:
690
703
  fact = next(iter(facts))
691
- proto.args = [
704
+ proto.args = tuple(
692
705
  self._guess_arg_type(arg) if arg is not None else SimTypeInt().with_arch(self.project.arch)
693
706
  for arg in fact.args
694
- ]
707
+ )
695
708
 
696
709
  return proto
697
710
 
@@ -730,13 +743,8 @@ class CallingConventionAnalysis(Analysis):
730
743
  # a register variable, convert it to a register argument
731
744
  if not is_sane_register_variable(self.project.arch, variable.reg, variable.size, def_cc=def_cc):
732
745
  continue
733
- if self.project.arch.name in {"AMD64", "X86"} and variable.size < self.project.arch.bytes:
734
- # use complete registers on AMD64 and X86
735
- reg_name = self.project.arch.translate_register_name(variable.reg, size=self.project.arch.bytes)
736
- arg = SimRegArg(reg_name, self.project.arch.bytes)
737
- else:
738
- reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
739
- arg = SimRegArg(reg_name, variable.size)
746
+ reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
747
+ arg = SimRegArg(reg_name, variable.size)
740
748
  args.add(arg)
741
749
 
742
750
  accesses = var_manager.get_variable_accesses(variable)
@@ -778,15 +786,58 @@ class CallingConventionAnalysis(Analysis):
778
786
 
779
787
  return args.difference(restored_reg_vars)
780
788
 
781
- def _reorder_args(self, args: list[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
789
+ def _consolidate_input_args(self, input_args: set[SimRegArg | SimStackArg]) -> set[SimRegArg | SimStackArg]:
790
+ """
791
+ Consolidate register arguments by converting partial registers to full registers on certain architectures.
792
+
793
+ :param input_args: A set of input arguments.
794
+ :return: A set of consolidated input args.
795
+ """
796
+
797
+ if self.project.arch.name in {"AMD64", "X86"}:
798
+ new_input_args = set()
799
+ for a in input_args:
800
+ if isinstance(a, SimRegArg) and a.size < self.project.arch.bytes:
801
+ # use complete registers on AMD64 and X86
802
+ reg_offset, reg_size = self.project.arch.registers[a.reg_name]
803
+ full_reg_offset, full_reg_size = get_reg_offset_base_and_size(
804
+ reg_offset, self.project.arch, size=reg_size
805
+ )
806
+ full_reg_name = self.project.arch.translate_register_name(full_reg_offset, size=full_reg_size)
807
+ arg = SimRegArg(full_reg_name, full_reg_size)
808
+ if arg not in new_input_args:
809
+ new_input_args.add(arg)
810
+ else:
811
+ new_input_args.add(a)
812
+ return new_input_args
813
+
814
+ return input_args
815
+
816
+ def _reorder_args(self, args: set[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
782
817
  """
783
818
  Reorder arguments according to the calling convention identified.
784
819
 
785
- :param args: A list of arguments that haven't been ordered.
820
+ :param args: A set of arguments that haven't been ordered.
786
821
  :param cc: The identified calling convention.
787
822
  :return: A reordered list of args.
788
823
  """
789
824
 
825
+ def _is_same_reg(rn0: str, rn1: str) -> bool:
826
+ """
827
+ Check if rn0 and rn1 belong to the same base register.
828
+
829
+ :param rn0: Register name of the first register.
830
+ :param rn1: Register name of the second register.
831
+ :return: True if they belong to the same base register; False otherwise.
832
+ """
833
+ if rn0 == rn1:
834
+ return True
835
+ off0, sz0 = self.project.arch.registers[rn0]
836
+ full_off0 = get_reg_offset_base(off0, self.project.arch, sz0)
837
+ off1, sz1 = self.project.arch.registers[rn1]
838
+ full_off1 = get_reg_offset_base(off1, self.project.arch, sz1)
839
+ return full_off0 == full_off1
840
+
790
841
  reg_args = []
791
842
 
792
843
  # split args into two lists
@@ -805,7 +856,7 @@ class CallingConventionAnalysis(Analysis):
805
856
  # match int args first
806
857
  for reg_name in cc.ARG_REGS:
807
858
  try:
808
- arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and a.reg_name == reg_name))
859
+ arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name)))
809
860
  except StopIteration:
810
861
  # have we reached the end of the args list?
811
862
  if [a for a in int_args if isinstance(a, SimRegArg)] or len(stack_int_args) > 0:
@@ -821,7 +872,9 @@ class CallingConventionAnalysis(Analysis):
821
872
  if fp_args:
822
873
  for reg_name in cc.FP_ARG_REGS:
823
874
  try:
824
- arg = next(iter(a for a in fp_args if isinstance(a, SimRegArg) and a.reg_name == reg_name))
875
+ arg = next(
876
+ iter(a for a in fp_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name))
877
+ )
825
878
  except StopIteration:
826
879
  # have we reached the end of the args list?
827
880
  if [a for a in fp_args if isinstance(a, SimRegArg)] or len(stack_fp_args) > 0:
@@ -886,12 +939,15 @@ class CallingConventionAnalysis(Analysis):
886
939
  if 5 <= ret_val_size <= 8:
887
940
  return SimTypeLongLong()
888
941
 
889
- # fallback
890
- return SimTypeInt() if cc.arch.bits == 32 else SimTypeLongLong()
942
+ return SimTypeBottom(label="void")
891
943
 
892
944
  @staticmethod
893
945
  def _likely_saving_temp_reg(ail_block: ailment.Block, d: Definition, all_reg_defs: set[Definition]) -> bool:
894
- if d.codeloc.block_addr == ail_block.addr and d.codeloc.stmt_idx < len(ail_block.statements):
946
+ if (
947
+ d.codeloc.block_addr == ail_block.addr
948
+ and d.codeloc.stmt_idx is not None
949
+ and d.codeloc.stmt_idx < len(ail_block.statements)
950
+ ):
895
951
  stmt = ail_block.statements[d.codeloc.stmt_idx]
896
952
  if isinstance(stmt, ailment.Stmt.Assignment) and isinstance(stmt.src, ailment.Expr.Register):
897
953
  src_offset = stmt.src.reg_offset
@@ -90,7 +90,7 @@ binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactColle
90
90
 
91
91
  class SimEngineFactCollectorVEX(
92
92
  SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
93
- SimEngineLight[type[FactCollectorState], SpOffset | RegisterOffset | int, Block, None],
93
+ SimEngineLight[FactCollectorState, SpOffset | RegisterOffset | int, Block, None],
94
94
  ):
95
95
  """
96
96
  The engine for FactCollector.
@@ -101,7 +101,7 @@ class SimEngineFactCollectorVEX(
101
101
  super().__init__(project)
102
102
 
103
103
  def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
104
- if self.block.vex.jumpkind == "Ijk_Call":
104
+ if self.block.vex.jumpkind == "Ijk_Call" and self.arch.ret_offset is not None:
105
105
  self.state.register_written(self.arch.ret_offset, self.arch.bytes)
106
106
 
107
107
  def _top(self, bits: int):
@@ -110,7 +110,7 @@ class SimEngineFactCollectorVEX(
110
110
  def _is_top(self, expr: Any) -> bool:
111
111
  raise NotImplementedError
112
112
 
113
- def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
113
+ def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.expr.IRExpr) -> Any:
114
114
  return None
115
115
 
116
116
  def _handle_stmt_Put(self, stmt):
@@ -142,9 +142,9 @@ class SimEngineFactCollectorVEX(
142
142
  return expr.con.value
143
143
 
144
144
  def _handle_expr_GSPTR(self, expr):
145
- return None
145
+ return 0
146
146
 
147
- def _handle_expr_Get(self, expr) -> SpOffset | None:
147
+ def _handle_expr_Get(self, expr) -> SpOffset | RegisterOffset:
148
148
  if expr.offset == self.arch.sp_offset:
149
149
  return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
150
150
  if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
@@ -304,7 +304,10 @@ class FactCollector(Analysis):
304
304
 
305
305
  def _handle_function(self, state: FactCollectorState, func: Function) -> None:
306
306
  try:
307
- arg_locs = func.calling_convention.arg_locs(func.prototype)
307
+ if func.calling_convention is not None and func.prototype is not None:
308
+ arg_locs = func.calling_convention.arg_locs(func.prototype)
309
+ else:
310
+ return
308
311
  except (TypeError, ValueError):
309
312
  return
310
313
 
@@ -355,6 +358,7 @@ class FactCollector(Analysis):
355
358
 
356
359
  if isinstance(node, BlockNode) and node.size == 0:
357
360
  continue
361
+
358
362
  if isinstance(node, HookNode):
359
363
  # attempt to convert it into a function
360
364
  if self.kb.functions.contains_addr(node.addr):
@@ -369,17 +373,43 @@ class FactCollector(Analysis):
369
373
  and not isinstance(node.prototype.returnty, SimTypeBottom)
370
374
  ):
371
375
  # assume the function overwrites the return variable
372
- retval_size = (
373
- node.prototype.returnty.with_arch(self.project.arch).size // self.project.arch.byte_width
374
- )
376
+ returnty_size = node.prototype.returnty.with_arch(self.project.arch).size
377
+ assert returnty_size is not None
378
+ retval_size = returnty_size // self.project.arch.byte_width
375
379
  retval_sizes.append(retval_size)
376
380
  continue
377
381
 
382
+ # if this block ends with a call to a function, we process the function first
383
+ func_succs = [
384
+ succ
385
+ for succ in func_graph.successors(node)
386
+ if isinstance(succ, (Function, HookNode)) or self.kb.functions.contains_addr(succ.addr)
387
+ ]
388
+ if len(func_succs) == 1:
389
+ func_succ = func_succs[0]
390
+ if isinstance(func_succ, (BlockNode, HookNode)) and self.kb.functions.contains_addr(func_succ.addr):
391
+ # attempt to convert it into a function
392
+ func_succ = self.kb.functions.get_by_addr(func_succ.addr)
393
+ if isinstance(func_succ, Function):
394
+ if (
395
+ func_succ.calling_convention is not None
396
+ and func_succ.prototype is not None
397
+ and func_succ.prototype.returnty is not None
398
+ and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
399
+ ):
400
+ # assume the function overwrites the return variable
401
+ returnty_size = func_succ.prototype.returnty.with_arch(self.project.arch).size
402
+ assert returnty_size is not None
403
+ retval_size = returnty_size // self.project.arch.byte_width
404
+ retval_sizes.append(retval_size)
405
+ continue
406
+
378
407
  block = self.project.factory.block(node.addr, size=node.size)
379
408
  # scan the block statements backwards to find writes to the return value register
380
409
  retval_size = None
381
410
  for stmt in reversed(block.vex.statements):
382
411
  if isinstance(stmt, pyvex.IRStmt.Put):
412
+ assert block.vex.tyenv is not None
383
413
  size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
384
414
  if stmt.offset == retreg_offset:
385
415
  retval_size = max(size, 1)
@@ -391,9 +421,9 @@ class FactCollector(Analysis):
391
421
  for pred, _, data in func_graph.in_edges(node, data=True):
392
422
  edge_type = data.get("type")
393
423
  if pred not in traversed and depth + 1 <= self._max_depth:
394
- if edge_type == "fake_return":
424
+ if edge_type == "call":
395
425
  continue
396
- if edge_type in {"transition", "call"}:
426
+ if edge_type in {"transition", "fake_return"}:
397
427
  queue.append((depth + 1, pred))
398
428
 
399
429
  self.retval_size = max(retval_sizes) if retval_sizes else None
@@ -472,6 +502,7 @@ class FactCollector(Analysis):
472
502
  ):
473
503
  tmps[stmt.tmp] = "sp"
474
504
  if isinstance(stmt, pyvex.IRStmt.Put):
505
+ assert block.vex.tyenv is not None
475
506
  size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
476
507
  # is the data loaded from the stack?
477
508
  if (
@@ -532,13 +563,8 @@ class FactCollector(Analysis):
532
563
  ):
533
564
  continue
534
565
  reg_offset_created.add(offset)
535
- if self.project.arch.name in {"AMD64", "X86"} and size < self.project.arch.bytes:
536
- # use complete registers on AMD64 and X86
537
- reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
538
- arg = SimRegArg(reg_name, self.project.arch.bytes)
539
- else:
540
- reg_name = self.project.arch.translate_register_name(offset, size=size)
541
- arg = SimRegArg(reg_name, size)
566
+ reg_name = self.project.arch.translate_register_name(offset, size=size)
567
+ arg = SimRegArg(reg_name, size)
542
568
  self.input_args.append(arg)
543
569
 
544
570
  stack_offset_created = set()
@@ -9,7 +9,9 @@ from angr.calling_conventions import SimCC
9
9
  l = logging.getLogger(__name__)
10
10
 
11
11
 
12
- def is_sane_register_variable(arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | None = None) -> bool:
12
+ def is_sane_register_variable(
13
+ arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | type[SimCC] | None = None
14
+ ) -> bool:
13
15
  """
14
16
  Filters all registers that are surely not members of function arguments.
15
17
  This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which
@@ -182,23 +182,24 @@ class ConstantValueManager:
182
182
 
183
183
  # determine blocks to run FCP on
184
184
 
185
- # - include at most three levels of successors from the entrypoint
185
+ # - include at most three levels of superblock successors from the entrypoint
186
186
  startpoint = self.func.startpoint
187
187
  blocks = set()
188
- succs = [startpoint]
189
- for _ in range(3):
188
+ succ_and_levels = [(startpoint, 0)]
189
+ while succ_and_levels:
190
190
  new_succs = []
191
- for node in succs:
191
+ for node, level in succ_and_levels:
192
192
  if node in blocks:
193
193
  continue
194
194
  blocks.add(node)
195
195
  if node.addr == self.indirect_jump_addr:
196
196
  # stop at the indirect jump block
197
197
  continue
198
- new_succs += list(self.func.graph.successors(node))
199
- succs = new_succs
200
- if not succs:
201
- break
198
+ for _, succ, data in self.func.graph.out_edges(node, data=True):
199
+ new_level = level if data.get("type") == "fake_return" else level + 1
200
+ if new_level <= 3:
201
+ new_succs.append((succ, new_level))
202
+ succ_and_levels = new_succs
202
203
 
203
204
  # - include at most six levels of predecessors from the indirect jump block
204
205
  ij_block = self.func.get_node(self.indirect_jump_addr)
@@ -99,6 +99,7 @@ class AILSimplifier(Analysis):
99
99
  removed_vvar_ids: set[int] | None = None,
100
100
  arg_vvars: dict[int, tuple[VirtualVariable, SimVariable]] | None = None,
101
101
  avoid_vvar_ids: set[int] | None = None,
102
+ secondary_stackvars: set[int] | None = None,
102
103
  ):
103
104
  self.func = func
104
105
  self.func_graph = func_graph if func_graph is not None else func.graph
@@ -119,6 +120,7 @@ class AILSimplifier(Analysis):
119
120
  self._arg_vvars = arg_vvars
120
121
  self._avoid_vvar_ids = avoid_vvar_ids
121
122
  self._propagator_dead_vvar_ids: set[int] = set()
123
+ self._secondary_stackvars: set[int] = secondary_stackvars if secondary_stackvars is not None else set()
122
124
 
123
125
  self._calls_to_remove: set[CodeLocation] = set()
124
126
  self._assignments_to_remove: set[CodeLocation] = set()
@@ -1348,6 +1350,9 @@ class AILSimplifier(Analysis):
1348
1350
  if rd.is_phi_vvar_id(def_.atom.varid):
1349
1351
  # we always remove unused phi variables
1350
1352
  pass
1353
+ elif def_.atom.varid in self._secondary_stackvars:
1354
+ # secondary stack variables are potentially removable
1355
+ pass
1351
1356
  elif stackarg_offsets is not None:
1352
1357
  # we always remove definitions for stack arguments
1353
1358
  assert def_.atom.stack_offset is not None
@@ -1380,7 +1385,9 @@ class AILSimplifier(Analysis):
1380
1385
  if not isinstance(def_.codeloc, ExternalCodeLocation):
1381
1386
  assert def_.codeloc.block_addr is not None
1382
1387
  assert def_.codeloc.stmt_idx is not None
1383
- stmts_to_keep_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(def_.codeloc.stmt_idx)
1388
+ stmts_to_keep_per_block[(def_.codeloc.block_addr, def_.codeloc.block_idx)].add(
1389
+ def_.codeloc.stmt_idx
1390
+ )
1384
1391
 
1385
1392
  # find all phi variables that rely on variables that no longer exist
1386
1393
  all_removed_var_ids = self._removed_vvar_ids.copy()
@@ -1503,8 +1510,36 @@ class AILSimplifier(Analysis):
1503
1510
 
1504
1511
  return simplified
1505
1512
 
1513
+ @staticmethod
1514
+ def _get_vvar_used_by(
1515
+ vvar_id: int, rd: SRDAModel, blocks_dict: dict[tuple[int, int | None], Block]
1516
+ ) -> set[int | None]:
1517
+ """
1518
+ Get all atoms that use a specified virtual variable. The atoms are in the form of virtual variable ID or None
1519
+ (indicating the virtual variable is used by another statement like Store).
1520
+
1521
+ :param vvar_id: ID of the virtual variable.
1522
+ :param rd: The SRDA model.
1523
+ :return: The set of vvar use atoms.
1524
+ """
1525
+
1526
+ vvar = rd.varid_to_vvar[vvar_id]
1527
+ used_by: set[int | None] = set()
1528
+ for used_vvar, loc in rd.all_vvar_uses[vvar]:
1529
+ if used_vvar is None:
1530
+ # no explicit reference
1531
+ used_by.add(None)
1532
+ elif loc.block_addr is not None:
1533
+ assert loc.stmt_idx is not None
1534
+ stmt = blocks_dict[(loc.block_addr, loc.block_idx)].statements[loc.stmt_idx]
1535
+ if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
1536
+ used_by.add(stmt.dst.varid)
1537
+ else:
1538
+ used_by.add(None)
1539
+ return used_by
1540
+
1506
1541
  def _find_cyclic_dependent_phis_and_dirty_vvars(self, rd: SRDAModel) -> set[int]:
1507
- blocks_dict = {(bb.addr, bb.idx): bb for bb in self.func_graph}
1542
+ blocks_dict: dict[tuple[int, int | None], Block] = {(bb.addr, bb.idx): bb for bb in self.func_graph}
1508
1543
 
1509
1544
  # find dirty vvars and vexccall vvars
1510
1545
  dirty_vvar_ids = set()
@@ -1520,25 +1555,14 @@ class AILSimplifier(Analysis):
1520
1555
 
1521
1556
  phi_and_dirty_vvar_ids = rd.phi_vvar_ids | dirty_vvar_ids
1522
1557
 
1523
- vvar_used_by: dict[int, set[int]] = defaultdict(set)
1558
+ vvar_used_by: dict[int, set[int | None]] = defaultdict(set)
1524
1559
  for var_id in phi_and_dirty_vvar_ids:
1525
1560
  if var_id in rd.phivarid_to_varids:
1526
1561
  for used_by_varid in rd.phivarid_to_varids[var_id]:
1527
- vvar_used_by[used_by_varid].add(var_id)
1528
-
1529
- vvar = rd.varid_to_vvar[var_id]
1530
- used_by = set()
1531
- for used_vvar, loc in rd.all_vvar_uses[vvar]:
1532
- if used_vvar is None:
1533
- # no explicit reference
1534
- used_by.add(None)
1535
- else:
1536
- stmt = blocks_dict[loc.block_addr, loc.block_idx].statements[loc.stmt_idx]
1537
- if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
1538
- used_by.add(stmt.dst.varid)
1539
- else:
1540
- used_by.add(None)
1541
- vvar_used_by[var_id] |= used_by
1562
+ if used_by_varid not in vvar_used_by:
1563
+ vvar_used_by[used_by_varid] |= self._get_vvar_used_by(used_by_varid, rd, blocks_dict)
1564
+ vvar_used_by[used_by_varid].add(var_id) # probably unnecessary
1565
+ vvar_used_by[var_id] |= self._get_vvar_used_by(var_id, rd, blocks_dict)
1542
1566
 
1543
1567
  g = networkx.DiGraph()
1544
1568
  dummy_vvar_id = -1
@@ -1557,8 +1581,12 @@ class AILSimplifier(Analysis):
1557
1581
 
1558
1582
  bail = False
1559
1583
  for varid in scc:
1560
- # if this vvar is a phi var, ensure this vvar is not used by anything else outside the scc
1561
- if varid in rd.phi_vvar_ids:
1584
+ # ensure this vvar is not used by anything else outside the scc (regardless of whether this vvar is a
1585
+ # phi variable or not)
1586
+ if varid in vvar_used_by and None in vvar_used_by[varid]:
1587
+ bail = True
1588
+ break
1589
+ if bail is False:
1562
1590
  succs = list(g.successors(varid))
1563
1591
  if any(succ_varid not in scc for succ_varid in succs):
1564
1592
  bail = True
@@ -45,7 +45,7 @@ class CallSiteMaker(Analysis):
45
45
  self._ail_manager = ail_manager
46
46
 
47
47
  self.result_block = None
48
- self.stack_arg_offsets: set[tuple[int, int]] | None = None # ins_addr, stack_offset
48
+ self.stack_arg_offsets: set[tuple[int, int]] | None = None # call ins addr, stack_offset
49
49
  self.removed_vvar_ids: set[int] = set()
50
50
 
51
51
  self._analyze()
@@ -372,7 +372,9 @@ class CallSiteMaker(Analysis):
372
372
 
373
373
  return None
374
374
 
375
- def _resolve_stack_argument(self, call_stmt, arg_loc) -> tuple[Any, Any]: # pylint:disable=unused-argument
375
+ def _resolve_stack_argument(
376
+ self, call_stmt: Stmt.Call, arg_loc
377
+ ) -> tuple[Any, Any]: # pylint:disable=unused-argument
376
378
  assert self._stack_pointer_tracker is not None
377
379
 
378
380
  size = arg_loc.size
@@ -399,15 +401,26 @@ class CallSiteMaker(Analysis):
399
401
  # FIXME: vvar may be larger than that we ask; we may need to chop the correct value of vvar
400
402
  value = view.get_vvar_value(vvar)
401
403
  if value is not None and not isinstance(value, Expr.Phi):
402
- return None, value
403
- return None, Expr.VirtualVariable(
404
- self._atom_idx(),
405
- vvar.varid,
406
- vvar.bits,
407
- vvar.category,
408
- oident=vvar.oident,
409
- ins_addr=call_stmt.ins_addr,
410
- )
404
+ v: Expr.Expression = value
405
+ else:
406
+ v: Expr.Expression = Expr.VirtualVariable(
407
+ self._atom_idx(),
408
+ vvar.varid,
409
+ vvar.bits,
410
+ vvar.category,
411
+ oident=vvar.oident,
412
+ ins_addr=call_stmt.ins_addr,
413
+ )
414
+ if v.size > size:
415
+ v = Expr.Convert(
416
+ self._atom_idx(),
417
+ v.bits,
418
+ size * self.project.arch.byte_width,
419
+ False,
420
+ v,
421
+ ins_addr=call_stmt.ins_addr,
422
+ )
423
+ return None, v
411
424
 
412
425
  return None, Expr.Load(
413
426
  self._atom_idx(),
@@ -154,6 +154,9 @@ class Clinic(Analysis):
154
154
  self._mode = mode
155
155
  self.vvar_id_start = vvar_id_start
156
156
  self.vvar_to_vvar: dict[int, int] | None = None
157
+ # during SSA conversion, we create secondary stack variables because they overlap and are larger than the
158
+ # actual stack variables. these secondary stack variables can be safely eliminated if not used by anything.
159
+ self.secondary_stackvars: set[int] = set()
157
160
 
158
161
  # inlining help
159
162
  self._sp_shift = sp_shift
@@ -1239,6 +1242,7 @@ class Clinic(Analysis):
1239
1242
  rewrite_ccalls=rewrite_ccalls,
1240
1243
  removed_vvar_ids=removed_vvar_ids,
1241
1244
  arg_vvars=arg_vvars,
1245
+ secondary_stackvars=self.secondary_stackvars,
1242
1246
  )
1243
1247
  # cache the simplifier's RDA analysis
1244
1248
  self.reaching_definitions = simp._reaching_definitions
@@ -1364,6 +1368,7 @@ class Clinic(Analysis):
1364
1368
  vvar_id_start=self.vvar_id_start,
1365
1369
  )
1366
1370
  self.vvar_id_start = ssailification.max_vvar_id + 1
1371
+ self.secondary_stackvars = ssailification.secondary_stackvars
1367
1372
  return ssailification.out_graph
1368
1373
 
1369
1374
  @timethis
@@ -1864,6 +1869,11 @@ class Clinic(Analysis):
1864
1869
  if expr.guard:
1865
1870
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, expr.guard)
1866
1871
 
1872
+ elif isinstance(expr, ailment.Expr.Phi):
1873
+ for _, vvar in expr.src_and_vvars:
1874
+ if vvar is not None:
1875
+ self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, vvar)
1876
+
1867
1877
  def _function_graph_to_ail_graph(self, func_graph, blocks_by_addr_and_size=None):
1868
1878
  if blocks_by_addr_and_size is None:
1869
1879
  blocks_by_addr_and_size = self._blocks_by_addr_and_size
@@ -500,6 +500,7 @@ class Decompiler(Analysis):
500
500
  scratch=self._optimization_scratch,
501
501
  force_loop_single_exit=self._force_loop_single_exit,
502
502
  complete_successors=self._complete_successors,
503
+ peephole_optimizations=self._peephole_optimizations,
503
504
  **kwargs,
504
505
  )
505
506
 
@@ -950,7 +950,9 @@ class DuplicationReverter(StructuringOptimizationPass):
950
950
  #
951
951
 
952
952
  def _share_subregion(self, blocks: list[Block]) -> bool:
953
- return any(all(block.addr in region for block in blocks) for region in self._ri.regions_by_block_addrs)
953
+ return any(
954
+ all((block.addr, block.idx) in region for block in blocks) for region in self._ri.regions_by_block_addrs
955
+ )
954
956
 
955
957
  def _is_valid_candidate(self, b0, b1):
956
958
  # blocks must have statements