angr 9.2.140__py3-none-win_amd64.whl → 9.2.142__py3-none-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (76)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +105 -35
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/cfg_base.py +38 -4
  6. angr/analyses/cfg/cfg_fast.py +23 -7
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +13 -8
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +1 -1
  10. angr/analyses/decompiler/ail_simplifier.py +105 -62
  11. angr/analyses/decompiler/callsite_maker.py +24 -11
  12. angr/analyses/decompiler/clinic.py +83 -5
  13. angr/analyses/decompiler/condition_processor.py +7 -7
  14. angr/analyses/decompiler/decompilation_cache.py +2 -1
  15. angr/analyses/decompiler/decompiler.py +11 -2
  16. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
  19. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  20. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  21. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +85 -16
  22. angr/analyses/decompiler/optimization_passes/optimization_pass.py +78 -1
  23. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  24. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +51 -7
  25. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  26. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  27. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  28. angr/analyses/decompiler/region_identifier.py +76 -51
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +32 -18
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -1
  31. angr/analyses/decompiler/ssailification/rewriting.py +70 -32
  32. angr/analyses/decompiler/ssailification/rewriting_engine.py +118 -24
  33. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  34. angr/analyses/decompiler/stack_item.py +36 -0
  35. angr/analyses/decompiler/structured_codegen/c.py +86 -145
  36. angr/analyses/decompiler/structuring/dream.py +1 -1
  37. angr/analyses/decompiler/structuring/phoenix.py +9 -4
  38. angr/analyses/decompiler/structuring/structurer_base.py +2 -1
  39. angr/analyses/decompiler/utils.py +46 -20
  40. angr/analyses/find_objects_static.py +2 -1
  41. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  42. angr/analyses/reaching_definitions/function_handler.py +24 -10
  43. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  44. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  45. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  46. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  47. angr/analyses/reaching_definitions/rd_state.py +11 -7
  48. angr/analyses/s_liveness.py +44 -6
  49. angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  51. angr/analyses/typehoon/simple_solver.py +35 -8
  52. angr/analyses/typehoon/typehoon.py +3 -1
  53. angr/analyses/variable_recovery/engine_ail.py +1 -1
  54. angr/analyses/variable_recovery/engine_vex.py +20 -4
  55. angr/calling_conventions.py +17 -12
  56. angr/factory.py +8 -3
  57. angr/knowledge_plugins/functions/function.py +5 -10
  58. angr/knowledge_plugins/variables/variable_manager.py +34 -5
  59. angr/lib/angr_native.dll +0 -0
  60. angr/procedures/definitions/__init__.py +3 -10
  61. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  62. angr/procedures/win32_kernel/__fastfail.py +15 -0
  63. angr/sim_procedure.py +2 -2
  64. angr/simos/simos.py +17 -11
  65. angr/simos/windows.py +42 -1
  66. angr/utils/ail.py +41 -1
  67. angr/utils/cpp.py +17 -0
  68. angr/utils/doms.py +142 -0
  69. angr/utils/library.py +1 -1
  70. angr/utils/types.py +59 -0
  71. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
  72. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/RECORD +76 -71
  73. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
  74. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
  75. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
  76. {angr-9.2.140.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.140"
5
+ __version__ = "9.2.142"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
@@ -33,6 +33,7 @@ from angr.knowledge_plugins.key_definitions.rd_model import ReachingDefinitionsM
33
33
  from angr.knowledge_plugins.variables.variable_access import VariableAccessSort
34
34
  from angr.knowledge_plugins.functions import Function
35
35
  from angr.utils.constants import DEFAULT_STATEMENT
36
+ from angr.utils.ssa import get_reg_offset_base_and_size, get_reg_offset_base
36
37
  from angr import SIM_PROCEDURES
37
38
  from angr.analyses import Analysis, register_analysis, ReachingDefinitionsAnalysis
38
39
  from angr.analyses.reaching_definitions import get_all_definitions
@@ -164,6 +165,19 @@ class CallingConventionAnalysis(Analysis):
164
165
  ):
165
166
  return
166
167
 
168
+ if (
169
+ hooker is not None
170
+ and hooker.cc is not None
171
+ and hooker.is_function
172
+ and not hooker.guessed_prototype
173
+ and hooker.prototype is not None
174
+ ):
175
+ # copy the calling convention and prototype from the SimProcedure instance
176
+ self.cc = hooker.cc
177
+ self.prototype = hooker.prototype
178
+ self.prototype_libname = hooker.library_name
179
+ return
180
+
167
181
  if self._function.prototype is None:
168
182
  # try our luck
169
183
  # we set ignore_binary_name to True because the binary name SimProcedures is "cle##externs" and does not
@@ -264,7 +278,7 @@ class CallingConventionAnalysis(Analysis):
264
278
  self.cc = cc
265
279
  self.prototype = prototype
266
280
 
267
- def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction] | None:
281
+ def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
268
282
  """
269
283
  Get the calling convention for a PLT stub.
270
284
 
@@ -296,24 +310,36 @@ class CallingConventionAnalysis(Analysis):
296
310
  real_func = None
297
311
 
298
312
  if real_func is not None:
313
+ if real_func.calling_convention is None:
314
+ cc_cls = default_cc(self.project.arch.name)
315
+ if cc_cls is None:
316
+ # can't determine the default calling convention for this architecture
317
+ return None
318
+ cc = cc_cls(self.project.arch)
319
+ else:
320
+ cc = real_func.calling_convention
299
321
  if real_func.is_simprocedure:
300
322
  if self.project.is_hooked(real_func.addr):
301
323
  # prioritize the hooker
302
324
  hooker = self.project.hooked_by(real_func.addr)
303
- if hooker is not None and (
304
- not hooker.is_stub or (hooker.is_function and not hooker.guessed_prototype)
305
- ):
306
- return real_func.calling_convention, hooker.prototype
307
- if real_func.calling_convention and real_func.prototype:
308
- return real_func.calling_convention, real_func.prototype
325
+ if hooker is not None and hooker.is_function and not hooker.guessed_prototype:
326
+ # we only take the prototype from the SimProcedure if
327
+ # - the SimProcedure is a function
328
+ # - the prototype of the SimProcedure is not guessed
329
+ return cc, hooker.prototype
330
+ if real_func.prototype is not None:
331
+ return cc, real_func.prototype
309
332
  else:
310
- return real_func.calling_convention, real_func.prototype
333
+ return cc, real_func.prototype
311
334
 
312
335
  if self.analyze_callsites:
313
336
  # determine the calling convention by analyzing its callsites
314
337
  callsite_facts = self._extract_and_analyze_callsites(max_analyzing_callsites=1)
315
338
  cc_cls = default_cc(self.project.arch.name)
316
- cc = cc_cls(self.project.arch) if cc_cls is not None else None
339
+ if cc_cls is None:
340
+ # can't determine the default calling convention for this architecture
341
+ return None
342
+ cc = cc_cls(self.project.arch)
317
343
  prototype = SimTypeFunction([], None)
318
344
  prototype = self._adjust_prototype(
319
345
  prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
@@ -342,7 +368,7 @@ class CallingConventionAnalysis(Analysis):
342
368
  input_variables = vm.input_variables()
343
369
  input_args = self._args_from_vars(input_variables, vm)
344
370
  else:
345
- input_args = self._input_args
371
+ input_args = set(self._input_args)
346
372
  retval_size = self._retval_size
347
373
 
348
374
  # check if this function is a variadic function
@@ -355,8 +381,14 @@ class CallingConventionAnalysis(Analysis):
355
381
  # TODO: properly determine sp_delta
356
382
  sp_delta = self.project.arch.bytes if self.project.arch.call_pushes_ret else 0
357
383
 
358
- input_args = list(input_args) # input_args might be modified by find_cc()
359
- cc = SimCC.find_cc(self.project.arch, input_args, sp_delta, platform=self.project.simos.name)
384
+ full_input_args = self._consolidate_input_args(input_args)
385
+ full_input_args_copy = list(full_input_args) # input_args might be modified by find_cc()
386
+ cc = SimCC.find_cc(self.project.arch, full_input_args_copy, sp_delta, platform=self.project.simos.name)
387
+
388
+ # update input_args according to the difference between full_input_args and full_input_args_copy
389
+ for a in full_input_args:
390
+ if a not in full_input_args_copy and a in input_args:
391
+ input_args.remove(a)
360
392
 
361
393
  if cc is None:
362
394
  l.warning(
@@ -657,12 +689,6 @@ class CallingConventionAnalysis(Analysis):
657
689
  else:
658
690
  break
659
691
 
660
- if None in temp_args:
661
- first_none_idx = temp_args.index(None)
662
- # test if there is at least one argument set after None; if so, we ignore the first None
663
- if any(arg is not None for arg in temp_args[first_none_idx:]):
664
- temp_args[first_none_idx] = expected_args[first_none_idx]
665
-
666
692
  if None in temp_args:
667
693
  # we be very conservative here and ignore all arguments starting from the first missing one
668
694
  first_none_idx = temp_args.index(None)
@@ -681,17 +707,18 @@ class CallingConventionAnalysis(Analysis):
681
707
  if all(fact.return_value_used is False for fact in facts):
682
708
  proto.returnty = SimTypeBottom(label="void")
683
709
  else:
684
- proto.returnty = SimTypeInt().with_arch(self.project.arch)
710
+ if proto.returnty is None or isinstance(proto.returnty, SimTypeBottom):
711
+ proto.returnty = SimTypeInt().with_arch(self.project.arch)
685
712
 
686
713
  if (
687
714
  update_arguments == UpdateArgumentsOption.AlwaysUpdate
688
715
  or (update_arguments == UpdateArgumentsOption.UpdateWhenCCHasNoArgs and not proto.args)
689
716
  ) and len({len(fact.args) for fact in facts}) == 1:
690
717
  fact = next(iter(facts))
691
- proto.args = [
718
+ proto.args = tuple(
692
719
  self._guess_arg_type(arg) if arg is not None else SimTypeInt().with_arch(self.project.arch)
693
720
  for arg in fact.args
694
- ]
721
+ )
695
722
 
696
723
  return proto
697
724
 
@@ -730,13 +757,8 @@ class CallingConventionAnalysis(Analysis):
730
757
  # a register variable, convert it to a register argument
731
758
  if not is_sane_register_variable(self.project.arch, variable.reg, variable.size, def_cc=def_cc):
732
759
  continue
733
- if self.project.arch.name in {"AMD64", "X86"} and variable.size < self.project.arch.bytes:
734
- # use complete registers on AMD64 and X86
735
- reg_name = self.project.arch.translate_register_name(variable.reg, size=self.project.arch.bytes)
736
- arg = SimRegArg(reg_name, self.project.arch.bytes)
737
- else:
738
- reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
739
- arg = SimRegArg(reg_name, variable.size)
760
+ reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
761
+ arg = SimRegArg(reg_name, variable.size)
740
762
  args.add(arg)
741
763
 
742
764
  accesses = var_manager.get_variable_accesses(variable)
@@ -778,15 +800,58 @@ class CallingConventionAnalysis(Analysis):
778
800
 
779
801
  return args.difference(restored_reg_vars)
780
802
 
781
- def _reorder_args(self, args: list[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
803
+ def _consolidate_input_args(self, input_args: set[SimRegArg | SimStackArg]) -> set[SimRegArg | SimStackArg]:
804
+ """
805
+ Consolidate register arguments by converting partial registers to full registers on certain architectures.
806
+
807
+ :param input_args: A set of input arguments.
808
+ :return: A set of consolidated input args.
809
+ """
810
+
811
+ if self.project.arch.name in {"AMD64", "X86"}:
812
+ new_input_args = set()
813
+ for a in input_args:
814
+ if isinstance(a, SimRegArg) and a.size < self.project.arch.bytes:
815
+ # use complete registers on AMD64 and X86
816
+ reg_offset, reg_size = self.project.arch.registers[a.reg_name]
817
+ full_reg_offset, full_reg_size = get_reg_offset_base_and_size(
818
+ reg_offset, self.project.arch, size=reg_size
819
+ )
820
+ full_reg_name = self.project.arch.translate_register_name(full_reg_offset, size=full_reg_size)
821
+ arg = SimRegArg(full_reg_name, full_reg_size)
822
+ if arg not in new_input_args:
823
+ new_input_args.add(arg)
824
+ else:
825
+ new_input_args.add(a)
826
+ return new_input_args
827
+
828
+ return input_args
829
+
830
+ def _reorder_args(self, args: set[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
782
831
  """
783
832
  Reorder arguments according to the calling convention identified.
784
833
 
785
- :param args: A list of arguments that haven't been ordered.
834
+ :param args: A set of arguments that haven't been ordered.
786
835
  :param cc: The identified calling convention.
787
836
  :return: A reordered list of args.
788
837
  """
789
838
 
839
+ def _is_same_reg(rn0: str, rn1: str) -> bool:
840
+ """
841
+ Check if rn0 and rn1 belong to the same base register.
842
+
843
+ :param rn0: Register name of the first register.
844
+ :param rn1: Register name of the second register.
845
+ :return: True if they belong to the same base register; False otherwise.
846
+ """
847
+ if rn0 == rn1:
848
+ return True
849
+ off0, sz0 = self.project.arch.registers[rn0]
850
+ full_off0 = get_reg_offset_base(off0, self.project.arch, sz0)
851
+ off1, sz1 = self.project.arch.registers[rn1]
852
+ full_off1 = get_reg_offset_base(off1, self.project.arch, sz1)
853
+ return full_off0 == full_off1
854
+
790
855
  reg_args = []
791
856
 
792
857
  # split args into two lists
@@ -805,7 +870,7 @@ class CallingConventionAnalysis(Analysis):
805
870
  # match int args first
806
871
  for reg_name in cc.ARG_REGS:
807
872
  try:
808
- arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and a.reg_name == reg_name))
873
+ arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name)))
809
874
  except StopIteration:
810
875
  # have we reached the end of the args list?
811
876
  if [a for a in int_args if isinstance(a, SimRegArg)] or len(stack_int_args) > 0:
@@ -821,7 +886,9 @@ class CallingConventionAnalysis(Analysis):
821
886
  if fp_args:
822
887
  for reg_name in cc.FP_ARG_REGS:
823
888
  try:
824
- arg = next(iter(a for a in fp_args if isinstance(a, SimRegArg) and a.reg_name == reg_name))
889
+ arg = next(
890
+ iter(a for a in fp_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name))
891
+ )
825
892
  except StopIteration:
826
893
  # have we reached the end of the args list?
827
894
  if [a for a in fp_args if isinstance(a, SimRegArg)] or len(stack_fp_args) > 0:
@@ -886,12 +953,15 @@ class CallingConventionAnalysis(Analysis):
886
953
  if 5 <= ret_val_size <= 8:
887
954
  return SimTypeLongLong()
888
955
 
889
- # fallback
890
- return SimTypeInt() if cc.arch.bits == 32 else SimTypeLongLong()
956
+ return SimTypeBottom(label="void")
891
957
 
892
958
  @staticmethod
893
959
  def _likely_saving_temp_reg(ail_block: ailment.Block, d: Definition, all_reg_defs: set[Definition]) -> bool:
894
- if d.codeloc.block_addr == ail_block.addr and d.codeloc.stmt_idx < len(ail_block.statements):
960
+ if (
961
+ d.codeloc.block_addr == ail_block.addr
962
+ and d.codeloc.stmt_idx is not None
963
+ and d.codeloc.stmt_idx < len(ail_block.statements)
964
+ ):
895
965
  stmt = ail_block.statements[d.codeloc.stmt_idx]
896
966
  if isinstance(stmt, ailment.Stmt.Assignment) and isinstance(stmt.src, ailment.Expr.Register):
897
967
  src_offset = stmt.src.reg_offset
@@ -90,7 +90,7 @@ binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactColle
90
90
 
91
91
  class SimEngineFactCollectorVEX(
92
92
  SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
93
- SimEngineLight[type[FactCollectorState], SpOffset | RegisterOffset | int, Block, None],
93
+ SimEngineLight[FactCollectorState, SpOffset | RegisterOffset | int, Block, None],
94
94
  ):
95
95
  """
96
96
  THe engine for FactCollector.
@@ -101,7 +101,7 @@ class SimEngineFactCollectorVEX(
101
101
  super().__init__(project)
102
102
 
103
103
  def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
104
- if self.block.vex.jumpkind == "Ijk_Call":
104
+ if self.block.vex.jumpkind == "Ijk_Call" and self.arch.ret_offset is not None:
105
105
  self.state.register_written(self.arch.ret_offset, self.arch.bytes)
106
106
 
107
107
  def _top(self, bits: int):
@@ -110,7 +110,7 @@ class SimEngineFactCollectorVEX(
110
110
  def _is_top(self, expr: Any) -> bool:
111
111
  raise NotImplementedError
112
112
 
113
- def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
113
+ def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.expr.IRExpr) -> Any:
114
114
  return None
115
115
 
116
116
  def _handle_stmt_Put(self, stmt):
@@ -142,9 +142,9 @@ class SimEngineFactCollectorVEX(
142
142
  return expr.con.value
143
143
 
144
144
  def _handle_expr_GSPTR(self, expr):
145
- return None
145
+ return 0
146
146
 
147
- def _handle_expr_Get(self, expr) -> SpOffset | None:
147
+ def _handle_expr_Get(self, expr) -> SpOffset | RegisterOffset:
148
148
  if expr.offset == self.arch.sp_offset:
149
149
  return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
150
150
  if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
@@ -304,7 +304,10 @@ class FactCollector(Analysis):
304
304
 
305
305
  def _handle_function(self, state: FactCollectorState, func: Function) -> None:
306
306
  try:
307
- arg_locs = func.calling_convention.arg_locs(func.prototype)
307
+ if func.calling_convention is not None and func.prototype is not None:
308
+ arg_locs = func.calling_convention.arg_locs(func.prototype)
309
+ else:
310
+ return
308
311
  except (TypeError, ValueError):
309
312
  return
310
313
 
@@ -355,6 +358,7 @@ class FactCollector(Analysis):
355
358
 
356
359
  if isinstance(node, BlockNode) and node.size == 0:
357
360
  continue
361
+
358
362
  if isinstance(node, HookNode):
359
363
  # attempt to convert it into a function
360
364
  if self.kb.functions.contains_addr(node.addr):
@@ -369,17 +373,43 @@ class FactCollector(Analysis):
369
373
  and not isinstance(node.prototype.returnty, SimTypeBottom)
370
374
  ):
371
375
  # assume the function overwrites the return variable
372
- retval_size = (
373
- node.prototype.returnty.with_arch(self.project.arch).size // self.project.arch.byte_width
374
- )
376
+ returnty_size = node.prototype.returnty.with_arch(self.project.arch).size
377
+ assert returnty_size is not None
378
+ retval_size = returnty_size // self.project.arch.byte_width
375
379
  retval_sizes.append(retval_size)
376
380
  continue
377
381
 
382
+ # if this block ends with a call to a function, we process the function first
383
+ func_succs = [
384
+ succ
385
+ for succ in func_graph.successors(node)
386
+ if isinstance(succ, (Function, HookNode)) or self.kb.functions.contains_addr(succ.addr)
387
+ ]
388
+ if len(func_succs) == 1:
389
+ func_succ = func_succs[0]
390
+ if isinstance(func_succ, (BlockNode, HookNode)) and self.kb.functions.contains_addr(func_succ.addr):
391
+ # attempt to convert it into a function
392
+ func_succ = self.kb.functions.get_by_addr(func_succ.addr)
393
+ if isinstance(func_succ, Function):
394
+ if (
395
+ func_succ.calling_convention is not None
396
+ and func_succ.prototype is not None
397
+ and func_succ.prototype.returnty is not None
398
+ and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
399
+ ):
400
+ # assume the function overwrites the return variable
401
+ returnty_size = func_succ.prototype.returnty.with_arch(self.project.arch).size
402
+ assert returnty_size is not None
403
+ retval_size = returnty_size // self.project.arch.byte_width
404
+ retval_sizes.append(retval_size)
405
+ continue
406
+
378
407
  block = self.project.factory.block(node.addr, size=node.size)
379
408
  # scan the block statements backwards to find writes to the return value register
380
409
  retval_size = None
381
410
  for stmt in reversed(block.vex.statements):
382
411
  if isinstance(stmt, pyvex.IRStmt.Put):
412
+ assert block.vex.tyenv is not None
383
413
  size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
384
414
  if stmt.offset == retreg_offset:
385
415
  retval_size = max(size, 1)
@@ -391,9 +421,9 @@ class FactCollector(Analysis):
391
421
  for pred, _, data in func_graph.in_edges(node, data=True):
392
422
  edge_type = data.get("type")
393
423
  if pred not in traversed and depth + 1 <= self._max_depth:
394
- if edge_type == "fake_return":
424
+ if edge_type == "call":
395
425
  continue
396
- if edge_type in {"transition", "call"}:
426
+ if edge_type in {"transition", "fake_return"}:
397
427
  queue.append((depth + 1, pred))
398
428
 
399
429
  self.retval_size = max(retval_sizes) if retval_sizes else None
@@ -472,6 +502,7 @@ class FactCollector(Analysis):
472
502
  ):
473
503
  tmps[stmt.tmp] = "sp"
474
504
  if isinstance(stmt, pyvex.IRStmt.Put):
505
+ assert block.vex.tyenv is not None
475
506
  size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
476
507
  # is the data loaded from the stack?
477
508
  if (
@@ -532,13 +563,8 @@ class FactCollector(Analysis):
532
563
  ):
533
564
  continue
534
565
  reg_offset_created.add(offset)
535
- if self.project.arch.name in {"AMD64", "X86"} and size < self.project.arch.bytes:
536
- # use complete registers on AMD64 and X86
537
- reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
538
- arg = SimRegArg(reg_name, self.project.arch.bytes)
539
- else:
540
- reg_name = self.project.arch.translate_register_name(offset, size=size)
541
- arg = SimRegArg(reg_name, size)
566
+ reg_name = self.project.arch.translate_register_name(offset, size=size)
567
+ arg = SimRegArg(reg_name, size)
542
568
  self.input_args.append(arg)
543
569
 
544
570
  stack_offset_created = set()
@@ -9,7 +9,9 @@ from angr.calling_conventions import SimCC
9
9
  l = logging.getLogger(__name__)
10
10
 
11
11
 
12
- def is_sane_register_variable(arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | None = None) -> bool:
12
+ def is_sane_register_variable(
13
+ arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | type[SimCC] | None = None
14
+ ) -> bool:
13
15
  """
14
16
  Filters all registers that are surly not members of function arguments.
15
17
  This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which
@@ -11,7 +11,7 @@ import pyvex
11
11
  from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec, XBE
12
12
  from cle.backends import NamedRegion
13
13
  import archinfo
14
- from archinfo.arch_soot import SootAddressDescriptor
14
+ from archinfo.arch_soot import SootAddressDescriptor, SootMethodDescriptor
15
15
  from archinfo.arch_arm import is_arm_arch, get_real_address_if_arm
16
16
 
17
17
  from angr.knowledge_plugins.functions.function_manager import FunctionManager
@@ -129,7 +129,7 @@ class CFGBase(Analysis):
129
129
 
130
130
  # Store all the functions analyzed before the set is cleared
131
131
  # Used for performance optimization
132
- self._updated_nonreturning_functions: set[int] | None = None
132
+ self._updated_nonreturning_functions: set[int | SootMethodDescriptor] | None = None
133
133
 
134
134
  self._normalize = normalize
135
135
 
@@ -246,7 +246,7 @@ class CFGBase(Analysis):
246
246
  )
247
247
 
248
248
  self._regions_size = sum((end - start) for start, end in regions)
249
- self._regions: dict[int, int] = SortedDict(regions)
249
+ self._regions: SortedDict = SortedDict(regions)
250
250
 
251
251
  l.debug("CFG recovery covers %d regions:", len(self._regions))
252
252
  for start, end in self._regions.items():
@@ -1556,6 +1556,7 @@ class CFGBase(Analysis):
1556
1556
  self.kb.functions[func_addr].alignment = True
1557
1557
  continue
1558
1558
  node = function.get_node(block.addr)
1559
+ assert node is not None
1559
1560
  successors = list(function.graph.successors(node))
1560
1561
  if len(successors) == 1 and successors[0].addr == node.addr:
1561
1562
  # self loop. mark this function as a function alignment
@@ -2151,6 +2152,11 @@ class CFGBase(Analysis):
2151
2152
  f = self.kb.functions.function(addr=addr)
2152
2153
  assert f is not None
2153
2154
 
2155
+ # copy over existing metadata
2156
+ if known_functions.contains_addr(addr):
2157
+ kf = known_functions.get_by_addr(addr)
2158
+ f.is_plt = kf.is_plt
2159
+
2154
2160
  blockaddr_to_function[addr] = f
2155
2161
 
2156
2162
  function_is_returning = False
@@ -2532,6 +2538,34 @@ class CFGBase(Analysis):
2532
2538
  # Other functions
2533
2539
  #
2534
2540
 
2541
+ @staticmethod
2542
+ def _is_noop_jump_block(block) -> bool:
2543
+ """
2544
+ Check if the block does nothing but jumping to a constant address.
2545
+
2546
+ :param block: The block instance. We assume the block is already optimized.
2547
+ :return: True if the entire block is a jump to a constant address, False otherwise.
2548
+ """
2549
+
2550
+ vex = block.vex
2551
+ if vex.jumpkind != "Ijk_Boring":
2552
+ return False
2553
+ if isinstance(vex.next, pyvex.expr.Const):
2554
+ return all(isinstance(stmt, pyvex.stmt.IMark) for stmt in vex.statements)
2555
+ if isinstance(vex.next, pyvex.expr.RdTmp):
2556
+ next_tmp = vex.next.tmp
2557
+ return all(
2558
+ isinstance(stmt, pyvex.stmt.IMark)
2559
+ or (
2560
+ isinstance(stmt, pyvex.stmt.WrTmp)
2561
+ and stmt.tmp == next_tmp
2562
+ and isinstance(stmt.data, pyvex.expr.Load)
2563
+ and isinstance(stmt.data.addr, pyvex.expr.Const)
2564
+ )
2565
+ for stmt in vex.statements
2566
+ )
2567
+ return False
2568
+
2535
2569
  @staticmethod
2536
2570
  def _is_noop_block(arch: archinfo.Arch, block) -> bool:
2537
2571
  """
@@ -2755,7 +2789,7 @@ class CFGBase(Analysis):
2755
2789
  cfg_node: CFGNode,
2756
2790
  irsb: pyvex.IRSB,
2757
2791
  func_addr: int,
2758
- stmt_idx: int | str = DEFAULT_STATEMENT,
2792
+ stmt_idx: int = DEFAULT_STATEMENT,
2759
2793
  ) -> tuple[bool, set[int], IndirectJump | None]:
2760
2794
  """
2761
2795
  Called when we encounter an indirect jump. We will try to resolve this indirect jump using timeless (fast)
@@ -1782,7 +1782,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1782
1782
  self.project.loader.discard_ro_memview()
1783
1783
 
1784
1784
  # Clean up
1785
- self._traced_addresses = None
1785
+ self._traced_addresses = None # type: ignore
1786
1786
  self._lifter_deregister_readonly_regions()
1787
1787
  self._function_returns = None
1788
1788
 
@@ -1838,6 +1838,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1838
1838
  xrefs = self.kb.xrefs.get_xrefs_by_dst(security_cookie_addr)
1839
1839
  tested_func_addrs = set()
1840
1840
  for xref in xrefs:
1841
+ assert xref.block_addr is not None
1841
1842
  cfg_node = self.model.get_any_node(xref.block_addr)
1842
1843
  if cfg_node is None:
1843
1844
  continue
@@ -2081,13 +2082,20 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
2081
2082
 
2082
2083
  if (
2083
2084
  cfg_job.src_node is not None
2084
- and self.functions.contains_addr(cfg_job.src_node.addr)
2085
- and self.functions[cfg_job.src_node.addr].is_default_name
2086
2085
  and cfg_job.src_node.addr not in self.kb.labels
2087
2086
  and cfg_job.jumpkind == "Ijk_Boring"
2087
+ and self._is_noop_jump_block(cfg_job.src_node.block)
2088
2088
  ):
2089
- # assign a name to the caller function that jumps to this procedure
2090
- self.functions[cfg_job.src_node.addr].name = procedure.display_name
2089
+ # the caller node is very likely to be a PLT stub
2090
+ if not self.functions.contains_addr(cfg_job.src_node.addr):
2091
+ src_func = self.functions.function(addr=cfg_job.src_node.addr, create=True)
2092
+ else:
2093
+ src_func = self.functions.get_by_addr(cfg_job.src_node.addr)
2094
+ if len(src_func.block_addrs_set) <= 1 and src_func.is_default_name:
2095
+ # assign a name to the caller function that jumps to this procedure
2096
+ src_func.name = procedure.display_name
2097
+ # mark it as PLT
2098
+ src_func.is_plt = True
2091
2099
 
2092
2100
  if procedure.ADDS_EXITS:
2093
2101
  # Get two blocks ahead
@@ -3714,7 +3722,12 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
3714
3722
  #
3715
3723
 
3716
3724
  def _graph_add_edge(
3717
- self, cfg_node: CFGNode, src_node: CFGNode | None, src_jumpkind: str, src_ins_addr: int, src_stmt_idx: int
3725
+ self,
3726
+ cfg_node: CFGNode,
3727
+ src_node: CFGNode | None,
3728
+ src_jumpkind: str,
3729
+ src_ins_addr: int | None,
3730
+ src_stmt_idx: int | None,
3718
3731
  ):
3719
3732
  """
3720
3733
  Add edge between nodes, or add node if entry point
@@ -4584,6 +4597,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4584
4597
  elif (
4585
4598
  lifted_block is not None
4586
4599
  and is_x86_x64_arch
4600
+ and lifted_block.bytes is not None
4587
4601
  and len(lifted_block.bytes) - irsb_size > 2
4588
4602
  and lifted_block.bytes[irsb_size : irsb_size + 2]
4589
4603
  in {
@@ -4659,7 +4673,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4659
4673
  self._seg_list.occupy(real_addr + irsb_size, nodecode_size, "nodecode")
4660
4674
 
4661
4675
  # Occupy the block in segment list
4662
- if irsb.size > 0:
4676
+ if irsb is not None and irsb.size > 0:
4663
4677
  self._seg_list.occupy(real_addr, irsb.size, "code")
4664
4678
 
4665
4679
  # Create a CFG node, and add it to the graph
@@ -4969,6 +4983,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4969
4983
 
4970
4984
  for assumption_addr in to_remove:
4971
4985
  # remove this assumption from the graph (since we may have new relationships formed later)
4986
+ assert self._decoding_assumption_relations is not None
4972
4987
  if assumption_addr in self._decoding_assumption_relations:
4973
4988
  self._decoding_assumption_relations.remove_node(assumption_addr)
4974
4989
 
@@ -5159,6 +5174,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
5159
5174
  target_func = edges[0][1]
5160
5175
  if isinstance(target_func, (HookNode, Function)) and self.project.is_hooked(target_func.addr):
5161
5176
  hooker = self.project.hooked_by(target_func.addr)
5177
+ assert hooker is not None
5162
5178
  if hooker.DYNAMIC_RET:
5163
5179
  return self._is_call_returning(callsite_cfgnode, target_func.addr)
5164
5180
 
@@ -182,23 +182,28 @@ class ConstantValueManager:
182
182
 
183
183
  # determine blocks to run FCP on
184
184
 
185
- # - include at most three levels of successors from the entrypoint
185
+ # - include at most three levels of superblock successors from the entrypoint
186
+ self.mapping = {}
186
187
  startpoint = self.func.startpoint
188
+ if startpoint is None:
189
+ return
190
+
187
191
  blocks = set()
188
- succs = [startpoint]
189
- for _ in range(3):
192
+ succ_and_levels = [(startpoint, 0)]
193
+ while succ_and_levels:
190
194
  new_succs = []
191
- for node in succs:
195
+ for node, level in succ_and_levels:
192
196
  if node in blocks:
193
197
  continue
194
198
  blocks.add(node)
195
199
  if node.addr == self.indirect_jump_addr:
196
200
  # stop at the indirect jump block
197
201
  continue
198
- new_succs += list(self.func.graph.successors(node))
199
- succs = new_succs
200
- if not succs:
201
- break
202
+ for _, succ, data in self.func.graph.out_edges(node, data=True):
203
+ new_level = level if data.get("type") == "fake_return" else level + 1
204
+ if new_level <= 3:
205
+ new_succs.append((succ, new_level))
206
+ succ_and_levels = new_succs
202
207
 
203
208
  # - include at most six levels of predecessors from the indirect jump block
204
209
  ij_block = self.func.get_node(self.indirect_jump_addr)