angr 9.2.139__py3-none-macosx_10_9_x86_64.whl → 9.2.141__py3-none-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (88)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +136 -53
  3. angr/analyses/calling_convention/fact_collector.py +44 -18
  4. angr/analyses/calling_convention/utils.py +3 -1
  5. angr/analyses/cfg/cfg_base.py +13 -0
  6. angr/analyses/cfg/cfg_fast.py +11 -0
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +9 -8
  8. angr/analyses/decompiler/ail_simplifier.py +115 -72
  9. angr/analyses/decompiler/callsite_maker.py +24 -11
  10. angr/analyses/decompiler/clinic.py +78 -43
  11. angr/analyses/decompiler/decompiler.py +18 -7
  12. angr/analyses/decompiler/expression_narrower.py +1 -1
  13. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
  14. angr/analyses/decompiler/optimization_passes/duplication_reverter/duplication_reverter.py +3 -1
  15. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +21 -2
  16. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
  17. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +84 -15
  18. angr/analyses/decompiler/optimization_passes/optimization_pass.py +92 -11
  19. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +53 -9
  20. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +44 -7
  21. angr/analyses/decompiler/region_identifier.py +6 -4
  22. angr/analyses/decompiler/region_simplifiers/expr_folding.py +287 -122
  23. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +31 -13
  24. angr/analyses/decompiler/ssailification/rewriting.py +23 -15
  25. angr/analyses/decompiler/ssailification/rewriting_engine.py +105 -24
  26. angr/analyses/decompiler/ssailification/ssailification.py +22 -14
  27. angr/analyses/decompiler/structured_codegen/c.py +73 -137
  28. angr/analyses/decompiler/structuring/dream.py +22 -18
  29. angr/analyses/decompiler/structuring/phoenix.py +158 -41
  30. angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
  31. angr/analyses/decompiler/structuring/structurer_base.py +37 -10
  32. angr/analyses/decompiler/structuring/structurer_nodes.py +4 -1
  33. angr/analyses/decompiler/utils.py +106 -21
  34. angr/analyses/deobfuscator/api_obf_finder.py +8 -5
  35. angr/analyses/deobfuscator/api_obf_type2_finder.py +18 -10
  36. angr/analyses/deobfuscator/string_obf_finder.py +105 -18
  37. angr/analyses/forward_analysis/forward_analysis.py +1 -1
  38. angr/analyses/propagator/top_checker_mixin.py +6 -6
  39. angr/analyses/reaching_definitions/__init__.py +2 -1
  40. angr/analyses/reaching_definitions/dep_graph.py +1 -12
  41. angr/analyses/reaching_definitions/engine_vex.py +36 -31
  42. angr/analyses/reaching_definitions/function_handler.py +15 -2
  43. angr/analyses/reaching_definitions/rd_state.py +1 -37
  44. angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
  45. angr/analyses/s_propagator.py +6 -41
  46. angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
  47. angr/analyses/s_reaching_definitions/s_rda_view.py +43 -25
  48. angr/analyses/stack_pointer_tracker.py +36 -22
  49. angr/analyses/typehoon/simple_solver.py +45 -7
  50. angr/analyses/typehoon/typeconsts.py +18 -5
  51. angr/analyses/variable_recovery/engine_ail.py +1 -1
  52. angr/analyses/variable_recovery/engine_base.py +7 -5
  53. angr/analyses/variable_recovery/engine_vex.py +20 -4
  54. angr/block.py +69 -107
  55. angr/callable.py +14 -7
  56. angr/calling_conventions.py +30 -11
  57. angr/distributed/__init__.py +1 -1
  58. angr/engines/__init__.py +7 -8
  59. angr/engines/engine.py +1 -120
  60. angr/engines/failure.py +2 -2
  61. angr/engines/hook.py +2 -2
  62. angr/engines/light/engine.py +2 -2
  63. angr/engines/pcode/engine.py +2 -14
  64. angr/engines/procedure.py +2 -2
  65. angr/engines/soot/engine.py +2 -2
  66. angr/engines/soot/statements/switch.py +1 -1
  67. angr/engines/successors.py +124 -11
  68. angr/engines/syscall.py +2 -2
  69. angr/engines/unicorn.py +3 -3
  70. angr/engines/vex/heavy/heavy.py +3 -15
  71. angr/factory.py +12 -22
  72. angr/knowledge_plugins/key_definitions/atoms.py +8 -4
  73. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
  74. angr/knowledge_plugins/variables/variable_manager.py +7 -5
  75. angr/lib/angr_native.dylib +0 -0
  76. angr/sim_type.py +19 -17
  77. angr/simos/simos.py +3 -1
  78. angr/state_plugins/plugin.py +19 -4
  79. angr/storage/memory_mixins/memory_mixin.py +1 -1
  80. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
  81. angr/utils/ssa/__init__.py +119 -4
  82. angr/utils/types.py +48 -0
  83. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/METADATA +6 -6
  84. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/RECORD +88 -87
  85. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/LICENSE +0 -0
  86. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/WHEEL +0 -0
  87. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/entry_points.txt +0 -0
  88. {angr-9.2.139.dist-info → angr-9.2.141.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.139"
5
+ __version__ = "9.2.141"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
angr/analyses/calling_convention/calling_convention.py CHANGED
@@ -33,6 +33,7 @@ from angr.knowledge_plugins.key_definitions.rd_model import ReachingDefinitionsM
33
33
  from angr.knowledge_plugins.variables.variable_access import VariableAccessSort
34
34
  from angr.knowledge_plugins.functions import Function
35
35
  from angr.utils.constants import DEFAULT_STATEMENT
36
+ from angr.utils.ssa import get_reg_offset_base_and_size, get_reg_offset_base
36
37
  from angr import SIM_PROCEDURES
37
38
  from angr.analyses import Analysis, register_analysis, ReachingDefinitionsAnalysis
38
39
  from angr.analyses.reaching_definitions import get_all_definitions
@@ -150,6 +151,8 @@ class CallingConventionAnalysis(Analysis):
150
151
  The major analysis routine.
151
152
  """
152
153
 
154
+ assert self._function is not None
155
+
153
156
  if self._function.is_simprocedure:
154
157
  hooker = self.project.hooked_by(self._function.addr)
155
158
  if isinstance(
@@ -200,8 +203,8 @@ class CallingConventionAnalysis(Analysis):
200
203
  )
201
204
  if prototype.args:
202
205
  break
203
- self.cc = cc
204
- self.prototype = prototype
206
+ self.cc = cc # type: ignore
207
+ self.prototype = prototype # type: ignore
205
208
  return
206
209
  if self._function.is_plt:
207
210
  r = self._analyze_plt()
@@ -218,23 +221,33 @@ class CallingConventionAnalysis(Analysis):
218
221
  if self.analyze_callsites:
219
222
  # only take the first 3 because running reaching definition analysis on all functions is costly
220
223
  callsite_facts = self._extract_and_analyze_callsites(max_analyzing_callsites=3)
221
- prototype = self._adjust_prototype(
222
- prototype, callsite_facts, update_arguments=UpdateArgumentsOption.UpdateWhenCCHasNoArgs
224
+ prototype = (
225
+ self._adjust_prototype(
226
+ prototype, callsite_facts, update_arguments=UpdateArgumentsOption.UpdateWhenCCHasNoArgs
227
+ )
228
+ if prototype is not None
229
+ else None
223
230
  )
224
231
 
225
232
  self.cc = cc
226
233
  self.prototype = prototype
227
234
 
228
235
  def _analyze_callsite_only(self):
236
+ assert self.caller_func_addr is not None
237
+ assert self.callsite_block_addr is not None
238
+ assert self.callsite_insn_addr is not None
239
+ cc, prototype = None, None
240
+
229
241
  for include_callsite_preds in [False, True]:
230
- callsite_facts = [
231
- self._analyze_callsite(
232
- self.caller_func_addr,
233
- self.callsite_block_addr,
234
- self.callsite_insn_addr,
235
- include_preds=include_callsite_preds,
236
- )
237
- ]
242
+ fact = self._analyze_callsite(
243
+ self.caller_func_addr,
244
+ self.callsite_block_addr,
245
+ self.callsite_insn_addr,
246
+ include_preds=include_callsite_preds,
247
+ )
248
+ if fact is None:
249
+ continue
250
+ callsite_facts = [fact]
238
251
  cc_cls = default_cc(
239
252
  self.project.arch.name,
240
253
  platform=(
@@ -252,12 +265,13 @@ class CallingConventionAnalysis(Analysis):
252
265
  self.cc = cc
253
266
  self.prototype = prototype
254
267
 
255
- def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction] | None:
268
+ def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
256
269
  """
257
270
  Get the calling convention for a PLT stub.
258
271
 
259
272
  :return: A calling convention.
260
273
  """
274
+ assert self._function is not None
261
275
 
262
276
  if len(self._function.jumpout_sites) != 1:
263
277
  l.warning(
@@ -283,6 +297,14 @@ class CallingConventionAnalysis(Analysis):
283
297
  real_func = None
284
298
 
285
299
  if real_func is not None:
300
+ if real_func.calling_convention is None:
301
+ cc_cls = default_cc(self.project.arch.name)
302
+ if cc_cls is None:
303
+ # can't determine the default calling convention for this architecture
304
+ return None
305
+ cc = cc_cls(self.project.arch)
306
+ else:
307
+ cc = real_func.calling_convention
286
308
  if real_func.is_simprocedure:
287
309
  if self.project.is_hooked(real_func.addr):
288
310
  # prioritize the hooker
@@ -290,17 +312,20 @@ class CallingConventionAnalysis(Analysis):
290
312
  if hooker is not None and (
291
313
  not hooker.is_stub or (hooker.is_function and not hooker.guessed_prototype)
292
314
  ):
293
- return real_func.calling_convention, hooker.prototype
294
- if real_func.calling_convention and real_func.prototype:
295
- return real_func.calling_convention, real_func.prototype
315
+ return cc, hooker.prototype
316
+ if real_func.prototype is not None:
317
+ return cc, real_func.prototype
296
318
  else:
297
- return real_func.calling_convention, real_func.prototype
319
+ return cc, real_func.prototype
298
320
 
299
321
  if self.analyze_callsites:
300
322
  # determine the calling convention by analyzing its callsites
301
323
  callsite_facts = self._extract_and_analyze_callsites(max_analyzing_callsites=1)
302
324
  cc_cls = default_cc(self.project.arch.name)
303
- cc = cc_cls(self.project.arch) if cc_cls is not None else None
325
+ if cc_cls is None:
326
+ # can't determine the default calling convention for this architecture
327
+ return None
328
+ cc = cc_cls(self.project.arch)
304
329
  prototype = SimTypeFunction([], None)
305
330
  prototype = self._adjust_prototype(
306
331
  prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
@@ -314,6 +339,7 @@ class CallingConventionAnalysis(Analysis):
314
339
  Go over the variable information in variable manager for this function, and return all uninitialized
315
340
  register/stack variables.
316
341
  """
342
+ assert self._function is not None
317
343
 
318
344
  if self._function.is_simprocedure or self._function.is_plt:
319
345
  # we do not analyze SimProcedures or PLT stubs
@@ -328,7 +354,7 @@ class CallingConventionAnalysis(Analysis):
328
354
  input_variables = vm.input_variables()
329
355
  input_args = self._args_from_vars(input_variables, vm)
330
356
  else:
331
- input_args = self._input_args
357
+ input_args = set(self._input_args)
332
358
  retval_size = self._retval_size
333
359
 
334
360
  # check if this function is a variadic function
@@ -341,8 +367,14 @@ class CallingConventionAnalysis(Analysis):
341
367
  # TODO: properly determine sp_delta
342
368
  sp_delta = self.project.arch.bytes if self.project.arch.call_pushes_ret else 0
343
369
 
344
- input_args = list(input_args) # input_args might be modified by find_cc()
345
- cc = SimCC.find_cc(self.project.arch, input_args, sp_delta, platform=self.project.simos.name)
370
+ full_input_args = self._consolidate_input_args(input_args)
371
+ full_input_args_copy = list(full_input_args) # input_args might be modified by find_cc()
372
+ cc = SimCC.find_cc(self.project.arch, full_input_args_copy, sp_delta, platform=self.project.simos.name)
373
+
374
+ # update input_args according to the difference between full_input_args and full_input_args_copy
375
+ for a in full_input_args:
376
+ if a not in full_input_args_copy and a in input_args:
377
+ input_args.remove(a)
346
378
 
347
379
  if cc is None:
348
380
  l.warning(
@@ -403,6 +435,8 @@ class CallingConventionAnalysis(Analysis):
403
435
  returns anything or not.
404
436
  """
405
437
 
438
+ assert self._function is not None
439
+
406
440
  if self._cfg is None:
407
441
  l.warning("CFG is not provided. Skip calling convention analysis at call sites.")
408
442
  return []
@@ -641,12 +675,6 @@ class CallingConventionAnalysis(Analysis):
641
675
  else:
642
676
  break
643
677
 
644
- if None in temp_args:
645
- first_none_idx = temp_args.index(None)
646
- # test if there is at least one argument set after None; if so, we ignore the first None
647
- if any(arg is not None for arg in temp_args[first_none_idx:]):
648
- temp_args[first_none_idx] = expected_args[first_none_idx]
649
-
650
678
  if None in temp_args:
651
679
  # we be very conservative here and ignore all arguments starting from the first missing one
652
680
  first_none_idx = temp_args.index(None)
@@ -656,29 +684,27 @@ class CallingConventionAnalysis(Analysis):
656
684
 
657
685
  def _adjust_prototype(
658
686
  self,
659
- proto: SimTypeFunction | None,
687
+ proto: SimTypeFunction,
660
688
  facts: list[CallSiteFact],
661
689
  update_arguments: int = UpdateArgumentsOption.DoNotUpdate,
662
- ) -> SimTypeFunction | None:
663
- if proto is None:
664
- return None
665
-
690
+ ) -> SimTypeFunction:
666
691
  # is the return value used anywhere?
667
692
  if facts:
668
693
  if all(fact.return_value_used is False for fact in facts):
669
694
  proto.returnty = SimTypeBottom(label="void")
670
695
  else:
671
- proto.returnty = SimTypeInt().with_arch(self.project.arch)
696
+ if proto.returnty is None or isinstance(proto.returnty, SimTypeBottom):
697
+ proto.returnty = SimTypeInt().with_arch(self.project.arch)
672
698
 
673
699
  if (
674
700
  update_arguments == UpdateArgumentsOption.AlwaysUpdate
675
701
  or (update_arguments == UpdateArgumentsOption.UpdateWhenCCHasNoArgs and not proto.args)
676
702
  ) and len({len(fact.args) for fact in facts}) == 1:
677
703
  fact = next(iter(facts))
678
- proto.args = [
704
+ proto.args = tuple(
679
705
  self._guess_arg_type(arg) if arg is not None else SimTypeInt().with_arch(self.project.arch)
680
706
  for arg in fact.args
681
- ]
707
+ )
682
708
 
683
709
  return proto
684
710
 
@@ -691,6 +717,8 @@ class CallingConventionAnalysis(Analysis):
691
717
  :return:
692
718
  """
693
719
 
720
+ assert self._function is not None
721
+
694
722
  args = set()
695
723
  ret_addr_offset = 0 if not self.project.arch.call_pushes_ret else self.project.arch.bytes
696
724
 
@@ -715,13 +743,8 @@ class CallingConventionAnalysis(Analysis):
715
743
  # a register variable, convert it to a register argument
716
744
  if not is_sane_register_variable(self.project.arch, variable.reg, variable.size, def_cc=def_cc):
717
745
  continue
718
- if self.project.arch.name in {"AMD64", "X86"} and variable.size < self.project.arch.bytes:
719
- # use complete registers on AMD64 and X86
720
- reg_name = self.project.arch.translate_register_name(variable.reg, size=self.project.arch.bytes)
721
- arg = SimRegArg(reg_name, self.project.arch.bytes)
722
- else:
723
- reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
724
- arg = SimRegArg(reg_name, variable.size)
746
+ reg_name = self.project.arch.translate_register_name(variable.reg, size=variable.size)
747
+ arg = SimRegArg(reg_name, variable.size)
725
748
  args.add(arg)
726
749
 
727
750
  accesses = var_manager.get_variable_accesses(variable)
@@ -763,15 +786,58 @@ class CallingConventionAnalysis(Analysis):
763
786
 
764
787
  return args.difference(restored_reg_vars)
765
788
 
766
- def _reorder_args(self, args: list[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
789
+ def _consolidate_input_args(self, input_args: set[SimRegArg | SimStackArg]) -> set[SimRegArg | SimStackArg]:
790
+ """
791
+ Consolidate register arguments by converting partial registers to full registers on certain architectures.
792
+
793
+ :param input_args: A set of input arguments.
794
+ :return: A set of consolidated input args.
795
+ """
796
+
797
+ if self.project.arch.name in {"AMD64", "X86"}:
798
+ new_input_args = set()
799
+ for a in input_args:
800
+ if isinstance(a, SimRegArg) and a.size < self.project.arch.bytes:
801
+ # use complete registers on AMD64 and X86
802
+ reg_offset, reg_size = self.project.arch.registers[a.reg_name]
803
+ full_reg_offset, full_reg_size = get_reg_offset_base_and_size(
804
+ reg_offset, self.project.arch, size=reg_size
805
+ )
806
+ full_reg_name = self.project.arch.translate_register_name(full_reg_offset, size=full_reg_size)
807
+ arg = SimRegArg(full_reg_name, full_reg_size)
808
+ if arg not in new_input_args:
809
+ new_input_args.add(arg)
810
+ else:
811
+ new_input_args.add(a)
812
+ return new_input_args
813
+
814
+ return input_args
815
+
816
+ def _reorder_args(self, args: set[SimRegArg | SimStackArg], cc: SimCC) -> list[SimRegArg | SimStackArg]:
767
817
  """
768
818
  Reorder arguments according to the calling convention identified.
769
819
 
770
- :param args: A list of arguments that haven't been ordered.
820
+ :param args: A set of arguments that haven't been ordered.
771
821
  :param cc: The identified calling convention.
772
822
  :return: A reordered list of args.
773
823
  """
774
824
 
825
+ def _is_same_reg(rn0: str, rn1: str) -> bool:
826
+ """
827
+ Check if rn0 and rn1 belong to the same base register.
828
+
829
+ :param rn0: Register name of the first register.
830
+ :param rn1: Register name of the second register.
831
+ :return: True if they belong to the same base register; False otherwise.
832
+ """
833
+ if rn0 == rn1:
834
+ return True
835
+ off0, sz0 = self.project.arch.registers[rn0]
836
+ full_off0 = get_reg_offset_base(off0, self.project.arch, sz0)
837
+ off1, sz1 = self.project.arch.registers[rn1]
838
+ full_off1 = get_reg_offset_base(off1, self.project.arch, sz1)
839
+ return full_off0 == full_off1
840
+
775
841
  reg_args = []
776
842
 
777
843
  # split args into two lists
@@ -790,7 +856,7 @@ class CallingConventionAnalysis(Analysis):
790
856
  # match int args first
791
857
  for reg_name in cc.ARG_REGS:
792
858
  try:
793
- arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and a.reg_name == reg_name))
859
+ arg = next(iter(a for a in int_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name)))
794
860
  except StopIteration:
795
861
  # have we reached the end of the args list?
796
862
  if [a for a in int_args if isinstance(a, SimRegArg)] or len(stack_int_args) > 0:
@@ -806,7 +872,9 @@ class CallingConventionAnalysis(Analysis):
806
872
  if fp_args:
807
873
  for reg_name in cc.FP_ARG_REGS:
808
874
  try:
809
- arg = next(iter(a for a in fp_args if isinstance(a, SimRegArg) and a.reg_name == reg_name))
875
+ arg = next(
876
+ iter(a for a in fp_args if isinstance(a, SimRegArg) and _is_same_reg(a.reg_name, reg_name))
877
+ )
810
878
  except StopIteration:
811
879
  # have we reached the end of the args list?
812
880
  if [a for a in fp_args if isinstance(a, SimRegArg)] or len(stack_fp_args) > 0:
@@ -839,17 +907,27 @@ class CallingConventionAnalysis(Analysis):
839
907
  return SimTypeBottom()
840
908
 
841
909
  def _guess_retval_type(self, cc: SimCC, ret_val_size: int | None) -> SimType:
910
+ assert self._function is not None
911
+
842
912
  if cc.FP_RETURN_VAL and self._function.ret_sites:
843
913
  # examine the last block of the function and see which registers are assigned to
844
914
  for ret_block in self._function.ret_sites:
915
+ fpretval_updated, retval_updated = False, False
916
+ fp_reg_size = 0
845
917
  irsb = self.project.factory.block(ret_block.addr, size=ret_block.size).vex
846
918
  for stmt in irsb.statements:
847
919
  if isinstance(stmt, Put) and isinstance(stmt.data, RdTmp):
848
- reg_size = irsb.tyenv.sizeof(stmt.data.tmp) // self.project.arch.byte_width
920
+ reg_size = irsb.tyenv.sizeof(stmt.data.tmp) // self.project.arch.byte_width # type: ignore
849
921
  reg_name = self.project.arch.translate_register_name(stmt.offset, size=reg_size)
850
- if reg_name == cc.FP_RETURN_VAL.reg_name:
851
- # possibly float
852
- return SimTypeFloat() if reg_size == 4 else SimTypeDouble()
922
+ if isinstance(cc.FP_RETURN_VAL, SimRegArg) and reg_name == cc.FP_RETURN_VAL.reg_name:
923
+ fpretval_updated = True
924
+ fp_reg_size = reg_size
925
+ elif isinstance(cc.RETURN_VAL, SimRegArg) and reg_name == cc.RETURN_VAL.reg_name:
926
+ retval_updated = True
927
+
928
+ if fpretval_updated and not retval_updated:
929
+ # possibly float
930
+ return SimTypeFloat() if fp_reg_size == 4 else SimTypeDouble()
853
931
 
854
932
  if ret_val_size is not None:
855
933
  if ret_val_size == 1:
@@ -861,12 +939,15 @@ class CallingConventionAnalysis(Analysis):
861
939
  if 5 <= ret_val_size <= 8:
862
940
  return SimTypeLongLong()
863
941
 
864
- # fallback
865
- return SimTypeInt() if cc.arch.bits == 32 else SimTypeLongLong()
942
+ return SimTypeBottom(label="void")
866
943
 
867
944
  @staticmethod
868
945
  def _likely_saving_temp_reg(ail_block: ailment.Block, d: Definition, all_reg_defs: set[Definition]) -> bool:
869
- if d.codeloc.block_addr == ail_block.addr and d.codeloc.stmt_idx < len(ail_block.statements):
946
+ if (
947
+ d.codeloc.block_addr == ail_block.addr
948
+ and d.codeloc.stmt_idx is not None
949
+ and d.codeloc.stmt_idx < len(ail_block.statements)
950
+ ):
870
951
  stmt = ail_block.statements[d.codeloc.stmt_idx]
871
952
  if isinstance(stmt, ailment.Stmt.Assignment) and isinstance(stmt.src, ailment.Expr.Register):
872
953
  src_offset = stmt.src.reg_offset
@@ -884,6 +965,8 @@ class CallingConventionAnalysis(Analysis):
884
965
  # TODO: Use a better pattern matching approach
885
966
  if len(func.block_addrs_set) < 3:
886
967
  return False, None
968
+ if func.startpoint is None:
969
+ return False, None
887
970
 
888
971
  head = func.startpoint
889
972
  out_edges = list(func.transition_graph.out_edges(head, data=True))
angr/analyses/calling_convention/fact_collector.py CHANGED
@@ -90,7 +90,7 @@ binop_handler = SimEngineNostmtVEX[FactCollectorState, claripy.ast.BV, FactColle
90
90
 
91
91
  class SimEngineFactCollectorVEX(
92
92
  SimEngineNostmtVEX[FactCollectorState, SpOffset | RegisterOffset | int, None],
93
- SimEngineLight[type[FactCollectorState], SpOffset | RegisterOffset | int, Block, None],
93
+ SimEngineLight[FactCollectorState, SpOffset | RegisterOffset | int, Block, None],
94
94
  ):
95
95
  """
96
96
  THe engine for FactCollector.
@@ -101,7 +101,7 @@ class SimEngineFactCollectorVEX(
101
101
  super().__init__(project)
102
102
 
103
103
  def _process_block_end(self, stmt_result: list, whitelist: set[int] | None) -> None:
104
- if self.block.vex.jumpkind == "Ijk_Call":
104
+ if self.block.vex.jumpkind == "Ijk_Call" and self.arch.ret_offset is not None:
105
105
  self.state.register_written(self.arch.ret_offset, self.arch.bytes)
106
106
 
107
107
  def _top(self, bits: int):
@@ -110,7 +110,7 @@ class SimEngineFactCollectorVEX(
110
110
  def _is_top(self, expr: Any) -> bool:
111
111
  raise NotImplementedError
112
112
 
113
- def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.IRExpr) -> Any:
113
+ def _handle_conversion(self, from_size: int, to_size: int, signed: bool, operand: pyvex.expr.IRExpr) -> Any:
114
114
  return None
115
115
 
116
116
  def _handle_stmt_Put(self, stmt):
@@ -142,9 +142,9 @@ class SimEngineFactCollectorVEX(
142
142
  return expr.con.value
143
143
 
144
144
  def _handle_expr_GSPTR(self, expr):
145
- return None
145
+ return 0
146
146
 
147
- def _handle_expr_Get(self, expr) -> SpOffset | None:
147
+ def _handle_expr_Get(self, expr) -> SpOffset | RegisterOffset:
148
148
  if expr.offset == self.arch.sp_offset:
149
149
  return SpOffset(self.arch.bits, self.state.sp_value, is_base=False)
150
150
  if expr.offset == self.arch.bp_offset and not self.bp_as_gpr:
@@ -304,7 +304,10 @@ class FactCollector(Analysis):
304
304
 
305
305
  def _handle_function(self, state: FactCollectorState, func: Function) -> None:
306
306
  try:
307
- arg_locs = func.calling_convention.arg_locs(func.prototype)
307
+ if func.calling_convention is not None and func.prototype is not None:
308
+ arg_locs = func.calling_convention.arg_locs(func.prototype)
309
+ else:
310
+ return
308
311
  except (TypeError, ValueError):
309
312
  return
310
313
 
@@ -355,6 +358,7 @@ class FactCollector(Analysis):
355
358
 
356
359
  if isinstance(node, BlockNode) and node.size == 0:
357
360
  continue
361
+
358
362
  if isinstance(node, HookNode):
359
363
  # attempt to convert it into a function
360
364
  if self.kb.functions.contains_addr(node.addr):
@@ -369,17 +373,43 @@ class FactCollector(Analysis):
369
373
  and not isinstance(node.prototype.returnty, SimTypeBottom)
370
374
  ):
371
375
  # assume the function overwrites the return variable
372
- retval_size = (
373
- node.prototype.returnty.with_arch(self.project.arch).size // self.project.arch.byte_width
374
- )
376
+ returnty_size = node.prototype.returnty.with_arch(self.project.arch).size
377
+ assert returnty_size is not None
378
+ retval_size = returnty_size // self.project.arch.byte_width
375
379
  retval_sizes.append(retval_size)
376
380
  continue
377
381
 
382
+ # if this block ends with a call to a function, we process the function first
383
+ func_succs = [
384
+ succ
385
+ for succ in func_graph.successors(node)
386
+ if isinstance(succ, (Function, HookNode)) or self.kb.functions.contains_addr(succ.addr)
387
+ ]
388
+ if len(func_succs) == 1:
389
+ func_succ = func_succs[0]
390
+ if isinstance(func_succ, (BlockNode, HookNode)) and self.kb.functions.contains_addr(func_succ.addr):
391
+ # attempt to convert it into a function
392
+ func_succ = self.kb.functions.get_by_addr(func_succ.addr)
393
+ if isinstance(func_succ, Function):
394
+ if (
395
+ func_succ.calling_convention is not None
396
+ and func_succ.prototype is not None
397
+ and func_succ.prototype.returnty is not None
398
+ and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
399
+ ):
400
+ # assume the function overwrites the return variable
401
+ returnty_size = func_succ.prototype.returnty.with_arch(self.project.arch).size
402
+ assert returnty_size is not None
403
+ retval_size = returnty_size // self.project.arch.byte_width
404
+ retval_sizes.append(retval_size)
405
+ continue
406
+
378
407
  block = self.project.factory.block(node.addr, size=node.size)
379
408
  # scan the block statements backwards to find writes to the return value register
380
409
  retval_size = None
381
410
  for stmt in reversed(block.vex.statements):
382
411
  if isinstance(stmt, pyvex.IRStmt.Put):
412
+ assert block.vex.tyenv is not None
383
413
  size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
384
414
  if stmt.offset == retreg_offset:
385
415
  retval_size = max(size, 1)
@@ -391,9 +421,9 @@ class FactCollector(Analysis):
391
421
  for pred, _, data in func_graph.in_edges(node, data=True):
392
422
  edge_type = data.get("type")
393
423
  if pred not in traversed and depth + 1 <= self._max_depth:
394
- if edge_type == "fake_return":
424
+ if edge_type == "call":
395
425
  continue
396
- if edge_type in {"transition", "call"}:
426
+ if edge_type in {"transition", "fake_return"}:
397
427
  queue.append((depth + 1, pred))
398
428
 
399
429
  self.retval_size = max(retval_sizes) if retval_sizes else None
@@ -472,6 +502,7 @@ class FactCollector(Analysis):
472
502
  ):
473
503
  tmps[stmt.tmp] = "sp"
474
504
  if isinstance(stmt, pyvex.IRStmt.Put):
505
+ assert block.vex.tyenv is not None
475
506
  size = stmt.data.result_size(block.vex.tyenv) // self.project.arch.byte_width
476
507
  # is the data loaded from the stack?
477
508
  if (
@@ -532,13 +563,8 @@ class FactCollector(Analysis):
532
563
  ):
533
564
  continue
534
565
  reg_offset_created.add(offset)
535
- if self.project.arch.name in {"AMD64", "X86"} and size < self.project.arch.bytes:
536
- # use complete registers on AMD64 and X86
537
- reg_name = self.project.arch.translate_register_name(offset, size=self.project.arch.bytes)
538
- arg = SimRegArg(reg_name, self.project.arch.bytes)
539
- else:
540
- reg_name = self.project.arch.translate_register_name(offset, size=size)
541
- arg = SimRegArg(reg_name, size)
566
+ reg_name = self.project.arch.translate_register_name(offset, size=size)
567
+ arg = SimRegArg(reg_name, size)
542
568
  self.input_args.append(arg)
543
569
 
544
570
  stack_offset_created = set()
angr/analyses/calling_convention/utils.py CHANGED
@@ -9,7 +9,9 @@ from angr.calling_conventions import SimCC
9
9
  l = logging.getLogger(__name__)
10
10
 
11
11
 
12
- def is_sane_register_variable(arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | None = None) -> bool:
12
+ def is_sane_register_variable(
13
+ arch: archinfo.Arch, reg_offset: int, reg_size: int, def_cc: SimCC | type[SimCC] | None = None
14
+ ) -> bool:
13
15
  """
14
16
  Filters all registers that are surly not members of function arguments.
15
17
  This can be seen as a workaround, since VariableRecoveryFast sometimes gives input variables of cc_ndep (which
angr/analyses/cfg/cfg_base.py CHANGED
@@ -1421,6 +1421,19 @@ class CFGBase(Analysis):
1421
1421
  # We gotta create a new one
1422
1422
  l.error("normalize(): Please report it to Fish.")
1423
1423
 
1424
+ # update the jump tables dict and the indirect jumps dict
1425
+ if smallest_node.addr not in self.model.jump_tables:
1426
+ for n in other_nodes:
1427
+ if n.addr in self.model.jump_tables:
1428
+ self.model.jump_tables[n.addr].addr = smallest_node.addr
1429
+ self.model.jump_tables[smallest_node.addr] = self.model.jump_tables[n.addr]
1430
+ break
1431
+ if smallest_node.addr not in self.indirect_jumps:
1432
+ for n in other_nodes:
1433
+ if n.addr in self.indirect_jumps:
1434
+ self.indirect_jumps[n.addr].addr = smallest_node.addr
1435
+ self.indirect_jumps[smallest_node.addr] = self.indirect_jumps[n.addr]
1436
+ break
1424
1437
  # deal with duplicated entries in self.jump_tables and self.indirect_jumps
1425
1438
  if smallest_node.addr in self.model.jump_tables:
1426
1439
  for n in other_nodes:
angr/analyses/cfg/cfg_fast.py CHANGED
@@ -1524,6 +1524,17 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1524
1524
  }:
1525
1525
  func.info["is_rust_probestack"] = True
1526
1526
 
1527
+ # determine if the function is __alloca_probe
1528
+ if func is not None and len(func.block_addrs_set) == 4:
1529
+ block_bytes = {func.get_block(block_addr).bytes for block_addr in func.block_addrs_set}
1530
+ if block_bytes == {
1531
+ b"H\x83\xec\x10L\x89\x14$L\x89\\$\x08M3\xdbL\x8dT$\x18L+\xd0M\x0fB\xd3eL\x8b\x1c%\x10\x00\x00\x00M;\xd3s\x16",
1532
+ b"fA\x81\xe2\x00\xf0M\x8d\x9b\x00\xf0\xff\xffA\xc6\x03\x00M;\xd3u\xf0",
1533
+ b"M\x8d\x9b\x00\xf0\xff\xffA\xc6\x03\x00M;\xd3u\xf0",
1534
+ b"L\x8b\x14$L\x8b\\$\x08H\x83\xc4\x10\xc3",
1535
+ }:
1536
+ func.info["is_alloca_probe"] = True
1537
+
1527
1538
  if self._collect_data_ref and self.project is not None and ":" in self.project.arch.name:
1528
1539
  # this is a pcode arch - use Clinic to recover data references
1529
1540
 
angr/analyses/cfg/indirect_jump_resolvers/jumptable.py CHANGED
@@ -182,23 +182,24 @@ class ConstantValueManager:
182
182
 
183
183
  # determine blocks to run FCP on
184
184
 
185
- # - include at most three levels of successors from the entrypoint
185
+ # - include at most three levels of superblock successors from the entrypoint
186
186
  startpoint = self.func.startpoint
187
187
  blocks = set()
188
- succs = [startpoint]
189
- for _ in range(3):
188
+ succ_and_levels = [(startpoint, 0)]
189
+ while succ_and_levels:
190
190
  new_succs = []
191
- for node in succs:
191
+ for node, level in succ_and_levels:
192
192
  if node in blocks:
193
193
  continue
194
194
  blocks.add(node)
195
195
  if node.addr == self.indirect_jump_addr:
196
196
  # stop at the indirect jump block
197
197
  continue
198
- new_succs += list(self.func.graph.successors(node))
199
- succs = new_succs
200
- if not succs:
201
- break
198
+ for _, succ, data in self.func.graph.out_edges(node, data=True):
199
+ new_level = level if data.get("type") == "fake_return" else level + 1
200
+ if new_level <= 3:
201
+ new_succs.append((succ, new_level))
202
+ succ_and_levels = new_succs
202
203
 
203
204
  # - include at most six levels of predecessors from the indirect jump block
204
205
  ij_block = self.func.get_node(self.indirect_jump_addr)