angr 9.2.142__py3-none-win_amd64.whl → 9.2.144__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of angr might be problematic.

Files changed (61)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +22 -10
  3. angr/analyses/calling_convention/fact_collector.py +72 -14
  4. angr/analyses/cfg/cfg_base.py +7 -2
  5. angr/analyses/cfg/cfg_emulated.py +13 -4
  6. angr/analyses/cfg/cfg_fast.py +21 -60
  7. angr/analyses/cfg/indirect_jump_resolvers/__init__.py +2 -0
  8. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
  9. angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py +107 -0
  10. angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py +2 -1
  11. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +6 -102
  12. angr/analyses/cfg/indirect_jump_resolvers/syscall_resolver.py +92 -0
  13. angr/analyses/complete_calling_conventions.py +18 -5
  14. angr/analyses/decompiler/ail_simplifier.py +95 -65
  15. angr/analyses/decompiler/clinic.py +162 -68
  16. angr/analyses/decompiler/decompiler.py +4 -4
  17. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +1 -1
  18. angr/analyses/decompiler/optimization_passes/condition_constprop.py +49 -14
  19. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -5
  21. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +5 -0
  22. angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
  23. angr/analyses/decompiler/peephole_optimizations/a_sub_a_shr_const_shr_const.py +37 -0
  24. angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
  25. angr/analyses/decompiler/sequence_walker.py +8 -0
  26. angr/analyses/decompiler/ssailification/rewriting_engine.py +2 -0
  27. angr/analyses/decompiler/ssailification/ssailification.py +10 -2
  28. angr/analyses/decompiler/ssailification/traversal_engine.py +17 -2
  29. angr/analyses/decompiler/structured_codegen/c.py +25 -4
  30. angr/analyses/decompiler/utils.py +13 -0
  31. angr/analyses/disassembly.py +3 -3
  32. angr/analyses/fcp/fcp.py +1 -4
  33. angr/analyses/s_propagator.py +40 -29
  34. angr/analyses/s_reaching_definitions/s_rda_model.py +45 -36
  35. angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
  36. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +41 -42
  37. angr/analyses/typehoon/dfa.py +13 -3
  38. angr/analyses/typehoon/typehoon.py +60 -18
  39. angr/analyses/typehoon/typevars.py +11 -7
  40. angr/analyses/variable_recovery/engine_ail.py +19 -23
  41. angr/analyses/variable_recovery/engine_base.py +26 -30
  42. angr/analyses/variable_recovery/variable_recovery_fast.py +17 -21
  43. angr/calling_conventions.py +18 -8
  44. angr/knowledge_plugins/functions/function.py +29 -15
  45. angr/knowledge_plugins/key_definitions/constants.py +2 -2
  46. angr/knowledge_plugins/key_definitions/liveness.py +4 -4
  47. angr/lib/angr_native.dll +0 -0
  48. angr/procedures/definitions/linux_kernel.py +5 -0
  49. angr/state_plugins/unicorn_engine.py +24 -8
  50. angr/storage/memory_mixins/paged_memory/page_backer_mixins.py +1 -2
  51. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +2 -2
  52. angr/utils/doms.py +40 -33
  53. angr/utils/graph.py +26 -20
  54. angr/utils/ssa/__init__.py +21 -14
  55. angr/utils/ssa/vvar_uses_collector.py +2 -2
  56. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/METADATA +11 -8
  57. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/RECORD +61 -58
  58. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/WHEEL +1 -1
  59. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/LICENSE +0 -0
  60. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/entry_points.txt +0 -0
  61. {angr-9.2.142.dist-info → angr-9.2.144.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
 # pylint: disable=wrong-import-position
 from __future__ import annotations
 
-__version__ = "9.2.142"
+__version__ = "9.2.144"
 
 if bytes is str:
     raise Exception(
angr/analyses/calling_convention/calling_convention.py CHANGED
@@ -220,9 +220,9 @@ class CallingConventionAnalysis(Analysis):
             self.prototype = prototype  # type: ignore
             return
         if self._function.is_plt:
-            r = self._analyze_plt()
-            if r is not None:
-                self.cc, self.prototype = r
+            r_plt = self._analyze_plt()
+            if r_plt is not None:
+                self.cc, self.prototype, self.prototype_libname = r_plt
             return
 
         r = self._analyze_function()
@@ -278,11 +278,11 @@ class CallingConventionAnalysis(Analysis):
             self.cc = cc
             self.prototype = prototype
 
-    def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None] | None:
+    def _analyze_plt(self) -> tuple[SimCC, SimTypeFunction | None, str | None] | None:
         """
         Get the calling convention for a PLT stub.
 
-        :return:    A calling convention.
+        :return:    A calling convention, the function type, as well as the library name if available.
         """
         assert self._function is not None
 
@@ -326,11 +326,11 @@ class CallingConventionAnalysis(Analysis):
                 # we only take the prototype from the SimProcedure if
                 # - the SimProcedure is a function
                 # - the prototype of the SimProcedure is not guessed
-                return cc, hooker.prototype
+                return cc, hooker.prototype, hooker.library_name
             if real_func.prototype is not None:
-                return cc, real_func.prototype
+                return cc, real_func.prototype, real_func.prototype_libname
             else:
-                return cc, real_func.prototype
+                return cc, real_func.prototype, real_func.prototype_libname
 
         if self.analyze_callsites:
             # determine the calling convention by analyzing its callsites
@@ -344,7 +344,7 @@ class CallingConventionAnalysis(Analysis):
             prototype = self._adjust_prototype(
                 prototype, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate
             )
-            return cc, prototype
+            return cc, prototype, None
 
         return None
 
@@ -864,7 +864,19 @@ class CallingConventionAnalysis(Analysis):
             else:
                 int_args.append(arg)
 
-        stack_args = sorted([a for a in args if isinstance(a, SimStackArg)], key=lambda a: a.stack_offset)
+        initial_stack_args = sorted([a for a in args if isinstance(a, SimStackArg)], key=lambda a: a.stack_offset)
+        # ensure stack args are consecutive if necessary
+        if cc.STACKARG_SP_DIFF is not None and initial_stack_args:
+            arg_by_offset = {a.stack_offset: a for a in initial_stack_args}
+            init_stackarg_offset = cc.STACKARG_SP_DIFF + cc.STACKARG_SP_BUFF
+            int_arg_size = self.project.arch.bytes
+            for stackarg_offset in range(init_stackarg_offset, max(arg_by_offset), int_arg_size):
+                if stackarg_offset not in arg_by_offset:
+                    arg_by_offset[stackarg_offset] = SimStackArg(stackarg_offset, int_arg_size)
+            stack_args = [arg_by_offset[offset] for offset in sorted(arg_by_offset)]
+        else:
+            stack_args = initial_stack_args
+
         stack_int_args = [a for a in stack_args if not a.is_fp]
         stack_fp_args = [a for a in stack_args if a.is_fp]
         # match int args first
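The added block above back-fills any skipped stack slots so that the recovered stack arguments form a consecutive sequence. A standalone illustration of the idea follows (plain Python, not angr code; the offsets, word size, and starting offset are made-up values):

detected = {8, 24}      # stack offsets of the arguments that were actually observed
word = 8                # stands in for self.project.arch.bytes on a 64-bit target
start = 8               # stands in for cc.STACKARG_SP_DIFF + cc.STACKARG_SP_BUFF
offsets = set(detected)
for off in range(start, max(detected), word):
    offsets.add(off)    # fills in the missing slot at offset 16
print(sorted(offsets))  # [8, 16, 24]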
angr/analyses/calling_convention/fact_collector.py CHANGED
@@ -1,10 +1,12 @@
 # pylint:disable=too-many-boolean-expressions
 from __future__ import annotations
-from typing import Any
+from typing import Any, TYPE_CHECKING
+from collections import defaultdict
 
 import pyvex
 import claripy
 
+from angr import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
 from angr.utils.bits import s2u, u2s
 from angr.block import Block
 from angr.analyses.analysis import Analysis
@@ -13,9 +15,12 @@ from angr.knowledge_plugins.functions import Function
 from angr.codenode import BlockNode, HookNode
 from angr.engines.light import SimEngineNostmtVEX, SimEngineLight, SpOffset, RegisterOffset
 from angr.calling_conventions import SimRegArg, SimStackArg, default_cc
-from angr.sim_type import SimTypeBottom
+from angr.sim_type import SimTypeBottom, dereference_simtype, SimTypeFunction
 from .utils import is_sane_register_variable
 
+if TYPE_CHECKING:
+    from angr.codenode import CodeNode
+
 
 class FactCollectorState:
     """
@@ -26,6 +31,7 @@ class FactCollectorState:
         "bp_value",
         "callee_stored_regs",
         "reg_reads",
+        "reg_reads_count",
         "reg_writes",
         "simple_stack",
         "sp_value",
@@ -40,6 +46,7 @@
 
         self.callee_stored_regs: dict[int, int] = {}  # reg offset -> stack offset
         self.reg_reads = {}
+        self.reg_reads_count = defaultdict(int)
         self.reg_writes: set[int] = set()
         self.stack_reads = {}
         self.stack_writes: set[int] = set()
@@ -47,6 +54,7 @@
         self.bp_value = 0
 
     def register_read(self, offset: int, size_in_bytes: int):
+        self.reg_reads_count[offset] += 1
        if offset in self.reg_writes:
            return
        if offset not in self.reg_reads:
@@ -54,6 +62,14 @@
         else:
             self.reg_reads[offset] = max(self.reg_reads[offset], size_in_bytes)
 
+    def register_read_undo(self, offset: int) -> None:
+        if offset not in self.reg_reads or offset not in self.reg_reads_count:
+            return
+        self.reg_reads_count[offset] -= 1
+        if self.reg_reads_count[offset] == 0:
+            self.reg_reads.pop(offset)
+            self.reg_reads_count.pop(offset)
+
     def register_written(self, offset: int, size_in_bytes: int):
         for o in range(size_in_bytes):
             self.reg_writes.add(offset + o)
@@ -80,6 +96,7 @@
         new_state.sp_value = self.sp_value
         new_state.bp_value = self.bp_value
         new_state.simple_stack = self.simple_stack.copy()
+        new_state.reg_reads_count = self.reg_reads_count.copy()
         if with_tmps:
             new_state.tmps = self.tmps.copy()
         return new_state
@@ -115,6 +132,26 @@ class SimEngineFactCollectorVEX(
 
     def _handle_stmt_Put(self, stmt):
         v = self._expr(stmt.data)
+        # there are cases like VMOV.F32 S0, S0
+        # so we need to check if this register write is actually a no-op
+        if isinstance(stmt.data, pyvex.IRExpr.RdTmp):
+            t = self.state.tmps.get(stmt.data.tmp, None)
+            if isinstance(t, RegisterOffset) and t.reg == stmt.offset:
+                same_ins_read = False
+                for i in range(self.stmt_idx, -1, -1):
+                    if i >= self.block.vex.stmts_used:
+                        break
+                    prev_stmt = self.block.vex.statements[i]
+                    if isinstance(prev_stmt, pyvex.IRStmt.IMark):
+                        break
+                    if isinstance(prev_stmt, pyvex.IRStmt.WrTmp) and prev_stmt.tmp == stmt.data.tmp:
+                        same_ins_read = True
+                        break
+                if same_ins_read:
+                    # we need to revert the read operation as well
+                    self.state.register_read_undo(stmt.offset)
+                    return
+
         if stmt.offset == self.arch.sp_offset and isinstance(v, SpOffset):
             self.state.sp_value = v.offset
         elif stmt.offset == self.arch.bp_offset and isinstance(v, SpOffset):
@@ -206,7 +243,7 @@
     decision on the calling convention and prototype of a function.
     """
 
-    def __init__(self, func: Function, max_depth: int = 5):
+    def __init__(self, func: Function, max_depth: int = 30):
         self.function = func
         self._max_depth = max_depth
 
@@ -224,9 +261,12 @@
         callee_restored_regs = self._analyze_endpoints_for_restored_regs()
         self._determine_input_args(end_states, callee_restored_regs)
 
-    def _analyze_startpoint(self):
+    def _analyze_startpoint(self) -> list[FactCollectorState]:
         func_graph = self.function.transition_graph
         startpoint = self.function.startpoint
+        if startpoint is None:
+            return []
+
         bp_as_gpr = self.function.info.get("bp_as_gpr", False)
         engine = SimEngineFactCollectorVEX(self.project, bp_as_gpr)
         init_state = FactCollectorState()
@@ -235,9 +275,9 @@
         init_state.bp_value = init_state.sp_value
 
         traversed = set()
-        queue: list[tuple[int, FactCollectorState, BlockNode | HookNode | Function, BlockNode | HookNode | None]] = [
-            (0, init_state, startpoint, None)
-        ]
+        queue: list[
+            tuple[int, FactCollectorState, CodeNode | BlockNode | HookNode | Function, BlockNode | HookNode | None]
+        ] = [(0, init_state, startpoint, None)]
         end_states: list[FactCollectorState] = []
         while queue:
             depth, state, node, retnode = queue.pop(0)
@@ -278,14 +318,17 @@
             for _, succ, data in func_graph.out_edges(node, data=True):
                 edge_type = data.get("type")
                 outside = data.get("outside", False)
-                if succ not in traversed and depth + 1 <= self._max_depth:
+                if depth + 1 <= self._max_depth:
                     if edge_type == "fake_return":
-                        ret_succ = succ
+                        if succ not in traversed:
+                            ret_succ = succ
                     elif edge_type == "transition" and not outside:
-                        successor_added = True
-                        queue.append((depth + 1, state.copy(), succ, None))
+                        if succ not in traversed:
+                            successor_added = True
+                            queue.append((depth + 1, state.copy(), succ, None))
                     elif edge_type == "call" or (edge_type == "transition" and outside):
                         # a call or a tail-call
+                        # note that it's ok to traverse a called function multiple times
                        if not isinstance(succ, Function):
                            if self.kb.functions.contains_addr(succ.addr):
                                succ = self.kb.functions.get_by_addr(succ.addr)
@@ -398,9 +441,24 @@
                     and not isinstance(func_succ.prototype.returnty, SimTypeBottom)
                 ):
                     # assume the function overwrites the return variable
-                    returnty_size = func_succ.prototype.returnty.with_arch(self.project.arch).size
-                    assert returnty_size is not None
-                    retval_size = returnty_size // self.project.arch.byte_width
+                    proto = func_succ.prototype
+                    if func_succ.prototype_libname is not None:
+                        # we need to deref the prototype in case it uses SimTypeRef internally
+                        type_collections = []
+                        prototype_lib = SIM_LIBRARIES[func_succ.prototype_libname]
+                        if prototype_lib.type_collection_names:
+                            for typelib_name in prototype_lib.type_collection_names:
+                                type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
+                        proto = dereference_simtype(proto, type_collections)
+
+                    assert isinstance(proto, SimTypeFunction) and proto.returnty is not None
+                    returnty_size = proto.returnty.with_arch(self.project.arch).size
+                    if returnty_size is None:
+                        # it may be None if somehow we cannot resolve a SimTypeRef; we fall back to the full
+                        # machine word size
+                        retval_size = self.project.arch.bytes
+                    else:
+                        retval_size = returnty_size // self.project.arch.byte_width
                     retval_sizes.append(retval_size)
                     continue
 
angr/analyses/cfg/cfg_base.py CHANGED
@@ -1701,7 +1701,12 @@
             self._update_progress(progress)
 
             self._graph_bfs_custom(
-                self.graph, [fn], self._graph_traversal_handler, blockaddr_to_function, tmp_functions
+                self.graph,
+                [fn],
+                self._graph_traversal_handler,
+                blockaddr_to_function,
+                tmp_functions,
+                traversed_cfg_nodes,
             )
 
         to_remove = set()
@@ -2731,7 +2736,7 @@
             relifted = self.project.factory.block(block.addr, size=block.size, opt_level=1, cross_insn_opt=True).vex
         except SimError:
             return False, []
-        if isinstance(relifted.next, pyvex.IRExpr.Const):
+        if not relifted.jumpkind.startswith("Ijk_Sys") and isinstance(relifted.next, pyvex.IRExpr.Const):
             # yes!
             return True, [relifted.next.con.value]
 
angr/analyses/cfg/cfg_emulated.py CHANGED
@@ -1,9 +1,11 @@
 from __future__ import annotations
+from typing import TYPE_CHECKING
 import itertools
 import logging
 import sys
 from collections import defaultdict
 from functools import reduce
+import contextlib
 
 import angr
 import claripy
@@ -45,7 +47,10 @@ from angr.analyses.backward_slice import BackwardSlice
 from angr.analyses.loopfinder import LoopFinder, Loop
 from .cfg_base import CFGBase
 from .cfg_job_base import BlockID, CFGJobBase
-import contextlib
+
+if TYPE_CHECKING:
+    from angr.knowledge_plugins.cfg import CFGNode
+
 
 l = logging.getLogger(name=__name__)
 
@@ -505,6 +510,8 @@ class CFGEmulated(ForwardAnalysis, CFGBase):  # pylint: disable=abstract-method
         :return: None
         """
 
+        assert self._starts is not None
+
         if not isinstance(max_loop_unrolling_times, int) or max_loop_unrolling_times < 0:
             raise AngrCFGError(
                 "Max loop unrolling times must be set to an integer greater than or equal to 0 if "
@@ -586,6 +593,7 @@ class CFGEmulated(ForwardAnalysis, CFGBase):  # pylint: disable=abstract-method
 
             graph_copy.remove_node(new_end_node)
             src, dst = loop_backedge
+            assert src is not None and dst is not None
             if graph_copy.has_edge(src, dst):  # It might have been removed before
                 # Duplicate the dst node
                 new_dst = dst.copy()
@@ -713,9 +721,10 @@ class CFGEmulated(ForwardAnalysis, CFGBase):  # pylint: disable=abstract-method
         # FIXME: start should also take a CFGNode instance
 
         start_node = self.get_any_node(start)
+        assert start_node is not None
 
         node_wrapper = (start_node, 0)
-        stack = [node_wrapper]
+        stack: list[tuple[CFGNode, int]] = [node_wrapper]
         traversed_nodes = {start_node}
         subgraph_nodes = {start_node}
 
@@ -727,6 +736,7 @@ class CFGEmulated(ForwardAnalysis, CFGBase):  # pylint: disable=abstract-method
             edges = self.graph.out_edges(n, data=True)
 
             for _, dst, data in edges:
+                assert dst is not None
                 if dst not in traversed_nodes:
                     # We see a new node!
                     traversed_nodes.add(dst)
@@ -1687,9 +1697,8 @@
 
         for block_id in pending_exits_to_remove:
             l.debug(
-                "Removing all pending exits to %#x since the target function %#x does not return",
+                "Removing all pending exits to %#x since the target function does not return",
                 self._block_id_addr(block_id),
-                next(iter(self._pending_jobs[block_id])).returning_source,
             )
 
             for to_remove in self._pending_jobs[block_id]:
angr/analyses/cfg/cfg_fast.py CHANGED
@@ -31,13 +31,10 @@ from angr import sim_options as o
 from angr.errors import (
     AngrCFGError,
     AngrSkipJobNotice,
-    AngrUnsupportedSyscallError,
     SimEngineError,
     SimMemoryError,
     SimTranslationError,
     SimValueError,
-    SimOperationError,
-    SimError,
     SimIRSBNoDecodeError,
 )
 from angr.utils.constants import DEFAULT_STATEMENT
@@ -200,7 +197,7 @@ class PendingJobs:
             return self._pop_job(next(reversed(self._jobs.keys())))
 
         # Prioritize returning functions
-        for func_addr in reversed(self._jobs.keys()):
+        for func_addr in reversed(self._jobs):
             if func_addr not in self._returning_functions:
                 continue
             return self._pop_job(func_addr)
@@ -621,6 +618,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase):  # pylin
         nodecode_window_size=512,
         nodecode_threshold=0.3,
         nodecode_step=16483,
+        check_funcret_max_job=500,
         indirect_calls_always_return: bool | None = None,
         jumptable_resolver_resolves_calls: bool | None = None,
         start=None,  # deprecated
@@ -680,6 +678,12 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase):  # pylin
                                                 table resolver and must be resolved using their specific resolvers. By default,
                                                 we will only disable JumpTableResolver from resolving indirect calls for large
                                                 binaries (region > 50 KB).
+        :param check_funcret_max_job            When popping return-site jobs out of the job queue, angr will prioritize jobs
+                                                for which the callee is known to return. This check may be slow when there are
+                                                a large amount of jobs in different caller functions, and this situation often
+                                                occurs in obfuscated binaries where many functions never return. This parameter
+                                                acts as a threshold to disable this check when the number of jobs in the queue
+                                                exceeds this threshold.
         :param int start:                       (Deprecated) The beginning address of CFG recovery.
         :param int end:                         (Deprecated) The end address of CFG recovery.
         :param CFGArchOptions arch_options:     Architecture-specific options.
@@ -768,6 +772,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase):  # pylin
         self._force_complete_scan = force_complete_scan
         self._use_elf_eh_frame = elf_eh_frame
         self._use_exceptions = exceptions
+        self._check_funcret_max_job = check_funcret_max_job
 
         self._nodecode_window_size = nodecode_window_size
         self._nodecode_threshold = nodecode_threshold
@@ -2576,38 +2581,16 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase):  # pylin
         jobs: list[CFGJob] = []
 
         if is_syscall:
-            # Fix the target_addr for syscalls
-            tmp_state = self.project.factory.blank_state(
-                mode="fastpath",
-                addr=cfg_node.addr,
-                add_options={o.SYMBOL_FILL_UNCONSTRAINED_MEMORY, o.SYMBOL_FILL_UNCONSTRAINED_REGISTERS},
+            resolved, resolved_targets, ij = self._indirect_jump_encountered(
+                addr, cfg_node, irsb, current_function_addr, stmt_idx
             )
-            # Find the first successor with a syscall jumpkind
-            successors = self._simulate_block_with_resilience(tmp_state)
-            if successors is not None:
-                succ = next(
-                    iter(
-                        succ
-                        for succ in successors.flat_successors
-                        if succ.history.jumpkind and succ.history.jumpkind.startswith("Ijk_Sys")
-                    ),
-                    None,
-                )
-            else:
-                succ = None
-            if succ is None:
-                # For some reason, there is no such successor with a syscall jumpkind
-                target_addr = self._unresolvable_call_target_addr
+            target_addr = None
+            if resolved:
+                if len(resolved_targets) == 1:
+                    (target_addr,) = resolved_targets
             else:
-                try:
-                    syscall_stub = self.project.simos.syscall(succ)
-                    if syscall_stub:  # can be None if simos is not a subclass of SimUserspace
-                        syscall_addr = syscall_stub.addr
-                        target_addr = syscall_addr
-                    else:
-                        target_addr = self._unresolvable_call_target_addr
-                except AngrUnsupportedSyscallError:
-                    target_addr = self._unresolvable_call_target_addr
+                if ij is not None:
+                    self._indirect_jumps_to_resolve.add(ij)
 
         new_function_addr = target_addr.method if isinstance(target_addr, SootAddressDescriptor) else target_addr
 
@@ -2732,30 +2715,6 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase):  # pylin
 
         return jobs
 
-    def _simulate_block_with_resilience(self, state):
-        """
-        Execute a basic block with "On Error Resume Next". Give up when there is no way moving forward.
-
-        :param SimState state:  The initial state to start simulation with.
-        :return:                A SimSuccessors instance or None if we are unable to resume execution with resilience.
-        :rtype:                 SimSuccessors or None
-        """
-
-        stmt_idx = 0
-        successors = None  # make PyCharm's linting happy
-
-        while True:
-            try:
-                successors = self.project.factory.successors(state, skip_stmts=stmt_idx)
-                break
-            except SimOperationError as ex:
-                stmt_idx = ex.stmt_idx + 1
-                continue
-            except SimError:
-                return None
-
-        return successors
-
     def _is_branching_to_outside(self, src_addr, target_addr, current_function_addr):
         """
         Determine if a branch is branching to a different function (i.e., branching to outside the current function).
@@ -3236,7 +3195,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase):  # pylin
         if jump.jumpkind == "Ijk_Boring":
             unresolvable_target_addr = self._unresolvable_jump_target_addr
             simprocedure_name = "UnresolvableJumpTarget"
-        elif jump.jumpkind == "Ijk_Call":
+        elif jump.jumpkind == "Ijk_Call" or jump.jumpkind.startswith("Ijk_Sys"):
             unresolvable_target_addr = self._unresolvable_call_target_addr
             simprocedure_name = "UnresolvableCallTarget"
         else:
@@ -3707,7 +3666,9 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase):  # pylin
 
     def _pop_pending_job(self, returning=True) -> CFGJob | None:
         while self._pending_jobs:
-            job = self._pending_jobs.pop_job(returning=returning)
+            job = self._pending_jobs.pop_job(
+                returning=returning if len(self._pending_jobs) < self._check_funcret_max_job else False
+            )
             if job is not None and job.job_type == CFGJobType.DATAREF_HINTS and self._seg_list.is_occupied(job.addr):
                 # ignore this hint from data refs because the target address has already been analyzed
                 continue
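For context, a minimal usage sketch of the new check_funcret_max_job knob introduced above (the binary path is a placeholder; 500 is the default shown in the diff). Once the pending-job queue holds at least this many jobs, CFGFast stops prioritizing jobs whose callees are known to return:

import angr

proj = angr.Project("/path/to/binary", auto_load_libs=False)  # placeholder path
# Raise or lower the threshold depending on how many never-returning functions
# the target binary is expected to contain.
cfg = proj.analyses.CFGFast(check_funcret_max_job=500)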
angr/analyses/cfg/indirect_jump_resolvers/__init__.py CHANGED
@@ -10,6 +10,7 @@ from .arm_elf_fast import ArmElfFastResolver
 from .const_resolver import ConstantResolver
 from .amd64_pe_iat import AMD64PeIatResolver
 from .memload_resolver import MemoryLoadResolver
+from .syscall_resolver import SyscallResolver
 
 
 __all__ = (
@@ -21,6 +22,7 @@ __all__ = (
     "MemoryLoadResolver",
     "MipsElfFastResolver",
     "MipsElfGotResolver",
+    "SyscallResolver",
     "X86ElfPicPltResolver",
     "X86PeIatResolver",
 )
angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py CHANGED
@@ -43,11 +43,22 @@ class ConstantResolver(IndirectJumpResolver):
     be resolved to a constant value. This resolver must be run after all other more specific resolvers.
     """
 
-    def __init__(self, project):
+    def __init__(self, project, max_func_nodes: int = 512):
         super().__init__(project, timeless=False)
+        self.max_func_nodes = max_func_nodes
 
     def filter(self, cfg, addr, func_addr, block, jumpkind):
+        if not cfg.functions.contains_addr(func_addr):
+            # the function does not exist
+            return False
+
+        # for performance, we don't run constant resolver if the function is too large
+        func = cfg.functions.get_by_addr(func_addr)
+        if len(func.block_addrs_set) > self.max_func_nodes:
+            return False
+
         # we support both an indirect call and jump since the value can be resolved
+
         return jumpkind in {"Ijk_Boring", "Ijk_Call"}
 
     def resolve(  # pylint:disable=unused-argument
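A hedged sketch of how the new max_func_nodes cutoff could be tuned. It assumes CFGFast's existing indirect_jump_resolvers argument (not part of this diff) for passing a custom resolver list; the path and limit are placeholders:

import angr
from angr.analyses.cfg.indirect_jump_resolvers import ConstantResolver

proj = angr.Project("/path/to/binary", auto_load_libs=False)  # placeholder path
# Allow the constant resolver to run on functions with up to 1024 basic blocks
# instead of the default 512.
resolver = ConstantResolver(proj, max_func_nodes=1024)
cfg = proj.analyses.CFGFast(indirect_jump_resolvers=[resolver])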
angr/analyses/cfg/indirect_jump_resolvers/constant_value_manager.py ADDED
@@ -0,0 +1,107 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+import logging
+
+import claripy
+
+from angr.code_location import CodeLocation
+from angr.project import Project
+from angr.analyses.propagator.vex_vars import VEXReg
+from .propagator_utils import PropagatorLoadCallback
+
+if TYPE_CHECKING:
+    from angr import SimState
+    from angr.knowledge_plugins import Function
+
+
+l = logging.getLogger(name=__name__)
+
+
+class ConstantValueManager:
+    """
+    Manages the loading of registers who hold constant values.
+    """
+
+    __slots__ = (
+        "func",
+        "indirect_jump_addr",
+        "kb",
+        "mapping",
+        "project",
+    )
+
+    def __init__(self, project: Project, kb, func: Function, ij_addr: int):
+        self.project = project
+        self.kb = kb
+        self.func = func
+        self.indirect_jump_addr = ij_addr
+
+        self.mapping: dict[Any, dict[Any, claripy.ast.Base]] | None = None
+
+    def reg_read_callback(self, state: SimState):
+        if self.mapping is None:
+            self._build_mapping()
+        assert self.mapping is not None
+
+        codeloc = CodeLocation(state.scratch.bbl_addr, state.scratch.stmt_idx, ins_addr=state.scratch.ins_addr)
+        if codeloc in self.mapping:
+            reg_read_offset = state.inspect.reg_read_offset
+            if isinstance(reg_read_offset, claripy.ast.BV) and reg_read_offset.op == "BVV":
+                reg_read_offset = reg_read_offset.args[0]
+            variable = VEXReg(reg_read_offset, state.inspect.reg_read_length)
+            if variable in self.mapping[codeloc]:
+                v = self.mapping[codeloc][variable]
+                if isinstance(v, int):
+                    v = claripy.BVV(v, state.inspect.reg_read_length * state.arch.byte_width)
+                state.inspect.reg_read_expr = v
+
+    def _build_mapping(self):
+        # constant propagation
+        l.debug("JumpTable: Propagating for %r at %#x.", self.func, self.indirect_jump_addr)
+
+        # determine blocks to run FCP on
+
+        # - include at most three levels of superblock successors from the entrypoint
+        self.mapping = {}
+        startpoint = self.func.startpoint
+        if startpoint is None:
+            return
+
+        blocks = set()
+        succ_and_levels = [(startpoint, 0)]
+        while succ_and_levels:
+            new_succs = []
+            for node, level in succ_and_levels:
+                if node in blocks:
+                    continue
+                blocks.add(node)
+                if node.addr == self.indirect_jump_addr:
+                    # stop at the indirect jump block
+                    continue
+                for _, succ, data in self.func.graph.out_edges(node, data=True):
+                    new_level = level if data.get("type") == "fake_return" else level + 1
+                    if new_level <= 3:
+                        new_succs.append((succ, new_level))
+            succ_and_levels = new_succs
+
+        # - include at most six levels of predecessors from the indirect jump block
+        ij_block = self.func.get_node(self.indirect_jump_addr)
+        preds = [ij_block]
+        for _ in range(6):
+            new_preds = []
+            for node in preds:
+                if node in blocks:
+                    continue
+                blocks.add(node)
+                new_preds += list(self.func.graph.predecessors(node))
+            preds = new_preds
+            if not preds:
+                break
+
+        prop = self.project.analyses.FastConstantPropagation(
+            self.func,
+            blocks=blocks,
+            vex_cross_insn_opt=True,
+            load_callback=PropagatorLoadCallback(self.project).propagator_load_callback,
+        )
+        self.mapping = prop.replacements
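ConstantValueManager.reg_read_callback reads state.inspect.reg_read_offset and overrides state.inspect.reg_read_expr, so it is meant to be attached as a reg_read inspection breakpoint. The wiring below is a hedged sketch rather than code from this release: the binary path, function name, jump address, and the breakpoint phase (BP_BEFORE) are assumptions about how the resolver consumes it:

import angr
from angr.analyses.cfg.indirect_jump_resolvers.constant_value_manager import ConstantValueManager

proj = angr.Project("/path/to/binary", auto_load_libs=False)       # placeholder path
proj.analyses.CFGFast()
func = proj.kb.functions["main"]                                   # placeholder function
cvm = ConstantValueManager(proj, proj.kb, func, ij_addr=0x401234)  # placeholder jump address
state = proj.factory.blank_state(addr=func.addr, mode="fastpath")
# The hook phase is an assumption; the callback substitutes a propagated constant
# into the register read expression when one is known for the code location.
state.inspect.b("reg_read", when=angr.BP_BEFORE, action=cvm.reg_read_callback)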
angr/analyses/cfg/indirect_jump_resolvers/default_resolvers.py CHANGED
@@ -11,6 +11,7 @@ from . import ConstantResolver
 from . import ArmElfFastResolver
 from . import AMD64PeIatResolver
 from . import MipsElfGotResolver
+from . import SyscallResolver
 
 DEFAULT_RESOLVERS = {
     "X86": {
@@ -58,7 +59,7 @@
             ArmElfFastResolver,
         ]
     },
-    "ALL": [MemoryLoadResolver, JumpTableResolver, ConstantResolver],
+    "ALL": [MemoryLoadResolver, JumpTableResolver, ConstantResolver, SyscallResolver],
 }
 