angr 9.2.163__cp310-abi3-macosx_11_0_arm64.whl → 9.2.165__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of angr might be problematic; see the advisory on its registry page for details.
Files changed (42)
  1. angr/__init__.py +1 -1
  2. angr/ailment/converter_vex.py +1 -1
  3. angr/ailment/expression.py +5 -1
  4. angr/analyses/analysis.py +27 -4
  5. angr/analyses/cfg/cfg_base.py +16 -13
  6. angr/analyses/cfg/cfg_emulated.py +5 -1
  7. angr/analyses/cfg/cfg_fast.py +43 -5
  8. angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +11 -1
  9. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +194 -41
  10. angr/analyses/decompiler/ail_simplifier.py +19 -5
  11. angr/analyses/decompiler/callsite_maker.py +33 -17
  12. angr/analyses/decompiler/condition_processor.py +9 -8
  13. angr/analyses/decompiler/graph_region.py +19 -0
  14. angr/analyses/decompiler/optimization_passes/deadblock_remover.py +1 -1
  15. angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
  16. angr/analyses/decompiler/peephole_optimizations/inlined_memcpy.py +78 -0
  17. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +67 -10
  18. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +10 -13
  19. angr/analyses/decompiler/region_identifier.py +22 -1
  20. angr/analyses/decompiler/structuring/phoenix.py +72 -20
  21. angr/analyses/decompiler/structuring/recursive_structurer.py +3 -4
  22. angr/analyses/decompiler/structuring/structurer_nodes.py +3 -0
  23. angr/analyses/decompiler/utils.py +17 -5
  24. angr/analyses/deobfuscator/string_obf_finder.py +130 -32
  25. angr/analyses/s_reaching_definitions/s_rda_view.py +2 -1
  26. angr/analyses/typehoon/typeconsts.py +3 -1
  27. angr/blade.py +20 -15
  28. angr/engines/icicle.py +16 -3
  29. angr/knowledge_plugins/propagations/propagation_model.py +7 -0
  30. angr/rustylib.abi3.so +0 -0
  31. angr/sim_type.py +16 -1
  32. angr/state_plugins/history.py +16 -0
  33. angr/unicornlib.dylib +0 -0
  34. angr/utils/constants.py +1 -1
  35. angr/utils/graph.py +1 -1
  36. angr/utils/vex.py +11 -0
  37. {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/METADATA +5 -5
  38. {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/RECORD +1409 -1407
  39. {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/WHEEL +1 -0
  40. {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/entry_points.txt +0 -0
  41. {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/licenses/LICENSE +0 -0
  42. {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/top_level.txt +0 -0

angr/analyses/decompiler/ail_simplifier.py
@@ -397,9 +397,11 @@ class AILSimplifier(Analysis):
             if isinstance(def_.atom, atoms.VirtualVariable) and (def_.atom.was_reg or def_.atom.was_parameter):
                 # only do this for general purpose register
                 skip_def = False
+                reg = None
                 for reg in self.project.arch.register_list:
-                    if not reg.artificial and reg.vex_offset == def_.atom.reg_offset and not reg.general_purpose:
-                        skip_def = True
+                    if reg.vex_offset == def_.atom.reg_offset:
+                        if not reg.artificial and not reg.general_purpose and not reg.vector:
+                            skip_def = True
                         break

                 if skip_def:
@@ -659,6 +661,16 @@ class AILSimplifier(Analysis):
         first_op = ops[0]
         if isinstance(first_op, Convert) and first_op.to_bits >= self.project.arch.byte_width:
             # we need at least one byte!
+            if (
+                len({(op.from_bits, op.to_bits) for op in ops if isinstance(op, Convert) and op.operand.likes(expr)})
+                > 1
+            ):
+                # there are more Convert operations; it's probably because there are multiple expressions involving the
+                # same core expr. just give up (for now)
+                return None, None
+            if any(op for op in ops if isinstance(op, BinaryOp) and op.op == "Shr" and op.operands[0].likes(expr)):
+                # the expression is right-shifted, which means higher bits might be used.
+                return None, None
             return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
         if isinstance(first_op, BinaryOp):
             second_op = None
@@ -1816,13 +1828,11 @@ class AILSimplifier(Analysis):
                 if codeloc in self._assignments_to_remove:
                     # it should be removed
                     simplified = True
-                    self._assignments_to_remove.discard(codeloc)
                     continue

                 if self._statement_has_call_exprs(stmt):
                     if codeloc in self._calls_to_remove:
                         # it has a call and must be removed
-                        self._calls_to_remove.discard(codeloc)
                         simplified = True
                         continue
                 if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
@@ -1845,7 +1855,6 @@ class AILSimplifier(Analysis):
                 codeloc = CodeLocation(block.addr, idx, ins_addr=stmt.ins_addr, block_idx=block.idx)
                 if codeloc in self._calls_to_remove:
                     # this call can be removed
-                    self._calls_to_remove.discard(codeloc)
                     simplified = True
                     continue

@@ -1865,6 +1874,11 @@ class AILSimplifier(Analysis):
             new_block.statements = new_statements
             self.blocks[old_block] = new_block

+        # we can only use calls_to_remove and assignments_to_remove once; if any statements in blocks are removed, then
+        # the statement IDs in calls_to_remove and assignments_to_remove no longer match!
+        self._calls_to_remove.clear()
+        self._assignments_to_remove.clear()
+
         return simplified

     @staticmethod

angr/analyses/decompiler/callsite_maker.py
@@ -17,7 +17,15 @@ from angr.sim_type import (
     SimTypeFunction,
     SimTypeLongLong,
 )
-from angr.calling_conventions import SimReferenceArgument, SimRegArg, SimStackArg, SimCC, SimStructArg, SimComboArg
+from angr.calling_conventions import (
+    SimReferenceArgument,
+    SimRegArg,
+    SimStackArg,
+    SimCC,
+    SimStructArg,
+    SimComboArg,
+    SimFunctionArgument,
+)
 from angr.knowledge_plugins.key_definitions.constants import OP_BEFORE
 from angr.analyses import Analysis, register_analysis
 from angr.analyses.s_reaching_definitions import SRDAView
@@ -137,22 +145,7 @@ class CallSiteMaker(Analysis):
         arg_locs = cc.arg_locs(callsite_ty)

         if arg_locs is not None and cc is not None:
-            expanded_arg_locs: list[SimStackArg | SimRegArg | SimReferenceArgument] = []
-            for arg_loc in arg_locs:
-                if isinstance(arg_loc, SimComboArg):
-                    # a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
-                    # across registers). most importantly, a ComboArg represents one variable, not multiple, but we
-                    # have no way to know that until later down the pipeline.
-                    expanded_arg_locs += arg_loc.locations
-                elif isinstance(arg_loc, SimStructArg):
-                    expanded_arg_locs += [  # type: ignore
-                        arg_loc.locs[field_name] for field_name in arg_loc.struct.fields if field_name in arg_loc.locs
-                    ]
-                elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
-                    expanded_arg_locs.append(arg_loc)
-                else:
-                    raise NotImplementedError("Not implemented yet.")
-
+            expanded_arg_locs = self._expand_arglocs(arg_locs)
             for arg_loc in expanded_arg_locs:
                 if isinstance(arg_loc, SimReferenceArgument):
                     if not isinstance(arg_loc.ptr_loc, (SimRegArg, SimStackArg)):
@@ -548,6 +541,29 @@ class CallSiteMaker(Analysis):
             return None
         return len(specifiers)

+    def _expand_arglocs(
+        self, arg_locs: list[SimFunctionArgument]
+    ) -> list[SimStackArg | SimRegArg | SimReferenceArgument]:
+        expanded_arg_locs: list[SimStackArg | SimRegArg | SimReferenceArgument] = []
+
+        for arg_loc in arg_locs:
+            if isinstance(arg_loc, SimComboArg):
+                # a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
+                # across registers). most importantly, a ComboArg represents one variable, not multiple, but we
+                # have no way to know that until later down the pipeline.
+                expanded_arg_locs += arg_loc.locations
+            elif isinstance(arg_loc, SimStructArg):
+                for field_name in arg_loc.struct.fields:
+                    if field_name not in arg_loc.locs:
+                        continue
+                    expanded_arg_locs += self._expand_arglocs([arg_loc.locs[field_name]])
+            elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
+                expanded_arg_locs.append(arg_loc)
+            else:
+                raise NotImplementedError("Not implemented yet.")
+
+        return expanded_arg_locs
+
     def _atom_idx(self) -> int | None:
         return self._ail_manager.next_atom() if self._ail_manager is not None else None


angr/analyses/decompiler/condition_processor.py
@@ -13,8 +13,6 @@ from unique_log_filter import UniqueLogFilter


 from angr.utils.graph import GraphUtils
-from angr.utils.lazy_import import lazy_import
-from angr.utils import is_pyinstaller
 from angr.utils.graph import dominates, inverted_idoms
 from angr.utils.ail import is_head_controlled_loop_block
 from angr.block import Block, BlockNode
@@ -37,12 +35,6 @@ from .structuring.structurer_nodes import (
 from .graph_region import GraphRegion
 from .utils import peephole_optimize_expr

-if is_pyinstaller():
-    # PyInstaller is not happy with lazy import
-    import sympy
-else:
-    sympy = lazy_import("sympy")
-

 l = logging.getLogger(__name__)
 l.addFilter(UniqueLogFilter())
@@ -953,6 +945,9 @@ class ConditionProcessor:

     @staticmethod
     def claripy_ast_to_sympy_expr(ast, memo=None):
+
+        import sympy  # pylint:disable=import-outside-toplevel
+
         if ast.op == "And":
             return sympy.And(*(ConditionProcessor.claripy_ast_to_sympy_expr(arg, memo=memo) for arg in ast.args))
         if ast.op == "Or":
@@ -974,6 +969,9 @@ class ConditionProcessor:

     @staticmethod
     def sympy_expr_to_claripy_ast(expr, memo: dict):
+
+        import sympy  # pylint:disable=import-outside-toplevel
+
         if expr.is_Symbol:
             return memo[expr]
         if isinstance(expr, sympy.Or):
@@ -990,6 +988,9 @@ class ConditionProcessor:

     @staticmethod
     def simplify_condition(cond, depth_limit=8, variables_limit=8):
+
+        import sympy  # pylint:disable=import-outside-toplevel
+
         memo = {}
         if cond.depth > depth_limit or len(cond.variables) > variables_limit:
             return cond

angr/analyses/decompiler/graph_region.py
@@ -271,6 +271,13 @@ class GraphRegion:
         else:
             replace_with_graph_with_successors = replace_with.graph_with_successors

+        # if complete_successors is True for RegionIdentifier, replace_with.graph_with_successors may include nodes
+        # and edges that are *only* reachable from immediate successors. we will want to remove these nodes and edges,
+        # otherwise we may end up structuring the same region twice!
+        replace_with_graph_with_successors = self._cleanup_graph_with_successors(
+            replace_with.graph, replace_with_graph_with_successors
+        )
+
         self._replace_node_in_graph_with_subgraph(
             self.graph,
             self.successors,
@@ -289,6 +296,18 @@ class GraphRegion:
             replace_with.head,
         )

+    @staticmethod
+    def _cleanup_graph_with_successors(
+        graph: networkx.DiGraph, graph_with_successors: networkx.DiGraph
+    ) -> networkx.DiGraph:
+        expected_nodes = set(graph)
+        for n in list(expected_nodes):
+            for succ in graph_with_successors.successors(n):
+                expected_nodes.add(succ)
+        if all(n in expected_nodes for n in graph_with_successors):
+            return graph_with_successors
+        return graph_with_successors.subgraph(expected_nodes).to_directed()
+
     @staticmethod
     def _replace_node_in_graph(graph: networkx.DiGraph, node, replace_with, removed_edges: set):
         in_edges = [(src, dst) for src, dst in graph.in_edges(node) if (src, dst) not in removed_edges]

angr/analyses/decompiler/optimization_passes/deadblock_remover.py
@@ -60,7 +60,7 @@ class DeadblockRemover(OptimizationPass):
             blk
             for blk in self._graph.nodes()
             if (blk.addr != self._func.addr and self._graph.in_degree(blk) == 0)
-            or claripy.is_false(cond_proc.reaching_conditions[blk])
+            or claripy.is_false(cond_proc.reaching_conditions.get(blk, claripy.true()))
         }

         # fix up predecessors

angr/analyses/decompiler/peephole_optimizations/__init__.py
@@ -43,6 +43,7 @@ from .sar_to_signed_div import SarToSignedDiv
 from .tidy_stack_addr import TidyStackAddr
 from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalConjunctionsAndDisjunctions
 from .rol_ror import RolRorRewriter
+from .inlined_memcpy import InlinedMemcpy
 from .inlined_strcpy import InlinedStrcpy
 from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
 from .inlined_wstrcpy import InlinedWstrcpy
@@ -99,6 +100,7 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
     TidyStackAddr,
     InvertNegatedLogicalConjunctionsAndDisjunctions,
     RolRorRewriter,
+    InlinedMemcpy,
     InlinedStrcpy,
     InlinedStrcpyConsolidation,
     InlinedWstrcpy,

angr/analyses/decompiler/peephole_optimizations/inlined_memcpy.py (new file)
@@ -0,0 +1,78 @@
+# pylint:disable=arguments-differ
+from __future__ import annotations
+
+from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable, Load, UnaryOp
+from angr.ailment.statement import Call, Assignment, Store
+from angr import SIM_LIBRARIES
+from .base import PeepholeOptimizationStmtBase
+
+
+class InlinedMemcpy(PeepholeOptimizationStmtBase):
+    """
+    Simplifies inlined data copying logic into calls to memcpy.
+    """
+
+    __slots__ = ()
+
+    NAME = "Simplifying inlined strcpy"
+    stmt_classes = (Assignment, Store)
+
+    def optimize(self, stmt: Assignment | Store, stmt_idx: int | None = None, block=None, **kwargs):
+        should_replace = False
+        dst_offset, src_offset, store_size = None, None, None
+        if (
+            isinstance(stmt, Assignment)
+            and isinstance(stmt.dst, VirtualVariable)
+            and stmt.dst.was_stack
+            and stmt.dst.size == 16
+            and isinstance(stmt.src, Load)
+        ):
+            dst_offset = stmt.dst.stack_offset
+            store_size = stmt.dst.size
+            if (
+                isinstance(stmt.src.addr, UnaryOp)
+                and stmt.src.addr.op == "Reference"
+                and isinstance(stmt.src.addr.operand, VirtualVariable)
+            ):
+                should_replace = True
+                src_offset = stmt.src.addr.operand.stack_offset
+            elif isinstance(stmt.src.addr, StackBaseOffset):
+                should_replace = True
+                src_offset = stmt.src.addr.offset
+
+        if (
+            isinstance(stmt, Store)
+            and isinstance(stmt.addr, StackBaseOffset)
+            and stmt.size == 16
+            and isinstance(stmt.data, Load)
+        ):
+            dst_offset = stmt.addr.offset
+            store_size = stmt.size
+            if (
+                isinstance(stmt.data.addr, UnaryOp)
+                and stmt.data.addr.op == "Reference"
+                and isinstance(stmt.data.addr.operand, VirtualVariable)
+            ):
+                should_replace = True
+                src_offset = stmt.data.addr.operand.stack_offset
+            elif isinstance(stmt.data.addr, StackBaseOffset):
+                should_replace = True
+                src_offset = stmt.data.addr.offset
+
+        if should_replace:
+            assert dst_offset is not None and src_offset is not None and store_size is not None
+            # replace it with a call to memcpy
+            assert self.project is not None
+            return Call(
+                stmt.idx,
+                "memcpy",
+                args=[
+                    StackBaseOffset(None, self.project.arch.bits, dst_offset),
+                    StackBaseOffset(None, self.project.arch.bits, src_offset),
+                    Const(None, None, store_size, self.project.arch.bits),
+                ],
+                prototype=SIM_LIBRARIES["libc.so"][0].get_prototype("memcpy"),
+                **stmt.tags,
+            )
+
+        return None
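
For context only, a minimal usage sketch of how the new InlinedMemcpy pass surfaces in decompiler output; the binary path ./a.out and the function name main are assumptions, not taken from this diff. ALL_PEEPHOLE_OPTS, which now lists InlinedMemcpy, is what the Decompiler analysis applies when no explicit peephole list is given.

# Hypothetical usage sketch -- ./a.out and "main" are assumed examples.
import angr

proj = angr.Project("./a.out", auto_load_libs=False)
cfg = proj.analyses.CFGFast(normalize=True)          # build the CFG the decompiler relies on
func = proj.kb.functions["main"]

dec = proj.analyses.Decompiler(func, cfg=cfg.model)  # default peephole passes now include InlinedMemcpy
print(dec.codegen.text)                              # 16-byte stack-to-stack copies should render as memcpy(...)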

angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py
@@ -1,11 +1,11 @@
-# pylint:disable=arguments-differ
+# pylint:disable=arguments-differ,too-many-boolean-expressions
 from __future__ import annotations
 import string

 from archinfo import Endness

-from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable
-from angr.ailment.statement import Call, Assignment
+from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable, UnaryOp
+from angr.ailment.statement import Call, Assignment, Store, Statement

 from angr import SIM_LIBRARIES
 from angr.utils.endness import ail_const_to_be
@@ -24,24 +24,54 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
     __slots__ = ()

     NAME = "Simplifying inlined strcpy"
-    stmt_classes = (Assignment,)
+    stmt_classes = (Assignment, Store)
+
+    def optimize(self, stmt: Assignment | Store, stmt_idx: int | None = None, block=None, **kwargs):
+        inlined_strcpy_candidate = False
+        src: Const | None = None
+        strcpy_dst: StackBaseOffset | UnaryOp | None = None
+
+        assert self.project is not None

-    def optimize(self, stmt: Assignment, stmt_idx: int | None = None, block=None, **kwargs):
         if (
-            isinstance(stmt.dst, VirtualVariable)
+            isinstance(stmt, Assignment)
+            and isinstance(stmt.dst, VirtualVariable)
             and stmt.dst.was_stack
             and isinstance(stmt.src, Const)
             and isinstance(stmt.src.value, int)
         ):
-            r, s = self.is_integer_likely_a_string(stmt.src.value, stmt.src.size, self.project.arch.memory_endness)
+            inlined_strcpy_candidate = True
+            src = stmt.src
+            strcpy_dst = StackBaseOffset(None, self.project.arch.bits, stmt.dst.stack_offset)
+        elif (
+            isinstance(stmt, Store)
+            and isinstance(stmt.addr, UnaryOp)
+            and stmt.addr.op == "Reference"
+            and isinstance(stmt.addr.operand, VirtualVariable)
+            and stmt.addr.operand.was_stack
+            and isinstance(stmt.data, Const)
+            and isinstance(stmt.data.value, int)
+        ):
+            inlined_strcpy_candidate = True
+            src = stmt.data
+            strcpy_dst = stmt.addr
+
+        if inlined_strcpy_candidate:
+            assert src is not None and strcpy_dst is not None
+            assert isinstance(src.value, int)
+            assert self.kb is not None
+
+            r, s = self.is_integer_likely_a_string(src.value, src.size, self.project.arch.memory_endness)
             if r:
+                assert s is not None
+
                 # replace it with a call to strncpy
                 str_id = self.kb.custom_strings.allocate(s.encode("ascii"))
                 return Call(
                     stmt.idx,
                     "strncpy",
                     args=[
-                        StackBaseOffset(None, self.project.arch.bits, stmt.dst.stack_offset),
+                        strcpy_dst,
                         Const(None, None, str_id, self.project.arch.bits, custom_string=True),
                         Const(None, None, len(s), self.project.arch.bits),
                     ],
@@ -68,9 +98,21 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
                     next_offset = None
                     stride = []

+            if not stride:
+                return None
+            min_stride_stmt_idx = min(stmt_idx_ for _, stmt_idx_, _ in stride)
+            if min_stride_stmt_idx > stmt_idx:
+                # the current statement is not involved in the stride. we can't simplify here, otherwise we
+                # will incorrectly remove the current statement
+                return None
+
             integer, size = self.stride_to_int(stride)
-            r, s = self.is_integer_likely_a_string(integer, size, Endness.BE)
+            prev_stmt = None if stmt_idx == 0 else block.statements[stmt_idx - 1]
+            min_str_length = 1 if prev_stmt is not None and self.is_inlined_strcpy(prev_stmt) else 4
+            r, s = self.is_integer_likely_a_string(integer, size, Endness.BE, min_length=min_str_length)
             if r:
+                assert s is not None
+
                 # we remove all involved statements whose statement IDs are greater than the current one
                 for _, stmt_idx_, _ in reversed(stride):
                     if stmt_idx_ <= stmt_idx:
@@ -83,7 +125,7 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
                     stmt.idx,
                     "strncpy",
                     args=[
-                        StackBaseOffset(None, self.project.arch.bits, stmt.dst.stack_offset),
+                        strcpy_dst,
                         Const(None, None, str_id, self.project.arch.bits, custom_string=True),
                         Const(None, None, len(s), self.project.arch.bits),
                     ],
@@ -101,10 +143,13 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
         for _, _, v in stride:
             size += v.size
             n <<= v.bits
+            assert isinstance(v.value, int)
             n |= v.value
         return n, size

     def collect_constant_stores(self, block, starting_stmt_idx: int) -> dict[int, tuple[int, Const | None]]:
+        assert self.project is not None
+
         r = {}
         for idx, stmt in enumerate(block.statements):
             if idx < starting_stmt_idx:
@@ -158,3 +203,15 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
                     return False, None
                 return True, "".join(chars)
         return False, None
+
+    @staticmethod
+    def is_inlined_strcpy(stmt: Statement) -> bool:
+        return (
+            isinstance(stmt, Call)
+            and isinstance(stmt.target, str)
+            and stmt.target == "strncpy"
+            and stmt.args is not None
+            and len(stmt.args) == 3
+            and isinstance(stmt.args[1], Const)
+            and hasattr(stmt.args[1], "custom_string")
+        )

angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py
@@ -1,7 +1,7 @@
 # pylint:disable=arguments-differ
 from __future__ import annotations

-from angr.ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset
+from angr.ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset, UnaryOp, VirtualVariable
 from angr.ailment.statement import Call, Store

 from angr import SIM_LIBRARIES
@@ -21,12 +21,12 @@ class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):

     def optimize(self, stmts: list[Call], **kwargs):
         last_stmt, stmt = stmts
-        if InlinedStrcpyConsolidation._is_inlined_strcpy(last_stmt):
+        if InlinedStrcpy.is_inlined_strcpy(last_stmt):
             s_last: bytes = self.kb.custom_strings[last_stmt.args[1].value]
             addr_last = last_stmt.args[0]
             new_str = None  # will be set if consolidation should happen

-            if isinstance(stmt, Call) and InlinedStrcpyConsolidation._is_inlined_strcpy(stmt):
+            if isinstance(stmt, Call) and InlinedStrcpy.is_inlined_strcpy(stmt):
                 # consolidating two calls
                 s_curr: bytes = self.kb.custom_strings[stmt.args[1].value]
                 addr_curr = stmt.args[0]
@@ -74,22 +74,19 @@ class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):

         return None

-    @staticmethod
-    def _is_inlined_strcpy(stmt: Call):
-        return (
-            isinstance(stmt.target, str)
-            and stmt.target == "strncpy"
-            and len(stmt.args) == 3
-            and isinstance(stmt.args[1], Const)
-            and hasattr(stmt.args[1], "custom_string")
-        )
-
     @staticmethod
     def _parse_addr(addr: Expression) -> tuple[Expression, int]:
         if isinstance(addr, Register):
             return addr, 0
         if isinstance(addr, StackBaseOffset):
             return StackBaseOffset(None, addr.bits, 0), addr.offset
+        if (
+            isinstance(addr, UnaryOp)
+            and addr.op == "Reference"
+            and isinstance(addr.operand, VirtualVariable)
+            and addr.operand.was_stack
+        ):
+            return StackBaseOffset(None, addr.bits, 0), addr.operand.stack_offset
         if isinstance(addr, BinaryOp):
             if addr.op == "Add" and isinstance(addr.operands[1], Const):
                 base_0, offset_0 = InlinedStrcpyConsolidation._parse_addr(addr.operands[0])

angr/analyses/decompiler/region_identifier.py
@@ -99,7 +99,7 @@ class RegionIdentifier(Analysis):

     def _analyze(self):
         # make a copy of the graph
-        graph = networkx.DiGraph(self._graph)
+        graph = self._pick_one_connected_component(self._graph, as_copy=True)

         # preprocess: make it a super graph
         self._make_supergraph(graph)
@@ -113,6 +113,27 @@ class RegionIdentifier(Analysis):
         # make regions into block address lists
         self.regions_by_block_addrs = self._make_regions_by_block_addrs()

+    def _pick_one_connected_component(self, digraph: networkx.DiGraph, as_copy: bool = False) -> networkx.DiGraph:
+        g = networkx.Graph(digraph)
+        components = list(networkx.connected_components(g))
+        if len(components) <= 1:
+            return networkx.DiGraph(digraph) if as_copy else digraph
+
+        the_component = None
+        largest_component = None
+        for component in components:
+            if largest_component is None or len(component) > len(largest_component):
+                largest_component = component
+            if any((block.addr, block.idx) == self.entry_node_addr for block in component):
+                the_component = component
+                break
+
+        if the_component is None:
+            the_component = largest_component
+
+        assert the_component is not None
+        return digraph.subgraph(the_component).to_directed()
+
     @staticmethod
     def _compute_node_order(graph: networkx.DiGraph) -> dict[Any, tuple[int, int]]:
         sorted_nodes = GraphUtils.quasi_topological_sort_nodes(graph)
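
To illustrate the component-picking idea behind _pick_one_connected_component, here is a standalone sketch using only networkx (pick_component and the toy graph are illustrative, not angr API): keep the weakly connected component that contains the entry node, and fall back to the largest component otherwise.

# Illustrative sketch only; mirrors the logic added to RegionIdentifier in plain networkx terms.
import networkx as nx

def pick_component(digraph: nx.DiGraph, entry) -> nx.DiGraph:
    components = list(nx.weakly_connected_components(digraph))
    if len(components) <= 1:
        return digraph
    # prefer the component holding the entry node; otherwise take the largest one
    chosen = next((c for c in components if entry in c), None) or max(components, key=len)
    return digraph.subgraph(chosen).to_directed()

g = nx.DiGraph([(1, 2), (2, 3), (10, 11)])         # a "function graph" with a stray disconnected piece
print(sorted(pick_component(g, entry=1).nodes()))  # [1, 2, 3]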