angr 9.2.92__py3-none-manylinux2014_x86_64.whl → 9.2.94__py3-none-manylinux2014_x86_64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (45)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +20 -10
  3. angr/analyses/cfg/indirect_jump_resolvers/amd64_elf_got.py +1 -1
  4. angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +89 -32
  5. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +276 -133
  6. angr/analyses/complete_calling_conventions.py +1 -1
  7. angr/analyses/decompiler/ail_simplifier.py +20 -0
  8. angr/analyses/decompiler/block_io_finder.py +293 -0
  9. angr/analyses/decompiler/block_similarity.py +190 -0
  10. angr/analyses/decompiler/callsite_maker.py +5 -0
  11. angr/analyses/decompiler/clinic.py +103 -1
  12. angr/analyses/decompiler/decompilation_cache.py +2 -0
  13. angr/analyses/decompiler/decompiler.py +21 -4
  14. angr/analyses/decompiler/optimization_passes/__init__.py +6 -0
  15. angr/analyses/decompiler/optimization_passes/code_motion.py +361 -0
  16. angr/analyses/decompiler/optimization_passes/optimization_pass.py +1 -0
  17. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +30 -18
  18. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +110 -0
  19. angr/analyses/decompiler/peephole_optimizations/bswap.py +53 -2
  20. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +20 -1
  21. angr/analyses/decompiler/structured_codegen/c.py +76 -41
  22. angr/analyses/decompiler/structuring/phoenix.py +41 -9
  23. angr/analyses/decompiler/utils.py +13 -4
  24. angr/analyses/propagator/engine_ail.py +3 -0
  25. angr/analyses/reaching_definitions/engine_ail.py +3 -0
  26. angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
  27. angr/analyses/stack_pointer_tracker.py +60 -10
  28. angr/analyses/typehoon/simple_solver.py +95 -24
  29. angr/analyses/typehoon/typeconsts.py +1 -1
  30. angr/calling_conventions.py +0 -3
  31. angr/engines/pcode/cc.py +1 -1
  32. angr/engines/successors.py +6 -0
  33. angr/knowledge_plugins/propagations/states.py +2 -1
  34. angr/procedures/definitions/glibc.py +3 -1
  35. angr/procedures/definitions/parse_win32json.py +2135 -383
  36. angr/procedures/definitions/wdk_ntoskrnl.py +956 -0
  37. angr/sim_type.py +53 -13
  38. angr/utils/library.py +2 -2
  39. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/METADATA +6 -6
  40. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/RECORD +44 -41
  41. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/WHEEL +1 -1
  42. angr/procedures/definitions/wdk_ntdll.py +0 -994
  43. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/LICENSE +0 -0
  44. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/entry_points.txt +0 -0
  45. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/top_level.txt +0 -0
@@ -9,8 +9,8 @@ from .base import PeepholeOptimizationExprBase
9
9
  class Bswap(PeepholeOptimizationExprBase):
10
10
  __slots__ = ()
11
11
 
12
- NAME = "Simplifying bswap_16()"
13
- expr_classes = (BinaryOp,) # all expressions are allowed
12
+ NAME = "Simplifying bswap_16() and bswap_32()"
13
+ expr_classes = (BinaryOp, Convert)
14
14
 
15
15
  def optimize(self, expr: BinaryOp, **kwargs):
16
16
  # bswap_16
@@ -48,6 +48,57 @@ class Bswap(PeepholeOptimizationExprBase):
48
48
 
49
49
  return None
50
50
 
51
+ # bswap_32
52
+ # (Conv(64->32, rax<8>) << 0x18<8>) |
53
+ # (((Conv(64->32, rax<8>) << 0x8<8>) & 0xff0000<32>) |
54
+ # (((Conv(64->32, rax<8>) >> 0x8<8>) & 0xff00<32>) |
55
+ # ((Conv(64->32, rax<8>) >> 0x18<8>) & 0xff<32>))))
56
+ if expr.op == "Or":
57
+ # fully flatten the expression
58
+ or_pieces = []
59
+ queue = [expr]
60
+ while queue:
61
+ operand = queue.pop(0)
62
+ if isinstance(operand, BinaryOp) and operand.op == "Or":
63
+ queue.append(operand.operands[0])
64
+ queue.append(operand.operands[1])
65
+ else:
66
+ or_pieces.append(operand)
67
+ if len(or_pieces) == 4:
68
+ # parse pieces
69
+ shifts = set()
70
+ cores = set()
71
+ for piece in or_pieces:
72
+ if isinstance(piece, BinaryOp):
73
+ if piece.op == "Shl" and isinstance(piece.operands[1], Const):
74
+ cores.add(piece.operands[0])
75
+ shifts.add(("<<", piece.operands[1].value, 0xFFFFFFFF))
76
+ elif piece.op == "And" and isinstance(piece.operands[1], Const):
77
+ and_amount = piece.operands[1].value
78
+ and_core = piece.operands[0]
79
+ if (
80
+ isinstance(and_core, BinaryOp)
81
+ and and_core.op == "Shl"
82
+ and isinstance(and_core.operands[1], Const)
83
+ ):
84
+ cores.add(and_core.operands[0])
85
+ shifts.add(("<<", and_core.operands[1].value, and_amount))
86
+ elif (
87
+ isinstance(and_core, BinaryOp)
88
+ and and_core.op == "Shr"
89
+ and isinstance(and_core.operands[1], Const)
90
+ ):
91
+ cores.add(and_core.operands[0])
92
+ shifts.add((">>", and_core.operands[1].value, and_amount))
93
+ if len(cores) == 1 and shifts == {
94
+ ("<<", 0x18, 0xFFFFFFFF),
95
+ ("<<", 8, 0xFF0000),
96
+ (">>", 0x18, 0xFF),
97
+ (">>", 8, 0xFF00),
98
+ }:
99
+ core_expr = next(iter(cores))
100
+ return Call(expr.idx, "__buildin_bswap32", args=[core_expr], bits=expr.bits, **expr.tags)
101
+
51
102
  return None
52
103
 
53
104
  def _match_inner(self, or_first: BinaryOp, or_second: BinaryOp) -> Tuple[bool, Optional[Expression]]:
@@ -1,6 +1,6 @@
1
1
  from math import gcd
2
2
 
3
- from ailment.expression import BinaryOp, UnaryOp, Const, Convert
3
+ from ailment.expression import BinaryOp, UnaryOp, Const, Convert, StackBaseOffset
4
4
 
5
5
  from .base import PeepholeOptimizationExprBase
6
6
 
@@ -59,6 +59,22 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
59
59
  expr.signed,
60
60
  **expr.tags,
61
61
  )
62
+ if (
63
+ isinstance(expr.operands[0], BinaryOp)
64
+ and expr.operands[0].op == "Mul"
65
+ and isinstance(expr.operands[0].operands[1], Const)
66
+ and expr.operands[0].operands[0].likes(expr.operands[1])
67
+ ):
68
+ # A * x + x => (A + 1) * x
69
+ coeff_expr = expr.operands[0].operands[1]
70
+ new_coeff = coeff_expr.value + 1
71
+ return BinaryOp(
72
+ expr.idx,
73
+ "Mul",
74
+ [Const(coeff_expr.idx, None, new_coeff, coeff_expr.bits), expr.operands[1]],
75
+ expr.signed,
76
+ **expr.tags,
77
+ )
62
78
  elif expr.op == "Sub":
63
79
  if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
64
80
  mask = (1 << expr.bits) - 1
@@ -93,6 +109,9 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
93
109
  if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
94
110
  return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)
95
111
 
112
+ if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
113
+ return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)
114
+
96
115
  elif expr.op == "And":
97
116
  if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
98
117
  new_expr = Const(
@@ -3,12 +3,12 @@ from typing import Optional, Dict, List, Tuple, Set, Any, Union, TYPE_CHECKING,
3
3
  from collections import defaultdict
4
4
  import logging
5
5
  import struct
6
- from functools import reduce
7
6
 
8
7
  from ailment import Block, Expr, Stmt, Tmp
9
8
  from ailment.expression import StackBaseOffset, BinaryOp
10
9
  from unique_log_filter import UniqueLogFilter
11
10
 
11
+ from ....procedures import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
12
12
  from ....sim_type import (
13
13
  SimTypeLongLong,
14
14
  SimTypeInt,
@@ -28,6 +28,7 @@ from ....sim_type import (
28
28
  SimTypeFixedSizeArray,
29
29
  SimTypeLength,
30
30
  SimTypeReg,
31
+ dereference_simtype,
31
32
  )
32
33
  from ....knowledge_plugins.functions import Function
33
34
  from ....sim_variable import SimVariable, SimTemporaryVariable, SimStackVariable, SimMemoryVariable
@@ -537,8 +538,10 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
537
538
  indent_str = self.indent_str(indent)
538
539
  if self.codegen.show_local_types:
539
540
  local_types = [unpack_typeref(ty) for ty in self.variable_manager.types.iter_own()]
541
+ name_to_structtypes = {}
540
542
  for ty in local_types:
541
543
  if isinstance(ty, SimStruct):
544
+ name_to_structtypes[ty.name] = ty
542
545
  for field in ty.fields.values():
543
546
  if isinstance(field, SimTypePointer):
544
547
  if isinstance(field.pts_to, (SimTypeArray, SimTypeFixedSizeArray)):
@@ -546,6 +549,12 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
546
549
  else:
547
550
  field = field.pts_to
548
551
  if isinstance(field, SimStruct) and field not in local_types:
552
+ if field.name and not field.fields and field.name in name_to_structtypes:
553
+ # we use SimStruct types with empty fields to refer to already defined struct types
554
+ # for example, see how struct _IO_marker is defined in sim_type.py
555
+ continue
556
+ if field.name:
557
+ name_to_structtypes[field.name] = field
549
558
  local_types.append(field)
550
559
 
551
560
  yield from type_to_c_repr_chunks(ty, full=True, indent_str=indent_str)
@@ -577,7 +586,7 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
577
586
  yield " ", None
578
587
  # function name
579
588
  if self.demangled_name and self.show_demangled_name:
580
- normalized_name = get_cpp_function_name(self.demangled_name, specialized=False, qualified=False)
589
+ normalized_name = get_cpp_function_name(self.demangled_name, specialized=False, qualified=True)
581
590
  else:
582
591
  normalized_name = self.name
583
592
  yield normalized_name, self
@@ -1234,7 +1243,16 @@ class CFunctionCall(CStatement, CExpression):
1234
1243
  @property
1235
1244
  def prototype(self) -> Optional[SimTypeFunction]: # TODO there should be a prototype for each callsite!
1236
1245
  if self.callee_func is not None and self.callee_func.prototype is not None:
1237
- return self.callee_func.prototype
1246
+ proto = self.callee_func.prototype
1247
+ if self.callee_func.prototype_libname is not None:
1248
+ # we need to deref the prototype in case it uses SimTypeRef internally
1249
+ type_collections = []
1250
+ prototype_lib = SIM_LIBRARIES[self.callee_func.prototype_libname]
1251
+ if prototype_lib.type_collection_names:
1252
+ for typelib_name in prototype_lib.type_collection_names:
1253
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
1254
+ proto = dereference_simtype(proto, type_collections)
1255
+ return proto
1238
1256
  returnty = SimTypeInt(signed=False)
1239
1257
  return SimTypeFunction([arg.type for arg in self.args], returnty).with_arch(self.codegen.project.arch)
1240
1258
 
@@ -2165,29 +2183,24 @@ class CConstant(CExpression):
2165
2183
 
2166
2184
  if self.fmt_float:
2167
2185
  if 0 < value <= 0xFFFF_FFFF:
2168
- str_value = str(struct.unpack("f", struct.pack("I", value))[0])
2169
- return str_value
2186
+ return str(struct.unpack("f", struct.pack("I", value))[0])
2187
+
2188
+ if self.fmt_char:
2189
+ if value < 0:
2190
+ value += 2**self._type.size
2191
+ value &= 0xFF
2192
+ return repr(chr(value)) if value < 0x80 else f"'\\x{value:x}'"
2170
2193
 
2171
2194
  if self.fmt_neg:
2172
2195
  if value > 0:
2173
- value = value - 2**self._type.size
2196
+ value -= 2**self._type.size
2174
2197
  elif value < 0:
2175
- value = value + 2**self._type.size
2176
-
2177
- str_value = None
2178
- if self.fmt_char:
2179
- try:
2180
- str_value = f"'{chr(value)}'"
2181
- except ValueError:
2182
- str_value = None
2198
+ value += 2**self._type.size
2183
2199
 
2184
- if str_value is None:
2185
- if self.fmt_hex:
2186
- str_value = hex(value)
2187
- else:
2188
- str_value = str(value)
2200
+ if self.fmt_hex:
2201
+ return hex(value)
2189
2202
 
2190
- return str_value
2203
+ return str(value)
2191
2204
 
2192
2205
 
2193
2206
  class CRegister(CExpression):
@@ -2590,6 +2603,15 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2590
2603
  if isinstance(var, CVariable):
2591
2604
  var.variable_type = self._get_variable_type(var.variable, is_global=True)
2592
2605
 
2606
+ for cvar in self.cfunc.arg_list:
2607
+ vartype = self._get_variable_type(
2608
+ cvar.variable,
2609
+ is_global=isinstance(cvar.variable, SimMemoryVariable)
2610
+ and not isinstance(cvar.variable, SimStackVariable),
2611
+ )
2612
+ if vartype is not None:
2613
+ cvar.variable_type = vartype.with_arch(self.project.arch)
2614
+
2593
2615
  #
2594
2616
  # Util methods
2595
2617
  #
@@ -2823,30 +2845,40 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2823
2845
  if len(o_terms) == 0:
2824
2846
  # probably a plain integer, return as is
2825
2847
  return expr
2826
- result = reduce(
2827
- lambda a1, a2: CBinaryOp("Add", a1, a2, codegen=self),
2828
- (
2829
- (
2830
- CBinaryOp(
2831
- "Mul",
2832
- CConstant(c, t.type, codegen=self),
2833
- (
2834
- t
2835
- if not isinstance(t.type, SimTypePointer)
2836
- else CTypeCast(t.type, SimTypePointer(SimTypeChar()), t, codegen=self)
2837
- ),
2838
- codegen=self,
2839
- )
2840
- if c != 1
2841
- else (
2848
+ result = None
2849
+ pointer_length_int_type = (
2850
+ SimTypeLongLong(signed=False) if self.project.arch.bits == 64 else SimTypeInt(signed=False)
2851
+ )
2852
+ for c, t in o_terms:
2853
+ op = "Add"
2854
+ if c == -1 and result is not None:
2855
+ op = "Sub"
2856
+ piece = (
2857
+ t
2858
+ if not isinstance(t.type, SimTypePointer)
2859
+ else CTypeCast(t.type, SimTypePointer(SimTypeChar()), t, codegen=self)
2860
+ )
2861
+ elif c == 1:
2862
+ piece = (
2863
+ t
2864
+ if not isinstance(t.type, SimTypePointer)
2865
+ else CTypeCast(t.type, SimTypePointer(SimTypeChar()), t, codegen=self)
2866
+ )
2867
+ else:
2868
+ piece = CBinaryOp(
2869
+ "Mul",
2870
+ CConstant(c, t.type, codegen=self),
2871
+ (
2842
2872
  t
2843
2873
  if not isinstance(t.type, SimTypePointer)
2844
- else CTypeCast(t.type, SimTypePointer(SimTypeChar()), t, codegen=self)
2845
- )
2874
+ else CTypeCast(t.type, pointer_length_int_type, t, codegen=self)
2875
+ ),
2876
+ codegen=self,
2846
2877
  )
2847
- for c, t in o_terms
2848
- ),
2849
- )
2878
+ if result is None:
2879
+ result = piece
2880
+ else:
2881
+ result = CBinaryOp(op, result, piece, codegen=self)
2850
2882
  if o_constant != 0:
2851
2883
  result = CBinaryOp("Add", CConstant(o_constant, SimTypeInt(), codegen=self), result, codegen=self)
2852
2884
 
@@ -2869,6 +2901,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2869
2901
  if kernel is not None:
2870
2902
  l.warning("Summing two different pointers together. Uh oh!")
2871
2903
  return bail_out()
2904
+ if c == -1:
2905
+ # legit case: you can deduct a pointer from another pointer and get an integer as result in C
2906
+ return bail_out()
2872
2907
  if c != 1:
2873
2908
  l.warning("Multiplying a pointer by a constant??")
2874
2909
  return bail_out()
@@ -14,7 +14,7 @@ from ailment.expression import Const, UnaryOp, MultiStatementExpression
14
14
  from angr.utils.graph import GraphUtils
15
15
  from ....knowledge_plugins.cfg import IndirectJumpType
16
16
  from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
17
- from ....utils.graph import dominates, to_acyclic_graph
17
+ from ....utils.graph import dominates, to_acyclic_graph, dfs_back_edges
18
18
  from ..sequence_walker import SequenceWalker
19
19
  from ..utils import (
20
20
  remove_last_statement,
@@ -140,6 +140,10 @@ class PhoenixStructurer(StructurerBase):
140
140
  if len(self._region.graph.nodes) == 1 and has_cycle:
141
141
  self._analyze_cyclic()
142
142
 
143
+ # backup the region prior to conducting a cyclic refinement because we may not be able to structure a cycle out
144
+ # of the refined graph. in that case, we restore the original region and return.
145
+ pre_refinement_region = None
146
+
143
147
  while len(self._region.graph.nodes) > 1:
144
148
  progressed = self._analyze_acyclic()
145
149
  if progressed and self._region.head not in self._region.graph:
@@ -151,12 +155,14 @@ class PhoenixStructurer(StructurerBase):
151
155
  if has_cycle:
152
156
  progressed |= self._analyze_cyclic()
153
157
  if progressed:
158
+ pre_refinement_region = None
154
159
  if self._region.head not in self._region.graph:
155
160
  # update the loop head
156
161
  self._region.head = next(
157
162
  iter(node for node in self._region.graph.nodes if node.addr == self._region.head.addr)
158
163
  )
159
- else:
164
+ elif pre_refinement_region is None:
165
+ pre_refinement_region = self._region.copy()
160
166
  refined = self._refine_cyclic()
161
167
  if refined:
162
168
  if self._region.head not in self._region.graph:
@@ -194,6 +200,10 @@ class PhoenixStructurer(StructurerBase):
194
200
  # successfully structured
195
201
  self.result = next(iter(self._region.graph.nodes))
196
202
  else:
203
+ if pre_refinement_region is not None:
204
+ # we could not make a loop after the last cycle refinement. restore the graph
205
+ self._region = pre_refinement_region
206
+
197
207
  self.result = None # the actual result is in self._region.graph and self._region.graph_with_successors
198
208
 
199
209
  def _analyze_cyclic(self) -> bool:
@@ -572,7 +582,16 @@ class PhoenixStructurer(StructurerBase):
572
582
  return True, loop_node
573
583
 
574
584
  def _refine_cyclic(self) -> bool:
575
- return self._refine_cyclic_core(self._region.head)
585
+ loop_heads = {t for _, t in dfs_back_edges(self._region.graph, self._region.head)}
586
+ sorted_loop_heads = GraphUtils.quasi_topological_sort_nodes(self._region.graph, nodes=list(loop_heads))
587
+
588
+ for head in sorted_loop_heads:
589
+ l.debug("... refining cyclic at %r", head)
590
+ refined = self._refine_cyclic_core(head)
591
+ l.debug("... refined: %s", refined)
592
+ if refined:
593
+ return True
594
+ return False
576
595
 
577
596
  def _refine_cyclic_core(self, loop_head) -> bool:
578
597
  graph: networkx.DiGraph = self._region.graph
@@ -621,7 +640,7 @@ class PhoenixStructurer(StructurerBase):
621
640
  # natural loop. select *any* exit edge to determine the successor
622
641
  # well actually, to maintain determinism, we select the successor with the highest address
623
642
  successor_candidates = set()
624
- for node in graph.nodes:
643
+ for node in networkx.descendants(graph, loop_head):
625
644
  for succ in fullgraph.successors(node):
626
645
  if succ not in graph:
627
646
  successor_candidates.add(succ)
@@ -637,8 +656,15 @@ class PhoenixStructurer(StructurerBase):
637
656
  outgoing_edges.append((pred, succ))
638
657
 
639
658
  if outgoing_edges:
640
- # convert all out-going edges into breaks (if there is a single successor) or gotos (if there are multiple
641
- # successors)
659
+ # if there is a single successor, we convert all out-going edges into breaks;
660
+ # if there are multiple successors, and if the current region does not have a parent region, then we
661
+ # convert all out-going edges into gotos;
662
+ # otherwise we give up.
663
+
664
+ if self._parent_region is not None and len({dst for _, dst in outgoing_edges}) > 1:
665
+ # give up because there is a parent region
666
+ return False
667
+
642
668
  if successor is None:
643
669
  successor_and_edgecounts = defaultdict(int)
644
670
  for _, dst in outgoing_edges:
@@ -794,7 +820,7 @@ class PhoenixStructurer(StructurerBase):
794
820
  # case being the loop head. in such cases, we can just remove the edge.
795
821
  if src.addr not in self.kb.cfgs["CFGFast"].jump_tables:
796
822
  l.warning(
797
- "_refine_cyclic_core: Cannot find the block going to loop head for edge %r -> %r."
823
+ "_refine_cyclic_core: Cannot find the block going to loop head for edge %r -> %r. "
798
824
  "Remove the edge anyway.",
799
825
  src,
800
826
  continue_node,
@@ -847,6 +873,7 @@ class PhoenixStructurer(StructurerBase):
847
873
  def _refine_cyclic_is_while_loop(
848
874
  self, graph, fullgraph, loop_head, head_succs
849
875
  ) -> Tuple[bool, Optional[Tuple[List, List, BaseNode, BaseNode]]]:
876
+
850
877
  if len(head_succs) == 2 and any(head_succ not in graph for head_succ in head_succs):
851
878
  # make sure the head_pred is not already structured
852
879
  _, _, head_block_0 = self._find_node_going_to_dst(loop_head, head_succs[0])
@@ -857,7 +884,7 @@ class PhoenixStructurer(StructurerBase):
857
884
  continue_edges: List[Tuple[BaseNode, BaseNode]] = []
858
885
  outgoing_edges = []
859
886
  successor = next(iter(head_succ for head_succ in head_succs if head_succ not in graph))
860
- for node in graph.nodes:
887
+ for node in networkx.descendants(graph, loop_head):
861
888
  succs = list(fullgraph.successors(node))
862
889
  if loop_head in succs:
863
890
  continue_edges.append((node, loop_head))
@@ -888,7 +915,7 @@ class PhoenixStructurer(StructurerBase):
888
915
  # virtualize all other edges
889
916
  successor = next(iter(nn for nn in head_pred_succs if nn not in graph))
890
917
  continue_node = head_pred
891
- for node in graph.nodes:
918
+ for node in networkx.descendants(graph, loop_head):
892
919
  if node is head_pred:
893
920
  continue
894
921
  succs = list(fullgraph.successors(node))
@@ -2243,6 +2270,11 @@ class PhoenixStructurer(StructurerBase):
2243
2270
  last_stmt = block.statements[-1]
2244
2271
  if _check(last_stmt):
2245
2272
  walker.parent_and_block.append((walker.block_id, parent, block))
2273
+ elif (
2274
+ not isinstance(last_stmt, (Jump, ConditionalJump))
2275
+ and block.addr + block.original_size == dst_addr
2276
+ ):
2277
+ walker.parent_and_block.append((walker.block_id, parent, block))
2246
2278
 
2247
2279
  def _handle_MultiNode(block: MultiNode, parent=None, **kwargs): # pylint:disable=unused-argument
2248
2280
  if block.nodes and isinstance(block.nodes[-1], Block) and block.nodes[-1].statements:
@@ -1,11 +1,10 @@
1
- # pylint:disable=wrong-import-position
1
+ # pylint:disable=wrong-import-position,broad-exception-caught,ungrouped-imports
2
2
  import pathlib
3
3
  import copy
4
4
  from typing import Optional, Tuple, Any, Union, List, Iterable
5
5
  import logging
6
6
 
7
7
  import networkx
8
-
9
8
  import ailment
10
9
 
11
10
  import angr
@@ -251,6 +250,8 @@ def _merge_ail_nodes(graph, node_a: ailment.Block, node_b: ailment.Block) -> ail
251
250
  in_edges = list(graph.in_edges(node_a, data=True))
252
251
  out_edges = list(graph.out_edges(node_b, data=True))
253
252
 
253
+ a_ogs = graph.nodes[node_a].get("original_nodes", set())
254
+ b_ogs = graph.nodes[node_b].get("original_nodes", set())
254
255
  new_node = node_a.copy() if node_a.addr <= node_b.addr else node_b.copy()
255
256
  old_node = node_b if new_node == node_a else node_a
256
257
  # remove jumps in the middle of nodes when merging
@@ -263,8 +264,7 @@ def _merge_ail_nodes(graph, node_a: ailment.Block, node_b: ailment.Block) -> ail
263
264
  graph.remove_node(node_b)
264
265
 
265
266
  if new_node is not None:
266
- graph.add_node(new_node)
267
-
267
+ graph.add_node(new_node, original_nodes=a_ogs.union(b_ogs))
268
268
  for src, _, data in in_edges:
269
269
  if src is node_b:
270
270
  src = new_node
@@ -289,6 +289,7 @@ def to_ail_supergraph(transition_graph: networkx.DiGraph) -> networkx.DiGraph:
289
289
  """
290
290
  # make a copy of the graph
291
291
  transition_graph = networkx.DiGraph(transition_graph)
292
+ networkx.set_node_attributes(transition_graph, {node: {node} for node in transition_graph.nodes}, "original_nodes")
292
293
 
293
294
  while True:
294
295
  for src, dst, data in transition_graph.edges(data=True):
@@ -725,6 +726,14 @@ def calls_in_graph(graph: networkx.DiGraph) -> int:
725
726
  return counter.calls
726
727
 
727
728
 
729
+ def find_block_by_addr(graph: networkx.DiGraph, addr: int):
730
+ for block in graph.nodes():
731
+ if block.addr == addr:
732
+ return block
733
+
734
+ raise KeyError("The block is not in the graph!")
735
+
736
+
728
737
  # delayed import
729
738
  from .structuring.structurer_nodes import (
730
739
  MultiNode,
@@ -1148,6 +1148,9 @@ class SimEnginePropagatorAIL(
1148
1148
  )
1149
1149
  return PropValue.from_value_and_details(value, expr.size, new_expr, self._codeloc())
1150
1150
 
1151
+ _ail_handle_AddV = _ail_handle_Add
1152
+ _ail_handle_MulV = _ail_handle_Mul
1153
+
1151
1154
  def _ail_handle_Mull(self, expr):
1152
1155
  o0_value = self._expr(expr.operands[0])
1153
1156
  o1_value = self._expr(expr.operands[1])
@@ -728,6 +728,9 @@ class SimEngineRDAIL(
728
728
  r = MultiValues(self.state.top(bits))
729
729
  return r
730
730
 
731
+ _ail_handle_AddV = _ail_handle_Add
732
+ _ail_handle_MulV = _ail_handle_Mul
733
+
731
734
  def _ail_handle_Mull(self, expr):
732
735
  arg0, arg1 = expr.operands
733
736
 
@@ -539,6 +539,13 @@ class ReachingDefinitionsAnalysis(
539
539
  for use in [state.stack_uses, state.heap_uses, state.register_uses, state.memory_uses]:
540
540
  self.all_uses.merge(use)
541
541
 
542
+ if self._track_tmps:
543
+ # merge tmp uses to all_uses
544
+ for tmp_idx, locs in state.tmp_uses.items():
545
+ tmp_def = next(iter(state.tmps[tmp_idx]))
546
+ for loc in locs:
547
+ self.all_uses.add_use(tmp_def, loc)
548
+
542
549
  # drop definitions and uses because we will not need them anymore
543
550
  state.downsize()
544
551
 
@@ -3,6 +3,7 @@
3
3
  from typing import Set, List, Optional, TYPE_CHECKING
4
4
  import re
5
5
  import logging
6
+ from collections import defaultdict
6
7
 
7
8
  import pyvex
8
9
 
@@ -148,6 +149,21 @@ class OffsetVal:
148
149
  return f"reg({self.reg}){(self.offset - 2**self.reg.bitlen) if self.offset != 0 else 0:+}"
149
150
 
150
151
 
152
+ class Eq:
153
+ """
154
+ Represent an equivalence condition.
155
+ """
156
+
157
+ __slots__ = ("val0", "val1")
158
+
159
+ def __init__(self, val0, val1):
160
+ self.val0 = val0
161
+ self.val1 = val1
162
+
163
+ def __hash__(self):
164
+ return hash((type(self), self.val0, self.val1))
165
+
166
+
151
167
  class FrozenStackPointerTrackerState:
152
168
  """
153
169
  Abstract state for StackPointerTracker analysis with registers and memory values being in frozensets.
@@ -296,7 +312,12 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
296
312
  """
297
313
 
298
314
  def __init__(
299
- self, func: Optional[Function], reg_offsets: Set[int], block: Optional["Block"] = None, track_memory=True
315
+ self,
316
+ func: Optional[Function],
317
+ reg_offsets: Set[int],
318
+ block: Optional["Block"] = None,
319
+ track_memory=True,
320
+ cross_insn_opt=True,
300
321
  ):
301
322
  if func is not None:
302
323
  if not func.normalized:
@@ -316,6 +337,8 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
316
337
  self.reg_offsets = reg_offsets
317
338
  self.states = {}
318
339
  self._blocks = {}
340
+ self._reg_value_at_block_start = defaultdict(dict)
341
+ self.cross_insn_opt = cross_insn_opt
319
342
 
320
343
  _l.debug("Running on function %r", self._func)
321
344
  self._analyze()
@@ -468,7 +491,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
468
491
  self._set_state(addr, new_val, "pre")
469
492
 
470
493
  def _run_on_node(self, node: BlockNode, state):
471
- block = self.project.factory.block(node.addr, size=node.size)
494
+ block = self.project.factory.block(node.addr, size=node.size, cross_insn_opt=self.cross_insn_opt)
472
495
  self._blocks[node.addr] = block
473
496
 
474
497
  state = state.unfreeze()
@@ -483,6 +506,10 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
483
506
  except SimTranslationError:
484
507
  pass
485
508
 
509
+ if node.addr in self._reg_value_at_block_start:
510
+ for reg, val in self._reg_value_at_block_start[node.addr].items():
511
+ state.put(reg, val)
512
+
486
513
  if vex_block is not None:
487
514
  if isinstance(vex_block, pyvex.IRSB):
488
515
  curr_stmt_start_addr = self._process_vex_irsb(node, vex_block, state)
@@ -548,7 +575,12 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
548
575
  and is_alignment_mask(arg1_expr.val)
549
576
  ):
550
577
  return arg0_expr
551
- raise CouldNotResolveException()
578
+ elif expr.op.startswith("Iop_CmpEQ"):
579
+ arg0_expr = _resolve_expr(arg0)
580
+ arg1_expr = _resolve_expr(arg1)
581
+ if isinstance(arg0_expr, (Register, OffsetVal)) and isinstance(arg1_expr, (Register, OffsetVal)):
582
+ return Eq(arg0_expr, arg1_expr)
583
+ raise CouldNotResolveException()
552
584
  elif type(expr) is pyvex.IRExpr.RdTmp and expr.tmp in tmps and tmps[expr.tmp] is not None:
553
585
  return tmps[expr.tmp]
554
586
  elif type(expr) is pyvex.IRExpr.Const:
@@ -563,13 +595,15 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
563
595
  to_bits = int(m.group(3))
564
596
  # to_unsigned = m.group(4) == "U"
565
597
  v = resolve_expr(expr.args[0])
566
- if not isinstance(v, Constant):
567
- return TOP
568
- if from_bits > to_bits:
569
- # truncation
570
- mask = (1 << to_bits) - 1
571
- return Constant(v.val & mask)
572
- return v
598
+ if isinstance(v, Constant):
599
+ if from_bits > to_bits:
600
+ # truncation
601
+ mask = (1 << to_bits) - 1
602
+ return Constant(v.val & mask)
603
+ return v
604
+ elif isinstance(v, Eq):
605
+ return v
606
+ return TOP
573
607
  elif self.track_mem and type(expr) is pyvex.IRExpr.Load:
574
608
  return state.load(_resolve_expr(expr.addr))
575
609
  raise CouldNotResolveException()
@@ -606,6 +640,22 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
606
640
  and vex_block.instruction_addresses.index(curr_stmt_start_addr) == vex_block.instructions - 1
607
641
  ):
608
642
  exit_observed = True
643
+ if (
644
+ type(stmt.guard) is pyvex.IRExpr.RdTmp
645
+ and stmt.guard.tmp in tmps
646
+ and isinstance(stmt.dst, pyvex.IRConst.IRConst)
647
+ ):
648
+ guard = tmps[stmt.guard.tmp]
649
+ if isinstance(guard, Eq):
650
+ for reg, val in state.regs.items():
651
+ if reg in {self.project.arch.sp_offset, self.project.arch.bp_offset}:
652
+ cond = None
653
+ if val == guard.val0:
654
+ cond = guard.val1
655
+ elif val == guard.val1:
656
+ cond = guard.val0
657
+ if cond is not None:
658
+ self._reg_value_at_block_start[stmt.dst.value][reg] = cond
609
659
  else:
610
660
  try:
611
661
  resolve_stmt(stmt)