angr 9.2.92__py3-none-manylinux2014_x86_64.whl → 9.2.94__py3-none-manylinux2014_x86_64.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_base.py +20 -10
- angr/analyses/cfg/indirect_jump_resolvers/amd64_elf_got.py +1 -1
- angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +89 -32
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +276 -133
- angr/analyses/complete_calling_conventions.py +1 -1
- angr/analyses/decompiler/ail_simplifier.py +20 -0
- angr/analyses/decompiler/block_io_finder.py +293 -0
- angr/analyses/decompiler/block_similarity.py +190 -0
- angr/analyses/decompiler/callsite_maker.py +5 -0
- angr/analyses/decompiler/clinic.py +103 -1
- angr/analyses/decompiler/decompilation_cache.py +2 -0
- angr/analyses/decompiler/decompiler.py +21 -4
- angr/analyses/decompiler/optimization_passes/__init__.py +6 -0
- angr/analyses/decompiler/optimization_passes/code_motion.py +361 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +1 -0
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +30 -18
- angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +110 -0
- angr/analyses/decompiler/peephole_optimizations/bswap.py +53 -2
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +20 -1
- angr/analyses/decompiler/structured_codegen/c.py +76 -41
- angr/analyses/decompiler/structuring/phoenix.py +41 -9
- angr/analyses/decompiler/utils.py +13 -4
- angr/analyses/propagator/engine_ail.py +3 -0
- angr/analyses/reaching_definitions/engine_ail.py +3 -0
- angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
- angr/analyses/stack_pointer_tracker.py +60 -10
- angr/analyses/typehoon/simple_solver.py +95 -24
- angr/analyses/typehoon/typeconsts.py +1 -1
- angr/calling_conventions.py +0 -3
- angr/engines/pcode/cc.py +1 -1
- angr/engines/successors.py +6 -0
- angr/knowledge_plugins/propagations/states.py +2 -1
- angr/procedures/definitions/glibc.py +3 -1
- angr/procedures/definitions/parse_win32json.py +2135 -383
- angr/procedures/definitions/wdk_ntoskrnl.py +956 -0
- angr/sim_type.py +53 -13
- angr/utils/library.py +2 -2
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/METADATA +6 -6
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/RECORD +44 -41
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/WHEEL +1 -1
- angr/procedures/definitions/wdk_ntdll.py +0 -994
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/LICENSE +0 -0
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/entry_points.txt +0 -0
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/top_level.txt +0 -0

angr/analyses/decompiler/peephole_optimizations/bswap.py

@@ -9,8 +9,8 @@ from .base import PeepholeOptimizationExprBase
 class Bswap(PeepholeOptimizationExprBase):
     __slots__ = ()

-    NAME = "Simplifying bswap_16()"
-    expr_classes = (BinaryOp,)
+    NAME = "Simplifying bswap_16() and bswap_32()"
+    expr_classes = (BinaryOp, Convert)

     def optimize(self, expr: BinaryOp, **kwargs):
         # bswap_16
@@ -48,6 +48,57 @@ class Bswap(PeepholeOptimizationExprBase):

             return None

+        # bswap_32
+        # (Conv(64->32, rax<8>) << 0x18<8>) |
+        #     (((Conv(64->32, rax<8>) << 0x8<8>) & 0xff0000<32>) |
+        #     (((Conv(64->32, rax<8>) >> 0x8<8>) & 0xff00<32>) |
+        #     ((Conv(64->32, rax<8>) >> 0x18<8>) & 0xff<32>))))
+        if expr.op == "Or":
+            # fully flatten the expression
+            or_pieces = []
+            queue = [expr]
+            while queue:
+                operand = queue.pop(0)
+                if isinstance(operand, BinaryOp) and operand.op == "Or":
+                    queue.append(operand.operands[0])
+                    queue.append(operand.operands[1])
+                else:
+                    or_pieces.append(operand)
+            if len(or_pieces) == 4:
+                # parse pieces
+                shifts = set()
+                cores = set()
+                for piece in or_pieces:
+                    if isinstance(piece, BinaryOp):
+                        if piece.op == "Shl" and isinstance(piece.operands[1], Const):
+                            cores.add(piece.operands[0])
+                            shifts.add(("<<", piece.operands[1].value, 0xFFFFFFFF))
+                        elif piece.op == "And" and isinstance(piece.operands[1], Const):
+                            and_amount = piece.operands[1].value
+                            and_core = piece.operands[0]
+                            if (
+                                isinstance(and_core, BinaryOp)
+                                and and_core.op == "Shl"
+                                and isinstance(and_core.operands[1], Const)
+                            ):
+                                cores.add(and_core.operands[0])
+                                shifts.add(("<<", and_core.operands[1].value, and_amount))
+                            elif (
+                                isinstance(and_core, BinaryOp)
+                                and and_core.op == "Shr"
+                                and isinstance(and_core.operands[1], Const)
+                            ):
+                                cores.add(and_core.operands[0])
+                                shifts.add((">>", and_core.operands[1].value, and_amount))
+                if len(cores) == 1 and shifts == {
+                    ("<<", 0x18, 0xFFFFFFFF),
+                    ("<<", 8, 0xFF0000),
+                    (">>", 0x18, 0xFF),
+                    (">>", 8, 0xFF00),
+                }:
+                    core_expr = next(iter(cores))
+                    return Call(expr.idx, "__buildin_bswap32", args=[core_expr], bits=expr.bits, **expr.tags)
+
         return None

     def _match_inner(self, or_first: BinaryOp, or_second: BinaryOp) -> Tuple[bool, Optional[Expression]]:
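
For context (not part of the diff above): the shift-and-mask combination matched by the new code is the classic open-coded 32-bit byte swap, which the peephole collapses into a single call. A minimal standalone check in plain Python, with no angr or ailment dependency, that the matched shape really reverses the four bytes of a 32-bit value:

def open_coded_bswap32(x: int) -> int:
    # The expression shape the new peephole recognizes:
    # (x << 24) | ((x << 8) & 0xff0000) | ((x >> 8) & 0xff00) | ((x >> 24) & 0xff)
    return ((x << 24) & 0xFFFFFFFF) | ((x << 8) & 0xFF0000) | ((x >> 8) & 0xFF00) | ((x >> 24) & 0xFF)

def reference_bswap32(x: int) -> int:
    # Reference byte reversal via bytes round-tripping.
    return int.from_bytes(x.to_bytes(4, "little"), "big")

assert all(open_coded_bswap32(v) == reference_bswap32(v) for v in (0x12345678, 0, 0xFFFFFFFF, 0xDEADBEEF))

When all four shift/mask pieces over a single core expression are present, the pass rewrites the whole Or-tree into one call on that core expression.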

angr/analyses/decompiler/peephole_optimizations/eager_eval.py

@@ -1,6 +1,6 @@
 from math import gcd

-from ailment.expression import BinaryOp, UnaryOp, Const, Convert
+from ailment.expression import BinaryOp, UnaryOp, Const, Convert, StackBaseOffset

 from .base import PeepholeOptimizationExprBase

@@ -59,6 +59,22 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
                     expr.signed,
                     **expr.tags,
                 )
+            if (
+                isinstance(expr.operands[0], BinaryOp)
+                and expr.operands[0].op == "Mul"
+                and isinstance(expr.operands[0].operands[1], Const)
+                and expr.operands[0].operands[0].likes(expr.operands[1])
+            ):
+                # A * x + x => (A + 1) * x
+                coeff_expr = expr.operands[0].operands[1]
+                new_coeff = coeff_expr.value + 1
+                return BinaryOp(
+                    expr.idx,
+                    "Mul",
+                    [Const(coeff_expr.idx, None, new_coeff, coeff_expr.bits), expr.operands[1]],
+                    expr.signed,
+                    **expr.tags,
+                )
         elif expr.op == "Sub":
             if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
                 mask = (1 << expr.bits) - 1
@@ -93,6 +109,9 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
             if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
                 return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)

+            if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
+                return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)
+
         elif expr.op == "And":
             if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
                 new_expr = Const(
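
For context (not part of the diff above): the two additions encode simple algebraic identities — A * x + x folds to (A + 1) * x, and the difference of two stack-base offsets folds to a constant. A sketch of the first rewrite on a hypothetical, simplified expression tree; the real pass operates on ailment BinaryOp/Const nodes and uses .likes() for structural comparison:

from dataclasses import dataclass
from typing import Tuple, Union

# Hypothetical, simplified expression nodes standing in for ailment's BinaryOp/Const.
@dataclass(frozen=True)
class Const:
    value: int

@dataclass(frozen=True)
class Var:
    name: str

@dataclass(frozen=True)
class BinOp:
    op: str
    operands: Tuple[Union["BinOp", Const, Var], ...]

def simplify_add(expr: BinOp):
    # A * x + x  =>  (A + 1) * x, mirroring the new "Add" case in EagerEvaluation.
    lhs, rhs = expr.operands
    if (
        expr.op == "Add"
        and isinstance(lhs, BinOp)
        and lhs.op == "Mul"
        and isinstance(lhs.operands[1], Const)
        and lhs.operands[0] == rhs
    ):
        return BinOp("Mul", (Const(lhs.operands[1].value + 1), rhs))
    return expr

x = Var("x")
print(simplify_add(BinOp("Add", (BinOp("Mul", (x, Const(3))), x))))
# BinOp(op='Mul', operands=(Const(value=4), Var(name='x')))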

angr/analyses/decompiler/structured_codegen/c.py

@@ -3,12 +3,12 @@ from typing import Optional, Dict, List, Tuple, Set, Any, Union, TYPE_CHECKING,
 from collections import defaultdict
 import logging
 import struct
-from functools import reduce

 from ailment import Block, Expr, Stmt, Tmp
 from ailment.expression import StackBaseOffset, BinaryOp
 from unique_log_filter import UniqueLogFilter

+from ....procedures import SIM_LIBRARIES, SIM_TYPE_COLLECTIONS
 from ....sim_type import (
     SimTypeLongLong,
     SimTypeInt,
@@ -28,6 +28,7 @@ from ....sim_type import (
     SimTypeFixedSizeArray,
     SimTypeLength,
     SimTypeReg,
+    dereference_simtype,
 )
 from ....knowledge_plugins.functions import Function
 from ....sim_variable import SimVariable, SimTemporaryVariable, SimStackVariable, SimMemoryVariable
@@ -537,8 +538,10 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
         indent_str = self.indent_str(indent)
         if self.codegen.show_local_types:
             local_types = [unpack_typeref(ty) for ty in self.variable_manager.types.iter_own()]
+            name_to_structtypes = {}
             for ty in local_types:
                 if isinstance(ty, SimStruct):
+                    name_to_structtypes[ty.name] = ty
                     for field in ty.fields.values():
                         if isinstance(field, SimTypePointer):
                             if isinstance(field.pts_to, (SimTypeArray, SimTypeFixedSizeArray)):
@@ -546,6 +549,12 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
                             else:
                                 field = field.pts_to
                             if isinstance(field, SimStruct) and field not in local_types:
+                                if field.name and not field.fields and field.name in name_to_structtypes:
+                                    # we use SimStruct types with empty fields to refer to already defined struct types
+                                    # for example, see how struct _IO_marker is defined in sim_type.py
+                                    continue
+                                if field.name:
+                                    name_to_structtypes[field.name] = field
                                 local_types.append(field)

             yield from type_to_c_repr_chunks(ty, full=True, indent_str=indent_str)
@@ -577,7 +586,7 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
         yield " ", None
         # function name
         if self.demangled_name and self.show_demangled_name:
-            normalized_name = get_cpp_function_name(self.demangled_name, specialized=False, qualified=
+            normalized_name = get_cpp_function_name(self.demangled_name, specialized=False, qualified=True)
         else:
             normalized_name = self.name
         yield normalized_name, self
@@ -1234,7 +1243,16 @@ class CFunctionCall(CStatement, CExpression):
     @property
     def prototype(self) -> Optional[SimTypeFunction]:  # TODO there should be a prototype for each callsite!
         if self.callee_func is not None and self.callee_func.prototype is not None:
-
+            proto = self.callee_func.prototype
+            if self.callee_func.prototype_libname is not None:
+                # we need to deref the prototype in case it uses SimTypeRef internally
+                type_collections = []
+                prototype_lib = SIM_LIBRARIES[self.callee_func.prototype_libname]
+                if prototype_lib.type_collection_names:
+                    for typelib_name in prototype_lib.type_collection_names:
+                        type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
+                    proto = dereference_simtype(proto, type_collections)
+            return proto
         returnty = SimTypeInt(signed=False)
         return SimTypeFunction([arg.type for arg in self.args], returnty).with_arch(self.codegen.project.arch)

@@ -2165,29 +2183,24 @@ class CConstant(CExpression):

         if self.fmt_float:
             if 0 < value <= 0xFFFF_FFFF:
-
-
+                return str(struct.unpack("f", struct.pack("I", value))[0])
+
+        if self.fmt_char:
+            if value < 0:
+                value += 2**self._type.size
+            value &= 0xFF
+            return repr(chr(value)) if value < 0x80 else f"'\\x{value:x}'"

         if self.fmt_neg:
             if value > 0:
-                value
+                value -= 2**self._type.size
             elif value < 0:
-                value
-
-        str_value = None
-        if self.fmt_char:
-            try:
-                str_value = f"'{chr(value)}'"
-            except ValueError:
-                str_value = None
+                value += 2**self._type.size

-        if
-
-            str_value = hex(value)
-        else:
-            str_value = str(value)
+        if self.fmt_hex:
+            return hex(value)

-        return
+        return str(value)


 class CRegister(CExpression):
@@ -2590,6 +2603,15 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
             if isinstance(var, CVariable):
                 var.variable_type = self._get_variable_type(var.variable, is_global=True)

+        for cvar in self.cfunc.arg_list:
+            vartype = self._get_variable_type(
+                cvar.variable,
+                is_global=isinstance(cvar.variable, SimMemoryVariable)
+                and not isinstance(cvar.variable, SimStackVariable),
+            )
+            if vartype is not None:
+                cvar.variable_type = vartype.with_arch(self.project.arch)
+
    #
    # Util methods
    #
@@ -2823,30 +2845,40 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
         if len(o_terms) == 0:
             # probably a plain integer, return as is
             return expr
-        result =
-
-            (
-
-
-
-
-
-
-
-
-
-
-
-
+        result = None
+        pointer_length_int_type = (
+            SimTypeLongLong(signed=False) if self.project.arch.bits == 64 else SimTypeInt(signed=False)
+        )
+        for c, t in o_terms:
+            op = "Add"
+            if c == -1 and result is not None:
+                op = "Sub"
+                piece = (
+                    t
+                    if not isinstance(t.type, SimTypePointer)
+                    else CTypeCast(t.type, SimTypePointer(SimTypeChar()), t, codegen=self)
+                )
+            elif c == 1:
+                piece = (
+                    t
+                    if not isinstance(t.type, SimTypePointer)
+                    else CTypeCast(t.type, SimTypePointer(SimTypeChar()), t, codegen=self)
+                )
+            else:
+                piece = CBinaryOp(
+                    "Mul",
+                    CConstant(c, t.type, codegen=self),
+                    (
                         t
                         if not isinstance(t.type, SimTypePointer)
-                        else CTypeCast(t.type,
-                )
+                        else CTypeCast(t.type, pointer_length_int_type, t, codegen=self)
+                    ),
+                    codegen=self,
                 )
-
-
-
+            if result is None:
+                result = piece
+            else:
+                result = CBinaryOp(op, result, piece, codegen=self)
         if o_constant != 0:
             result = CBinaryOp("Add", CConstant(o_constant, SimTypeInt(), codegen=self), result, codegen=self)

@@ -2869,6 +2901,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
             if kernel is not None:
                 l.warning("Summing two different pointers together. Uh oh!")
                 return bail_out()
+            if c == -1:
+                # legit case: you can deduct a pointer from another pointer and get an integer as result in C
+                return bail_out()
             if c != 1:
                 l.warning("Multiplying a pointer by a constant??")
                 return bail_out()
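
For context (not part of the diff above): the rewritten CConstant rendering picks exactly one textual form per constant, in the order float, char, negative, hex, then plain decimal, and returns as soon as one form applies. A condensed standalone sketch of that branch order; the function and parameter names here are illustrative, not angr API:

import struct

def render_constant(value: int, size_bits: int, fmt_float=False, fmt_char=False, fmt_neg=False, fmt_hex=False) -> str:
    # Mirrors the branch order of the rewritten CConstant value rendering.
    if fmt_float and 0 < value <= 0xFFFF_FFFF:
        return str(struct.unpack("f", struct.pack("I", value))[0])
    if fmt_char:
        if value < 0:
            value += 2**size_bits
        value &= 0xFF
        return repr(chr(value)) if value < 0x80 else f"'\\x{value:x}'"
    if fmt_neg:
        if value > 0:
            value -= 2**size_bits
        elif value < 0:
            value += 2**size_bits
    if fmt_hex:
        return hex(value)
    return str(value)

print(render_constant(0x41, 32, fmt_char=True))       # 'A'
print(render_constant(0xFFFFFFFF, 32, fmt_neg=True))   # -1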

angr/analyses/decompiler/structuring/phoenix.py

@@ -14,7 +14,7 @@ from ailment.expression import Const, UnaryOp, MultiStatementExpression
 from angr.utils.graph import GraphUtils
 from ....knowledge_plugins.cfg import IndirectJumpType
 from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
-from ....utils.graph import dominates, to_acyclic_graph
+from ....utils.graph import dominates, to_acyclic_graph, dfs_back_edges
 from ..sequence_walker import SequenceWalker
 from ..utils import (
     remove_last_statement,
@@ -140,6 +140,10 @@ class PhoenixStructurer(StructurerBase):
         if len(self._region.graph.nodes) == 1 and has_cycle:
             self._analyze_cyclic()

+        # backup the region prior to conducting a cyclic refinement because we may not be able to structure a cycle out
+        # of the refined graph. in that case, we restore the original region and return.
+        pre_refinement_region = None
+
         while len(self._region.graph.nodes) > 1:
             progressed = self._analyze_acyclic()
             if progressed and self._region.head not in self._region.graph:
@@ -151,12 +155,14 @@ class PhoenixStructurer(StructurerBase):
             if has_cycle:
                 progressed |= self._analyze_cyclic()
                 if progressed:
+                    pre_refinement_region = None
                    if self._region.head not in self._region.graph:
                        # update the loop head
                        self._region.head = next(
                            iter(node for node in self._region.graph.nodes if node.addr == self._region.head.addr)
                        )
-
+                elif pre_refinement_region is None:
+                    pre_refinement_region = self._region.copy()
                 refined = self._refine_cyclic()
                 if refined:
                     if self._region.head not in self._region.graph:
@@ -194,6 +200,10 @@ class PhoenixStructurer(StructurerBase):
             # successfully structured
             self.result = next(iter(self._region.graph.nodes))
         else:
+            if pre_refinement_region is not None:
+                # we could not make a loop after the last cycle refinement. restore the graph
+                self._region = pre_refinement_region
+
             self.result = None  # the actual result is in self._region.graph and self._region.graph_with_successors

     def _analyze_cyclic(self) -> bool:
@@ -572,7 +582,16 @@ class PhoenixStructurer(StructurerBase):
         return True, loop_node

     def _refine_cyclic(self) -> bool:
-
+        loop_heads = {t for _, t in dfs_back_edges(self._region.graph, self._region.head)}
+        sorted_loop_heads = GraphUtils.quasi_topological_sort_nodes(self._region.graph, nodes=list(loop_heads))
+
+        for head in sorted_loop_heads:
+            l.debug("... refining cyclic at %r", head)
+            refined = self._refine_cyclic_core(head)
+            l.debug("... refined: %s", refined)
+            if refined:
+                return True
+        return False

     def _refine_cyclic_core(self, loop_head) -> bool:
         graph: networkx.DiGraph = self._region.graph
@@ -621,7 +640,7 @@ class PhoenixStructurer(StructurerBase):
            # natural loop. select *any* exit edge to determine the successor
            # well actually, to maintain determinism, we select the successor with the highest address
            successor_candidates = set()
-           for node in graph
+           for node in networkx.descendants(graph, loop_head):
                for succ in fullgraph.successors(node):
                    if succ not in graph:
                        successor_candidates.add(succ)
@@ -637,8 +656,15 @@ class PhoenixStructurer(StructurerBase):
                outgoing_edges.append((pred, succ))

        if outgoing_edges:
-           #
-           # successors
+           # if there is a single successor, we convert all out-going edges into breaks;
+           # if there are multiple successors, and if the current region does not have a parent region, then we
+           # convert all out-going edges into gotos;
+           # otherwise we give up.
+
+           if self._parent_region is not None and len({dst for _, dst in outgoing_edges}) > 1:
+               # give up because there is a parent region
+               return False
+
            if successor is None:
                successor_and_edgecounts = defaultdict(int)
                for _, dst in outgoing_edges:
@@ -794,7 +820,7 @@ class PhoenixStructurer(StructurerBase):
            # case being the loop head. in such cases, we can just remove the edge.
            if src.addr not in self.kb.cfgs["CFGFast"].jump_tables:
                l.warning(
-                    "_refine_cyclic_core: Cannot find the block going to loop head for edge %r -> %r."
+                    "_refine_cyclic_core: Cannot find the block going to loop head for edge %r -> %r. "
                    "Remove the edge anyway.",
                    src,
                    continue_node,
@@ -847,6 +873,7 @@ class PhoenixStructurer(StructurerBase):
     def _refine_cyclic_is_while_loop(
         self, graph, fullgraph, loop_head, head_succs
     ) -> Tuple[bool, Optional[Tuple[List, List, BaseNode, BaseNode]]]:
+
         if len(head_succs) == 2 and any(head_succ not in graph for head_succ in head_succs):
             # make sure the head_pred is not already structured
             _, _, head_block_0 = self._find_node_going_to_dst(loop_head, head_succs[0])
@@ -857,7 +884,7 @@ class PhoenixStructurer(StructurerBase):
         continue_edges: List[Tuple[BaseNode, BaseNode]] = []
         outgoing_edges = []
         successor = next(iter(head_succ for head_succ in head_succs if head_succ not in graph))
-        for node in graph
+        for node in networkx.descendants(graph, loop_head):
             succs = list(fullgraph.successors(node))
             if loop_head in succs:
                 continue_edges.append((node, loop_head))
@@ -888,7 +915,7 @@ class PhoenixStructurer(StructurerBase):
         # virtualize all other edges
         successor = next(iter(nn for nn in head_pred_succs if nn not in graph))
         continue_node = head_pred
-        for node in graph
+        for node in networkx.descendants(graph, loop_head):
             if node is head_pred:
                 continue
             succs = list(fullgraph.successors(node))
@@ -2243,6 +2270,11 @@ class PhoenixStructurer(StructurerBase):
                 last_stmt = block.statements[-1]
                 if _check(last_stmt):
                     walker.parent_and_block.append((walker.block_id, parent, block))
+                elif (
+                    not isinstance(last_stmt, (Jump, ConditionalJump))
+                    and block.addr + block.original_size == dst_addr
+                ):
+                    walker.parent_and_block.append((walker.block_id, parent, block))

         def _handle_MultiNode(block: MultiNode, parent=None, **kwargs):  # pylint:disable=unused-argument
             if block.nodes and isinstance(block.nodes[-1], Block) and block.nodes[-1].statements:

angr/analyses/decompiler/utils.py

@@ -1,11 +1,10 @@
-# pylint:disable=wrong-import-position
+# pylint:disable=wrong-import-position,broad-exception-caught,ungrouped-imports
 import pathlib
 import copy
 from typing import Optional, Tuple, Any, Union, List, Iterable
 import logging

 import networkx
-
 import ailment

 import angr
@@ -251,6 +250,8 @@ def _merge_ail_nodes(graph, node_a: ailment.Block, node_b: ailment.Block) -> ail
     in_edges = list(graph.in_edges(node_a, data=True))
     out_edges = list(graph.out_edges(node_b, data=True))

+    a_ogs = graph.nodes[node_a].get("original_nodes", set())
+    b_ogs = graph.nodes[node_b].get("original_nodes", set())
     new_node = node_a.copy() if node_a.addr <= node_b.addr else node_b.copy()
     old_node = node_b if new_node == node_a else node_a
     # remove jumps in the middle of nodes when merging
@@ -263,8 +264,7 @@ def _merge_ail_nodes(graph, node_a: ailment.Block, node_b: ailment.Block) -> ail
     graph.remove_node(node_b)

     if new_node is not None:
-        graph.add_node(new_node)
-
+        graph.add_node(new_node, original_nodes=a_ogs.union(b_ogs))
         for src, _, data in in_edges:
             if src is node_b:
                 src = new_node
@@ -289,6 +289,7 @@ def to_ail_supergraph(transition_graph: networkx.DiGraph) -> networkx.DiGraph:
     """
     # make a copy of the graph
     transition_graph = networkx.DiGraph(transition_graph)
+    networkx.set_node_attributes(transition_graph, {node: {node} for node in transition_graph.nodes}, "original_nodes")

     while True:
         for src, dst, data in transition_graph.edges(data=True):
@@ -725,6 +726,14 @@ def calls_in_graph(graph: networkx.DiGraph) -> int:
     return counter.calls


+def find_block_by_addr(graph: networkx.DiGraph, addr: int):
+    for block in graph.nodes():
+        if block.addr == addr:
+            return block
+
+    raise KeyError("The block is not in the graph!")
+
+
 # delayed import
 from .structuring.structurer_nodes import (
     MultiNode,
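
For context (not part of the diff above): to_ail_supergraph now seeds every node with a singleton "original_nodes" attribute, and _merge_ail_nodes unions those sets when collapsing two blocks, so each super-node remembers which original blocks it came from. A minimal networkx sketch of that bookkeeping, with plain integers standing in for ailment blocks:

import networkx

g = networkx.DiGraph([(1, 2), (2, 3)])
# Seed every node with a singleton provenance set, as to_ail_supergraph now does.
networkx.set_node_attributes(g, {n: {n} for n in g.nodes}, "original_nodes")

def merge(graph, a, b, merged):
    # Union the provenance sets of the two nodes being collapsed, mirroring _merge_ail_nodes.
    a_ogs = graph.nodes[a].get("original_nodes", set())
    b_ogs = graph.nodes[b].get("original_nodes", set())
    in_edges = [(src, merged) for src, _ in graph.in_edges(a) if src not in (a, b)]
    out_edges = [(merged, dst) for _, dst in graph.out_edges(b) if dst not in (a, b)]
    graph.remove_node(a)
    graph.remove_node(b)
    graph.add_node(merged, original_nodes=a_ogs | b_ogs)
    graph.add_edges_from(in_edges + out_edges)

merge(g, 1, 2, merged=12)
print(g.nodes[12]["original_nodes"])  # {1, 2}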

angr/analyses/propagator/engine_ail.py

@@ -1148,6 +1148,9 @@ class SimEnginePropagatorAIL(
         )
         return PropValue.from_value_and_details(value, expr.size, new_expr, self._codeloc())

+    _ail_handle_AddV = _ail_handle_Add
+    _ail_handle_MulV = _ail_handle_Mul
+
     def _ail_handle_Mull(self, expr):
         o0_value = self._expr(expr.operands[0])
         o1_value = self._expr(expr.operands[1])

angr/analyses/reaching_definitions/reaching_definitions.py

@@ -539,6 +539,13 @@ class ReachingDefinitionsAnalysis(
         for use in [state.stack_uses, state.heap_uses, state.register_uses, state.memory_uses]:
             self.all_uses.merge(use)

+        if self._track_tmps:
+            # merge tmp uses to all_uses
+            for tmp_idx, locs in state.tmp_uses.items():
+                tmp_def = next(iter(state.tmps[tmp_idx]))
+                for loc in locs:
+                    self.all_uses.add_use(tmp_def, loc)
+
         # drop definitions and uses because we will not need them anymore
         state.downsize()

|
|
|
3
3
|
from typing import Set, List, Optional, TYPE_CHECKING
|
|
4
4
|
import re
|
|
5
5
|
import logging
|
|
6
|
+
from collections import defaultdict
|
|
6
7
|
|
|
7
8
|
import pyvex
|
|
8
9
|
|
|
@@ -148,6 +149,21 @@ class OffsetVal:
|
|
|
148
149
|
return f"reg({self.reg}){(self.offset - 2**self.reg.bitlen) if self.offset != 0 else 0:+}"
|
|
149
150
|
|
|
150
151
|
|
|
152
|
+
class Eq:
|
|
153
|
+
"""
|
|
154
|
+
Represent an equivalence condition.
|
|
155
|
+
"""
|
|
156
|
+
|
|
157
|
+
__slots__ = ("val0", "val1")
|
|
158
|
+
|
|
159
|
+
def __init__(self, val0, val1):
|
|
160
|
+
self.val0 = val0
|
|
161
|
+
self.val1 = val1
|
|
162
|
+
|
|
163
|
+
def __hash__(self):
|
|
164
|
+
return hash((type(self), self.val0, self.val1))
|
|
165
|
+
|
|
166
|
+
|
|
151
167
|
class FrozenStackPointerTrackerState:
|
|
152
168
|
"""
|
|
153
169
|
Abstract state for StackPointerTracker analysis with registers and memory values being in frozensets.
|
|
@@ -296,7 +312,12 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
|
|
|
296
312
|
"""
|
|
297
313
|
|
|
298
314
|
def __init__(
|
|
299
|
-
self,
|
|
315
|
+
self,
|
|
316
|
+
func: Optional[Function],
|
|
317
|
+
reg_offsets: Set[int],
|
|
318
|
+
block: Optional["Block"] = None,
|
|
319
|
+
track_memory=True,
|
|
320
|
+
cross_insn_opt=True,
|
|
300
321
|
):
|
|
301
322
|
if func is not None:
|
|
302
323
|
if not func.normalized:
|
|
@@ -316,6 +337,8 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
|
|
|
316
337
|
self.reg_offsets = reg_offsets
|
|
317
338
|
self.states = {}
|
|
318
339
|
self._blocks = {}
|
|
340
|
+
self._reg_value_at_block_start = defaultdict(dict)
|
|
341
|
+
self.cross_insn_opt = cross_insn_opt
|
|
319
342
|
|
|
320
343
|
_l.debug("Running on function %r", self._func)
|
|
321
344
|
self._analyze()
|
|
@@ -468,7 +491,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
|
|
|
468
491
|
self._set_state(addr, new_val, "pre")
|
|
469
492
|
|
|
470
493
|
def _run_on_node(self, node: BlockNode, state):
|
|
471
|
-
block = self.project.factory.block(node.addr, size=node.size)
|
|
494
|
+
block = self.project.factory.block(node.addr, size=node.size, cross_insn_opt=self.cross_insn_opt)
|
|
472
495
|
self._blocks[node.addr] = block
|
|
473
496
|
|
|
474
497
|
state = state.unfreeze()
|
|
@@ -483,6 +506,10 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
|
|
|
483
506
|
except SimTranslationError:
|
|
484
507
|
pass
|
|
485
508
|
|
|
509
|
+
if node.addr in self._reg_value_at_block_start:
|
|
510
|
+
for reg, val in self._reg_value_at_block_start[node.addr].items():
|
|
511
|
+
state.put(reg, val)
|
|
512
|
+
|
|
486
513
|
if vex_block is not None:
|
|
487
514
|
if isinstance(vex_block, pyvex.IRSB):
|
|
488
515
|
curr_stmt_start_addr = self._process_vex_irsb(node, vex_block, state)
|
|
@@ -548,7 +575,12 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
|
|
|
548
575
|
and is_alignment_mask(arg1_expr.val)
|
|
549
576
|
):
|
|
550
577
|
return arg0_expr
|
|
551
|
-
|
|
578
|
+
elif expr.op.startswith("Iop_CmpEQ"):
|
|
579
|
+
arg0_expr = _resolve_expr(arg0)
|
|
580
|
+
arg1_expr = _resolve_expr(arg1)
|
|
581
|
+
if isinstance(arg0_expr, (Register, OffsetVal)) and isinstance(arg1_expr, (Register, OffsetVal)):
|
|
582
|
+
return Eq(arg0_expr, arg1_expr)
|
|
583
|
+
raise CouldNotResolveException()
|
|
552
584
|
elif type(expr) is pyvex.IRExpr.RdTmp and expr.tmp in tmps and tmps[expr.tmp] is not None:
|
|
553
585
|
return tmps[expr.tmp]
|
|
554
586
|
elif type(expr) is pyvex.IRExpr.Const:
|
|
@@ -563,13 +595,15 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
|
|
|
563
595
|
to_bits = int(m.group(3))
|
|
564
596
|
# to_unsigned = m.group(4) == "U"
|
|
565
597
|
v = resolve_expr(expr.args[0])
|
|
566
|
-
if
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
return
|
|
572
|
-
|
|
598
|
+
if isinstance(v, Constant):
|
|
599
|
+
if from_bits > to_bits:
|
|
600
|
+
# truncation
|
|
601
|
+
mask = (1 << to_bits) - 1
|
|
602
|
+
return Constant(v.val & mask)
|
|
603
|
+
return v
|
|
604
|
+
elif isinstance(v, Eq):
|
|
605
|
+
return v
|
|
606
|
+
return TOP
|
|
573
607
|
elif self.track_mem and type(expr) is pyvex.IRExpr.Load:
|
|
574
608
|
return state.load(_resolve_expr(expr.addr))
|
|
575
609
|
raise CouldNotResolveException()
|
|
@@ -606,6 +640,22 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
|
|
|
606
640
|
and vex_block.instruction_addresses.index(curr_stmt_start_addr) == vex_block.instructions - 1
|
|
607
641
|
):
|
|
608
642
|
exit_observed = True
|
|
643
|
+
if (
|
|
644
|
+
type(stmt.guard) is pyvex.IRExpr.RdTmp
|
|
645
|
+
and stmt.guard.tmp in tmps
|
|
646
|
+
and isinstance(stmt.dst, pyvex.IRConst.IRConst)
|
|
647
|
+
):
|
|
648
|
+
guard = tmps[stmt.guard.tmp]
|
|
649
|
+
if isinstance(guard, Eq):
|
|
650
|
+
for reg, val in state.regs.items():
|
|
651
|
+
if reg in {self.project.arch.sp_offset, self.project.arch.bp_offset}:
|
|
652
|
+
cond = None
|
|
653
|
+
if val == guard.val0:
|
|
654
|
+
cond = guard.val1
|
|
655
|
+
elif val == guard.val1:
|
|
656
|
+
cond = guard.val0
|
|
657
|
+
if cond is not None:
|
|
658
|
+
self._reg_value_at_block_start[stmt.dst.value][reg] = cond
|
|
609
659
|
else:
|
|
610
660
|
try:
|
|
611
661
|
resolve_stmt(stmt)
|