angr 9.2.92__py3-none-manylinux2014_x86_64.whl → 9.2.94__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_base.py +20 -10
- angr/analyses/cfg/indirect_jump_resolvers/amd64_elf_got.py +1 -1
- angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +89 -32
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +276 -133
- angr/analyses/complete_calling_conventions.py +1 -1
- angr/analyses/decompiler/ail_simplifier.py +20 -0
- angr/analyses/decompiler/block_io_finder.py +293 -0
- angr/analyses/decompiler/block_similarity.py +190 -0
- angr/analyses/decompiler/callsite_maker.py +5 -0
- angr/analyses/decompiler/clinic.py +103 -1
- angr/analyses/decompiler/decompilation_cache.py +2 -0
- angr/analyses/decompiler/decompiler.py +21 -4
- angr/analyses/decompiler/optimization_passes/__init__.py +6 -0
- angr/analyses/decompiler/optimization_passes/code_motion.py +361 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +1 -0
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +30 -18
- angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +110 -0
- angr/analyses/decompiler/peephole_optimizations/bswap.py +53 -2
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +20 -1
- angr/analyses/decompiler/structured_codegen/c.py +76 -41
- angr/analyses/decompiler/structuring/phoenix.py +41 -9
- angr/analyses/decompiler/utils.py +13 -4
- angr/analyses/propagator/engine_ail.py +3 -0
- angr/analyses/reaching_definitions/engine_ail.py +3 -0
- angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
- angr/analyses/stack_pointer_tracker.py +60 -10
- angr/analyses/typehoon/simple_solver.py +95 -24
- angr/analyses/typehoon/typeconsts.py +1 -1
- angr/calling_conventions.py +0 -3
- angr/engines/pcode/cc.py +1 -1
- angr/engines/successors.py +6 -0
- angr/knowledge_plugins/propagations/states.py +2 -1
- angr/procedures/definitions/glibc.py +3 -1
- angr/procedures/definitions/parse_win32json.py +2135 -383
- angr/procedures/definitions/wdk_ntoskrnl.py +956 -0
- angr/sim_type.py +53 -13
- angr/utils/library.py +2 -2
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/METADATA +6 -6
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/RECORD +44 -41
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/WHEEL +1 -1
- angr/procedures/definitions/wdk_ntdll.py +0 -994
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/LICENSE +0 -0
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/entry_points.txt +0 -0
- {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from typing import Any, Optional, Union, List
|
|
3
|
+
|
|
4
|
+
from ailment import Block
|
|
5
|
+
from ailment.statement import Call, Statement, ConditionalJump, Assignment, Store, Return, Jump
|
|
6
|
+
from ailment.expression import (
|
|
7
|
+
Load,
|
|
8
|
+
Expression,
|
|
9
|
+
BinaryOp,
|
|
10
|
+
UnaryOp,
|
|
11
|
+
Convert,
|
|
12
|
+
ITE,
|
|
13
|
+
Tmp,
|
|
14
|
+
Const,
|
|
15
|
+
StackBaseOffset,
|
|
16
|
+
)
|
|
17
|
+
from ailment.block_walker import AILBlockWalkerBase
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
from angr.knowledge_plugins.key_definitions.atoms import MemoryLocation, Register, SpOffset, ConstantSrc
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BlockIOFinder(AILBlockWalkerBase):
    """
    Finds the input and output locations of each statement in an AIL block.
    I/O locations can be a Register, MemoryLocation, or SpOffset (wrapped in a Memory Location).

    The block is walked in the constructor, which fills three maps keyed by statement index:

    - ``inputs_by_stmt``: locations read by the statement
    - ``outputs_by_stmt``: locations written by the statement
    - ``derefed_at``: locations that are the address of a Load inside the statement

    When ``as_atom`` is True, handlers convert expressions into key-definition atoms. Otherwise the AIL
    expressions themselves are recorded (constants are recorded as an ``(expr, is_memory)`` tuple).

    Handlers may return None (temporaries in atom mode, or expression types without a handler); None is
    stored in the sets like any other value.
    """

    def __init__(self, ail_obj: Union[Block, List[Statement]], project, as_atom=True):
        """
        :param ail_obj: The block to analyze, or a plain list of statements (wrapped into a Block at address 0).
        :param project: The project; its ``arch`` is read when building stack memory locations.
        :param as_atom: Whether to record atoms (True) or raw AIL expressions (False).
        """
        super().__init__()
        self.expr_handlers[StackBaseOffset] = self._handle_StackBaseOffset
        self._as_atom = as_atom
        self._project = project

        self.inputs_by_stmt = defaultdict(set)
        self.outputs_by_stmt = defaultdict(set)
        self.derefed_at = defaultdict(set)

        block = Block(0, len(ail_obj), statements=ail_obj) if isinstance(ail_obj, list) else ail_obj
        self.walk(block)

    @staticmethod
    def _add_or_update_dict(d, k, v):
        """
        Merge `v` into the set stored at `d[k]`: a set is unioned in, anything else is added as one element.
        """
        BlockIOFinder._add_or_update_set(d[k], v)

    @staticmethod
    def _add_or_update_set(s, v):
        """
        Merge `v` into the set `s`: a set is unioned in, anything else is added as one element.
        """
        if isinstance(v, set):
            s.update(v)
        else:
            s.add(v)

    #
    # I/O helpers
    #

    @staticmethod
    def _is_dangerous_memory(loc):
        """
        Assume any memory location that is NOT on the stack is a dangerous memory location.
        """
        return isinstance(loc, MemoryLocation) and not loc.is_on_stack

    def _has_dangerous_deref(self, stmt_idx):
        """
        Return True if the statement at `stmt_idx` loads from at least one dangerous memory location.
        """
        derefs = self.derefed_at.get(stmt_idx, set())
        return any(self._is_dangerous_memory(d) for d in derefs)

    def _input_defined_by_other_stmt(self, target_idx, other_idx) -> bool:
        """
        Return True if the statement at `target_idx` reads a dangerous memory location, or reads any location
        that the statement at `other_idx` writes.
        """
        target_inputs = self.inputs_by_stmt[target_idx]
        # any memory location, not on stack, is not movable
        if any(self._is_dangerous_memory(i) for i in target_inputs):
            return True

        other_outputs = self.outputs_by_stmt[other_idx]
        # always answer with a bool instead of leaking the intersection set
        return bool(target_inputs.intersection(other_outputs))

    def _output_used_by_other_stmt(self, target_idx, other_idx) -> bool:
        """
        Return True if the statement at `target_idx` writes a dangerous memory location, or writes any location
        that the statement at `other_idx` reads.
        """
        target_output = self.outputs_by_stmt[target_idx]
        # any memory location, not on stack, is not movable
        if any(self._is_dangerous_memory(o) for o in target_output):
            return True

        other_input = self.inputs_by_stmt[other_idx]
        # always answer with a bool instead of leaking the intersection set
        return bool(target_output.intersection(other_input))

    def can_swap(self, stmt, ail_obj: Union[Block, List[Statement]], offset: int):
        """
        Decide whether `stmt` can be moved by one position inside `ail_obj`.

        :param stmt:    The statement to move; it must be contained in `ail_obj`.
        :param ail_obj: The block (or list of statements) that holds `stmt`.
        :param offset:  1 to swap with the next statement ("down"), -1 to swap with the previous one ("up").
        :return:        True if the swap is considered safe, False otherwise.
        :raises RuntimeError: If `stmt` is not in `ail_obj`, or if the move is otherwise acceptable but
                              `offset` is neither -1 nor 1.
        """
        all_stmts = (ail_obj.statements or []) if isinstance(ail_obj, Block) else ail_obj
        try:
            curr_idx = all_stmts.index(stmt)
        except ValueError:
            raise RuntimeError("Statement not in block, and we can't compute moving a stmt to a new block!") from None

        new_idx = curr_idx + offset
        if (
            # movement must be within bounds
            (new_idx < 0 or new_idx >= len(all_stmts))
            or
            # you can never move jumps
            isinstance(stmt, (ConditionalJump, Jump))
            or
            # we can't handle memory locations
            self._has_dangerous_deref(curr_idx)
            or self._has_dangerous_deref(new_idx)
        ):
            return False

        # equivalent to swapping "down"
        if offset == 1:
            if self._output_used_by_other_stmt(curr_idx, new_idx):
                return False
        # equivalent to swapping "up"
        elif offset == -1:
            if self._input_defined_by_other_stmt(curr_idx, new_idx):
                return False
        else:
            raise RuntimeError("Offset must be -1 or 1")

        return True

    #
    # Statements (all with side effects)
    #

    def _handle_Assignment(self, stmt_idx: int, stmt: Assignment, block: Optional[Block]):
        """Record the destination as an output and the source as an input."""
        output_loc = self._handle_expr(0, stmt.dst, stmt_idx, stmt, block)
        self._add_or_update_dict(self.outputs_by_stmt, stmt_idx, output_loc)

        input_loc = self._handle_expr(1, stmt.src, stmt_idx, stmt, block)
        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input_loc)

    def _handle_Call(self, stmt_idx: int, stmt: Call, block: Optional[Block]):
        """Record every argument as an input and the return expression as an output."""
        if stmt.args:
            for i, arg in enumerate(stmt.args):
                input_loc = self._handle_expr(i, arg, stmt_idx, stmt, block)
                self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input_loc)

        out_loc = self._handle_expr(0, stmt.ret_expr, stmt_idx, stmt, block)
        self._add_or_update_dict(self.outputs_by_stmt, stmt_idx, out_loc)

    def _handle_Store(self, stmt_idx: int, stmt: Store, block: Optional[Block]):
        """Record the store address (as memory) as an output and the stored data as an input."""
        out_loc = self._handle_expr(0, stmt.addr, stmt_idx, stmt, block, is_memory=True)
        self._add_or_update_dict(self.outputs_by_stmt, stmt_idx, out_loc)

        input_loc = self._handle_expr(1, stmt.data, stmt_idx, stmt, block)
        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input_loc)

    def _handle_ConditionalJump(self, stmt_idx: int, stmt: ConditionalJump, block: Optional[Block]):
        """Record the condition and both targets as inputs."""
        input1 = self._handle_expr(0, stmt.condition, stmt_idx, stmt, block)
        input2 = self._handle_expr(1, stmt.true_target, stmt_idx, stmt, block)
        input3 = self._handle_expr(2, stmt.false_target, stmt_idx, stmt, block)
        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input1)
        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input2)
        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input3)

    def _handle_Return(self, stmt_idx: int, stmt: Return, block: Optional[Block]):
        """Record every returned expression as both an input and an output of the statement."""
        if stmt.ret_exprs:
            for i, ret_expr in enumerate(stmt.ret_exprs):
                loc = self._handle_expr(i, ret_expr, stmt_idx, stmt, block)
                self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, loc)
                self._add_or_update_dict(self.outputs_by_stmt, stmt_idx, loc)

    #
    # Expressions
    #

    def _handle_expr(
        self,
        expr_idx: int,
        expr: Expression,
        stmt_idx: int,
        stmt: Optional[Statement],
        block: Optional[Block],
        is_memory=False,
    ) -> Any:
        """
        Dispatch `expr` to the handler registered for its exact type.

        :return: Whatever the handler returns, or None when no handler is registered for ``type(expr)``.
        """
        try:
            handler = self.expr_handlers[type(expr)]
        except KeyError:
            handler = None

        if handler:
            return handler(expr_idx, expr, stmt_idx, stmt, block, is_memory=is_memory)
        return None

    # pylint: disable=unused-argument
    def _handle_Load(
        self, expr_idx: int, expr: Load, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=True
    ):
        """Resolve the load address as memory, remember it as dereferenced at `stmt_idx`, and return it."""
        load_loc = self._handle_expr(0, expr.addr, stmt_idx, stmt, block, is_memory=True)
        self._add_or_update_dict(self.derefed_at, stmt_idx, load_loc)
        return load_loc

    def _handle_CallExpr(
        self, expr_idx: int, expr: Call, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
    ):
        """Return the set of locations used by the arguments of a call expression."""
        args = set()
        if expr.args:
            for i, arg in enumerate(expr.args):
                self._add_or_update_set(args, self._handle_expr(i, arg, stmt_idx, stmt, block, is_memory=is_memory))

        return args

    def _handle_BinaryOp(
        self, expr_idx: int, expr: BinaryOp, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
    ):
        """Return the set of locations used by both operands."""
        input_locs = set()
        self._add_or_update_set(
            input_locs, self._handle_expr(0, expr.operands[0], stmt_idx, stmt, block, is_memory=is_memory)
        )
        self._add_or_update_set(
            input_locs, self._handle_expr(1, expr.operands[1], stmt_idx, stmt, block, is_memory=is_memory)
        )

        return input_locs

    def _handle_UnaryOp(
        self, expr_idx: int, expr: UnaryOp, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
    ):
        """Return whatever the single operand resolves to."""
        return self._handle_expr(0, expr.operand, stmt_idx, stmt, block, is_memory=is_memory)

    def _handle_Convert(
        self, expr_idx: int, expr: Convert, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
    ):
        """Return whatever the converted operand resolves to."""
        return self._handle_expr(expr_idx, expr.operand, stmt_idx, stmt, block, is_memory=is_memory)

    def _handle_ITE(
        self, expr_idx: int, expr: ITE, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
    ):
        """Return the set of locations used by the condition and by both branches."""
        input_locs = set()
        self._add_or_update_set(
            input_locs,
            self._handle_expr(0, expr.cond, stmt_idx, stmt, block, is_memory=is_memory),
        )
        self._add_or_update_set(
            input_locs,
            self._handle_expr(1, expr.iftrue, stmt_idx, stmt, block, is_memory=is_memory),
        )
        self._add_or_update_set(
            input_locs,
            self._handle_expr(2, expr.iffalse, stmt_idx, stmt, block, is_memory=is_memory),
        )

        return input_locs

    #
    # Base locations
    #

    # pylint: disable=unused-argument
    def _handle_Tmp(
        self, expr_idx: int, expr: Tmp, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
    ):
        """Return None in atom mode (temporaries have no atom here), the expression itself otherwise."""
        if self._as_atom:
            return None
        else:
            return expr

    # pylint: disable=unused-argument
    def _handle_Register(
        self, expr_idx: int, expr: Register, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
    ):
        """Return a Register atom built from the expression's offset and size, or the expression itself."""
        if self._as_atom:
            return Register(expr.reg_offset, expr.size)
        else:
            return expr

    def _handle_Const(
        self, expr_idx: int, expr: Const, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
    ):
        """
        In atom mode, return a MemoryLocation when the constant is used as an address and a ConstantSrc
        otherwise. In expression mode, return the tuple ``(expr, is_memory)``.
        """
        if self._as_atom:
            return MemoryLocation(expr.value, expr.size) if is_memory else ConstantSrc(expr.value, expr.size)

        return (
            expr,
            is_memory,
        )

    # pylint: disable=unused-argument
    def _handle_StackBaseOffset(
        self,
        expr_idx: int,
        expr: StackBaseOffset,
        stmt_idx: int,
        stmt: Statement,
        block: Optional[Block],
        is_memory=False,
    ):
        """Return a MemoryLocation wrapping an SpOffset for the stack offset, or the expression itself."""
        if self._as_atom:
            return MemoryLocation(
                SpOffset(self._project.arch.bits, expr.offset), expr.size * self._project.arch.byte_width
            )
        return expr
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
from typing import Union, Optional, List, Tuple
|
|
2
|
+
|
|
3
|
+
import networkx as nx
|
|
4
|
+
from ailment.block import Block
|
|
5
|
+
from ailment.statement import Statement, ConditionalJump
|
|
6
|
+
|
|
7
|
+
from .utils import find_block_by_addr
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def has_similar_stmt(blk1: Block, blk2: Block):
    """
    Check whether any statement of blk1 is similar (per is_similar) to any statement of blk2.

    @return: True as soon as one similar pair is found, False if there is none.
    """
    return any(is_similar(left, right) for left in blk1.statements for right in blk2.statements)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def is_similar(
    ail_obj1: Union[Block, Statement], ail_obj2: Union[Block, Statement], graph: nx.DiGraph = None, partial: bool = True
):
    """
    Returns True if the two AIL objects are similar, False otherwise.

    Objects of different types are never similar; an object is always similar to itself. Blocks are similar
    when they have the same number of statements and the statements are pairwise similar. Statements are
    compared with ``likes()``. Conditional jumps that do not like each other can still be similar when a graph
    is given: their conditions must like each other and the blocks at their true/false targets must match.

    @param ail_obj1: The first Block or Statement.
    @param ail_obj2: The second Block or Statement.
    @param graph:    Optional graph used to look up the target blocks of conditional jumps.
    @param partial:  If True, target blocks of conditional jumps only need their first statements to like each
                     other; otherwise the target blocks are compared with is_similar.
    @return:         True if similar, False otherwise.
    """
    if type(ail_obj1) is not type(ail_obj2):
        return False

    if ail_obj1 is ail_obj2:
        return True

    # AIL Blocks
    if isinstance(ail_obj1, Block):
        if len(ail_obj1.statements) != len(ail_obj2.statements):
            return False

        # NOTE(review): `partial` is not forwarded here, so nested comparisons always use the default.
        for stmt1, stmt2 in zip(ail_obj1.statements, ail_obj2.statements):
            if not is_similar(stmt1, stmt2, graph=graph):
                return False
        return True

    # anything that is neither a Block nor a Statement is never similar
    if not isinstance(ail_obj1, Statement):
        return False

    # Generic Statement Handler
    if not isinstance(ail_obj1, ConditionalJump):
        return ail_obj1.likes(ail_obj2)

    # ConditionalJump Handler
    # try a simple compare
    liked = ail_obj1.likes(ail_obj2)
    if liked or not graph:
        return liked

    # even in partial matching, the condition must at least match
    if not ail_obj1.condition.likes(ail_obj2.condition):
        return False

    # must use graph to know
    for attr in ["true_target", "false_target"]:
        target1, target2 = getattr(ail_obj1, attr), getattr(ail_obj2, attr)
        if target1 is None and target2 is None:
            # neither jump has this target; nothing to compare
            continue
        if not hasattr(target1, "value") or not hasattr(target2, "value"):
            # a missing or non-constant target cannot be resolved to a block through the graph
            return False

        t1, t2 = target1.value, target2.value
        try:
            t1_blk, t2_blk = find_block_by_addr(graph, t1), find_block_by_addr(graph, t2)
        except KeyError:
            return False

        # special checks for when a node is empty:
        if not t1_blk.statements or not t2_blk.statements:
            # when both are empty, they are similar
            if len(t1_blk.statements) == len(t2_blk.statements):
                continue

            # TODO: implement a check for when one is empty and other is jump.
            #  this will require a recursive call into similar() to check if a jump and empty are equal
            return False

        # skip full checks when partial checking is on
        if partial and t1_blk.statements[0].likes(t2_blk.statements[0]):
            continue

        if not is_similar(t1_blk, t2_blk, graph=graph):
            return False
    return True
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
#
|
|
93
|
+
# Knuth-Morris-Pratt Similarity Matching
|
|
94
|
+
#
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _kmp_search_ail_obj(search_pattern, stmt_seq, graph=None, partial=True):
    """
    Knuth-Morris-Pratt search where element equality is replaced by is_similar().
    Adapted from: https://code.activestate.com/recipes/117214/.

    Yields every start position in stmt_seq at which search_pattern matches; yields nothing if it's not found.
    """
    # take a private copy: it supports indexing and cannot change while the generator is suspended
    pattern = list(search_pattern)
    pattern_len = len(pattern)

    # phase 1: compute the shift table
    shifts = [1] * (pattern_len + 1)
    shift = 1
    for pos, item in enumerate(pattern):
        while shift <= pos and not is_similar(item, pattern[pos - shift], graph=graph, partial=partial):
            shift += shifts[pos - shift]
        shifts[pos + 1] = shift

    # phase 2: scan the sequence
    start_pos = 0
    matched = 0
    for candidate in stmt_seq:
        while matched == pattern_len or (
            matched >= 0 and not is_similar(pattern[matched], candidate, graph=graph, partial=partial)
        ):
            step = shifts[matched]
            start_pos += step
            matched -= step
        matched += 1
        if matched == pattern_len:
            yield start_pos
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def index_of_similar_stmts(
    search_stmts, other_stmts, graph=None, all_positions=False
) -> Optional[Union[int, List[int]]]:
    """
    Locate search_stmts (a list of Statement) inside other_stmts (a list of Statement) using similarity
    matching.

    NOTE(review): with all_positions False this returns the LAST position produced by the search, not the
    first; the previous docstring claimed "first occurrence". Behavior is kept as is - confirm which one callers
    expect.

    @param search_stmts:  The statements to look for.
    @param other_stmts:   The statements to search in.
    @param graph:         Optional graph forwarded to the similarity check.
    @param all_positions: If True, return the list of all matching start positions instead of a single one.
    @return: None if there is no match; otherwise an int (start position in other_stmts), or a list of ints
             when all_positions is True.
    """
    positions = list(_kmp_search_ail_obj(search_stmts, other_stmts, graph=graph))

    if not positions:
        return None

    if all_positions:
        return positions

    return positions[-1]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def in_other(stmts, other, graph=None):
    """
    Tell whether stmts (a list of Statement) occurs in other, as determined by index_of_similar_stmts.

    @return: True if a position was found, False otherwise.
    """
    found_at = index_of_similar_stmts(stmts, other, graph=graph)
    return found_at is not None
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def longest_ail_subseq(
    stmts_list: List[List[Statement]], graph=None
) -> Tuple[Optional[List[Statement]], Optional[List[int]]]:
    """
    Given a list of List[Statement], it returns the longest List[Statement] that is a subsequence of all the lists.
    The common List[Statement] must all be in the same order and adjacent to each other. If no common subsequence is
    found, it returns None.

    @param stmts_list: The statement lists to compare. Candidates are taken from the first list.
    @param graph:      Optional graph forwarded to the similarity check.
    @return: Tuple[List[Statement], List[int]], where the first element is the longest common subsequence, and the
             second element is a list of integers indicating the index of the longest common subsequence in each
             list of statements. With a single input list, that list and [0] are returned. With no match (or no
             input lists), the first element is None and the second holds one None per input list.
    """
    # nothing to compare: same shape as the "no common subsequence" answer
    if not stmts_list:
        return None, []

    if len(stmts_list) == 1:
        return stmts_list[0], [0]

    # find the longest sequence in all stmts
    first = stmts_list[0]
    subseq = []
    for i in range(len(first)):
        # only windows strictly longer than the current best can improve the result
        for j in range(len(subseq) + 1, len(first) - i + 1):
            candidate = first[i : i + j]
            if all(in_other(candidate, stmts, graph=graph) for stmts in stmts_list):
                subseq = candidate

    if not subseq:
        return None, [None] * len(stmts_list)

    return subseq, [index_of_similar_stmts(subseq, stmts, graph=graph) for stmts in stmts_list]
|
|
@@ -6,6 +6,7 @@ from dataclasses import dataclass
|
|
|
6
6
|
from typing import Dict, List, Tuple, Set, Optional, Iterable, Union, Type, Any, NamedTuple, TYPE_CHECKING
|
|
7
7
|
|
|
8
8
|
import networkx
|
|
9
|
+
import capstone
|
|
9
10
|
|
|
10
11
|
import ailment
|
|
11
12
|
|
|
@@ -262,6 +263,7 @@ class Clinic(Analysis):
|
|
|
262
263
|
ail_graph = self._simplify_blocks(
|
|
263
264
|
ail_graph, stack_pointer_tracker=spt, remove_dead_memdefs=False, cache=block_simplification_cache
|
|
264
265
|
)
|
|
266
|
+
self._rewrite_alloca(ail_graph)
|
|
265
267
|
|
|
266
268
|
# Run simplification passes
|
|
267
269
|
self._update_progress(40.0, text="Running simplifications 1")
|
|
@@ -606,7 +608,12 @@ class Clinic(Analysis):
|
|
|
606
608
|
regs = {self.project.arch.sp_offset}
|
|
607
609
|
if hasattr(self.project.arch, "bp_offset") and self.project.arch.bp_offset is not None:
|
|
608
610
|
regs.add(self.project.arch.bp_offset)
|
|
609
|
-
|
|
611
|
+
|
|
612
|
+
regs |= self._find_regs_compared_against_sp(self._func_graph)
|
|
613
|
+
|
|
614
|
+
spt = self.project.analyses.StackPointerTracker(
|
|
615
|
+
self.function, regs, track_memory=self._sp_tracker_track_memory, cross_insn_opt=False
|
|
616
|
+
)
|
|
610
617
|
if spt.inconsistent_for(self.project.arch.sp_offset):
|
|
611
618
|
l.warning("Inconsistency found during stack pointer tracking. Decompilation results might be incorrect.")
|
|
612
619
|
return spt
|
|
@@ -1201,6 +1208,7 @@ class Clinic(Analysis):
|
|
|
1201
1208
|
|
|
1202
1209
|
if self._cache is not None:
|
|
1203
1210
|
self._cache.type_constraints = vr.type_constraints
|
|
1211
|
+
self._cache.func_typevar = vr.func_typevar
|
|
1204
1212
|
self._cache.var_to_typevar = vr.var_to_typevars
|
|
1205
1213
|
|
|
1206
1214
|
return tmp_kb
|
|
@@ -1877,5 +1885,99 @@ class Clinic(Analysis):
|
|
|
1877
1885
|
AILGraphWalker(graph, handle_node, replace_nodes=True).walk()
|
|
1878
1886
|
return graph
|
|
1879
1887
|
|
|
1888
|
+
def _find_regs_compared_against_sp(self, func_graph):
    """
    Collect registers that are compared against RSP by a register-to-register ``cmp`` instruction in any node
    of the function graph.

    :param func_graph: The function graph; each node provides ``addr`` and ``size`` used to lift the block.
    :return: A set with the first element of the arch register entry for every such register (presumably the
             register offset - verify against ``arch.registers``). Empty for architectures other than AMD64.
    """
    # TODO: Implement this function for architectures beyond amd64
    found = set()
    if self.project.arch.name != "AMD64":
        return found

    for node in func_graph.nodes:
        cs_block = self.project.factory.block(node.addr, size=node.size).capstone
        for insn in cs_block.insns:
            if insn.mnemonic != "cmp":
                continue

            lhs, rhs = insn.operands[0], insn.operands[1]
            other_reg = None
            if (
                lhs.type == capstone.x86.X86_OP_REG
                and lhs.reg == capstone.x86.X86_REG_RSP
                and rhs.type == capstone.x86.X86_OP_REG
            ):
                # cmp rsp, <reg>
                other_reg = rhs.reg
            elif (
                rhs.type == capstone.x86.X86_OP_REG
                and rhs.reg == capstone.x86.X86_REG_RSP
                and lhs.type == capstone.x86.X86_OP_REG
            ):
                # cmp <reg>, rsp
                other_reg = lhs.reg

            if other_reg is None:
                continue

            name = insn.reg_name(other_reg)
            found.add(self.project.arch.registers[name][0])

    return found
|
|
1915
|
+
|
|
1916
|
+
def _rewrite_alloca(self, ail_graph):
    """
    Find one self-looping node that matches a fixed statement pattern and replace it, in place, with a block
    holding a single call to "alloca".

    The pattern (presumably the stack-probing loop emitted for large stack allocations - TODO confirm):

    - the node has in-degree 2 and out-degree 2, is its own successor, and has at least 6 statements
    - statement 1 assigns stack base offset -0x1000 to a register
    - statement 2 stores a load of stack base offset -0x1000 back to stack base offset -0x1000
    - the last statement is a conditional jump on ``CmpEQ(<stack base offset -0x1000>, <register>)`` whose
      false target is the node itself

    The argument of the generated call is ``sp - <register>``. Only the first matching node is rewritten.

    :param ail_graph: The AIL graph, modified in place.
    """
    # pylint:disable=too-many-boolean-expressions
    alloca_node = None
    sp_equal_to = None

    for node in ail_graph:
        if ail_graph.in_degree[node] != 2 or ail_graph.out_degree[node] != 2:
            continue
        if node not in ail_graph.successors(node):
            # not a self loop
            continue
        if len(node.statements) < 6:
            continue

        stmt0 = node.statements[1]  # skip the LABEL statement
        stmt1 = node.statements[2]
        last_stmt = node.statements[-1]

        if not (
            isinstance(stmt0, ailment.Stmt.Assignment)
            and isinstance(stmt0.dst, ailment.Expr.Register)
            and isinstance(stmt0.src, ailment.Expr.StackBaseOffset)
            and stmt0.src.offset == -0x1000
        ):
            continue

        if not (
            isinstance(stmt1, ailment.Stmt.Store)
            and isinstance(stmt1.addr, ailment.Expr.StackBaseOffset)
            and stmt1.addr.offset == -0x1000
            and isinstance(stmt1.data, ailment.Expr.Load)
            and isinstance(stmt1.data.addr, ailment.Expr.StackBaseOffset)
            and stmt1.data.addr.offset == -0x1000
        ):
            continue

        if not (
            isinstance(last_stmt, ailment.Stmt.ConditionalJump)
            and isinstance(last_stmt.condition, ailment.Expr.BinaryOp)
            and last_stmt.condition.op == "CmpEQ"
            and isinstance(last_stmt.condition.operands[0], ailment.Expr.StackBaseOffset)
            and last_stmt.condition.operands[0].offset == -0x1000
            and isinstance(last_stmt.condition.operands[1], ailment.Expr.Register)
            and isinstance(last_stmt.false_target, ailment.Expr.Const)
            and last_stmt.false_target.value == node.addr
        ):
            continue

        # found it!
        alloca_node = node
        sp_reg = ailment.Expr.Register(None, None, self.project.arch.sp_offset, self.project.arch.bits)
        sp_equal_to = ailment.Expr.BinaryOp(
            None,
            "Sub",
            [sp_reg, last_stmt.condition.operands[1]],
            False,
        )
        break

    if alloca_node is None:
        return

    first_stmt = alloca_node.statements[1]
    call_stmt = ailment.Stmt.Call(first_stmt.idx, "alloca", args=[sp_equal_to], **first_stmt.tags)
    new_node = ailment.Block(alloca_node.addr, alloca_node.original_size, statements=[call_stmt])

    # replace the node: remember its neighbours (minus the self loop) before removing it
    preds = [pred for pred in ail_graph.predecessors(alloca_node) if pred is not alloca_node]
    succs = [succ for succ in ail_graph.successors(alloca_node) if succ is not alloca_node]
    ail_graph.remove_node(alloca_node)
    for pred in preds:
        ail_graph.add_edge(pred, new_node)
    for succ in succs:
        ail_graph.add_edge(new_node, succ)
|
|
1981
|
+
|
|
1880
1982
|
|
|
1881
1983
|
register_analysis(Clinic, "Clinic")
|
|
@@ -15,6 +15,7 @@ class DecompilationCache:
|
|
|
15
15
|
__slots__ = (
|
|
16
16
|
"addr",
|
|
17
17
|
"type_constraints",
|
|
18
|
+
"func_typevar",
|
|
18
19
|
"var_to_typevar",
|
|
19
20
|
"codegen",
|
|
20
21
|
"clinic",
|
|
@@ -25,6 +26,7 @@ class DecompilationCache:
|
|
|
25
26
|
def __init__(self, addr):
|
|
26
27
|
self.addr = addr
|
|
27
28
|
self.type_constraints: Optional[Set] = None
|
|
29
|
+
self.func_typevar = None
|
|
28
30
|
self.var_to_typevar: Optional[Dict] = None
|
|
29
31
|
self.codegen: Optional[BaseStructuredCodeGenerator] = None
|
|
30
32
|
self.clinic: Optional[Clinic] = None
|