angr 9.2.92__py3-none-manylinux2014_x86_64.whl → 9.2.94__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (45)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +20 -10
  3. angr/analyses/cfg/indirect_jump_resolvers/amd64_elf_got.py +1 -1
  4. angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +89 -32
  5. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +276 -133
  6. angr/analyses/complete_calling_conventions.py +1 -1
  7. angr/analyses/decompiler/ail_simplifier.py +20 -0
  8. angr/analyses/decompiler/block_io_finder.py +293 -0
  9. angr/analyses/decompiler/block_similarity.py +190 -0
  10. angr/analyses/decompiler/callsite_maker.py +5 -0
  11. angr/analyses/decompiler/clinic.py +103 -1
  12. angr/analyses/decompiler/decompilation_cache.py +2 -0
  13. angr/analyses/decompiler/decompiler.py +21 -4
  14. angr/analyses/decompiler/optimization_passes/__init__.py +6 -0
  15. angr/analyses/decompiler/optimization_passes/code_motion.py +361 -0
  16. angr/analyses/decompiler/optimization_passes/optimization_pass.py +1 -0
  17. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +30 -18
  18. angr/analyses/decompiler/optimization_passes/switch_default_case_duplicator.py +110 -0
  19. angr/analyses/decompiler/peephole_optimizations/bswap.py +53 -2
  20. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +20 -1
  21. angr/analyses/decompiler/structured_codegen/c.py +76 -41
  22. angr/analyses/decompiler/structuring/phoenix.py +41 -9
  23. angr/analyses/decompiler/utils.py +13 -4
  24. angr/analyses/propagator/engine_ail.py +3 -0
  25. angr/analyses/reaching_definitions/engine_ail.py +3 -0
  26. angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
  27. angr/analyses/stack_pointer_tracker.py +60 -10
  28. angr/analyses/typehoon/simple_solver.py +95 -24
  29. angr/analyses/typehoon/typeconsts.py +1 -1
  30. angr/calling_conventions.py +0 -3
  31. angr/engines/pcode/cc.py +1 -1
  32. angr/engines/successors.py +6 -0
  33. angr/knowledge_plugins/propagations/states.py +2 -1
  34. angr/procedures/definitions/glibc.py +3 -1
  35. angr/procedures/definitions/parse_win32json.py +2135 -383
  36. angr/procedures/definitions/wdk_ntoskrnl.py +956 -0
  37. angr/sim_type.py +53 -13
  38. angr/utils/library.py +2 -2
  39. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/METADATA +6 -6
  40. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/RECORD +44 -41
  41. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/WHEEL +1 -1
  42. angr/procedures/definitions/wdk_ntdll.py +0 -994
  43. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/LICENSE +0 -0
  44. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/entry_points.txt +0 -0
  45. {angr-9.2.92.dist-info → angr-9.2.94.dist-info}/top_level.txt +0 -0
angr/analyses/decompiler/block_io_finder.py (new file)
@@ -0,0 +1,293 @@
+from collections import defaultdict
+from typing import Any, Optional, Union, List
+
+from ailment import Block
+from ailment.statement import Call, Statement, ConditionalJump, Assignment, Store, Return, Jump
+from ailment.expression import (
+    Load,
+    Expression,
+    BinaryOp,
+    UnaryOp,
+    Convert,
+    ITE,
+    Tmp,
+    Const,
+    StackBaseOffset,
+)
+from ailment.block_walker import AILBlockWalkerBase
+
+
+from angr.knowledge_plugins.key_definitions.atoms import MemoryLocation, Register, SpOffset, ConstantSrc
+
+
+class BlockIOFinder(AILBlockWalkerBase):
+    """
+    Finds the input and output locations of each statement in an AIL block.
+    I/O locations can be a Register, MemoryLocation, or SpOffset (wrapped in a MemoryLocation).
+    """
+
+    def __init__(self, ail_obj: Union[Block, List[Statement]], project, as_atom=True):
+        super().__init__()
+        self.expr_handlers[StackBaseOffset] = self._handle_StackBaseOffset
+        self._as_atom = as_atom
+        self._project = project
+
+        self.inputs_by_stmt = defaultdict(set)
+        self.outputs_by_stmt = defaultdict(set)
+        self.derefed_at = defaultdict(set)
+
+        block = Block(0, len(ail_obj), statements=ail_obj) if isinstance(ail_obj, list) else ail_obj
+        self.walk(block)
+
+    @staticmethod
+    def _add_or_update_dict(d, k, v):
+        if isinstance(v, set):
+            d[k].update(v)
+        else:
+            d[k].add(v)
+
+    @staticmethod
+    def _add_or_update_set(s, v):
+        if isinstance(v, set):
+            s.update(v)
+        else:
+            s.add(v)
+
+    #
+    # I/O helpers
+    #
+
+    @staticmethod
+    def _is_dangerous_memory(loc):
+        """
+        Assume any memory location that is NOT on the stack is a dangerous memory location.
+        """
+        return isinstance(loc, MemoryLocation) and not loc.is_on_stack
+
+    def _has_dangerous_deref(self, stmt_idx):
+        derefs = self.derefed_at.get(stmt_idx, set())
+        return any(self._is_dangerous_memory(d) for d in derefs)
+
+    def _input_defined_by_other_stmt(self, target_idx, other_idx):
+        target_inputs = self.inputs_by_stmt[target_idx]
+        # any memory location, not on stack, is not movable
+        if any(self._is_dangerous_memory(i) for i in target_inputs):
+            return True
+
+        other_outputs = self.outputs_by_stmt[other_idx]
+        return target_inputs.intersection(other_outputs)
+
+    def _output_used_by_other_stmt(self, target_idx, other_idx):
+        target_output = self.outputs_by_stmt[target_idx]
+        # any memory location, not on stack, is not movable
+        if any(self._is_dangerous_memory(o) for o in target_output):
+            return True
+
+        other_input = self.inputs_by_stmt[other_idx]
+        return target_output.intersection(other_input)
+
+    def can_swap(self, stmt, ail_obj: Union[Block, List[Statement]], offset: int):
+        all_stmts = (ail_obj.statements or []) if isinstance(ail_obj, Block) else ail_obj
+        if stmt not in all_stmts:
+            raise RuntimeError("Statement not in block, and we can't compute moving a stmt to a new block!")
+
+        curr_idx = all_stmts.index(stmt)
+        new_idx = curr_idx + offset
+        if (
+            # movement must be within bounds
+            (new_idx < 0 or new_idx >= len(all_stmts))
+            or
+            # you can never move jumps
+            isinstance(stmt, (ConditionalJump, Jump))
+            or
+            # we can't handle memory locations
+            self._has_dangerous_deref(curr_idx)
+            or self._has_dangerous_deref(new_idx)
+        ):
+            return False

+        # equivalent to swapping "down"
+        if offset == 1:
+            if self._output_used_by_other_stmt(curr_idx, new_idx):
+                return False
+        # equivalent to swapping "up"
+        elif offset == -1:
+            if self._input_defined_by_other_stmt(curr_idx, new_idx):
+                return False
+        else:
+            raise RuntimeError("Offset must be -1 or 1")
+
+        return True
+
+    #
+    # Statements (all with side effects)
+    #
+
+    def _handle_Assignment(self, stmt_idx: int, stmt: Assignment, block: Optional[Block]):
+        output_loc = self._handle_expr(0, stmt.dst, stmt_idx, stmt, block)
+        self._add_or_update_dict(self.outputs_by_stmt, stmt_idx, output_loc)
+
+        input_loc = self._handle_expr(1, stmt.src, stmt_idx, stmt, block)
+        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input_loc)
+
+    def _handle_Call(self, stmt_idx: int, stmt: Call, block: Optional[Block]):
+        if stmt.args:
+            for i, arg in enumerate(stmt.args):
+                input_loc = self._handle_expr(i, arg, stmt_idx, stmt, block)
+                self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input_loc)
+
+        out_loc = self._handle_expr(0, stmt.ret_expr, stmt_idx, stmt, block)
+        self._add_or_update_dict(self.outputs_by_stmt, stmt_idx, out_loc)
+
+    def _handle_Store(self, stmt_idx: int, stmt: Store, block: Optional[Block]):
+        out_loc = self._handle_expr(0, stmt.addr, stmt_idx, stmt, block, is_memory=True)
+        self._add_or_update_dict(self.outputs_by_stmt, stmt_idx, out_loc)
+
+        input_loc = self._handle_expr(1, stmt.data, stmt_idx, stmt, block)
+        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input_loc)
+
+    def _handle_ConditionalJump(self, stmt_idx: int, stmt: ConditionalJump, block: Optional[Block]):
+        input1 = self._handle_expr(0, stmt.condition, stmt_idx, stmt, block)
+        input2 = self._handle_expr(1, stmt.true_target, stmt_idx, stmt, block)
+        input3 = self._handle_expr(2, stmt.false_target, stmt_idx, stmt, block)
+        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input1)
+        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input2)
+        self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, input3)
+
+    def _handle_Return(self, stmt_idx: int, stmt: Return, block: Optional[Block]):
+        if stmt.ret_exprs:
+            for i, ret_expr in enumerate(stmt.ret_exprs):
+                loc = self._handle_expr(i, ret_expr, stmt_idx, stmt, block)
+                self._add_or_update_dict(self.inputs_by_stmt, stmt_idx, loc)
+                self._add_or_update_dict(self.outputs_by_stmt, stmt_idx, loc)
+
+    #
+    # Expressions
+    #
+
+    def _handle_expr(
+        self,
+        expr_idx: int,
+        expr: Expression,
+        stmt_idx: int,
+        stmt: Optional[Statement],
+        block: Optional[Block],
+        is_memory=False,
+    ) -> Any:
+        try:
+            handler = self.expr_handlers[type(expr)]
+        except KeyError:
+            handler = None
+
+        if handler:
+            return handler(expr_idx, expr, stmt_idx, stmt, block, is_memory=is_memory)
+        return None
+
+    # pylint: disable=unused-argument
+    def _handle_Load(
+        self, expr_idx: int, expr: Load, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=True
+    ):
+        load_loc = self._handle_expr(0, expr.addr, stmt_idx, stmt, block, is_memory=True)
+        self._add_or_update_dict(self.derefed_at, stmt_idx, load_loc)
+        return load_loc
+
+    def _handle_CallExpr(
+        self, expr_idx: int, expr: Call, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
+    ):
+        args = set()
+        if expr.args:
+            for i, arg in enumerate(expr.args):
+                self._add_or_update_set(args, self._handle_expr(i, arg, stmt_idx, stmt, block, is_memory=is_memory))
+
+        return args
+
+    def _handle_BinaryOp(
+        self, expr_idx: int, expr: BinaryOp, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
+    ):
+        input_locs = set()
+        self._add_or_update_set(
+            input_locs, self._handle_expr(0, expr.operands[0], stmt_idx, stmt, block, is_memory=is_memory)
+        )
+        self._add_or_update_set(
+            input_locs, self._handle_expr(1, expr.operands[1], stmt_idx, stmt, block, is_memory=is_memory)
+        )
+
+        return input_locs
+
+    def _handle_UnaryOp(
+        self, expr_idx: int, expr: UnaryOp, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
+    ):
+        return self._handle_expr(0, expr.operand, stmt_idx, stmt, block, is_memory=is_memory)
+
+    def _handle_Convert(
+        self, expr_idx: int, expr: Convert, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
+    ):
+        return self._handle_expr(expr_idx, expr.operand, stmt_idx, stmt, block, is_memory=is_memory)
+
+    def _handle_ITE(
+        self, expr_idx: int, expr: ITE, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
+    ):
+        input_locs = set()
+        self._add_or_update_set(
+            input_locs,
+            self._handle_expr(0, expr.cond, stmt_idx, stmt, block, is_memory=is_memory),
+        )
+        self._add_or_update_set(
+            input_locs,
+            self._handle_expr(1, expr.iftrue, stmt_idx, stmt, block, is_memory=is_memory),
+        )
+        self._add_or_update_set(
+            input_locs,
+            self._handle_expr(2, expr.iffalse, stmt_idx, stmt, block, is_memory=is_memory),
+        )
+
+        return input_locs
+
+    #
+    # Base locations
+    #
+
+    # pylint: disable=unused-argument
+    def _handle_Tmp(
+        self, expr_idx: int, expr: Tmp, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
+    ):
+        if self._as_atom:
+            return None
+        else:
+            return expr
+
+    # pylint: disable=unused-argument
+    def _handle_Register(
+        self, expr_idx: int, expr: Register, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
+    ):
+        if self._as_atom:
+            return Register(expr.reg_offset, expr.size)
+        else:
+            return expr
+
+    def _handle_Const(
+        self, expr_idx: int, expr: Const, stmt_idx: int, stmt: Statement, block: Optional[Block], is_memory=False
+    ):
+        if self._as_atom:
+            return MemoryLocation(expr.value, expr.size) if is_memory else ConstantSrc(expr.value, expr.size)
+
+        return (
+            expr,
+            is_memory,
+        )
+
+    # pylint: disable=unused-argument
+    def _handle_StackBaseOffset(
+        self,
+        expr_idx: int,
+        expr: StackBaseOffset,
+        stmt_idx: int,
+        stmt: Statement,
+        block: Optional[Block],
+        is_memory=False,
+    ):
+        if self._as_atom:
+            return MemoryLocation(
+                SpOffset(self._project.arch.bits, expr.offset), expr.size * self._project.arch.byte_width
+            )
+        return expr
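
Note: BlockIOFinder walks an AIL block once and records, per statement index, which atoms are read (inputs_by_stmt), written (outputs_by_stmt), and dereferenced (derefed_at); can_swap() then uses those sets to decide whether a statement may move one slot up or down. Below is a minimal sketch of driving it by hand; the binary path and function name are placeholders, and obtaining AIL blocks through the Clinic analysis is ordinary angr usage, not something introduced by this diff.

import angr
from angr.analyses.decompiler.block_io_finder import BlockIOFinder

proj = angr.Project("/path/to/binary", auto_load_libs=False)  # placeholder path
cfg = proj.analyses.CFGFast(normalize=True)
func = cfg.functions["main"]  # placeholder function name

# Clinic lifts the function to AIL; its graph nodes are ailment.Block objects.
clinic = proj.analyses.Clinic(func)
ail_block = next(iter(clinic.graph.nodes))

io = BlockIOFinder(ail_block, proj)
for idx, stmt in enumerate(ail_block.statements):
    print(idx, stmt, "reads:", io.inputs_by_stmt[idx], "writes:", io.outputs_by_stmt[idx])

# May the first statement swap with the one below it (offset +1)?
print(io.can_swap(ail_block.statements[0], ail_block, 1))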
angr/analyses/decompiler/block_similarity.py (new file)
@@ -0,0 +1,190 @@
+from typing import Union, Optional, List, Tuple
+
+import networkx as nx
+from ailment.block import Block
+from ailment.statement import Statement, ConditionalJump
+
+from .utils import find_block_by_addr
+
+
+def has_similar_stmt(blk1: Block, blk2: Block):
+    """
+    Returns True if blk1 has a statement that is similar to a statement in blk2, False otherwise.
+    """
+    for stmt1 in blk1.statements:
+        for stmt2 in blk2.statements:
+            if is_similar(stmt1, stmt2):
+                return True
+    return False
+
+
+def is_similar(
+    ail_obj1: Union[Block, Statement], ail_obj2: Union[Block, Statement], graph: nx.DiGraph = None, partial: bool = True
+):
+    """
+    Returns True if the two AIL objects are similar, False otherwise.
+    """
+    if type(ail_obj1) is not type(ail_obj2):
+        return False
+
+    if ail_obj1 is ail_obj2:
+        return True
+
+    # AIL Blocks
+    if isinstance(ail_obj1, Block):
+        if len(ail_obj1.statements) != len(ail_obj2.statements):
+            return False
+
+        for stmt1, stmt2 in zip(ail_obj1.statements, ail_obj2.statements):
+            if not is_similar(stmt1, stmt2, graph=graph):
+                return False
+        return True
+
+    # AIL Statements
+    elif isinstance(ail_obj1, Statement):
+        # if all(barr in [0x404530, 0x404573] for barr in [ail_obj1.ins_addr, ail_obj2.ins_addr]):
+        #     do a breakpoint
+
+        # ConditionalJump Handler
+        if isinstance(ail_obj1, ConditionalJump):
+            # try a simple compare
+            liked = ail_obj1.likes(ail_obj2)
+            if liked or not graph:
+                return liked
+
+            # even in partial matching, the condition must at least match
+            if not ail_obj1.condition.likes(ail_obj2.condition):
+                return False
+
+            # must use graph to know
+            for attr in ["true_target", "false_target"]:
+                t1, t2 = getattr(ail_obj1, attr).value, getattr(ail_obj2, attr).value
+                try:
+                    t1_blk, t2_blk = find_block_by_addr(graph, t1), find_block_by_addr(graph, t2)
+                except KeyError:
+                    return False
+
+                # special checks for when a node is empty:
+                if not t1_blk.statements or not t2_blk.statements:
+                    # when both are empty, they are similar
+                    if len(t1_blk.statements) == len(t2_blk.statements):
+                        continue
+
+                    # TODO: implement a check for when one is empty and the other is a jump.
+                    # this will require a recursive call into is_similar() to check if a jump and an empty block are equal
+                    return False
+
+                # skip full checks when partial checking is on
+                if partial and t1_blk.statements[0].likes(t2_blk.statements[0]):
+                    continue
+
+                if not is_similar(t1_blk, t2_blk, graph=graph):
+                    return False
+            return True
+
+        # Generic Statement Handler
+        else:
+            return ail_obj1.likes(ail_obj2)
+    else:
+        return False
+
+
+#
+# Knuth-Morris-Pratt Similarity Matching
+#
+
+
+def _kmp_search_ail_obj(search_pattern, stmt_seq, graph=None, partial=True):
+    """
+    Uses the Knuth-Morris-Pratt algorithm for searching.
+    Found: https://code.activestate.com/recipes/117214/.
+
+    Returns a generator of positions, which will be empty if it's not found.
+    """
+    # allow indexing into pattern and protect against change during yield
+    search_pattern = list(search_pattern)
+
+    # build table of shift amounts
+    shifts = [1] * (len(search_pattern) + 1)
+    shift = 1
+    for pos, curr_pattern in enumerate(search_pattern):
+        while shift <= pos and not is_similar(curr_pattern, search_pattern[pos - shift], graph=graph, partial=partial):
+            shift += shifts[pos - shift]
+        shifts[pos + 1] = shift
+
+    # do the actual search
+    start_pos = 0
+    match_len = 0
+    for c in stmt_seq:
+        while (
+            match_len == len(search_pattern)
+            or match_len >= 0
+            and not is_similar(search_pattern[match_len], c, graph=graph, partial=partial)
+        ):
+            start_pos += shifts[match_len]
+            match_len -= shifts[match_len]
+        match_len += 1
+        if match_len == len(search_pattern):
+            yield start_pos
+
+
+def index_of_similar_stmts(search_stmts, other_stmts, graph=None, all_positions=False) -> Optional[int]:
+    """
+    Returns the index of the first occurrence of search_stmts (a list of Statement) in other_stmts (a list of
+    Statement). If all_positions is True, returns a list of all positions instead.
+
+    @return: None or int (position start in other)
+    """
+    positions = list(_kmp_search_ail_obj(search_stmts, other_stmts, graph=graph))
+
+    if len(positions) == 0:
+        return None
+
+    return positions.pop() if not all_positions else positions
+
+
+def in_other(stmts, other, graph=None):
+    """
+    Returns True if stmts (a list of Statement) is found as a subsequence in other.
+
+    @return:
+    """
+
+    if index_of_similar_stmts(stmts, other, graph=graph) is not None:
+        return True
+
+    return False
+
+
+def longest_ail_subseq(
+    stmts_list: List[List[Statement]], graph=None
+) -> Tuple[Optional[List[Statement]], Optional[List[int]]]:
+    """
+    Given a list of List[Statement], returns the longest List[Statement] that is a subsequence of all the lists.
+    The statements of the common List[Statement] must all be in the same order and adjacent to each other. If no
+    common subsequence is found, it returns None.
+
+    @param stmts_list:
+    @param graph:
+    @return: Tuple[List[Statement], List[int]], where the first element is the longest common subsequence, and the
+             second element is a list of integers indicating the index of the longest common subsequence in each
+             list of statements.
+    """
+
+    # find the longest sequence in all stmts
+    subseq = []
+    if len(stmts_list) <= 1:
+        return stmts_list[0], [0]
+
+    if len(stmts_list[0]) > 0:
+        for i in range(len(stmts_list[0])):
+            for j in range(len(stmts_list[0]) - i + 1):
+                if j > len(subseq) and all(
+                    in_other(stmts_list[0][i : i + j], stmts, graph=graph) for stmts in stmts_list
+                ):
+                    subseq = stmts_list[0][i : i + j]
+
+    if not subseq:
+        return None, [None] * len(stmts_list)
+
+    return subseq, [index_of_similar_stmts(subseq, stmts, graph=graph) for stmts in stmts_list]
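
Note: block_similarity complements the I/O finder: is_similar() compares blocks or statements (optionally chasing jump targets through a graph), the KMP helpers search one statement sequence inside another, and longest_ail_subseq() extracts the longest run of statements shared by several blocks; the new code_motion.py optimization pass presumably builds on these. A hedged sketch follows, assuming blk_a and blk_b are ailment.Block objects taken from a Clinic graph as in the previous example.

from angr.analyses.decompiler.block_similarity import (
    is_similar,
    index_of_similar_stmts,
    longest_ail_subseq,
)

# Whole-block similarity: same statement count and pairwise "likes" matches.
print(is_similar(blk_a, blk_b))

# Longest run of statements common to both blocks, plus where it starts in each.
subseq, starts = longest_ail_subseq([blk_a.statements, blk_b.statements])
if subseq:
    print(len(subseq), "shared statements, starting at indices", starts)

# KMP-based search of a short statement sequence inside another sequence.
print(index_of_similar_stmts(blk_a.statements[:2], blk_b.statements))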
angr/analyses/decompiler/callsite_maker.py
@@ -49,6 +49,11 @@ class CallSiteMaker(Analysis):
             self.result_block = self.block
             return
 
+        if isinstance(last_stmt.target, str):
+            # custom function calls
+            self.result_block = self.block
+            return
+
         cc = None
         prototype = None
         func = None
angr/analyses/decompiler/clinic.py
@@ -6,6 +6,7 @@ from dataclasses import dataclass
 from typing import Dict, List, Tuple, Set, Optional, Iterable, Union, Type, Any, NamedTuple, TYPE_CHECKING
 
 import networkx
+import capstone
 
 import ailment
 
@@ -262,6 +263,7 @@ class Clinic(Analysis):
         ail_graph = self._simplify_blocks(
             ail_graph, stack_pointer_tracker=spt, remove_dead_memdefs=False, cache=block_simplification_cache
         )
+        self._rewrite_alloca(ail_graph)
 
         # Run simplification passes
         self._update_progress(40.0, text="Running simplifications 1")
@@ -606,7 +608,12 @@
         regs = {self.project.arch.sp_offset}
         if hasattr(self.project.arch, "bp_offset") and self.project.arch.bp_offset is not None:
             regs.add(self.project.arch.bp_offset)
-        spt = self.project.analyses.StackPointerTracker(self.function, regs, track_memory=self._sp_tracker_track_memory)
+
+        regs |= self._find_regs_compared_against_sp(self._func_graph)
+
+        spt = self.project.analyses.StackPointerTracker(
+            self.function, regs, track_memory=self._sp_tracker_track_memory, cross_insn_opt=False
+        )
         if spt.inconsistent_for(self.project.arch.sp_offset):
             l.warning("Inconsistency found during stack pointer tracking. Decompilation results might be incorrect.")
         return spt
@@ -1201,6 +1208,7 @@
 
         if self._cache is not None:
             self._cache.type_constraints = vr.type_constraints
+            self._cache.func_typevar = vr.func_typevar
             self._cache.var_to_typevar = vr.var_to_typevars
 
         return tmp_kb
@@ -1877,5 +1885,99 @@
         AILGraphWalker(graph, handle_node, replace_nodes=True).walk()
         return graph
 
+    def _find_regs_compared_against_sp(self, func_graph):
+        # TODO: Implement this function for architectures beyond amd64
+        extra_regs = set()
+        if self.project.arch.name == "AMD64":
+            for node in func_graph.nodes:
+                block = self.project.factory.block(node.addr, size=node.size).capstone
+                for insn in block.insns:
+                    if insn.mnemonic == "cmp":
+                        capstone_reg_offset = None
+                        if (
+                            insn.operands[0].type == capstone.x86.X86_OP_REG
+                            and insn.operands[0].reg == capstone.x86.X86_REG_RSP
+                            and insn.operands[1].type == capstone.x86.X86_OP_REG
+                        ):
+                            capstone_reg_offset = insn.operands[1].reg
+                        elif (
+                            insn.operands[1].type == capstone.x86.X86_OP_REG
+                            and insn.operands[1].reg == capstone.x86.X86_REG_RSP
+                            and insn.operands[0].type == capstone.x86.X86_OP_REG
+                        ):
+                            capstone_reg_offset = insn.operands[0].reg
+
+                        if capstone_reg_offset is not None:
+                            reg_name = insn.reg_name(capstone_reg_offset)
+                            extra_regs.add(self.project.arch.registers[reg_name][0])
+
+        return extra_regs
+
+    def _rewrite_alloca(self, ail_graph):
+        # pylint:disable=too-many-boolean-expressions
+        alloca_node = None
+        sp_equal_to = None
+
+        for node in ail_graph:
+            if ail_graph.in_degree[node] == 2 and ail_graph.out_degree[node] == 2:
+                succs = ail_graph.successors(node)
+                if node in succs:
+                    # self loop!
+                    if len(node.statements) >= 6:
+                        stmt0 = node.statements[1]  # skip the LABEL statement
+                        stmt1 = node.statements[2]
+                        last_stmt = node.statements[-1]
+                        if (
+                            isinstance(stmt0, ailment.Stmt.Assignment)
+                            and isinstance(stmt0.dst, ailment.Expr.Register)
+                            and isinstance(stmt0.src, ailment.Expr.StackBaseOffset)
+                            and stmt0.src.offset == -0x1000
+                        ):
+                            if (
+                                isinstance(stmt1, ailment.Stmt.Store)
+                                and isinstance(stmt1.addr, ailment.Expr.StackBaseOffset)
+                                and stmt1.addr.offset == -0x1000
+                                and isinstance(stmt1.data, ailment.Expr.Load)
+                                and isinstance(stmt1.data.addr, ailment.Expr.StackBaseOffset)
+                                and stmt1.data.addr.offset == -0x1000
+                            ):
+                                if (
+                                    isinstance(last_stmt, ailment.Stmt.ConditionalJump)
+                                    and isinstance(last_stmt.condition, ailment.Expr.BinaryOp)
+                                    and last_stmt.condition.op == "CmpEQ"
+                                    and isinstance(last_stmt.condition.operands[0], ailment.Expr.StackBaseOffset)
+                                    and last_stmt.condition.operands[0].offset == -0x1000
+                                    and isinstance(last_stmt.condition.operands[1], ailment.Expr.Register)
+                                    and isinstance(last_stmt.false_target, ailment.Expr.Const)
+                                    and last_stmt.false_target.value == node.addr
+                                ):
+                                    # found it!
+                                    alloca_node = node
+                                    sp_equal_to = ailment.Expr.BinaryOp(
+                                        None,
+                                        "Sub",
+                                        [
+                                            ailment.Expr.Register(
+                                                None, None, self.project.arch.sp_offset, self.project.arch.bits
+                                            ),
+                                            last_stmt.condition.operands[1],
+                                        ],
+                                        False,
+                                    )
+                                    break
+
+        if alloca_node is not None:
+            stmt0 = alloca_node.statements[1]
+            statements = [ailment.Stmt.Call(stmt0.idx, "alloca", args=[sp_equal_to], **stmt0.tags)]
+            new_node = ailment.Block(alloca_node.addr, alloca_node.original_size, statements=statements)
+            # replace the node
+            preds = [pred for pred in ail_graph.predecessors(alloca_node) if pred is not alloca_node]
+            succs = [succ for succ in ail_graph.successors(alloca_node) if succ is not alloca_node]
+            ail_graph.remove_node(alloca_node)
+            for pred in preds:
+                ail_graph.add_edge(pred, new_node)
+            for succ in succs:
+                ail_graph.add_edge(new_node, succ)
+
 
 register_analysis(Clinic, "Clinic")
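
Note: both Clinic changes above run automatically during decompilation. Registers compared against the stack pointer are fed to StackPointerTracker, and the stack-probing self-loop that _rewrite_alloca pattern-matches is collapsed into a single synthetic Call whose target is the string "alloca", which the CallSiteMaker change earlier in this diff then returns unchanged (string call targets are passed through as-is). A hedged sketch of exercising this end to end; the path and function name are placeholders, and the calls below are ordinary decompiler usage rather than a new API.

import angr

proj = angr.Project("/path/to/binary_with_alloca", auto_load_libs=False)  # placeholder path
cfg = proj.analyses.CFGFast(normalize=True)
func = cfg.functions["main"]  # placeholder function name

dec = proj.analyses.Decompiler(func, cfg=cfg.model)
# Functions containing the stack-probe loop matched by _rewrite_alloca should now
# show a synthetic alloca(...) call in the output instead of the raw loop.
print(dec.codegen.text)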
angr/analyses/decompiler/decompilation_cache.py
@@ -15,6 +15,7 @@ class DecompilationCache:
     __slots__ = (
         "addr",
         "type_constraints",
+        "func_typevar",
         "var_to_typevar",
         "codegen",
         "clinic",
@@ -25,6 +26,7 @@ class DecompilationCache:
     def __init__(self, addr):
        self.addr = addr
        self.type_constraints: Optional[Set] = None
+        self.func_typevar = None
        self.var_to_typevar: Optional[Dict] = None
        self.codegen: Optional[BaseStructuredCodeGenerator] = None
        self.clinic: Optional[Clinic] = None