angr 9.2.77__py3-none-win_amd64.whl → 9.2.79__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (46)
  1. angr/__init__.py +1 -1
  2. angr/__main__.py +34 -0
  3. angr/analyses/calling_convention.py +15 -12
  4. angr/analyses/cfg/cfg_fast.py +12 -0
  5. angr/analyses/complete_calling_conventions.py +5 -2
  6. angr/analyses/decompiler/ail_simplifier.py +2 -2
  7. angr/analyses/decompiler/block_simplifier.py +25 -5
  8. angr/analyses/decompiler/clinic.py +27 -17
  9. angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
  10. angr/analyses/decompiler/optimization_passes/engine_base.py +2 -2
  11. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +2 -2
  12. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +105 -12
  13. angr/analyses/decompiler/peephole_optimizations/__init__.py +11 -2
  14. angr/analyses/decompiler/peephole_optimizations/base.py +29 -2
  15. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  16. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +83 -0
  17. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +103 -0
  18. angr/analyses/decompiler/structured_codegen/c.py +20 -4
  19. angr/analyses/decompiler/utils.py +128 -2
  20. angr/analyses/disassembly.py +8 -1
  21. angr/analyses/propagator/engine_ail.py +9 -2
  22. angr/analyses/proximity_graph.py +30 -0
  23. angr/analyses/variable_recovery/engine_ail.py +1 -1
  24. angr/analyses/variable_recovery/engine_vex.py +10 -1
  25. angr/blade.py +14 -2
  26. angr/block.py +4 -0
  27. angr/knowledge_plugins/__init__.py +1 -0
  28. angr/knowledge_plugins/custom_strings.py +40 -0
  29. angr/knowledge_plugins/functions/function.py +58 -38
  30. angr/knowledge_plugins/key_definitions/live_definitions.py +1 -1
  31. angr/knowledge_plugins/propagations/prop_value.py +6 -2
  32. angr/knowledge_plugins/variables/variable_manager.py +1 -1
  33. angr/lib/angr_native.dll +0 -0
  34. angr/sim_state.py +0 -2
  35. angr/sim_type.py +3 -0
  36. angr/storage/memory_mixins/__init__.pyi +49 -0
  37. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +7 -1
  38. angr/utils/graph.py +20 -4
  39. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/METADATA +6 -6
  40. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/RECORD +46 -40
  41. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/WHEEL +1 -1
  42. angr-9.2.79.dist-info/entry_points.txt +2 -0
  43. tests/analyses/cfg/test_cfgemulated.py +1 -1
  44. tests/storage/test_multivalues.py +18 -0
  45. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/LICENSE +0 -0
  46. {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # pylint: disable=wildcard-import
2
2
  # pylint: disable=wrong-import-position
3
3
 
4
- __version__ = "9.2.77"
4
+ __version__ = "9.2.79"
5
5
 
6
6
  if bytes is str:
7
7
  raise Exception(
angr/__main__.py ADDED
@@ -0,0 +1,34 @@
1
+ import argparse
2
+
3
+ from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES
4
+ from angr.analyses.decompiler.utils import decompile_functions
5
+
6
+
7
class COMMANDS:
    """
    Namespace holding the names of the sub-commands understood by the angr CLI.
    """

    # Name of the decompilation sub-command.
    DECOMPILE = "decompile"

    # Every supported sub-command name, collected in one list.
    ALL_COMMANDS = [DECOMPILE]
14
+
15
+
16
def main(argv=None):
    """
    Entry point of the angr command-line interface.

    Parses the command line and, for the "decompile" command, prints the result of
    ``decompile_functions`` for the given binary to stdout.

    :param argv: Optional list of argument strings to parse (without the program name). When it is None,
                 argparse reads ``sys.argv[1:]``, which is exactly what the previous zero-argument
                 ``main()`` did, so existing callers are unaffected.
    :return:     None.
    """
    parser = argparse.ArgumentParser(description="The angr CLI allows you to decompile and analyze binaries.")
    parser.add_argument("command", help="The command to run", choices=COMMANDS.ALL_COMMANDS)
    parser.add_argument("binary", help="The path to the binary to analyze")
    parser.add_argument("--functions", help="The functions to analyze", nargs="+")
    parser.add_argument(
        "--structurer", help="The structurer to use", choices=STRUCTURER_CLASSES.keys(), default="phoenix"
    )

    args = parser.parse_args(argv)
    if args.command == COMMANDS.DECOMPILE:
        decompilation = decompile_functions(args.binary, functions=args.functions, structurer=args.structurer)
        print(decompilation)
    else:
        # argparse already rejects anything outside COMMANDS.ALL_COMMANDS, so this branch is only reached
        # when a command has been added to that list without a matching handler above.
        parser.print_help()
31
+
32
+
33
+ if __name__ == "__main__":
34
+ main()
@@ -568,20 +568,23 @@ class CallingConventionAnalysis(Analysis):
568
568
  continue
569
569
  defs_by_reg_offset[d.offset].append(d)
570
570
  defined_reg_offsets = set(defs_by_reg_offset.keys())
571
+ sp_offset = 0
571
572
  if self.project.arch.bits in {32, 64}:
572
- # Calculate the relative distances between the stack pointer at the callsite and the stack definitions
573
+ # Calculate the offsets between sp and stack defs
573
574
  sp_offset = state.get_sp_offset()
574
- defs_by_stack_offset = {
575
- d.atom.addr.offset - sp_offset: d
576
- for d in all_stack_defs
577
- if isinstance(d.atom, MemoryLocation) and isinstance(d.atom.addr, SpOffset)
578
- }
579
- else:
580
- defs_by_stack_offset = {
581
- -d.atom.addr.offset: d
582
- for d in all_stack_defs
583
- if isinstance(d.atom, MemoryLocation) and isinstance(d.atom.addr, SpOffset)
584
- }
575
+ if sp_offset is None:
576
+ # We can not find the sp_offset when sp is concrete
577
+ # e.g.,
578
+ # LDR R2, =0x20070000
579
+ # STR R1, [R3,#0x38]
580
+ # MOV SP, R2
581
+ # In this case, just assume sp_offset = 0
582
+ sp_offset = 0
583
+ defs_by_stack_offset = {
584
+ d.atom.addr.offset - sp_offset: d
585
+ for d in all_stack_defs
586
+ if isinstance(d.atom, MemoryLocation) and isinstance(d.atom.addr, SpOffset)
587
+ }
585
588
 
586
589
  default_type_cls = SimTypeInt if self.project.arch.bits == 32 else SimTypeLongLong
587
590
  arg_session = cc.arg_session(default_type_cls().with_arch(self.project.arch))
@@ -4586,6 +4586,18 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
4586
4586
  ):
4587
4587
  rbp_as_gpr = False
4588
4588
  break
4589
+ elif (
4590
+ insn.mnemonic == "lea"
4591
+ and len(insn.operands) == 2
4592
+ and insn.operands[0].type == capstone.x86.X86_OP_REG
4593
+ and insn.operands[1].type == capstone.x86.X86_OP_MEM
4594
+ ):
4595
+ if (
4596
+ insn.operands[0].reg == capstone.x86.X86_REG_RBP
4597
+ and insn.operands[1].mem.base == capstone.x86.X86_REG_RSP
4598
+ ):
4599
+ rbp_as_gpr = False
4600
+ break
4589
4601
  func = self.kb.functions.get_by_addr(func_addr)
4590
4602
  func.info["bp_as_gpr"] = rbp_as_gpr
4591
4603
 
@@ -5,6 +5,8 @@ import time
5
5
  import logging
6
6
  from collections import defaultdict
7
7
 
8
+ import networkx
9
+
8
10
  import claripy
9
11
 
10
12
  from angr.utils.graph import GraphUtils
@@ -13,7 +15,6 @@ from ..knowledge_plugins.cfg import CFGModel
13
15
  from . import Analysis, register_analysis, VariableRecoveryFast, CallingConventionAnalysis
14
16
 
15
17
  if TYPE_CHECKING:
16
- import networkx
17
18
  from angr.calling_conventions import SimCC
18
19
  from angr.sim_type import SimTypeFunction
19
20
  from angr.knowledge_plugins.variables.variable_manager import VariableManagerInternal
@@ -104,7 +105,9 @@ class CompleteCallingConventionsAnalysis(Analysis):
104
105
  """
105
106
 
106
107
  # get an ordering of functions based on the call graph
107
- sorted_funcs = GraphUtils.quasi_topological_sort_nodes(self.kb.functions.callgraph)
108
+ # note that the call graph is a multi-digraph. we convert it to a digraph to speed up topological sort
109
+ directed_callgraph = networkx.DiGraph(self.kb.functions.callgraph)
110
+ sorted_funcs = GraphUtils.quasi_topological_sort_nodes(directed_callgraph)
108
111
 
109
112
  total_funcs = 0
110
113
  for func_addr in reversed(sorted_funcs):
@@ -4,7 +4,7 @@ import logging
4
4
 
5
5
  from ailment import AILBlockWalker
6
6
  from ailment.block import Block
7
- from ailment.statement import Statement, Assignment, Store, Call, ConditionalJump
7
+ from ailment.statement import Statement, Assignment, Store, Call, ConditionalJump, DirtyStatement
8
8
  from ailment.expression import (
9
9
  Register,
10
10
  Convert,
@@ -1207,7 +1207,7 @@ class AILSimplifier(Analysis):
1207
1207
  continue
1208
1208
 
1209
1209
  for idx, stmt in enumerate(block.statements):
1210
- if idx in stmts_to_remove:
1210
+ if idx in stmts_to_remove and not isinstance(stmt, DirtyStatement):
1211
1211
  if isinstance(stmt, (Assignment, Store)):
1212
1212
  # Skip Assignment and Store statements
1213
1213
  # if this statement triggers a call, it should only be removed if it's in self._calls_to_remove
@@ -15,8 +15,15 @@ from ...analyses.propagator import PropagatorAnalysis
15
15
  from ...analyses.reaching_definitions import ReachingDefinitionsAnalysis
16
16
  from ...errors import SimMemoryMissingError
17
17
  from .. import Analysis, register_analysis
18
- from .peephole_optimizations import STMT_OPTS, EXPR_OPTS, PeepholeOptimizationStmtBase, PeepholeOptimizationExprBase
19
- from .utils import peephole_optimize_exprs, peephole_optimize_stmts
18
+ from .peephole_optimizations import (
19
+ MULTI_STMT_OPTS,
20
+ STMT_OPTS,
21
+ EXPR_OPTS,
22
+ PeepholeOptimizationStmtBase,
23
+ PeepholeOptimizationExprBase,
24
+ PeepholeOptimizationMultiStmtBase,
25
+ )
26
+ from .utils import peephole_optimize_exprs, peephole_optimize_stmts, peephole_optimize_multistmts
20
27
 
21
28
  if TYPE_CHECKING:
22
29
  from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
@@ -78,6 +85,7 @@ class BlockSimplifier(Analysis):
78
85
  if peephole_optimizations is None:
79
86
  self._expr_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in EXPR_OPTS]
80
87
  self._stmt_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in STMT_OPTS]
88
+ self._multistmt_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in MULTI_STMT_OPTS]
81
89
  else:
82
90
  self._expr_peephole_opts = [
83
91
  cls(self.project, self.kb, self.func_addr)
@@ -89,6 +97,11 @@ class BlockSimplifier(Analysis):
89
97
  for cls in peephole_optimizations
90
98
  if issubclass(cls, PeepholeOptimizationStmtBase)
91
99
  ]
100
+ self._multistmt_peephole_opts = [
101
+ cls(self.project, self.kb, self.func_addr)
102
+ for cls in peephole_optimizations
103
+ if issubclass(cls, PeepholeOptimizationMultiStmtBase)
104
+ ]
92
105
 
93
106
  self.result_block = None
94
107
 
@@ -404,9 +417,16 @@ class BlockSimplifier(Analysis):
404
417
  # run statement-level optimizations
405
418
  statements, stmts_updated = peephole_optimize_stmts(block, self._stmt_peephole_opts)
406
419
 
407
- if not stmts_updated:
408
- return block
409
- new_block = block.copy(statements=statements)
420
+ if stmts_updated:
421
+ new_block = block.copy(statements=statements)
422
+ else:
423
+ new_block = block
424
+
425
+ statements, multi_stmts_updated = peephole_optimize_multistmts(new_block, self._multistmt_peephole_opts)
426
+
427
+ if not multi_stmts_updated:
428
+ return new_block
429
+ new_block = new_block.copy(statements=statements)
410
430
  return new_block
411
431
 
412
432
 
@@ -20,6 +20,7 @@ from ...sim_type import (
20
20
  SimTypeFunction,
21
21
  SimTypeBottom,
22
22
  SimTypeFloat,
23
+ SimTypePointer,
23
24
  )
24
25
  from ...sim_variable import SimVariable, SimStackVariable, SimRegisterVariable, SimMemoryVariable
25
26
  from ...knowledge_plugins.key_definitions.constants import OP_BEFORE
@@ -1247,23 +1248,32 @@ class Clinic(Analysis):
1247
1248
  expr.variable_offset = offset
1248
1249
 
1249
1250
  elif isinstance(expr, ailment.Expr.Const):
1250
- # global variable?
1251
- global_vars = global_variables.get_global_variables(expr.value)
1252
- if not global_vars:
1253
- # detect if there is a related symbol
1254
- if self.project.loader.find_object_containing(expr.value):
1255
- symbol = self.project.loader.find_symbol(expr.value)
1256
- if symbol is not None:
1257
- # Create a new global variable if there isn't one already
1258
- global_vars = global_variables.get_global_variables(symbol.rebased_addr)
1259
- if not global_vars:
1260
- global_var = SimMemoryVariable(symbol.rebased_addr, symbol.size, name=symbol.name)
1261
- global_variables.add_variable("global", global_var.addr, global_var)
1262
- global_vars = {global_var}
1263
- if global_vars:
1264
- global_var = next(iter(global_vars))
1265
- expr.tags["reference_variable"] = global_var
1266
- expr.tags["reference_variable_offset"] = 0
1251
+ # custom string?
1252
+ if hasattr(expr, "custom_string") and expr.custom_string is True:
1253
+ s = self.kb.custom_strings[expr.value]
1254
+ expr.tags["reference_values"] = {
1255
+ SimTypePointer(SimTypeChar().with_arch(self.project.arch)).with_arch(self.project.arch): s.decode(
1256
+ "ascii"
1257
+ ),
1258
+ }
1259
+ else:
1260
+ # global variable?
1261
+ global_vars = global_variables.get_global_variables(expr.value)
1262
+ if not global_vars:
1263
+ # detect if there is a related symbol
1264
+ if self.project.loader.find_object_containing(expr.value):
1265
+ symbol = self.project.loader.find_symbol(expr.value)
1266
+ if symbol is not None:
1267
+ # Create a new global variable if there isn't one already
1268
+ global_vars = global_variables.get_global_variables(symbol.rebased_addr)
1269
+ if not global_vars:
1270
+ global_var = SimMemoryVariable(symbol.rebased_addr, symbol.size, name=symbol.name)
1271
+ global_variables.add_variable("global", global_var.addr, global_var)
1272
+ global_vars = {global_var}
1273
+ if global_vars:
1274
+ global_var = next(iter(global_vars))
1275
+ expr.tags["reference_variable"] = global_var
1276
+ expr.tags["reference_variable_offset"] = 0
1267
1277
 
1268
1278
  elif isinstance(expr, ailment.Stmt.Call):
1269
1279
  self._link_variables_on_call(variable_manager, global_variables, block, stmt_idx, expr, is_expr=True)
@@ -50,6 +50,8 @@ def get_optimization_passes(arch, platform):
50
50
 
51
51
  if platform is not None:
52
52
  platform = platform.lower()
53
+ if platform == "win32":
54
+ platform = "windows" # sigh
53
55
 
54
56
  passes = []
55
57
  for pass_, _ in _all_optimization_passes:
@@ -89,7 +89,7 @@ class SimplifierAILEngine(
89
89
  if hasattr(self, handler):
90
90
  return getattr(self, handler)(stmt)
91
91
  else:
92
- _l.warning("Unsupported statement type %s.", type(stmt).__name__)
92
+ _l.debug("Unsupported statement type %s.", type(stmt).__name__)
93
93
  return stmt
94
94
 
95
95
  def _ail_handle_Assignment(self, stmt):
@@ -176,7 +176,7 @@ class SimplifierAILEngine(
176
176
  if v is None:
177
177
  return expr
178
178
  return v
179
- _l.warning("Unsupported expression type %s.", type(expr).__name__)
179
+ _l.debug("Unsupported expression type %s.", type(expr).__name__)
180
180
  return expr
181
181
 
182
182
  def _ail_handle_StackBaseOffset(self, expr): # pylint:disable=no-self-use
@@ -84,9 +84,9 @@ class ITERegionConverter(OptimizationPass):
84
84
 
85
85
  true_child, false_child = None, None
86
86
  for child in children:
87
- if child.addr == if_stmt.true_target.value:
87
+ if if_stmt.true_target is not None and child.addr == if_stmt.true_target.value:
88
88
  true_child = child
89
- elif child.addr == if_stmt.false_target.value:
89
+ elif if_stmt.false_target is not None and child.addr == if_stmt.false_target.value:
90
90
  false_child = child
91
91
 
92
92
  if (
@@ -3,6 +3,7 @@ from typing import Set, Dict
3
3
  from collections import defaultdict
4
4
  import logging
5
5
 
6
+ import capstone
6
7
  import ailment
7
8
  import cle
8
9
 
@@ -48,24 +49,24 @@ class WinStackCanarySimplifier(OptimizationPass):
48
49
  return False, None
49
50
 
50
51
  # Check the first block and see if there is any statement reading data from _security_cookie
51
- init_stmt = self._find_canary_init_stmt()
52
+ init_stmts = self._find_canary_init_stmt()
52
53
 
53
- return init_stmt is not None, {"init_stmt": init_stmt}
54
+ return init_stmts is not None, {"init_stmts": init_stmts}
54
55
 
55
56
  def _analyze(self, cache=None):
56
- init_stmt = None
57
+ init_stmts = None
57
58
  if cache is not None:
58
- init_stmt = cache.get("init_stmt", None)
59
+ init_stmts = cache.get("init_stmts", None)
59
60
 
60
- if init_stmt is None:
61
- init_stmt = self._find_canary_init_stmt()
61
+ if init_stmts is None:
62
+ init_stmts = self._find_canary_init_stmt()
62
63
 
63
- if init_stmt is None:
64
+ if init_stmts is None:
64
65
  return
65
66
 
66
67
  # Look for the statement that loads back canary value from the stack
67
- first_block, canary_init_stmt_idx = init_stmt
68
- canary_init_stmt = first_block.statements[canary_init_stmt_idx]
68
+ first_block, canary_init_stmt_ids = init_stmts
69
+ canary_init_stmt = first_block.statements[canary_init_stmt_ids[-1]]
69
70
  # where is the stack canary stored?
70
71
  if not isinstance(canary_init_stmt.addr, ailment.Expr.StackBaseOffset):
71
72
  _l.debug(
@@ -142,7 +143,8 @@ class WinStackCanarySimplifier(OptimizationPass):
142
143
  if found_endpoints:
143
144
  # Remove the statement that loads the stack canary from fs
144
145
  first_block_copy = first_block.copy()
145
- first_block_copy.statements.pop(canary_init_stmt_idx)
146
+ for stmt_idx in sorted(canary_init_stmt_ids, reverse=True):
147
+ first_block_copy.statements.pop(stmt_idx)
146
148
  self._update_block(first_block, first_block_copy)
147
149
 
148
150
  def _find_canary_init_stmt(self):
@@ -150,7 +152,13 @@ class WinStackCanarySimplifier(OptimizationPass):
150
152
  if first_block is None:
151
153
  return None
152
154
 
155
+ load_stmt_idx = None
156
+ load_reg = None
157
+ xor_stmt_idx = None
158
+ xored_reg = None
159
+
153
160
  for idx, stmt in enumerate(first_block.statements):
161
+ # if we are lucky and things get folded into one statement:
154
162
  if (
155
163
  isinstance(stmt, ailment.Stmt.Store)
156
164
  and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
@@ -163,13 +171,51 @@ class WinStackCanarySimplifier(OptimizationPass):
163
171
  # Check addr: must be __security_cookie
164
172
  load_addr = stmt.data.operands[0].addr.value
165
173
  if load_addr == self._security_cookie_addr:
166
- return first_block, idx
174
+ return first_block, [idx]
175
+ # or if we are unlucky and the load and the xor are two different statements
176
+ if (
177
+ isinstance(stmt, ailment.Stmt.Assignment)
178
+ and isinstance(stmt.dst, ailment.Expr.Register)
179
+ and isinstance(stmt.src, ailment.Expr.Load)
180
+ and isinstance(stmt.src.addr, ailment.Expr.Const)
181
+ ):
182
+ load_addr = stmt.src.addr.value
183
+ if load_addr == self._security_cookie_addr:
184
+ load_stmt_idx = idx
185
+ load_reg = stmt.dst.reg_offset
186
+ if load_stmt_idx is not None and idx == load_stmt_idx + 1:
187
+ if (
188
+ isinstance(stmt, ailment.Stmt.Assignment)
189
+ and isinstance(stmt.dst, ailment.Expr.Register)
190
+ and isinstance(stmt.src, ailment.Expr.BinaryOp)
191
+ and stmt.src.op == "Xor"
192
+ and isinstance(stmt.src.operands[0], ailment.Expr.Register)
193
+ and stmt.src.operands[0].reg_offset == load_reg
194
+ and isinstance(stmt.src.operands[1], ailment.Expr.StackBaseOffset)
195
+ ):
196
+ xor_stmt_idx = idx
197
+ xored_reg = stmt.dst.reg_offset
198
+ else:
199
+ break
200
+ if xor_stmt_idx is not None and idx == xor_stmt_idx + 1:
201
+ if (
202
+ isinstance(stmt, ailment.Stmt.Store)
203
+ and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
204
+ and isinstance(stmt.data, ailment.Expr.Register)
205
+ and stmt.data.reg_offset == xored_reg
206
+ ):
207
+ return first_block, [load_stmt_idx, xor_stmt_idx, idx]
208
+ else:
209
+ break
167
210
 
168
211
  return None
169
212
 
170
213
  @staticmethod
171
214
  def _find_amd64_canary_storing_stmt(block, canary_value_stack_offset):
215
+ load_stmt_idx = None
216
+
172
217
  for idx, stmt in enumerate(block.statements):
218
+ # when we are lucky, we have one instruction
173
219
  if (
174
220
  isinstance(stmt, ailment.Stmt.Assignment)
175
221
  and isinstance(stmt.dst, ailment.Expr.Register)
@@ -185,7 +231,29 @@ class WinStackCanarySimplifier(OptimizationPass):
185
231
  if isinstance(op1, ailment.Expr.StackBaseOffset):
186
232
  # found it
187
233
  return idx
188
-
234
+ # or when we are unlucky, we have two instructions...
235
+ if (
236
+ isinstance(stmt, ailment.Stmt.Assignment)
237
+ and isinstance(stmt.dst, ailment.Expr.Register)
238
+ and stmt.dst.reg_name == "rcx"
239
+ and isinstance(stmt.src, ailment.Expr.Load)
240
+ and isinstance(stmt.src.addr, ailment.Expr.StackBaseOffset)
241
+ and stmt.src.addr.offset == canary_value_stack_offset
242
+ ):
243
+ load_stmt_idx = idx
244
+ if load_stmt_idx is not None and idx == load_stmt_idx + 1:
245
+ if (
246
+ isinstance(stmt, ailment.Stmt.Assignment)
247
+ and isinstance(stmt.dst, ailment.Expr.Register)
248
+ and isinstance(stmt.src, ailment.Expr.BinaryOp)
249
+ and stmt.src.op == "Xor"
250
+ ):
251
+ if (
252
+ isinstance(stmt.src.operands[0], ailment.Expr.Register)
253
+ and stmt.src.operands[0].reg_name == "rcx"
254
+ and isinstance(stmt.src.operands[1], ailment.Expr.StackBaseOffset)
255
+ ):
256
+ return idx
189
257
  return None
190
258
 
191
259
  @staticmethod
@@ -200,6 +268,29 @@ class WinStackCanarySimplifier(OptimizationPass):
200
268
  return idx
201
269
  return None
202
270
 
271
def _is_function_likely_security_check_cookie(self, func) -> bool:
    """
    Heuristically decide whether ``func`` looks like the security-cookie checking routine.

    The function matches when the block at ``func.addr`` consists of exactly two instructions: a ``cmp``
    between RCX and a RIP-relative memory operand whose effective address equals
    ``self._security_cookie_addr``, followed by a ``jne``.

    :param func: The function to inspect.
    :return:     True if the pattern above is matched, False otherwise.
    """
    # PLT stubs, syscalls and SimProcedures are rejected before any disassembly is attempted
    if func.is_plt or func.is_syscall or func.is_simprocedure:
        return False

    block = self.project.factory.block(func.addr)
    if block.instructions != 2:
        return False

    cmp_insn = block.capstone.insns[0]
    if cmp_insn.mnemonic != "cmp":
        return False
    if len(cmp_insn.operands) != 2:
        return False

    # first operand: the RCX register
    if cmp_insn.operands[0].type != capstone.x86.X86_OP_REG:
        return False
    if cmp_insn.operands[0].reg != capstone.x86.X86_REG_RCX:
        return False

    # second operand: memory addressed as [rip + disp], without an index register
    if cmp_insn.operands[1].type != capstone.x86.X86_OP_MEM:
        return False
    if cmp_insn.operands[1].mem.base != capstone.x86.X86_REG_RIP:
        return False
    if cmp_insn.operands[1].mem.index != 0:
        return False

    # RIP-relative displacements are relative to the address of the next instruction
    if cmp_insn.operands[1].mem.disp + cmp_insn.address + cmp_insn.size != self._security_cookie_addr:
        return False

    jump_insn = block.capstone.insns[1]
    return jump_insn.mnemonic == "jne"
293
+
203
294
  def _find_stmt_calling_security_check_cookie(self, node):
204
295
  for idx, stmt in enumerate(node.statements):
205
296
  if isinstance(stmt, ailment.Stmt.Call) and isinstance(stmt.target, ailment.Expr.Const):
@@ -208,5 +299,7 @@ class WinStackCanarySimplifier(OptimizationPass):
208
299
  func = self.kb.functions.function(addr=const_target)
209
300
  if func.name == "_security_check_cookie":
210
301
  return idx
302
+ elif self._is_function_likely_security_check_cookie(func):
303
+ return idx
211
304
 
212
305
  return None
@@ -40,10 +40,12 @@ from .sar_to_signed_div import SarToSignedDiv
40
40
  from .tidy_stack_addr import TidyStackAddr
41
41
  from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalConjunctionsAndDisjunctions
42
42
  from .rol_ror import RolRorRewriter
43
+ from .inlined_strcpy import InlinedStrcpy
44
+ from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
43
45
 
44
- from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase
45
-
46
+ from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase, PeepholeOptimizationMultiStmtBase
46
47
 
48
+ MULTI_STMT_OPTS: List[Type[PeepholeOptimizationMultiStmtBase]] = []
47
49
  STMT_OPTS: List[Type[PeepholeOptimizationStmtBase]] = []
48
50
  EXPR_OPTS: List[Type[PeepholeOptimizationExprBase]] = []
49
51
 
@@ -55,4 +57,11 @@ for v in _g.values():
55
57
  if isinstance(v, type) and issubclass(v, PeepholeOptimizationStmtBase) and v is not PeepholeOptimizationStmtBase:
56
58
  STMT_OPTS.append(v)
57
59
 
60
+ if (
61
+ isinstance(v, type)
62
+ and issubclass(v, PeepholeOptimizationMultiStmtBase)
63
+ and v is not PeepholeOptimizationMultiStmtBase
64
+ ):
65
+ MULTI_STMT_OPTS.append(v)
66
+
58
67
  _g = None
@@ -1,7 +1,7 @@
1
- from typing import Optional
1
+ from typing import List, Optional
2
2
 
3
3
  from ailment.expression import BinaryOp, UnaryOp, Expression
4
- from ailment.statement import Assignment
4
+ from ailment.statement import Statement, Assignment
5
5
  from ailment import Block
6
6
  from angr.project import Project
7
7
  from angr.knowledge_base import KnowledgeBase
@@ -34,6 +34,33 @@ class PeepholeOptimizationStmtBase:
34
34
  raise NotImplementedError("_optimize() is not implemented.")
35
35
 
36
36
 
37
class PeepholeOptimizationMultiStmtBase:
    """
    The base class for all peephole optimizations that are applied on multiple AIL statements at once.

    Subclasses are expected to override :meth:`optimize`; the implementation here only raises
    NotImplementedError.
    """

    __slots__ = (
        "project",
        "kb",
        "func_addr",
    )
    project: Optional[Project]
    kb: Optional[KnowledgeBase]
    func_addr: Optional[int]

    NAME = "Peephole Optimization - Multi-statement"
    DESCRIPTION = "Peephole Optimization - Multi-statement"
    # NOTE(review): presumably the statement classes a subclass wants to be offered; nothing in this class
    # reads it - confirm against the driver that runs these optimizations.
    stmt_classes = None

    def __init__(self, project: Optional[Project], kb: Optional[KnowledgeBase], func_addr: Optional[int] = None):
        """
        :param project:   The angr project, or None.
        :param kb:        The knowledge base, or None.
        :param func_addr: Address of the function being optimized, or None.
        """
        self.project = project
        self.kb = kb
        self.func_addr = func_addr

    def optimize(self, stmts: List[Statement], stmt_idx: Optional[int] = None, block=None, **kwargs):
        """
        Optimize a sequence of statements. Must be overridden by subclasses.

        :param stmts:    The statements to work on.
        :param stmt_idx: Optional statement index; presumably where ``stmts`` starts in ``block`` (confirm
                         against the caller).
        :param block:    Optional block that the statements belong to.
        :raises NotImplementedError: Always, in this base class.
        """
        # The public method is optimize(); the message previously named a non-existent _optimize().
        raise NotImplementedError("optimize() is not implemented.")
62
+
63
+
37
64
  class PeepholeOptimizationExprBase:
38
65
  """
39
66
  The base class for all peephole optimizations that are applied on AIL expressions.
@@ -20,7 +20,7 @@ class ConstantDereferences(PeepholeOptimizationExprBase):
20
20
  if sec is not None and sec.is_readable and (not sec.is_writable or "got" in sec.name):
21
21
  # do we know the value that it's reading?
22
22
  try:
23
- val = self.project.loader.memory.unpack_word(expr.addr.value, size=self.project.arch.bytes)
23
+ val = self.project.loader.memory.unpack_word(expr.addr.value, size=expr.size)
24
24
  except KeyError:
25
25
  return expr
26
26
 
@@ -0,0 +1,83 @@
1
+ # pylint:disable=arguments-differ
2
+ from typing import Tuple, Optional
3
+ import string
4
+
5
+ from archinfo import Endness
6
+
7
+ from ailment.expression import Const
8
+ from ailment.statement import Call, Store
9
+
10
+ from .base import PeepholeOptimizationStmtBase
11
+
12
+
13
+ ASCII_PRINTABLES = set(string.printable)
14
+ ASCII_DIGITS = set(string.digits)
15
+
16
+
17
class InlinedStrcpy(PeepholeOptimizationStmtBase):
    """
    Rewrites a Store of a constant whose bytes look like printable text into a call to strncpy.
    """

    __slots__ = ()

    NAME = "Simplifying inlined strcpy"
    stmt_classes = (Store,)

    def optimize(self, stmt: Store, **kwargs):
        """
        Turn ``stmt`` into a ``strncpy`` call when the stored constant is likely a string.

        :param stmt: The Store statement to examine.
        :return:     A Call statement replacing ``stmt``, or None if the statement is left alone.
        """
        data = stmt.data
        if not isinstance(data, Const):
            return None

        looks_like_text, text = self.is_integer_likely_a_string(data.value, data.size, stmt.endness)
        if not looks_like_text:
            return None

        # register the recovered string in the knowledge base and refer to it by the returned ID
        str_id = self.kb.custom_strings.allocate(text.encode("ascii"))
        destination = stmt.addr
        source = Const(None, None, str_id, destination.bits, custom_string=True)
        length = Const(None, None, len(text), self.project.arch.bits)
        return Call(
            stmt.idx,
            "strncpy",
            args=[destination, source, length],
            **stmt.tags,
        )

    @staticmethod
    def is_integer_likely_a_string(
        v: int, size: int, endness: Endness, min_length: int = 4
    ) -> Tuple[bool, Optional[str]]:
        """
        Check whether the integer ``v`` decodes to a run of printable ASCII characters.

        :param v:          The integer to decode.
        :param size:       Number of bytes to inspect; only consulted for big-endian values.
        :param endness:    Byte order used to map ``v`` to characters.
        :param min_length: Minimum number of characters required (four by default).
        :return:           ``(True, text)`` on success, ``(False, None)`` otherwise.
        """
        decoded = []

        if endness == Endness.LE:
            # the least significant byte is the first character; decoding stops once only zeros remain
            remaining = v
            while remaining != 0:
                ch = chr(remaining & 0xFF)
                if ch not in ASCII_PRINTABLES:
                    return False, None
                decoded.append(ch)
                remaining >>= 8

        elif endness == Endness.BE:
            # walk from the least significant byte (the last character): zero bytes are skipped until the
            # first non-zero byte is seen, after which a zero byte disqualifies the value
            seen_non_zero = False
            for shift in range(0, size * 8, 8):
                code = (v >> shift) & 0xFF
                if code == 0:
                    if seen_non_zero:
                        return False, None
                    continue
                seen_non_zero = True
                ch = chr(code)
                if ch not in ASCII_PRINTABLES:
                    return False, None
                decoded.append(ch)
            decoded.reverse()

        else:
            # unsupported endness
            return False, None

        if len(decoded) < min_length:
            return False, None
        # short all-digit sequences are rejected
        if len(decoded) <= 4 and all(ch in ASCII_DIGITS for ch in decoded):
            return False, None
        return True, "".join(decoded)