angr 9.2.77__py3-none-win_amd64.whl → 9.2.79__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +34 -0
- angr/analyses/calling_convention.py +15 -12
- angr/analyses/cfg/cfg_fast.py +12 -0
- angr/analyses/complete_calling_conventions.py +5 -2
- angr/analyses/decompiler/ail_simplifier.py +2 -2
- angr/analyses/decompiler/block_simplifier.py +25 -5
- angr/analyses/decompiler/clinic.py +27 -17
- angr/analyses/decompiler/optimization_passes/__init__.py +2 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +2 -2
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +2 -2
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +105 -12
- angr/analyses/decompiler/peephole_optimizations/__init__.py +11 -2
- angr/analyses/decompiler/peephole_optimizations/base.py +29 -2
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +83 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +103 -0
- angr/analyses/decompiler/structured_codegen/c.py +20 -4
- angr/analyses/decompiler/utils.py +128 -2
- angr/analyses/disassembly.py +8 -1
- angr/analyses/propagator/engine_ail.py +9 -2
- angr/analyses/proximity_graph.py +30 -0
- angr/analyses/variable_recovery/engine_ail.py +1 -1
- angr/analyses/variable_recovery/engine_vex.py +10 -1
- angr/blade.py +14 -2
- angr/block.py +4 -0
- angr/knowledge_plugins/__init__.py +1 -0
- angr/knowledge_plugins/custom_strings.py +40 -0
- angr/knowledge_plugins/functions/function.py +58 -38
- angr/knowledge_plugins/key_definitions/live_definitions.py +1 -1
- angr/knowledge_plugins/propagations/prop_value.py +6 -2
- angr/knowledge_plugins/variables/variable_manager.py +1 -1
- angr/lib/angr_native.dll +0 -0
- angr/sim_state.py +0 -2
- angr/sim_type.py +3 -0
- angr/storage/memory_mixins/__init__.pyi +49 -0
- angr/storage/memory_mixins/paged_memory/pages/multi_values.py +7 -1
- angr/utils/graph.py +20 -4
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/METADATA +6 -6
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/RECORD +46 -40
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/WHEEL +1 -1
- angr-9.2.79.dist-info/entry_points.txt +2 -0
- tests/analyses/cfg/test_cfgemulated.py +1 -1
- tests/storage/test_multivalues.py +18 -0
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/LICENSE +0 -0
- {angr-9.2.77.dist-info → angr-9.2.79.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/__main__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
|
|
3
|
+
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES
|
|
4
|
+
from angr.analyses.decompiler.utils import decompile_functions
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class COMMANDS:
    """
    Names of the sub-commands understood by the angr command-line interface.
    """

    # Sub-command that decompiles functions of a binary.
    DECOMPILE = "decompile"

    # Every sub-command name; used as the set of valid values for the positional "command" argument.
    ALL_COMMANDS = [DECOMPILE]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def main(argv=None):
    """
    Entry point of the angr command-line interface.

    Parses the command line, runs the requested command, and prints its result to standard output.

    :param argv:    Optional sequence of argument strings to parse. When None (the default), argparse falls back to
                    the arguments of the current process (``sys.argv[1:]``), which is the previous behavior. Passing
                    an explicit list allows the CLI to be driven programmatically.
    :return:        None
    """
    parser = argparse.ArgumentParser(description="The angr CLI allows you to decompile and analyze binaries.")
    parser.add_argument("command", help="The command to run", choices=COMMANDS.ALL_COMMANDS)
    parser.add_argument("binary", help="The path to the binary to analyze")
    parser.add_argument("--functions", help="The functions to analyze", nargs="+")
    parser.add_argument(
        "--structurer", help="The structurer to use", choices=STRUCTURER_CLASSES.keys(), default="phoenix"
    )

    args = parser.parse_args(argv)
    if args.command == COMMANDS.DECOMPILE:
        decompilation = decompile_functions(args.binary, functions=args.functions, structurer=args.structurer)
        print(decompilation)
    else:
        # argparse already rejects anything outside COMMANDS.ALL_COMMANDS, so this branch is only a fallback for
        # commands that are listed there but not handled above.
        parser.print_help()


if __name__ == "__main__":
    main()
|
|
@@ -568,20 +568,23 @@ class CallingConventionAnalysis(Analysis):
|
|
|
568
568
|
continue
|
|
569
569
|
defs_by_reg_offset[d.offset].append(d)
|
|
570
570
|
defined_reg_offsets = set(defs_by_reg_offset.keys())
|
|
571
|
+
sp_offset = 0
|
|
571
572
|
if self.project.arch.bits in {32, 64}:
|
|
572
|
-
# Calculate the
|
|
573
|
+
# Calculate the offsets between sp and stack defs
|
|
573
574
|
sp_offset = state.get_sp_offset()
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
575
|
+
if sp_offset is None:
|
|
576
|
+
# We can not find the sp_offset when sp is concrete
|
|
577
|
+
# e.g.,
|
|
578
|
+
# LDR R2, =0x20070000
|
|
579
|
+
# STR R1, [R3,#0x38]
|
|
580
|
+
# MOV SP, R2
|
|
581
|
+
# In this case, just assume sp_offset = 0
|
|
582
|
+
sp_offset = 0
|
|
583
|
+
defs_by_stack_offset = {
|
|
584
|
+
d.atom.addr.offset - sp_offset: d
|
|
585
|
+
for d in all_stack_defs
|
|
586
|
+
if isinstance(d.atom, MemoryLocation) and isinstance(d.atom.addr, SpOffset)
|
|
587
|
+
}
|
|
585
588
|
|
|
586
589
|
default_type_cls = SimTypeInt if self.project.arch.bits == 32 else SimTypeLongLong
|
|
587
590
|
arg_session = cc.arg_session(default_type_cls().with_arch(self.project.arch))
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -4586,6 +4586,18 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
4586
4586
|
):
|
|
4587
4587
|
rbp_as_gpr = False
|
|
4588
4588
|
break
|
|
4589
|
+
elif (
|
|
4590
|
+
insn.mnemonic == "lea"
|
|
4591
|
+
and len(insn.operands) == 2
|
|
4592
|
+
and insn.operands[0].type == capstone.x86.X86_OP_REG
|
|
4593
|
+
and insn.operands[1].type == capstone.x86.X86_OP_MEM
|
|
4594
|
+
):
|
|
4595
|
+
if (
|
|
4596
|
+
insn.operands[0].reg == capstone.x86.X86_REG_RBP
|
|
4597
|
+
and insn.operands[1].mem.base == capstone.x86.X86_REG_RSP
|
|
4598
|
+
):
|
|
4599
|
+
rbp_as_gpr = False
|
|
4600
|
+
break
|
|
4589
4601
|
func = self.kb.functions.get_by_addr(func_addr)
|
|
4590
4602
|
func.info["bp_as_gpr"] = rbp_as_gpr
|
|
4591
4603
|
|
|
@@ -5,6 +5,8 @@ import time
|
|
|
5
5
|
import logging
|
|
6
6
|
from collections import defaultdict
|
|
7
7
|
|
|
8
|
+
import networkx
|
|
9
|
+
|
|
8
10
|
import claripy
|
|
9
11
|
|
|
10
12
|
from angr.utils.graph import GraphUtils
|
|
@@ -13,7 +15,6 @@ from ..knowledge_plugins.cfg import CFGModel
|
|
|
13
15
|
from . import Analysis, register_analysis, VariableRecoveryFast, CallingConventionAnalysis
|
|
14
16
|
|
|
15
17
|
if TYPE_CHECKING:
|
|
16
|
-
import networkx
|
|
17
18
|
from angr.calling_conventions import SimCC
|
|
18
19
|
from angr.sim_type import SimTypeFunction
|
|
19
20
|
from angr.knowledge_plugins.variables.variable_manager import VariableManagerInternal
|
|
@@ -104,7 +105,9 @@ class CompleteCallingConventionsAnalysis(Analysis):
|
|
|
104
105
|
"""
|
|
105
106
|
|
|
106
107
|
# get an ordering of functions based on the call graph
|
|
107
|
-
|
|
108
|
+
# note that the call graph is a multi-digraph. we convert it to a digraph to speed up topological sort
|
|
109
|
+
directed_callgraph = networkx.DiGraph(self.kb.functions.callgraph)
|
|
110
|
+
sorted_funcs = GraphUtils.quasi_topological_sort_nodes(directed_callgraph)
|
|
108
111
|
|
|
109
112
|
total_funcs = 0
|
|
110
113
|
for func_addr in reversed(sorted_funcs):
|
|
@@ -4,7 +4,7 @@ import logging
|
|
|
4
4
|
|
|
5
5
|
from ailment import AILBlockWalker
|
|
6
6
|
from ailment.block import Block
|
|
7
|
-
from ailment.statement import Statement, Assignment, Store, Call, ConditionalJump
|
|
7
|
+
from ailment.statement import Statement, Assignment, Store, Call, ConditionalJump, DirtyStatement
|
|
8
8
|
from ailment.expression import (
|
|
9
9
|
Register,
|
|
10
10
|
Convert,
|
|
@@ -1207,7 +1207,7 @@ class AILSimplifier(Analysis):
|
|
|
1207
1207
|
continue
|
|
1208
1208
|
|
|
1209
1209
|
for idx, stmt in enumerate(block.statements):
|
|
1210
|
-
if idx in stmts_to_remove:
|
|
1210
|
+
if idx in stmts_to_remove and not isinstance(stmt, DirtyStatement):
|
|
1211
1211
|
if isinstance(stmt, (Assignment, Store)):
|
|
1212
1212
|
# Skip Assignment and Store statements
|
|
1213
1213
|
# if this statement triggers a call, it should only be removed if it's in self._calls_to_remove
|
|
@@ -15,8 +15,15 @@ from ...analyses.propagator import PropagatorAnalysis
|
|
|
15
15
|
from ...analyses.reaching_definitions import ReachingDefinitionsAnalysis
|
|
16
16
|
from ...errors import SimMemoryMissingError
|
|
17
17
|
from .. import Analysis, register_analysis
|
|
18
|
-
from .peephole_optimizations import
|
|
19
|
-
|
|
18
|
+
from .peephole_optimizations import (
|
|
19
|
+
MULTI_STMT_OPTS,
|
|
20
|
+
STMT_OPTS,
|
|
21
|
+
EXPR_OPTS,
|
|
22
|
+
PeepholeOptimizationStmtBase,
|
|
23
|
+
PeepholeOptimizationExprBase,
|
|
24
|
+
PeepholeOptimizationMultiStmtBase,
|
|
25
|
+
)
|
|
26
|
+
from .utils import peephole_optimize_exprs, peephole_optimize_stmts, peephole_optimize_multistmts
|
|
20
27
|
|
|
21
28
|
if TYPE_CHECKING:
|
|
22
29
|
from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
|
|
@@ -78,6 +85,7 @@ class BlockSimplifier(Analysis):
|
|
|
78
85
|
if peephole_optimizations is None:
|
|
79
86
|
self._expr_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in EXPR_OPTS]
|
|
80
87
|
self._stmt_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in STMT_OPTS]
|
|
88
|
+
self._multistmt_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in MULTI_STMT_OPTS]
|
|
81
89
|
else:
|
|
82
90
|
self._expr_peephole_opts = [
|
|
83
91
|
cls(self.project, self.kb, self.func_addr)
|
|
@@ -89,6 +97,11 @@ class BlockSimplifier(Analysis):
|
|
|
89
97
|
for cls in peephole_optimizations
|
|
90
98
|
if issubclass(cls, PeepholeOptimizationStmtBase)
|
|
91
99
|
]
|
|
100
|
+
self._multistmt_peephole_opts = [
|
|
101
|
+
cls(self.project, self.kb, self.func_addr)
|
|
102
|
+
for cls in peephole_optimizations
|
|
103
|
+
if issubclass(cls, PeepholeOptimizationMultiStmtBase)
|
|
104
|
+
]
|
|
92
105
|
|
|
93
106
|
self.result_block = None
|
|
94
107
|
|
|
@@ -404,9 +417,16 @@ class BlockSimplifier(Analysis):
|
|
|
404
417
|
# run statement-level optimizations
|
|
405
418
|
statements, stmts_updated = peephole_optimize_stmts(block, self._stmt_peephole_opts)
|
|
406
419
|
|
|
407
|
-
if
|
|
408
|
-
|
|
409
|
-
|
|
420
|
+
if stmts_updated:
|
|
421
|
+
new_block = block.copy(statements=statements)
|
|
422
|
+
else:
|
|
423
|
+
new_block = block
|
|
424
|
+
|
|
425
|
+
statements, multi_stmts_updated = peephole_optimize_multistmts(new_block, self._multistmt_peephole_opts)
|
|
426
|
+
|
|
427
|
+
if not multi_stmts_updated:
|
|
428
|
+
return new_block
|
|
429
|
+
new_block = new_block.copy(statements=statements)
|
|
410
430
|
return new_block
|
|
411
431
|
|
|
412
432
|
|
|
@@ -20,6 +20,7 @@ from ...sim_type import (
|
|
|
20
20
|
SimTypeFunction,
|
|
21
21
|
SimTypeBottom,
|
|
22
22
|
SimTypeFloat,
|
|
23
|
+
SimTypePointer,
|
|
23
24
|
)
|
|
24
25
|
from ...sim_variable import SimVariable, SimStackVariable, SimRegisterVariable, SimMemoryVariable
|
|
25
26
|
from ...knowledge_plugins.key_definitions.constants import OP_BEFORE
|
|
@@ -1247,23 +1248,32 @@ class Clinic(Analysis):
|
|
|
1247
1248
|
expr.variable_offset = offset
|
|
1248
1249
|
|
|
1249
1250
|
elif isinstance(expr, ailment.Expr.Const):
|
|
1250
|
-
#
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1251
|
+
# custom string?
|
|
1252
|
+
if hasattr(expr, "custom_string") and expr.custom_string is True:
|
|
1253
|
+
s = self.kb.custom_strings[expr.value]
|
|
1254
|
+
expr.tags["reference_values"] = {
|
|
1255
|
+
SimTypePointer(SimTypeChar().with_arch(self.project.arch)).with_arch(self.project.arch): s.decode(
|
|
1256
|
+
"ascii"
|
|
1257
|
+
),
|
|
1258
|
+
}
|
|
1259
|
+
else:
|
|
1260
|
+
# global variable?
|
|
1261
|
+
global_vars = global_variables.get_global_variables(expr.value)
|
|
1262
|
+
if not global_vars:
|
|
1263
|
+
# detect if there is a related symbol
|
|
1264
|
+
if self.project.loader.find_object_containing(expr.value):
|
|
1265
|
+
symbol = self.project.loader.find_symbol(expr.value)
|
|
1266
|
+
if symbol is not None:
|
|
1267
|
+
# Create a new global variable if there isn't one already
|
|
1268
|
+
global_vars = global_variables.get_global_variables(symbol.rebased_addr)
|
|
1269
|
+
if not global_vars:
|
|
1270
|
+
global_var = SimMemoryVariable(symbol.rebased_addr, symbol.size, name=symbol.name)
|
|
1271
|
+
global_variables.add_variable("global", global_var.addr, global_var)
|
|
1272
|
+
global_vars = {global_var}
|
|
1273
|
+
if global_vars:
|
|
1274
|
+
global_var = next(iter(global_vars))
|
|
1275
|
+
expr.tags["reference_variable"] = global_var
|
|
1276
|
+
expr.tags["reference_variable_offset"] = 0
|
|
1267
1277
|
|
|
1268
1278
|
elif isinstance(expr, ailment.Stmt.Call):
|
|
1269
1279
|
self._link_variables_on_call(variable_manager, global_variables, block, stmt_idx, expr, is_expr=True)
|
|
@@ -89,7 +89,7 @@ class SimplifierAILEngine(
|
|
|
89
89
|
if hasattr(self, handler):
|
|
90
90
|
return getattr(self, handler)(stmt)
|
|
91
91
|
else:
|
|
92
|
-
_l.
|
|
92
|
+
_l.debug("Unsupported statement type %s.", type(stmt).__name__)
|
|
93
93
|
return stmt
|
|
94
94
|
|
|
95
95
|
def _ail_handle_Assignment(self, stmt):
|
|
@@ -176,7 +176,7 @@ class SimplifierAILEngine(
|
|
|
176
176
|
if v is None:
|
|
177
177
|
return expr
|
|
178
178
|
return v
|
|
179
|
-
_l.
|
|
179
|
+
_l.debug("Unsupported expression type %s.", type(expr).__name__)
|
|
180
180
|
return expr
|
|
181
181
|
|
|
182
182
|
def _ail_handle_StackBaseOffset(self, expr): # pylint:disable=no-self-use
|
|
@@ -84,9 +84,9 @@ class ITERegionConverter(OptimizationPass):
|
|
|
84
84
|
|
|
85
85
|
true_child, false_child = None, None
|
|
86
86
|
for child in children:
|
|
87
|
-
if child.addr == if_stmt.true_target.value:
|
|
87
|
+
if if_stmt.true_target is not None and child.addr == if_stmt.true_target.value:
|
|
88
88
|
true_child = child
|
|
89
|
-
elif child.addr == if_stmt.false_target.value:
|
|
89
|
+
elif if_stmt.false_target is not None and child.addr == if_stmt.false_target.value:
|
|
90
90
|
false_child = child
|
|
91
91
|
|
|
92
92
|
if (
|
|
@@ -3,6 +3,7 @@ from typing import Set, Dict
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
|
+
import capstone
|
|
6
7
|
import ailment
|
|
7
8
|
import cle
|
|
8
9
|
|
|
@@ -48,24 +49,24 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
48
49
|
return False, None
|
|
49
50
|
|
|
50
51
|
# Check the first block and see if there is any statement reading data from _security_cookie
|
|
51
|
-
|
|
52
|
+
init_stmts = self._find_canary_init_stmt()
|
|
52
53
|
|
|
53
|
-
return
|
|
54
|
+
return init_stmts is not None, {"init_stmts": init_stmts}
|
|
54
55
|
|
|
55
56
|
def _analyze(self, cache=None):
|
|
56
|
-
|
|
57
|
+
init_stmts = None
|
|
57
58
|
if cache is not None:
|
|
58
|
-
|
|
59
|
+
init_stmts = cache.get("init_stmts", None)
|
|
59
60
|
|
|
60
|
-
if
|
|
61
|
-
|
|
61
|
+
if init_stmts is None:
|
|
62
|
+
init_stmts = self._find_canary_init_stmt()
|
|
62
63
|
|
|
63
|
-
if
|
|
64
|
+
if init_stmts is None:
|
|
64
65
|
return
|
|
65
66
|
|
|
66
67
|
# Look for the statement that loads back canary value from the stack
|
|
67
|
-
first_block,
|
|
68
|
-
canary_init_stmt = first_block.statements[
|
|
68
|
+
first_block, canary_init_stmt_ids = init_stmts
|
|
69
|
+
canary_init_stmt = first_block.statements[canary_init_stmt_ids[-1]]
|
|
69
70
|
# where is the stack canary stored?
|
|
70
71
|
if not isinstance(canary_init_stmt.addr, ailment.Expr.StackBaseOffset):
|
|
71
72
|
_l.debug(
|
|
@@ -142,7 +143,8 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
142
143
|
if found_endpoints:
|
|
143
144
|
# Remove the statement that loads the stack canary from fs
|
|
144
145
|
first_block_copy = first_block.copy()
|
|
145
|
-
|
|
146
|
+
for stmt_idx in sorted(canary_init_stmt_ids, reverse=True):
|
|
147
|
+
first_block_copy.statements.pop(stmt_idx)
|
|
146
148
|
self._update_block(first_block, first_block_copy)
|
|
147
149
|
|
|
148
150
|
def _find_canary_init_stmt(self):
|
|
@@ -150,7 +152,13 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
150
152
|
if first_block is None:
|
|
151
153
|
return None
|
|
152
154
|
|
|
155
|
+
load_stmt_idx = None
|
|
156
|
+
load_reg = None
|
|
157
|
+
xor_stmt_idx = None
|
|
158
|
+
xored_reg = None
|
|
159
|
+
|
|
153
160
|
for idx, stmt in enumerate(first_block.statements):
|
|
161
|
+
# if we are lucky and things get folded into one statement:
|
|
154
162
|
if (
|
|
155
163
|
isinstance(stmt, ailment.Stmt.Store)
|
|
156
164
|
and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
|
|
@@ -163,13 +171,51 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
163
171
|
# Check addr: must be __security_cookie
|
|
164
172
|
load_addr = stmt.data.operands[0].addr.value
|
|
165
173
|
if load_addr == self._security_cookie_addr:
|
|
166
|
-
return first_block, idx
|
|
174
|
+
return first_block, [idx]
|
|
175
|
+
# or if we are unlucky and the load and the xor are two different statements
|
|
176
|
+
if (
|
|
177
|
+
isinstance(stmt, ailment.Stmt.Assignment)
|
|
178
|
+
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
179
|
+
and isinstance(stmt.src, ailment.Expr.Load)
|
|
180
|
+
and isinstance(stmt.src.addr, ailment.Expr.Const)
|
|
181
|
+
):
|
|
182
|
+
load_addr = stmt.src.addr.value
|
|
183
|
+
if load_addr == self._security_cookie_addr:
|
|
184
|
+
load_stmt_idx = idx
|
|
185
|
+
load_reg = stmt.dst.reg_offset
|
|
186
|
+
if load_stmt_idx is not None and idx == load_stmt_idx + 1:
|
|
187
|
+
if (
|
|
188
|
+
isinstance(stmt, ailment.Stmt.Assignment)
|
|
189
|
+
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
190
|
+
and isinstance(stmt.src, ailment.Expr.BinaryOp)
|
|
191
|
+
and stmt.src.op == "Xor"
|
|
192
|
+
and isinstance(stmt.src.operands[0], ailment.Expr.Register)
|
|
193
|
+
and stmt.src.operands[0].reg_offset == load_reg
|
|
194
|
+
and isinstance(stmt.src.operands[1], ailment.Expr.StackBaseOffset)
|
|
195
|
+
):
|
|
196
|
+
xor_stmt_idx = idx
|
|
197
|
+
xored_reg = stmt.dst.reg_offset
|
|
198
|
+
else:
|
|
199
|
+
break
|
|
200
|
+
if xor_stmt_idx is not None and idx == xor_stmt_idx + 1:
|
|
201
|
+
if (
|
|
202
|
+
isinstance(stmt, ailment.Stmt.Store)
|
|
203
|
+
and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
|
|
204
|
+
and isinstance(stmt.data, ailment.Expr.Register)
|
|
205
|
+
and stmt.data.reg_offset == xored_reg
|
|
206
|
+
):
|
|
207
|
+
return first_block, [load_stmt_idx, xor_stmt_idx, idx]
|
|
208
|
+
else:
|
|
209
|
+
break
|
|
167
210
|
|
|
168
211
|
return None
|
|
169
212
|
|
|
170
213
|
@staticmethod
|
|
171
214
|
def _find_amd64_canary_storing_stmt(block, canary_value_stack_offset):
|
|
215
|
+
load_stmt_idx = None
|
|
216
|
+
|
|
172
217
|
for idx, stmt in enumerate(block.statements):
|
|
218
|
+
# when we are lucky, we have one instruction
|
|
173
219
|
if (
|
|
174
220
|
isinstance(stmt, ailment.Stmt.Assignment)
|
|
175
221
|
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
@@ -185,7 +231,29 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
185
231
|
if isinstance(op1, ailment.Expr.StackBaseOffset):
|
|
186
232
|
# found it
|
|
187
233
|
return idx
|
|
188
|
-
|
|
234
|
+
# or when we are unlucky, we have two instructions...
|
|
235
|
+
if (
|
|
236
|
+
isinstance(stmt, ailment.Stmt.Assignment)
|
|
237
|
+
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
238
|
+
and stmt.dst.reg_name == "rcx"
|
|
239
|
+
and isinstance(stmt.src, ailment.Expr.Load)
|
|
240
|
+
and isinstance(stmt.src.addr, ailment.Expr.StackBaseOffset)
|
|
241
|
+
and stmt.src.addr.offset == canary_value_stack_offset
|
|
242
|
+
):
|
|
243
|
+
load_stmt_idx = idx
|
|
244
|
+
if load_stmt_idx is not None and idx == load_stmt_idx + 1:
|
|
245
|
+
if (
|
|
246
|
+
isinstance(stmt, ailment.Stmt.Assignment)
|
|
247
|
+
and isinstance(stmt.dst, ailment.Expr.Register)
|
|
248
|
+
and isinstance(stmt.src, ailment.Expr.BinaryOp)
|
|
249
|
+
and stmt.src.op == "Xor"
|
|
250
|
+
):
|
|
251
|
+
if (
|
|
252
|
+
isinstance(stmt.src.operands[0], ailment.Expr.Register)
|
|
253
|
+
and stmt.src.operands[0].reg_name == "rcx"
|
|
254
|
+
and isinstance(stmt.src.operands[1], ailment.Expr.StackBaseOffset)
|
|
255
|
+
):
|
|
256
|
+
return idx
|
|
189
257
|
return None
|
|
190
258
|
|
|
191
259
|
@staticmethod
|
|
@@ -200,6 +268,29 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
200
268
|
return idx
|
|
201
269
|
return None
|
|
202
270
|
|
|
271
|
+
def _is_function_likely_security_check_cookie(self, func) -> bool:
    """
    Heuristically decide whether a function looks like the security-cookie checking routine, based on the shape of
    its first block instead of its name.

    The function is accepted only if its first block consists of exactly two instructions: a ``cmp`` between rcx and
    a rip-relative memory operand that resolves to ``self._security_cookie_addr``, followed by a ``jne``.

    :param func:    The function to inspect.
    :return:        True if the first block matches the pattern described above, False otherwise.
    """
    # PLT stubs, syscalls, and SimProcedures are never candidates
    if func.is_plt or func.is_syscall or func.is_simprocedure:
        return False

    first_block = self.project.factory.block(func.addr)
    if first_block.instructions != 2:
        return False

    # first instruction: cmp <reg>, <mem>
    cmp_insn = first_block.capstone.insns[0]
    if cmp_insn.mnemonic != "cmp" or len(cmp_insn.operands) != 2:
        return False

    # left operand must be the rcx register
    lhs = cmp_insn.operands[0]
    if lhs.type != capstone.x86.X86_OP_REG or lhs.reg != capstone.x86.X86_REG_RCX:
        return False

    # right operand must be a rip-relative memory reference without an index register
    rhs = cmp_insn.operands[1]
    if rhs.type != capstone.x86.X86_OP_MEM:
        return False
    mem = rhs.mem
    if mem.base != capstone.x86.X86_REG_RIP or mem.index != 0:
        return False

    # rip-relative addressing is relative to the end of the instruction
    if mem.disp + cmp_insn.address + cmp_insn.size != self._security_cookie_addr:
        return False

    # second instruction: the conditional jump taken when the comparison fails
    return first_block.capstone.insns[1].mnemonic == "jne"
|
|
293
|
+
|
|
203
294
|
def _find_stmt_calling_security_check_cookie(self, node):
|
|
204
295
|
for idx, stmt in enumerate(node.statements):
|
|
205
296
|
if isinstance(stmt, ailment.Stmt.Call) and isinstance(stmt.target, ailment.Expr.Const):
|
|
@@ -208,5 +299,7 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
208
299
|
func = self.kb.functions.function(addr=const_target)
|
|
209
300
|
if func.name == "_security_check_cookie":
|
|
210
301
|
return idx
|
|
302
|
+
elif self._is_function_likely_security_check_cookie(func):
|
|
303
|
+
return idx
|
|
211
304
|
|
|
212
305
|
return None
|
|
@@ -40,10 +40,12 @@ from .sar_to_signed_div import SarToSignedDiv
|
|
|
40
40
|
from .tidy_stack_addr import TidyStackAddr
|
|
41
41
|
from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalConjunctionsAndDisjunctions
|
|
42
42
|
from .rol_ror import RolRorRewriter
|
|
43
|
+
from .inlined_strcpy import InlinedStrcpy
|
|
44
|
+
from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
|
|
43
45
|
|
|
44
|
-
from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase
|
|
45
|
-
|
|
46
|
+
from .base import PeepholeOptimizationExprBase, PeepholeOptimizationStmtBase, PeepholeOptimizationMultiStmtBase
|
|
46
47
|
|
|
48
|
+
MULTI_STMT_OPTS: List[Type[PeepholeOptimizationMultiStmtBase]] = []
|
|
47
49
|
STMT_OPTS: List[Type[PeepholeOptimizationStmtBase]] = []
|
|
48
50
|
EXPR_OPTS: List[Type[PeepholeOptimizationExprBase]] = []
|
|
49
51
|
|
|
@@ -55,4 +57,11 @@ for v in _g.values():
|
|
|
55
57
|
if isinstance(v, type) and issubclass(v, PeepholeOptimizationStmtBase) and v is not PeepholeOptimizationStmtBase:
|
|
56
58
|
STMT_OPTS.append(v)
|
|
57
59
|
|
|
60
|
+
if (
|
|
61
|
+
isinstance(v, type)
|
|
62
|
+
and issubclass(v, PeepholeOptimizationMultiStmtBase)
|
|
63
|
+
and v is not PeepholeOptimizationMultiStmtBase
|
|
64
|
+
):
|
|
65
|
+
MULTI_STMT_OPTS.append(v)
|
|
66
|
+
|
|
58
67
|
_g = None
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import List, Optional
|
|
2
2
|
|
|
3
3
|
from ailment.expression import BinaryOp, UnaryOp, Expression
|
|
4
|
-
from ailment.statement import Assignment
|
|
4
|
+
from ailment.statement import Statement, Assignment
|
|
5
5
|
from ailment import Block
|
|
6
6
|
from angr.project import Project
|
|
7
7
|
from angr.knowledge_base import KnowledgeBase
|
|
@@ -34,6 +34,33 @@ class PeepholeOptimizationStmtBase:
|
|
|
34
34
|
raise NotImplementedError("_optimize() is not implemented.")
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
class PeepholeOptimizationMultiStmtBase:
    """
    Common parent of every peephole optimization that works on several AIL statements at a time, as opposed to a
    single statement or a single expression.
    """

    __slots__ = ("project", "kb", "func_addr")

    # Instance attributes; their storage is declared through __slots__ above.
    project: Optional[Project]
    kb: Optional[KnowledgeBase]
    func_addr: Optional[int]

    # Class-level metadata (presumably overridden by concrete optimizations).
    NAME = "Peephole Optimization - Multi-statement"
    DESCRIPTION = "Peephole Optimization - Multi-statement"
    stmt_classes = None

    def __init__(self, project: Optional[Project], kb: Optional[KnowledgeBase], func_addr: Optional[int] = None):
        """
        :param project:     The angr project the optimization runs in, if any.
        :param kb:          The knowledge base made available to the optimization, if any.
        :param func_addr:   Address of the function being optimized, if known.
        """
        self.project, self.kb, self.func_addr = project, kb, func_addr

    def optimize(self, stmts: List[Statement], stmt_idx: Optional[int] = None, block=None, **kwargs):
        """
        Optimize a group of statements. The base class provides no implementation and always raises.

        :param stmts:       The statements to optimize.
        :param stmt_idx:    Optional index associated with the statements.
        :param block:       Optional block that the statements belong to.
        :raises NotImplementedError: Always, unless overridden in a subclass.
        """
        raise NotImplementedError("_optimize() is not implemented.")
|
|
62
|
+
|
|
63
|
+
|
|
37
64
|
class PeepholeOptimizationExprBase:
|
|
38
65
|
"""
|
|
39
66
|
The base class for all peephole optimizations that are applied on AIL expressions.
|
|
@@ -20,7 +20,7 @@ class ConstantDereferences(PeepholeOptimizationExprBase):
|
|
|
20
20
|
if sec is not None and sec.is_readable and (not sec.is_writable or "got" in sec.name):
|
|
21
21
|
# do we know the value that it's reading?
|
|
22
22
|
try:
|
|
23
|
-
val = self.project.loader.memory.unpack_word(expr.addr.value, size=
|
|
23
|
+
val = self.project.loader.memory.unpack_word(expr.addr.value, size=expr.size)
|
|
24
24
|
except KeyError:
|
|
25
25
|
return expr
|
|
26
26
|
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# pylint:disable=arguments-differ
|
|
2
|
+
from typing import Tuple, Optional
|
|
3
|
+
import string
|
|
4
|
+
|
|
5
|
+
from archinfo import Endness
|
|
6
|
+
|
|
7
|
+
from ailment.expression import Const
|
|
8
|
+
from ailment.statement import Call, Store
|
|
9
|
+
|
|
10
|
+
from .base import PeepholeOptimizationStmtBase
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Characters accepted as text when deciding whether an integer constant encodes a string.
ASCII_PRINTABLES = set(string.printable)
# Decimal digits; short candidates made only of these are rejected as strings.
ASCII_DIGITS = set(string.digits)


class InlinedStrcpy(PeepholeOptimizationStmtBase):
    """
    Rewrites a Store of a constant whose bytes look like printable ASCII text into a call to strncpy.
    """

    __slots__ = ()

    NAME = "Simplifying inlined strcpy"
    stmt_classes = (Store,)

    def optimize(self, stmt: Store, **kwargs):
        """
        Turn a Store of a string-like constant into a ``strncpy`` call.

        :param stmt:    The Store statement to inspect.
        :return:        A Call statement replacing the store, or None if the stored data is not a constant or does
                        not look like a string.
        """
        if not isinstance(stmt.data, Const):
            return None

        looks_like_str, text = self.is_integer_likely_a_string(stmt.data.value, stmt.data.size, stmt.endness)
        if not looks_like_str:
            return None

        # register the text in the knowledge base; the source argument refers to it by the returned identifier
        str_id = self.kb.custom_strings.allocate(text.encode("ascii"))
        src_arg = Const(None, None, str_id, stmt.addr.bits, custom_string=True)
        len_arg = Const(None, None, len(text), self.project.arch.bits)
        return Call(stmt.idx, "strncpy", args=[stmt.addr, src_arg, len_arg], **stmt.tags)

    @staticmethod
    def is_integer_likely_a_string(
        v: int, size: int, endness: Endness, min_length: int = 4
    ) -> Tuple[bool, Optional[str]]:
        """
        Test whether an integer constant is likely a short piece of printable ASCII text.

        :param v:           The integer value to decode.
        :param size:        Size of the value in bytes. Only consulted for big-endian values.
        :param endness:     Byte order used to interpret the value.
        :param min_length:  Minimum number of characters required for the value to count as a string.
        :return:            (True, decoded text) when the value looks like a string, (False, None) otherwise.
        """
        remaining = v
        letters = []

        if endness == Endness.LE:
            # the lowest byte is the first character; stop once only zero bytes are left
            while remaining != 0:
                ch = chr(remaining & 0xFF)
                if ch not in ASCII_PRINTABLES:
                    return False, None
                letters.append(ch)
                remaining >>= 8

        elif endness == Endness.BE:
            # walk from the last character towards the first: trailing zero bytes are skipped, but a zero byte
            # in front of any character disqualifies the value
            seen_char = False
            for _ in range(size):
                low = remaining & 0xFF
                remaining >>= 8
                if low == 0:
                    if seen_char:
                        return False, None
                    continue
                seen_char = True
                ch = chr(low)
                if ch not in ASCII_PRINTABLES:
                    return False, None
                letters.append(ch)
            letters.reverse()

        else:
            # unsupported endness
            return False, None

        if len(letters) < min_length:
            return False, None
        # up to four characters that are all digits are not treated as a string
        if len(letters) <= 4 and all(ch in ASCII_DIGITS for ch in letters):
            return False, None
        return True, "".join(letters)
|