angr 9.2.149__py3-none-win_amd64.whl → 9.2.152__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +100 -37
- angr/analyses/calling_convention/calling_convention.py +17 -9
- angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +39 -0
- angr/analyses/decompiler/clinic.py +73 -1
- angr/analyses/decompiler/dephication/rewriting_engine.py +38 -1
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +6 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +5 -0
- angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +2 -1
- angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/cas_intrinsics.py +115 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +37 -1
- angr/analyses/decompiler/ssailification/traversal_engine.py +10 -1
- angr/analyses/decompiler/utils.py +17 -0
- angr/analyses/disassembly.py +2 -1
- angr/analyses/patchfinder.py +1 -1
- angr/analyses/reaching_definitions/engine_ail.py +20 -0
- angr/analyses/s_propagator.py +28 -0
- angr/analyses/stack_pointer_tracker.py +2 -1
- angr/analyses/typehoon/typehoon.py +4 -1
- angr/analyses/variable_recovery/engine_ail.py +9 -0
- angr/engines/light/engine.py +7 -0
- angr/engines/pcode/lifter.py +7 -0
- angr/lib/angr_native.dll +0 -0
- angr/storage/memory_mixins/clouseau_mixin.py +7 -1
- angr/utils/graph.py +61 -39
- angr/utils/ssa/__init__.py +6 -1
- {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/METADATA +6 -6
- {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/RECORD +33 -32
- {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/WHEEL +1 -1
- {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/entry_points.txt +0 -0
- {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/__main__.py
CHANGED
|
@@ -1,38 +1,93 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import argparse
|
|
4
|
-
import
|
|
4
|
+
import logging
|
|
5
|
+
import re
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
from collections.abc import Generator
|
|
5
8
|
|
|
9
|
+
import angr
|
|
6
10
|
from angr.analyses.decompiler import DECOMPILATION_PRESETS
|
|
7
11
|
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES, DEFAULT_STRUCTURER
|
|
8
12
|
from angr.analyses.decompiler.utils import decompile_functions
|
|
9
13
|
|
|
10
14
|
|
|
11
|
-
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from angr.knowledge_plugins.functions import Function
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
log = logging.getLogger(__name__)


# Optional "0x" prefix followed by one or more hexadecimal digits, e.g. "0x401000" or "401000".
NUMERIC_ARG_RE = re.compile(r"^(0x)?[a-fA-F0-9]+$")


def _parse_function_addr(func_arg: str) -> int | None:
    """
    Convert a command-line function identifier into an address.

    :param func_arg: The identifier as typed by the user.
    :return: The parsed address, or None if func_arg does not look numeric.
    """
    if not NUMERIC_ARG_RE.match(func_arg):
        return None
    try:
        # base 0 honours an explicit prefix and treats plain digits as decimal
        return int(func_arg, 0)
    except ValueError:
        # NUMERIC_ARG_RE also admits strings that base 0 rejects: zero-padded decimals ("0123") and bare hex
        # digits ("deadbeef"). Digit-only strings stay decimal, like their unpadded form; the rest are hex.
        return int(func_arg, 10 if func_arg.isdigit() else 16)


def parse_function_args(proj: angr.Project, func_args: list[str] | None) -> Generator[Function]:
    """
    Generate a sequence of functions in the project kb by their identifier in func_args.

    Each identifier is first looked up as-is in the function manager; if that fails and the identifier looks
    numeric, it is converted to an address and looked up again. Identifiers that cannot be resolved are reported
    through the module logger and skipped.

    :param proj:        Project to query.
    :param func_args:   Sequence of function identifiers to query. None for all functions.
    :return:            A generator of the matching functions. With func_args=None, all functions sorted by
                        address.
    """
    if func_args is None:
        yield from sorted(proj.kb.functions.values(), key=lambda f: f.addr)
        return

    for func_arg in func_args:
        if func_arg in proj.kb.functions:
            yield proj.kb.functions[func_arg]
            continue

        func_addr = _parse_function_addr(func_arg)
        if func_addr is not None and func_addr in proj.kb.functions:
            yield proj.kb.functions[func_addr]
            continue

        log.error('Function "%s" not found', func_arg)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def disassemble(args):
    """
    Disassemble functions.
    """
    # NOTE: the docstring above doubles as the sub-command help text (help=disassemble.__doc__); keep it short.

    # only forward a base address to the loader when the user supplied one
    main_opts = {} if args.base_addr is None else {"base_addr": args.base_addr}

    project = angr.Project(args.binary, auto_load_libs=False, main_opts=main_opts)
    # function recovery; the result lands in project.kb, which parse_function_args queries
    project.analyses.CFG(normalize=True, data_references=True)

    for function in parse_function_args(project, args.functions):
        try:
            skip = function.is_plt or function.is_syscall or function.is_alignment or function.is_simprocedure
            if not skip:
                function.pp(show_bytes=True, min_edge_depth=10)
        except Exception as ex:  # pylint:disable=broad-exception-caught
            if args.catch_exceptions:
                # best-effort mode: report the failure and move on to the next function
                log.exception(ex)
            else:
                raise
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def decompile(args):
    """
    Decompile functions.
    """
    # NOTE: the docstring above doubles as the sub-command help text (help=decompile.__doc__); keep it short.
    binary_path = args.binary
    # translate the parsed command-line namespace into decompile_functions keyword arguments
    options = {
        "functions": args.functions,
        "structurer": args.structurer,
        "catch_errors": args.catch_exceptions,
        "show_casts": not args.no_casts,  # the flag is negative on the CLI, positive in the API
        "base_address": args.base_addr,
        "preset": args.preset,
    }
    print(decompile_functions(binary_path, **options))
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def main():
|
|
89
|
+
parser = argparse.ArgumentParser(description="The angr CLI allows you to decompile and analyze binaries.")
|
|
28
90
|
parser.add_argument("binary", help="The path to the binary to analyze.")
|
|
29
|
-
parser.add_argument(
|
|
30
|
-
"--functions",
|
|
31
|
-
help="""
|
|
32
|
-
The functions to analyze under the current command. Functions can either be expressed as names found in the
|
|
33
|
-
symbols of the binary or as addresses like: 0x401000.""",
|
|
34
|
-
nargs="+",
|
|
35
|
-
)
|
|
36
91
|
parser.add_argument(
|
|
37
92
|
"--catch-exceptions",
|
|
38
93
|
help="""
|
|
@@ -49,40 +104,48 @@ def main(args=sys.argv[1:], out=sys.stdout):
|
|
|
49
104
|
type=lambda x: int(x, 0),
|
|
50
105
|
default=None,
|
|
51
106
|
)
|
|
52
|
-
|
|
53
|
-
|
|
107
|
+
subparsers = parser.add_subparsers(metavar="command", required=True)
|
|
108
|
+
|
|
109
|
+
decompile_cmd_parser = subparsers.add_parser("decompile", aliases=["dec"], help=decompile.__doc__)
|
|
110
|
+
decompile_cmd_parser.set_defaults(func=decompile)
|
|
111
|
+
decompile_cmd_parser.add_argument(
|
|
54
112
|
"--structurer",
|
|
55
113
|
help="The structuring algorithm to use for decompilation.",
|
|
56
114
|
choices=STRUCTURER_CLASSES.keys(),
|
|
57
115
|
default=DEFAULT_STRUCTURER.NAME,
|
|
58
116
|
)
|
|
59
|
-
|
|
117
|
+
decompile_cmd_parser.add_argument(
|
|
60
118
|
"--no-casts",
|
|
61
119
|
help="Do not show type casts in the decompiled output.",
|
|
62
120
|
action="store_true",
|
|
63
121
|
default=False,
|
|
64
122
|
)
|
|
65
|
-
|
|
123
|
+
decompile_cmd_parser.add_argument(
|
|
66
124
|
"--preset",
|
|
67
125
|
help="The configuration preset to use for decompilation.",
|
|
68
126
|
choices=DECOMPILATION_PRESETS,
|
|
69
127
|
default="default",
|
|
70
128
|
)
|
|
129
|
+
decompile_cmd_parser.add_argument(
|
|
130
|
+
"--functions",
|
|
131
|
+
help="""
|
|
132
|
+
The functions to decompile. Functions can either be expressed as names found in the
|
|
133
|
+
symbols of the binary or as addresses like: 0x401000.""",
|
|
134
|
+
nargs="+",
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
disassemble_cmd_parser = subparsers.add_parser("disassemble", aliases=["dis"], help=disassemble.__doc__)
|
|
138
|
+
disassemble_cmd_parser.set_defaults(func=disassemble)
|
|
139
|
+
disassemble_cmd_parser.add_argument(
|
|
140
|
+
"--functions",
|
|
141
|
+
help="""
|
|
142
|
+
The functions to disassemble. Functions can either be expressed as names found in the
|
|
143
|
+
symbols of the binary or as addresses like: 0x401000.""",
|
|
144
|
+
nargs="+",
|
|
145
|
+
)
|
|
71
146
|
|
|
72
|
-
args = parser.parse_args(
|
|
73
|
-
|
|
74
|
-
decompilation = decompile_functions(
|
|
75
|
-
args.binary,
|
|
76
|
-
functions=args.functions,
|
|
77
|
-
structurer=args.structurer,
|
|
78
|
-
catch_errors=args.catch_exceptions,
|
|
79
|
-
show_casts=not args.no_casts,
|
|
80
|
-
base_address=args.base_addr,
|
|
81
|
-
preset=args.preset,
|
|
82
|
-
)
|
|
83
|
-
print(decompilation, file=out)
|
|
84
|
-
else:
|
|
85
|
-
parser.print_help(file=out)
|
|
147
|
+
args = parser.parse_args()
|
|
148
|
+
args.func(args)
|
|
86
149
|
|
|
87
150
|
|
|
88
151
|
if __name__ == "__main__":
|
|
@@ -21,6 +21,7 @@ from angr.calling_conventions import (
|
|
|
21
21
|
default_cc,
|
|
22
22
|
SimCCMicrosoftThiscall,
|
|
23
23
|
)
|
|
24
|
+
from angr.errors import SimTranslationError
|
|
24
25
|
from angr.sim_type import (
|
|
25
26
|
SimTypeCppFunction,
|
|
26
27
|
SimTypeInt,
|
|
@@ -585,16 +586,23 @@ class CallingConventionAnalysis(Analysis):
|
|
|
585
586
|
# include its successor.
|
|
586
587
|
|
|
587
588
|
# Re-lift the target block
|
|
588
|
-
|
|
589
|
+
dst_block_size = func.get_block_size(dst.addr)
|
|
590
|
+
if dst_block_size is not None and dst_block_size > 0:
|
|
591
|
+
dst_bb = self.project.factory.block(dst.addr, dst_block_size, opt_level=1)
|
|
592
|
+
try:
|
|
593
|
+
vex_block = dst_bb.vex
|
|
594
|
+
except SimTranslationError:
|
|
595
|
+
# failed to lift the block
|
|
596
|
+
continue
|
|
589
597
|
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
+
# If there is only one 'IMark' statement in vex --> the target block contains only direct jump
|
|
599
|
+
if (
|
|
600
|
+
len(vex_block.statements) == 1
|
|
601
|
+
and vex_block.statements[0].tag == "Ist_IMark"
|
|
602
|
+
and func.graph.out_degree(dst) == 1
|
|
603
|
+
):
|
|
604
|
+
for _, jmp_dst, jmp_data in func_graph.out_edges(dst, data=True):
|
|
605
|
+
subgraph.add_edge(dst, jmp_dst, **jmp_data)
|
|
598
606
|
|
|
599
607
|
return subgraph
|
|
600
608
|
|
|
@@ -412,6 +412,45 @@ class AMD64CCallRewriter(CCallRewriterBase):
|
|
|
412
412
|
)
|
|
413
413
|
return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
|
|
414
414
|
|
|
415
|
+
elif (
|
|
416
|
+
cond_v == AMD64_CondTypes["CondNS"]
|
|
417
|
+
and op_v
|
|
418
|
+
in {
|
|
419
|
+
AMD64_OpTypes["G_CC_OP_LOGICB"],
|
|
420
|
+
AMD64_OpTypes["G_CC_OP_LOGICW"],
|
|
421
|
+
AMD64_OpTypes["G_CC_OP_LOGICL"],
|
|
422
|
+
AMD64_OpTypes["G_CC_OP_LOGICQ"],
|
|
423
|
+
}
|
|
424
|
+
and isinstance(dep_2, Expr.Const)
|
|
425
|
+
and dep_2.value == 0
|
|
426
|
+
):
|
|
427
|
+
# dep_1 >= 0
|
|
428
|
+
dep_1 = self._fix_size(
|
|
429
|
+
dep_1,
|
|
430
|
+
op_v,
|
|
431
|
+
AMD64_OpTypes["G_CC_OP_LOGICB"],
|
|
432
|
+
AMD64_OpTypes["G_CC_OP_LOGICW"],
|
|
433
|
+
AMD64_OpTypes["G_CC_OP_LOGICL"],
|
|
434
|
+
ccall.tags,
|
|
435
|
+
)
|
|
436
|
+
dep_2 = self._fix_size(
|
|
437
|
+
dep_2,
|
|
438
|
+
op_v,
|
|
439
|
+
AMD64_OpTypes["G_CC_OP_LOGICB"],
|
|
440
|
+
AMD64_OpTypes["G_CC_OP_LOGICW"],
|
|
441
|
+
AMD64_OpTypes["G_CC_OP_LOGICL"],
|
|
442
|
+
ccall.tags,
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
r = Expr.BinaryOp(
|
|
446
|
+
ccall.idx,
|
|
447
|
+
"CmpGE",
|
|
448
|
+
(dep_1, dep_2),
|
|
449
|
+
True,
|
|
450
|
+
**ccall.tags,
|
|
451
|
+
)
|
|
452
|
+
return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
|
|
453
|
+
|
|
415
454
|
elif ccall.callee == "amd64g_calculate_rflags_c":
|
|
416
455
|
# calculate the carry flag
|
|
417
456
|
op = ccall.operands[0]
|
|
@@ -483,6 +483,9 @@ class Clinic(Analysis):
|
|
|
483
483
|
arg_vvars = self._create_function_argument_vvars(arg_list)
|
|
484
484
|
func_args = {arg_vvar for arg_vvar, _ in arg_vvars.values()}
|
|
485
485
|
|
|
486
|
+
# duplicate orphaned conditional jump blocks
|
|
487
|
+
ail_graph = self._duplicate_orphaned_cond_jumps(ail_graph)
|
|
488
|
+
|
|
486
489
|
# Transform the graph into partial SSA form
|
|
487
490
|
self._update_progress(35.0, text="Transforming to partial-SSA form")
|
|
488
491
|
ail_graph = self._transform_to_ssa_level0(ail_graph, func_args)
|
|
@@ -1892,6 +1895,19 @@ class Clinic(Analysis):
|
|
|
1892
1895
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.dst)
|
|
1893
1896
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.src)
|
|
1894
1897
|
|
|
1898
|
+
elif stmt_type is ailment.Stmt.CAS:
|
|
1899
|
+
for expr in [
|
|
1900
|
+
stmt.addr,
|
|
1901
|
+
stmt.data_lo,
|
|
1902
|
+
stmt.data_hi,
|
|
1903
|
+
stmt.expd_lo,
|
|
1904
|
+
stmt.expd_hi,
|
|
1905
|
+
stmt.old_lo,
|
|
1906
|
+
stmt.old_hi,
|
|
1907
|
+
]:
|
|
1908
|
+
if expr is not None:
|
|
1909
|
+
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, expr)
|
|
1910
|
+
|
|
1895
1911
|
elif stmt_type is ailment.Stmt.ConditionalJump:
|
|
1896
1912
|
self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.condition)
|
|
1897
1913
|
|
|
@@ -2123,6 +2139,45 @@ class Clinic(Analysis):
|
|
|
2123
2139
|
|
|
2124
2140
|
return graph
|
|
2125
2141
|
|
|
2142
|
+
@staticmethod
def _duplicate_orphaned_cond_jumps(ail_graph) -> networkx.DiGraph:
    """
    Give each predecessor its own copy of an "orphaned" conditional-jump block.

    A block qualifies when it has more than one statement, its first and last statements share the same
    instruction address, it has more than one predecessor (and is not its own predecessor), and at least one of
    its statements assigns the result of a VEX ccall. Such a jump may depend on comparisons made in several
    predecessors, which prevents the ccall from being resolved into a comparison. As a workaround, the
    lowest-addressed predecessor keeps the original block and every other predecessor is rewired to a fresh
    copy that has the same successors.

    This pass runs before any SSA transformations.

    Example of such an instruction:  140017162  jz short 1400171e1

    :param ail_graph:   The AIL graph to update in place.
    :return:            The same graph object.
    """

    for node in list(ail_graph):
        stmts = node.statements
        if len(stmts) <= 1 or stmts[0].ins_addr != stmts[-1].ins_addr:
            continue

        incoming = list(ail_graph.predecessors(node))
        if len(incoming) <= 1 or node in incoming:
            continue

        uses_ccall = any(
            isinstance(s, ailment.Stmt.Assignment) and isinstance(s.src, ailment.Expr.VEXCCallExpression)
            for s in stmts
        )
        if not uses_ccall:
            continue

        # duplicate this block to its predecessors!
        incoming.sort(key=lambda b: b.addr)
        outgoing = sorted(ail_graph.successors(node), key=lambda b: b.addr)
        # FIXME: We should track block IDs globally and ensure block IDs do not collide
        first_clone_idx = 1 if node.idx is None else node.idx + 1
        for clone_idx, pred in enumerate(incoming[1:], start=first_clone_idx):
            ail_graph.remove_edge(pred, node)
            clone = node.copy()
            clone.idx = clone_idx
            ail_graph.add_edge(pred, clone)
            for succ in outgoing:
                # a self-loop on the original must become a self-loop on the copy
                ail_graph.add_edge(clone, clone if succ is node else succ)

    return ail_graph
|
|
2180
|
+
|
|
2126
2181
|
def _rewrite_ite_expressions(self, ail_graph):
|
|
2127
2182
|
cfg = self._cfg
|
|
2128
2183
|
for block in list(ail_graph):
|
|
@@ -2130,11 +2185,16 @@ class Clinic(Analysis):
|
|
|
2130
2185
|
continue
|
|
2131
2186
|
|
|
2132
2187
|
ite_ins_addrs = []
|
|
2188
|
+
cas_ins_addrs = set()
|
|
2133
2189
|
for stmt in block.statements:
|
|
2134
|
-
if (
|
|
2190
|
+
if isinstance(stmt, ailment.Stmt.CAS):
|
|
2191
|
+
# we do not rewrite ITE statements that are caused by CAS statements
|
|
2192
|
+
cas_ins_addrs.add(stmt.ins_addr)
|
|
2193
|
+
elif (
|
|
2135
2194
|
isinstance(stmt, ailment.Stmt.Assignment)
|
|
2136
2195
|
and isinstance(stmt.src, ailment.Expr.ITE)
|
|
2137
2196
|
and stmt.ins_addr not in ite_ins_addrs
|
|
2197
|
+
and stmt.ins_addr not in cas_ins_addrs
|
|
2138
2198
|
):
|
|
2139
2199
|
ite_ins_addrs.append(stmt.ins_addr)
|
|
2140
2200
|
|
|
@@ -2998,6 +3058,12 @@ class Clinic(Analysis):
|
|
|
2998
3058
|
and last_stmt.addr.offset < 0
|
|
2999
3059
|
and isinstance(last_stmt.data, ailment.Expr.Const)
|
|
3000
3060
|
and last_stmt.data.value == succ.addr
|
|
3061
|
+
) or (
|
|
3062
|
+
isinstance(last_stmt, ailment.Stmt.Assignment)
|
|
3063
|
+
and last_stmt.dst.was_stack
|
|
3064
|
+
and last_stmt.dst.stack_offset < 0
|
|
3065
|
+
and isinstance(last_stmt.src, ailment.Expr.Const)
|
|
3066
|
+
and last_stmt.src.value == succ.addr
|
|
3001
3067
|
):
|
|
3002
3068
|
# remove the statement that pushes the return address
|
|
3003
3069
|
node.statements = node.statements[:-1]
|
|
@@ -3031,6 +3097,12 @@ class Clinic(Analysis):
|
|
|
3031
3097
|
and last_stmt.addr.offset < 0
|
|
3032
3098
|
and isinstance(last_stmt.data, ailment.Expr.Const)
|
|
3033
3099
|
and last_stmt.data.value == succ.addr
|
|
3100
|
+
) or (
|
|
3101
|
+
isinstance(last_stmt, ailment.Stmt.Assignment)
|
|
3102
|
+
and last_stmt.dst.was_stack
|
|
3103
|
+
and last_stmt.dst.stack_offset < 0
|
|
3104
|
+
and isinstance(last_stmt.src, ailment.Expr.Const)
|
|
3105
|
+
and last_stmt.src.value == succ.addr
|
|
3034
3106
|
):
|
|
3035
3107
|
# remove the statement that pushes the return address
|
|
3036
3108
|
node.statements = node.statements[:-1]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# pylint:disable=unused-argument,no-self-use
|
|
1
|
+
# pylint:disable=unused-argument,no-self-use,too-many-boolean-expressions
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
import logging
|
|
4
4
|
|
|
@@ -8,12 +8,14 @@ from ailment.statement import (
|
|
|
8
8
|
Assignment,
|
|
9
9
|
Store,
|
|
10
10
|
Call,
|
|
11
|
+
CAS,
|
|
11
12
|
Return,
|
|
12
13
|
ConditionalJump,
|
|
13
14
|
DirtyStatement,
|
|
14
15
|
WeakAssignment,
|
|
15
16
|
)
|
|
16
17
|
from ailment.expression import (
|
|
18
|
+
Atom,
|
|
17
19
|
Expression,
|
|
18
20
|
VirtualVariable,
|
|
19
21
|
Load,
|
|
@@ -121,6 +123,40 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
|
|
|
121
123
|
)
|
|
122
124
|
return None
|
|
123
125
|
|
|
126
|
+
def _handle_stmt_CAS(self, stmt: CAS) -> CAS | None:
    """
    Rewrite the sub-expressions of a CAS statement.

    :param stmt:    The CAS statement to process.
    :return:        A new CAS statement if any operand was rewritten, or None if nothing changed.
    """

    def _rewrite_optional(expr):
        # the *_hi operands may be absent; leave them alone in that case
        return None if expr is None else self._expr(expr)

    originals = (
        stmt.addr,
        stmt.data_lo,
        stmt.data_hi,
        stmt.expd_lo,
        stmt.expd_hi,
        stmt.old_lo,
        stmt.old_hi,
    )
    # evaluated in the same order as the CAS constructor takes them
    rewritten = (
        self._expr(stmt.addr),
        self._expr(stmt.data_lo),
        _rewrite_optional(stmt.data_hi),
        self._expr(stmt.expd_lo),
        _rewrite_optional(stmt.expd_hi),
        self._expr(stmt.old_lo),
        _rewrite_optional(stmt.old_hi),
    )
    new_old_lo, new_old_hi = rewritten[5], rewritten[6]
    assert new_old_lo is None or isinstance(new_old_lo, Atom)
    assert new_old_hi is None or isinstance(new_old_hi, Atom)

    if all(new is None for new in rewritten):
        return None

    # keep the original operand wherever the engine reported "unchanged" (None)
    operands = [old if new is None else new for old, new in zip(originals, rewritten)]
    return CAS(stmt.idx, *operands, stmt.endness, **stmt.tags)
|
|
159
|
+
|
|
124
160
|
def _handle_stmt_Store(self, stmt):
|
|
125
161
|
new_addr = self._expr(stmt.addr)
|
|
126
162
|
new_data = self._expr(stmt.data)
|
|
@@ -179,6 +215,7 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
|
|
|
179
215
|
dirty = self._expr(stmt.dirty)
|
|
180
216
|
if dirty is None or dirty is stmt.dirty:
|
|
181
217
|
return None
|
|
218
|
+
assert isinstance(dirty, DirtyExpression)
|
|
182
219
|
return DirtyStatement(stmt.idx, dirty, **stmt.tags)
|
|
183
220
|
|
|
184
221
|
def _handle_expr_Load(self, expr):
|
|
@@ -107,6 +107,12 @@ class ConditionConstantPropagation(OptimizationPass):
|
|
|
107
107
|
cconds_by_src[src] = []
|
|
108
108
|
cconds_by_src[src].append(ccond)
|
|
109
109
|
|
|
110
|
+
# eliminate sources with more than one in-edges; this is because the condition may not hold on all in-edges!
|
|
111
|
+
for src in list(cconds_by_src):
|
|
112
|
+
block = self._get_block(src[0], idx=src[1])
|
|
113
|
+
if block is not None and block in self._graph and self._graph.in_degree[block] > 1:
|
|
114
|
+
del cconds_by_src[src]
|
|
115
|
+
|
|
110
116
|
# eliminate conflicting conditions
|
|
111
117
|
for src in list(cconds_by_src):
|
|
112
118
|
cconds = cconds_by_src[src]
|
|
@@ -86,6 +86,11 @@ class SimplifierAILEngine(
|
|
|
86
86
|
|
|
87
87
|
return stmt
|
|
88
88
|
|
|
89
|
+
def _handle_stmt_CAS(self, stmt: ailment.statement.CAS) -> ailment.statement.CAS:
|
|
90
|
+
# we assume that we never have to deal with CAS statements at this point; they should have been rewritten to
|
|
91
|
+
# intrinsics
|
|
92
|
+
return stmt
|
|
93
|
+
|
|
89
94
|
def _handle_stmt_Store(self, stmt):
|
|
90
95
|
addr = self._expr(stmt.addr)
|
|
91
96
|
data = self._expr(stmt.data)
|
|
@@ -8,6 +8,7 @@ from ailment.expression import Op
|
|
|
8
8
|
from angr.analyses.decompiler.structuring.structurer_nodes import ConditionNode
|
|
9
9
|
from angr.analyses.decompiler.utils import (
|
|
10
10
|
structured_node_is_simple_return,
|
|
11
|
+
structured_node_is_simple_return_strict,
|
|
11
12
|
sequence_to_statements,
|
|
12
13
|
structured_node_has_multi_predecessors,
|
|
13
14
|
)
|
|
@@ -44,7 +45,7 @@ class FlipBooleanWalker(SequenceWalker):
|
|
|
44
45
|
and node.true_node is not None
|
|
45
46
|
and node.false_node is None
|
|
46
47
|
and idx < len(seq_node.nodes) - 1
|
|
47
|
-
and
|
|
48
|
+
and structured_node_is_simple_return_strict(seq_node.nodes[idx + 1])
|
|
48
49
|
and node not in type1_condition_nodes
|
|
49
50
|
):
|
|
50
51
|
# Type 2: Special Filter:
|
|
@@ -8,6 +8,7 @@ from .a_sub_a_div_const_mul_const import ASubADivConstMulConst
|
|
|
8
8
|
from .a_sub_a_shr_const_shr_const import ASubAShrConstShrConst
|
|
9
9
|
from .arm_cmpf import ARMCmpF
|
|
10
10
|
from .bswap import Bswap
|
|
11
|
+
from .cas_intrinsics import CASIntrinsics
|
|
11
12
|
from .coalesce_same_cascading_ifs import CoalesceSameCascadingIfs
|
|
12
13
|
from .constant_derefs import ConstantDereferences
|
|
13
14
|
from .const_mull_a_shift import ConstMullAShift
|
|
@@ -64,6 +65,7 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
|
|
|
64
65
|
ASubAShrConstShrConst,
|
|
65
66
|
ARMCmpF,
|
|
66
67
|
Bswap,
|
|
68
|
+
CASIntrinsics,
|
|
67
69
|
CoalesceSameCascadingIfs,
|
|
68
70
|
ConstantDereferences,
|
|
69
71
|
ConstMullAShift,
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# pylint:disable=arguments-differ,too-many-boolean-expressions
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from ailment.expression import BinaryOp, Load
|
|
5
|
+
from ailment.statement import CAS, ConditionalJump, Statement, Assignment, Call
|
|
6
|
+
|
|
7
|
+
from .base import PeepholeOptimizationMultiStmtBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# instruction mnemonic -> {OS name -> name of the intrinsic to display}
_INTRINSICS_NAMES = {
    "xchg": {"Win32": "InterlockedExchange", "Linux": "atomic_exchange"},
    "cmpxchg": {"Win32": "InterlockedCompareExchange", "Linux": "atomic_compare_exchange"},
}


class CASIntrinsics(PeepholeOptimizationMultiStmtBase):
    """
    Turn the AIL form of atomic exchange instructions (CAS statements) into intrinsic calls.

    Case 1: exchange. The CAS statement is followed by a retry jump that belongs to the same instruction.

        mov eax, r12d
        0x140014b57: xchg eax, [0x1400365f8]

        LABEL_0x140014b57:
        CAS(0x1400365f8<64>, Conv(64->32, vvar_365{reg 112}), Load(addr=0x1400365f8<64>, size=4, endness=Iend_LE),
            vvar_27756)
        if (CasCmpNE(vvar_27756, g_1400365f8))
            goto LABEL_0x140014b57;

        => vvar_27756 = InterlockedExchange(0x1400365f8, vvar_365{reg 112})

    Case 2: compare-and-exchange. The CAS statement is followed by a statement of a later instruction.

        lock cmpxchg cs:g_WarbirdSecureFunctionsLock, r14d

        CAS(0x1400365f8<64>, 0x1<32>, 0x0<32>, vvar_27751)

        => vvar_27751 = InterlockedCompareExchange(0x1400365f8, 0x1<32>, 0x0<32>)

    The intrinsic names shown are the Win32 ones; see _INTRINSICS_NAMES for the per-OS spelling.
    """

    __slots__ = ()

    NAME = "Rewrite compare-and-swap instructions into intrinsics."
    stmt_classes = ((CAS, ConditionalJump), (CAS, Statement))

    def optimize(self, stmts: list[Statement], stmt_idx: int | None = None, block=None, **kwargs):
        """
        Try to replace a CAS statement (and possibly its retry jump) with an intrinsic call.

        :param stmts:   Exactly two consecutive statements; the first one must be a CAS statement.
        :return:        The replacement statements, or None if the pair is left unchanged.
        """
        assert len(stmts) == 2
        cas_stmt = stmts[0]
        next_stmt = stmts[1]
        assert isinstance(cas_stmt, CAS)

        # TODO: We ignored endianness. Are there cases where the endianness is different from the host's?

        if self._is_exchange_loop(cas_stmt, next_stmt):
            # Case 1: the retry jump is dropped together with the CAS statement
            return [self._intrinsic_assignment(cas_stmt, "xchg", [cas_stmt.addr, cas_stmt.data_lo])]

        if next_stmt.ins_addr <= cas_stmt.ins_addr:
            # avoid matching against statements prematurely
            return None

        if cas_stmt.old_hi is not None:
            # TODO: Support cases where cas_stmt.old_hi is not None
            return None

        # Case 2: the following statement is kept as-is
        cmpxchg_args = [
            cas_stmt.addr,
            cas_stmt.data_lo,
            cas_stmt.expd_lo,
        ]
        return [self._intrinsic_assignment(cas_stmt, "cmpxchg", cmpxchg_args), next_stmt]

    @staticmethod
    def _is_exchange_loop(cas_stmt: CAS, next_stmt: Statement) -> bool:
        """
        Test whether next_stmt is the CasCmpNE retry jump of cas_stmt and the pair forms a plain exchange.
        """
        if not isinstance(next_stmt, ConditionalJump):
            return False
        cond = next_stmt.condition
        if not isinstance(cond, BinaryOp) or cond.op != "CasCmpNE":
            return False
        if next_stmt.ins_addr != cas_stmt.ins_addr:
            return False

        addr = cas_stmt.addr
        expected = cas_stmt.expd_lo
        # the expected value must be a load from the very address being exchanged
        if not isinstance(expected, Load) or not expected.addr.likes(addr):
            return False
        compared = cond.operands[1]
        if not isinstance(compared, Load) or not compared.addr.likes(addr):
            return False
        # TODO: Support cases where cas_stmt.old_hi is not None
        return cas_stmt.old_lo.likes(cond.operands[0]) and cas_stmt.old_hi is None

    def _intrinsic_assignment(self, cas_stmt: CAS, mnemonic: str, call_args: list) -> Assignment:
        """
        Build the statement `cas_stmt.old_lo = <intrinsic for mnemonic>(*call_args)`.
        """
        call_expr = Call(
            cas_stmt.idx,
            self._get_instrincs_name(mnemonic),
            args=call_args,
            bits=cas_stmt.bits,
            ins_addr=cas_stmt.ins_addr,
        )
        return Assignment(cas_stmt.idx, cas_stmt.old_lo, call_expr, **cas_stmt.tags)

    def _get_instrincs_name(self, mnemonic: str) -> str:
        """
        Look up the intrinsic name for a mnemonic under the project's OS.

        Falls back to the "Linux" spelling when the OS is unknown or has no entry, and to the mnemonic itself
        when the mnemonic has no entry at all.
        """
        names_by_os = _INTRINSICS_NAMES.get(mnemonic)
        if names_by_os is None:
            return mnemonic

        os_name = "Linux"
        if self.project is not None and self.project.simos is not None and self.project.simos.name is not None:
            os_name = self.project.simos.name
        if os_name not in names_by_os:
            os_name = "Linux"
        return names_by_os[os_name]
|