angr 9.2.149__py3-none-win_amd64.whl → 9.2.152__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (33)
  1. angr/__init__.py +1 -1
  2. angr/__main__.py +100 -37
  3. angr/analyses/calling_convention/calling_convention.py +17 -9
  4. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +39 -0
  5. angr/analyses/decompiler/clinic.py +73 -1
  6. angr/analyses/decompiler/dephication/rewriting_engine.py +38 -1
  7. angr/analyses/decompiler/optimization_passes/condition_constprop.py +6 -0
  8. angr/analyses/decompiler/optimization_passes/engine_base.py +5 -0
  9. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +2 -1
  10. angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
  11. angr/analyses/decompiler/peephole_optimizations/cas_intrinsics.py +115 -0
  12. angr/analyses/decompiler/ssailification/rewriting_engine.py +37 -1
  13. angr/analyses/decompiler/ssailification/traversal_engine.py +10 -1
  14. angr/analyses/decompiler/utils.py +17 -0
  15. angr/analyses/disassembly.py +2 -1
  16. angr/analyses/patchfinder.py +1 -1
  17. angr/analyses/reaching_definitions/engine_ail.py +20 -0
  18. angr/analyses/s_propagator.py +28 -0
  19. angr/analyses/stack_pointer_tracker.py +2 -1
  20. angr/analyses/typehoon/typehoon.py +4 -1
  21. angr/analyses/variable_recovery/engine_ail.py +9 -0
  22. angr/engines/light/engine.py +7 -0
  23. angr/engines/pcode/lifter.py +7 -0
  24. angr/lib/angr_native.dll +0 -0
  25. angr/storage/memory_mixins/clouseau_mixin.py +7 -1
  26. angr/utils/graph.py +61 -39
  27. angr/utils/ssa/__init__.py +6 -1
  28. {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/METADATA +6 -6
  29. {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/RECORD +33 -32
  30. {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/WHEEL +1 -1
  31. {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/entry_points.txt +0 -0
  32. {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/licenses/LICENSE +0 -0
  33. {angr-9.2.149.dist-info → angr-9.2.152.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.149"
5
+ __version__ = "9.2.152"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
angr/__main__.py CHANGED
@@ -1,38 +1,93 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import argparse
4
- import sys
4
+ import logging
5
+ import re
6
+ from typing import TYPE_CHECKING
7
+ from collections.abc import Generator
5
8
 
9
+ import angr
6
10
  from angr.analyses.decompiler import DECOMPILATION_PRESETS
7
11
  from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES, DEFAULT_STRUCTURER
8
12
  from angr.analyses.decompiler.utils import decompile_functions
9
13
 
10
14
 
11
- class COMMANDS:
15
+ if TYPE_CHECKING:
16
+ from angr.knowledge_plugins.functions import Function
17
+
18
+
19
+ log = logging.getLogger(__name__)
20
+
21
+
22
+ NUMERIC_ARG_RE = re.compile(r"^(0x)?[a-fA-F0-9]+$")
23
+
24
+
25
+ def parse_function_args(proj: angr.Project, func_args: list[str] | None) -> Generator[Function]:
12
26
  """
13
- The commands that the angr CLI supports.
27
+ Generate a sequence of functions in the project kb by their identifier in func_args.
28
+
29
+ :param proj: Project to query.
30
+ :param func_args: Sequence of function identifiers to query. None for all functions.
14
31
  """
32
+ if func_args is None:
33
+ yield from sorted(proj.kb.functions.values(), key=lambda f: f.addr)
34
+ return
15
35
 
16
- DECOMPILE = "decompile"
17
- ALL_COMMANDS = [DECOMPILE]
36
+ for func_arg in func_args:
37
+ if func_arg in proj.kb.functions:
38
+ yield proj.kb.functions[func_arg]
39
+ continue
18
40
 
41
+ if NUMERIC_ARG_RE.match(func_arg):
42
+ func_addr = int(func_arg, 0)
43
+ if func_addr in proj.kb.functions:
44
+ yield proj.kb.functions[func_addr]
45
+ continue
19
46
 
20
- def main(args=sys.argv[1:], out=sys.stdout):
21
- parser = argparse.ArgumentParser(description="The angr CLI allows you to decompile and analyze binaries.")
22
- parser.add_argument(
23
- "command",
24
- help="""
25
- The analysis type to run on the binary. All analysis is output to stdout.""",
26
- choices=COMMANDS.ALL_COMMANDS,
47
+ log.error('Function "%s" not found', func_arg)
48
+
49
+
50
+ def disassemble(args):
51
+ """
52
+ Disassemble functions.
53
+ """
54
+ loader_main_opts_kwargs = {}
55
+ if args.base_addr is not None:
56
+ loader_main_opts_kwargs["base_addr"] = args.base_addr
57
+
58
+ proj = angr.Project(args.binary, auto_load_libs=False, main_opts=loader_main_opts_kwargs)
59
+ proj.analyses.CFG(normalize=True, data_references=True)
60
+
61
+ for func in parse_function_args(proj, args.functions):
62
+ try:
63
+ if func.is_plt or func.is_syscall or func.is_alignment or func.is_simprocedure:
64
+ continue
65
+ func.pp(show_bytes=True, min_edge_depth=10)
66
+ except Exception as e: # pylint:disable=broad-exception-caught
67
+ if not args.catch_exceptions:
68
+ raise
69
+ log.exception(e)
70
+
71
+
72
+ def decompile(args):
73
+ """
74
+ Decompile functions.
75
+ """
76
+ decompilation = decompile_functions(
77
+ args.binary,
78
+ functions=args.functions,
79
+ structurer=args.structurer,
80
+ catch_errors=args.catch_exceptions,
81
+ show_casts=not args.no_casts,
82
+ base_address=args.base_addr,
83
+ preset=args.preset,
27
84
  )
85
+ print(decompilation)
86
+
87
+
88
+ def main():
89
+ parser = argparse.ArgumentParser(description="The angr CLI allows you to decompile and analyze binaries.")
28
90
  parser.add_argument("binary", help="The path to the binary to analyze.")
29
- parser.add_argument(
30
- "--functions",
31
- help="""
32
- The functions to analyze under the current command. Functions can either be expressed as names found in the
33
- symbols of the binary or as addresses like: 0x401000.""",
34
- nargs="+",
35
- )
36
91
  parser.add_argument(
37
92
  "--catch-exceptions",
38
93
  help="""
@@ -49,40 +104,48 @@ def main(args=sys.argv[1:], out=sys.stdout):
49
104
  type=lambda x: int(x, 0),
50
105
  default=None,
51
106
  )
52
- # decompilation-specific arguments
53
- parser.add_argument(
107
+ subparsers = parser.add_subparsers(metavar="command", required=True)
108
+
109
+ decompile_cmd_parser = subparsers.add_parser("decompile", aliases=["dec"], help=decompile.__doc__)
110
+ decompile_cmd_parser.set_defaults(func=decompile)
111
+ decompile_cmd_parser.add_argument(
54
112
  "--structurer",
55
113
  help="The structuring algorithm to use for decompilation.",
56
114
  choices=STRUCTURER_CLASSES.keys(),
57
115
  default=DEFAULT_STRUCTURER.NAME,
58
116
  )
59
- parser.add_argument(
117
+ decompile_cmd_parser.add_argument(
60
118
  "--no-casts",
61
119
  help="Do not show type casts in the decompiled output.",
62
120
  action="store_true",
63
121
  default=False,
64
122
  )
65
- parser.add_argument(
123
+ decompile_cmd_parser.add_argument(
66
124
  "--preset",
67
125
  help="The configuration preset to use for decompilation.",
68
126
  choices=DECOMPILATION_PRESETS,
69
127
  default="default",
70
128
  )
129
+ decompile_cmd_parser.add_argument(
130
+ "--functions",
131
+ help="""
132
+ The functions to decompile. Functions can either be expressed as names found in the
133
+ symbols of the binary or as addresses like: 0x401000.""",
134
+ nargs="+",
135
+ )
136
+
137
+ disassemble_cmd_parser = subparsers.add_parser("disassemble", aliases=["dis"], help=disassemble.__doc__)
138
+ disassemble_cmd_parser.set_defaults(func=disassemble)
139
+ disassemble_cmd_parser.add_argument(
140
+ "--functions",
141
+ help="""
142
+ The functions to disassemble. Functions can either be expressed as names found in the
143
+ symbols of the binary or as addresses like: 0x401000.""",
144
+ nargs="+",
145
+ )
71
146
 
72
- args = parser.parse_args(args)
73
- if args.command == COMMANDS.DECOMPILE:
74
- decompilation = decompile_functions(
75
- args.binary,
76
- functions=args.functions,
77
- structurer=args.structurer,
78
- catch_errors=args.catch_exceptions,
79
- show_casts=not args.no_casts,
80
- base_address=args.base_addr,
81
- preset=args.preset,
82
- )
83
- print(decompilation, file=out)
84
- else:
85
- parser.print_help(file=out)
147
+ args = parser.parse_args()
148
+ args.func(args)
86
149
 
87
150
 
88
151
  if __name__ == "__main__":
angr/analyses/calling_convention/calling_convention.py CHANGED
@@ -21,6 +21,7 @@ from angr.calling_conventions import (
21
21
  default_cc,
22
22
  SimCCMicrosoftThiscall,
23
23
  )
24
+ from angr.errors import SimTranslationError
24
25
  from angr.sim_type import (
25
26
  SimTypeCppFunction,
26
27
  SimTypeInt,
@@ -585,16 +586,23 @@ class CallingConventionAnalysis(Analysis):
585
586
  # include its successor.
586
587
 
587
588
  # Re-lift the target block
588
- dst_bb = self.project.factory.block(dst.addr, func.get_block_size(dst.addr), opt_level=1)
589
+ dst_block_size = func.get_block_size(dst.addr)
590
+ if dst_block_size is not None and dst_block_size > 0:
591
+ dst_bb = self.project.factory.block(dst.addr, dst_block_size, opt_level=1)
592
+ try:
593
+ vex_block = dst_bb.vex
594
+ except SimTranslationError:
595
+ # failed to lift the block
596
+ continue
589
597
 
590
- # If there is only one 'IMark' statement in vex --> the target block contains only direct jump
591
- if (
592
- len(dst_bb.vex.statements) == 1
593
- and dst_bb.vex.statements[0].tag == "Ist_IMark"
594
- and func.graph.out_degree(dst) == 1
595
- ):
596
- for _, jmp_dst, jmp_data in func_graph.out_edges(dst, data=True):
597
- subgraph.add_edge(dst, jmp_dst, **jmp_data)
598
+ # If there is only one 'IMark' statement in vex --> the target block contains only direct jump
599
+ if (
600
+ len(vex_block.statements) == 1
601
+ and vex_block.statements[0].tag == "Ist_IMark"
602
+ and func.graph.out_degree(dst) == 1
603
+ ):
604
+ for _, jmp_dst, jmp_data in func_graph.out_edges(dst, data=True):
605
+ subgraph.add_edge(dst, jmp_dst, **jmp_data)
598
606
 
599
607
  return subgraph
600
608
 
angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py CHANGED
@@ -412,6 +412,45 @@ class AMD64CCallRewriter(CCallRewriterBase):
412
412
  )
413
413
  return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
414
414
 
415
+ elif (
416
+ cond_v == AMD64_CondTypes["CondNS"]
417
+ and op_v
418
+ in {
419
+ AMD64_OpTypes["G_CC_OP_LOGICB"],
420
+ AMD64_OpTypes["G_CC_OP_LOGICW"],
421
+ AMD64_OpTypes["G_CC_OP_LOGICL"],
422
+ AMD64_OpTypes["G_CC_OP_LOGICQ"],
423
+ }
424
+ and isinstance(dep_2, Expr.Const)
425
+ and dep_2.value == 0
426
+ ):
427
+ # dep_1 >= 0
428
+ dep_1 = self._fix_size(
429
+ dep_1,
430
+ op_v,
431
+ AMD64_OpTypes["G_CC_OP_LOGICB"],
432
+ AMD64_OpTypes["G_CC_OP_LOGICW"],
433
+ AMD64_OpTypes["G_CC_OP_LOGICL"],
434
+ ccall.tags,
435
+ )
436
+ dep_2 = self._fix_size(
437
+ dep_2,
438
+ op_v,
439
+ AMD64_OpTypes["G_CC_OP_LOGICB"],
440
+ AMD64_OpTypes["G_CC_OP_LOGICW"],
441
+ AMD64_OpTypes["G_CC_OP_LOGICL"],
442
+ ccall.tags,
443
+ )
444
+
445
+ r = Expr.BinaryOp(
446
+ ccall.idx,
447
+ "CmpGE",
448
+ (dep_1, dep_2),
449
+ True,
450
+ **ccall.tags,
451
+ )
452
+ return Expr.Convert(None, r.bits, ccall.bits, False, r, **ccall.tags)
453
+
415
454
  elif ccall.callee == "amd64g_calculate_rflags_c":
416
455
  # calculate the carry flag
417
456
  op = ccall.operands[0]
angr/analyses/decompiler/clinic.py CHANGED
@@ -483,6 +483,9 @@ class Clinic(Analysis):
483
483
  arg_vvars = self._create_function_argument_vvars(arg_list)
484
484
  func_args = {arg_vvar for arg_vvar, _ in arg_vvars.values()}
485
485
 
486
+ # duplicate orphaned conditional jump blocks
487
+ ail_graph = self._duplicate_orphaned_cond_jumps(ail_graph)
488
+
486
489
  # Transform the graph into partial SSA form
487
490
  self._update_progress(35.0, text="Transforming to partial-SSA form")
488
491
  ail_graph = self._transform_to_ssa_level0(ail_graph, func_args)
@@ -1892,6 +1895,19 @@ class Clinic(Analysis):
1892
1895
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.dst)
1893
1896
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.src)
1894
1897
 
1898
+ elif stmt_type is ailment.Stmt.CAS:
1899
+ for expr in [
1900
+ stmt.addr,
1901
+ stmt.data_lo,
1902
+ stmt.data_hi,
1903
+ stmt.expd_lo,
1904
+ stmt.expd_hi,
1905
+ stmt.old_lo,
1906
+ stmt.old_hi,
1907
+ ]:
1908
+ if expr is not None:
1909
+ self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, expr)
1910
+
1895
1911
  elif stmt_type is ailment.Stmt.ConditionalJump:
1896
1912
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.condition)
1897
1913
 
@@ -2123,6 +2139,45 @@ class Clinic(Analysis):
2123
2139
 
2124
2140
  return graph
2125
2141
 
2142
+ @staticmethod
2143
+ def _duplicate_orphaned_cond_jumps(ail_graph) -> networkx.DiGraph:
2144
+ """
2145
+ Find conditional jumps that are orphaned (e.g., being the only instruction of the block). If these blocks have
2146
+ multiple predecessors, duplicate them to all predecessors. This is a workaround for cases where these
2147
+ conditional jumps rely on comparisons in more than one predecessor and we cannot resolve ccalls into
2148
+ comparisons.
2149
+
2150
+ This pass runs before any SSA transformations.
2151
+
2152
+ # 140017162 jz short 1400171e1
2153
+ """
2154
+
2155
+ for block in list(ail_graph):
2156
+ if len(block.statements) > 1 and block.statements[0].ins_addr == block.statements[-1].ins_addr:
2157
+ preds = list(ail_graph.predecessors(block))
2158
+ if len(preds) > 1 and block not in preds:
2159
+ has_ccall = any(
2160
+ isinstance(stmt, ailment.Stmt.Assignment)
2161
+ and isinstance(stmt.src, ailment.Expr.VEXCCallExpression)
2162
+ for stmt in block.statements
2163
+ )
2164
+ if has_ccall:
2165
+ # duplicate this block to its predecessors!
2166
+ preds = sorted(preds, key=lambda x: x.addr)
2167
+ succs = sorted(ail_graph.successors(block), key=lambda x: x.addr)
2168
+ # FIXME: We should track block IDs globally and ensure block IDs do not collide
2169
+ block_idx_start = block.idx + 1 if block.idx is not None else 1
2170
+ for pred in preds[1:]:
2171
+ ail_graph.remove_edge(pred, block)
2172
+ new_block = block.copy()
2173
+ new_block.idx = block_idx_start
2174
+ block_idx_start += 1
2175
+ ail_graph.add_edge(pred, new_block)
2176
+ for succ in succs:
2177
+ ail_graph.add_edge(new_block, succ if succ is not block else new_block)
2178
+
2179
+ return ail_graph
2180
+
2126
2181
  def _rewrite_ite_expressions(self, ail_graph):
2127
2182
  cfg = self._cfg
2128
2183
  for block in list(ail_graph):
@@ -2130,11 +2185,16 @@ class Clinic(Analysis):
2130
2185
  continue
2131
2186
 
2132
2187
  ite_ins_addrs = []
2188
+ cas_ins_addrs = set()
2133
2189
  for stmt in block.statements:
2134
- if (
2190
+ if isinstance(stmt, ailment.Stmt.CAS):
2191
+ # we do not rewrite ITE statements that are caused by CAS statements
2192
+ cas_ins_addrs.add(stmt.ins_addr)
2193
+ elif (
2135
2194
  isinstance(stmt, ailment.Stmt.Assignment)
2136
2195
  and isinstance(stmt.src, ailment.Expr.ITE)
2137
2196
  and stmt.ins_addr not in ite_ins_addrs
2197
+ and stmt.ins_addr not in cas_ins_addrs
2138
2198
  ):
2139
2199
  ite_ins_addrs.append(stmt.ins_addr)
2140
2200
 
@@ -2998,6 +3058,12 @@ class Clinic(Analysis):
2998
3058
  and last_stmt.addr.offset < 0
2999
3059
  and isinstance(last_stmt.data, ailment.Expr.Const)
3000
3060
  and last_stmt.data.value == succ.addr
3061
+ ) or (
3062
+ isinstance(last_stmt, ailment.Stmt.Assignment)
3063
+ and last_stmt.dst.was_stack
3064
+ and last_stmt.dst.stack_offset < 0
3065
+ and isinstance(last_stmt.src, ailment.Expr.Const)
3066
+ and last_stmt.src.value == succ.addr
3001
3067
  ):
3002
3068
  # remove the statement that pushes the return address
3003
3069
  node.statements = node.statements[:-1]
@@ -3031,6 +3097,12 @@ class Clinic(Analysis):
3031
3097
  and last_stmt.addr.offset < 0
3032
3098
  and isinstance(last_stmt.data, ailment.Expr.Const)
3033
3099
  and last_stmt.data.value == succ.addr
3100
+ ) or (
3101
+ isinstance(last_stmt, ailment.Stmt.Assignment)
3102
+ and last_stmt.dst.was_stack
3103
+ and last_stmt.dst.stack_offset < 0
3104
+ and isinstance(last_stmt.src, ailment.Expr.Const)
3105
+ and last_stmt.src.value == succ.addr
3034
3106
  ):
3035
3107
  # remove the statement that pushes the return address
3036
3108
  node.statements = node.statements[:-1]
angr/analyses/decompiler/dephication/rewriting_engine.py CHANGED
@@ -1,4 +1,4 @@
1
- # pylint:disable=unused-argument,no-self-use
1
+ # pylint:disable=unused-argument,no-self-use,too-many-boolean-expressions
2
2
  from __future__ import annotations
3
3
  import logging
4
4
 
@@ -8,12 +8,14 @@ from ailment.statement import (
8
8
  Assignment,
9
9
  Store,
10
10
  Call,
11
+ CAS,
11
12
  Return,
12
13
  ConditionalJump,
13
14
  DirtyStatement,
14
15
  WeakAssignment,
15
16
  )
16
17
  from ailment.expression import (
18
+ Atom,
17
19
  Expression,
18
20
  VirtualVariable,
19
21
  Load,
@@ -121,6 +123,40 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
121
123
  )
122
124
  return None
123
125
 
126
+ def _handle_stmt_CAS(self, stmt: CAS) -> CAS | None:
127
+ new_addr = self._expr(stmt.addr)
128
+ new_data_lo = self._expr(stmt.data_lo)
129
+ new_data_hi = self._expr(stmt.data_hi) if stmt.data_hi is not None else None
130
+ new_expd_lo = self._expr(stmt.expd_lo)
131
+ new_expd_hi = self._expr(stmt.expd_hi) if stmt.expd_hi is not None else None
132
+ new_old_lo = self._expr(stmt.old_lo)
133
+ new_old_hi = self._expr(stmt.old_hi) if stmt.old_hi is not None else None
134
+ assert new_old_lo is None or isinstance(new_old_lo, Atom)
135
+ assert new_old_hi is None or isinstance(new_old_hi, Atom)
136
+
137
+ if (
138
+ new_addr is not None
139
+ or new_old_lo is not None
140
+ or new_old_hi is not None
141
+ or new_data_lo is not None
142
+ or new_data_hi is not None
143
+ or new_expd_lo is not None
144
+ or new_expd_hi is not None
145
+ ):
146
+ return CAS(
147
+ stmt.idx,
148
+ stmt.addr if new_addr is None else new_addr,
149
+ stmt.data_lo if new_data_lo is None else new_data_lo,
150
+ stmt.data_hi if new_data_hi is None else new_data_hi,
151
+ stmt.expd_lo if new_expd_lo is None else new_expd_lo,
152
+ stmt.expd_hi if new_expd_hi is None else new_expd_hi,
153
+ stmt.old_lo if new_old_lo is None else new_old_lo,
154
+ stmt.old_hi if new_old_hi is None else new_old_hi,
155
+ stmt.endness,
156
+ **stmt.tags,
157
+ )
158
+ return None
159
+
124
160
  def _handle_stmt_Store(self, stmt):
125
161
  new_addr = self._expr(stmt.addr)
126
162
  new_data = self._expr(stmt.data)
@@ -179,6 +215,7 @@ class SimEngineDephiRewriting(SimEngineNostmtAIL[None, Expression | None, Statem
179
215
  dirty = self._expr(stmt.dirty)
180
216
  if dirty is None or dirty is stmt.dirty:
181
217
  return None
218
+ assert isinstance(dirty, DirtyExpression)
182
219
  return DirtyStatement(stmt.idx, dirty, **stmt.tags)
183
220
 
184
221
  def _handle_expr_Load(self, expr):
angr/analyses/decompiler/optimization_passes/condition_constprop.py CHANGED
@@ -107,6 +107,12 @@ class ConditionConstantPropagation(OptimizationPass):
107
107
  cconds_by_src[src] = []
108
108
  cconds_by_src[src].append(ccond)
109
109
 
110
+ # eliminate sources with more than one in-edges; this is because the condition may not hold on all in-edges!
111
+ for src in list(cconds_by_src):
112
+ block = self._get_block(src[0], idx=src[1])
113
+ if block is not None and block in self._graph and self._graph.in_degree[block] > 1:
114
+ del cconds_by_src[src]
115
+
110
116
  # eliminate conflicting conditions
111
117
  for src in list(cconds_by_src):
112
118
  cconds = cconds_by_src[src]
angr/analyses/decompiler/optimization_passes/engine_base.py CHANGED
@@ -86,6 +86,11 @@ class SimplifierAILEngine(
86
86
 
87
87
  return stmt
88
88
 
89
+ def _handle_stmt_CAS(self, stmt: ailment.statement.CAS) -> ailment.statement.CAS:
90
+ # we assume that we never have to deal with CAS statements at this point; they should have been rewritten to
91
+ # intrinsics
92
+ return stmt
93
+
89
94
  def _handle_stmt_Store(self, stmt):
90
95
  addr = self._expr(stmt.addr)
91
96
  data = self._expr(stmt.data)
angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py CHANGED
@@ -8,6 +8,7 @@ from ailment.expression import Op
8
8
  from angr.analyses.decompiler.structuring.structurer_nodes import ConditionNode
9
9
  from angr.analyses.decompiler.utils import (
10
10
  structured_node_is_simple_return,
11
+ structured_node_is_simple_return_strict,
11
12
  sequence_to_statements,
12
13
  structured_node_has_multi_predecessors,
13
14
  )
@@ -44,7 +45,7 @@ class FlipBooleanWalker(SequenceWalker):
44
45
  and node.true_node is not None
45
46
  and node.false_node is None
46
47
  and idx < len(seq_node.nodes) - 1
47
- and structured_node_is_simple_return(seq_node.nodes[idx + 1], self._graph)
48
+ and structured_node_is_simple_return_strict(seq_node.nodes[idx + 1])
48
49
  and node not in type1_condition_nodes
49
50
  ):
50
51
  # Type 2: Special Filter:
angr/analyses/decompiler/peephole_optimizations/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .a_sub_a_div_const_mul_const import ASubADivConstMulConst
8
8
  from .a_sub_a_shr_const_shr_const import ASubAShrConstShrConst
9
9
  from .arm_cmpf import ARMCmpF
10
10
  from .bswap import Bswap
11
+ from .cas_intrinsics import CASIntrinsics
11
12
  from .coalesce_same_cascading_ifs import CoalesceSameCascadingIfs
12
13
  from .constant_derefs import ConstantDereferences
13
14
  from .const_mull_a_shift import ConstMullAShift
@@ -64,6 +65,7 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
64
65
  ASubAShrConstShrConst,
65
66
  ARMCmpF,
66
67
  Bswap,
68
+ CASIntrinsics,
67
69
  CoalesceSameCascadingIfs,
68
70
  ConstantDereferences,
69
71
  ConstMullAShift,
angr/analyses/decompiler/peephole_optimizations/cas_intrinsics.py ADDED
@@ -0,0 +1,115 @@
1
+ # pylint:disable=arguments-differ,too-many-boolean-expressions
2
+ from __future__ import annotations
3
+
4
+ from ailment.expression import BinaryOp, Load
5
+ from ailment.statement import CAS, ConditionalJump, Statement, Assignment, Call
6
+
7
+ from .base import PeepholeOptimizationMultiStmtBase
8
+
9
+
10
+ _INTRINSICS_NAMES = {
11
+ "xchg": {"Win32": "InterlockedExchange", "Linux": "atomic_exchange"},
12
+ "cmpxchg": {"Win32": "InterlockedCompareExchange", "Linux": "atomic_compare_exchange"},
13
+ }
14
+
15
+
16
+ class CASIntrinsics(PeepholeOptimizationMultiStmtBase):
17
+ """
18
+ Rewrite lock-prefixed instructions (or rather, their VEX/AIL forms) into intrinsic calls.
19
+
20
+ Case 1.
21
+
22
+ mov eax, r12d
23
+ 0x140014b57: xchg eax, [0x14000365f8]
24
+
25
+ LABEL_0x140014b57:
26
+ CAS(0x1400365f8<64>, Conv(64->32, vvar_365{reg 112}), Load(addr=0x1400365f8<64>, size=4, endness=Iend_LE),
27
+ vvar_27756)
28
+ if (CasCmpNE(vvar_27756, g_1400365f8))
29
+ goto LABEL_0x140014b57;
30
+
31
+ => vvar_27756 = _InterlockedExchange(0x1400365f8, vvar_365{reg 112})
32
+
33
+
34
+ Case 2.
35
+
36
+ lock cmpxchg cs:g_WarbirdSecureFunctionsLock, r14d
37
+
38
+ CAS(0x1400365f8<64>, 0x1<32>, 0x0<32>, vvar_27751)
39
+
40
+ => var_27751 = _InterlockedCompareExchange(0x1400365f8, 0x1<32>, 0x0<32>)
41
+ """
42
+
43
+ __slots__ = ()
44
+
45
+ NAME = "Rewrite compare-and-swap instructions into intrinsics."
46
+ stmt_classes = ((CAS, ConditionalJump), (CAS, Statement))
47
+
48
+ def optimize(self, stmts: list[Statement], stmt_idx: int | None = None, block=None, **kwargs):
49
+ assert len(stmts) == 2
50
+ cas_stmt = stmts[0]
51
+ next_stmt = stmts[1]
52
+ assert isinstance(cas_stmt, CAS)
53
+
54
+ # TODO: We ignored endianness. Are there cases where the endianness is different from the host's?
55
+
56
+ if (
57
+ isinstance(next_stmt, ConditionalJump)
58
+ and isinstance(next_stmt.condition, BinaryOp)
59
+ and next_stmt.condition.op == "CasCmpNE"
60
+ and next_stmt.ins_addr == cas_stmt.ins_addr
61
+ ):
62
+ addr = cas_stmt.addr
63
+ if (
64
+ isinstance(cas_stmt.expd_lo, Load)
65
+ and cas_stmt.expd_lo.addr.likes(addr)
66
+ and isinstance(next_stmt.condition.operands[1], Load)
67
+ and next_stmt.condition.operands[1].addr.likes(addr)
68
+ and cas_stmt.old_lo.likes(next_stmt.condition.operands[0])
69
+ and cas_stmt.old_hi is None
70
+ ):
71
+ # TODO: Support cases where cas_stmt.old_hi is not None
72
+ # Case 1
73
+ call_expr = Call(
74
+ cas_stmt.idx,
75
+ self._get_instrincs_name("xchg"),
76
+ args=[addr, cas_stmt.data_lo],
77
+ bits=cas_stmt.bits,
78
+ ins_addr=cas_stmt.ins_addr,
79
+ )
80
+ stmt = Assignment(cas_stmt.idx, cas_stmt.old_lo, call_expr, **cas_stmt.tags)
81
+ return [stmt]
82
+
83
+ if next_stmt.ins_addr <= cas_stmt.ins_addr:
84
+ # avoid matching against statements prematurely
85
+ return None
86
+
87
+ if cas_stmt.old_hi is None:
88
+ # TODO: Support cases where cas_stmt.old_hi is not None
89
+ call_expr = Call(
90
+ cas_stmt.idx,
91
+ self._get_instrincs_name("cmpxchg"),
92
+ args=[
93
+ cas_stmt.addr,
94
+ cas_stmt.data_lo,
95
+ cas_stmt.expd_lo,
96
+ ],
97
+ bits=cas_stmt.bits,
98
+ ins_addr=cas_stmt.ins_addr,
99
+ )
100
+ stmt = Assignment(cas_stmt.idx, cas_stmt.old_lo, call_expr, **cas_stmt.tags)
101
+ return [stmt, next_stmt]
102
+
103
+ return None
104
+
105
+ def _get_instrincs_name(self, mnemonic: str) -> str:
106
+ if mnemonic in _INTRINSICS_NAMES:
107
+ os = (
108
+ self.project.simos.name
109
+ if self.project is not None and self.project.simos is not None and self.project.simos.name is not None
110
+ else "Linux"
111
+ )
112
+ if os not in _INTRINSICS_NAMES[mnemonic]:
113
+ os = "Linux"
114
+ return _INTRINSICS_NAMES[mnemonic][os]
115
+ return mnemonic