angr 9.2.93__py3-none-macosx_10_9_x86_64.whl → 9.2.94__py3-none-macosx_10_9_x86_64.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (34)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +20 -10
  3. angr/analyses/cfg/indirect_jump_resolvers/amd64_elf_got.py +1 -1
  4. angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +89 -32
  5. angr/analyses/decompiler/ail_simplifier.py +20 -0
  6. angr/analyses/decompiler/callsite_maker.py +5 -0
  7. angr/analyses/decompiler/clinic.py +103 -1
  8. angr/analyses/decompiler/decompilation_cache.py +2 -0
  9. angr/analyses/decompiler/decompiler.py +21 -4
  10. angr/analyses/decompiler/optimization_passes/code_motion.py +8 -3
  11. angr/analyses/decompiler/optimization_passes/optimization_pass.py +1 -0
  12. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +30 -18
  13. angr/analyses/decompiler/peephole_optimizations/bswap.py +53 -2
  14. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +20 -1
  15. angr/analyses/decompiler/structured_codegen/c.py +56 -40
  16. angr/analyses/propagator/engine_ail.py +3 -0
  17. angr/analyses/reaching_definitions/engine_ail.py +3 -0
  18. angr/analyses/reaching_definitions/reaching_definitions.py +7 -0
  19. angr/analyses/stack_pointer_tracker.py +60 -10
  20. angr/analyses/typehoon/simple_solver.py +19 -16
  21. angr/calling_conventions.py +0 -3
  22. angr/engines/pcode/cc.py +1 -1
  23. angr/engines/successors.py +6 -0
  24. angr/knowledge_plugins/propagations/states.py +2 -1
  25. angr/lib/angr_native.dylib +0 -0
  26. angr/procedures/definitions/glibc.py +3 -1
  27. angr/sim_type.py +2 -0
  28. angr/utils/library.py +1 -1
  29. {angr-9.2.93.dist-info → angr-9.2.94.dist-info}/METADATA +6 -6
  30. {angr-9.2.93.dist-info → angr-9.2.94.dist-info}/RECORD +34 -34
  31. {angr-9.2.93.dist-info → angr-9.2.94.dist-info}/WHEEL +1 -1
  32. {angr-9.2.93.dist-info → angr-9.2.94.dist-info}/LICENSE +0 -0
  33. {angr-9.2.93.dist-info → angr-9.2.94.dist-info}/entry_points.txt +0 -0
  34. {angr-9.2.93.dist-info → angr-9.2.94.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # pylint: disable=wildcard-import
2
2
  # pylint: disable=wrong-import-position
3
3
 
4
- __version__ = "9.2.93"
4
+ __version__ = "9.2.94"
5
5
 
6
6
  if bytes is str:
7
7
  raise Exception(
@@ -8,7 +8,7 @@ from sortedcontainers import SortedDict
8
8
 
9
9
  import pyvex
10
10
  from claripy.utils.orderedset import OrderedSet
11
- from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff
11
+ from cle import ELF, PE, Blob, TLSObject, MachO, ExternObject, KernelObject, FunctionHintSource, Hex, Coff, SRec
12
12
  from cle.backends import NamedRegion
13
13
  import archinfo
14
14
  from archinfo.arch_soot import SootAddressDescriptor
@@ -748,19 +748,29 @@ class CFGBase(Analysis):
748
748
  for b in binaries:
749
749
  if isinstance(b, ELF):
750
750
  # If we have sections, we get result from sections
751
+ sections = []
751
752
  if not force_segment and b.sections:
752
753
  # Get all executable sections
753
754
  for section in b.sections:
754
755
  if section.is_executable:
755
756
  tpl = (section.min_addr, section.max_addr + 1)
756
- memory_regions.append(tpl)
757
-
758
- else:
759
- # Get all executable segments
760
- for segment in b.segments:
761
- if segment.is_executable:
762
- tpl = (segment.min_addr, segment.max_addr + 1)
763
- memory_regions.append(tpl)
757
+ sections.append(tpl)
758
+ memory_regions += sections
759
+
760
+ segments = []
761
+ # Get all executable segments
762
+ for segment in b.segments:
763
+ if segment.is_executable:
764
+ tpl = (segment.min_addr, segment.max_addr + 1)
765
+ segments.append(tpl)
766
+ if sections and segments:
767
+ # are there executable segments with no sections inside?
768
+ for segment in segments:
769
+ for section in sections:
770
+ if segment[0] <= section[0] < segment[1]:
771
+ break
772
+ else:
773
+ memory_regions.append(segment)
764
774
 
765
775
  elif isinstance(b, (Coff, PE)):
766
776
  for section in b.sections:
@@ -778,7 +788,7 @@ class CFGBase(Analysis):
778
788
  tpl = (section.min_addr, section.max_addr + 1)
779
789
  memory_regions.append(tpl)
780
790
 
781
- elif isinstance(b, Hex):
791
+ elif isinstance(b, (Hex, SRec)):
782
792
  if b.regions:
783
793
  for region_addr, region_size in b.regions:
784
794
  memory_regions.append((region_addr, region_addr + region_size))
@@ -18,7 +18,7 @@ class AMD64ElfGotResolver(IndirectJumpResolver):
18
18
  super().__init__(project, timeless=True)
19
19
 
20
20
  def filter(self, cfg, addr, func_addr, block, jumpkind):
21
- if jumpkind != "Ijk_Call":
21
+ if not (jumpkind == "Ijk_Call" or (jumpkind == "Ijk_Boring" and addr == func_addr)):
22
22
  return False
23
23
  return True
24
24
 
@@ -12,11 +12,7 @@ _l = logging.getLogger(name=__name__)
12
12
 
13
13
  class ArmElfFastResolver(IndirectJumpResolver):
14
14
  """
15
- Resolves the indirect jump in ARM ELF binaries where all internal function calls are performed in the following
16
- manner::
17
-
18
- ldr r3, [pc+#0x124] ; load a constant from the constant_pool
19
- blx r3
15
+ Resolves indirect jumps in ARM ELF binaries
20
16
  """
21
17
 
22
18
  def __init__(self, project):
@@ -29,6 +25,89 @@ class ArmElfFastResolver(IndirectJumpResolver):
29
25
  return False
30
26
  return True
31
27
 
28
+ def _resolve_default(self, stmt, block, source, cfg, blade):
29
+ """
30
+ Resolves the indirect jump in ARM ELF binaries where all internal function calls are performed in the following
31
+ manner::
32
+
33
+ ldr r3, [pc+#0x124] ; load a constant from the constant_pool
34
+ blx r3
35
+ """
36
+
37
+ if not isinstance(stmt.data, pyvex.IRExpr.Load):
38
+ return False, []
39
+ if not isinstance(stmt.data.addr, pyvex.IRExpr.Const):
40
+ return False, []
41
+ load_addr = stmt.data.addr.con.value
42
+ load_size = stmt.data.result_size(block.tyenv) // 8
43
+ endness = archinfo.Endness.BE if stmt.data.endness == "Iend_BE" else archinfo.Endness.LE
44
+
45
+ # the next statement should be the default exit
46
+ next_target = next(iter(blade.slice.successors(source)))
47
+
48
+ if not (next_target[0] == block.addr and next_target[1] == DEFAULT_STATEMENT):
49
+ return False, []
50
+ next_tmp = block.next
51
+ if next_tmp.tmp != stmt.tmp:
52
+ return False, []
53
+
54
+ # load the address to jump to
55
+ try:
56
+ target_addr = self.project.loader.memory.unpack_word(load_addr, size=load_size, endness=endness)
57
+ if cfg.tag == "CFGFast":
58
+ cfg._seg_list.occupy(load_addr, load_size, "pointer-array")
59
+ except KeyError:
60
+ return False, []
61
+
62
+ return True, [target_addr]
63
+
64
+ def _resolve_put(self, stmt, block, source, cfg, blade):
65
+ """
66
+ Resolves the indirect jump in ARM ELF binaries where all internal function calls are performed in the following
67
+ manner::
68
+
69
+ add ip, pc, #0x100000
70
+ add ip, ip, #0x1e000
71
+ ldr pc, [ip,#0x884]!
72
+ """
73
+
74
+ # Get the value of r12 register
75
+ if not isinstance(stmt.data, pyvex.IRExpr.Const):
76
+ return False, []
77
+ if not self.project.arch.register_names[stmt.offset] == "r12":
78
+ return False, []
79
+ load_addr = stmt.data.con.value
80
+ load_size = stmt.data.result_size(block.tyenv) // 8
81
+ endness = self.project.arch.default_endness
82
+
83
+ count = 0
84
+ for next_stmt in block.statements:
85
+ if (
86
+ isinstance(next_stmt, pyvex.IRStmt.WrTmp)
87
+ and isinstance(next_stmt.data, pyvex.IRExpr.Binop)
88
+ and "Add" in next_stmt.data.op
89
+ ):
90
+ load_addr += next_stmt.constants[0].value
91
+ count += 1
92
+
93
+ if count != 2:
94
+ return False, []
95
+
96
+ next_target = next(iter(blade.slice.successors(source)))
97
+
98
+ if not next_target[0] == block.addr:
99
+ return False, []
100
+
101
+ # load the address to jump to
102
+ try:
103
+ target_addr = self.project.loader.memory.unpack_word(load_addr, size=load_size, endness=endness)
104
+ if cfg.tag == "CFGFast":
105
+ cfg._seg_list.occupy(load_addr, load_size, "pointer-array")
106
+ except KeyError:
107
+ return False, []
108
+
109
+ return True, [target_addr]
110
+
32
111
  def resolve( # pylint:disable=unused-argument
33
112
  self, cfg, addr, func_addr, block, jumpkind, func_graph_complete: bool = True, **kwargs
34
113
  ):
@@ -64,31 +143,9 @@ class ArmElfFastResolver(IndirectJumpResolver):
64
143
  return False, []
65
144
 
66
145
  stmt = block.statements[stmt_idx]
67
- if not isinstance(stmt, pyvex.IRStmt.WrTmp):
146
+ if isinstance(stmt, pyvex.IRStmt.WrTmp):
147
+ return self._resolve_default(stmt, block, source, cfg, b)
148
+ elif isinstance(stmt, pyvex.IRStmt.Put):
149
+ return self._resolve_put(stmt, block, source, cfg, b)
150
+ else:
68
151
  return False, []
69
- if not isinstance(stmt.data, pyvex.IRExpr.Load):
70
- return False, []
71
- if not isinstance(stmt.data.addr, pyvex.IRExpr.Const):
72
- return False, []
73
- load_addr = stmt.data.addr.con.value
74
- load_size = stmt.data.result_size(block.tyenv) // 8
75
- endness = archinfo.Endness.BE if stmt.data.endness == "Iend_BE" else archinfo.Endness.LE
76
-
77
- # the next statement should be the default exit
78
- next_target = next(iter(b.slice.successors(source)))
79
-
80
- if not (next_target[0] == block.addr and next_target[1] == DEFAULT_STATEMENT):
81
- return False, []
82
- next_tmp = block.next
83
- if next_tmp.tmp != stmt.tmp:
84
- return False, []
85
-
86
- # load the address to jump to
87
- try:
88
- target_addr = self.project.loader.memory.unpack_word(load_addr, size=load_size, endness=endness)
89
- if cfg.tag == "CFGFast":
90
- cfg._seg_list.occupy(load_addr, load_size, "pointer-array")
91
- except KeyError:
92
- return False, []
93
-
94
- return True, [target_addr]
@@ -23,6 +23,7 @@ from ...code_location import CodeLocation, ExternalCodeLocation
23
23
  from ...sim_variable import SimStackVariable, SimMemoryVariable
24
24
  from ...knowledge_plugins.propagations.states import Equivalence
25
25
  from ...knowledge_plugins.key_definitions import atoms
26
+ from ...knowledge_plugins.key_definitions.atoms import Register as RegisterAtom
26
27
  from ...knowledge_plugins.key_definitions.definition import Definition
27
28
  from ...knowledge_plugins.key_definitions.constants import OP_BEFORE
28
29
  from .. import Analysis, AnalysesHub
@@ -856,6 +857,25 @@ class AILSimplifier(Analysis):
856
857
  all_uses_replaced = True
857
858
  for def_, use_and_expr in all_uses_with_def:
858
859
  u, used_expr = use_and_expr
860
+
861
+ use_expr_defns = []
862
+ for d in rd.all_uses.get_uses_by_location(u):
863
+ if (
864
+ isinstance(d.atom, RegisterAtom)
865
+ and isinstance(def_.atom, RegisterAtom)
866
+ and d.atom.reg_offset == def_.atom.reg_offset
867
+ ):
868
+ use_expr_defns.append(d)
869
+ elif d.atom == def_.atom:
870
+ use_expr_defns.append(d)
871
+ # you can never replace a use with dependencies from outside the checked defn
872
+ if len(use_expr_defns) != 1 or list(use_expr_defns)[0] != def_:
873
+ if not use_expr_defns:
874
+ _l.warning("There was no use_expr_defns for %s, this is likely a bug", u)
875
+ # TODO: can you have multiple definitions which can all be eliminated?
876
+ all_uses_replaced = False
877
+ continue
878
+
859
879
  if u == eq.codeloc:
860
880
  # skip the very initial assignment location
861
881
  continue
@@ -49,6 +49,11 @@ class CallSiteMaker(Analysis):
49
49
  self.result_block = self.block
50
50
  return
51
51
 
52
+ if isinstance(last_stmt.target, str):
53
+ # custom function calls
54
+ self.result_block = self.block
55
+ return
56
+
52
57
  cc = None
53
58
  prototype = None
54
59
  func = None
@@ -6,6 +6,7 @@ from dataclasses import dataclass
6
6
  from typing import Dict, List, Tuple, Set, Optional, Iterable, Union, Type, Any, NamedTuple, TYPE_CHECKING
7
7
 
8
8
  import networkx
9
+ import capstone
9
10
 
10
11
  import ailment
11
12
 
@@ -262,6 +263,7 @@ class Clinic(Analysis):
262
263
  ail_graph = self._simplify_blocks(
263
264
  ail_graph, stack_pointer_tracker=spt, remove_dead_memdefs=False, cache=block_simplification_cache
264
265
  )
266
+ self._rewrite_alloca(ail_graph)
265
267
 
266
268
  # Run simplification passes
267
269
  self._update_progress(40.0, text="Running simplifications 1")
@@ -606,7 +608,12 @@ class Clinic(Analysis):
606
608
  regs = {self.project.arch.sp_offset}
607
609
  if hasattr(self.project.arch, "bp_offset") and self.project.arch.bp_offset is not None:
608
610
  regs.add(self.project.arch.bp_offset)
609
- spt = self.project.analyses.StackPointerTracker(self.function, regs, track_memory=self._sp_tracker_track_memory)
611
+
612
+ regs |= self._find_regs_compared_against_sp(self._func_graph)
613
+
614
+ spt = self.project.analyses.StackPointerTracker(
615
+ self.function, regs, track_memory=self._sp_tracker_track_memory, cross_insn_opt=False
616
+ )
610
617
  if spt.inconsistent_for(self.project.arch.sp_offset):
611
618
  l.warning("Inconsistency found during stack pointer tracking. Decompilation results might be incorrect.")
612
619
  return spt
@@ -1201,6 +1208,7 @@ class Clinic(Analysis):
1201
1208
 
1202
1209
  if self._cache is not None:
1203
1210
  self._cache.type_constraints = vr.type_constraints
1211
+ self._cache.func_typevar = vr.func_typevar
1204
1212
  self._cache.var_to_typevar = vr.var_to_typevars
1205
1213
 
1206
1214
  return tmp_kb
@@ -1877,5 +1885,99 @@ class Clinic(Analysis):
1877
1885
  AILGraphWalker(graph, handle_node, replace_nodes=True).walk()
1878
1886
  return graph
1879
1887
 
1888
+ def _find_regs_compared_against_sp(self, func_graph):
1889
+ # TODO: Implement this function for architectures beyond amd64
1890
+ extra_regs = set()
1891
+ if self.project.arch.name == "AMD64":
1892
+ for node in func_graph.nodes:
1893
+ block = self.project.factory.block(node.addr, size=node.size).capstone
1894
+ for insn in block.insns:
1895
+ if insn.mnemonic == "cmp":
1896
+ capstone_reg_offset = None
1897
+ if (
1898
+ insn.operands[0].type == capstone.x86.X86_OP_REG
1899
+ and insn.operands[0].reg == capstone.x86.X86_REG_RSP
1900
+ and insn.operands[1].type == capstone.x86.X86_OP_REG
1901
+ ):
1902
+ capstone_reg_offset = insn.operands[1].reg
1903
+ elif (
1904
+ insn.operands[1].type == capstone.x86.X86_OP_REG
1905
+ and insn.operands[1].reg == capstone.x86.X86_REG_RSP
1906
+ and insn.operands[0].type == capstone.x86.X86_OP_REG
1907
+ ):
1908
+ capstone_reg_offset = insn.operands[0].reg
1909
+
1910
+ if capstone_reg_offset is not None:
1911
+ reg_name = insn.reg_name(capstone_reg_offset)
1912
+ extra_regs.add(self.project.arch.registers[reg_name][0])
1913
+
1914
+ return extra_regs
1915
+
1916
+ def _rewrite_alloca(self, ail_graph):
1917
+ # pylint:disable=too-many-boolean-expressions
1918
+ alloca_node = None
1919
+ sp_equal_to = None
1920
+
1921
+ for node in ail_graph:
1922
+ if ail_graph.in_degree[node] == 2 and ail_graph.out_degree[node] == 2:
1923
+ succs = ail_graph.successors(node)
1924
+ if node in succs:
1925
+ # self loop!
1926
+ if len(node.statements) >= 6:
1927
+ stmt0 = node.statements[1] # skip the LABEL statement
1928
+ stmt1 = node.statements[2]
1929
+ last_stmt = node.statements[-1]
1930
+ if (
1931
+ isinstance(stmt0, ailment.Stmt.Assignment)
1932
+ and isinstance(stmt0.dst, ailment.Expr.Register)
1933
+ and isinstance(stmt0.src, ailment.Expr.StackBaseOffset)
1934
+ and stmt0.src.offset == -0x1000
1935
+ ):
1936
+ if (
1937
+ isinstance(stmt1, ailment.Stmt.Store)
1938
+ and isinstance(stmt1.addr, ailment.Expr.StackBaseOffset)
1939
+ and stmt1.addr.offset == -0x1000
1940
+ and isinstance(stmt1.data, ailment.Expr.Load)
1941
+ and isinstance(stmt1.data.addr, ailment.Expr.StackBaseOffset)
1942
+ and stmt1.data.addr.offset == -0x1000
1943
+ ):
1944
+ if (
1945
+ isinstance(last_stmt, ailment.Stmt.ConditionalJump)
1946
+ and isinstance(last_stmt.condition, ailment.Expr.BinaryOp)
1947
+ and last_stmt.condition.op == "CmpEQ"
1948
+ and isinstance(last_stmt.condition.operands[0], ailment.Expr.StackBaseOffset)
1949
+ and last_stmt.condition.operands[0].offset == -0x1000
1950
+ and isinstance(last_stmt.condition.operands[1], ailment.Expr.Register)
1951
+ and isinstance(last_stmt.false_target, ailment.Expr.Const)
1952
+ and last_stmt.false_target.value == node.addr
1953
+ ):
1954
+ # found it!
1955
+ alloca_node = node
1956
+ sp_equal_to = ailment.Expr.BinaryOp(
1957
+ None,
1958
+ "Sub",
1959
+ [
1960
+ ailment.Expr.Register(
1961
+ None, None, self.project.arch.sp_offset, self.project.arch.bits
1962
+ ),
1963
+ last_stmt.condition.operands[1],
1964
+ ],
1965
+ False,
1966
+ )
1967
+ break
1968
+
1969
+ if alloca_node is not None:
1970
+ stmt0 = alloca_node.statements[1]
1971
+ statements = [ailment.Stmt.Call(stmt0.idx, "alloca", args=[sp_equal_to], **stmt0.tags)]
1972
+ new_node = ailment.Block(alloca_node.addr, alloca_node.original_size, statements=statements)
1973
+ # replace the node
1974
+ preds = [pred for pred in ail_graph.predecessors(alloca_node) if pred is not alloca_node]
1975
+ succs = [succ for succ in ail_graph.successors(alloca_node) if succ is not alloca_node]
1976
+ ail_graph.remove_node(alloca_node)
1977
+ for pred in preds:
1978
+ ail_graph.add_edge(pred, new_node)
1979
+ for succ in succs:
1980
+ ail_graph.add_edge(new_node, succ)
1981
+
1880
1982
 
1881
1983
  register_analysis(Clinic, "Clinic")
@@ -15,6 +15,7 @@ class DecompilationCache:
15
15
  __slots__ = (
16
16
  "addr",
17
17
  "type_constraints",
18
+ "func_typevar",
18
19
  "var_to_typevar",
19
20
  "codegen",
20
21
  "clinic",
@@ -25,6 +26,7 @@ class DecompilationCache:
25
26
  def __init__(self, addr):
26
27
  self.addr = addr
27
28
  self.type_constraints: Optional[Set] = None
29
+ self.func_typevar = None
28
30
  self.var_to_typevar: Optional[Dict] = None
29
31
  self.codegen: Optional[BaseStructuredCodeGenerator] = None
30
32
  self.clinic: Optional[Clinic] = None
@@ -10,7 +10,7 @@ import ailment
10
10
  from angr.analyses.cfg import CFGFast
11
11
  from ...knowledge_plugins.functions.function import Function
12
12
  from ...knowledge_base import KnowledgeBase
13
- from ...sim_variable import SimMemoryVariable
13
+ from ...sim_variable import SimMemoryVariable, SimRegisterVariable, SimStackVariable
14
14
  from ...utils import timethis
15
15
  from .. import Analysis, AnalysesHub
16
16
  from .structuring import RecursiveStructurer, PhoenixStructurer
@@ -403,7 +403,7 @@ class Decompiler(Analysis):
403
403
  SimMemoryVariable(symbol.rebased_addr, 1, name=symbol.name, ident=ident),
404
404
  )
405
405
 
406
- def reflow_variable_types(self, type_constraints: Set, var_to_typevar: Dict, codegen):
406
+ def reflow_variable_types(self, type_constraints: Set, func_typevar, var_to_typevar: Dict, codegen):
407
407
  """
408
408
  Re-run type inference on an existing variable recovery result, then rerun codegen to generate new results.
409
409
 
@@ -439,13 +439,30 @@ class Decompiler(Analysis):
439
439
  try:
440
440
  tp = self.project.analyses.Typehoon(
441
441
  type_constraints,
442
+ func_typevar,
442
443
  kb=var_kb,
443
444
  var_mapping=var_to_typevar,
444
445
  must_struct=must_struct,
445
446
  ground_truth=groundtruth,
446
447
  )
447
- tp.update_variable_types(self.func.addr, var_to_typevar)
448
- tp.update_variable_types("global", var_to_typevar)
448
+ tp.update_variable_types(
449
+ self.func.addr,
450
+ {v: t for v, t in var_to_typevar.items() if isinstance(v, (SimRegisterVariable, SimStackVariable))},
451
+ )
452
+ tp.update_variable_types(
453
+ "global",
454
+ {v: t for v, t in var_to_typevar.items() if isinstance(v, (SimRegisterVariable, SimStackVariable))},
455
+ )
456
+ # update the function prototype if needed
457
+ if self.func.prototype is not None and self.func.prototype.args:
458
+ var_manager = var_kb.variables[self.func.addr]
459
+ for i, arg in enumerate(codegen.cfunc.arg_list):
460
+ if i >= len(self.func.prototype.args):
461
+ break
462
+ var = arg.variable
463
+ new_type = var_manager.get_variable_type(var)
464
+ if new_type is not None:
465
+ self.func.prototype.args[i] = new_type
449
466
  except Exception: # pylint:disable=broad-except
450
467
  l.warning(
451
468
  "Typehoon analysis failed. Variables will not have types. Please report to GitHub.", exc_info=True
@@ -3,7 +3,7 @@ from typing import Tuple, List, Optional, Dict
3
3
  import logging
4
4
 
5
5
  from ailment import Block
6
- from ailment.statement import Jump, ConditionalJump, Statement
6
+ from ailment.statement import Jump, ConditionalJump, Statement, DirtyStatement
7
7
  import networkx as nx
8
8
 
9
9
  from angr.analyses.decompiler.optimization_passes.optimization_pass import OptimizationPass, OptimizationPassStage
@@ -133,8 +133,13 @@ class CodeMotionOptimization(OptimizationPass):
133
133
  """
134
134
  # TODO: how can you handle an odd-numbered switch case? or many blocks with the same child?
135
135
  for b0, b1 in itertools.combinations(graph.nodes, 2):
136
- # ignore exact copies
137
- if b0 is b1 or not b0.statements or not b1.statements or is_similar(b0, b1):
136
+ if (
137
+ b0 is b1
138
+ or not b0.statements
139
+ or not b1.statements
140
+ or any(isinstance(stmt, DirtyStatement) for stmt in b0.statements + b1.statements)
141
+ or is_similar(b0, b1)
142
+ ):
138
143
  continue
139
144
 
140
145
  # TODO: add support for moving code to a shared parent block, which requires that we figure out how to
@@ -335,6 +335,7 @@ class StructuringOptimizationPass(OptimizationPass):
335
335
  simp = self.project.analyses.AILSimplifier(
336
336
  self._func,
337
337
  func_graph=graph,
338
+ use_callee_saved_regs_at_return=False,
338
339
  gp=self._func.info.get("gp", None) if self.project.arch.name in {"MIPS32", "MIPS64"} else None,
339
340
  )
340
341
  return simp.func_graph if simp.simplified else graph
@@ -178,24 +178,36 @@ class StackCanarySimplifier(OptimizationPass):
178
178
  # Done!
179
179
 
180
180
  def _find_canary_init_stmt(self):
181
- first_block = self._get_block(self._func.addr)
182
- if first_block is None:
183
- return None
184
-
185
- for idx, stmt in enumerate(first_block.statements):
186
- if (
187
- isinstance(stmt, ailment.Stmt.Store)
188
- and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
189
- and isinstance(stmt.data, ailment.Expr.Load)
190
- and self._is_add(stmt.data.addr)
191
- ):
192
- # Check addr: must be fs+0x28
193
- op0, op1 = stmt.data.addr.operands
194
- if isinstance(op1, ailment.Expr.Register):
195
- op0, op1 = op1, op0
196
- if isinstance(op0, ailment.Expr.Register) and isinstance(op1, ailment.Expr.Const):
197
- if op0.reg_offset == self.project.arch.get_register_offset("fs") and op1.value == 0x28:
198
- return first_block, idx
181
+ block_addr = self._func.addr
182
+ traversed = set()
183
+
184
+ while True:
185
+ traversed.add(block_addr)
186
+ first_block = self._get_block(block_addr)
187
+ if first_block is None:
188
+ break
189
+
190
+ for idx, stmt in enumerate(first_block.statements):
191
+ if (
192
+ isinstance(stmt, ailment.Stmt.Store)
193
+ and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
194
+ and isinstance(stmt.data, ailment.Expr.Load)
195
+ and self._is_add(stmt.data.addr)
196
+ ):
197
+ # Check addr: must be fs+0x28
198
+ op0, op1 = stmt.data.addr.operands
199
+ if isinstance(op1, ailment.Expr.Register):
200
+ op0, op1 = op1, op0
201
+ if isinstance(op0, ailment.Expr.Register) and isinstance(op1, ailment.Expr.Const):
202
+ if op0.reg_offset == self.project.arch.get_register_offset("fs") and op1.value == 0x28:
203
+ return first_block, idx
204
+
205
+ succs = list(self._graph.successors(first_block))
206
+ if len(succs) == 1:
207
+ block_addr = succs[0].addr
208
+ if block_addr not in traversed:
209
+ continue
210
+ break
199
211
 
200
212
  return None
201
213
 
@@ -9,8 +9,8 @@ from .base import PeepholeOptimizationExprBase
9
9
  class Bswap(PeepholeOptimizationExprBase):
10
10
  __slots__ = ()
11
11
 
12
- NAME = "Simplifying bswap_16()"
13
- expr_classes = (BinaryOp,) # all expressions are allowed
12
+ NAME = "Simplifying bswap_16() and bswap_32()"
13
+ expr_classes = (BinaryOp, Convert)
14
14
 
15
15
  def optimize(self, expr: BinaryOp, **kwargs):
16
16
  # bswap_16
@@ -48,6 +48,57 @@ class Bswap(PeepholeOptimizationExprBase):
48
48
 
49
49
  return None
50
50
 
51
+ # bswap_32
52
+ # (Conv(64->32, rax<8>) << 0x18<8>) |
53
+ # (((Conv(64->32, rax<8>) << 0x8<8>) & 0xff0000<32>) |
54
+ # (((Conv(64->32, rax<8>) >> 0x8<8>) & 0xff00<32>) |
55
+ # ((Conv(64->32, rax<8>) >> 0x18<8>) & 0xff<32>))))
56
+ if expr.op == "Or":
57
+ # fully flatten the expression
58
+ or_pieces = []
59
+ queue = [expr]
60
+ while queue:
61
+ operand = queue.pop(0)
62
+ if isinstance(operand, BinaryOp) and operand.op == "Or":
63
+ queue.append(operand.operands[0])
64
+ queue.append(operand.operands[1])
65
+ else:
66
+ or_pieces.append(operand)
67
+ if len(or_pieces) == 4:
68
+ # parse pieces
69
+ shifts = set()
70
+ cores = set()
71
+ for piece in or_pieces:
72
+ if isinstance(piece, BinaryOp):
73
+ if piece.op == "Shl" and isinstance(piece.operands[1], Const):
74
+ cores.add(piece.operands[0])
75
+ shifts.add(("<<", piece.operands[1].value, 0xFFFFFFFF))
76
+ elif piece.op == "And" and isinstance(piece.operands[1], Const):
77
+ and_amount = piece.operands[1].value
78
+ and_core = piece.operands[0]
79
+ if (
80
+ isinstance(and_core, BinaryOp)
81
+ and and_core.op == "Shl"
82
+ and isinstance(and_core.operands[1], Const)
83
+ ):
84
+ cores.add(and_core.operands[0])
85
+ shifts.add(("<<", and_core.operands[1].value, and_amount))
86
+ elif (
87
+ isinstance(and_core, BinaryOp)
88
+ and and_core.op == "Shr"
89
+ and isinstance(and_core.operands[1], Const)
90
+ ):
91
+ cores.add(and_core.operands[0])
92
+ shifts.add((">>", and_core.operands[1].value, and_amount))
93
+ if len(cores) == 1 and shifts == {
94
+ ("<<", 0x18, 0xFFFFFFFF),
95
+ ("<<", 8, 0xFF0000),
96
+ (">>", 0x18, 0xFF),
97
+ (">>", 8, 0xFF00),
98
+ }:
99
+ core_expr = next(iter(cores))
100
+ return Call(expr.idx, "__buildin_bswap32", args=[core_expr], bits=expr.bits, **expr.tags)
101
+
51
102
  return None
52
103
 
53
104
  def _match_inner(self, or_first: BinaryOp, or_second: BinaryOp) -> Tuple[bool, Optional[Expression]]:
@@ -1,6 +1,6 @@
1
1
  from math import gcd
2
2
 
3
- from ailment.expression import BinaryOp, UnaryOp, Const, Convert
3
+ from ailment.expression import BinaryOp, UnaryOp, Const, Convert, StackBaseOffset
4
4
 
5
5
  from .base import PeepholeOptimizationExprBase
6
6
 
@@ -59,6 +59,22 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
59
59
  expr.signed,
60
60
  **expr.tags,
61
61
  )
62
+ if (
63
+ isinstance(expr.operands[0], BinaryOp)
64
+ and expr.operands[0].op == "Mul"
65
+ and isinstance(expr.operands[0].operands[1], Const)
66
+ and expr.operands[0].operands[0].likes(expr.operands[1])
67
+ ):
68
+ # A * x + x => (A + 1) * x
69
+ coeff_expr = expr.operands[0].operands[1]
70
+ new_coeff = coeff_expr.value + 1
71
+ return BinaryOp(
72
+ expr.idx,
73
+ "Mul",
74
+ [Const(coeff_expr.idx, None, new_coeff, coeff_expr.bits), expr.operands[1]],
75
+ expr.signed,
76
+ **expr.tags,
77
+ )
62
78
  elif expr.op == "Sub":
63
79
  if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
64
80
  mask = (1 << expr.bits) - 1
@@ -93,6 +109,9 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
93
109
  if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
94
110
  return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)
95
111
 
112
+ if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
113
+ return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)
114
+
96
115
  elif expr.op == "And":
97
116
  if isinstance(expr.operands[0], Const) and isinstance(expr.operands[1], Const):
98
117
  new_expr = Const(