angr 9.2.83__py3-none-win_amd64.whl → 9.2.85__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (62)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_base.py +6 -1
  3. angr/analyses/cfg/cfg_fast.py +32 -10
  4. angr/analyses/decompiler/clinic.py +204 -4
  5. angr/analyses/decompiler/condition_processor.py +8 -2
  6. angr/analyses/decompiler/decompilation_options.py +10 -0
  7. angr/analyses/decompiler/decompiler.py +19 -17
  8. angr/analyses/decompiler/goto_manager.py +34 -51
  9. angr/analyses/decompiler/optimization_passes/__init__.py +5 -5
  10. angr/analyses/decompiler/optimization_passes/div_simplifier.py +2 -0
  11. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  12. angr/analyses/decompiler/optimization_passes/mod_simplifier.py +2 -0
  13. angr/analyses/decompiler/optimization_passes/multi_simplifier.py +2 -0
  14. angr/analyses/decompiler/optimization_passes/optimization_pass.py +131 -3
  15. angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +3 -3
  16. angr/analyses/decompiler/optimization_passes/return_duplicator.py +519 -0
  17. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +14 -2
  18. angr/analyses/decompiler/region_identifier.py +8 -2
  19. angr/analyses/decompiler/region_simplifiers/goto.py +5 -4
  20. angr/analyses/decompiler/structured_codegen/c.py +66 -5
  21. angr/analyses/decompiler/structuring/phoenix.py +3 -1
  22. angr/analyses/decompiler/structuring/structurer_nodes.py +11 -5
  23. angr/analyses/decompiler/utils.py +50 -0
  24. angr/analyses/disassembly.py +10 -3
  25. angr/analyses/propagator/engine_ail.py +125 -0
  26. angr/analyses/reaching_definitions/engine_ail.py +36 -2
  27. angr/analyses/reaching_definitions/rd_initializer.py +15 -1
  28. angr/analyses/reaching_definitions/rd_state.py +9 -4
  29. angr/analyses/stack_pointer_tracker.py +10 -17
  30. angr/analyses/variable_recovery/engine_ail.py +27 -1
  31. angr/angrdb/serializers/loader.py +10 -3
  32. angr/calling_conventions.py +2 -0
  33. angr/engines/pcode/behavior.py +7 -2
  34. angr/engines/pcode/cc.py +1 -0
  35. angr/engines/pcode/emulate.py +144 -104
  36. angr/engines/pcode/lifter.py +135 -79
  37. angr/knowledge_plugins/functions/function.py +28 -0
  38. angr/knowledge_plugins/functions/function_manager.py +48 -5
  39. angr/knowledge_plugins/propagations/states.py +14 -0
  40. angr/lib/angr_native.dll +0 -0
  41. angr/procedures/cgc/deallocate.py +5 -2
  42. angr/procedures/posix/gethostbyname.py +23 -8
  43. angr/project.py +4 -0
  44. angr/simos/__init__.py +2 -0
  45. angr/simos/simos.py +1 -0
  46. angr/simos/snimmuc_nxp.py +152 -0
  47. angr/state_plugins/history.py +3 -1
  48. angr/utils/graph.py +20 -18
  49. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/METADATA +9 -8
  50. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/RECORD +61 -59
  51. tests/analyses/cfg/test_cfg_rust_got_resolution.py +2 -1
  52. tests/analyses/cfg/test_jumptables.py +2 -1
  53. tests/analyses/decompiler/test_decompiler.py +155 -103
  54. tests/engines/pcode/test_emulate.py +607 -0
  55. tests/engines/test_java.py +609 -663
  56. tests/knowledge_plugins/functions/test_function_manager.py +13 -0
  57. tests/serialization/test_db.py +30 -0
  58. angr/analyses/decompiler/optimization_passes/eager_returns.py +0 -285
  59. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/LICENSE +0 -0
  60. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/WHEEL +0 -0
  61. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/entry_points.txt +0 -0
  62. {angr-9.2.83.dist-info → angr-9.2.85.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # pylint: disable=wildcard-import
2
2
  # pylint: disable=wrong-import-position
3
3
 
4
- __version__ = "9.2.83"
4
+ __version__ = "9.2.85"
5
5
 
6
6
  if bytes is str:
7
7
  raise Exception(
@@ -308,6 +308,9 @@ class CFGBase(Analysis):
308
308
  self._jobs_to_analyze_per_function = defaultdict(set)
309
309
  self._completed_functions = set()
310
310
 
311
+ def _function_completed(self, func_addr: int):
312
+ pass
313
+
311
314
  def _post_analysis(self):
312
315
  if self._normalize:
313
316
  if not self.normalized:
@@ -1478,7 +1481,9 @@ class CFGBase(Analysis):
1478
1481
 
1479
1482
  finished = self._get_finished_functions()
1480
1483
  for func_addr in finished:
1481
- self._completed_functions.add(func_addr)
1484
+ if func_addr not in self._completed_functions:
1485
+ self._function_completed(func_addr)
1486
+ self._completed_functions.add(func_addr)
1482
1487
  self._cleanup_analysis_jobs(finished_func_addrs=finished)
1483
1488
 
1484
1489
  #
@@ -1399,6 +1399,28 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
1399
1399
  def _post_job_handling(self, job, new_jobs, successors):
1400
1400
  pass
1401
1401
 
1402
+ def _function_completed(self, func_addr: int):
1403
+ if self._collect_data_ref and self.project is not None and ":" in self.project.arch.name:
1404
+ # this is a pcode arch - use Clinic to recover data references
1405
+
1406
+ if not self.kb.functions.contains_addr(func_addr):
1407
+ return
1408
+
1409
+ # we add an arbitrary limit to function sizes for now to ensure we are not slowing down CFG recovery by too
1410
+ # much. we can remove this limit once we significantly speed up RDA and Propagator.
1411
+
1412
+ func = self.kb.functions.get_by_addr(func_addr)
1413
+ if func.is_plt or func.is_simprocedure or func.is_syscall:
1414
+ return
1415
+ if not (1 <= len(func.block_addrs_set) < 15):
1416
+ return
1417
+
1418
+ from angr.analyses.decompiler.clinic import ClinicMode # pylint:disable=wrong-import-position
1419
+
1420
+ clinic = self.project.analyses.Clinic(func, mode=ClinicMode.COLLECT_DATA_REFS)
1421
+ for irsb_addr, refs in clinic.data_refs.items():
1422
+ self._process_irsb_data_refs(irsb_addr, refs)
1423
+
1402
1424
  def _job_queue_empty(self):
1403
1425
  if self._pending_jobs:
1404
1426
  # fastpath
@@ -2635,14 +2657,14 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
2635
2657
  """
2636
2658
 
2637
2659
  if irsb.data_refs:
2638
- self._process_irsb_data_refs(irsb)
2660
+ self._process_irsb_data_refs(irsb.addr, irsb.data_refs)
2639
2661
  elif irsb.statements:
2640
2662
  # for each statement, collect all constants that are referenced or used.
2641
2663
  self._collect_data_references_by_scanning_stmts(irsb, irsb_addr)
2642
2664
 
2643
- def _process_irsb_data_refs(self, irsb):
2644
- assumption = self._decoding_assumptions.get(irsb.addr & ~1)
2645
- for ref in irsb.data_refs:
2665
+ def _process_irsb_data_refs(self, irsb_addr, data_refs):
2666
+ assumption = self._decoding_assumptions.get(irsb_addr & ~1)
2667
+ for ref in data_refs:
2646
2668
  if ref.data_type_str == "integer(store)":
2647
2669
  data_type_str = "integer"
2648
2670
  is_store = True
@@ -2658,7 +2680,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
2658
2680
  assumption.add_data_seg(ref.data_addr, ref.data_size)
2659
2681
 
2660
2682
  self._add_data_reference(
2661
- irsb.addr,
2683
+ irsb_addr,
2662
2684
  ref.stmt_idx,
2663
2685
  ref.ins_addr,
2664
2686
  ref.data_addr,
@@ -2667,9 +2689,9 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
2667
2689
  )
2668
2690
 
2669
2691
  if ref.data_size == self.project.arch.bytes and is_arm_arch(self.project.arch):
2670
- self._process_irsb_data_ref_inlined_data(irsb, ref)
2692
+ self._process_irsb_data_ref_inlined_data(irsb_addr, ref)
2671
2693
 
2672
- def _process_irsb_data_ref_inlined_data(self, irsb, ref):
2694
+ def _process_irsb_data_ref_inlined_data(self, irsb_addr: int, ref):
2673
2695
  # ARM (and maybe a few other architectures as well) has inline pointers
2674
2696
  sec = self.project.loader.find_section_containing(ref.data_addr)
2675
2697
  if sec is not None and sec.is_readable and not sec.is_writable:
@@ -2682,7 +2704,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
2682
2704
  if sec_2nd is not None and sec_2nd.is_readable and not sec_2nd.is_writable:
2683
2705
  # found it!
2684
2706
  self._add_data_reference(
2685
- irsb.addr,
2707
+ irsb_addr,
2686
2708
  ref.stmt_idx,
2687
2709
  ref.ins_addr,
2688
2710
  v,
@@ -2717,7 +2739,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
2717
2739
  # - For all other instructions that use labels, the value of the PC is the address of the current
2718
2740
  # instruction plus 4 bytes, with bit[1] of the result cleared to 0 to make it word-aligned.
2719
2741
  #
2720
- if (irsb.addr & 1) == 1:
2742
+ if (irsb_addr & 1) == 1:
2721
2743
  actual_ref_ins_addr = ref.ins_addr + 2
2722
2744
  v += 4 + actual_ref_ins_addr
2723
2745
  v &= 0xFFFF_FFFF_FFFF_FFFE
@@ -2728,7 +2750,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
2728
2750
  if sec_3rd is not None and sec_3rd.is_readable and not sec_3rd.is_writable:
2729
2751
  # found it!
2730
2752
  self._add_data_reference(
2731
- irsb.addr, ref.stmt_idx, actual_ref_ins_addr, v, data_size=None, data_type=MemoryDataSort.Unknown
2753
+ irsb_addr, ref.stmt_idx, actual_ref_ins_addr, v, data_size=None, data_type=MemoryDataSort.Unknown
2732
2754
  )
2733
2755
 
2734
2756
  def _collect_data_references_by_scanning_stmts(self, irsb, irsb_addr):
@@ -1,6 +1,8 @@
1
1
  import copy
2
2
  from collections import defaultdict, namedtuple
3
3
  import logging
4
+ import enum
5
+ from dataclasses import dataclass
4
6
  from typing import Dict, List, Tuple, Set, Optional, Iterable, Union, Type, Any, NamedTuple, TYPE_CHECKING
5
7
 
6
8
  import networkx
@@ -9,6 +11,7 @@ import ailment
9
11
 
10
12
  from ...knowledge_base import KnowledgeBase
11
13
  from ...knowledge_plugins.functions import Function
14
+ from ...knowledge_plugins.cfg.memory_data import MemoryDataSort
12
15
  from ...codenode import BlockNode
13
16
  from ...utils import timethis
14
17
  from ...calling_conventions import SimRegArg, SimStackArg, SimStructArg, SimFunctionArgument
@@ -43,6 +46,29 @@ l = logging.getLogger(name=__name__)
43
46
  BlockCache = namedtuple("BlockCache", ("rd", "prop"))
44
47
 
45
48
 
49
+ class ClinicMode(enum.Enum):
50
+ """
51
+ Analysis mode for Clinic.
52
+ """
53
+
54
+ DECOMPILE = 1
55
+ COLLECT_DATA_REFS = 2
56
+
57
+
58
+ @dataclass
59
+ class DataRefDesc:
60
+ """
61
+ The fields of this class are compatible with items inside IRSB.data_refs.
62
+ """
63
+
64
+ data_addr: int
65
+ data_size: int
66
+ block_addr: int
67
+ stmt_idx: int
68
+ ins_addr: int
69
+ data_type_str: str
70
+
71
+
46
72
  class Clinic(Analysis):
47
73
  """
48
74
  A Clinic deals with AILments.
@@ -66,8 +92,9 @@ class Clinic(Analysis):
66
92
  reset_variable_names=False,
67
93
  rewrite_ites_to_diamonds=True,
68
94
  cache: Optional["DecompilationCache"] = None,
95
+ mode: ClinicMode = ClinicMode.DECOMPILE,
69
96
  ):
70
- if not func.normalized:
97
+ if not func.normalized and mode == ClinicMode.DECOMPILE:
71
98
  raise ValueError("Decompilation must work on normalized function graphs.")
72
99
 
73
100
  self.function = func
@@ -77,6 +104,7 @@ class Clinic(Analysis):
77
104
  self.arg_list = None
78
105
  self.variable_kb = variable_kb
79
106
  self.externs: Set[SimMemoryVariable] = set()
107
+ self.data_refs: Dict[int, int] = {} # data address to instruction address
80
108
 
81
109
  self._func_graph: Optional[networkx.DiGraph] = None
82
110
  self._ail_manager = None
@@ -94,6 +122,7 @@ class Clinic(Analysis):
94
122
  self._rewrite_ites_to_diamonds = rewrite_ites_to_diamonds
95
123
  self.reaching_definitions: Optional[ReachingDefinitionsAnalysis] = None
96
124
  self._cache = cache
125
+ self._mode = mode
97
126
 
98
127
  self._register_save_areas_removed: bool = False
99
128
 
@@ -109,7 +138,12 @@ class Clinic(Analysis):
109
138
  self._optimization_passes = get_default_optimization_passes(self.project.arch, self.project.simos.name)
110
139
  l.debug("Get %d optimization passes for the current binary.", len(self._optimization_passes))
111
140
 
112
- self._analyze()
141
+ if self._mode == ClinicMode.DECOMPILE:
142
+ self._analyze_for_decompiling()
143
+ elif self._mode == ClinicMode.COLLECT_DATA_REFS:
144
+ self._analyze_for_data_refs()
145
+ else:
146
+ raise TypeError(f"Unsupported analysis mode {self._mode}")
113
147
 
114
148
  #
115
149
  # Public methods
@@ -146,7 +180,7 @@ class Clinic(Analysis):
146
180
  # Private methods
147
181
  #
148
182
 
149
- def _analyze(self):
183
+ def _analyze_for_decompiling(self):
150
184
  is_pcode_arch = ":" in self.project.arch.name
151
185
 
152
186
  # Set up the function graph according to configurations
@@ -335,6 +369,78 @@ class Clinic(Analysis):
335
369
  self.cc_graph = self.copy_graph()
336
370
  self.externs = self._collect_externs(ail_graph, variable_kb)
337
371
 
372
+ def _analyze_for_data_refs(self):
373
+ # Set up the function graph according to configurations
374
+ self._update_progress(0.0, text="Setting up function graph")
375
+ self._set_function_graph()
376
+
377
+ # Remove alignment blocks
378
+ self._update_progress(5.0, text="Removing alignment blocks")
379
+ self._remove_alignment_blocks()
380
+
381
+ # if the graph is empty, don't continue
382
+ if not self._func_graph:
383
+ return
384
+
385
+ # initialize the AIL conversion manager
386
+ self._ail_manager = ailment.Manager(arch=self.project.arch)
387
+
388
+ # Track stack pointers
389
+ self._update_progress(15.0, text="Tracking stack pointers")
390
+ spt = self._track_stack_pointers()
391
+
392
+ # Convert VEX blocks to AIL blocks and then simplify them
393
+
394
+ self._update_progress(20.0, text="Converting VEX to AIL")
395
+ self._convert_all()
396
+
397
+ ail_graph = self._make_ailgraph()
398
+ self._remove_redundant_jump_blocks(ail_graph)
399
+
400
+ # full-function constant-only propagation
401
+ self._update_progress(33.0, text="Constant propagation")
402
+ self._simplify_function(
403
+ ail_graph,
404
+ remove_dead_memdefs=False,
405
+ unify_variables=False,
406
+ narrow_expressions=False,
407
+ only_consts=True,
408
+ fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
409
+ max_iterations=1,
410
+ )
411
+
412
+ # cached block-level reaching definition analysis results and propagator results
413
+ block_simplification_cache: Optional[Dict[ailment.Block, NamedTuple]] = {}
414
+
415
+ # Simplify blocks
416
+ # we never remove dead memory definitions before making callsites. otherwise stack arguments may go missing
417
+ # before they are recognized as stack arguments.
418
+ self._update_progress(35.0, text="Simplifying blocks 1")
419
+ ail_graph = self._simplify_blocks(
420
+ ail_graph, stack_pointer_tracker=spt, remove_dead_memdefs=False, cache=block_simplification_cache
421
+ )
422
+
423
+ # Simplify the entire function for the first time
424
+ self._update_progress(45.0, text="Simplifying function 1")
425
+ self._simplify_function(
426
+ ail_graph,
427
+ remove_dead_memdefs=False,
428
+ unify_variables=False,
429
+ narrow_expressions=False,
430
+ fold_callexprs_into_conditions=False,
431
+ )
432
+
433
+ # clear _blocks_by_addr_and_size so no one can use it again
434
+ # TODO: Totally remove this dict
435
+ self._blocks_by_addr_and_size = None
436
+
437
+ self.graph = ail_graph
438
+ self.arg_list = None
439
+ self.variable_kb = None
440
+ self.cc_graph = None
441
+ self.externs = None
442
+ self.data_refs: Dict[int, List[DataRefDesc]] = self._collect_data_refs(ail_graph)
443
+
338
444
  def copy_graph(self) -> networkx.DiGraph:
339
445
  """
340
446
  Copy AIL Graph.
@@ -1521,7 +1627,7 @@ class Clinic(Analysis):
1521
1627
 
1522
1628
  def handle_expr(
1523
1629
  expr_idx: int,
1524
- expr: ailment.expression.Load,
1630
+ expr: ailment.expression.Expression,
1525
1631
  stmt_idx: int,
1526
1632
  stmt: ailment.statement.Statement,
1527
1633
  block: Optional[ailment.Block],
@@ -1546,6 +1652,100 @@ class Clinic(Analysis):
1546
1652
  AILGraphWalker(ail_graph, walker.walk).walk()
1547
1653
  return variables
1548
1654
 
1655
+ @staticmethod
1656
+ def _collect_data_refs(ail_graph) -> Dict[int, List[DataRefDesc]]:
1657
+ # pylint:disable=unused-argument
1658
+ walker = ailment.AILBlockWalker()
1659
+ data_refs: Dict[int, List[DataRefDesc]] = defaultdict(list)
1660
+
1661
+ def handle_Const(
1662
+ expr_idx: int,
1663
+ expr: ailment.expression.Const,
1664
+ stmt_idx: int,
1665
+ stmt: ailment.statement.Statement,
1666
+ block: Optional[ailment.Block],
1667
+ ):
1668
+ if isinstance(expr.value, int) and hasattr(expr, "ins_addr"):
1669
+ data_refs[block.addr].append(
1670
+ DataRefDesc(expr.value, 1, block.addr, stmt_idx, expr.ins_addr, MemoryDataSort.Unknown)
1671
+ )
1672
+ if hasattr(expr, "deref_src_addr"):
1673
+ data_refs[block.addr].append(
1674
+ DataRefDesc(
1675
+ expr.deref_src_addr, expr.size, block.addr, stmt_idx, expr.ins_addr, MemoryDataSort.Unknown
1676
+ )
1677
+ )
1678
+
1679
+ def handle_Load(
1680
+ expr_idx: int,
1681
+ expr: ailment.expression.Load,
1682
+ stmt_idx: int,
1683
+ stmt: ailment.statement.Statement,
1684
+ block: Optional[ailment.Block],
1685
+ ):
1686
+ if isinstance(expr.addr, ailment.expression.Const):
1687
+ addr = expr.addr
1688
+ if isinstance(addr.value, int) and hasattr(addr, "ins_addr"):
1689
+ data_refs[block.addr].append(
1690
+ DataRefDesc(
1691
+ addr.value,
1692
+ expr.size,
1693
+ block.addr,
1694
+ stmt_idx,
1695
+ addr.ins_addr,
1696
+ MemoryDataSort.Integer if expr.size == 4 else MemoryDataSort.Unknown,
1697
+ )
1698
+ )
1699
+ if hasattr(addr, "deref_src_addr"):
1700
+ data_refs[block.addr].append(
1701
+ DataRefDesc(
1702
+ addr.deref_src_addr,
1703
+ expr.size,
1704
+ block.addr,
1705
+ stmt_idx,
1706
+ addr.ins_addr,
1707
+ MemoryDataSort.Integer if expr.size == 4 else MemoryDataSort.Unknown,
1708
+ )
1709
+ )
1710
+ return None
1711
+
1712
+ return ailment.AILBlockWalker._handle_Load(walker, expr_idx, expr, stmt_idx, stmt, block)
1713
+
1714
+ def handle_Store(stmt_idx: int, stmt: ailment.statement.Store, block: Optional[ailment.Block]):
1715
+ if isinstance(stmt.addr, ailment.expression.Const):
1716
+ addr = stmt.addr
1717
+ if isinstance(addr.value, int) and hasattr(addr, "ins_addr"):
1718
+ data_refs[block.addr].append(
1719
+ DataRefDesc(
1720
+ addr.value,
1721
+ stmt.size,
1722
+ block.addr,
1723
+ stmt_idx,
1724
+ addr.ins_addr,
1725
+ MemoryDataSort.Integer if stmt.size == 4 else MemoryDataSort.Unknown,
1726
+ )
1727
+ )
1728
+ if hasattr(addr, "deref_src_addr"):
1729
+ data_refs[block.addr].append(
1730
+ DataRefDesc(
1731
+ addr.deref_src_addr,
1732
+ stmt.size,
1733
+ block.addr,
1734
+ stmt_idx,
1735
+ addr.ins_addr,
1736
+ MemoryDataSort.Integer if stmt.size == 4 else MemoryDataSort.Unknown,
1737
+ )
1738
+ )
1739
+ return None
1740
+
1741
+ return ailment.AILBlockWalker._handle_Store(walker, stmt_idx, stmt, block)
1742
+
1743
+ walker.stmt_handlers[ailment.statement.Store] = handle_Store
1744
+ walker.expr_handlers[ailment.expression.Load] = handle_Load
1745
+ walker.expr_handlers[ailment.expression.Const] = handle_Const
1746
+ AILGraphWalker(ail_graph, walker.walk).walk()
1747
+ return data_refs
1748
+
1549
1749
  def _next_atom(self) -> int:
1550
1750
  return self._ail_manager.next_atom()
1551
1751
 
@@ -3,10 +3,11 @@ from typing import Generator, Dict, Any, Optional, Set, List
3
3
  import operator
4
4
  import logging
5
5
 
6
+ import ailment
7
+ import claripy
6
8
  import networkx
9
+ from unique_log_filter import UniqueLogFilter
7
10
 
8
- import claripy
9
- import ailment
10
11
 
11
12
  from angr.utils.graph import GraphUtils
12
13
  from ...utils.lazy_import import lazy_import
@@ -39,6 +40,7 @@ else:
39
40
 
40
41
 
41
42
  l = logging.getLogger(__name__)
43
+ l.addFilter(UniqueLogFilter())
42
44
 
43
45
 
44
46
  _UNIFIABLE_COMPARISONS = {
@@ -111,6 +113,10 @@ _ail2claripy_op_mapping = {
111
113
  "Reinterpret": lambda expr, _, m: _dummy_bvs(expr, m),
112
114
  "Rol": lambda expr, _, m: _dummy_bvs(expr, m),
113
115
  "Ror": lambda expr, _, m: _dummy_bvs(expr, m),
116
+ "LogicalXor": lambda expr, _, m: _dummy_bvs(expr, m),
117
+ "Carry": lambda expr, _, m: _dummy_bvs(expr, m),
118
+ "SCarry": lambda expr, _, m: _dummy_bvs(expr, m),
119
+ "SBorrow": lambda expr, _, m: _dummy_bvs(expr, m),
114
120
  }
115
121
 
116
122
  #
@@ -176,6 +176,16 @@ options = [
176
176
  default_value=True,
177
177
  clears_cache=True,
178
178
  ),
179
+ O(
180
+ "Show disambiguated names",
181
+ "Disambiguate function names when they conflict with variables and other functions",
182
+ bool,
183
+ "codegen",
184
+ "show_disambiguated_name",
185
+ category="Display",
186
+ default_value=True,
187
+ clears_cache=True,
188
+ ),
179
189
  O(
180
190
  "Structuring algorithm",
181
191
  "Select a structuring algorithm. Currently supports Dream and Phoenix.",
@@ -3,6 +3,7 @@ import logging
3
3
  from collections import defaultdict
4
4
  from typing import List, Tuple, Optional, Iterable, Union, Type, Set, Dict, Any, TYPE_CHECKING
5
5
 
6
+ import networkx
6
7
  from cle import SymbolType
7
8
  import ailment
8
9
 
@@ -195,15 +196,12 @@ class Decompiler(Analysis):
195
196
  ite_exprs=ite_exprs,
196
197
  )
197
198
 
198
- # recover regions
199
- ri = self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
200
- self.func,
201
- graph=clinic.graph,
202
- cond_proc=cond_proc,
203
- force_loop_single_exit=self._force_loop_single_exit,
204
- complete_successors=self._complete_successors,
205
- **self.options_to_params(self.options_by_class["region_identifier"]),
199
+ # recover regions, delay updating when we have optimizations that may update regions themselves
200
+ delay_graph_updates = any(
201
+ pass_.STAGE == OptimizationPassStage.DURING_REGION_IDENTIFICATION for pass_ in self._optimization_passes
206
202
  )
203
+ ri = self._recover_regions(clinic.graph, cond_proc, update_graph=not delay_graph_updates)
204
+
207
205
  # run optimizations that may require re-RegionIdentification
208
206
  clinic.graph, ri = self._run_region_simplification_passes(
209
207
  clinic.graph,
@@ -265,6 +263,17 @@ class Decompiler(Analysis):
265
263
  self.cache.codegen = codegen
266
264
  self.cache.clinic = self.clinic
267
265
 
266
+ def _recover_regions(self, graph: networkx.DiGraph, condition_processor, update_graph: bool = True):
267
+ return self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
268
+ self.func,
269
+ graph=graph,
270
+ cond_proc=condition_processor,
271
+ update_graph=update_graph,
272
+ force_loop_single_exit=self._force_loop_single_exit,
273
+ complete_successors=self._complete_successors,
274
+ **self.options_to_params(self.options_by_class["region_identifier"]),
275
+ )
276
+
268
277
  @timethis
269
278
  def _run_graph_simplification_passes(self, ail_graph, reaching_definitions, **kwargs):
270
279
  """
@@ -364,16 +373,9 @@ class Decompiler(Analysis):
364
373
 
365
374
  cond_proc = ConditionProcessor(self.project.arch)
366
375
  # always update RI on graph change
367
- ri = self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
368
- self.func,
369
- graph=ail_graph,
370
- cond_proc=cond_proc,
371
- force_loop_single_exit=self._force_loop_single_exit,
372
- complete_successors=self._complete_successors,
373
- **self.options_to_params(self.options_by_class["region_identifier"]),
374
- )
376
+ ri = self._recover_regions(ail_graph, cond_proc, update_graph=False)
375
377
 
376
- return ail_graph, ri
378
+ return ail_graph, self._recover_regions(ail_graph, ConditionProcessor(self.project.arch), update_graph=True)
377
379
 
378
380
  @timethis
379
381
  def _run_post_structuring_simplification_passes(self, seq_node, **kwargs):
@@ -1,5 +1,4 @@
1
1
  from typing import Set
2
- from collections import defaultdict
3
2
 
4
3
  import ailment
5
4
 
@@ -10,32 +9,28 @@ class Goto:
10
9
  will differ).
11
10
  """
12
11
 
13
- def __init__(self, block_addr=None, ins_addr=None, target_addr=None):
14
- """
15
- :param block_addr: The block address this goto is contained in
16
- :param ins_addr: The instruction address this goto is at
17
- :param target_addr: The target this goto will jump to
18
- """
19
- self.block_addr = block_addr
20
- self.ins_addr = ins_addr
21
- self.target_addr = target_addr
12
+ def __init__(self, src_addr, dst_addr, src_idx=None, dst_idx=None, src_ins_addr=None):
13
+ self.src_addr = src_addr
14
+ self.dst_addr = dst_addr
15
+ self.src_idx = src_idx
16
+ self.dst_idx = dst_idx
17
+ self.src_ins_addr = src_ins_addr
22
18
 
23
19
  def __hash__(self):
24
- return hash(f"{self.block_addr}{self.ins_addr}{self.target_addr}")
20
+ return hash(f"{self.src_addr}{self.dst_addr}{self.src_idx}{self.dst_idx}")
25
21
 
26
22
  def __str__(self):
27
- if not self.addr or not self.target_addr:
23
+ if self.src_addr is None or self.dst_addr is None:
28
24
  return f"<Goto {self.__hash__()}>"
29
25
 
30
- return f"<Goto: [{hex(self.addr)}] -> {hex(self.target_addr)}>"
26
+ src_idx_str = "" if self.src_idx is None else f".{self.src_idx}"
27
+ dst_idx_str = "" if self.dst_idx is None else f".{self.dst_idx}"
28
+ src_ins_addr_str = "" if self.src_ins_addr is None else f"{hex(self.src_ins_addr)}"
29
+ return f"<Goto: [{hex(self.src_addr)}@{src_ins_addr_str}{src_idx_str}] -> {hex(self.dst_addr)}{dst_idx_str}>"
31
30
 
32
31
  def __repr__(self):
33
32
  return self.__str__()
34
33
 
35
- @property
36
- def addr(self):
37
- return self.block_addr or self.ins_addr
38
-
39
34
 
40
35
  class GotoManager:
41
36
  """
@@ -55,38 +50,26 @@ class GotoManager:
55
50
  def __repr__(self):
56
51
  return self.__str__()
57
52
 
58
- def gotos_by_addr(self, force_refresh=False):
59
- """
60
- Returns a dictionary of gotos by addresses. This set can CONTAIN DUPLICATES, so don't trust
61
- this for a valid number of gotos. If you need the real number of gotos, just get the size of
62
- self.gotos. This set should mostly be used when checking if a block contains a goto, since recording
63
- can be recorded on null-addr blocks.
64
-
65
- :param force_refresh: Don't use the cached self._gotos_by_addr
66
- :return:
67
- """
68
-
69
- if not force_refresh and self._gotos_by_addr:
70
- return self._gotos_by_addr
71
-
72
- self._gotos_by_addr = defaultdict(set)
73
- for goto in self.gotos:
74
- if goto.block_addr is not None:
75
- self._gotos_by_addr[goto.block_addr].add(goto)
76
-
77
- if goto.ins_addr is not None:
78
- self._gotos_by_addr[goto.ins_addr].add(goto)
79
-
80
- return self._gotos_by_addr
81
-
82
53
  def gotos_in_block(self, block: ailment.Block) -> Set[Goto]:
83
- gotos_by_addr = self.gotos_by_addr()
84
- gotos = set()
85
- if block.addr in gotos_by_addr:
86
- gotos.update(gotos_by_addr[block.addr])
87
-
88
- for stmt in block.statements:
89
- if stmt.ins_addr in gotos_by_addr:
90
- gotos.update(gotos_by_addr[stmt.ins_addr])
91
-
92
- return gotos
54
+ gotos_found = set()
55
+ for goto in self.gotos:
56
+ if goto.src_addr == block.addr:
57
+ gotos_found.add(goto)
58
+ else:
59
+ block_addrs = {stmt.ins_addr for stmt in block.statements if "ins_addr" in stmt.tags}
60
+ if goto.src_ins_addr in block_addrs:
61
+ gotos_found.add(goto)
62
+
63
+ return gotos_found
64
+
65
+ def is_goto_edge(self, src: ailment.Block, dst: ailment.Block):
66
+ src_gotos = self.gotos_in_block(src)
67
+ for goto in src_gotos:
68
+ if goto.dst_addr == dst.addr:
69
+ return True
70
+ else:
71
+ block_addrs = {stmt.ins_addr for stmt in dst.statements if "ins_addr" in stmt.tags}
72
+ if goto.dst_addr in block_addrs:
73
+ return True
74
+
75
+ return False
@@ -13,7 +13,7 @@ from .lowered_switch_simplifier import LoweredSwitchSimplifier
13
13
  from .multi_simplifier import MultiSimplifier
14
14
  from .div_simplifier import DivSimplifier
15
15
  from .mod_simplifier import ModSimplifier
16
- from .eager_returns import EagerReturnsSimplifier
16
+ from .return_duplicator import ReturnDuplicator
17
17
  from .const_derefs import ConstantDereferencesSimplifier
18
18
  from .register_save_area_simplifier import RegisterSaveAreaSimplifier
19
19
  from .ret_addr_save_simplifier import RetAddrSaveSimplifier
@@ -22,7 +22,7 @@ from .flip_boolean_cmp import FlipBooleanCmp
22
22
  from .ret_deduplicator import ReturnDeduplicator
23
23
  from .win_stack_canary_simplifier import WinStackCanarySimplifier
24
24
 
25
-
25
+ # order matters!
26
26
  _all_optimization_passes = [
27
27
  (RegisterSaveAreaSimplifier, True),
28
28
  (StackCanarySimplifier, True),
@@ -35,11 +35,11 @@ _all_optimization_passes = [
35
35
  (RetAddrSaveSimplifier, True),
36
36
  (X86GccGetPcSimplifier, True),
37
37
  (ITERegionConverter, True),
38
- (ReturnDeduplicator, True),
39
- (LoweredSwitchSimplifier, False),
40
- (EagerReturnsSimplifier, True),
41
38
  (ITEExprConverter, True),
42
39
  (ExprOpSwapper, True),
40
+ (ReturnDuplicator, True),
41
+ (LoweredSwitchSimplifier, False),
42
+ (ReturnDeduplicator, True),
43
43
  (FlipBooleanCmp, True),
44
44
  ]
45
45
 
@@ -3,11 +3,13 @@ import logging
3
3
  import math
4
4
 
5
5
  from ailment import Expr
6
+ from unique_log_filter import UniqueLogFilter
6
7
 
7
8
  from .engine_base import SimplifierAILEngine, SimplifierAILState
8
9
  from .optimization_pass import OptimizationPass, OptimizationPassStage
9
10
 
10
11
  _l = logging.getLogger(name=__name__)
12
+ _l.addFilter(UniqueLogFilter())
11
13
 
12
14
 
13
15
  class DivSimplifierAILEngine(SimplifierAILEngine):