angr 9.2.84__py3-none-win_amd64.whl → 9.2.85__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_base.py +6 -1
- angr/analyses/cfg/cfg_fast.py +32 -10
- angr/analyses/decompiler/clinic.py +204 -4
- angr/analyses/decompiler/condition_processor.py +8 -2
- angr/analyses/decompiler/decompiler.py +19 -17
- angr/analyses/decompiler/goto_manager.py +34 -51
- angr/analyses/decompiler/optimization_passes/__init__.py +5 -5
- angr/analyses/decompiler/optimization_passes/div_simplifier.py +2 -0
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/mod_simplifier.py +2 -0
- angr/analyses/decompiler/optimization_passes/multi_simplifier.py +2 -0
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +131 -3
- angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +3 -3
- angr/analyses/decompiler/optimization_passes/return_duplicator.py +519 -0
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +14 -2
- angr/analyses/decompiler/region_identifier.py +8 -2
- angr/analyses/decompiler/region_simplifiers/goto.py +5 -4
- angr/analyses/decompiler/structured_codegen/c.py +33 -1
- angr/analyses/decompiler/structuring/phoenix.py +3 -1
- angr/analyses/decompiler/structuring/structurer_nodes.py +11 -5
- angr/analyses/decompiler/utils.py +50 -0
- angr/analyses/disassembly.py +10 -3
- angr/analyses/propagator/engine_ail.py +125 -0
- angr/analyses/reaching_definitions/engine_ail.py +36 -2
- angr/analyses/reaching_definitions/rd_initializer.py +15 -1
- angr/analyses/reaching_definitions/rd_state.py +9 -4
- angr/analyses/stack_pointer_tracker.py +10 -17
- angr/analyses/variable_recovery/engine_ail.py +27 -1
- angr/angrdb/serializers/loader.py +10 -3
- angr/calling_conventions.py +2 -0
- angr/engines/pcode/behavior.py +7 -2
- angr/engines/pcode/cc.py +1 -0
- angr/engines/pcode/emulate.py +144 -104
- angr/engines/pcode/lifter.py +135 -79
- angr/knowledge_plugins/functions/function_manager.py +5 -3
- angr/knowledge_plugins/propagations/states.py +14 -0
- angr/lib/angr_native.dll +0 -0
- angr/procedures/cgc/deallocate.py +5 -2
- angr/procedures/posix/gethostbyname.py +23 -8
- angr/project.py +4 -0
- angr/simos/__init__.py +2 -0
- angr/simos/simos.py +1 -0
- angr/simos/snimmuc_nxp.py +152 -0
- angr/state_plugins/history.py +3 -1
- angr/utils/graph.py +20 -18
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/METADATA +9 -8
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/RECORD +57 -55
- tests/analyses/cfg/test_cfg_rust_got_resolution.py +2 -1
- tests/analyses/cfg/test_jumptables.py +2 -1
- tests/analyses/decompiler/test_decompiler.py +130 -103
- tests/engines/pcode/test_emulate.py +607 -0
- tests/serialization/test_db.py +30 -0
- angr/analyses/decompiler/optimization_passes/eager_returns.py +0 -285
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/LICENSE +0 -0
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/WHEEL +0 -0
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/entry_points.txt +0 -0
- {angr-9.2.84.dist-info → angr-9.2.85.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/analyses/cfg/cfg_base.py
CHANGED
|
@@ -308,6 +308,9 @@ class CFGBase(Analysis):
|
|
|
308
308
|
self._jobs_to_analyze_per_function = defaultdict(set)
|
|
309
309
|
self._completed_functions = set()
|
|
310
310
|
|
|
311
|
+
def _function_completed(self, func_addr: int):
|
|
312
|
+
pass
|
|
313
|
+
|
|
311
314
|
def _post_analysis(self):
|
|
312
315
|
if self._normalize:
|
|
313
316
|
if not self.normalized:
|
|
@@ -1478,7 +1481,9 @@ class CFGBase(Analysis):
|
|
|
1478
1481
|
|
|
1479
1482
|
finished = self._get_finished_functions()
|
|
1480
1483
|
for func_addr in finished:
|
|
1481
|
-
self._completed_functions
|
|
1484
|
+
if func_addr not in self._completed_functions:
|
|
1485
|
+
self._function_completed(func_addr)
|
|
1486
|
+
self._completed_functions.add(func_addr)
|
|
1482
1487
|
self._cleanup_analysis_jobs(finished_func_addrs=finished)
|
|
1483
1488
|
|
|
1484
1489
|
#
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -1399,6 +1399,28 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
1399
1399
|
def _post_job_handling(self, job, new_jobs, successors):
|
|
1400
1400
|
pass
|
|
1401
1401
|
|
|
1402
|
+
def _function_completed(self, func_addr: int):
|
|
1403
|
+
if self._collect_data_ref and self.project is not None and ":" in self.project.arch.name:
|
|
1404
|
+
# this is a pcode arch - use Clinic to recover data references
|
|
1405
|
+
|
|
1406
|
+
if not self.kb.functions.contains_addr(func_addr):
|
|
1407
|
+
return
|
|
1408
|
+
|
|
1409
|
+
# we add an arbitrary limit to function sizes for now to ensure we are now slowing down CFG recovery by too
|
|
1410
|
+
# much. we can remove this limit once we significantly speed up RDA and Propagator.
|
|
1411
|
+
|
|
1412
|
+
func = self.kb.functions.get_by_addr(func_addr)
|
|
1413
|
+
if func.is_plt or func.is_simprocedure or func.is_syscall:
|
|
1414
|
+
return
|
|
1415
|
+
if not (1 <= len(func.block_addrs_set) < 15):
|
|
1416
|
+
return
|
|
1417
|
+
|
|
1418
|
+
from angr.analyses.decompiler.clinic import ClinicMode # pylint:disable=wrong-import-position
|
|
1419
|
+
|
|
1420
|
+
clinic = self.project.analyses.Clinic(func, mode=ClinicMode.COLLECT_DATA_REFS)
|
|
1421
|
+
for irsb_addr, refs in clinic.data_refs.items():
|
|
1422
|
+
self._process_irsb_data_refs(irsb_addr, refs)
|
|
1423
|
+
|
|
1402
1424
|
def _job_queue_empty(self):
|
|
1403
1425
|
if self._pending_jobs:
|
|
1404
1426
|
# fastpath
|
|
@@ -2635,14 +2657,14 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2635
2657
|
"""
|
|
2636
2658
|
|
|
2637
2659
|
if irsb.data_refs:
|
|
2638
|
-
self._process_irsb_data_refs(irsb)
|
|
2660
|
+
self._process_irsb_data_refs(irsb.addr, irsb.data_refs)
|
|
2639
2661
|
elif irsb.statements:
|
|
2640
2662
|
# for each statement, collect all constants that are referenced or used.
|
|
2641
2663
|
self._collect_data_references_by_scanning_stmts(irsb, irsb_addr)
|
|
2642
2664
|
|
|
2643
|
-
def _process_irsb_data_refs(self,
|
|
2644
|
-
assumption = self._decoding_assumptions.get(
|
|
2645
|
-
for ref in
|
|
2665
|
+
def _process_irsb_data_refs(self, irsb_addr, data_refs):
|
|
2666
|
+
assumption = self._decoding_assumptions.get(irsb_addr & ~1)
|
|
2667
|
+
for ref in data_refs:
|
|
2646
2668
|
if ref.data_type_str == "integer(store)":
|
|
2647
2669
|
data_type_str = "integer"
|
|
2648
2670
|
is_store = True
|
|
@@ -2658,7 +2680,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2658
2680
|
assumption.add_data_seg(ref.data_addr, ref.data_size)
|
|
2659
2681
|
|
|
2660
2682
|
self._add_data_reference(
|
|
2661
|
-
|
|
2683
|
+
irsb_addr,
|
|
2662
2684
|
ref.stmt_idx,
|
|
2663
2685
|
ref.ins_addr,
|
|
2664
2686
|
ref.data_addr,
|
|
@@ -2667,9 +2689,9 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2667
2689
|
)
|
|
2668
2690
|
|
|
2669
2691
|
if ref.data_size == self.project.arch.bytes and is_arm_arch(self.project.arch):
|
|
2670
|
-
self._process_irsb_data_ref_inlined_data(
|
|
2692
|
+
self._process_irsb_data_ref_inlined_data(irsb_addr, ref)
|
|
2671
2693
|
|
|
2672
|
-
def _process_irsb_data_ref_inlined_data(self,
|
|
2694
|
+
def _process_irsb_data_ref_inlined_data(self, irsb_addr: int, ref):
|
|
2673
2695
|
# ARM (and maybe a few other architectures as well) has inline pointers
|
|
2674
2696
|
sec = self.project.loader.find_section_containing(ref.data_addr)
|
|
2675
2697
|
if sec is not None and sec.is_readable and not sec.is_writable:
|
|
@@ -2682,7 +2704,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2682
2704
|
if sec_2nd is not None and sec_2nd.is_readable and not sec_2nd.is_writable:
|
|
2683
2705
|
# found it!
|
|
2684
2706
|
self._add_data_reference(
|
|
2685
|
-
|
|
2707
|
+
irsb_addr,
|
|
2686
2708
|
ref.stmt_idx,
|
|
2687
2709
|
ref.ins_addr,
|
|
2688
2710
|
v,
|
|
@@ -2717,7 +2739,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2717
2739
|
# - For all other instructions that use labels, the value of the PC is the address of the current
|
|
2718
2740
|
# instruction plus 4 bytes, with bit[1] of the result cleared to 0 to make it word-aligned.
|
|
2719
2741
|
#
|
|
2720
|
-
if (
|
|
2742
|
+
if (irsb_addr & 1) == 1:
|
|
2721
2743
|
actual_ref_ins_addr = ref.ins_addr + 2
|
|
2722
2744
|
v += 4 + actual_ref_ins_addr
|
|
2723
2745
|
v &= 0xFFFF_FFFF_FFFF_FFFE
|
|
@@ -2728,7 +2750,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2728
2750
|
if sec_3rd is not None and sec_3rd.is_readable and not sec_3rd.is_writable:
|
|
2729
2751
|
# found it!
|
|
2730
2752
|
self._add_data_reference(
|
|
2731
|
-
|
|
2753
|
+
irsb_addr, ref.stmt_idx, actual_ref_ins_addr, v, data_size=None, data_type=MemoryDataSort.Unknown
|
|
2732
2754
|
)
|
|
2733
2755
|
|
|
2734
2756
|
def _collect_data_references_by_scanning_stmts(self, irsb, irsb_addr):
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
from collections import defaultdict, namedtuple
|
|
3
3
|
import logging
|
|
4
|
+
import enum
|
|
5
|
+
from dataclasses import dataclass
|
|
4
6
|
from typing import Dict, List, Tuple, Set, Optional, Iterable, Union, Type, Any, NamedTuple, TYPE_CHECKING
|
|
5
7
|
|
|
6
8
|
import networkx
|
|
@@ -9,6 +11,7 @@ import ailment
|
|
|
9
11
|
|
|
10
12
|
from ...knowledge_base import KnowledgeBase
|
|
11
13
|
from ...knowledge_plugins.functions import Function
|
|
14
|
+
from ...knowledge_plugins.cfg.memory_data import MemoryDataSort
|
|
12
15
|
from ...codenode import BlockNode
|
|
13
16
|
from ...utils import timethis
|
|
14
17
|
from ...calling_conventions import SimRegArg, SimStackArg, SimStructArg, SimFunctionArgument
|
|
@@ -43,6 +46,29 @@ l = logging.getLogger(name=__name__)
|
|
|
43
46
|
BlockCache = namedtuple("BlockCache", ("rd", "prop"))
|
|
44
47
|
|
|
45
48
|
|
|
49
|
+
class ClinicMode(enum.Enum):
|
|
50
|
+
"""
|
|
51
|
+
Analysis mode for Clinic.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
DECOMPILE = 1
|
|
55
|
+
COLLECT_DATA_REFS = 2
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
|
|
59
|
+
class DataRefDesc:
|
|
60
|
+
"""
|
|
61
|
+
The fields of this class is compatible with items inside IRSB.data_refs.
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
data_addr: int
|
|
65
|
+
data_size: int
|
|
66
|
+
block_addr: int
|
|
67
|
+
stmt_idx: int
|
|
68
|
+
ins_addr: int
|
|
69
|
+
data_type_str: str
|
|
70
|
+
|
|
71
|
+
|
|
46
72
|
class Clinic(Analysis):
|
|
47
73
|
"""
|
|
48
74
|
A Clinic deals with AILments.
|
|
@@ -66,8 +92,9 @@ class Clinic(Analysis):
|
|
|
66
92
|
reset_variable_names=False,
|
|
67
93
|
rewrite_ites_to_diamonds=True,
|
|
68
94
|
cache: Optional["DecompilationCache"] = None,
|
|
95
|
+
mode: ClinicMode = ClinicMode.DECOMPILE,
|
|
69
96
|
):
|
|
70
|
-
if not func.normalized:
|
|
97
|
+
if not func.normalized and mode == ClinicMode.DECOMPILE:
|
|
71
98
|
raise ValueError("Decompilation must work on normalized function graphs.")
|
|
72
99
|
|
|
73
100
|
self.function = func
|
|
@@ -77,6 +104,7 @@ class Clinic(Analysis):
|
|
|
77
104
|
self.arg_list = None
|
|
78
105
|
self.variable_kb = variable_kb
|
|
79
106
|
self.externs: Set[SimMemoryVariable] = set()
|
|
107
|
+
self.data_refs: Dict[int, int] = {} # data address to instruction address
|
|
80
108
|
|
|
81
109
|
self._func_graph: Optional[networkx.DiGraph] = None
|
|
82
110
|
self._ail_manager = None
|
|
@@ -94,6 +122,7 @@ class Clinic(Analysis):
|
|
|
94
122
|
self._rewrite_ites_to_diamonds = rewrite_ites_to_diamonds
|
|
95
123
|
self.reaching_definitions: Optional[ReachingDefinitionsAnalysis] = None
|
|
96
124
|
self._cache = cache
|
|
125
|
+
self._mode = mode
|
|
97
126
|
|
|
98
127
|
self._register_save_areas_removed: bool = False
|
|
99
128
|
|
|
@@ -109,7 +138,12 @@ class Clinic(Analysis):
|
|
|
109
138
|
self._optimization_passes = get_default_optimization_passes(self.project.arch, self.project.simos.name)
|
|
110
139
|
l.debug("Get %d optimization passes for the current binary.", len(self._optimization_passes))
|
|
111
140
|
|
|
112
|
-
self.
|
|
141
|
+
if self._mode == ClinicMode.DECOMPILE:
|
|
142
|
+
self._analyze_for_decompiling()
|
|
143
|
+
elif self._mode == ClinicMode.COLLECT_DATA_REFS:
|
|
144
|
+
self._analyze_for_data_refs()
|
|
145
|
+
else:
|
|
146
|
+
raise TypeError(f"Unsupported analysis mode {self._mode}")
|
|
113
147
|
|
|
114
148
|
#
|
|
115
149
|
# Public methods
|
|
@@ -146,7 +180,7 @@ class Clinic(Analysis):
|
|
|
146
180
|
# Private methods
|
|
147
181
|
#
|
|
148
182
|
|
|
149
|
-
def
|
|
183
|
+
def _analyze_for_decompiling(self):
|
|
150
184
|
is_pcode_arch = ":" in self.project.arch.name
|
|
151
185
|
|
|
152
186
|
# Set up the function graph according to configurations
|
|
@@ -335,6 +369,78 @@ class Clinic(Analysis):
|
|
|
335
369
|
self.cc_graph = self.copy_graph()
|
|
336
370
|
self.externs = self._collect_externs(ail_graph, variable_kb)
|
|
337
371
|
|
|
372
|
+
def _analyze_for_data_refs(self):
|
|
373
|
+
# Set up the function graph according to configurations
|
|
374
|
+
self._update_progress(0.0, text="Setting up function graph")
|
|
375
|
+
self._set_function_graph()
|
|
376
|
+
|
|
377
|
+
# Remove alignment blocks
|
|
378
|
+
self._update_progress(5.0, text="Removing alignment blocks")
|
|
379
|
+
self._remove_alignment_blocks()
|
|
380
|
+
|
|
381
|
+
# if the graph is empty, don't continue
|
|
382
|
+
if not self._func_graph:
|
|
383
|
+
return
|
|
384
|
+
|
|
385
|
+
# initialize the AIL conversion manager
|
|
386
|
+
self._ail_manager = ailment.Manager(arch=self.project.arch)
|
|
387
|
+
|
|
388
|
+
# Track stack pointers
|
|
389
|
+
self._update_progress(15.0, text="Tracking stack pointers")
|
|
390
|
+
spt = self._track_stack_pointers()
|
|
391
|
+
|
|
392
|
+
# Convert VEX blocks to AIL blocks and then simplify them
|
|
393
|
+
|
|
394
|
+
self._update_progress(20.0, text="Converting VEX to AIL")
|
|
395
|
+
self._convert_all()
|
|
396
|
+
|
|
397
|
+
ail_graph = self._make_ailgraph()
|
|
398
|
+
self._remove_redundant_jump_blocks(ail_graph)
|
|
399
|
+
|
|
400
|
+
# full-function constant-only propagation
|
|
401
|
+
self._update_progress(33.0, text="Constant propagation")
|
|
402
|
+
self._simplify_function(
|
|
403
|
+
ail_graph,
|
|
404
|
+
remove_dead_memdefs=False,
|
|
405
|
+
unify_variables=False,
|
|
406
|
+
narrow_expressions=False,
|
|
407
|
+
only_consts=True,
|
|
408
|
+
fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
|
|
409
|
+
max_iterations=1,
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
# cached block-level reaching definition analysis results and propagator results
|
|
413
|
+
block_simplification_cache: Optional[Dict[ailment.Block, NamedTuple]] = {}
|
|
414
|
+
|
|
415
|
+
# Simplify blocks
|
|
416
|
+
# we never remove dead memory definitions before making callsites. otherwise stack arguments may go missing
|
|
417
|
+
# before they are recognized as stack arguments.
|
|
418
|
+
self._update_progress(35.0, text="Simplifying blocks 1")
|
|
419
|
+
ail_graph = self._simplify_blocks(
|
|
420
|
+
ail_graph, stack_pointer_tracker=spt, remove_dead_memdefs=False, cache=block_simplification_cache
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
# Simplify the entire function for the first time
|
|
424
|
+
self._update_progress(45.0, text="Simplifying function 1")
|
|
425
|
+
self._simplify_function(
|
|
426
|
+
ail_graph,
|
|
427
|
+
remove_dead_memdefs=False,
|
|
428
|
+
unify_variables=False,
|
|
429
|
+
narrow_expressions=False,
|
|
430
|
+
fold_callexprs_into_conditions=False,
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
# clear _blocks_by_addr_and_size so no one can use it again
|
|
434
|
+
# TODO: Totally remove this dict
|
|
435
|
+
self._blocks_by_addr_and_size = None
|
|
436
|
+
|
|
437
|
+
self.graph = ail_graph
|
|
438
|
+
self.arg_list = None
|
|
439
|
+
self.variable_kb = None
|
|
440
|
+
self.cc_graph = None
|
|
441
|
+
self.externs = None
|
|
442
|
+
self.data_refs: Dict[int, List[DataRefDesc]] = self._collect_data_refs(ail_graph)
|
|
443
|
+
|
|
338
444
|
def copy_graph(self) -> networkx.DiGraph:
|
|
339
445
|
"""
|
|
340
446
|
Copy AIL Graph.
|
|
@@ -1521,7 +1627,7 @@ class Clinic(Analysis):
|
|
|
1521
1627
|
|
|
1522
1628
|
def handle_expr(
|
|
1523
1629
|
expr_idx: int,
|
|
1524
|
-
expr: ailment.expression.
|
|
1630
|
+
expr: ailment.expression.Expression,
|
|
1525
1631
|
stmt_idx: int,
|
|
1526
1632
|
stmt: ailment.statement.Statement,
|
|
1527
1633
|
block: Optional[ailment.Block],
|
|
@@ -1546,6 +1652,100 @@ class Clinic(Analysis):
|
|
|
1546
1652
|
AILGraphWalker(ail_graph, walker.walk).walk()
|
|
1547
1653
|
return variables
|
|
1548
1654
|
|
|
1655
|
+
@staticmethod
|
|
1656
|
+
def _collect_data_refs(ail_graph) -> Dict[int, List[DataRefDesc]]:
|
|
1657
|
+
# pylint:disable=unused-argument
|
|
1658
|
+
walker = ailment.AILBlockWalker()
|
|
1659
|
+
data_refs: Dict[int, List[DataRefDesc]] = defaultdict(list)
|
|
1660
|
+
|
|
1661
|
+
def handle_Const(
|
|
1662
|
+
expr_idx: int,
|
|
1663
|
+
expr: ailment.expression.Const,
|
|
1664
|
+
stmt_idx: int,
|
|
1665
|
+
stmt: ailment.statement.Statement,
|
|
1666
|
+
block: Optional[ailment.Block],
|
|
1667
|
+
):
|
|
1668
|
+
if isinstance(expr.value, int) and hasattr(expr, "ins_addr"):
|
|
1669
|
+
data_refs[block.addr].append(
|
|
1670
|
+
DataRefDesc(expr.value, 1, block.addr, stmt_idx, expr.ins_addr, MemoryDataSort.Unknown)
|
|
1671
|
+
)
|
|
1672
|
+
if hasattr(expr, "deref_src_addr"):
|
|
1673
|
+
data_refs[block.addr].append(
|
|
1674
|
+
DataRefDesc(
|
|
1675
|
+
expr.deref_src_addr, expr.size, block.addr, stmt_idx, expr.ins_addr, MemoryDataSort.Unknown
|
|
1676
|
+
)
|
|
1677
|
+
)
|
|
1678
|
+
|
|
1679
|
+
def handle_Load(
|
|
1680
|
+
expr_idx: int,
|
|
1681
|
+
expr: ailment.expression.Load,
|
|
1682
|
+
stmt_idx: int,
|
|
1683
|
+
stmt: ailment.statement.Statement,
|
|
1684
|
+
block: Optional[ailment.Block],
|
|
1685
|
+
):
|
|
1686
|
+
if isinstance(expr.addr, ailment.expression.Const):
|
|
1687
|
+
addr = expr.addr
|
|
1688
|
+
if isinstance(addr.value, int) and hasattr(addr, "ins_addr"):
|
|
1689
|
+
data_refs[block.addr].append(
|
|
1690
|
+
DataRefDesc(
|
|
1691
|
+
addr.value,
|
|
1692
|
+
expr.size,
|
|
1693
|
+
block.addr,
|
|
1694
|
+
stmt_idx,
|
|
1695
|
+
addr.ins_addr,
|
|
1696
|
+
MemoryDataSort.Integer if expr.size == 4 else MemoryDataSort.Unknown,
|
|
1697
|
+
)
|
|
1698
|
+
)
|
|
1699
|
+
if hasattr(addr, "deref_src_addr"):
|
|
1700
|
+
data_refs[block.addr].append(
|
|
1701
|
+
DataRefDesc(
|
|
1702
|
+
addr.deref_src_addr,
|
|
1703
|
+
expr.size,
|
|
1704
|
+
block.addr,
|
|
1705
|
+
stmt_idx,
|
|
1706
|
+
addr.ins_addr,
|
|
1707
|
+
MemoryDataSort.Integer if expr.size == 4 else MemoryDataSort.Unknown,
|
|
1708
|
+
)
|
|
1709
|
+
)
|
|
1710
|
+
return None
|
|
1711
|
+
|
|
1712
|
+
return ailment.AILBlockWalker._handle_Load(walker, expr_idx, expr, stmt_idx, stmt, block)
|
|
1713
|
+
|
|
1714
|
+
def handle_Store(stmt_idx: int, stmt: ailment.statement.Store, block: Optional[ailment.Block]):
|
|
1715
|
+
if isinstance(stmt.addr, ailment.expression.Const):
|
|
1716
|
+
addr = stmt.addr
|
|
1717
|
+
if isinstance(addr.value, int) and hasattr(addr, "ins_addr"):
|
|
1718
|
+
data_refs[block.addr].append(
|
|
1719
|
+
DataRefDesc(
|
|
1720
|
+
addr.value,
|
|
1721
|
+
stmt.size,
|
|
1722
|
+
block.addr,
|
|
1723
|
+
stmt_idx,
|
|
1724
|
+
addr.ins_addr,
|
|
1725
|
+
MemoryDataSort.Integer if stmt.size == 4 else MemoryDataSort.Unknown,
|
|
1726
|
+
)
|
|
1727
|
+
)
|
|
1728
|
+
if hasattr(addr, "deref_src_addr"):
|
|
1729
|
+
data_refs[block.addr].append(
|
|
1730
|
+
DataRefDesc(
|
|
1731
|
+
addr.deref_src_addr,
|
|
1732
|
+
stmt.size,
|
|
1733
|
+
block.addr,
|
|
1734
|
+
stmt_idx,
|
|
1735
|
+
addr.ins_addr,
|
|
1736
|
+
MemoryDataSort.Integer if stmt.size == 4 else MemoryDataSort.Unknown,
|
|
1737
|
+
)
|
|
1738
|
+
)
|
|
1739
|
+
return None
|
|
1740
|
+
|
|
1741
|
+
return ailment.AILBlockWalker._handle_Store(walker, stmt_idx, stmt, block)
|
|
1742
|
+
|
|
1743
|
+
walker.stmt_handlers[ailment.statement.Store] = handle_Store
|
|
1744
|
+
walker.expr_handlers[ailment.expression.Load] = handle_Load
|
|
1745
|
+
walker.expr_handlers[ailment.expression.Const] = handle_Const
|
|
1746
|
+
AILGraphWalker(ail_graph, walker.walk).walk()
|
|
1747
|
+
return data_refs
|
|
1748
|
+
|
|
1549
1749
|
def _next_atom(self) -> int:
|
|
1550
1750
|
return self._ail_manager.next_atom()
|
|
1551
1751
|
|
|
@@ -3,10 +3,11 @@ from typing import Generator, Dict, Any, Optional, Set, List
|
|
|
3
3
|
import operator
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
|
+
import ailment
|
|
7
|
+
import claripy
|
|
6
8
|
import networkx
|
|
9
|
+
from unique_log_filter import UniqueLogFilter
|
|
7
10
|
|
|
8
|
-
import claripy
|
|
9
|
-
import ailment
|
|
10
11
|
|
|
11
12
|
from angr.utils.graph import GraphUtils
|
|
12
13
|
from ...utils.lazy_import import lazy_import
|
|
@@ -39,6 +40,7 @@ else:
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
l = logging.getLogger(__name__)
|
|
43
|
+
l.addFilter(UniqueLogFilter())
|
|
42
44
|
|
|
43
45
|
|
|
44
46
|
_UNIFIABLE_COMPARISONS = {
|
|
@@ -111,6 +113,10 @@ _ail2claripy_op_mapping = {
|
|
|
111
113
|
"Reinterpret": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
112
114
|
"Rol": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
113
115
|
"Ror": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
116
|
+
"LogicalXor": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
117
|
+
"Carry": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
118
|
+
"SCarry": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
119
|
+
"SBorrow": lambda expr, _, m: _dummy_bvs(expr, m),
|
|
114
120
|
}
|
|
115
121
|
|
|
116
122
|
#
|
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from typing import List, Tuple, Optional, Iterable, Union, Type, Set, Dict, Any, TYPE_CHECKING
|
|
5
5
|
|
|
6
|
+
import networkx
|
|
6
7
|
from cle import SymbolType
|
|
7
8
|
import ailment
|
|
8
9
|
|
|
@@ -195,15 +196,12 @@ class Decompiler(Analysis):
|
|
|
195
196
|
ite_exprs=ite_exprs,
|
|
196
197
|
)
|
|
197
198
|
|
|
198
|
-
# recover regions
|
|
199
|
-
|
|
200
|
-
self.
|
|
201
|
-
graph=clinic.graph,
|
|
202
|
-
cond_proc=cond_proc,
|
|
203
|
-
force_loop_single_exit=self._force_loop_single_exit,
|
|
204
|
-
complete_successors=self._complete_successors,
|
|
205
|
-
**self.options_to_params(self.options_by_class["region_identifier"]),
|
|
199
|
+
# recover regions, delay updating when we have optimizations that may update regions themselves
|
|
200
|
+
delay_graph_updates = any(
|
|
201
|
+
pass_.STAGE == OptimizationPassStage.DURING_REGION_IDENTIFICATION for pass_ in self._optimization_passes
|
|
206
202
|
)
|
|
203
|
+
ri = self._recover_regions(clinic.graph, cond_proc, update_graph=not delay_graph_updates)
|
|
204
|
+
|
|
207
205
|
# run optimizations that may require re-RegionIdentification
|
|
208
206
|
clinic.graph, ri = self._run_region_simplification_passes(
|
|
209
207
|
clinic.graph,
|
|
@@ -265,6 +263,17 @@ class Decompiler(Analysis):
|
|
|
265
263
|
self.cache.codegen = codegen
|
|
266
264
|
self.cache.clinic = self.clinic
|
|
267
265
|
|
|
266
|
+
def _recover_regions(self, graph: networkx.DiGraph, condition_processor, update_graph: bool = True):
|
|
267
|
+
return self.project.analyses[RegionIdentifier].prep(kb=self.kb)(
|
|
268
|
+
self.func,
|
|
269
|
+
graph=graph,
|
|
270
|
+
cond_proc=condition_processor,
|
|
271
|
+
update_graph=update_graph,
|
|
272
|
+
force_loop_single_exit=self._force_loop_single_exit,
|
|
273
|
+
complete_successors=self._complete_successors,
|
|
274
|
+
**self.options_to_params(self.options_by_class["region_identifier"]),
|
|
275
|
+
)
|
|
276
|
+
|
|
268
277
|
@timethis
|
|
269
278
|
def _run_graph_simplification_passes(self, ail_graph, reaching_definitions, **kwargs):
|
|
270
279
|
"""
|
|
@@ -364,16 +373,9 @@ class Decompiler(Analysis):
|
|
|
364
373
|
|
|
365
374
|
cond_proc = ConditionProcessor(self.project.arch)
|
|
366
375
|
# always update RI on graph change
|
|
367
|
-
ri = self.
|
|
368
|
-
self.func,
|
|
369
|
-
graph=ail_graph,
|
|
370
|
-
cond_proc=cond_proc,
|
|
371
|
-
force_loop_single_exit=self._force_loop_single_exit,
|
|
372
|
-
complete_successors=self._complete_successors,
|
|
373
|
-
**self.options_to_params(self.options_by_class["region_identifier"]),
|
|
374
|
-
)
|
|
376
|
+
ri = self._recover_regions(ail_graph, cond_proc, update_graph=False)
|
|
375
377
|
|
|
376
|
-
return ail_graph,
|
|
378
|
+
return ail_graph, self._recover_regions(ail_graph, ConditionProcessor(self.project.arch), update_graph=True)
|
|
377
379
|
|
|
378
380
|
@timethis
|
|
379
381
|
def _run_post_structuring_simplification_passes(self, seq_node, **kwargs):
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from typing import Set
|
|
2
|
-
from collections import defaultdict
|
|
3
2
|
|
|
4
3
|
import ailment
|
|
5
4
|
|
|
@@ -10,32 +9,28 @@ class Goto:
|
|
|
10
9
|
will differ).
|
|
11
10
|
"""
|
|
12
11
|
|
|
13
|
-
def __init__(self,
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
self.block_addr = block_addr
|
|
20
|
-
self.ins_addr = ins_addr
|
|
21
|
-
self.target_addr = target_addr
|
|
12
|
+
def __init__(self, src_addr, dst_addr, src_idx=None, dst_idx=None, src_ins_addr=None):
|
|
13
|
+
self.src_addr = src_addr
|
|
14
|
+
self.dst_addr = dst_addr
|
|
15
|
+
self.src_idx = src_idx
|
|
16
|
+
self.dst_idx = dst_idx
|
|
17
|
+
self.src_ins_addr = src_ins_addr
|
|
22
18
|
|
|
23
19
|
def __hash__(self):
|
|
24
|
-
return hash(f"{self.
|
|
20
|
+
return hash(f"{self.src_addr}{self.dst_addr}{self.src_idx}{self.dst_idx}")
|
|
25
21
|
|
|
26
22
|
def __str__(self):
|
|
27
|
-
if
|
|
23
|
+
if self.src_addr is None or self.dst_addr is None:
|
|
28
24
|
return f"<Goto {self.__hash__()}>"
|
|
29
25
|
|
|
30
|
-
|
|
26
|
+
src_idx_str = "" if self.src_idx is None else f".{self.src_idx}"
|
|
27
|
+
dst_idx_str = "" if self.dst_idx is None else f".{self.dst_idx}"
|
|
28
|
+
src_ins_addr_str = "" if self.src_ins_addr is None else f"{hex(self.src_ins_addr)}"
|
|
29
|
+
return f"<Goto: [{hex(self.src_addr)}@{src_ins_addr_str}{src_idx_str}] -> {hex(self.dst_addr)}{dst_idx_str}>"
|
|
31
30
|
|
|
32
31
|
def __repr__(self):
|
|
33
32
|
return self.__str__()
|
|
34
33
|
|
|
35
|
-
@property
|
|
36
|
-
def addr(self):
|
|
37
|
-
return self.block_addr or self.ins_addr
|
|
38
|
-
|
|
39
34
|
|
|
40
35
|
class GotoManager:
|
|
41
36
|
"""
|
|
@@ -55,38 +50,26 @@ class GotoManager:
|
|
|
55
50
|
def __repr__(self):
|
|
56
51
|
return self.__str__()
|
|
57
52
|
|
|
58
|
-
def gotos_by_addr(self, force_refresh=False):
|
|
59
|
-
"""
|
|
60
|
-
Returns a dictionary of gotos by addresses. This set can CONTAIN DUPLICATES, so don't trust
|
|
61
|
-
this for a valid number of gotos. If you need the real number of gotos, just get the size of
|
|
62
|
-
self.gotos. This set should mostly be used when checking if a block contains a goto, since recording
|
|
63
|
-
can be recorded on null-addr blocks.
|
|
64
|
-
|
|
65
|
-
:param force_refresh: Don't use the cached self._gotos_by_addr
|
|
66
|
-
:return:
|
|
67
|
-
"""
|
|
68
|
-
|
|
69
|
-
if not force_refresh and self._gotos_by_addr:
|
|
70
|
-
return self._gotos_by_addr
|
|
71
|
-
|
|
72
|
-
self._gotos_by_addr = defaultdict(set)
|
|
73
|
-
for goto in self.gotos:
|
|
74
|
-
if goto.block_addr is not None:
|
|
75
|
-
self._gotos_by_addr[goto.block_addr].add(goto)
|
|
76
|
-
|
|
77
|
-
if goto.ins_addr is not None:
|
|
78
|
-
self._gotos_by_addr[goto.ins_addr].add(goto)
|
|
79
|
-
|
|
80
|
-
return self._gotos_by_addr
|
|
81
|
-
|
|
82
53
|
def gotos_in_block(self, block: ailment.Block) -> Set[Goto]:
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
return
|
|
54
|
+
gotos_found = set()
|
|
55
|
+
for goto in self.gotos:
|
|
56
|
+
if goto.src_addr == block.addr:
|
|
57
|
+
gotos_found.add(goto)
|
|
58
|
+
else:
|
|
59
|
+
block_addrs = {stmt.ins_addr for stmt in block.statements if "ins_addr" in stmt.tags}
|
|
60
|
+
if goto.src_ins_addr in block_addrs:
|
|
61
|
+
gotos_found.add(goto)
|
|
62
|
+
|
|
63
|
+
return gotos_found
|
|
64
|
+
|
|
65
|
+
def is_goto_edge(self, src: ailment.Block, dst: ailment.Block):
|
|
66
|
+
src_gotos = self.gotos_in_block(src)
|
|
67
|
+
for goto in src_gotos:
|
|
68
|
+
if goto.dst_addr == dst.addr:
|
|
69
|
+
return True
|
|
70
|
+
else:
|
|
71
|
+
block_addrs = {stmt.ins_addr for stmt in dst.statements if "ins_addr" in stmt.tags}
|
|
72
|
+
if goto.dst_addr in block_addrs:
|
|
73
|
+
return True
|
|
74
|
+
|
|
75
|
+
return False
|
|
@@ -13,7 +13,7 @@ from .lowered_switch_simplifier import LoweredSwitchSimplifier
|
|
|
13
13
|
from .multi_simplifier import MultiSimplifier
|
|
14
14
|
from .div_simplifier import DivSimplifier
|
|
15
15
|
from .mod_simplifier import ModSimplifier
|
|
16
|
-
from .
|
|
16
|
+
from .return_duplicator import ReturnDuplicator
|
|
17
17
|
from .const_derefs import ConstantDereferencesSimplifier
|
|
18
18
|
from .register_save_area_simplifier import RegisterSaveAreaSimplifier
|
|
19
19
|
from .ret_addr_save_simplifier import RetAddrSaveSimplifier
|
|
@@ -22,7 +22,7 @@ from .flip_boolean_cmp import FlipBooleanCmp
|
|
|
22
22
|
from .ret_deduplicator import ReturnDeduplicator
|
|
23
23
|
from .win_stack_canary_simplifier import WinStackCanarySimplifier
|
|
24
24
|
|
|
25
|
-
|
|
25
|
+
# order matters!
|
|
26
26
|
_all_optimization_passes = [
|
|
27
27
|
(RegisterSaveAreaSimplifier, True),
|
|
28
28
|
(StackCanarySimplifier, True),
|
|
@@ -35,11 +35,11 @@ _all_optimization_passes = [
|
|
|
35
35
|
(RetAddrSaveSimplifier, True),
|
|
36
36
|
(X86GccGetPcSimplifier, True),
|
|
37
37
|
(ITERegionConverter, True),
|
|
38
|
-
(ReturnDeduplicator, True),
|
|
39
|
-
(LoweredSwitchSimplifier, False),
|
|
40
|
-
(EagerReturnsSimplifier, True),
|
|
41
38
|
(ITEExprConverter, True),
|
|
42
39
|
(ExprOpSwapper, True),
|
|
40
|
+
(ReturnDuplicator, True),
|
|
41
|
+
(LoweredSwitchSimplifier, False),
|
|
42
|
+
(ReturnDeduplicator, True),
|
|
43
43
|
(FlipBooleanCmp, True),
|
|
44
44
|
]
|
|
45
45
|
|
|
@@ -3,11 +3,13 @@ import logging
|
|
|
3
3
|
import math
|
|
4
4
|
|
|
5
5
|
from ailment import Expr
|
|
6
|
+
from unique_log_filter import UniqueLogFilter
|
|
6
7
|
|
|
7
8
|
from .engine_base import SimplifierAILEngine, SimplifierAILState
|
|
8
9
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
9
10
|
|
|
10
11
|
_l = logging.getLogger(name=__name__)
|
|
12
|
+
_l.addFilter(UniqueLogFilter())
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
class DivSimplifierAILEngine(SimplifierAILEngine):
|
|
@@ -139,7 +139,7 @@ class LoweredSwitchSimplifier(OptimizationPass):
|
|
|
139
139
|
"AMD64",
|
|
140
140
|
]
|
|
141
141
|
PLATFORMS = ["linux", "windows"]
|
|
142
|
-
STAGE = OptimizationPassStage.
|
|
142
|
+
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
143
143
|
NAME = "Convert lowered switch-cases (if-else) to switch-cases"
|
|
144
144
|
DESCRIPTION = (
|
|
145
145
|
"Convert lowered switch-cases (if-else) to switch-cases. Only works when the Phoenix structuring "
|