angr 9.2.114__py3-none-macosx_11_0_arm64.whl → 9.2.115__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +2 -2
- angr/analyses/cfg/cfg_fast.py +1 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +2 -2
- angr/analyses/decompiler/decompilation_options.py +2 -12
- angr/analyses/decompiler/decompiler.py +14 -3
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +3 -0
- angr/analyses/decompiler/optimization_passes/cross_jump_reverter.py +0 -3
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +2 -3
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +9 -2
- angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +2 -0
- angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +2 -0
- angr/analyses/decompiler/structuring/__init__.py +6 -2
- angr/analyses/decompiler/structuring/phoenix.py +28 -91
- angr/analyses/decompiler/structuring/recursive_structurer.py +0 -3
- angr/analyses/decompiler/structuring/sailr.py +111 -0
- angr/analyses/decompiler/structuring/structurer_base.py +0 -2
- angr/analyses/reaching_definitions/function_handler.py +11 -1
- angr/analyses/reaching_definitions/function_handler_library/__init__.py +0 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +260 -0
- angr/analyses/reaching_definitions/function_handler_library/stdlib.py +151 -0
- angr/analyses/reaching_definitions/function_handler_library/string.py +93 -0
- angr/analyses/reaching_definitions/function_handler_library/unistd.py +23 -0
- angr/analyses/reaching_definitions/rd_state.py +3 -1
- angr/analyses/vfg.py +13 -14
- angr/code_location.py +4 -4
- angr/engines/vex/heavy/heavy.py +1 -1
- angr/lib/angr_native.dylib +0 -0
- angr/procedures/libc/strlen.py +5 -2
- angr/sim_variable.py +3 -18
- angr/state_plugins/solver.py +3 -9
- angr/storage/memory_mixins/address_concretization_mixin.py +1 -1
- angr/storage/memory_mixins/regioned_memory/abstract_merger_mixin.py +4 -2
- angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +5 -5
- angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +3 -3
- {angr-9.2.114.dist-info → angr-9.2.115.dist-info}/METADATA +6 -6
- {angr-9.2.114.dist-info → angr-9.2.115.dist-info}/RECORD +41 -35
- {angr-9.2.114.dist-info → angr-9.2.115.dist-info}/WHEEL +1 -1
- {angr-9.2.114.dist-info → angr-9.2.115.dist-info}/LICENSE +0 -0
- {angr-9.2.114.dist-info → angr-9.2.115.dist-info}/entry_points.txt +0 -0
- {angr-9.2.114.dist-info → angr-9.2.115.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/__main__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
|
|
3
|
-
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES
|
|
3
|
+
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES, DEFAULT_STRUCTURER
|
|
4
4
|
from angr.analyses.decompiler.utils import decompile_functions
|
|
5
5
|
|
|
6
6
|
|
|
@@ -42,7 +42,7 @@ def main():
|
|
|
42
42
|
"--structurer",
|
|
43
43
|
help="The structuring algorithm to use for decompilation.",
|
|
44
44
|
choices=STRUCTURER_CLASSES.keys(),
|
|
45
|
-
default=
|
|
45
|
+
default=DEFAULT_STRUCTURER,
|
|
46
46
|
)
|
|
47
47
|
|
|
48
48
|
args = parser.parse_args()
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -2981,7 +2981,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2981
2981
|
simsucc = self.project.factory.default_engine.process(self._initial_state, irsb, force_addr=addr)
|
|
2982
2982
|
if len(simsucc.successors) == 1:
|
|
2983
2983
|
ip = simsucc.successors[0].ip
|
|
2984
|
-
if ip
|
|
2984
|
+
if claripy.backends.concrete.convert(ip) is not ip:
|
|
2985
2985
|
target_addr = ip.concrete_value
|
|
2986
2986
|
obj = self.project.loader.find_object_containing(target_addr, membership_check=False)
|
|
2987
2987
|
if (obj is not None and obj is not self.project.loader.main_object) or self.project.is_hooked(
|
|
@@ -1734,7 +1734,7 @@ class JumpTableResolver(IndirectJumpResolver):
|
|
|
1734
1734
|
# full-function data propagation before performing jump table recovery.
|
|
1735
1735
|
l.debug("Multiple statements adding bases, not supported yet") # FIXME: Just check the addresses?
|
|
1736
1736
|
|
|
1737
|
-
jumptable_addr_vsa = jumptable_addr
|
|
1737
|
+
jumptable_addr_vsa = claripy.backends.vsa.convert(jumptable_addr)
|
|
1738
1738
|
|
|
1739
1739
|
if not isinstance(jumptable_addr_vsa, claripy.vsa.StridedInterval):
|
|
1740
1740
|
return None
|
|
@@ -2103,7 +2103,7 @@ class JumpTableResolver(IndirectJumpResolver):
|
|
|
2103
2103
|
|
|
2104
2104
|
read_length = state.inspect.mem_read_length
|
|
2105
2105
|
if not isinstance(read_length, int):
|
|
2106
|
-
read_length = read_length.
|
|
2106
|
+
read_length = claripy.backends.vsa.convert(read_length).upper_bound
|
|
2107
2107
|
if read_length > 16:
|
|
2108
2108
|
return
|
|
2109
2109
|
new_read_addr = state.solver.BVV(UninitReadMeta.uninit_read_base, state.arch.bits)
|
|
@@ -193,21 +193,11 @@ options = [
|
|
|
193
193
|
"recursive_structurer",
|
|
194
194
|
"structurer_cls",
|
|
195
195
|
category="Structuring",
|
|
196
|
-
default_value="
|
|
197
|
-
candidate_values=["
|
|
196
|
+
default_value="SAILR",
|
|
197
|
+
candidate_values=["SAILR", "Phoenix", "DREAM"],
|
|
198
198
|
clears_cache=True,
|
|
199
199
|
convert=structurer_class_from_name,
|
|
200
200
|
),
|
|
201
|
-
O(
|
|
202
|
-
"Improve structuring algorithm",
|
|
203
|
-
"If applicable in deeper structurer, like Phoenix, improves decompilation output",
|
|
204
|
-
bool,
|
|
205
|
-
"recursive_structurer",
|
|
206
|
-
"improve_structurer",
|
|
207
|
-
category="Structuring",
|
|
208
|
-
default_value=True,
|
|
209
|
-
clears_cache=True,
|
|
210
|
-
),
|
|
211
201
|
O(
|
|
212
202
|
"C-style null compares",
|
|
213
203
|
"Rewrites the (x == 0) => (!x) && (x != 0) => (x)",
|
|
@@ -14,7 +14,7 @@ from ...knowledge_base import KnowledgeBase
|
|
|
14
14
|
from ...sim_variable import SimMemoryVariable, SimRegisterVariable, SimStackVariable
|
|
15
15
|
from ...utils import timethis
|
|
16
16
|
from .. import Analysis, AnalysesHub
|
|
17
|
-
from .structuring import RecursiveStructurer, PhoenixStructurer
|
|
17
|
+
from .structuring import RecursiveStructurer, PhoenixStructurer, DEFAULT_STRUCTURER
|
|
18
18
|
from .region_identifier import RegionIdentifier
|
|
19
19
|
from .optimization_passes.optimization_pass import OptimizationPassStage
|
|
20
20
|
from .optimization_passes import get_default_optimization_passes
|
|
@@ -146,8 +146,9 @@ class Decompiler(Analysis):
|
|
|
146
146
|
self._complete_successors = False
|
|
147
147
|
self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
|
|
148
148
|
if "structurer_cls" not in self._recursive_structurer_params:
|
|
149
|
-
self._recursive_structurer_params["structurer_cls"] =
|
|
150
|
-
|
|
149
|
+
self._recursive_structurer_params["structurer_cls"] = DEFAULT_STRUCTURER
|
|
150
|
+
# is the algorithm based on Phoenix (a schema-based algorithm)?
|
|
151
|
+
if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
|
|
151
152
|
self._force_loop_single_exit = False
|
|
152
153
|
self._complete_successors = True
|
|
153
154
|
fold_callexprs_into_conditions = True
|
|
@@ -316,6 +317,11 @@ class Decompiler(Analysis):
|
|
|
316
317
|
continue
|
|
317
318
|
if pass_.STRUCTURING:
|
|
318
319
|
if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
|
|
320
|
+
l.warning(
|
|
321
|
+
"Skipping %s because it does not support structuring algorithm: %s",
|
|
322
|
+
pass_,
|
|
323
|
+
self._recursive_structurer_params["structurer_cls"].NAME,
|
|
324
|
+
)
|
|
319
325
|
continue
|
|
320
326
|
|
|
321
327
|
a = pass_(
|
|
@@ -367,6 +373,11 @@ class Decompiler(Analysis):
|
|
|
367
373
|
continue
|
|
368
374
|
if pass_.STRUCTURING:
|
|
369
375
|
if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
|
|
376
|
+
l.warning(
|
|
377
|
+
"Skipping %s because it does not support structuring algorithm: %s",
|
|
378
|
+
pass_,
|
|
379
|
+
self._recursive_structurer_params["structurer_cls"].NAME,
|
|
380
|
+
)
|
|
370
381
|
continue
|
|
371
382
|
|
|
372
383
|
a = pass_(
|
|
@@ -10,6 +10,7 @@ from ailment.statement import Call, Statement, ConditionalJump, Assignment, Stor
|
|
|
10
10
|
from ailment.expression import Convert, Register, Expression
|
|
11
11
|
|
|
12
12
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
13
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
13
14
|
from ....knowledge_plugins.key_definitions.atoms import MemoryLocation
|
|
14
15
|
from ....knowledge_plugins.key_definitions.constants import OP_BEFORE
|
|
15
16
|
|
|
@@ -140,6 +141,8 @@ class ConstPropOptReverter(OptimizationPass):
|
|
|
140
141
|
|
|
141
142
|
ARCHES = None
|
|
142
143
|
PLATFORMS = None
|
|
144
|
+
# allow DREAM since it's useful for return merging
|
|
145
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
143
146
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
144
147
|
NAME = "Revert Constant Propagation Optimizations"
|
|
145
148
|
DESCRIPTION = __doc__.strip()
|
|
@@ -21,11 +21,8 @@ class CrossJumpReverter(StructuringOptimizationPass):
|
|
|
21
21
|
a max of max_opt_iters times. Second, it will not duplicate a block with too many calls.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
ARCHES = None
|
|
25
|
-
PLATFORMS = None
|
|
26
24
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
27
25
|
NAME = "Duplicate linear blocks with gotos"
|
|
28
|
-
STRUCTURING = ["phoenix"]
|
|
29
26
|
DESCRIPTION = inspect.cleandoc(__doc__).strip()
|
|
30
27
|
|
|
31
28
|
def __init__(
|
|
@@ -11,7 +11,7 @@ from ailment.expression import Expression, BinaryOp, Const, Load
|
|
|
11
11
|
from angr.utils.graph import GraphUtils
|
|
12
12
|
from ..utils import first_nonlabel_statement, remove_last_statement
|
|
13
13
|
from ..structuring.structurer_nodes import IncompleteSwitchCaseHeadStatement, SequenceNode, MultiNode
|
|
14
|
-
from .optimization_pass import
|
|
14
|
+
from .optimization_pass import MultipleBlocksException, StructuringOptimizationPass
|
|
15
15
|
from ..region_simplifiers.switch_cluster_simplifier import SwitchClusterFinder
|
|
16
16
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
@@ -143,14 +143,13 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
143
143
|
As a hack for now, we only run this deoptimization on Linux binaries.
|
|
144
144
|
"""
|
|
145
145
|
|
|
146
|
+
# TODO: this needs to be updated to support Windows, but detect and disable on MSVC
|
|
146
147
|
PLATFORMS = ["linux"]
|
|
147
|
-
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
148
148
|
NAME = "Convert lowered switch-cases (if-else) to switch-cases"
|
|
149
149
|
DESCRIPTION = (
|
|
150
150
|
"Convert lowered switch-cases (if-else) to switch-cases. Only works when the Phoenix structuring "
|
|
151
151
|
"algorithm is in use."
|
|
152
152
|
)
|
|
153
|
-
STRUCTURING = ["phoenix"]
|
|
154
153
|
|
|
155
154
|
def __init__(self, func, min_distinct_cases=2, **kwargs):
|
|
156
155
|
super().__init__(
|
|
@@ -10,7 +10,7 @@ import ailment
|
|
|
10
10
|
from angr.analyses.decompiler import RegionIdentifier
|
|
11
11
|
from angr.analyses.decompiler.condition_processor import ConditionProcessor
|
|
12
12
|
from angr.analyses.decompiler.goto_manager import GotoManager
|
|
13
|
-
from angr.analyses.decompiler.structuring import RecursiveStructurer,
|
|
13
|
+
from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
|
|
14
14
|
from angr.analyses.decompiler.utils import add_labels
|
|
15
15
|
from angr.analyses.decompiler.seq_cf_structure_counter import ControlFlowStructureCounter
|
|
16
16
|
|
|
@@ -266,10 +266,17 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
266
266
|
The base class for any optimization pass that requires structuring. Optimization passes that inherit from this class
|
|
267
267
|
should directly depend on structuring artifacts, such as regions and gotos. Otherwise, they should use
|
|
268
268
|
OptimizationPass. This is the heaviest (computation time) optimization pass class.
|
|
269
|
+
|
|
270
|
+
By default this type of optimization should work:
|
|
271
|
+
- on any architecture
|
|
272
|
+
- on any platform
|
|
273
|
+
- during region identification (to have iterative structuring)
|
|
274
|
+
- only with the SAILR structuring algorithm
|
|
269
275
|
"""
|
|
270
276
|
|
|
271
277
|
ARCHES = None
|
|
272
278
|
PLATFORMS = None
|
|
279
|
+
STRUCTURING = [SAILRStructurer.NAME]
|
|
273
280
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
274
281
|
|
|
275
282
|
def __init__(
|
|
@@ -401,7 +408,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
401
408
|
self._ri.region,
|
|
402
409
|
cond_proc=self._ri.cond_proc,
|
|
403
410
|
func=self._func,
|
|
404
|
-
structurer_cls=
|
|
411
|
+
structurer_cls=SAILRStructurer,
|
|
405
412
|
)
|
|
406
413
|
# pylint:disable=broad-except
|
|
407
414
|
except Exception:
|
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
from ailment import Block
|
|
5
5
|
from ailment.statement import ConditionalJump, Return
|
|
6
6
|
|
|
7
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
7
8
|
from ....utils.graph import subgraph_between_nodes
|
|
8
9
|
from ..utils import remove_labels, to_ail_supergraph, update_labels
|
|
9
10
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
@@ -28,6 +29,7 @@ class ReturnDeduplicator(OptimizationPass):
|
|
|
28
29
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
29
30
|
NAME = "Deduplicates return statements that may have been duplicated"
|
|
30
31
|
DESCRIPTION = __doc__.strip()
|
|
32
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
31
33
|
|
|
32
34
|
def __init__(self, func, **kwargs):
|
|
33
35
|
super().__init__(func, **kwargs)
|
|
@@ -4,6 +4,7 @@ import networkx
|
|
|
4
4
|
|
|
5
5
|
from .return_duplicator_base import ReturnDuplicatorBase
|
|
6
6
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
7
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
7
8
|
|
|
8
9
|
_l = logging.getLogger(name=__name__)
|
|
9
10
|
|
|
@@ -19,6 +20,7 @@ class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
|
|
|
19
20
|
STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
|
|
20
21
|
NAME = "Duplicate return-only blocks (high)"
|
|
21
22
|
DESCRIPTION = __doc__
|
|
23
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
22
24
|
|
|
23
25
|
def __init__(
|
|
24
26
|
self,
|
|
@@ -2,14 +2,18 @@ from typing import Optional, Type
|
|
|
2
2
|
|
|
3
3
|
from .dream import DreamStructurer
|
|
4
4
|
from .phoenix import PhoenixStructurer
|
|
5
|
+
from .sailr import SAILRStructurer
|
|
5
6
|
from .recursive_structurer import RecursiveStructurer
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
STRUCTURER_CLASSES = {
|
|
9
|
-
|
|
10
|
-
|
|
10
|
+
SAILRStructurer.NAME: SAILRStructurer,
|
|
11
|
+
PhoenixStructurer.NAME: PhoenixStructurer,
|
|
12
|
+
DreamStructurer.NAME: DreamStructurer,
|
|
11
13
|
}
|
|
12
14
|
|
|
15
|
+
DEFAULT_STRUCTURER = SAILRStructurer
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
def structurer_class_from_name(name: str) -> type | None:
|
|
15
19
|
return STRUCTURER_CLASSES.get(name.lower(), None)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# pylint:disable=line-too-long,import-outside-toplevel,import-error,multiple-statements,too-many-boolean-expressions
|
|
2
2
|
from typing import Any, DefaultDict, Optional, TYPE_CHECKING
|
|
3
3
|
from collections import OrderedDict as ODict
|
|
4
|
-
from collections import defaultdict
|
|
4
|
+
from collections import defaultdict
|
|
5
5
|
from enum import Enum
|
|
6
6
|
import logging
|
|
7
7
|
|
|
@@ -12,7 +12,7 @@ from ailment.block import Block
|
|
|
12
12
|
from ailment.statement import Statement, ConditionalJump, Jump, Label, Return
|
|
13
13
|
from ailment.expression import Const, UnaryOp, MultiStatementExpression
|
|
14
14
|
|
|
15
|
-
from angr.utils.graph import GraphUtils
|
|
15
|
+
from angr.utils.graph import GraphUtils
|
|
16
16
|
from ....knowledge_plugins.cfg import IndirectJumpType
|
|
17
17
|
from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
|
|
18
18
|
from ....utils.graph import dominates, to_acyclic_graph, dfs_back_edges
|
|
@@ -24,7 +24,6 @@ from ..utils import (
|
|
|
24
24
|
is_empty_or_label_only_node,
|
|
25
25
|
has_nonlabel_statements,
|
|
26
26
|
first_nonlabel_statement,
|
|
27
|
-
structured_node_is_simple_return,
|
|
28
27
|
)
|
|
29
28
|
from ..call_counter import AILCallCounter
|
|
30
29
|
from .structurer_nodes import (
|
|
@@ -84,7 +83,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
84
83
|
func: Optional["Function"] = None,
|
|
85
84
|
case_entry_to_switch_head: dict[int, int] | None = None,
|
|
86
85
|
parent_region=None,
|
|
87
|
-
|
|
86
|
+
improve_algorithm=False,
|
|
88
87
|
use_multistmtexprs: MultiStmtExprMode = MultiStmtExprMode.MAX_ONE_CALL,
|
|
89
88
|
**kwargs,
|
|
90
89
|
):
|
|
@@ -95,7 +94,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
95
94
|
func=func,
|
|
96
95
|
case_entry_to_switch_head=case_entry_to_switch_head,
|
|
97
96
|
parent_region=parent_region,
|
|
98
|
-
improve_structurer=improve_structurer,
|
|
99
97
|
**kwargs,
|
|
100
98
|
)
|
|
101
99
|
|
|
@@ -112,13 +110,17 @@ class PhoenixStructurer(StructurerBase):
|
|
|
112
110
|
# absorbed into other SequenceNodes
|
|
113
111
|
self.dowhile_known_tail_nodes: set = set()
|
|
114
112
|
|
|
115
|
-
|
|
113
|
+
# in reimplementing the core phoenix algorithm from the phoenix decompiler paper, two types of changes were
|
|
114
|
+
# made to the algorithm:
|
|
115
|
+
# 1. Mandatory fixes to correct flaws we found in the algorithm
|
|
116
|
+
# 2. Optional fixes to improve the results of already correct choices
|
|
117
|
+
#
|
|
118
|
+
# the improve_algorithm flag controls whether the optional fixes are applied. these are disabled by default
|
|
119
|
+
# to be as close to the original algorithm as possible. for best results, enable this flag.
|
|
120
|
+
self._improve_algorithm = improve_algorithm
|
|
116
121
|
self._edge_virtualization_hints = []
|
|
117
122
|
|
|
118
123
|
self._use_multistmtexprs = use_multistmtexprs
|
|
119
|
-
if not self._phoenix_improved:
|
|
120
|
-
self._use_multistmtexprs = MultiStmtExprMode.NEVER
|
|
121
|
-
|
|
122
124
|
self._analyze()
|
|
123
125
|
|
|
124
126
|
@staticmethod
|
|
@@ -246,7 +248,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
246
248
|
self._rewrite_jumps_to_continues(loop_node.sequence_node, loop_node=loop_node)
|
|
247
249
|
return True
|
|
248
250
|
|
|
249
|
-
if self.
|
|
251
|
+
if self._improve_algorithm:
|
|
250
252
|
matched, loop_node, successor_node = self._match_cyclic_while_with_single_successor(
|
|
251
253
|
node, head, graph, full_graph
|
|
252
254
|
)
|
|
@@ -379,7 +381,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
379
381
|
|
|
380
382
|
return True, loop_node, right
|
|
381
383
|
|
|
382
|
-
if self.
|
|
384
|
+
if self._improve_algorithm:
|
|
383
385
|
if full_graph.out_degree[node] == 1:
|
|
384
386
|
# while (true) { ...; if (...) break; }
|
|
385
387
|
_, _, head_block = self._find_node_going_to_dst(node, left, condjump_only=True)
|
|
@@ -498,7 +500,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
498
500
|
self._remove_last_statement_if_jump(succ)
|
|
499
501
|
drop_succ = False
|
|
500
502
|
|
|
501
|
-
if self.
|
|
503
|
+
if self._improve_algorithm:
|
|
502
504
|
# absorb the entire succ block if possible
|
|
503
505
|
if self._is_sequential_statement_block(succ) and self._should_use_multistmtexprs(succ):
|
|
504
506
|
stmts = self._build_multistatementexpr_statements(succ)
|
|
@@ -1004,7 +1006,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1004
1006
|
any_matches |= matched
|
|
1005
1007
|
if matched:
|
|
1006
1008
|
break
|
|
1007
|
-
if self.
|
|
1009
|
+
if self._improve_algorithm:
|
|
1008
1010
|
l.debug("... matching acyclic ITE with short-circuit conditions at %r", node)
|
|
1009
1011
|
matched = self._match_acyclic_short_circuit_conditions(graph, full_graph, node)
|
|
1010
1012
|
l.debug("... matched: %s", matched)
|
|
@@ -1307,7 +1309,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1307
1309
|
graph,
|
|
1308
1310
|
full_graph,
|
|
1309
1311
|
) -> tuple[ODict, Any, set[Any]]:
|
|
1310
|
-
cases: ODict[int | tuple[int], SequenceNode] =
|
|
1312
|
+
cases: ODict[int | tuple[int], SequenceNode] = ODict()
|
|
1311
1313
|
to_remove = set()
|
|
1312
1314
|
|
|
1313
1315
|
# it is possible that the default node gets duplicated by other analyses and creates a default node (addr.a)
|
|
@@ -2108,7 +2110,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2108
2110
|
return None
|
|
2109
2111
|
|
|
2110
2112
|
def _last_resort_refinement(self, head, graph: networkx.DiGraph, full_graph: networkx.DiGraph | None) -> bool:
|
|
2111
|
-
if self.
|
|
2113
|
+
if self._improve_algorithm:
|
|
2112
2114
|
while self._edge_virtualization_hints:
|
|
2113
2115
|
src, dst = self._edge_virtualization_hints.pop(0)
|
|
2114
2116
|
if graph.has_edge(src, dst):
|
|
@@ -2229,6 +2231,15 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2229
2231
|
remove_last_statement(src)
|
|
2230
2232
|
|
|
2231
2233
|
def _should_use_multistmtexprs(self, node: Block | BaseNode) -> bool:
|
|
2234
|
+
"""
|
|
2235
|
+
The original Phoenix algorithm had no support for multi-stmt expressions, such as the following:
|
|
2236
|
+
if ((x = y) && z) { ... }
|
|
2237
|
+
|
|
2238
|
+
There are multiple levels at which multi-stmt expressions can be used. If the Phoenix algorithm is not
|
|
2239
|
+
set to be in improved mode, then we should not use multi-stmt expressions at all.
|
|
2240
|
+
"""
|
|
2241
|
+
if not self._improve_algorithm:
|
|
2242
|
+
return False
|
|
2232
2243
|
if self._use_multistmtexprs == MultiStmtExprMode.NEVER:
|
|
2233
2244
|
return False
|
|
2234
2245
|
if self._use_multistmtexprs == MultiStmtExprMode.ALWAYS:
|
|
@@ -2313,7 +2324,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2313
2324
|
walker.block_id += 1
|
|
2314
2325
|
if _check(block.nodes[-1].statements[-1]):
|
|
2315
2326
|
walker.parent_and_block.append((walker.block_id, parent, block))
|
|
2316
|
-
return
|
|
2317
2327
|
|
|
2318
2328
|
def _handle_BreakNode(break_node: BreakNode, parent=None, **kwargs): # pylint:disable=unused-argument
|
|
2319
2329
|
walker.block_id += 1
|
|
@@ -2324,7 +2334,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2324
2334
|
):
|
|
2325
2335
|
# FIXME: idx is ignored
|
|
2326
2336
|
walker.parent_and_block.append((walker.block_id, parent, break_node))
|
|
2327
|
-
return
|
|
2328
2337
|
|
|
2329
2338
|
walker = SequenceWalker(
|
|
2330
2339
|
handlers={
|
|
@@ -2502,84 +2511,12 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2502
2511
|
break
|
|
2503
2512
|
return None
|
|
2504
2513
|
|
|
2514
|
+
# pylint: disable=unused-argument,no-self-use
|
|
2505
2515
|
def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
|
|
2506
2516
|
"""
|
|
2507
2517
|
Returns a list of edges that are ordered by the best edges to virtualize first.
|
|
2508
|
-
The criteria for "best" is defined by a variety of heuristics described below.
|
|
2509
2518
|
"""
|
|
2510
|
-
|
|
2511
|
-
return edges
|
|
2512
|
-
|
|
2513
|
-
# TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
|
|
2514
|
-
try:
|
|
2515
|
-
entry_node = [node for node in graph.nodes if graph.in_degree(node) == 0][0]
|
|
2516
|
-
except IndexError:
|
|
2517
|
-
entry_node = None
|
|
2518
|
-
|
|
2519
|
-
best_edges = edges
|
|
2520
|
-
if self._phoenix_improved and entry_node is not None:
|
|
2521
|
-
# the first few heuristics are based on the post-dominator count of the edge
|
|
2522
|
-
# so we collect them for each candidate edge
|
|
2523
|
-
edge_postdom_count = {}
|
|
2524
|
-
edge_sibling_count = {}
|
|
2525
|
-
for edge in edges:
|
|
2526
|
-
_, dst = edge
|
|
2527
|
-
graph_copy = networkx.DiGraph(graph)
|
|
2528
|
-
graph_copy.remove_edge(*edge)
|
|
2529
|
-
sibling_cnt = graph_copy.in_degree(dst)
|
|
2530
|
-
if sibling_cnt == 0:
|
|
2531
|
-
continue
|
|
2532
|
-
|
|
2533
|
-
edge_sibling_count[edge] = sibling_cnt
|
|
2534
|
-
post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
|
|
2535
|
-
post_doms = set()
|
|
2536
|
-
for postdom_node, dominatee in post_dom_graph.edges():
|
|
2537
|
-
if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode):
|
|
2538
|
-
post_doms.add((postdom_node, dominatee))
|
|
2539
|
-
edge_postdom_count[edge] = len(post_doms)
|
|
2540
|
-
|
|
2541
|
-
# H1: the edge that has the least amount of sibling edges should be virtualized first
|
|
2542
|
-
# this is believed to reduce the amount of virtualization needed in future rounds and increase
|
|
2543
|
-
# the edges that enter a single outer-scope if-stmt
|
|
2544
|
-
if edge_sibling_count:
|
|
2545
|
-
min_sibling_count = min(edge_sibling_count.values())
|
|
2546
|
-
best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
|
|
2547
|
-
if len(best_edges) == 1:
|
|
2548
|
-
return best_edges
|
|
2549
|
-
|
|
2550
|
-
# create the next heuristic based on the best edges from the previous heuristic
|
|
2551
|
-
filtered_edge_postdom_count = edge_postdom_count.copy()
|
|
2552
|
-
for edge in list(edge_postdom_count.keys()):
|
|
2553
|
-
if edge not in best_edges:
|
|
2554
|
-
del filtered_edge_postdom_count[edge]
|
|
2555
|
-
if filtered_edge_postdom_count:
|
|
2556
|
-
edge_postdom_count = filtered_edge_postdom_count
|
|
2557
|
-
|
|
2558
|
-
# H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
|
|
2559
|
-
# first. this is believed to make the code more linear looking be reducing the amount of scopes.
|
|
2560
|
-
# informally, we believe post-dominators to be an inverse indicator of the number of scopes present
|
|
2561
|
-
if edge_postdom_count:
|
|
2562
|
-
max_postdom_count = max(edge_postdom_count.values())
|
|
2563
|
-
best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
|
|
2564
|
-
if len(best_edges) == 1:
|
|
2565
|
-
return best_edges
|
|
2566
|
-
|
|
2567
|
-
# H3: the edge that goes directly to a return statement should be virtualized first
|
|
2568
|
-
# this is believed to be good because it can be corrected in later optimization by duplicating
|
|
2569
|
-
# the return
|
|
2570
|
-
candidate_edges = best_edges
|
|
2571
|
-
best_edges = []
|
|
2572
|
-
for src, dst in candidate_edges:
|
|
2573
|
-
if graph.has_node(dst) and structured_node_is_simple_return(dst, graph):
|
|
2574
|
-
best_edges.append((src, dst))
|
|
2575
|
-
|
|
2576
|
-
if len(best_edges) == 1:
|
|
2577
|
-
return best_edges
|
|
2578
|
-
elif not best_edges:
|
|
2579
|
-
best_edges = candidate_edges
|
|
2580
|
-
|
|
2581
|
-
# if we have another tie, or we never used improved heuristics, then we do the chick_order.
|
|
2582
|
-
return PhoenixStructurer._chick_order_edges(best_edges, node_seq)
|
|
2519
|
+
return PhoenixStructurer._chick_order_edges(edges, node_seq)
|
|
2583
2520
|
|
|
2584
2521
|
@staticmethod
|
|
2585
2522
|
def _chick_order_edges(edges: list, node_seq: dict[Any, int]) -> list:
|
|
@@ -34,14 +34,12 @@ class RecursiveStructurer(Analysis):
|
|
|
34
34
|
cond_proc=None,
|
|
35
35
|
func: Optional["Function"] = None,
|
|
36
36
|
structurer_cls: type | None = None,
|
|
37
|
-
improve_structurer=True,
|
|
38
37
|
**kwargs,
|
|
39
38
|
):
|
|
40
39
|
self._region = region
|
|
41
40
|
self.cond_proc = cond_proc if cond_proc is not None else ConditionProcessor(self.project.arch)
|
|
42
41
|
self.function = func
|
|
43
42
|
self.structurer_cls = structurer_cls if structurer_cls is not None else DreamStructurer
|
|
44
|
-
self.improve_structurer = improve_structurer
|
|
45
43
|
self.structurer_options = kwargs
|
|
46
44
|
|
|
47
45
|
self.result = None
|
|
@@ -91,7 +89,6 @@ class RecursiveStructurer(Analysis):
|
|
|
91
89
|
case_entry_to_switch_head=self._case_entry_to_switch_head,
|
|
92
90
|
func=self.function,
|
|
93
91
|
parent_region=parent_region,
|
|
94
|
-
improve_structurer=self.improve_structurer,
|
|
95
92
|
**self.structurer_options,
|
|
96
93
|
)
|
|
97
94
|
# replace this region with the resulting node in its parent region... if it's not an orphan
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import networkx
|
|
4
|
+
|
|
5
|
+
from ..utils import structured_node_is_simple_return
|
|
6
|
+
from ....utils.graph import PostDominators, TemporaryNode
|
|
7
|
+
from .phoenix import PhoenixStructurer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SAILRStructurer(PhoenixStructurer):
|
|
11
|
+
"""
|
|
12
|
+
The SAILR structuring algorithm is the phoenix-based algorithm from the USENIX 2024 paper SAILR.
|
|
13
|
+
The entirety of the algorithm is implemented across this class and various optimization passes in the decompiler.
|
|
14
|
+
To find each optimization class, simply search for optimizations which reference this class.NAME.
|
|
15
|
+
|
|
16
|
+
At a high-level, SAILR does three things different from the traditional Phoenix schema-based algorithm:
|
|
17
|
+
1. It recursively structures the graph, rather than doing it in a single pass. This allows decisions to be made
|
|
18
|
+
based on the current state of what the decompilation would look like.
|
|
19
|
+
2. It performs deoptimizations targeting specific optimizations that introduce gotos and mis-structured code.
|
|
20
|
+
It can only do this because of the recursive nature of the algorithm.
|
|
21
|
+
3. It uses a more advanced heuristic for virtualizing edges, which is implemented in this class.
|
|
22
|
+
|
|
23
|
+
Additionally, some changes in Phoenix are only activated when SAILR is used.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
NAME = "sailr"
|
|
27
|
+
|
|
28
|
+
def __init__(self, region, improve_phoenix=True, **kwargs):
|
|
29
|
+
super().__init__(
|
|
30
|
+
region,
|
|
31
|
+
improve_algorithm=improve_phoenix,
|
|
32
|
+
**kwargs,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
|
|
36
|
+
"""
|
|
37
|
+
The criteria for "best" is defined by a variety of heuristics described below.
|
|
38
|
+
"""
|
|
39
|
+
if len(edges) <= 1:
|
|
40
|
+
return edges
|
|
41
|
+
|
|
42
|
+
# TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
|
|
43
|
+
try:
|
|
44
|
+
entry_node = [node for node in graph.nodes if graph.in_degree(node) == 0][0]
|
|
45
|
+
except IndexError:
|
|
46
|
+
entry_node = None
|
|
47
|
+
|
|
48
|
+
best_edges = edges
|
|
49
|
+
if entry_node is not None:
|
|
50
|
+
# the first few heuristics are based on the post-dominator count of the edge
|
|
51
|
+
# so we collect them for each candidate edge
|
|
52
|
+
edge_postdom_count = {}
|
|
53
|
+
edge_sibling_count = {}
|
|
54
|
+
for edge in edges:
|
|
55
|
+
_, dst = edge
|
|
56
|
+
graph_copy = networkx.DiGraph(graph)
|
|
57
|
+
graph_copy.remove_edge(*edge)
|
|
58
|
+
sibling_cnt = graph_copy.in_degree(dst)
|
|
59
|
+
if sibling_cnt == 0:
|
|
60
|
+
continue
|
|
61
|
+
|
|
62
|
+
edge_sibling_count[edge] = sibling_cnt
|
|
63
|
+
post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
|
|
64
|
+
post_doms = set()
|
|
65
|
+
for postdom_node, dominatee in post_dom_graph.edges():
|
|
66
|
+
if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode):
|
|
67
|
+
post_doms.add((postdom_node, dominatee))
|
|
68
|
+
edge_postdom_count[edge] = len(post_doms)
|
|
69
|
+
|
|
70
|
+
# H1: the edge that has the least amount of sibling edges should be virtualized first
|
|
71
|
+
# this is believed to reduce the amount of virtualization needed in future rounds and increase
|
|
72
|
+
# the edges that enter a single outer-scope if-stmt
|
|
73
|
+
if edge_sibling_count:
|
|
74
|
+
min_sibling_count = min(edge_sibling_count.values())
|
|
75
|
+
best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
|
|
76
|
+
if len(best_edges) == 1:
|
|
77
|
+
return best_edges
|
|
78
|
+
|
|
79
|
+
# create the next heuristic based on the best edges from the previous heuristic
|
|
80
|
+
filtered_edge_postdom_count = edge_postdom_count.copy()
|
|
81
|
+
for edge in list(edge_postdom_count.keys()):
|
|
82
|
+
if edge not in best_edges:
|
|
83
|
+
del filtered_edge_postdom_count[edge]
|
|
84
|
+
if filtered_edge_postdom_count:
|
|
85
|
+
edge_postdom_count = filtered_edge_postdom_count
|
|
86
|
+
|
|
87
|
+
# H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
|
|
88
|
+
# first. this is believed to make the code more linear looking by reducing the amount of scopes.
|
|
89
|
+
# informally, we believe post-dominators to be an inverse indicator of the number of scopes present
|
|
90
|
+
if edge_postdom_count:
|
|
91
|
+
max_postdom_count = max(edge_postdom_count.values())
|
|
92
|
+
best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
|
|
93
|
+
if len(best_edges) == 1:
|
|
94
|
+
return best_edges
|
|
95
|
+
|
|
96
|
+
# H3: the edge that goes directly to a return statement should be virtualized first
|
|
97
|
+
# this is believed to be good because it can be corrected in later optimization by duplicating
|
|
98
|
+
# the return
|
|
99
|
+
candidate_edges = best_edges
|
|
100
|
+
best_edges = []
|
|
101
|
+
for src, dst in candidate_edges:
|
|
102
|
+
if graph.has_node(dst) and structured_node_is_simple_return(dst, graph):
|
|
103
|
+
best_edges.append((src, dst))
|
|
104
|
+
|
|
105
|
+
if len(best_edges) == 1:
|
|
106
|
+
return best_edges
|
|
107
|
+
elif not best_edges:
|
|
108
|
+
best_edges = candidate_edges
|
|
109
|
+
|
|
110
|
+
# if we have another tie, or we never used improved heuristics, then we do the default ordering.
|
|
111
|
+
return super()._order_virtualizable_edges(graph, best_edges, node_seq)
|
|
@@ -53,7 +53,6 @@ class StructurerBase(Analysis):
|
|
|
53
53
|
func: Optional["Function"] = None,
|
|
54
54
|
case_entry_to_switch_head: dict[int, int] | None = None,
|
|
55
55
|
parent_region=None,
|
|
56
|
-
improve_structurer=True,
|
|
57
56
|
**kwargs,
|
|
58
57
|
):
|
|
59
58
|
self._region: "GraphRegion" = region
|
|
@@ -61,7 +60,6 @@ class StructurerBase(Analysis):
|
|
|
61
60
|
self.function = func
|
|
62
61
|
self._case_entry_to_switch_head = case_entry_to_switch_head
|
|
63
62
|
self._parent_region = parent_region
|
|
64
|
-
self._improve_structurer = improve_structurer
|
|
65
63
|
|
|
66
64
|
self.cond_proc = (
|
|
67
65
|
condition_processor if condition_processor is not None else ConditionProcessor(self.project.arch)
|