angr 9.2.113__py3-none-macosx_10_9_x86_64.whl → 9.2.115__py3-none-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +2 -2
- angr/analyses/cfg/cfg_fast.py +1 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +2 -2
- angr/analyses/decompiler/decompilation_options.py +2 -12
- angr/analyses/decompiler/decompiler.py +14 -3
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +9 -9
- angr/analyses/decompiler/optimization_passes/cross_jump_reverter.py +0 -3
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +2 -3
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +9 -2
- angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +2 -0
- angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +2 -0
- angr/analyses/decompiler/region_simplifiers/switch_cluster_simplifier.py +5 -0
- angr/analyses/decompiler/structuring/__init__.py +6 -2
- angr/analyses/decompiler/structuring/phoenix.py +34 -17
- angr/analyses/decompiler/structuring/recursive_structurer.py +0 -3
- angr/analyses/decompiler/structuring/sailr.py +111 -0
- angr/analyses/decompiler/structuring/structurer_base.py +0 -2
- angr/analyses/decompiler/utils.py +6 -1
- angr/analyses/reaching_definitions/function_handler.py +11 -1
- angr/analyses/reaching_definitions/function_handler_library/__init__.py +0 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +260 -0
- angr/analyses/reaching_definitions/function_handler_library/stdlib.py +151 -0
- angr/analyses/reaching_definitions/function_handler_library/string.py +93 -0
- angr/analyses/reaching_definitions/function_handler_library/unistd.py +23 -0
- angr/analyses/reaching_definitions/rd_state.py +3 -1
- angr/analyses/vfg.py +13 -14
- angr/calling_conventions.py +10 -4
- angr/code_location.py +4 -4
- angr/engines/vex/heavy/heavy.py +1 -1
- angr/lib/angr_native.dylib +0 -0
- angr/procedures/libc/strlen.py +5 -2
- angr/sim_variable.py +3 -18
- angr/state_plugins/solver.py +3 -9
- angr/storage/memory_mixins/address_concretization_mixin.py +1 -1
- angr/storage/memory_mixins/regioned_memory/abstract_merger_mixin.py +4 -2
- angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +5 -5
- angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +3 -3
- {angr-9.2.113.dist-info → angr-9.2.115.dist-info}/METADATA +26 -26
- {angr-9.2.113.dist-info → angr-9.2.115.dist-info}/RECORD +44 -38
- {angr-9.2.113.dist-info → angr-9.2.115.dist-info}/WHEEL +1 -1
- {angr-9.2.113.dist-info → angr-9.2.115.dist-info}/LICENSE +0 -0
- {angr-9.2.113.dist-info → angr-9.2.115.dist-info}/entry_points.txt +0 -0
- {angr-9.2.113.dist-info → angr-9.2.115.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/__main__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
|
|
3
|
-
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES
|
|
3
|
+
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES, DEFAULT_STRUCTURER
|
|
4
4
|
from angr.analyses.decompiler.utils import decompile_functions
|
|
5
5
|
|
|
6
6
|
|
|
@@ -42,7 +42,7 @@ def main():
|
|
|
42
42
|
"--structurer",
|
|
43
43
|
help="The structuring algorithm to use for decompilation.",
|
|
44
44
|
choices=STRUCTURER_CLASSES.keys(),
|
|
45
|
-
default=
|
|
45
|
+
default=DEFAULT_STRUCTURER,
|
|
46
46
|
)
|
|
47
47
|
|
|
48
48
|
args = parser.parse_args()
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -2981,7 +2981,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2981
2981
|
simsucc = self.project.factory.default_engine.process(self._initial_state, irsb, force_addr=addr)
|
|
2982
2982
|
if len(simsucc.successors) == 1:
|
|
2983
2983
|
ip = simsucc.successors[0].ip
|
|
2984
|
-
if ip
|
|
2984
|
+
if claripy.backends.concrete.convert(ip) is not ip:
|
|
2985
2985
|
target_addr = ip.concrete_value
|
|
2986
2986
|
obj = self.project.loader.find_object_containing(target_addr, membership_check=False)
|
|
2987
2987
|
if (obj is not None and obj is not self.project.loader.main_object) or self.project.is_hooked(
|
|
@@ -1734,7 +1734,7 @@ class JumpTableResolver(IndirectJumpResolver):
|
|
|
1734
1734
|
# full-function data propagation before performing jump table recovery.
|
|
1735
1735
|
l.debug("Multiple statements adding bases, not supported yet") # FIXME: Just check the addresses?
|
|
1736
1736
|
|
|
1737
|
-
jumptable_addr_vsa = jumptable_addr
|
|
1737
|
+
jumptable_addr_vsa = claripy.backends.vsa.convert(jumptable_addr)
|
|
1738
1738
|
|
|
1739
1739
|
if not isinstance(jumptable_addr_vsa, claripy.vsa.StridedInterval):
|
|
1740
1740
|
return None
|
|
@@ -2103,7 +2103,7 @@ class JumpTableResolver(IndirectJumpResolver):
|
|
|
2103
2103
|
|
|
2104
2104
|
read_length = state.inspect.mem_read_length
|
|
2105
2105
|
if not isinstance(read_length, int):
|
|
2106
|
-
read_length = read_length.
|
|
2106
|
+
read_length = claripy.backends.vsa.convert(read_length).upper_bound
|
|
2107
2107
|
if read_length > 16:
|
|
2108
2108
|
return
|
|
2109
2109
|
new_read_addr = state.solver.BVV(UninitReadMeta.uninit_read_base, state.arch.bits)
|
|
@@ -193,21 +193,11 @@ options = [
|
|
|
193
193
|
"recursive_structurer",
|
|
194
194
|
"structurer_cls",
|
|
195
195
|
category="Structuring",
|
|
196
|
-
default_value="
|
|
197
|
-
candidate_values=["
|
|
196
|
+
default_value="SAILR",
|
|
197
|
+
candidate_values=["SAILR", "Phoenix", "DREAM"],
|
|
198
198
|
clears_cache=True,
|
|
199
199
|
convert=structurer_class_from_name,
|
|
200
200
|
),
|
|
201
|
-
O(
|
|
202
|
-
"Improve structuring algorithm",
|
|
203
|
-
"If applicable in deeper structurer, like Phoenix, improves decompilation output",
|
|
204
|
-
bool,
|
|
205
|
-
"recursive_structurer",
|
|
206
|
-
"improve_structurer",
|
|
207
|
-
category="Structuring",
|
|
208
|
-
default_value=True,
|
|
209
|
-
clears_cache=True,
|
|
210
|
-
),
|
|
211
201
|
O(
|
|
212
202
|
"C-style null compares",
|
|
213
203
|
"Rewrites the (x == 0) => (!x) && (x != 0) => (x)",
|
|
@@ -14,7 +14,7 @@ from ...knowledge_base import KnowledgeBase
|
|
|
14
14
|
from ...sim_variable import SimMemoryVariable, SimRegisterVariable, SimStackVariable
|
|
15
15
|
from ...utils import timethis
|
|
16
16
|
from .. import Analysis, AnalysesHub
|
|
17
|
-
from .structuring import RecursiveStructurer, PhoenixStructurer
|
|
17
|
+
from .structuring import RecursiveStructurer, PhoenixStructurer, DEFAULT_STRUCTURER
|
|
18
18
|
from .region_identifier import RegionIdentifier
|
|
19
19
|
from .optimization_passes.optimization_pass import OptimizationPassStage
|
|
20
20
|
from .optimization_passes import get_default_optimization_passes
|
|
@@ -146,8 +146,9 @@ class Decompiler(Analysis):
|
|
|
146
146
|
self._complete_successors = False
|
|
147
147
|
self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
|
|
148
148
|
if "structurer_cls" not in self._recursive_structurer_params:
|
|
149
|
-
self._recursive_structurer_params["structurer_cls"] =
|
|
150
|
-
|
|
149
|
+
self._recursive_structurer_params["structurer_cls"] = DEFAULT_STRUCTURER
|
|
150
|
+
# is the algorithm based on Phoenix (a schema-based algorithm)?
|
|
151
|
+
if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
|
|
151
152
|
self._force_loop_single_exit = False
|
|
152
153
|
self._complete_successors = True
|
|
153
154
|
fold_callexprs_into_conditions = True
|
|
@@ -316,6 +317,11 @@ class Decompiler(Analysis):
|
|
|
316
317
|
continue
|
|
317
318
|
if pass_.STRUCTURING:
|
|
318
319
|
if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
|
|
320
|
+
l.warning(
|
|
321
|
+
"Skipping %s because it does not support structuring algorithm: %s",
|
|
322
|
+
pass_,
|
|
323
|
+
self._recursive_structurer_params["structurer_cls"].NAME,
|
|
324
|
+
)
|
|
319
325
|
continue
|
|
320
326
|
|
|
321
327
|
a = pass_(
|
|
@@ -367,6 +373,11 @@ class Decompiler(Analysis):
|
|
|
367
373
|
continue
|
|
368
374
|
if pass_.STRUCTURING:
|
|
369
375
|
if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
|
|
376
|
+
l.warning(
|
|
377
|
+
"Skipping %s because it does not support structuring algorithm: %s",
|
|
378
|
+
pass_,
|
|
379
|
+
self._recursive_structurer_params["structurer_cls"].NAME,
|
|
380
|
+
)
|
|
370
381
|
continue
|
|
371
382
|
|
|
372
383
|
a = pass_(
|
|
@@ -7,10 +7,10 @@ import claripy
|
|
|
7
7
|
from ailment import Const
|
|
8
8
|
from ailment.block_walker import AILBlockWalkerBase
|
|
9
9
|
from ailment.statement import Call, Statement, ConditionalJump, Assignment, Store, Return
|
|
10
|
-
from ailment.expression import Convert, Register
|
|
10
|
+
from ailment.expression import Convert, Register, Expression
|
|
11
11
|
|
|
12
12
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
13
|
-
from ..
|
|
13
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
14
14
|
from ....knowledge_plugins.key_definitions.atoms import MemoryLocation
|
|
15
15
|
from ....knowledge_plugins.key_definitions.constants import OP_BEFORE
|
|
16
16
|
|
|
@@ -141,6 +141,8 @@ class ConstPropOptReverter(OptimizationPass):
|
|
|
141
141
|
|
|
142
142
|
ARCHES = None
|
|
143
143
|
PLATFORMS = None
|
|
144
|
+
# allow DREAM since it's useful for return merging
|
|
145
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
144
146
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
145
147
|
NAME = "Revert Constant Propagation Optimizations"
|
|
146
148
|
DESCRIPTION = __doc__.strip()
|
|
@@ -159,8 +161,7 @@ class ConstPropOptReverter(OptimizationPass):
|
|
|
159
161
|
|
|
160
162
|
def _analyze(self, cache=None):
|
|
161
163
|
self.resolution = False
|
|
162
|
-
self.out_graph =
|
|
163
|
-
# self.out_graph = self._graph
|
|
164
|
+
self.out_graph = self._graph.copy()
|
|
164
165
|
|
|
165
166
|
_pair_stmt_handlers = {
|
|
166
167
|
Call: self._handle_Call_pair,
|
|
@@ -177,8 +178,6 @@ class ConstPropOptReverter(OptimizationPass):
|
|
|
177
178
|
|
|
178
179
|
if not self.resolution:
|
|
179
180
|
self.out_graph = None
|
|
180
|
-
else:
|
|
181
|
-
self.out_graph = add_labels(self.out_graph)
|
|
182
181
|
|
|
183
182
|
def _analyze_call_pair_targets(self):
|
|
184
183
|
all_obs_points = []
|
|
@@ -329,9 +328,10 @@ class ConstPropOptReverter(OptimizationPass):
|
|
|
329
328
|
return
|
|
330
329
|
|
|
331
330
|
# verify both calls are calls to the same function
|
|
332
|
-
if
|
|
333
|
-
|
|
334
|
-
|
|
331
|
+
if isinstance(obj0.target, Expression) and isinstance(obj1.target, Expression):
|
|
332
|
+
if not obj0.target.likes(obj1.target):
|
|
333
|
+
return
|
|
334
|
+
elif obj0.target != obj1.target:
|
|
335
335
|
return
|
|
336
336
|
|
|
337
337
|
call0, call1 = obj0, obj1
|
|
@@ -21,11 +21,8 @@ class CrossJumpReverter(StructuringOptimizationPass):
|
|
|
21
21
|
a max of max_opt_iters times. Second, it will not duplicate a block with too many calls.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
ARCHES = None
|
|
25
|
-
PLATFORMS = None
|
|
26
24
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
27
25
|
NAME = "Duplicate linear blocks with gotos"
|
|
28
|
-
STRUCTURING = ["phoenix"]
|
|
29
26
|
DESCRIPTION = inspect.cleandoc(__doc__).strip()
|
|
30
27
|
|
|
31
28
|
def __init__(
|
|
@@ -11,7 +11,7 @@ from ailment.expression import Expression, BinaryOp, Const, Load
|
|
|
11
11
|
from angr.utils.graph import GraphUtils
|
|
12
12
|
from ..utils import first_nonlabel_statement, remove_last_statement
|
|
13
13
|
from ..structuring.structurer_nodes import IncompleteSwitchCaseHeadStatement, SequenceNode, MultiNode
|
|
14
|
-
from .optimization_pass import
|
|
14
|
+
from .optimization_pass import MultipleBlocksException, StructuringOptimizationPass
|
|
15
15
|
from ..region_simplifiers.switch_cluster_simplifier import SwitchClusterFinder
|
|
16
16
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
@@ -143,14 +143,13 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
143
143
|
As a hack for now, we only run this deoptimization on Linux binaries.
|
|
144
144
|
"""
|
|
145
145
|
|
|
146
|
+
# TODO: this needs to be updated to support Windows, but detect and disable on MSVC
|
|
146
147
|
PLATFORMS = ["linux"]
|
|
147
|
-
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
148
148
|
NAME = "Convert lowered switch-cases (if-else) to switch-cases"
|
|
149
149
|
DESCRIPTION = (
|
|
150
150
|
"Convert lowered switch-cases (if-else) to switch-cases. Only works when the Phoenix structuring "
|
|
151
151
|
"algorithm is in use."
|
|
152
152
|
)
|
|
153
|
-
STRUCTURING = ["phoenix"]
|
|
154
153
|
|
|
155
154
|
def __init__(self, func, min_distinct_cases=2, **kwargs):
|
|
156
155
|
super().__init__(
|
|
@@ -10,7 +10,7 @@ import ailment
|
|
|
10
10
|
from angr.analyses.decompiler import RegionIdentifier
|
|
11
11
|
from angr.analyses.decompiler.condition_processor import ConditionProcessor
|
|
12
12
|
from angr.analyses.decompiler.goto_manager import GotoManager
|
|
13
|
-
from angr.analyses.decompiler.structuring import RecursiveStructurer,
|
|
13
|
+
from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
|
|
14
14
|
from angr.analyses.decompiler.utils import add_labels
|
|
15
15
|
from angr.analyses.decompiler.seq_cf_structure_counter import ControlFlowStructureCounter
|
|
16
16
|
|
|
@@ -266,10 +266,17 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
266
266
|
The base class for any optimization pass that requires structuring. Optimization passes that inherit from this class
|
|
267
267
|
should directly depend on structuring artifacts, such as regions and gotos. Otherwise, they should use
|
|
268
268
|
OptimizationPass. This is the heaviest (computation time) optimization pass class.
|
|
269
|
+
|
|
270
|
+
By default this type of optimization should work:
|
|
271
|
+
- on any architecture
|
|
272
|
+
- on any platform
|
|
273
|
+
- during region identification (to have iterative structuring)
|
|
274
|
+
- only with the SAILR structuring algorithm
|
|
269
275
|
"""
|
|
270
276
|
|
|
271
277
|
ARCHES = None
|
|
272
278
|
PLATFORMS = None
|
|
279
|
+
STRUCTURING = [SAILRStructurer.NAME]
|
|
273
280
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
274
281
|
|
|
275
282
|
def __init__(
|
|
@@ -401,7 +408,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
401
408
|
self._ri.region,
|
|
402
409
|
cond_proc=self._ri.cond_proc,
|
|
403
410
|
func=self._func,
|
|
404
|
-
structurer_cls=
|
|
411
|
+
structurer_cls=SAILRStructurer,
|
|
405
412
|
)
|
|
406
413
|
# pylint:disable=broad-except
|
|
407
414
|
except Exception:
|
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
from ailment import Block
|
|
5
5
|
from ailment.statement import ConditionalJump, Return
|
|
6
6
|
|
|
7
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
7
8
|
from ....utils.graph import subgraph_between_nodes
|
|
8
9
|
from ..utils import remove_labels, to_ail_supergraph, update_labels
|
|
9
10
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
@@ -28,6 +29,7 @@ class ReturnDeduplicator(OptimizationPass):
|
|
|
28
29
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
29
30
|
NAME = "Deduplicates return statements that may have been duplicated"
|
|
30
31
|
DESCRIPTION = __doc__.strip()
|
|
32
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
31
33
|
|
|
32
34
|
def __init__(self, func, **kwargs):
|
|
33
35
|
super().__init__(func, **kwargs)
|
|
@@ -4,6 +4,7 @@ import networkx
|
|
|
4
4
|
|
|
5
5
|
from .return_duplicator_base import ReturnDuplicatorBase
|
|
6
6
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
7
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
7
8
|
|
|
8
9
|
_l = logging.getLogger(name=__name__)
|
|
9
10
|
|
|
@@ -19,6 +20,7 @@ class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
|
|
|
19
20
|
STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
|
|
20
21
|
NAME = "Duplicate return-only blocks (high)"
|
|
21
22
|
DESCRIPTION = __doc__
|
|
23
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
22
24
|
|
|
23
25
|
def __init__(
|
|
24
26
|
self,
|
|
@@ -4,6 +4,8 @@ from typing import DefaultDict, Any
|
|
|
4
4
|
from collections import OrderedDict, defaultdict
|
|
5
5
|
|
|
6
6
|
import ailment
|
|
7
|
+
from ailment import UnaryOp
|
|
8
|
+
from ailment.expression import negate
|
|
7
9
|
|
|
8
10
|
from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
|
|
9
11
|
from ..structuring.structurer_nodes import SwitchCaseNode, ConditionNode, SequenceNode, MultiNode, BaseNode, BreakNode
|
|
@@ -520,6 +522,9 @@ def simplify_lowered_switches_core(
|
|
|
520
522
|
|
|
521
523
|
if outermost_node is None:
|
|
522
524
|
return False
|
|
525
|
+
if isinstance(outermost_node.condition, UnaryOp) and outermost_node.condition.op == "Not":
|
|
526
|
+
# attempt to flip any simple negated comparison for normalized operations
|
|
527
|
+
outermost_node.condition = negate(outermost_node.condition.operand)
|
|
523
528
|
|
|
524
529
|
caseno_to_node = {}
|
|
525
530
|
default_node_candidates: list[tuple[BaseNode, BaseNode]] = [] # parent to default node candidate
|
|
@@ -2,14 +2,18 @@ from typing import Optional, Type
|
|
|
2
2
|
|
|
3
3
|
from .dream import DreamStructurer
|
|
4
4
|
from .phoenix import PhoenixStructurer
|
|
5
|
+
from .sailr import SAILRStructurer
|
|
5
6
|
from .recursive_structurer import RecursiveStructurer
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
STRUCTURER_CLASSES = {
|
|
9
|
-
|
|
10
|
-
|
|
10
|
+
SAILRStructurer.NAME: SAILRStructurer,
|
|
11
|
+
PhoenixStructurer.NAME: PhoenixStructurer,
|
|
12
|
+
DreamStructurer.NAME: DreamStructurer,
|
|
11
13
|
}
|
|
12
14
|
|
|
15
|
+
DEFAULT_STRUCTURER = SAILRStructurer
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
def structurer_class_from_name(name: str) -> type | None:
|
|
15
19
|
return STRUCTURER_CLASSES.get(name.lower(), None)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# pylint:disable=line-too-long,import-outside-toplevel,import-error,multiple-statements,too-many-boolean-expressions
|
|
2
2
|
from typing import Any, DefaultDict, Optional, TYPE_CHECKING
|
|
3
3
|
from collections import OrderedDict as ODict
|
|
4
|
-
from collections import defaultdict
|
|
4
|
+
from collections import defaultdict
|
|
5
5
|
from enum import Enum
|
|
6
6
|
import logging
|
|
7
7
|
|
|
@@ -83,7 +83,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
83
83
|
func: Optional["Function"] = None,
|
|
84
84
|
case_entry_to_switch_head: dict[int, int] | None = None,
|
|
85
85
|
parent_region=None,
|
|
86
|
-
|
|
86
|
+
improve_algorithm=False,
|
|
87
87
|
use_multistmtexprs: MultiStmtExprMode = MultiStmtExprMode.MAX_ONE_CALL,
|
|
88
88
|
**kwargs,
|
|
89
89
|
):
|
|
@@ -94,7 +94,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
94
94
|
func=func,
|
|
95
95
|
case_entry_to_switch_head=case_entry_to_switch_head,
|
|
96
96
|
parent_region=parent_region,
|
|
97
|
-
improve_structurer=improve_structurer,
|
|
98
97
|
**kwargs,
|
|
99
98
|
)
|
|
100
99
|
|
|
@@ -111,13 +110,17 @@ class PhoenixStructurer(StructurerBase):
|
|
|
111
110
|
# absorbed into other SequenceNodes
|
|
112
111
|
self.dowhile_known_tail_nodes: set = set()
|
|
113
112
|
|
|
114
|
-
|
|
113
|
+
# in reimplementing the core phoenix algorithm from the phoenix decompiler paper, two types of changes were
|
|
114
|
+
# made to the algorithm:
|
|
115
|
+
# 1. Mandatory fixes to correct flaws we found in the algorithm
|
|
116
|
+
# 2. Optional fixes to improve the results of already correct choices
|
|
117
|
+
#
|
|
118
|
+
# the improve_algorithm flag controls whether the optional fixes are applied. these are disabled by default
|
|
119
|
+
# to be as close to the original algorithm as possible. for best results, enable this flag.
|
|
120
|
+
self._improve_algorithm = improve_algorithm
|
|
115
121
|
self._edge_virtualization_hints = []
|
|
116
122
|
|
|
117
123
|
self._use_multistmtexprs = use_multistmtexprs
|
|
118
|
-
if not self._phoenix_improved:
|
|
119
|
-
self._use_multistmtexprs = MultiStmtExprMode.NEVER
|
|
120
|
-
|
|
121
124
|
self._analyze()
|
|
122
125
|
|
|
123
126
|
@staticmethod
|
|
@@ -245,7 +248,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
245
248
|
self._rewrite_jumps_to_continues(loop_node.sequence_node, loop_node=loop_node)
|
|
246
249
|
return True
|
|
247
250
|
|
|
248
|
-
if self.
|
|
251
|
+
if self._improve_algorithm:
|
|
249
252
|
matched, loop_node, successor_node = self._match_cyclic_while_with_single_successor(
|
|
250
253
|
node, head, graph, full_graph
|
|
251
254
|
)
|
|
@@ -378,7 +381,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
378
381
|
|
|
379
382
|
return True, loop_node, right
|
|
380
383
|
|
|
381
|
-
if self.
|
|
384
|
+
if self._improve_algorithm:
|
|
382
385
|
if full_graph.out_degree[node] == 1:
|
|
383
386
|
# while (true) { ...; if (...) break; }
|
|
384
387
|
_, _, head_block = self._find_node_going_to_dst(node, left, condjump_only=True)
|
|
@@ -497,7 +500,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
497
500
|
self._remove_last_statement_if_jump(succ)
|
|
498
501
|
drop_succ = False
|
|
499
502
|
|
|
500
|
-
if self.
|
|
503
|
+
if self._improve_algorithm:
|
|
501
504
|
# absorb the entire succ block if possible
|
|
502
505
|
if self._is_sequential_statement_block(succ) and self._should_use_multistmtexprs(succ):
|
|
503
506
|
stmts = self._build_multistatementexpr_statements(succ)
|
|
@@ -1003,7 +1006,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1003
1006
|
any_matches |= matched
|
|
1004
1007
|
if matched:
|
|
1005
1008
|
break
|
|
1006
|
-
if self.
|
|
1009
|
+
if self._improve_algorithm:
|
|
1007
1010
|
l.debug("... matching acyclic ITE with short-circuit conditions at %r", node)
|
|
1008
1011
|
matched = self._match_acyclic_short_circuit_conditions(graph, full_graph, node)
|
|
1009
1012
|
l.debug("... matched: %s", matched)
|
|
@@ -1306,7 +1309,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1306
1309
|
graph,
|
|
1307
1310
|
full_graph,
|
|
1308
1311
|
) -> tuple[ODict, Any, set[Any]]:
|
|
1309
|
-
cases: ODict[int | tuple[int], SequenceNode] =
|
|
1312
|
+
cases: ODict[int | tuple[int], SequenceNode] = ODict()
|
|
1310
1313
|
to_remove = set()
|
|
1311
1314
|
|
|
1312
1315
|
# it is possible that the default node gets duplicated by other analyses and creates a default node (addr.a)
|
|
@@ -2107,7 +2110,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2107
2110
|
return None
|
|
2108
2111
|
|
|
2109
2112
|
def _last_resort_refinement(self, head, graph: networkx.DiGraph, full_graph: networkx.DiGraph | None) -> bool:
|
|
2110
|
-
if self.
|
|
2113
|
+
if self._improve_algorithm:
|
|
2111
2114
|
while self._edge_virtualization_hints:
|
|
2112
2115
|
src, dst = self._edge_virtualization_hints.pop(0)
|
|
2113
2116
|
if graph.has_edge(src, dst):
|
|
@@ -2144,7 +2147,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2144
2147
|
node_seq = {nn: (len(ordered_nodes) - idx) for (idx, nn) in enumerate(ordered_nodes)} # post-order
|
|
2145
2148
|
|
|
2146
2149
|
if all_edges_wo_dominance:
|
|
2147
|
-
all_edges_wo_dominance = self.
|
|
2150
|
+
all_edges_wo_dominance = self._order_virtualizable_edges(full_graph, all_edges_wo_dominance, node_seq)
|
|
2148
2151
|
# virtualize the first edge
|
|
2149
2152
|
src, dst = all_edges_wo_dominance[0]
|
|
2150
2153
|
self._virtualize_edge(graph, full_graph, src, dst)
|
|
@@ -2152,7 +2155,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2152
2155
|
return True
|
|
2153
2156
|
|
|
2154
2157
|
if secondary_edges:
|
|
2155
|
-
secondary_edges = self.
|
|
2158
|
+
secondary_edges = self._order_virtualizable_edges(full_graph, secondary_edges, node_seq)
|
|
2156
2159
|
# virtualize the first edge
|
|
2157
2160
|
src, dst = secondary_edges[0]
|
|
2158
2161
|
self._virtualize_edge(graph, full_graph, src, dst)
|
|
@@ -2228,6 +2231,15 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2228
2231
|
remove_last_statement(src)
|
|
2229
2232
|
|
|
2230
2233
|
def _should_use_multistmtexprs(self, node: Block | BaseNode) -> bool:
|
|
2234
|
+
"""
|
|
2235
|
+
The original Phoenix algorithm had no support for multi-stmt expressions, such as the following:
|
|
2236
|
+
if ((x = y) && z) { ... }
|
|
2237
|
+
|
|
2238
|
+
There are multiple levels at which multi-stmt expressions can be used. If the Phoenix algorithm is not
|
|
2239
|
+
set to be in improved mode, then we should not use multi-stmt expressions at all.
|
|
2240
|
+
"""
|
|
2241
|
+
if not self._improve_algorithm:
|
|
2242
|
+
return False
|
|
2231
2243
|
if self._use_multistmtexprs == MultiStmtExprMode.NEVER:
|
|
2232
2244
|
return False
|
|
2233
2245
|
if self._use_multistmtexprs == MultiStmtExprMode.ALWAYS:
|
|
@@ -2312,7 +2324,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2312
2324
|
walker.block_id += 1
|
|
2313
2325
|
if _check(block.nodes[-1].statements[-1]):
|
|
2314
2326
|
walker.parent_and_block.append((walker.block_id, parent, block))
|
|
2315
|
-
return
|
|
2316
2327
|
|
|
2317
2328
|
def _handle_BreakNode(break_node: BreakNode, parent=None, **kwargs): # pylint:disable=unused-argument
|
|
2318
2329
|
walker.block_id += 1
|
|
@@ -2323,7 +2334,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2323
2334
|
):
|
|
2324
2335
|
# FIXME: idx is ignored
|
|
2325
2336
|
walker.parent_and_block.append((walker.block_id, parent, break_node))
|
|
2326
|
-
return
|
|
2327
2337
|
|
|
2328
2338
|
walker = SequenceWalker(
|
|
2329
2339
|
handlers={
|
|
@@ -2501,6 +2511,13 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2501
2511
|
break
|
|
2502
2512
|
return None
|
|
2503
2513
|
|
|
2514
|
+
# pylint: disable=unused-argument,no-self-use
|
|
2515
|
+
def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
|
|
2516
|
+
"""
|
|
2517
|
+
Returns a list of edges that are ordered by the best edges to virtualize first.
|
|
2518
|
+
"""
|
|
2519
|
+
return PhoenixStructurer._chick_order_edges(edges, node_seq)
|
|
2520
|
+
|
|
2504
2521
|
@staticmethod
|
|
2505
2522
|
def _chick_order_edges(edges: list, node_seq: dict[Any, int]) -> list:
|
|
2506
2523
|
graph = networkx.DiGraph()
|
|
@@ -34,14 +34,12 @@ class RecursiveStructurer(Analysis):
|
|
|
34
34
|
cond_proc=None,
|
|
35
35
|
func: Optional["Function"] = None,
|
|
36
36
|
structurer_cls: type | None = None,
|
|
37
|
-
improve_structurer=True,
|
|
38
37
|
**kwargs,
|
|
39
38
|
):
|
|
40
39
|
self._region = region
|
|
41
40
|
self.cond_proc = cond_proc if cond_proc is not None else ConditionProcessor(self.project.arch)
|
|
42
41
|
self.function = func
|
|
43
42
|
self.structurer_cls = structurer_cls if structurer_cls is not None else DreamStructurer
|
|
44
|
-
self.improve_structurer = improve_structurer
|
|
45
43
|
self.structurer_options = kwargs
|
|
46
44
|
|
|
47
45
|
self.result = None
|
|
@@ -91,7 +89,6 @@ class RecursiveStructurer(Analysis):
|
|
|
91
89
|
case_entry_to_switch_head=self._case_entry_to_switch_head,
|
|
92
90
|
func=self.function,
|
|
93
91
|
parent_region=parent_region,
|
|
94
|
-
improve_structurer=self.improve_structurer,
|
|
95
92
|
**self.structurer_options,
|
|
96
93
|
)
|
|
97
94
|
# replace this region with the resulting node in its parent region... if it's not an orphan
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import networkx
|
|
4
|
+
|
|
5
|
+
from ..utils import structured_node_is_simple_return
|
|
6
|
+
from ....utils.graph import PostDominators, TemporaryNode
|
|
7
|
+
from .phoenix import PhoenixStructurer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SAILRStructurer(PhoenixStructurer):
|
|
11
|
+
"""
|
|
12
|
+
The SAILR structuring algorithm is the phoenix-based algorithm from the USENIX 2024 paper SAILR.
|
|
13
|
+
The entirety of the algorithm is implemented across this class and various optimization passes in the decompiler.
|
|
14
|
+
To find each optimization class, simply search for optimizations which reference this class.NAME.
|
|
15
|
+
|
|
16
|
+
At a high-level, SAILR does three things different from the traditional Phoenix schema-based algorithm:
|
|
17
|
+
1. It recursively structures the graph, rather than doing it in a single pass. This allows decisions to be made
|
|
18
|
+
based on the current state of what the decompilation would look like.
|
|
19
|
+
2. It performs deoptimizations targeting specific optimizations that introduce gotos and mis-structured code.
|
|
20
|
+
It can only do this because of the recursive nature of the algorithm.
|
|
21
|
+
3. It uses a more advanced heuristic for virtualizing edges, which is implemented in this class.
|
|
22
|
+
|
|
23
|
+
Additionally, some changes in Phoenix are only activated when SAILR is used.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
NAME = "sailr"
|
|
27
|
+
|
|
28
|
+
def __init__(self, region, improve_phoenix=True, **kwargs):
|
|
29
|
+
super().__init__(
|
|
30
|
+
region,
|
|
31
|
+
improve_algorithm=improve_phoenix,
|
|
32
|
+
**kwargs,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
|
|
36
|
+
"""
|
|
37
|
+
The criteria for "best" is defined by a variety of heuristics described below.
|
|
38
|
+
"""
|
|
39
|
+
if len(edges) <= 1:
|
|
40
|
+
return edges
|
|
41
|
+
|
|
42
|
+
# TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
|
|
43
|
+
try:
|
|
44
|
+
entry_node = [node for node in graph.nodes if graph.in_degree(node) == 0][0]
|
|
45
|
+
except IndexError:
|
|
46
|
+
entry_node = None
|
|
47
|
+
|
|
48
|
+
best_edges = edges
|
|
49
|
+
if entry_node is not None:
|
|
50
|
+
# the first few heuristics are based on the post-dominator count of the edge
|
|
51
|
+
# so we collect them for each candidate edge
|
|
52
|
+
edge_postdom_count = {}
|
|
53
|
+
edge_sibling_count = {}
|
|
54
|
+
for edge in edges:
|
|
55
|
+
_, dst = edge
|
|
56
|
+
graph_copy = networkx.DiGraph(graph)
|
|
57
|
+
graph_copy.remove_edge(*edge)
|
|
58
|
+
sibling_cnt = graph_copy.in_degree(dst)
|
|
59
|
+
if sibling_cnt == 0:
|
|
60
|
+
continue
|
|
61
|
+
|
|
62
|
+
edge_sibling_count[edge] = sibling_cnt
|
|
63
|
+
post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
|
|
64
|
+
post_doms = set()
|
|
65
|
+
for postdom_node, dominatee in post_dom_graph.edges():
|
|
66
|
+
if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode):
|
|
67
|
+
post_doms.add((postdom_node, dominatee))
|
|
68
|
+
edge_postdom_count[edge] = len(post_doms)
|
|
69
|
+
|
|
70
|
+
# H1: the edge that has the least amount of sibling edges should be virtualized first
|
|
71
|
+
# this is believed to reduce the amount of virtualization needed in future rounds and increase
|
|
72
|
+
# the edges that enter a single outer-scope if-stmt
|
|
73
|
+
if edge_sibling_count:
|
|
74
|
+
min_sibling_count = min(edge_sibling_count.values())
|
|
75
|
+
best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
|
|
76
|
+
if len(best_edges) == 1:
|
|
77
|
+
return best_edges
|
|
78
|
+
|
|
79
|
+
# create the next heuristic based on the best edges from the previous heuristic
|
|
80
|
+
filtered_edge_postdom_count = edge_postdom_count.copy()
|
|
81
|
+
for edge in list(edge_postdom_count.keys()):
|
|
82
|
+
if edge not in best_edges:
|
|
83
|
+
del filtered_edge_postdom_count[edge]
|
|
84
|
+
if filtered_edge_postdom_count:
|
|
85
|
+
edge_postdom_count = filtered_edge_postdom_count
|
|
86
|
+
|
|
87
|
+
# H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
|
|
88
|
+
# first. this is believed to make the code more linear-looking by reducing the number of scopes.
|
|
89
|
+
# informally, we believe post-dominators to be an inverse indicator of the number of scopes present
|
|
90
|
+
if edge_postdom_count:
|
|
91
|
+
max_postdom_count = max(edge_postdom_count.values())
|
|
92
|
+
best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
|
|
93
|
+
if len(best_edges) == 1:
|
|
94
|
+
return best_edges
|
|
95
|
+
|
|
96
|
+
# H3: the edge that goes directly to a return statement should be virtualized first
|
|
97
|
+
# this is believed to be good because it can be corrected in later optimization by duplicating
|
|
98
|
+
# the return
|
|
99
|
+
candidate_edges = best_edges
|
|
100
|
+
best_edges = []
|
|
101
|
+
for src, dst in candidate_edges:
|
|
102
|
+
if graph.has_node(dst) and structured_node_is_simple_return(dst, graph):
|
|
103
|
+
best_edges.append((src, dst))
|
|
104
|
+
|
|
105
|
+
if len(best_edges) == 1:
|
|
106
|
+
return best_edges
|
|
107
|
+
elif not best_edges:
|
|
108
|
+
best_edges = candidate_edges
|
|
109
|
+
|
|
110
|
+
# if we have another tie, or we never used improved heuristics, then we do the default ordering.
|
|
111
|
+
return super()._order_virtualizable_edges(graph, best_edges, node_seq)
|
|
@@ -53,7 +53,6 @@ class StructurerBase(Analysis):
|
|
|
53
53
|
func: Optional["Function"] = None,
|
|
54
54
|
case_entry_to_switch_head: dict[int, int] | None = None,
|
|
55
55
|
parent_region=None,
|
|
56
|
-
improve_structurer=True,
|
|
57
56
|
**kwargs,
|
|
58
57
|
):
|
|
59
58
|
self._region: "GraphRegion" = region
|
|
@@ -61,7 +60,6 @@ class StructurerBase(Analysis):
|
|
|
61
60
|
self.function = func
|
|
62
61
|
self._case_entry_to_switch_head = case_entry_to_switch_head
|
|
63
62
|
self._parent_region = parent_region
|
|
64
|
-
self._improve_structurer = improve_structurer
|
|
65
63
|
|
|
66
64
|
self.cond_proc = (
|
|
67
65
|
condition_processor if condition_processor is not None else ConditionProcessor(self.project.arch)
|