angr 9.2.114__py3-none-macosx_11_0_arm64.whl → 9.2.116__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +2 -2
- angr/analyses/cfg/cfg_fast.py +1 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +2 -2
- angr/analyses/decompiler/decompilation_options.py +2 -12
- angr/analyses/decompiler/decompiler.py +14 -3
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +3 -0
- angr/analyses/decompiler/optimization_passes/cross_jump_reverter.py +0 -3
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +2 -3
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +9 -2
- angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +2 -0
- angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +2 -0
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +5 -1
- angr/analyses/decompiler/structured_codegen/c.py +10 -13
- angr/analyses/decompiler/structuring/__init__.py +6 -2
- angr/analyses/decompiler/structuring/dream.py +3 -4
- angr/analyses/decompiler/structuring/phoenix.py +29 -93
- angr/analyses/decompiler/structuring/recursive_structurer.py +0 -3
- angr/analyses/decompiler/structuring/sailr.py +111 -0
- angr/analyses/decompiler/structuring/structurer_base.py +2 -5
- angr/analyses/decompiler/structuring/structurer_nodes.py +3 -3
- angr/analyses/reaching_definitions/dep_graph.py +62 -5
- angr/analyses/reaching_definitions/function_handler.py +11 -1
- angr/analyses/reaching_definitions/function_handler_library/__init__.py +11 -0
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +262 -0
- angr/analyses/reaching_definitions/function_handler_library/stdlib.py +157 -0
- angr/analyses/reaching_definitions/function_handler_library/string.py +93 -0
- angr/analyses/reaching_definitions/function_handler_library/unistd.py +23 -0
- angr/analyses/reaching_definitions/rd_state.py +28 -29
- angr/analyses/variable_recovery/engine_vex.py +0 -9
- angr/analyses/vfg.py +13 -14
- angr/code_location.py +4 -4
- angr/engines/pcode/cc.py +2 -0
- angr/engines/vex/heavy/heavy.py +1 -1
- angr/knowledge_plugins/key_definitions/live_definitions.py +12 -13
- angr/lib/angr_native.dylib +0 -0
- angr/procedures/libc/strlen.py +5 -2
- angr/sim_variable.py +3 -18
- angr/state_plugins/solver.py +3 -9
- angr/storage/memory_mixins/address_concretization_mixin.py +1 -1
- angr/storage/memory_mixins/paged_memory/pages/cooperation.py +2 -1
- angr/storage/memory_mixins/regioned_memory/abstract_merger_mixin.py +4 -2
- angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +5 -5
- angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +3 -3
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/METADATA +7 -7
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/RECORD +50 -44
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/WHEEL +1 -1
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/LICENSE +0 -0
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/entry_points.txt +0 -0
- {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/__main__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
|
|
3
|
-
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES
|
|
3
|
+
from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES, DEFAULT_STRUCTURER
|
|
4
4
|
from angr.analyses.decompiler.utils import decompile_functions
|
|
5
5
|
|
|
6
6
|
|
|
@@ -42,7 +42,7 @@ def main():
|
|
|
42
42
|
"--structurer",
|
|
43
43
|
help="The structuring algorithm to use for decompilation.",
|
|
44
44
|
choices=STRUCTURER_CLASSES.keys(),
|
|
45
|
-
default=
|
|
45
|
+
default=DEFAULT_STRUCTURER.NAME,
|
|
46
46
|
)
|
|
47
47
|
|
|
48
48
|
args = parser.parse_args()
|
angr/analyses/cfg/cfg_fast.py
CHANGED
|
@@ -2981,7 +2981,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
|
|
|
2981
2981
|
simsucc = self.project.factory.default_engine.process(self._initial_state, irsb, force_addr=addr)
|
|
2982
2982
|
if len(simsucc.successors) == 1:
|
|
2983
2983
|
ip = simsucc.successors[0].ip
|
|
2984
|
-
if ip
|
|
2984
|
+
if claripy.backends.concrete.convert(ip) is not ip:
|
|
2985
2985
|
target_addr = ip.concrete_value
|
|
2986
2986
|
obj = self.project.loader.find_object_containing(target_addr, membership_check=False)
|
|
2987
2987
|
if (obj is not None and obj is not self.project.loader.main_object) or self.project.is_hooked(
|
|
@@ -1734,7 +1734,7 @@ class JumpTableResolver(IndirectJumpResolver):
|
|
|
1734
1734
|
# full-function data propagation before performing jump table recovery.
|
|
1735
1735
|
l.debug("Multiple statements adding bases, not supported yet") # FIXME: Just check the addresses?
|
|
1736
1736
|
|
|
1737
|
-
jumptable_addr_vsa = jumptable_addr
|
|
1737
|
+
jumptable_addr_vsa = claripy.backends.vsa.convert(jumptable_addr)
|
|
1738
1738
|
|
|
1739
1739
|
if not isinstance(jumptable_addr_vsa, claripy.vsa.StridedInterval):
|
|
1740
1740
|
return None
|
|
@@ -2103,7 +2103,7 @@ class JumpTableResolver(IndirectJumpResolver):
|
|
|
2103
2103
|
|
|
2104
2104
|
read_length = state.inspect.mem_read_length
|
|
2105
2105
|
if not isinstance(read_length, int):
|
|
2106
|
-
read_length = read_length.
|
|
2106
|
+
read_length = claripy.backends.vsa.convert(read_length).upper_bound
|
|
2107
2107
|
if read_length > 16:
|
|
2108
2108
|
return
|
|
2109
2109
|
new_read_addr = state.solver.BVV(UninitReadMeta.uninit_read_base, state.arch.bits)
|
|
@@ -193,21 +193,11 @@ options = [
|
|
|
193
193
|
"recursive_structurer",
|
|
194
194
|
"structurer_cls",
|
|
195
195
|
category="Structuring",
|
|
196
|
-
default_value="
|
|
197
|
-
candidate_values=["
|
|
196
|
+
default_value="SAILR",
|
|
197
|
+
candidate_values=["SAILR", "Phoenix", "DREAM"],
|
|
198
198
|
clears_cache=True,
|
|
199
199
|
convert=structurer_class_from_name,
|
|
200
200
|
),
|
|
201
|
-
O(
|
|
202
|
-
"Improve structuring algorithm",
|
|
203
|
-
"If applicable in deeper structurer, like Phoenix, improves decompilation output",
|
|
204
|
-
bool,
|
|
205
|
-
"recursive_structurer",
|
|
206
|
-
"improve_structurer",
|
|
207
|
-
category="Structuring",
|
|
208
|
-
default_value=True,
|
|
209
|
-
clears_cache=True,
|
|
210
|
-
),
|
|
211
201
|
O(
|
|
212
202
|
"C-style null compares",
|
|
213
203
|
"Rewrites the (x == 0) => (!x) && (x != 0) => (x)",
|
|
@@ -14,7 +14,7 @@ from ...knowledge_base import KnowledgeBase
|
|
|
14
14
|
from ...sim_variable import SimMemoryVariable, SimRegisterVariable, SimStackVariable
|
|
15
15
|
from ...utils import timethis
|
|
16
16
|
from .. import Analysis, AnalysesHub
|
|
17
|
-
from .structuring import RecursiveStructurer, PhoenixStructurer
|
|
17
|
+
from .structuring import RecursiveStructurer, PhoenixStructurer, DEFAULT_STRUCTURER
|
|
18
18
|
from .region_identifier import RegionIdentifier
|
|
19
19
|
from .optimization_passes.optimization_pass import OptimizationPassStage
|
|
20
20
|
from .optimization_passes import get_default_optimization_passes
|
|
@@ -146,8 +146,9 @@ class Decompiler(Analysis):
|
|
|
146
146
|
self._complete_successors = False
|
|
147
147
|
self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
|
|
148
148
|
if "structurer_cls" not in self._recursive_structurer_params:
|
|
149
|
-
self._recursive_structurer_params["structurer_cls"] =
|
|
150
|
-
|
|
149
|
+
self._recursive_structurer_params["structurer_cls"] = DEFAULT_STRUCTURER
|
|
150
|
+
# is the algorithm based on Phoenix (a schema-based algorithm)?
|
|
151
|
+
if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
|
|
151
152
|
self._force_loop_single_exit = False
|
|
152
153
|
self._complete_successors = True
|
|
153
154
|
fold_callexprs_into_conditions = True
|
|
@@ -316,6 +317,11 @@ class Decompiler(Analysis):
|
|
|
316
317
|
continue
|
|
317
318
|
if pass_.STRUCTURING:
|
|
318
319
|
if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
|
|
320
|
+
l.warning(
|
|
321
|
+
"Skipping %s because it does not support structuring algorithm: %s",
|
|
322
|
+
pass_,
|
|
323
|
+
self._recursive_structurer_params["structurer_cls"].NAME,
|
|
324
|
+
)
|
|
319
325
|
continue
|
|
320
326
|
|
|
321
327
|
a = pass_(
|
|
@@ -367,6 +373,11 @@ class Decompiler(Analysis):
|
|
|
367
373
|
continue
|
|
368
374
|
if pass_.STRUCTURING:
|
|
369
375
|
if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
|
|
376
|
+
l.warning(
|
|
377
|
+
"Skipping %s because it does not support structuring algorithm: %s",
|
|
378
|
+
pass_,
|
|
379
|
+
self._recursive_structurer_params["structurer_cls"].NAME,
|
|
380
|
+
)
|
|
370
381
|
continue
|
|
371
382
|
|
|
372
383
|
a = pass_(
|
|
@@ -10,6 +10,7 @@ from ailment.statement import Call, Statement, ConditionalJump, Assignment, Stor
|
|
|
10
10
|
from ailment.expression import Convert, Register, Expression
|
|
11
11
|
|
|
12
12
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
13
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
13
14
|
from ....knowledge_plugins.key_definitions.atoms import MemoryLocation
|
|
14
15
|
from ....knowledge_plugins.key_definitions.constants import OP_BEFORE
|
|
15
16
|
|
|
@@ -140,6 +141,8 @@ class ConstPropOptReverter(OptimizationPass):
|
|
|
140
141
|
|
|
141
142
|
ARCHES = None
|
|
142
143
|
PLATFORMS = None
|
|
144
|
+
# allow DREAM since it's useful for return merging
|
|
145
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
143
146
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
144
147
|
NAME = "Revert Constant Propagation Optimizations"
|
|
145
148
|
DESCRIPTION = __doc__.strip()
|
|
@@ -21,11 +21,8 @@ class CrossJumpReverter(StructuringOptimizationPass):
|
|
|
21
21
|
a max of max_opt_iters times. Second, it will not duplicate a block with too many calls.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
ARCHES = None
|
|
25
|
-
PLATFORMS = None
|
|
26
24
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
27
25
|
NAME = "Duplicate linear blocks with gotos"
|
|
28
|
-
STRUCTURING = ["phoenix"]
|
|
29
26
|
DESCRIPTION = inspect.cleandoc(__doc__).strip()
|
|
30
27
|
|
|
31
28
|
def __init__(
|
|
@@ -11,7 +11,7 @@ from ailment.expression import Expression, BinaryOp, Const, Load
|
|
|
11
11
|
from angr.utils.graph import GraphUtils
|
|
12
12
|
from ..utils import first_nonlabel_statement, remove_last_statement
|
|
13
13
|
from ..structuring.structurer_nodes import IncompleteSwitchCaseHeadStatement, SequenceNode, MultiNode
|
|
14
|
-
from .optimization_pass import
|
|
14
|
+
from .optimization_pass import MultipleBlocksException, StructuringOptimizationPass
|
|
15
15
|
from ..region_simplifiers.switch_cluster_simplifier import SwitchClusterFinder
|
|
16
16
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
@@ -143,14 +143,13 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
|
|
|
143
143
|
As a hack for now, we only run this deoptimization on Linux binaries.
|
|
144
144
|
"""
|
|
145
145
|
|
|
146
|
+
# TODO: this needs to be updated to support Windows, but detect and disable on MSVC
|
|
146
147
|
PLATFORMS = ["linux"]
|
|
147
|
-
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
148
148
|
NAME = "Convert lowered switch-cases (if-else) to switch-cases"
|
|
149
149
|
DESCRIPTION = (
|
|
150
150
|
"Convert lowered switch-cases (if-else) to switch-cases. Only works when the Phoenix structuring "
|
|
151
151
|
"algorithm is in use."
|
|
152
152
|
)
|
|
153
|
-
STRUCTURING = ["phoenix"]
|
|
154
153
|
|
|
155
154
|
def __init__(self, func, min_distinct_cases=2, **kwargs):
|
|
156
155
|
super().__init__(
|
|
@@ -10,7 +10,7 @@ import ailment
|
|
|
10
10
|
from angr.analyses.decompiler import RegionIdentifier
|
|
11
11
|
from angr.analyses.decompiler.condition_processor import ConditionProcessor
|
|
12
12
|
from angr.analyses.decompiler.goto_manager import GotoManager
|
|
13
|
-
from angr.analyses.decompiler.structuring import RecursiveStructurer,
|
|
13
|
+
from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
|
|
14
14
|
from angr.analyses.decompiler.utils import add_labels
|
|
15
15
|
from angr.analyses.decompiler.seq_cf_structure_counter import ControlFlowStructureCounter
|
|
16
16
|
|
|
@@ -266,10 +266,17 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
266
266
|
The base class for any optimization pass that requires structuring. Optimization passes that inherit from this class
|
|
267
267
|
should directly depend on structuring artifacts, such as regions and gotos. Otherwise, they should use
|
|
268
268
|
OptimizationPass. This is the heaviest (computation time) optimization pass class.
|
|
269
|
+
|
|
270
|
+
By default this type of optimization should work:
|
|
271
|
+
- on any architecture
|
|
272
|
+
- on any platform
|
|
273
|
+
- during region identification (to have iterative structuring)
|
|
274
|
+
- only with the SAILR structuring algorithm
|
|
269
275
|
"""
|
|
270
276
|
|
|
271
277
|
ARCHES = None
|
|
272
278
|
PLATFORMS = None
|
|
279
|
+
STRUCTURING = [SAILRStructurer.NAME]
|
|
273
280
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
274
281
|
|
|
275
282
|
def __init__(
|
|
@@ -401,7 +408,7 @@ class StructuringOptimizationPass(OptimizationPass):
|
|
|
401
408
|
self._ri.region,
|
|
402
409
|
cond_proc=self._ri.cond_proc,
|
|
403
410
|
func=self._func,
|
|
404
|
-
structurer_cls=
|
|
411
|
+
structurer_cls=SAILRStructurer,
|
|
405
412
|
)
|
|
406
413
|
# pylint:disable=broad-except
|
|
407
414
|
except Exception:
|
|
@@ -4,6 +4,7 @@ import logging
|
|
|
4
4
|
from ailment import Block
|
|
5
5
|
from ailment.statement import ConditionalJump, Return
|
|
6
6
|
|
|
7
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
7
8
|
from ....utils.graph import subgraph_between_nodes
|
|
8
9
|
from ..utils import remove_labels, to_ail_supergraph, update_labels
|
|
9
10
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
@@ -28,6 +29,7 @@ class ReturnDeduplicator(OptimizationPass):
|
|
|
28
29
|
STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
|
|
29
30
|
NAME = "Deduplicates return statements that may have been duplicated"
|
|
30
31
|
DESCRIPTION = __doc__.strip()
|
|
32
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
31
33
|
|
|
32
34
|
def __init__(self, func, **kwargs):
|
|
33
35
|
super().__init__(func, **kwargs)
|
|
@@ -4,6 +4,7 @@ import networkx
|
|
|
4
4
|
|
|
5
5
|
from .return_duplicator_base import ReturnDuplicatorBase
|
|
6
6
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
7
|
+
from ..structuring import SAILRStructurer, DreamStructurer
|
|
7
8
|
|
|
8
9
|
_l = logging.getLogger(name=__name__)
|
|
9
10
|
|
|
@@ -19,6 +20,7 @@ class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
|
|
|
19
20
|
STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
|
|
20
21
|
NAME = "Duplicate return-only blocks (high)"
|
|
21
22
|
DESCRIPTION = __doc__
|
|
23
|
+
STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
|
|
22
24
|
|
|
23
25
|
def __init__(
|
|
24
26
|
self,
|
|
@@ -182,7 +182,11 @@ class StackCanarySimplifier(OptimizationPass):
|
|
|
182
182
|
|
|
183
183
|
while True:
|
|
184
184
|
traversed.add(block_addr)
|
|
185
|
-
|
|
185
|
+
try:
|
|
186
|
+
first_block = next(self._get_blocks(block_addr))
|
|
187
|
+
except StopIteration:
|
|
188
|
+
break
|
|
189
|
+
|
|
186
190
|
if first_block is None:
|
|
187
191
|
break
|
|
188
192
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# pylint:disable=missing-class-docstring,too-many-boolean-expressions,unused-argument,no-self-use
|
|
2
2
|
from typing import Optional, Any, TYPE_CHECKING
|
|
3
3
|
from collections.abc import Callable
|
|
4
|
-
from collections import defaultdict
|
|
4
|
+
from collections import defaultdict, Counter
|
|
5
5
|
import logging
|
|
6
6
|
import struct
|
|
7
7
|
|
|
@@ -491,19 +491,16 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
|
|
|
491
491
|
else:
|
|
492
492
|
name = str(variable)
|
|
493
493
|
|
|
494
|
-
# sort by
|
|
494
|
+
# sort by the following:
|
|
495
|
+
# * if it's a non-basic type
|
|
496
|
+
# * the number of occurrences
|
|
497
|
+
# * the repr of the type itself
|
|
495
498
|
# TODO: The type selection should actually happen during variable unification
|
|
496
499
|
vartypes = [x[1] for x in cvar_and_vartypes]
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
if nonprimitive_vartypes:
|
|
502
|
-
nonprimitive_vartypes = list(
|
|
503
|
-
dict.fromkeys(sorted(nonprimitive_vartypes, key=nonprimitive_vartypes.count, reverse=True))
|
|
504
|
-
)
|
|
505
|
-
vartypes.remove(nonprimitive_vartypes[0])
|
|
506
|
-
vartypes.insert(0, nonprimitive_vartypes[0])
|
|
500
|
+
count = Counter(vartypes)
|
|
501
|
+
vartypes = sorted(
|
|
502
|
+
count.copy(), key=lambda x: (isinstance(x, (SimTypeChar, SimTypeInt, SimTypeFloat)), count[x], repr(x))
|
|
503
|
+
)
|
|
507
504
|
|
|
508
505
|
for i, var_type in enumerate(vartypes):
|
|
509
506
|
if i == 0:
|
|
@@ -2177,8 +2174,8 @@ class CConstant(CExpression):
|
|
|
2177
2174
|
v = refval.content.decode("utf-8")
|
|
2178
2175
|
else:
|
|
2179
2176
|
# it's a string
|
|
2180
|
-
assert isinstance(v, str)
|
|
2181
2177
|
v = refval
|
|
2178
|
+
assert isinstance(v, str)
|
|
2182
2179
|
yield CConstant.str_to_c_str(v), self
|
|
2183
2180
|
elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeWideChar):
|
|
2184
2181
|
refval = self.reference_values[self._type]
|
|
@@ -2,14 +2,18 @@ from typing import Optional, Type
|
|
|
2
2
|
|
|
3
3
|
from .dream import DreamStructurer
|
|
4
4
|
from .phoenix import PhoenixStructurer
|
|
5
|
+
from .sailr import SAILRStructurer
|
|
5
6
|
from .recursive_structurer import RecursiveStructurer
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
STRUCTURER_CLASSES = {
|
|
9
|
-
|
|
10
|
-
|
|
10
|
+
SAILRStructurer.NAME: SAILRStructurer,
|
|
11
|
+
PhoenixStructurer.NAME: PhoenixStructurer,
|
|
12
|
+
DreamStructurer.NAME: DreamStructurer,
|
|
11
13
|
}
|
|
12
14
|
|
|
15
|
+
DEFAULT_STRUCTURER = SAILRStructurer
|
|
16
|
+
|
|
13
17
|
|
|
14
18
|
def structurer_class_from_name(name: str) -> type | None:
|
|
15
19
|
return STRUCTURER_CLASSES.get(name.lower(), None)
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# pylint:disable=multiple-statements,line-too-long,consider-using-enumerate
|
|
2
2
|
from typing import Optional, Any, TYPE_CHECKING
|
|
3
|
-
from collections import OrderedDict as ODict
|
|
4
3
|
import logging
|
|
5
4
|
from collections import defaultdict, OrderedDict
|
|
6
5
|
|
|
@@ -661,7 +660,7 @@ class DreamStructurer(StructurerBase):
|
|
|
661
660
|
i,
|
|
662
661
|
node,
|
|
663
662
|
cmp_expr,
|
|
664
|
-
cases:
|
|
663
|
+
cases: OrderedDict,
|
|
665
664
|
node_default,
|
|
666
665
|
addr,
|
|
667
666
|
addr2nodes,
|
|
@@ -909,7 +908,7 @@ class DreamStructurer(StructurerBase):
|
|
|
909
908
|
head_node_idx: int,
|
|
910
909
|
node_b_addr: int,
|
|
911
910
|
addr2nodes: dict[int, set[CodeNode]],
|
|
912
|
-
) -> tuple[
|
|
911
|
+
) -> tuple[OrderedDict, Any, Any]:
|
|
913
912
|
"""
|
|
914
913
|
Discover all cases for the switch-case structure and build the switch-cases dict.
|
|
915
914
|
|
|
@@ -922,7 +921,7 @@ class DreamStructurer(StructurerBase):
|
|
|
922
921
|
:return: A tuple of (dict of cases, the default node if exists, nodes to remove).
|
|
923
922
|
"""
|
|
924
923
|
|
|
925
|
-
cases:
|
|
924
|
+
cases: OrderedDict[int | tuple[int, ...], SequenceNode] = OrderedDict()
|
|
926
925
|
to_remove = set()
|
|
927
926
|
node_default = addr2nodes.get(node_b_addr, None)
|
|
928
927
|
if node_default is not None:
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# pylint:disable=line-too-long,import-outside-toplevel,import-error,multiple-statements,too-many-boolean-expressions
|
|
2
2
|
from typing import Any, DefaultDict, Optional, TYPE_CHECKING
|
|
3
|
-
from collections import OrderedDict as ODict
|
|
4
3
|
from collections import defaultdict, OrderedDict
|
|
5
4
|
from enum import Enum
|
|
6
5
|
import logging
|
|
@@ -12,7 +11,7 @@ from ailment.block import Block
|
|
|
12
11
|
from ailment.statement import Statement, ConditionalJump, Jump, Label, Return
|
|
13
12
|
from ailment.expression import Const, UnaryOp, MultiStatementExpression
|
|
14
13
|
|
|
15
|
-
from angr.utils.graph import GraphUtils
|
|
14
|
+
from angr.utils.graph import GraphUtils
|
|
16
15
|
from ....knowledge_plugins.cfg import IndirectJumpType
|
|
17
16
|
from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
|
|
18
17
|
from ....utils.graph import dominates, to_acyclic_graph, dfs_back_edges
|
|
@@ -24,7 +23,6 @@ from ..utils import (
|
|
|
24
23
|
is_empty_or_label_only_node,
|
|
25
24
|
has_nonlabel_statements,
|
|
26
25
|
first_nonlabel_statement,
|
|
27
|
-
structured_node_is_simple_return,
|
|
28
26
|
)
|
|
29
27
|
from ..call_counter import AILCallCounter
|
|
30
28
|
from .structurer_nodes import (
|
|
@@ -84,7 +82,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
84
82
|
func: Optional["Function"] = None,
|
|
85
83
|
case_entry_to_switch_head: dict[int, int] | None = None,
|
|
86
84
|
parent_region=None,
|
|
87
|
-
|
|
85
|
+
improve_algorithm=False,
|
|
88
86
|
use_multistmtexprs: MultiStmtExprMode = MultiStmtExprMode.MAX_ONE_CALL,
|
|
89
87
|
**kwargs,
|
|
90
88
|
):
|
|
@@ -95,7 +93,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
95
93
|
func=func,
|
|
96
94
|
case_entry_to_switch_head=case_entry_to_switch_head,
|
|
97
95
|
parent_region=parent_region,
|
|
98
|
-
improve_structurer=improve_structurer,
|
|
99
96
|
**kwargs,
|
|
100
97
|
)
|
|
101
98
|
|
|
@@ -112,13 +109,17 @@ class PhoenixStructurer(StructurerBase):
|
|
|
112
109
|
# absorbed into other SequenceNodes
|
|
113
110
|
self.dowhile_known_tail_nodes: set = set()
|
|
114
111
|
|
|
115
|
-
|
|
112
|
+
# in reimplementing the core phoenix algorithm from the phoenix decompiler paper, two types of changes were
|
|
113
|
+
# made to the algorithm:
|
|
114
|
+
# 1. Mandatory fixes to correct flaws we found in the algorithm
|
|
115
|
+
# 2. Optional fixes to improve the results of already correct choices
|
|
116
|
+
#
|
|
117
|
+
# the improve_algorithm flag controls whether the optional fixes are applied. these are disabled by default
|
|
118
|
+
# to be as close to the original algorithm as possible. for best results, enable this flag.
|
|
119
|
+
self._improve_algorithm = improve_algorithm
|
|
116
120
|
self._edge_virtualization_hints = []
|
|
117
121
|
|
|
118
122
|
self._use_multistmtexprs = use_multistmtexprs
|
|
119
|
-
if not self._phoenix_improved:
|
|
120
|
-
self._use_multistmtexprs = MultiStmtExprMode.NEVER
|
|
121
|
-
|
|
122
123
|
self._analyze()
|
|
123
124
|
|
|
124
125
|
@staticmethod
|
|
@@ -246,7 +247,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
246
247
|
self._rewrite_jumps_to_continues(loop_node.sequence_node, loop_node=loop_node)
|
|
247
248
|
return True
|
|
248
249
|
|
|
249
|
-
if self.
|
|
250
|
+
if self._improve_algorithm:
|
|
250
251
|
matched, loop_node, successor_node = self._match_cyclic_while_with_single_successor(
|
|
251
252
|
node, head, graph, full_graph
|
|
252
253
|
)
|
|
@@ -379,7 +380,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
379
380
|
|
|
380
381
|
return True, loop_node, right
|
|
381
382
|
|
|
382
|
-
if self.
|
|
383
|
+
if self._improve_algorithm:
|
|
383
384
|
if full_graph.out_degree[node] == 1:
|
|
384
385
|
# while (true) { ...; if (...) break; }
|
|
385
386
|
_, _, head_block = self._find_node_going_to_dst(node, left, condjump_only=True)
|
|
@@ -498,7 +499,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
498
499
|
self._remove_last_statement_if_jump(succ)
|
|
499
500
|
drop_succ = False
|
|
500
501
|
|
|
501
|
-
if self.
|
|
502
|
+
if self._improve_algorithm:
|
|
502
503
|
# absorb the entire succ block if possible
|
|
503
504
|
if self._is_sequential_statement_block(succ) and self._should_use_multistmtexprs(succ):
|
|
504
505
|
stmts = self._build_multistatementexpr_statements(succ)
|
|
@@ -1004,7 +1005,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1004
1005
|
any_matches |= matched
|
|
1005
1006
|
if matched:
|
|
1006
1007
|
break
|
|
1007
|
-
if self.
|
|
1008
|
+
if self._improve_algorithm:
|
|
1008
1009
|
l.debug("... matching acyclic ITE with short-circuit conditions at %r", node)
|
|
1009
1010
|
matched = self._match_acyclic_short_circuit_conditions(graph, full_graph, node)
|
|
1010
1011
|
l.debug("... matched: %s", matched)
|
|
@@ -1306,8 +1307,8 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1306
1307
|
node_b_addr,
|
|
1307
1308
|
graph,
|
|
1308
1309
|
full_graph,
|
|
1309
|
-
) -> tuple[
|
|
1310
|
-
cases:
|
|
1310
|
+
) -> tuple[OrderedDict, Any, set[Any]]:
|
|
1311
|
+
cases: OrderedDict[int | tuple[int], SequenceNode] = OrderedDict()
|
|
1311
1312
|
to_remove = set()
|
|
1312
1313
|
|
|
1313
1314
|
# it is possible that the default node gets duplicated by other analyses and creates a default node (addr.a)
|
|
@@ -1416,7 +1417,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
1416
1417
|
self,
|
|
1417
1418
|
head,
|
|
1418
1419
|
cmp_expr,
|
|
1419
|
-
cases:
|
|
1420
|
+
cases: OrderedDict,
|
|
1420
1421
|
node_default_addr: int,
|
|
1421
1422
|
node_default,
|
|
1422
1423
|
addr,
|
|
@@ -2108,7 +2109,7 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2108
2109
|
return None
|
|
2109
2110
|
|
|
2110
2111
|
def _last_resort_refinement(self, head, graph: networkx.DiGraph, full_graph: networkx.DiGraph | None) -> bool:
|
|
2111
|
-
if self.
|
|
2112
|
+
if self._improve_algorithm:
|
|
2112
2113
|
while self._edge_virtualization_hints:
|
|
2113
2114
|
src, dst = self._edge_virtualization_hints.pop(0)
|
|
2114
2115
|
if graph.has_edge(src, dst):
|
|
@@ -2229,6 +2230,15 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2229
2230
|
remove_last_statement(src)
|
|
2230
2231
|
|
|
2231
2232
|
def _should_use_multistmtexprs(self, node: Block | BaseNode) -> bool:
|
|
2233
|
+
"""
|
|
2234
|
+
The original Phoenix algorithm had no support for multi-stmt expressions, such as the following:
|
|
2235
|
+
if ((x = y) && z) { ... }
|
|
2236
|
+
|
|
2237
|
+
There are multiple levels at which multi-stmt expressions can be used. If the Phoenix algorithm is not
|
|
2238
|
+
set to be in improved mode, then we should not use multi-stmt expressions at all.
|
|
2239
|
+
"""
|
|
2240
|
+
if not self._improve_algorithm:
|
|
2241
|
+
return False
|
|
2232
2242
|
if self._use_multistmtexprs == MultiStmtExprMode.NEVER:
|
|
2233
2243
|
return False
|
|
2234
2244
|
if self._use_multistmtexprs == MultiStmtExprMode.ALWAYS:
|
|
@@ -2313,7 +2323,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2313
2323
|
walker.block_id += 1
|
|
2314
2324
|
if _check(block.nodes[-1].statements[-1]):
|
|
2315
2325
|
walker.parent_and_block.append((walker.block_id, parent, block))
|
|
2316
|
-
return
|
|
2317
2326
|
|
|
2318
2327
|
def _handle_BreakNode(break_node: BreakNode, parent=None, **kwargs): # pylint:disable=unused-argument
|
|
2319
2328
|
walker.block_id += 1
|
|
@@ -2324,7 +2333,6 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2324
2333
|
):
|
|
2325
2334
|
# FIXME: idx is ignored
|
|
2326
2335
|
walker.parent_and_block.append((walker.block_id, parent, break_node))
|
|
2327
|
-
return
|
|
2328
2336
|
|
|
2329
2337
|
walker = SequenceWalker(
|
|
2330
2338
|
handlers={
|
|
@@ -2502,84 +2510,12 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2502
2510
|
break
|
|
2503
2511
|
return None
|
|
2504
2512
|
|
|
2513
|
+
# pylint: disable=unused-argument,no-self-use
|
|
2505
2514
|
def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
|
|
2506
2515
|
"""
|
|
2507
2516
|
Returns a list of edges that are ordered by the best edges to virtualize first.
|
|
2508
|
-
The criteria for "best" is defined by a variety of heuristics described below.
|
|
2509
2517
|
"""
|
|
2510
|
-
|
|
2511
|
-
return edges
|
|
2512
|
-
|
|
2513
|
-
# TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
|
|
2514
|
-
try:
|
|
2515
|
-
entry_node = [node for node in graph.nodes if graph.in_degree(node) == 0][0]
|
|
2516
|
-
except IndexError:
|
|
2517
|
-
entry_node = None
|
|
2518
|
-
|
|
2519
|
-
best_edges = edges
|
|
2520
|
-
if self._phoenix_improved and entry_node is not None:
|
|
2521
|
-
# the first few heuristics are based on the post-dominator count of the edge
|
|
2522
|
-
# so we collect them for each candidate edge
|
|
2523
|
-
edge_postdom_count = {}
|
|
2524
|
-
edge_sibling_count = {}
|
|
2525
|
-
for edge in edges:
|
|
2526
|
-
_, dst = edge
|
|
2527
|
-
graph_copy = networkx.DiGraph(graph)
|
|
2528
|
-
graph_copy.remove_edge(*edge)
|
|
2529
|
-
sibling_cnt = graph_copy.in_degree(dst)
|
|
2530
|
-
if sibling_cnt == 0:
|
|
2531
|
-
continue
|
|
2532
|
-
|
|
2533
|
-
edge_sibling_count[edge] = sibling_cnt
|
|
2534
|
-
post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
|
|
2535
|
-
post_doms = set()
|
|
2536
|
-
for postdom_node, dominatee in post_dom_graph.edges():
|
|
2537
|
-
if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode):
|
|
2538
|
-
post_doms.add((postdom_node, dominatee))
|
|
2539
|
-
edge_postdom_count[edge] = len(post_doms)
|
|
2540
|
-
|
|
2541
|
-
# H1: the edge that has the least amount of sibling edges should be virtualized first
|
|
2542
|
-
# this is believed to reduce the amount of virtualization needed in future rounds and increase
|
|
2543
|
-
# the edges that enter a single outer-scope if-stmt
|
|
2544
|
-
if edge_sibling_count:
|
|
2545
|
-
min_sibling_count = min(edge_sibling_count.values())
|
|
2546
|
-
best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
|
|
2547
|
-
if len(best_edges) == 1:
|
|
2548
|
-
return best_edges
|
|
2549
|
-
|
|
2550
|
-
# create the next heuristic based on the best edges from the previous heuristic
|
|
2551
|
-
filtered_edge_postdom_count = edge_postdom_count.copy()
|
|
2552
|
-
for edge in list(edge_postdom_count.keys()):
|
|
2553
|
-
if edge not in best_edges:
|
|
2554
|
-
del filtered_edge_postdom_count[edge]
|
|
2555
|
-
if filtered_edge_postdom_count:
|
|
2556
|
-
edge_postdom_count = filtered_edge_postdom_count
|
|
2557
|
-
|
|
2558
|
-
# H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
|
|
2559
|
-
# first. this is believed to make the code more linear looking be reducing the amount of scopes.
|
|
2560
|
-
# informally, we believe post-dominators to be an inverse indicator of the number of scopes present
|
|
2561
|
-
if edge_postdom_count:
|
|
2562
|
-
max_postdom_count = max(edge_postdom_count.values())
|
|
2563
|
-
best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
|
|
2564
|
-
if len(best_edges) == 1:
|
|
2565
|
-
return best_edges
|
|
2566
|
-
|
|
2567
|
-
# H3: the edge that goes directly to a return statement should be virtualized first
|
|
2568
|
-
# this is believed to be good because it can be corrected in later optimization by duplicating
|
|
2569
|
-
# the return
|
|
2570
|
-
candidate_edges = best_edges
|
|
2571
|
-
best_edges = []
|
|
2572
|
-
for src, dst in candidate_edges:
|
|
2573
|
-
if graph.has_node(dst) and structured_node_is_simple_return(dst, graph):
|
|
2574
|
-
best_edges.append((src, dst))
|
|
2575
|
-
|
|
2576
|
-
if len(best_edges) == 1:
|
|
2577
|
-
return best_edges
|
|
2578
|
-
elif not best_edges:
|
|
2579
|
-
best_edges = candidate_edges
|
|
2580
|
-
|
|
2581
|
-
# if we have another tie, or we never used improved heuristics, then we do the chick_order.
|
|
2582
|
-
return PhoenixStructurer._chick_order_edges(best_edges, node_seq)
|
|
2518
|
+
return PhoenixStructurer._chick_order_edges(edges, node_seq)
|
|
2583
2519
|
|
|
2584
2520
|
@staticmethod
|
|
2585
2521
|
def _chick_order_edges(edges: list, node_seq: dict[Any, int]) -> list:
|
|
@@ -34,14 +34,12 @@ class RecursiveStructurer(Analysis):
|
|
|
34
34
|
cond_proc=None,
|
|
35
35
|
func: Optional["Function"] = None,
|
|
36
36
|
structurer_cls: type | None = None,
|
|
37
|
-
improve_structurer=True,
|
|
38
37
|
**kwargs,
|
|
39
38
|
):
|
|
40
39
|
self._region = region
|
|
41
40
|
self.cond_proc = cond_proc if cond_proc is not None else ConditionProcessor(self.project.arch)
|
|
42
41
|
self.function = func
|
|
43
42
|
self.structurer_cls = structurer_cls if structurer_cls is not None else DreamStructurer
|
|
44
|
-
self.improve_structurer = improve_structurer
|
|
45
43
|
self.structurer_options = kwargs
|
|
46
44
|
|
|
47
45
|
self.result = None
|
|
@@ -91,7 +89,6 @@ class RecursiveStructurer(Analysis):
|
|
|
91
89
|
case_entry_to_switch_head=self._case_entry_to_switch_head,
|
|
92
90
|
func=self.function,
|
|
93
91
|
parent_region=parent_region,
|
|
94
|
-
improve_structurer=self.improve_structurer,
|
|
95
92
|
**self.structurer_options,
|
|
96
93
|
)
|
|
97
94
|
# replace this region with the resulting node in its parent region... if it's not an orphan
|