angr 9.2.166__cp310-abi3-macosx_10_12_x86_64.whl → 9.2.168__cp310-abi3-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfb.py +7 -7
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -8
- angr/analyses/decompiler/clinic.py +8 -0
- angr/analyses/decompiler/condition_processor.py +44 -1
- angr/analyses/decompiler/decompilation_cache.py +2 -0
- angr/analyses/decompiler/decompilation_options.py +10 -0
- angr/analyses/decompiler/decompiler.py +26 -2
- angr/analyses/decompiler/node_replacer.py +42 -0
- angr/analyses/decompiler/notes/__init__.py +9 -0
- angr/analyses/decompiler/notes/decompilation_note.py +48 -0
- angr/analyses/decompiler/notes/deobfuscated_strings.py +56 -0
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -0
- angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +5 -76
- angr/analyses/decompiler/region_identifier.py +12 -3
- angr/analyses/decompiler/sequence_walker.py +11 -7
- angr/analyses/decompiler/structured_codegen/base.py +34 -1
- angr/analyses/decompiler/structured_codegen/c.py +44 -10
- angr/analyses/decompiler/structuring/phoenix.py +645 -305
- angr/analyses/decompiler/structuring/structurer_base.py +75 -1
- angr/analyses/decompiler/utils.py +71 -28
- angr/analyses/deobfuscator/string_obf_finder.py +19 -16
- angr/analyses/deobfuscator/string_obf_opt_passes.py +6 -3
- angr/analyses/reaching_definitions/engine_vex.py +3 -2
- angr/procedures/glibc/scanf.py +8 -0
- angr/procedures/glibc/sscanf.py +4 -0
- angr/rustylib.abi3.so +0 -0
- angr/unicornlib.dylib +0 -0
- angr/utils/graph.py +62 -24
- {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/METADATA +5 -5
- {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/RECORD +36 -32
- {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/WHEEL +0 -0
- {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/entry_points.txt +0 -0
- {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/analyses/cfg/cfb.py
CHANGED
|
@@ -119,19 +119,19 @@ class CFBlanket(Analysis):
|
|
|
119
119
|
def _init_regions(self):
|
|
120
120
|
for obj in self.project.loader.all_objects:
|
|
121
121
|
if isinstance(obj, cle.MetaELF):
|
|
122
|
-
if obj.sections:
|
|
122
|
+
if obj.segments:
|
|
123
|
+
if "segment" not in self._exclude_region_types:
|
|
124
|
+
for segment in obj.segments:
|
|
125
|
+
if segment.memsize > 0:
|
|
126
|
+
mr = MemoryRegion(segment.vaddr, segment.memsize, "segment", obj, segment)
|
|
127
|
+
self._regions.append(mr)
|
|
128
|
+
elif obj.sections:
|
|
123
129
|
if "section" not in self._exclude_region_types:
|
|
124
130
|
# Enumerate sections in an ELF file
|
|
125
131
|
for section in obj.sections:
|
|
126
132
|
if section.occupies_memory:
|
|
127
133
|
mr = MemoryRegion(section.vaddr, section.memsize, "section", obj, section)
|
|
128
134
|
self._regions.append(mr)
|
|
129
|
-
elif obj.segments:
|
|
130
|
-
if "segment" not in self._exclude_region_types:
|
|
131
|
-
for segment in obj.segments:
|
|
132
|
-
if segment.memsize > 0:
|
|
133
|
-
mr = MemoryRegion(segment.vaddr, segment.memsize, "segment", obj, segment)
|
|
134
|
-
self._regions.append(mr)
|
|
135
135
|
else:
|
|
136
136
|
raise NotImplementedError(
|
|
137
137
|
"Currently ELFs without sections or segments are not supported. Please "
|
|
@@ -920,32 +920,32 @@ class JumpTableResolver(IndirectJumpResolver):
|
|
|
920
920
|
# more sanity checks
|
|
921
921
|
|
|
922
922
|
# for a typical jump table, the current block has only one predecessor, and the predecessor to the current
|
|
923
|
-
# block has two successors
|
|
923
|
+
# block has two successors
|
|
924
924
|
# for a typical vtable call (or jump if at the end of a function), the block has two predecessors that form a
|
|
925
925
|
# diamond shape
|
|
926
926
|
curr_node = func.get_node(addr)
|
|
927
|
-
if curr_node is None or curr_node not in func.
|
|
927
|
+
if curr_node is None or curr_node not in func.transition_graph:
|
|
928
928
|
l.debug("Could not find the node %#x in the function transition graph", addr)
|
|
929
929
|
return False, None
|
|
930
|
-
preds = list(func.
|
|
930
|
+
preds = list(func.transition_graph.predecessors(curr_node))
|
|
931
931
|
pred_endaddrs = {pred.addr + pred.size for pred in preds} # handle non-normalized CFGs
|
|
932
932
|
if func_graph_complete and not is_arm and not potential_call_table:
|
|
933
933
|
# on ARM you can do a single-block jump table...
|
|
934
934
|
if len(pred_endaddrs) == 1:
|
|
935
|
-
pred_succs = [succ for succ in func.
|
|
935
|
+
pred_succs = [succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
|
|
936
936
|
if len(pred_succs) != 2:
|
|
937
937
|
l.debug("Expect two successors to the single predecessor, found %d.", len(pred_succs))
|
|
938
938
|
return False, None
|
|
939
939
|
elif len(pred_endaddrs) == 2 and len(preds) == 2:
|
|
940
940
|
pred_succs = set(
|
|
941
|
-
[succ for succ in func.
|
|
942
|
-
+ [succ for succ in func.
|
|
941
|
+
[succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
|
|
942
|
+
+ [succ for succ in func.transition_graph.successors(preds[1]) if succ.addr != preds[1].addr]
|
|
943
943
|
)
|
|
944
944
|
is_diamond = False
|
|
945
945
|
if len(pred_succs) == 2:
|
|
946
946
|
non_node_succ = next(iter(pred_succ for pred_succ in pred_succs if pred_succ is not curr_node))
|
|
947
|
-
while func.
|
|
948
|
-
non_node_succ = next(iter(func.
|
|
947
|
+
while func.transition_graph.out_degree[non_node_succ] == 1:
|
|
948
|
+
non_node_succ = next(iter(func.transition_graph.successors(non_node_succ)))
|
|
949
949
|
if non_node_succ == curr_node:
|
|
950
950
|
is_diamond = True
|
|
951
951
|
break
|
|
@@ -55,6 +55,7 @@ from .optimization_passes import (
|
|
|
55
55
|
|
|
56
56
|
if TYPE_CHECKING:
|
|
57
57
|
from angr.knowledge_plugins.cfg import CFGModel
|
|
58
|
+
from .notes import DecompilationNote
|
|
58
59
|
from .decompilation_cache import DecompilationCache
|
|
59
60
|
from .peephole_optimizations import PeepholeOptimizationStmtBase, PeepholeOptimizationExprBase
|
|
60
61
|
|
|
@@ -142,12 +143,14 @@ class Clinic(Analysis):
|
|
|
142
143
|
optimization_scratch: dict[str, Any] | None = None,
|
|
143
144
|
desired_variables: set[str] | None = None,
|
|
144
145
|
force_loop_single_exit: bool = True,
|
|
146
|
+
refine_loops_with_single_successor: bool = False,
|
|
145
147
|
complete_successors: bool = False,
|
|
146
148
|
max_type_constraints: int = 100_000,
|
|
147
149
|
type_constraint_set_degradation_threshold: int = 150,
|
|
148
150
|
ail_graph: networkx.DiGraph | None = None,
|
|
149
151
|
arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
|
|
150
152
|
start_stage: ClinicStage | None = ClinicStage.INITIALIZATION,
|
|
153
|
+
notes: dict[str, DecompilationNote] | None = None,
|
|
151
154
|
):
|
|
152
155
|
if not func.normalized and mode == ClinicMode.DECOMPILE:
|
|
153
156
|
raise ValueError("Decompilation must work on normalized function graphs.")
|
|
@@ -193,6 +196,8 @@ class Clinic(Analysis):
|
|
|
193
196
|
# actual stack variables. these secondary stack variables can be safely eliminated if not used by anything.
|
|
194
197
|
self.secondary_stackvars: set[int] = set()
|
|
195
198
|
|
|
199
|
+
self.notes = notes if notes is not None else {}
|
|
200
|
+
|
|
196
201
|
#
|
|
197
202
|
# intermediate variables used during decompilation
|
|
198
203
|
#
|
|
@@ -212,6 +217,7 @@ class Clinic(Analysis):
|
|
|
212
217
|
self._inlining_parents = inlining_parents or ()
|
|
213
218
|
self._desired_variables = desired_variables
|
|
214
219
|
self._force_loop_single_exit = force_loop_single_exit
|
|
220
|
+
self._refine_loops_with_single_successor = refine_loops_with_single_successor
|
|
215
221
|
self._complete_successors = complete_successors
|
|
216
222
|
|
|
217
223
|
self._register_save_areas_removed: bool = False
|
|
@@ -1550,8 +1556,10 @@ class Clinic(Analysis):
|
|
|
1550
1556
|
entry_node_addr=self.entry_node_addr,
|
|
1551
1557
|
scratch=self.optimization_scratch,
|
|
1552
1558
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
1559
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
1553
1560
|
complete_successors=self._complete_successors,
|
|
1554
1561
|
stack_pointer_tracker=stack_pointer_tracker,
|
|
1562
|
+
notes=self.notes,
|
|
1555
1563
|
**kwargs,
|
|
1556
1564
|
)
|
|
1557
1565
|
if a.out_graph:
|
|
@@ -239,6 +239,24 @@ class ConditionProcessor:
|
|
|
239
239
|
condition translation if possible.
|
|
240
240
|
"""
|
|
241
241
|
|
|
242
|
+
if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
|
|
243
|
+
cond_node = src.nodes[-1]
|
|
244
|
+
if (
|
|
245
|
+
isinstance(cond_node.true_node, ailment.Block)
|
|
246
|
+
and isinstance(cond_node.false_node, ailment.Block)
|
|
247
|
+
and cond_node.true_node.statements
|
|
248
|
+
and cond_node.false_node.statements
|
|
249
|
+
):
|
|
250
|
+
last_stmt_true = self.get_last_statement(cond_node.true_node)
|
|
251
|
+
last_stmt_false = self.get_last_statement(cond_node.false_node)
|
|
252
|
+
if (
|
|
253
|
+
isinstance(last_stmt_true, ailment.Stmt.Jump)
|
|
254
|
+
and isinstance(last_stmt_false, ailment.Stmt.Jump)
|
|
255
|
+
and isinstance(last_stmt_true.target, ailment.Expr.Const)
|
|
256
|
+
and isinstance(last_stmt_false.target, ailment.Expr.Const)
|
|
257
|
+
):
|
|
258
|
+
return {last_stmt_true.target.value, last_stmt_false.target.value} == {dst0.addr, dst1.addr}
|
|
259
|
+
|
|
242
260
|
if src in graph and graph.out_degree[src] == 2 and graph.has_edge(src, dst0) and graph.has_edge(src, dst1):
|
|
243
261
|
# sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
|
|
244
262
|
if isinstance(src, ailment.Block) and src.statements and is_head_controlled_loop_block(src):
|
|
@@ -247,7 +265,10 @@ class ConditionProcessor:
|
|
|
247
265
|
)
|
|
248
266
|
assert last_stmt is not None
|
|
249
267
|
else:
|
|
250
|
-
last_stmt = self.get_last_statement(src)
|
|
268
|
+
try:
|
|
269
|
+
last_stmt = self.get_last_statement(src)
|
|
270
|
+
except EmptyBlockNotice:
|
|
271
|
+
last_stmt = None
|
|
251
272
|
|
|
252
273
|
if isinstance(last_stmt, ailment.Stmt.ConditionalJump):
|
|
253
274
|
return True
|
|
@@ -258,6 +279,28 @@ class ConditionProcessor:
|
|
|
258
279
|
return claripy.is_true(claripy.Not(edge_cond_left) == edge_cond_right) # type: ignore
|
|
259
280
|
|
|
260
281
|
def recover_edge_condition(self, graph: networkx.DiGraph, src, dst):
|
|
282
|
+
|
|
283
|
+
def _check_condnode_and_get_condition(cond_node: ConditionNode) -> claripy.ast.Bool | None:
|
|
284
|
+
for cond_block, negate in [(cond_node.true_node, False), (cond_node.false_node, True)]:
|
|
285
|
+
if isinstance(cond_block, ailment.Block) and cond_block.statements:
|
|
286
|
+
last_stmt = self.get_last_statement(cond_block)
|
|
287
|
+
if (
|
|
288
|
+
isinstance(last_stmt, ailment.Stmt.Jump)
|
|
289
|
+
and isinstance(last_stmt.target, ailment.Expr.Const)
|
|
290
|
+
and last_stmt.target.value == dst.addr
|
|
291
|
+
):
|
|
292
|
+
return claripy.Not(cond_node.condition) if negate else cond_node.condition
|
|
293
|
+
return None
|
|
294
|
+
|
|
295
|
+
if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
|
|
296
|
+
predicate = _check_condnode_and_get_condition(src.nodes[-1])
|
|
297
|
+
if predicate is not None:
|
|
298
|
+
return predicate
|
|
299
|
+
if isinstance(src, ConditionNode):
|
|
300
|
+
predicate = _check_condnode_and_get_condition(src)
|
|
301
|
+
if predicate is not None:
|
|
302
|
+
return predicate
|
|
303
|
+
|
|
261
304
|
edge = src, dst
|
|
262
305
|
edge_data = graph.get_edge_data(*edge)
|
|
263
306
|
edge_type = edge_data.get("type", "transition") if edge_data is not None else "transition"
|
|
@@ -22,6 +22,7 @@ class DecompilationCache:
|
|
|
22
22
|
"errors",
|
|
23
23
|
"func_typevar",
|
|
24
24
|
"ite_exprs",
|
|
25
|
+
"notes",
|
|
25
26
|
"parameters",
|
|
26
27
|
"type_constraints",
|
|
27
28
|
"var_to_typevar",
|
|
@@ -38,6 +39,7 @@ class DecompilationCache:
|
|
|
38
39
|
self.ite_exprs: set[tuple[int, Any]] | None = None
|
|
39
40
|
self.binop_operators: dict[OpDescriptor, str] | None = None
|
|
40
41
|
self.errors: list[str] = []
|
|
42
|
+
self.notes: dict[str, str] = {}
|
|
41
43
|
|
|
42
44
|
@property
|
|
43
45
|
def local_types(self):
|
|
@@ -239,6 +239,16 @@ options = [
|
|
|
239
239
|
default_value=False,
|
|
240
240
|
clears_cache=True,
|
|
241
241
|
),
|
|
242
|
+
O(
|
|
243
|
+
"Display decompilation notes as comments",
|
|
244
|
+
"Display decompilation notes in the outpu as function comments.",
|
|
245
|
+
bool,
|
|
246
|
+
"codegen",
|
|
247
|
+
"display_notes",
|
|
248
|
+
category="Display",
|
|
249
|
+
default_value=False,
|
|
250
|
+
clears_cache=False,
|
|
251
|
+
),
|
|
242
252
|
O(
|
|
243
253
|
"Multi-expression statements generation",
|
|
244
254
|
"Should the structuring algorithm generate multi-expression statements? If so, under what conditions?",
|
|
@@ -21,12 +21,13 @@ from .region_identifier import RegionIdentifier
|
|
|
21
21
|
from .optimization_passes.optimization_pass import OptimizationPassStage
|
|
22
22
|
from .ailgraph_walker import AILGraphWalker
|
|
23
23
|
from .condition_processor import ConditionProcessor
|
|
24
|
-
from .decompilation_options import DecompilationOption
|
|
24
|
+
from .decompilation_options import DecompilationOption, PARAM_TO_OPTION
|
|
25
25
|
from .decompilation_cache import DecompilationCache
|
|
26
26
|
from .utils import remove_edges_in_ailgraph
|
|
27
27
|
from .sequence_walker import SequenceWalker
|
|
28
28
|
from .structuring.structurer_nodes import SequenceNode
|
|
29
29
|
from .presets import DECOMPILATION_PRESETS, DecompilationPreset
|
|
30
|
+
from .notes import DecompilationNote
|
|
30
31
|
|
|
31
32
|
if TYPE_CHECKING:
|
|
32
33
|
from angr.knowledge_plugins.cfg.cfg_model import CFGModel
|
|
@@ -80,7 +81,7 @@ class Decompiler(Analysis):
|
|
|
80
81
|
func = self.kb.functions[func]
|
|
81
82
|
self.func: Function = func
|
|
82
83
|
self._cfg = cfg.model if isinstance(cfg, CFGFast) else cfg
|
|
83
|
-
self._options = options
|
|
84
|
+
self._options = self._parse_options(options) if options else []
|
|
84
85
|
|
|
85
86
|
if preset is None and optimization_passes:
|
|
86
87
|
self._optimization_passes = optimization_passes
|
|
@@ -145,6 +146,7 @@ class Decompiler(Analysis):
|
|
|
145
146
|
self._copied_var_ids: set[int] = set()
|
|
146
147
|
self._optimization_scratch: dict[str, Any] = {}
|
|
147
148
|
self.expr_collapse_depth = expr_collapse_depth
|
|
149
|
+
self.notes: dict[str, DecompilationNote] = {}
|
|
148
150
|
|
|
149
151
|
if decompile:
|
|
150
152
|
with self._resilience():
|
|
@@ -171,6 +173,20 @@ class Decompiler(Analysis):
|
|
|
171
173
|
id_checks = {"cfg", "variable_kb"}
|
|
172
174
|
return all(a[k] is b[k] if k in id_checks else a[k] == b[k] for k in self._cache_parameters)
|
|
173
175
|
|
|
176
|
+
@staticmethod
|
|
177
|
+
def _parse_options(options: list[tuple[DecompilationOption | str, Any]]) -> list[tuple[DecompilationOption, Any]]:
|
|
178
|
+
"""
|
|
179
|
+
Parse the options and return a list of option tuples.
|
|
180
|
+
"""
|
|
181
|
+
|
|
182
|
+
converted_options = []
|
|
183
|
+
for o, v in options:
|
|
184
|
+
if isinstance(o, str):
|
|
185
|
+
# convert to DecompilationOption
|
|
186
|
+
o = PARAM_TO_OPTION[o]
|
|
187
|
+
converted_options.append((o, v))
|
|
188
|
+
return converted_options
|
|
189
|
+
|
|
174
190
|
@timethis
|
|
175
191
|
def _decompile(self):
|
|
176
192
|
if self.func.is_simprocedure:
|
|
@@ -222,6 +238,7 @@ class Decompiler(Analysis):
|
|
|
222
238
|
# determine a few arguments according to the structuring algorithm
|
|
223
239
|
fold_callexprs_into_conditions = False
|
|
224
240
|
self._force_loop_single_exit = True
|
|
241
|
+
self._refine_loops_with_single_successor = False
|
|
225
242
|
self._complete_successors = False
|
|
226
243
|
self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
|
|
227
244
|
if "structurer_cls" not in self._recursive_structurer_params:
|
|
@@ -229,6 +246,7 @@ class Decompiler(Analysis):
|
|
|
229
246
|
# is the algorithm based on Phoenix (a schema-based algorithm)?
|
|
230
247
|
if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
|
|
231
248
|
self._force_loop_single_exit = False
|
|
249
|
+
# self._refine_loops_with_single_successor = True
|
|
232
250
|
self._complete_successors = True
|
|
233
251
|
fold_callexprs_into_conditions = True
|
|
234
252
|
|
|
@@ -261,10 +279,12 @@ class Decompiler(Analysis):
|
|
|
261
279
|
desired_variables=self._desired_variables,
|
|
262
280
|
optimization_scratch=self._optimization_scratch,
|
|
263
281
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
282
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
264
283
|
complete_successors=self._complete_successors,
|
|
265
284
|
ail_graph=self._clinic_graph,
|
|
266
285
|
arg_vvars=self._clinic_arg_vvars,
|
|
267
286
|
start_stage=self._clinic_start_stage,
|
|
287
|
+
notes=self.notes,
|
|
268
288
|
**self.options_to_params(self.options_by_class["clinic"]),
|
|
269
289
|
)
|
|
270
290
|
else:
|
|
@@ -375,6 +395,7 @@ class Decompiler(Analysis):
|
|
|
375
395
|
const_formats=old_codegen.const_formats if old_codegen is not None else None,
|
|
376
396
|
externs=clinic.externs,
|
|
377
397
|
binop_depth_cutoff=self.expr_collapse_depth,
|
|
398
|
+
notes=self.notes,
|
|
378
399
|
**self.options_to_params(self.options_by_class["codegen"]),
|
|
379
400
|
)
|
|
380
401
|
|
|
@@ -396,6 +417,7 @@ class Decompiler(Analysis):
|
|
|
396
417
|
cond_proc=condition_processor,
|
|
397
418
|
update_graph=update_graph,
|
|
398
419
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
420
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
399
421
|
complete_successors=self._complete_successors,
|
|
400
422
|
entry_node_addr=self.clinic.entry_node_addr,
|
|
401
423
|
**self.options_to_params(self.options_by_class["region_identifier"]),
|
|
@@ -444,6 +466,7 @@ class Decompiler(Analysis):
|
|
|
444
466
|
entry_node_addr=self.clinic.entry_node_addr,
|
|
445
467
|
scratch=self._optimization_scratch,
|
|
446
468
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
469
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
447
470
|
complete_successors=self._complete_successors,
|
|
448
471
|
**kwargs,
|
|
449
472
|
)
|
|
@@ -507,6 +530,7 @@ class Decompiler(Analysis):
|
|
|
507
530
|
entry_node_addr=self.clinic.entry_node_addr,
|
|
508
531
|
scratch=self._optimization_scratch,
|
|
509
532
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
533
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
510
534
|
complete_successors=self._complete_successors,
|
|
511
535
|
peephole_optimizations=self._peephole_optimizations,
|
|
512
536
|
avoid_vvar_ids=self._copied_var_ids,
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from angr.ailment import Block
|
|
4
|
+
from .sequence_walker import SequenceWalker
|
|
5
|
+
from .structuring.structurer_nodes import BaseNode, SequenceNode, MultiNode
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class NodeReplacer(SequenceWalker):
|
|
9
|
+
"""
|
|
10
|
+
Replaces nodes in a node with new nodes based on a mapping.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
def __init__(self, root: BaseNode, replacements: dict) -> None:
|
|
14
|
+
super().__init__(update_seqnode_in_place=False)
|
|
15
|
+
|
|
16
|
+
self.root = root
|
|
17
|
+
self.replacements = replacements
|
|
18
|
+
self.result: BaseNode = self.walk(self.root) # type:ignore
|
|
19
|
+
|
|
20
|
+
def _handle(self, node: BaseNode, **kwargs):
|
|
21
|
+
return self.replacements[node] if node in self.replacements else super()._handle(node, **kwargs)
|
|
22
|
+
|
|
23
|
+
def _handle_MultiNode(self, node: MultiNode, **kwargs):
|
|
24
|
+
changed = False
|
|
25
|
+
nodes_copy = list(node.nodes)
|
|
26
|
+
|
|
27
|
+
i = len(nodes_copy) - 1
|
|
28
|
+
has_non_block = False
|
|
29
|
+
while i > -1:
|
|
30
|
+
node_ = nodes_copy[i]
|
|
31
|
+
new_node = self._handle(node_, parent=node, index=i)
|
|
32
|
+
if new_node is not None:
|
|
33
|
+
changed = True
|
|
34
|
+
nodes_copy[i] = new_node
|
|
35
|
+
if not isinstance(new_node, Block):
|
|
36
|
+
has_non_block = True
|
|
37
|
+
i -= 1
|
|
38
|
+
if not changed:
|
|
39
|
+
return None
|
|
40
|
+
if has_non_block:
|
|
41
|
+
return SequenceNode(node.addr, nodes=nodes_copy)
|
|
42
|
+
return MultiNode(nodes_copy, addr=node.addr, idx=node.idx)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DecompilationNoteLevel(Enum):
|
|
8
|
+
"""
|
|
9
|
+
Enum class describing the level of each decompilation note.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
DEBUG = 0
|
|
13
|
+
INFO = 1
|
|
14
|
+
WARNING = 2
|
|
15
|
+
CRITICAL = 3
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DecompilationNote:
|
|
19
|
+
"""
|
|
20
|
+
Describes a note that is generated during decompilation.
|
|
21
|
+
|
|
22
|
+
Key is a unique string for the decompilation note. It is used as an index in the decompilation notes dictionary in
|
|
23
|
+
the Decompiler class.
|
|
24
|
+
Name is a string for display by default.
|
|
25
|
+
Content is the actual content of the note. It can be of any type, but for custom types, you must override `__str__`
|
|
26
|
+
so that it can be displayed.
|
|
27
|
+
Level is the level of the note. The following values are available: DecompilationNoteLevel.DEBUG,
|
|
28
|
+
DecompilationNoteLevel.INFO, DecompilationNoteLevel.WARNING, and DecompilationNoteLevel.CRITICAL.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
__slots__ = (
|
|
32
|
+
"content",
|
|
33
|
+
"key",
|
|
34
|
+
"level",
|
|
35
|
+
"name",
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
def __init__(self, key: str, name: str, content: Any, *, level=DecompilationNoteLevel.INFO):
|
|
39
|
+
self.key = key
|
|
40
|
+
self.name = name
|
|
41
|
+
self.content = content
|
|
42
|
+
self.level = level
|
|
43
|
+
|
|
44
|
+
def __repr__(self):
|
|
45
|
+
return f"<DecompilationNote: {self.name}>"
|
|
46
|
+
|
|
47
|
+
def __str__(self):
|
|
48
|
+
return f"{self.name}: {self.content}"
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .decompilation_note import DecompilationNote
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DeobfuscatedString:
|
|
7
|
+
"""
|
|
8
|
+
Represents a deobfuscated string.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
__slots__ = ("ref_addr", "type", "value")
|
|
12
|
+
|
|
13
|
+
def __init__(self, value: bytes, obf_type: str, ref_addr: int | None = None):
|
|
14
|
+
self.value = value
|
|
15
|
+
self.type = obf_type
|
|
16
|
+
self.ref_addr = ref_addr
|
|
17
|
+
|
|
18
|
+
def __repr__(self):
|
|
19
|
+
return (
|
|
20
|
+
f"<DeobfuscatedString Type{self.type} value={self.value!r} ref={self.ref_addr:#x}>"
|
|
21
|
+
if self.ref_addr is not None
|
|
22
|
+
else f"<DeobfuscatedString Type{self.type} value={self.value!r}>"
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
def __str__(self):
|
|
26
|
+
return repr(self.value)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DeobfuscatedStringsNote(DecompilationNote):
|
|
30
|
+
"""
|
|
31
|
+
Represents a decompilation note that describes obfuscated strings found during decompilation.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
def __init__(self, key: str = "deobfuscated_strings", name: str = "Deobfuscated Strings"):
|
|
35
|
+
super().__init__(key, name, None)
|
|
36
|
+
|
|
37
|
+
self.strings: dict[int, DeobfuscatedString] = {}
|
|
38
|
+
|
|
39
|
+
def add_string(self, obf_type: str, value: bytes, *, ref_addr: int):
|
|
40
|
+
"""
|
|
41
|
+
Add a deobfuscated string to the note.
|
|
42
|
+
|
|
43
|
+
:param obf_type: The type of obfuscation (e.g., "1", "2").
|
|
44
|
+
:param value: The deobfuscated string value.
|
|
45
|
+
:param ref_addr: The address where this string is referenced, if applicable.
|
|
46
|
+
"""
|
|
47
|
+
deobf_str = DeobfuscatedString(value, obf_type, ref_addr=ref_addr)
|
|
48
|
+
self.strings[ref_addr] = deobf_str
|
|
49
|
+
|
|
50
|
+
def __str__(self):
|
|
51
|
+
lines = ["Obfuscated strings are found in decompilation and have been deobfuscated:"]
|
|
52
|
+
for addr in sorted(self.strings):
|
|
53
|
+
deobf_str = self.strings[addr]
|
|
54
|
+
lines.append(f" Type {deobf_str.type} @ {deobf_str.ref_addr:#x}: {deobf_str.value!r}")
|
|
55
|
+
|
|
56
|
+
return "\n".join(lines)
|
|
@@ -135,11 +135,13 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
135
135
|
entry_node_addr=None,
|
|
136
136
|
scratch: dict[str, Any] | None = None,
|
|
137
137
|
force_loop_single_exit: bool = True,
|
|
138
|
+
refine_loops_with_single_successor: bool = False,
|
|
138
139
|
complete_successors: bool = False,
|
|
139
140
|
avoid_vvar_ids: set[int] | None = None,
|
|
140
141
|
arg_vvars: set[int] | None = None,
|
|
141
142
|
peephole_optimizations=None,
|
|
142
143
|
stack_pointer_tracker=None,
|
|
144
|
+
notes: dict | None = None,
|
|
143
145
|
**kwargs,
|
|
144
146
|
):
|
|
145
147
|
super().__init__(func)
|
|
@@ -158,10 +160,12 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
158
160
|
entry_node_addr if entry_node_addr is not None else (func.addr, None)
|
|
159
161
|
)
|
|
160
162
|
self._force_loop_single_exit = force_loop_single_exit
|
|
163
|
+
self._refine_loops_with_single_successor = refine_loops_with_single_successor
|
|
161
164
|
self._complete_successors = complete_successors
|
|
162
165
|
self._avoid_vvar_ids = avoid_vvar_ids or set()
|
|
163
166
|
self._peephole_optimizations = peephole_optimizations
|
|
164
167
|
self._stack_pointer_tracker = stack_pointer_tracker
|
|
168
|
+
self.notes = notes if notes is not None else {}
|
|
165
169
|
|
|
166
170
|
# output
|
|
167
171
|
self.out_graph: networkx.DiGraph | None = None
|
|
@@ -397,6 +401,7 @@ class OptimizationPass(BaseOptimizationPass):
|
|
|
397
401
|
cond_proc=condition_processor or ConditionProcessor(self.project.arch),
|
|
398
402
|
update_graph=update_graph,
|
|
399
403
|
force_loop_single_exit=self._force_loop_single_exit,
|
|
404
|
+
refine_loops_with_single_successor=self._refine_loops_with_single_successor,
|
|
400
405
|
complete_successors=self._complete_successors,
|
|
401
406
|
entry_node_addr=self.entry_node_addr,
|
|
402
407
|
)
|
|
@@ -6,7 +6,7 @@ from typing import Any
|
|
|
6
6
|
import networkx
|
|
7
7
|
|
|
8
8
|
from angr.ailment import Block
|
|
9
|
-
from angr.ailment.statement import ConditionalJump, Label
|
|
9
|
+
from angr.ailment.statement import ConditionalJump
|
|
10
10
|
|
|
11
11
|
from .return_duplicator_base import ReturnDuplicatorBase
|
|
12
12
|
from .optimization_pass import StructuringOptimizationPass
|
|
@@ -53,7 +53,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
53
53
|
prevent_new_gotos: bool = True,
|
|
54
54
|
minimize_copies_for_regions: bool = True,
|
|
55
55
|
region_identifier=None,
|
|
56
|
-
vvar_id_start: int
|
|
56
|
+
vvar_id_start: int = 0,
|
|
57
57
|
scratch: dict[str, Any] | None = None,
|
|
58
58
|
max_func_blocks: int = 500,
|
|
59
59
|
**kwargs,
|
|
@@ -91,8 +91,9 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
91
91
|
self,
|
|
92
92
|
src: Block,
|
|
93
93
|
dst: Block,
|
|
94
|
-
graph: networkx.DiGraph = None,
|
|
95
94
|
max_level_check=1,
|
|
95
|
+
*,
|
|
96
|
+
graph: networkx.DiGraph,
|
|
96
97
|
):
|
|
97
98
|
"""
|
|
98
99
|
TODO: Implement a more principled way of checking if an edge is a goto edge with Phoenix's structuring info
|
|
@@ -100,6 +101,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
100
101
|
above a goto edge as the goto src.
|
|
101
102
|
"""
|
|
102
103
|
# Do a simple and fast check first
|
|
104
|
+
assert self._goto_manager is not None
|
|
103
105
|
is_simple_goto = self._goto_manager.is_goto_edge(src, dst)
|
|
104
106
|
if is_simple_goto:
|
|
105
107
|
return True
|
|
@@ -155,79 +157,6 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
|
|
|
155
157
|
# keep testing the next edge
|
|
156
158
|
node = succ
|
|
157
159
|
|
|
158
|
-
# Special case 3: In Phoenix, regions full of only if-stmts can be collapsed and moved. This causes
|
|
159
|
-
# the goto manager to report gotos that are at the top of the region instead of ones in the middle of it.
|
|
160
|
-
# Because of this, we need to gather all the nodes above the original src and check if any of them
|
|
161
|
-
# go to the destination. Additionally, we need to do this on the supergraph to get rid of
|
|
162
|
-
# goto edges that are removed by Phoenix.
|
|
163
|
-
# This case is observed in the test case `TestDecompiler.test_tail_tail_bytes_ret_dup`.
|
|
164
|
-
if self._supergraph is None:
|
|
165
|
-
return False
|
|
166
|
-
|
|
167
|
-
super_to_og_nodes = {n: self._supergraph.nodes[n]["original_nodes"] for n in self._supergraph.nodes}
|
|
168
|
-
og_to_super_nodes = {og: super_n for super_n, ogs in super_to_og_nodes.items() for og in ogs}
|
|
169
|
-
super_src = og_to_super_nodes.get(src)
|
|
170
|
-
super_dst = og_to_super_nodes.get(dst)
|
|
171
|
-
if super_src is None or super_dst is None:
|
|
172
|
-
return False
|
|
173
|
-
|
|
174
|
-
# collect all nodes which have only an if-stmt in them that are ancestors of super_src
|
|
175
|
-
check_blks = {super_src}
|
|
176
|
-
level_blocks = {super_src}
|
|
177
|
-
for _ in range(10):
|
|
178
|
-
done = False
|
|
179
|
-
if_blks = set()
|
|
180
|
-
for lblock in level_blocks:
|
|
181
|
-
preds = list(self._supergraph.predecessors(lblock))
|
|
182
|
-
for pred in preds:
|
|
183
|
-
only_cond_jump = all(isinstance(s, (ConditionalJump, Label)) for s in pred.statements)
|
|
184
|
-
if only_cond_jump:
|
|
185
|
-
if_blks.add(pred)
|
|
186
|
-
|
|
187
|
-
done = len(if_blks) == 0
|
|
188
|
-
|
|
189
|
-
if done:
|
|
190
|
-
break
|
|
191
|
-
|
|
192
|
-
check_blks |= if_blks
|
|
193
|
-
level_blocks = if_blks
|
|
194
|
-
|
|
195
|
-
# convert all the found if-only super-blocks back into their original blocks
|
|
196
|
-
og_check_blocks = set()
|
|
197
|
-
for blk in check_blks:
|
|
198
|
-
og_check_blocks |= set(super_to_og_nodes[blk])
|
|
199
|
-
|
|
200
|
-
# check if any of the original blocks are gotos to the destination
|
|
201
|
-
goto_hits = 0
|
|
202
|
-
for block in og_check_blocks:
|
|
203
|
-
if self._goto_manager.is_goto_edge(block, dst):
|
|
204
|
-
goto_hits += 1
|
|
205
|
-
|
|
206
|
-
# Although it is good to find a goto in the if-only block region, having more than a single goto
|
|
207
|
-
# existing that goes to the same dst is a bad sign. This can be seen in the following test:
|
|
208
|
-
# TestDecompiler.test_dd_iread_ret_dup_region
|
|
209
|
-
#
|
|
210
|
-
# It occurs when you have something like:
|
|
211
|
-
# ```
|
|
212
|
-
# if (a || c)
|
|
213
|
-
# goto target;
|
|
214
|
-
# target:
|
|
215
|
-
# return 0;
|
|
216
|
-
# ```
|
|
217
|
-
#
|
|
218
|
-
#
|
|
219
|
-
# This looks like an edge from (a, target) and (c, target) but it is actually a single edge.
|
|
220
|
-
# If you allow both to duplicate you get the following:
|
|
221
|
-
# ```
|
|
222
|
-
# if (a):
|
|
223
|
-
# return
|
|
224
|
-
# if (c):
|
|
225
|
-
# return
|
|
226
|
-
# ```
|
|
227
|
-
# This is not the desired behavior.
|
|
228
|
-
# So we need to check if there is only a single goto that goes to the destination.
|
|
229
|
-
return goto_hits == 1
|
|
230
|
-
|
|
231
160
|
return False
|
|
232
161
|
|
|
233
162
|
def _analyze(self, cache=None):
|