angr 9.2.166__cp310-abi3-manylinux_2_28_aarch64.whl → 9.2.168__cp310-abi3-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (35)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfb.py +7 -7
  3. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -8
  4. angr/analyses/decompiler/clinic.py +8 -0
  5. angr/analyses/decompiler/condition_processor.py +44 -1
  6. angr/analyses/decompiler/decompilation_cache.py +2 -0
  7. angr/analyses/decompiler/decompilation_options.py +10 -0
  8. angr/analyses/decompiler/decompiler.py +26 -2
  9. angr/analyses/decompiler/node_replacer.py +42 -0
  10. angr/analyses/decompiler/notes/__init__.py +9 -0
  11. angr/analyses/decompiler/notes/decompilation_note.py +48 -0
  12. angr/analyses/decompiler/notes/deobfuscated_strings.py +56 -0
  13. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  14. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -0
  15. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +5 -76
  16. angr/analyses/decompiler/region_identifier.py +12 -3
  17. angr/analyses/decompiler/sequence_walker.py +11 -7
  18. angr/analyses/decompiler/structured_codegen/base.py +34 -1
  19. angr/analyses/decompiler/structured_codegen/c.py +44 -10
  20. angr/analyses/decompiler/structuring/phoenix.py +645 -305
  21. angr/analyses/decompiler/structuring/structurer_base.py +75 -1
  22. angr/analyses/decompiler/utils.py +71 -28
  23. angr/analyses/deobfuscator/string_obf_finder.py +19 -16
  24. angr/analyses/deobfuscator/string_obf_opt_passes.py +6 -3
  25. angr/analyses/reaching_definitions/engine_vex.py +3 -2
  26. angr/procedures/glibc/scanf.py +8 -0
  27. angr/procedures/glibc/sscanf.py +4 -0
  28. angr/rustylib.abi3.so +0 -0
  29. angr/utils/graph.py +62 -24
  30. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/METADATA +5 -5
  31. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/RECORD +35 -31
  32. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/WHEEL +0 -0
  33. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/entry_points.txt +0 -0
  34. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/licenses/LICENSE +0 -0
  35. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.166"
5
+ __version__ = "9.2.168"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
angr/analyses/cfg/cfb.py CHANGED
@@ -119,19 +119,19 @@ class CFBlanket(Analysis):
119
119
  def _init_regions(self):
120
120
  for obj in self.project.loader.all_objects:
121
121
  if isinstance(obj, cle.MetaELF):
122
- if obj.sections:
122
+ if obj.segments:
123
+ if "segment" not in self._exclude_region_types:
124
+ for segment in obj.segments:
125
+ if segment.memsize > 0:
126
+ mr = MemoryRegion(segment.vaddr, segment.memsize, "segment", obj, segment)
127
+ self._regions.append(mr)
128
+ elif obj.sections:
123
129
  if "section" not in self._exclude_region_types:
124
130
  # Enumerate sections in an ELF file
125
131
  for section in obj.sections:
126
132
  if section.occupies_memory:
127
133
  mr = MemoryRegion(section.vaddr, section.memsize, "section", obj, section)
128
134
  self._regions.append(mr)
129
- elif obj.segments:
130
- if "segment" not in self._exclude_region_types:
131
- for segment in obj.segments:
132
- if segment.memsize > 0:
133
- mr = MemoryRegion(segment.vaddr, segment.memsize, "segment", obj, segment)
134
- self._regions.append(mr)
135
135
  else:
136
136
  raise NotImplementedError(
137
137
  "Currently ELFs without sections or segments are not supported. Please "
@@ -920,32 +920,32 @@ class JumpTableResolver(IndirectJumpResolver):
920
920
  # more sanity checks
921
921
 
922
922
  # for a typical jump table, the current block has only one predecessor, and the predecessor to the current
923
- # block has two successors (not including itself)
923
+ # block has two successors
924
924
  # for a typical vtable call (or jump if at the end of a function), the block has two predecessors that form a
925
925
  # diamond shape
926
926
  curr_node = func.get_node(addr)
927
- if curr_node is None or curr_node not in func.graph:
927
+ if curr_node is None or curr_node not in func.transition_graph:
928
928
  l.debug("Could not find the node %#x in the function transition graph", addr)
929
929
  return False, None
930
- preds = list(func.graph.predecessors(curr_node))
930
+ preds = list(func.transition_graph.predecessors(curr_node))
931
931
  pred_endaddrs = {pred.addr + pred.size for pred in preds} # handle non-normalized CFGs
932
932
  if func_graph_complete and not is_arm and not potential_call_table:
933
933
  # on ARM you can do a single-block jump table...
934
934
  if len(pred_endaddrs) == 1:
935
- pred_succs = [succ for succ in func.graph.successors(preds[0]) if succ.addr != preds[0].addr]
935
+ pred_succs = [succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
936
936
  if len(pred_succs) != 2:
937
937
  l.debug("Expect two successors to the single predecessor, found %d.", len(pred_succs))
938
938
  return False, None
939
939
  elif len(pred_endaddrs) == 2 and len(preds) == 2:
940
940
  pred_succs = set(
941
- [succ for succ in func.graph.successors(preds[0]) if succ.addr != preds[0].addr]
942
- + [succ for succ in func.graph.successors(preds[1]) if succ.addr != preds[1].addr]
941
+ [succ for succ in func.transition_graph.successors(preds[0]) if succ.addr != preds[0].addr]
942
+ + [succ for succ in func.transition_graph.successors(preds[1]) if succ.addr != preds[1].addr]
943
943
  )
944
944
  is_diamond = False
945
945
  if len(pred_succs) == 2:
946
946
  non_node_succ = next(iter(pred_succ for pred_succ in pred_succs if pred_succ is not curr_node))
947
- while func.graph.out_degree[non_node_succ] == 1:
948
- non_node_succ = next(iter(func.graph.successors(non_node_succ)))
947
+ while func.transition_graph.out_degree[non_node_succ] == 1:
948
+ non_node_succ = next(iter(func.transition_graph.successors(non_node_succ)))
949
949
  if non_node_succ == curr_node:
950
950
  is_diamond = True
951
951
  break
@@ -55,6 +55,7 @@ from .optimization_passes import (
55
55
 
56
56
  if TYPE_CHECKING:
57
57
  from angr.knowledge_plugins.cfg import CFGModel
58
+ from .notes import DecompilationNote
58
59
  from .decompilation_cache import DecompilationCache
59
60
  from .peephole_optimizations import PeepholeOptimizationStmtBase, PeepholeOptimizationExprBase
60
61
 
@@ -142,12 +143,14 @@ class Clinic(Analysis):
142
143
  optimization_scratch: dict[str, Any] | None = None,
143
144
  desired_variables: set[str] | None = None,
144
145
  force_loop_single_exit: bool = True,
146
+ refine_loops_with_single_successor: bool = False,
145
147
  complete_successors: bool = False,
146
148
  max_type_constraints: int = 100_000,
147
149
  type_constraint_set_degradation_threshold: int = 150,
148
150
  ail_graph: networkx.DiGraph | None = None,
149
151
  arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
150
152
  start_stage: ClinicStage | None = ClinicStage.INITIALIZATION,
153
+ notes: dict[str, DecompilationNote] | None = None,
151
154
  ):
152
155
  if not func.normalized and mode == ClinicMode.DECOMPILE:
153
156
  raise ValueError("Decompilation must work on normalized function graphs.")
@@ -193,6 +196,8 @@ class Clinic(Analysis):
193
196
  # actual stack variables. these secondary stack variables can be safely eliminated if not used by anything.
194
197
  self.secondary_stackvars: set[int] = set()
195
198
 
199
+ self.notes = notes if notes is not None else {}
200
+
196
201
  #
197
202
  # intermediate variables used during decompilation
198
203
  #
@@ -212,6 +217,7 @@ class Clinic(Analysis):
212
217
  self._inlining_parents = inlining_parents or ()
213
218
  self._desired_variables = desired_variables
214
219
  self._force_loop_single_exit = force_loop_single_exit
220
+ self._refine_loops_with_single_successor = refine_loops_with_single_successor
215
221
  self._complete_successors = complete_successors
216
222
 
217
223
  self._register_save_areas_removed: bool = False
@@ -1550,8 +1556,10 @@ class Clinic(Analysis):
1550
1556
  entry_node_addr=self.entry_node_addr,
1551
1557
  scratch=self.optimization_scratch,
1552
1558
  force_loop_single_exit=self._force_loop_single_exit,
1559
+ refine_loops_with_single_successor=self._refine_loops_with_single_successor,
1553
1560
  complete_successors=self._complete_successors,
1554
1561
  stack_pointer_tracker=stack_pointer_tracker,
1562
+ notes=self.notes,
1555
1563
  **kwargs,
1556
1564
  )
1557
1565
  if a.out_graph:
@@ -239,6 +239,24 @@ class ConditionProcessor:
239
239
  condition translation if possible.
240
240
  """
241
241
 
242
+ if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
243
+ cond_node = src.nodes[-1]
244
+ if (
245
+ isinstance(cond_node.true_node, ailment.Block)
246
+ and isinstance(cond_node.false_node, ailment.Block)
247
+ and cond_node.true_node.statements
248
+ and cond_node.false_node.statements
249
+ ):
250
+ last_stmt_true = self.get_last_statement(cond_node.true_node)
251
+ last_stmt_false = self.get_last_statement(cond_node.false_node)
252
+ if (
253
+ isinstance(last_stmt_true, ailment.Stmt.Jump)
254
+ and isinstance(last_stmt_false, ailment.Stmt.Jump)
255
+ and isinstance(last_stmt_true.target, ailment.Expr.Const)
256
+ and isinstance(last_stmt_false.target, ailment.Expr.Const)
257
+ ):
258
+ return {last_stmt_true.target.value, last_stmt_false.target.value} == {dst0.addr, dst1.addr}
259
+
242
260
  if src in graph and graph.out_degree[src] == 2 and graph.has_edge(src, dst0) and graph.has_edge(src, dst1):
243
261
  # sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
244
262
  if isinstance(src, ailment.Block) and src.statements and is_head_controlled_loop_block(src):
@@ -247,7 +265,10 @@ class ConditionProcessor:
247
265
  )
248
266
  assert last_stmt is not None
249
267
  else:
250
- last_stmt = self.get_last_statement(src)
268
+ try:
269
+ last_stmt = self.get_last_statement(src)
270
+ except EmptyBlockNotice:
271
+ last_stmt = None
251
272
 
252
273
  if isinstance(last_stmt, ailment.Stmt.ConditionalJump):
253
274
  return True
@@ -258,6 +279,28 @@ class ConditionProcessor:
258
279
  return claripy.is_true(claripy.Not(edge_cond_left) == edge_cond_right) # type: ignore
259
280
 
260
281
  def recover_edge_condition(self, graph: networkx.DiGraph, src, dst):
282
+
283
+ def _check_condnode_and_get_condition(cond_node: ConditionNode) -> claripy.ast.Bool | None:
284
+ for cond_block, negate in [(cond_node.true_node, False), (cond_node.false_node, True)]:
285
+ if isinstance(cond_block, ailment.Block) and cond_block.statements:
286
+ last_stmt = self.get_last_statement(cond_block)
287
+ if (
288
+ isinstance(last_stmt, ailment.Stmt.Jump)
289
+ and isinstance(last_stmt.target, ailment.Expr.Const)
290
+ and last_stmt.target.value == dst.addr
291
+ ):
292
+ return claripy.Not(cond_node.condition) if negate else cond_node.condition
293
+ return None
294
+
295
+ if isinstance(src, SequenceNode) and src.nodes and isinstance(src.nodes[-1], ConditionNode):
296
+ predicate = _check_condnode_and_get_condition(src.nodes[-1])
297
+ if predicate is not None:
298
+ return predicate
299
+ if isinstance(src, ConditionNode):
300
+ predicate = _check_condnode_and_get_condition(src)
301
+ if predicate is not None:
302
+ return predicate
303
+
261
304
  edge = src, dst
262
305
  edge_data = graph.get_edge_data(*edge)
263
306
  edge_type = edge_data.get("type", "transition") if edge_data is not None else "transition"
@@ -22,6 +22,7 @@ class DecompilationCache:
22
22
  "errors",
23
23
  "func_typevar",
24
24
  "ite_exprs",
25
+ "notes",
25
26
  "parameters",
26
27
  "type_constraints",
27
28
  "var_to_typevar",
@@ -38,6 +39,7 @@ class DecompilationCache:
38
39
  self.ite_exprs: set[tuple[int, Any]] | None = None
39
40
  self.binop_operators: dict[OpDescriptor, str] | None = None
40
41
  self.errors: list[str] = []
42
+ self.notes: dict[str, str] = {}
41
43
 
42
44
  @property
43
45
  def local_types(self):
@@ -239,6 +239,16 @@ options = [
239
239
  default_value=False,
240
240
  clears_cache=True,
241
241
  ),
242
+ O(
243
+ "Display decompilation notes as comments",
244
+ "Display decompilation notes in the outpu as function comments.",
245
+ bool,
246
+ "codegen",
247
+ "display_notes",
248
+ category="Display",
249
+ default_value=False,
250
+ clears_cache=False,
251
+ ),
242
252
  O(
243
253
  "Multi-expression statements generation",
244
254
  "Should the structuring algorithm generate multi-expression statements? If so, under what conditions?",
@@ -21,12 +21,13 @@ from .region_identifier import RegionIdentifier
21
21
  from .optimization_passes.optimization_pass import OptimizationPassStage
22
22
  from .ailgraph_walker import AILGraphWalker
23
23
  from .condition_processor import ConditionProcessor
24
- from .decompilation_options import DecompilationOption
24
+ from .decompilation_options import DecompilationOption, PARAM_TO_OPTION
25
25
  from .decompilation_cache import DecompilationCache
26
26
  from .utils import remove_edges_in_ailgraph
27
27
  from .sequence_walker import SequenceWalker
28
28
  from .structuring.structurer_nodes import SequenceNode
29
29
  from .presets import DECOMPILATION_PRESETS, DecompilationPreset
30
+ from .notes import DecompilationNote
30
31
 
31
32
  if TYPE_CHECKING:
32
33
  from angr.knowledge_plugins.cfg.cfg_model import CFGModel
@@ -80,7 +81,7 @@ class Decompiler(Analysis):
80
81
  func = self.kb.functions[func]
81
82
  self.func: Function = func
82
83
  self._cfg = cfg.model if isinstance(cfg, CFGFast) else cfg
83
- self._options = options or []
84
+ self._options = self._parse_options(options) if options else []
84
85
 
85
86
  if preset is None and optimization_passes:
86
87
  self._optimization_passes = optimization_passes
@@ -145,6 +146,7 @@ class Decompiler(Analysis):
145
146
  self._copied_var_ids: set[int] = set()
146
147
  self._optimization_scratch: dict[str, Any] = {}
147
148
  self.expr_collapse_depth = expr_collapse_depth
149
+ self.notes: dict[str, DecompilationNote] = {}
148
150
 
149
151
  if decompile:
150
152
  with self._resilience():
@@ -171,6 +173,20 @@ class Decompiler(Analysis):
171
173
  id_checks = {"cfg", "variable_kb"}
172
174
  return all(a[k] is b[k] if k in id_checks else a[k] == b[k] for k in self._cache_parameters)
173
175
 
176
+ @staticmethod
177
+ def _parse_options(options: list[tuple[DecompilationOption | str, Any]]) -> list[tuple[DecompilationOption, Any]]:
178
+ """
179
+ Parse the options and return a list of option tuples.
180
+ """
181
+
182
+ converted_options = []
183
+ for o, v in options:
184
+ if isinstance(o, str):
185
+ # convert to DecompilationOption
186
+ o = PARAM_TO_OPTION[o]
187
+ converted_options.append((o, v))
188
+ return converted_options
189
+
174
190
  @timethis
175
191
  def _decompile(self):
176
192
  if self.func.is_simprocedure:
@@ -222,6 +238,7 @@ class Decompiler(Analysis):
222
238
  # determine a few arguments according to the structuring algorithm
223
239
  fold_callexprs_into_conditions = False
224
240
  self._force_loop_single_exit = True
241
+ self._refine_loops_with_single_successor = False
225
242
  self._complete_successors = False
226
243
  self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
227
244
  if "structurer_cls" not in self._recursive_structurer_params:
@@ -229,6 +246,7 @@ class Decompiler(Analysis):
229
246
  # is the algorithm based on Phoenix (a schema-based algorithm)?
230
247
  if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
231
248
  self._force_loop_single_exit = False
249
+ # self._refine_loops_with_single_successor = True
232
250
  self._complete_successors = True
233
251
  fold_callexprs_into_conditions = True
234
252
 
@@ -261,10 +279,12 @@ class Decompiler(Analysis):
261
279
  desired_variables=self._desired_variables,
262
280
  optimization_scratch=self._optimization_scratch,
263
281
  force_loop_single_exit=self._force_loop_single_exit,
282
+ refine_loops_with_single_successor=self._refine_loops_with_single_successor,
264
283
  complete_successors=self._complete_successors,
265
284
  ail_graph=self._clinic_graph,
266
285
  arg_vvars=self._clinic_arg_vvars,
267
286
  start_stage=self._clinic_start_stage,
287
+ notes=self.notes,
268
288
  **self.options_to_params(self.options_by_class["clinic"]),
269
289
  )
270
290
  else:
@@ -375,6 +395,7 @@ class Decompiler(Analysis):
375
395
  const_formats=old_codegen.const_formats if old_codegen is not None else None,
376
396
  externs=clinic.externs,
377
397
  binop_depth_cutoff=self.expr_collapse_depth,
398
+ notes=self.notes,
378
399
  **self.options_to_params(self.options_by_class["codegen"]),
379
400
  )
380
401
 
@@ -396,6 +417,7 @@ class Decompiler(Analysis):
396
417
  cond_proc=condition_processor,
397
418
  update_graph=update_graph,
398
419
  force_loop_single_exit=self._force_loop_single_exit,
420
+ refine_loops_with_single_successor=self._refine_loops_with_single_successor,
399
421
  complete_successors=self._complete_successors,
400
422
  entry_node_addr=self.clinic.entry_node_addr,
401
423
  **self.options_to_params(self.options_by_class["region_identifier"]),
@@ -444,6 +466,7 @@ class Decompiler(Analysis):
444
466
  entry_node_addr=self.clinic.entry_node_addr,
445
467
  scratch=self._optimization_scratch,
446
468
  force_loop_single_exit=self._force_loop_single_exit,
469
+ refine_loops_with_single_successor=self._refine_loops_with_single_successor,
447
470
  complete_successors=self._complete_successors,
448
471
  **kwargs,
449
472
  )
@@ -507,6 +530,7 @@ class Decompiler(Analysis):
507
530
  entry_node_addr=self.clinic.entry_node_addr,
508
531
  scratch=self._optimization_scratch,
509
532
  force_loop_single_exit=self._force_loop_single_exit,
533
+ refine_loops_with_single_successor=self._refine_loops_with_single_successor,
510
534
  complete_successors=self._complete_successors,
511
535
  peephole_optimizations=self._peephole_optimizations,
512
536
  avoid_vvar_ids=self._copied_var_ids,
@@ -0,0 +1,42 @@
1
+ from __future__ import annotations
2
+
3
+ from angr.ailment import Block
4
+ from .sequence_walker import SequenceWalker
5
+ from .structuring.structurer_nodes import BaseNode, SequenceNode, MultiNode
6
+
7
+
8
+ class NodeReplacer(SequenceWalker):
9
+ """
10
+ Replaces nodes in a node with new nodes based on a mapping.
11
+ """
12
+
13
+ def __init__(self, root: BaseNode, replacements: dict) -> None:
14
+ super().__init__(update_seqnode_in_place=False)
15
+
16
+ self.root = root
17
+ self.replacements = replacements
18
+ self.result: BaseNode = self.walk(self.root) # type:ignore
19
+
20
+ def _handle(self, node: BaseNode, **kwargs):
21
+ return self.replacements[node] if node in self.replacements else super()._handle(node, **kwargs)
22
+
23
+ def _handle_MultiNode(self, node: MultiNode, **kwargs):
24
+ changed = False
25
+ nodes_copy = list(node.nodes)
26
+
27
+ i = len(nodes_copy) - 1
28
+ has_non_block = False
29
+ while i > -1:
30
+ node_ = nodes_copy[i]
31
+ new_node = self._handle(node_, parent=node, index=i)
32
+ if new_node is not None:
33
+ changed = True
34
+ nodes_copy[i] = new_node
35
+ if not isinstance(new_node, Block):
36
+ has_non_block = True
37
+ i -= 1
38
+ if not changed:
39
+ return None
40
+ if has_non_block:
41
+ return SequenceNode(node.addr, nodes=nodes_copy)
42
+ return MultiNode(nodes_copy, addr=node.addr, idx=node.idx)
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ from .decompilation_note import DecompilationNote, DecompilationNoteLevel
4
+
5
+
6
+ __all__ = (
7
+ "DecompilationNote",
8
+ "DecompilationNoteLevel",
9
+ )
@@ -0,0 +1,48 @@
1
+ from __future__ import annotations
2
+ from typing import Any
3
+
4
+ from enum import Enum
5
+
6
+
7
+ class DecompilationNoteLevel(Enum):
8
+ """
9
+ Enum class describing the level of each decompilation note.
10
+ """
11
+
12
+ DEBUG = 0
13
+ INFO = 1
14
+ WARNING = 2
15
+ CRITICAL = 3
16
+
17
+
18
+ class DecompilationNote:
19
+ """
20
+ Describes a note that is generated during decompilation.
21
+
22
+ Key is a unique string for the decompilation note. It is used as an index in the decompilation notes dictionary in
23
+ the Decompiler class.
24
+ Name is a string for display by default.
25
+ Content is the actual content of the note. It can be of any type, but for custom types, you must override `__str__`
26
+ so that it can be displayed.
27
+ Level is the level of the note. The following values are available: DecompilationNoteLevel.DEBUG,
28
+ DecompilationNoteLevel.INFO, DecompilationNoteLevel.WARNING, and DecompilationNoteLevel.CRITICAL.
29
+ """
30
+
31
+ __slots__ = (
32
+ "content",
33
+ "key",
34
+ "level",
35
+ "name",
36
+ )
37
+
38
+ def __init__(self, key: str, name: str, content: Any, *, level=DecompilationNoteLevel.INFO):
39
+ self.key = key
40
+ self.name = name
41
+ self.content = content
42
+ self.level = level
43
+
44
+ def __repr__(self):
45
+ return f"<DecompilationNote: {self.name}>"
46
+
47
+ def __str__(self):
48
+ return f"{self.name}: {self.content}"
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ from .decompilation_note import DecompilationNote
4
+
5
+
6
+ class DeobfuscatedString:
7
+ """
8
+ Represents a deobfuscated string.
9
+ """
10
+
11
+ __slots__ = ("ref_addr", "type", "value")
12
+
13
+ def __init__(self, value: bytes, obf_type: str, ref_addr: int | None = None):
14
+ self.value = value
15
+ self.type = obf_type
16
+ self.ref_addr = ref_addr
17
+
18
+ def __repr__(self):
19
+ return (
20
+ f"<DeobfuscatedString Type{self.type} value={self.value!r} ref={self.ref_addr:#x}>"
21
+ if self.ref_addr is not None
22
+ else f"<DeobfuscatedString Type{self.type} value={self.value!r}>"
23
+ )
24
+
25
+ def __str__(self):
26
+ return repr(self.value)
27
+
28
+
29
+ class DeobfuscatedStringsNote(DecompilationNote):
30
+ """
31
+ Represents a decompilation note that describes obfuscated strings found during decompilation.
32
+ """
33
+
34
+ def __init__(self, key: str = "deobfuscated_strings", name: str = "Deobfuscated Strings"):
35
+ super().__init__(key, name, None)
36
+
37
+ self.strings: dict[int, DeobfuscatedString] = {}
38
+
39
+ def add_string(self, obf_type: str, value: bytes, *, ref_addr: int):
40
+ """
41
+ Add a deobfuscated string to the note.
42
+
43
+ :param obf_type: The type of obfuscation (e.g., "1", "2").
44
+ :param value: The deobfuscated string value.
45
+ :param ref_addr: The address where this string is referenced, if applicable.
46
+ """
47
+ deobf_str = DeobfuscatedString(value, obf_type, ref_addr=ref_addr)
48
+ self.strings[ref_addr] = deobf_str
49
+
50
+ def __str__(self):
51
+ lines = ["Obfuscated strings are found in decompilation and have been deobfuscated:"]
52
+ for addr in sorted(self.strings):
53
+ deobf_str = self.strings[addr]
54
+ lines.append(f" Type {deobf_str.type} @ {deobf_str.ref_addr:#x}: {deobf_str.value!r}")
55
+
56
+ return "\n".join(lines)
@@ -163,7 +163,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
163
163
  require_gotos=False,
164
164
  prevent_new_gotos=False,
165
165
  simplify_ail=False,
166
- must_improve_rel_quality=True,
166
+ must_improve_rel_quality=False,
167
167
  **kwargs,
168
168
  )
169
169
 
@@ -135,11 +135,13 @@ class OptimizationPass(BaseOptimizationPass):
135
135
  entry_node_addr=None,
136
136
  scratch: dict[str, Any] | None = None,
137
137
  force_loop_single_exit: bool = True,
138
+ refine_loops_with_single_successor: bool = False,
138
139
  complete_successors: bool = False,
139
140
  avoid_vvar_ids: set[int] | None = None,
140
141
  arg_vvars: set[int] | None = None,
141
142
  peephole_optimizations=None,
142
143
  stack_pointer_tracker=None,
144
+ notes: dict | None = None,
143
145
  **kwargs,
144
146
  ):
145
147
  super().__init__(func)
@@ -158,10 +160,12 @@ class OptimizationPass(BaseOptimizationPass):
158
160
  entry_node_addr if entry_node_addr is not None else (func.addr, None)
159
161
  )
160
162
  self._force_loop_single_exit = force_loop_single_exit
163
+ self._refine_loops_with_single_successor = refine_loops_with_single_successor
161
164
  self._complete_successors = complete_successors
162
165
  self._avoid_vvar_ids = avoid_vvar_ids or set()
163
166
  self._peephole_optimizations = peephole_optimizations
164
167
  self._stack_pointer_tracker = stack_pointer_tracker
168
+ self.notes = notes if notes is not None else {}
165
169
 
166
170
  # output
167
171
  self.out_graph: networkx.DiGraph | None = None
@@ -397,6 +401,7 @@ class OptimizationPass(BaseOptimizationPass):
397
401
  cond_proc=condition_processor or ConditionProcessor(self.project.arch),
398
402
  update_graph=update_graph,
399
403
  force_loop_single_exit=self._force_loop_single_exit,
404
+ refine_loops_with_single_successor=self._refine_loops_with_single_successor,
400
405
  complete_successors=self._complete_successors,
401
406
  entry_node_addr=self.entry_node_addr,
402
407
  )
@@ -6,7 +6,7 @@ from typing import Any
6
6
  import networkx
7
7
 
8
8
  from angr.ailment import Block
9
- from angr.ailment.statement import ConditionalJump, Label
9
+ from angr.ailment.statement import ConditionalJump
10
10
 
11
11
  from .return_duplicator_base import ReturnDuplicatorBase
12
12
  from .optimization_pass import StructuringOptimizationPass
@@ -53,7 +53,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
53
53
  prevent_new_gotos: bool = True,
54
54
  minimize_copies_for_regions: bool = True,
55
55
  region_identifier=None,
56
- vvar_id_start: int | None = None,
56
+ vvar_id_start: int = 0,
57
57
  scratch: dict[str, Any] | None = None,
58
58
  max_func_blocks: int = 500,
59
59
  **kwargs,
@@ -91,8 +91,9 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
91
91
  self,
92
92
  src: Block,
93
93
  dst: Block,
94
- graph: networkx.DiGraph = None,
95
94
  max_level_check=1,
95
+ *,
96
+ graph: networkx.DiGraph,
96
97
  ):
97
98
  """
98
99
  TODO: Implement a more principled way of checking if an edge is a goto edge with Phoenix's structuring info
@@ -100,6 +101,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
100
101
  above a goto edge as the goto src.
101
102
  """
102
103
  # Do a simple and fast check first
104
+ assert self._goto_manager is not None
103
105
  is_simple_goto = self._goto_manager.is_goto_edge(src, dst)
104
106
  if is_simple_goto:
105
107
  return True
@@ -155,79 +157,6 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
155
157
  # keep testing the next edge
156
158
  node = succ
157
159
 
158
- # Special case 3: In Phoenix, regions full of only if-stmts can be collapsed and moved. This causes
159
- # the goto manager to report gotos that are at the top of the region instead of ones in the middle of it.
160
- # Because of this, we need to gather all the nodes above the original src and check if any of them
161
- # go to the destination. Additionally, we need to do this on the supergraph to get rid of
162
- # goto edges that are removed by Phoenix.
163
- # This case is observed in the test case `TestDecompiler.test_tail_tail_bytes_ret_dup`.
164
- if self._supergraph is None:
165
- return False
166
-
167
- super_to_og_nodes = {n: self._supergraph.nodes[n]["original_nodes"] for n in self._supergraph.nodes}
168
- og_to_super_nodes = {og: super_n for super_n, ogs in super_to_og_nodes.items() for og in ogs}
169
- super_src = og_to_super_nodes.get(src)
170
- super_dst = og_to_super_nodes.get(dst)
171
- if super_src is None or super_dst is None:
172
- return False
173
-
174
- # collect all nodes which have only an if-stmt in them that are ancestors of super_src
175
- check_blks = {super_src}
176
- level_blocks = {super_src}
177
- for _ in range(10):
178
- done = False
179
- if_blks = set()
180
- for lblock in level_blocks:
181
- preds = list(self._supergraph.predecessors(lblock))
182
- for pred in preds:
183
- only_cond_jump = all(isinstance(s, (ConditionalJump, Label)) for s in pred.statements)
184
- if only_cond_jump:
185
- if_blks.add(pred)
186
-
187
- done = len(if_blks) == 0
188
-
189
- if done:
190
- break
191
-
192
- check_blks |= if_blks
193
- level_blocks = if_blks
194
-
195
- # convert all the found if-only super-blocks back into their original blocks
196
- og_check_blocks = set()
197
- for blk in check_blks:
198
- og_check_blocks |= set(super_to_og_nodes[blk])
199
-
200
- # check if any of the original blocks are gotos to the destination
201
- goto_hits = 0
202
- for block in og_check_blocks:
203
- if self._goto_manager.is_goto_edge(block, dst):
204
- goto_hits += 1
205
-
206
- # Although it is good to find a goto in the if-only block region, having more than a single goto
207
- # existing that goes to the same dst is a bad sign. This can be seen in the the following test:
208
- # TestDecompiler.test_dd_iread_ret_dup_region
209
- #
210
- # It occurs when you have something like:
211
- # ```
212
- # if (a || c)
213
- # goto target;
214
- # target:
215
- # return 0;
216
- # ```
217
- #
218
- #
219
- # This looks like an edge from (a, target) and (c, target) but it is actually a single edge.
220
- # If you allow both to duplicate you get the following:
221
- # ```
222
- # if (a):
223
- # return
224
- # if (c):
225
- # return
226
- # ```
227
- # This is not the desired behavior.
228
- # So we need to check if there is only a single goto that goes to the destination.
229
- return goto_hits == 1
230
-
231
160
  return False
232
161
 
233
162
  def _analyze(self, cache=None):