angr 9.2.166__cp310-abi3-manylinux_2_28_aarch64.whl → 9.2.168__cp310-abi3-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (35)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfb.py +7 -7
  3. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -8
  4. angr/analyses/decompiler/clinic.py +8 -0
  5. angr/analyses/decompiler/condition_processor.py +44 -1
  6. angr/analyses/decompiler/decompilation_cache.py +2 -0
  7. angr/analyses/decompiler/decompilation_options.py +10 -0
  8. angr/analyses/decompiler/decompiler.py +26 -2
  9. angr/analyses/decompiler/node_replacer.py +42 -0
  10. angr/analyses/decompiler/notes/__init__.py +9 -0
  11. angr/analyses/decompiler/notes/decompilation_note.py +48 -0
  12. angr/analyses/decompiler/notes/deobfuscated_strings.py +56 -0
  13. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  14. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -0
  15. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +5 -76
  16. angr/analyses/decompiler/region_identifier.py +12 -3
  17. angr/analyses/decompiler/sequence_walker.py +11 -7
  18. angr/analyses/decompiler/structured_codegen/base.py +34 -1
  19. angr/analyses/decompiler/structured_codegen/c.py +44 -10
  20. angr/analyses/decompiler/structuring/phoenix.py +645 -305
  21. angr/analyses/decompiler/structuring/structurer_base.py +75 -1
  22. angr/analyses/decompiler/utils.py +71 -28
  23. angr/analyses/deobfuscator/string_obf_finder.py +19 -16
  24. angr/analyses/deobfuscator/string_obf_opt_passes.py +6 -3
  25. angr/analyses/reaching_definitions/engine_vex.py +3 -2
  26. angr/procedures/glibc/scanf.py +8 -0
  27. angr/procedures/glibc/sscanf.py +4 -0
  28. angr/rustylib.abi3.so +0 -0
  29. angr/utils/graph.py +62 -24
  30. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/METADATA +5 -5
  31. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/RECORD +35 -31
  32. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/WHEEL +0 -0
  33. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/entry_points.txt +0 -0
  34. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/licenses/LICENSE +0 -0
  35. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/top_level.txt +0 -0
@@ -43,6 +43,7 @@ class RegionIdentifier(Analysis):
43
43
  update_graph=True,
44
44
  largest_successor_tree_outside_loop=True,
45
45
  force_loop_single_exit=True,
46
+ refine_loops_with_single_successor=False,
46
47
  complete_successors=False,
47
48
  entry_node_addr: tuple[int, int | None] | None = None,
48
49
  ):
@@ -70,6 +71,7 @@ class RegionIdentifier(Analysis):
70
71
  self.regions_by_block_addrs = []
71
72
  self._largest_successor_tree_outside_loop = largest_successor_tree_outside_loop
72
73
  self._force_loop_single_exit = force_loop_single_exit
74
+ self._refine_loops_with_single_successor = refine_loops_with_single_successor
73
75
  self._complete_successors = complete_successors
74
76
  # we keep a dictionary of node and their traversal order in a quasi-topological traversal and update this
75
77
  # dictionary as we update the graph
@@ -265,13 +267,18 @@ class RegionIdentifier(Analysis):
265
267
 
266
268
  # special case: any node with more than two non-self successors is probably the head of a switch-case. we
267
269
  # should include all successors into the loop subgraph.
270
+ # we must be extra careful here to not include nodes that are reachable from outside the loop subgraph. an
271
+ # example is in binary 064e1d62c8542d658d83f7e231cc3b935a1f18153b8aea809dcccfd446a91c93, loop 0x40d7b0 should
272
+ # not include block 0x40d9d5 because this node has an out-of-loop-body predecessor (block 0x40d795).
268
273
  while True:
269
274
  updated = False
270
275
  for node in list(loop_subgraph):
271
276
  nonself_successors = [succ for succ in graph.successors(node) if succ is not node]
272
277
  if len(nonself_successors) > 2:
273
278
  for succ in nonself_successors:
274
- if not loop_subgraph.has_edge(node, succ):
279
+ if not loop_subgraph.has_edge(node, succ) and all(
280
+ pred in loop_subgraph for pred in graph.predecessors(succ)
281
+ ):
275
282
  updated = True
276
283
  loop_subgraph.add_edge(node, succ)
277
284
  if not updated:
@@ -280,7 +287,9 @@ class RegionIdentifier(Analysis):
280
287
  return set(loop_subgraph)
281
288
 
282
289
  def _refine_loop(self, graph: networkx.DiGraph, head, initial_loop_nodes, initial_exit_nodes):
283
- if len(initial_exit_nodes) <= 1:
290
+ if (self._refine_loops_with_single_successor and len(initial_exit_nodes) == 0) or (
291
+ not self._refine_loops_with_single_successor and len(initial_exit_nodes) <= 1
292
+ ):
284
293
  return initial_loop_nodes, initial_exit_nodes
285
294
 
286
295
  refined_loop_nodes = initial_loop_nodes.copy()
@@ -713,7 +722,7 @@ class RegionIdentifier(Analysis):
713
722
 
714
723
  # visit the nodes in post-order
715
724
  region_created = False
716
- for node in list(networkx.dfs_postorder_nodes(graph_copy, source=head)):
725
+ for node in list(GraphUtils.dfs_postorder_nodes_deterministic(graph_copy, head)):
717
726
  if node is dummy_endnode:
718
727
  # skip the dummy endnode
719
728
  continue
@@ -110,24 +110,28 @@ class SequenceWalker:
110
110
 
111
111
  def _handle_MultiNode(self, node, **kwargs):
112
112
  changed = False
113
- nodes_copy = list(node.nodes)
113
+ nodes = node.nodes if self._update_seqnode_in_place else list(node.nodes)
114
114
 
115
115
  if self._force_forward_scan:
116
- for i, node_ in enumerate(nodes_copy):
116
+ for i, node_ in enumerate(nodes):
117
117
  new_node = self._handle(node_, parent=node, index=i)
118
118
  if new_node is not None:
119
119
  changed = True
120
- node.nodes[i] = new_node
120
+ nodes[i] = new_node
121
121
  else:
122
- i = len(nodes_copy) - 1
122
+ i = len(nodes) - 1
123
123
  while i > -1:
124
- node_ = nodes_copy[i]
124
+ node_ = nodes[i]
125
125
  new_node = self._handle(node_, parent=node, index=i)
126
126
  if new_node is not None:
127
127
  changed = True
128
- node.nodes[i] = new_node
128
+ nodes[i] = new_node
129
129
  i -= 1
130
- return None if not changed else node
130
+ if not changed:
131
+ return None
132
+ if self._update_seqnode_in_place:
133
+ return node
134
+ return MultiNode(nodes, addr=node.addr, idx=node.idx)
131
135
 
132
136
  def _handle_SwitchCase(self, node, **kwargs):
133
137
  self._handle(node.switch_expr, parent=node, label="switch_expr")
@@ -1,3 +1,4 @@
1
+ # pylint:disable=missing-class-docstring
1
2
  from __future__ import annotations
2
3
  from sortedcontainers import SortedDict
3
4
 
@@ -114,13 +115,45 @@ class InstructionMapping:
114
115
 
115
116
 
116
117
  class BaseStructuredCodeGenerator:
117
- def __init__(self, flavor=None):
118
+ def __init__(self, flavor=None, notes=None):
118
119
  self.flavor = flavor
119
120
  self.text = None
120
121
  self.map_pos_to_node = None
121
122
  self.map_pos_to_addr = None
122
123
  self.map_addr_to_pos = None
123
124
  self.map_ast_to_pos: dict[SimVariable, set[PositionMappingElement]] | None = None
125
+ self.notes = notes if notes is not None else {}
126
+
127
+ def adjust_mapping_positions(
128
+ self,
129
+ offset: int,
130
+ pos_to_node: PositionMapping,
131
+ pos_to_addr: PositionMapping,
132
+ addr_to_pos: InstructionMapping,
133
+ ) -> tuple[PositionMapping, PositionMapping, InstructionMapping]:
134
+ """
135
+ Adjust positions in the mappings to account for the notes that are prepended to the text.
136
+
137
+ :param offset: The length of the notes to prepend.
138
+ :param pos_to_node: The position to node mapping.
139
+ :param pos_to_addr: The position to address mapping.
140
+ :param addr_to_pos: The address to position mapping.
141
+ :return: Adjusted mappings.
142
+ """
143
+ new_pos_to_node = PositionMapping()
144
+ new_pos_to_addr = PositionMapping()
145
+ new_addr_to_pos = InstructionMapping()
146
+
147
+ for pos, node in pos_to_node.items():
148
+ new_pos_to_node.add_mapping(pos + offset, node.length, node.obj)
149
+
150
+ for pos, node in pos_to_addr.items():
151
+ new_pos_to_addr.add_mapping(pos + offset, node.length, node.obj)
152
+
153
+ for addr, pos in addr_to_pos.items():
154
+ new_addr_to_pos.add_mapping(addr, pos.posmap_pos + offset)
155
+
156
+ return new_pos_to_node, new_pos_to_addr, new_addr_to_pos
124
157
 
125
158
  def reapply_options(self, options):
126
159
  pass
@@ -43,6 +43,7 @@ from angr.utils.library import get_cpp_function_name
43
43
  from angr.utils.loader import is_in_readonly_segment, is_in_readonly_section
44
44
  from angr.utils.types import unpack_typeref, unpack_pointer_and_array, dereference_simtype_by_lib
45
45
  from angr.analyses.decompiler.utils import structured_node_is_simple_return
46
+ from angr.analyses.decompiler.notes.deobfuscated_strings import DeobfuscatedStringsNote
46
47
  from angr.errors import UnsupportedNodeTypeError, AngrRuntimeError
47
48
  from angr.knowledge_plugins.cfg.memory_data import MemoryData, MemoryDataSort
48
49
  from angr.analyses import Analysis, register_analysis
@@ -254,7 +255,7 @@ class CConstruct:
254
255
  self.tags = tags or {}
255
256
  self.codegen: StructuredCodeGenerator = codegen
256
257
 
257
- def c_repr(self, indent=0, pos_to_node=None, pos_to_addr=None, addr_to_pos=None):
258
+ def c_repr(self, initial_pos=0, indent=0, pos_to_node=None, pos_to_addr=None, addr_to_pos=None):
258
259
  """
259
260
  Creates the C representation of the code and displays it by
260
261
  constructing a large string. This function is called by each program function that needs to be decompiled.
@@ -268,7 +269,7 @@ class CConstruct:
268
269
 
269
270
  def mapper(chunks):
270
271
  # start all positions at beginning of document
271
- pos = 0
272
+ pos = initial_pos
272
273
 
273
274
  last_insn_addr = None
274
275
 
@@ -2520,8 +2521,10 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2520
2521
  display_block_addrs=False,
2521
2522
  display_vvar_ids=False,
2522
2523
  min_data_addr: int = 0x400_000,
2524
+ notes=None,
2525
+ display_notes: bool = True,
2523
2526
  ):
2524
- super().__init__(flavor=flavor)
2527
+ super().__init__(flavor=flavor, notes=notes)
2525
2528
 
2526
2529
  self._handlers = {
2527
2530
  CodeNode: self._handle_Code,
@@ -2604,6 +2607,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2604
2607
  self.map_addr_to_label: dict[tuple[int, int | None], CLabel] = {}
2605
2608
  self.cfunc: CFunction | None = None
2606
2609
  self.cexterns: set[CVariable] | None = None
2610
+ self.display_notes = display_notes
2607
2611
 
2608
2612
  self._analyze()
2609
2613
 
@@ -2700,9 +2704,20 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2700
2704
  ast_to_pos = defaultdict(set)
2701
2705
 
2702
2706
  text = cfunc.c_repr(
2703
- indent=self._indent, pos_to_node=pos_to_node, pos_to_addr=pos_to_addr, addr_to_pos=addr_to_pos
2707
+ initial_pos=0,
2708
+ indent=self._indent,
2709
+ pos_to_node=pos_to_node,
2710
+ pos_to_addr=pos_to_addr,
2711
+ addr_to_pos=addr_to_pos,
2704
2712
  )
2705
2713
 
2714
+ if self.display_notes:
2715
+ notes = self.render_notes()
2716
+ pos_to_node, pos_to_addr, addr_to_pos = self.adjust_mapping_positions(
2717
+ len(notes), pos_to_node, pos_to_addr, addr_to_pos
2718
+ )
2719
+ text = notes + text
2720
+
2706
2721
  for elem, node in pos_to_node.items():
2707
2722
  if isinstance(node.obj, CConstant):
2708
2723
  ast_to_pos[node.obj.value].add(elem)
@@ -2726,6 +2741,21 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2726
2741
 
2727
2742
  return text, pos_to_node, pos_to_addr, addr_to_pos, ast_to_pos
2728
2743
 
2744
+ def render_notes(self) -> str:
2745
+ """
2746
+ Render decompilation notes.
2747
+
2748
+ :return: A string containing all notes.
2749
+ """
2750
+ if not self.notes:
2751
+ return ""
2752
+
2753
+ lines = []
2754
+ for note in self.notes.values():
2755
+ note_lines = str(note).split("\n")
2756
+ lines += [f"// {line}" for line in note_lines]
2757
+ return "\n".join(lines) + "\n\n"
2758
+
2729
2759
  def _get_variable_type(self, var, is_global=False):
2730
2760
  if is_global:
2731
2761
  return self._variable_kb.variables["global"].get_variable_type(var)
@@ -3601,14 +3631,18 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3601
3631
  reference_values = {}
3602
3632
  type_ = unpack_typeref(type_)
3603
3633
  if expr.value in self.kb.obfuscations.type1_deobfuscated_strings:
3604
- reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type1_deobfuscated_strings[
3605
- expr.value
3606
- ]
3634
+ deobf_str = self.kb.obfuscations.type1_deobfuscated_strings[expr.value]
3635
+ reference_values[SimTypePointer(SimTypeChar())] = deobf_str
3636
+ if "deobfuscated_strings" not in self.notes:
3637
+ self.notes["deobfuscated_strings"] = DeobfuscatedStringsNote()
3638
+ self.notes["deobfuscated_strings"].add_string("1", deobf_str, ref_addr=expr.value)
3607
3639
  inline_string = True
3608
3640
  elif expr.value in self.kb.obfuscations.type2_deobfuscated_strings:
3609
- reference_values[SimTypePointer(SimTypeChar())] = self.kb.obfuscations.type2_deobfuscated_strings[
3610
- expr.value
3611
- ]
3641
+ deobf_str = self.kb.obfuscations.type2_deobfuscated_strings[expr.value]
3642
+ reference_values[SimTypePointer(SimTypeChar())] = deobf_str
3643
+ if "deobfuscated_strings" not in self.notes:
3644
+ self.notes["deobfuscated_strings"] = DeobfuscatedStringsNote()
3645
+ self.notes["deobfuscated_strings"].add_string("2", deobf_str, ref_addr=expr.value)
3612
3646
  inline_string = True
3613
3647
  elif isinstance(type_, SimTypePointer) and isinstance(type_.pts_to, (SimTypeChar, SimTypeBottom)):
3614
3648
  # char* or void*