angr 9.2.141__py3-none-manylinux2014_x86_64.whl → 9.2.143__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (71) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +26 -12
  3. angr/analyses/calling_convention/fact_collector.py +31 -9
  4. angr/analyses/cfg/cfg_base.py +38 -4
  5. angr/analyses/cfg/cfg_fast.py +23 -7
  6. angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -1
  8. angr/analyses/class_identifier.py +8 -7
  9. angr/analyses/complete_calling_conventions.py +19 -6
  10. angr/analyses/decompiler/ail_simplifier.py +138 -98
  11. angr/analyses/decompiler/clinic.py +73 -5
  12. angr/analyses/decompiler/condition_processor.py +7 -7
  13. angr/analyses/decompiler/decompilation_cache.py +2 -1
  14. angr/analyses/decompiler/decompiler.py +10 -2
  15. angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
  16. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
  17. angr/analyses/decompiler/optimization_passes/condition_constprop.py +110 -46
  18. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
  19. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/optimization_pass.py +2 -0
  21. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
  22. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
  23. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
  24. angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
  25. angr/analyses/decompiler/region_identifier.py +70 -47
  26. angr/analyses/decompiler/sequence_walker.py +8 -0
  27. angr/analyses/decompiler/ssailification/rewriting.py +47 -17
  28. angr/analyses/decompiler/ssailification/rewriting_engine.py +13 -0
  29. angr/analyses/decompiler/stack_item.py +36 -0
  30. angr/analyses/decompiler/structured_codegen/c.py +14 -9
  31. angr/analyses/decompiler/structuring/phoenix.py +3 -3
  32. angr/analyses/decompiler/utils.py +13 -0
  33. angr/analyses/find_objects_static.py +2 -1
  34. angr/analyses/reaching_definitions/engine_vex.py +13 -0
  35. angr/analyses/reaching_definitions/function_handler.py +24 -10
  36. angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
  37. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
  38. angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
  39. angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
  40. angr/analyses/reaching_definitions/rd_state.py +11 -7
  41. angr/analyses/s_liveness.py +44 -6
  42. angr/analyses/s_propagator.py +40 -29
  43. angr/analyses/s_reaching_definitions/s_rda_model.py +48 -37
  44. angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
  45. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +21 -21
  46. angr/analyses/typehoon/simple_solver.py +35 -8
  47. angr/analyses/typehoon/typehoon.py +3 -1
  48. angr/analyses/variable_recovery/engine_ail.py +6 -6
  49. angr/calling_conventions.py +20 -10
  50. angr/knowledge_plugins/functions/function.py +5 -10
  51. angr/knowledge_plugins/variables/variable_manager.py +27 -0
  52. angr/procedures/definitions/__init__.py +3 -10
  53. angr/procedures/definitions/linux_kernel.py +5 -0
  54. angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
  55. angr/procedures/win32_kernel/__fastfail.py +15 -0
  56. angr/sim_procedure.py +2 -2
  57. angr/simos/simos.py +14 -10
  58. angr/simos/windows.py +42 -1
  59. angr/utils/ail.py +41 -1
  60. angr/utils/cpp.py +17 -0
  61. angr/utils/doms.py +149 -0
  62. angr/utils/library.py +1 -1
  63. angr/utils/ssa/__init__.py +21 -14
  64. angr/utils/ssa/vvar_uses_collector.py +2 -2
  65. angr/utils/types.py +12 -1
  66. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/METADATA +7 -7
  67. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/RECORD +71 -67
  68. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/LICENSE +0 -0
  69. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/WHEEL +0 -0
  70. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/entry_points.txt +0 -0
  71. {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ # pylint:disable=too-many-boolean-expressions
1
2
  from __future__ import annotations
2
3
  from collections.abc import Iterable
3
4
  import logging
@@ -7,6 +8,7 @@ import ailment
7
8
 
8
9
  from angr.calling_conventions import SimRegArg
9
10
  from angr.code_location import CodeLocation
11
+ from angr.analyses.decompiler.stack_item import StackItem, StackItemType
10
12
  from .optimization_pass import OptimizationPass, OptimizationPassStage
11
13
 
12
14
 
@@ -82,6 +84,14 @@ class RegisterSaveAreaSimplifier(OptimizationPass):
82
84
  # update it
83
85
  self._update_block(old_block, new_block)
84
86
 
87
+ if updated_blocks:
88
+ # update stack_items
89
+ for data in info.values():
90
+ for stack_offset, _ in data["stored"]:
91
+ self.stack_items[stack_offset] = StackItem(
92
+ stack_offset, self.project.arch.bytes, "regs", StackItemType.SAVED_REGS
93
+ )
94
+
85
95
  def _find_registers_stored_on_stack(self) -> list[tuple[int, int, CodeLocation]]:
86
96
  first_block = self._get_block(self._func.addr)
87
97
  if first_block is None:
@@ -94,14 +104,26 @@ class RegisterSaveAreaSimplifier(OptimizationPass):
94
104
  isinstance(stmt, ailment.Stmt.Store)
95
105
  and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
96
106
  and isinstance(stmt.addr.offset, int)
97
- and isinstance(stmt.data, ailment.Expr.VirtualVariable)
98
- and stmt.data.was_reg
99
107
  ):
100
- # it's storing registers to the stack!
101
- stack_offset = stmt.addr.offset
102
- reg_offset = stmt.data.reg_offset
103
- codeloc = CodeLocation(first_block.addr, idx, block_idx=first_block.idx, ins_addr=stmt.ins_addr)
104
- results.append((reg_offset, stack_offset, codeloc))
108
+ if isinstance(stmt.data, ailment.Expr.VirtualVariable) and stmt.data.was_reg:
109
+ # it's storing registers to the stack!
110
+ stack_offset = stmt.addr.offset
111
+ reg_offset = stmt.data.reg_offset
112
+ codeloc = CodeLocation(first_block.addr, idx, block_idx=first_block.idx, ins_addr=stmt.ins_addr)
113
+ results.append((reg_offset, stack_offset, codeloc))
114
+ elif (
115
+ self.project.arch.name == "AMD64"
116
+ and isinstance(stmt.data, ailment.Expr.Convert)
117
+ and isinstance(stmt.data.operand, ailment.Expr.VirtualVariable)
118
+ and stmt.data.operand.was_reg
119
+ and stmt.data.from_bits == 256
120
+ and stmt.data.to_bits == 128
121
+ ):
122
+ # storing xmm registers to the stack
123
+ stack_offset = stmt.addr.offset
124
+ reg_offset = stmt.data.operand.reg_offset
125
+ codeloc = CodeLocation(first_block.addr, idx, block_idx=first_block.idx, ins_addr=stmt.ins_addr)
126
+ results.append((reg_offset, stack_offset, codeloc))
105
127
 
106
128
  return results
107
129
 
@@ -6,6 +6,7 @@ import logging
6
6
  import ailment
7
7
 
8
8
  from angr.utils.bits import s2u
9
+ from angr.analyses.decompiler.stack_item import StackItem, StackItemType
9
10
  from .optimization_pass import OptimizationPass, OptimizationPassStage
10
11
 
11
12
 
@@ -168,6 +169,11 @@ class StackCanarySimplifier(OptimizationPass):
168
169
  first_block_copy.statements.pop(stmt_idx)
169
170
  self._update_block(first_block, first_block_copy)
170
171
 
172
+ # update stack_items
173
+ self.stack_items[store_offset] = StackItem(
174
+ store_offset, canary_init_stmt.dst.size, "canary", StackItemType.STACK_CANARY
175
+ )
176
+
171
177
  # Done!
172
178
 
173
179
  def _find_canary_init_stmt(self):
@@ -7,6 +7,7 @@ import ailment
7
7
  import cle
8
8
 
9
9
  from angr.utils.funcid import is_function_security_check_cookie
10
+ from angr.analyses.decompiler.stack_item import StackItem, StackItemType
10
11
  from .optimization_pass import OptimizationPass, OptimizationPassStage
11
12
 
12
13
 
@@ -62,7 +63,9 @@ class WinStackCanarySimplifier(OptimizationPass):
62
63
  first_block, canary_init_stmt_ids = init_stmts
63
64
  canary_init_stmt = first_block.statements[canary_init_stmt_ids[-1]]
64
65
  # where is the stack canary stored?
65
- if not isinstance(canary_init_stmt.addr, ailment.Expr.StackBaseOffset):
66
+ if not isinstance(canary_init_stmt, ailment.Stmt.Store) or not isinstance(
67
+ canary_init_stmt.addr, ailment.Expr.StackBaseOffset
68
+ ):
66
69
  _l.debug(
67
70
  "Unsupported canary storing location %s. Expects an ailment.Expr.StackBaseOffset.",
68
71
  canary_init_stmt.addr,
@@ -143,6 +146,11 @@ class WinStackCanarySimplifier(OptimizationPass):
143
146
  first_block_copy.statements.pop(stmt_idx)
144
147
  self._update_block(first_block, first_block_copy)
145
148
 
149
+ # update stack_items
150
+ self.stack_items[store_offset] = StackItem(
151
+ store_offset, canary_init_stmt.size, "canary", StackItemType.STACK_CANARY
152
+ )
153
+
146
154
  def _find_canary_init_stmt(self):
147
155
  first_block = self._get_block(self._func.addr)
148
156
  if first_block is None:
@@ -1,3 +1,4 @@
1
+ # pylint:disable=too-many-boolean-expressions
1
2
  from __future__ import annotations
2
3
  from ailment.expression import BinaryOp, Const, Load
3
4
 
@@ -20,10 +21,23 @@ class SimplifyPcRelativeLoads(PeepholeOptimizationExprBase):
20
21
  if expr.op == "Add" and len(expr.operands) == 2 and isinstance(expr.operands[0], Load):
21
22
  op0, op1 = expr.operands
22
23
 
24
+ assert self.project is not None
25
+ if not hasattr(expr, "ins_addr"):
26
+ return expr
27
+ assert expr.ins_addr is not None
28
+
23
29
  # check if op1 is PC
24
- if isinstance(op1, Const) and hasattr(expr, "ins_addr") and is_pc(self.project, expr.ins_addr, op1.value):
30
+ if (
31
+ isinstance(op1, Const)
32
+ and op1.is_int
33
+ and hasattr(expr, "ins_addr")
34
+ and is_pc(self.project, expr.ins_addr, op1.value) # type: ignore
35
+ and isinstance(op0.addr, Const)
36
+ and op0.addr.is_int
37
+ ):
25
38
  # check if op0.addr points to a read-only section
26
39
  addr = op0.addr.value
40
+ assert isinstance(addr, int)
27
41
  if is_in_readonly_section(self.project, addr) or is_in_readonly_segment(self.project, addr):
28
42
  # found it!
29
43
  # do the load first
@@ -11,7 +11,8 @@ from ailment.statement import ConditionalJump, Jump
11
11
  from ailment.expression import Const
12
12
 
13
13
  from angr.utils.graph import GraphUtils
14
- from angr.utils.graph import dfs_back_edges, subgraph_between_nodes, dominates, shallow_reverse
14
+ from angr.utils.graph import dfs_back_edges, subgraph_between_nodes, dominates
15
+ from angr.utils.doms import IncrementalDominators
15
16
  from angr.errors import AngrRuntimeError
16
17
  from angr.analyses import Analysis, register_analysis
17
18
  from .structuring.structurer_nodes import MultiNode, ConditionNode, IncompleteSwitchCaseHeadStatement
@@ -115,11 +116,11 @@ class RegionIdentifier(Analysis):
115
116
  @return: List of addr lists
116
117
  """
117
118
 
118
- work_list = [self.region]
119
+ work_list: list[GraphRegion] = [self.region] # type: ignore
119
120
  block_only_regions = []
120
121
  seen_regions = set()
121
122
  while work_list:
122
- children_regions = []
123
+ children_regions: list[GraphRegion] = []
123
124
  for region in work_list:
124
125
  children_blocks = []
125
126
  for node in region.graph.nodes:
@@ -234,7 +235,7 @@ class RegionIdentifier(Analysis):
234
235
  break
235
236
 
236
237
  def _find_loop_headers(self, graph: networkx.DiGraph) -> list:
237
- heads = {t for _, t in dfs_back_edges(graph, self._start_node)}
238
+ heads = list({t for _, t in dfs_back_edges(graph, self._start_node)})
238
239
  return GraphUtils.quasi_topological_sort_nodes(graph, heads)
239
240
 
240
241
  def _find_initial_loop_nodes(self, graph: networkx.DiGraph, head):
@@ -392,7 +393,7 @@ class RegionIdentifier(Analysis):
392
393
 
393
394
  while True:
394
395
  for node in networkx.dfs_postorder_nodes(graph):
395
- preds = graph.predecessors(node)
396
+ preds = list(graph.predecessors(node))
396
397
  if len(preds) == 1:
397
398
  # merge the two nodes
398
399
  self._absorb_node(graph, preds[0], node)
@@ -473,7 +474,7 @@ class RegionIdentifier(Analysis):
473
474
  head = next(iter(n for n in subgraph.nodes() if n.addr == head.addr))
474
475
  region.head = head
475
476
 
476
- if len(graph.nodes()) == 1 and isinstance(next(iter(graph.nodes())), GraphRegion):
477
+ if len(graph) == 1 and isinstance(next(iter(graph.nodes())), GraphRegion):
477
478
  return next(iter(graph.nodes()))
478
479
  # create a large graph region
479
480
  new_head = self._get_start_node(graph)
@@ -491,6 +492,7 @@ class RegionIdentifier(Analysis):
491
492
  l.debug("Initial loop nodes %s", self._dbg_block_list(initial_loop_nodes))
492
493
 
493
494
  # Make sure no other loops are contained in the current loop
495
+ assert self._loop_headers is not None
494
496
  if {n for n in initial_loop_nodes if n.addr != head.addr}.intersection(self._loop_headers):
495
497
  return None
496
498
 
@@ -535,7 +537,7 @@ class RegionIdentifier(Analysis):
535
537
  region = self._abstract_cyclic_region(
536
538
  graph, refined_loop_nodes, head, normal_entries, abnormal_entries, normal_exit_node, abnormal_exit_nodes
537
539
  )
538
- if len(region.successors) > 1 and self._force_loop_single_exit:
540
+ if region.successors is not None and len(region.successors) > 1 and self._force_loop_single_exit:
539
541
  # multi-successor region. refinement is required
540
542
  self._refine_loop_successors_to_guarded_successors(region, graph)
541
543
 
@@ -705,23 +707,20 @@ class RegionIdentifier(Analysis):
705
707
  else:
706
708
  dummy_endnode = None
707
709
 
708
- # compute dominator tree
709
- doms = networkx.immediate_dominators(graph_copy, head)
710
-
711
- # compute post-dominator tree
712
- inverted_graph = shallow_reverse(graph_copy)
713
- postdoms = networkx.immediate_dominators(inverted_graph, endnodes[0])
714
-
715
- # dominance frontiers
716
- df = networkx.algorithms.dominance_frontiers(graph_copy, head)
710
+ # dominators and post-dominators, computed incrementally
711
+ doms = IncrementalDominators(graph_copy, head)
712
+ postdoms = IncrementalDominators(graph_copy, endnodes[0], post=True)
717
713
 
718
714
  # visit the nodes in post-order
719
- for node in networkx.dfs_postorder_nodes(graph_copy, source=head):
715
+ region_created = False
716
+ for node in list(networkx.dfs_postorder_nodes(graph_copy, source=head)):
720
717
  if node is dummy_endnode:
721
718
  # skip the dummy endnode
722
719
  continue
723
720
  if cyclic and node is head:
724
721
  continue
722
+ if node not in graph_copy:
723
+ continue
725
724
 
726
725
  out_degree = graph_copy.out_degree[node]
727
726
  if out_degree == 0:
@@ -740,10 +739,10 @@ class RegionIdentifier(Analysis):
740
739
 
741
740
  # test if this node is an entry to a single-entry, single-successor region
742
741
  levels = 0
743
- postdom_node = postdoms.get(node, None)
742
+ postdom_node = postdoms.idom(node)
744
743
  while postdom_node is not None:
745
744
  if (node, postdom_node) not in failed_region_attempts and self._check_region(
746
- graph_copy, node, postdom_node, doms, df
745
+ graph_copy, node, postdom_node, doms
747
746
  ):
748
747
  frontier = [postdom_node]
749
748
  region = self._compute_region(
@@ -752,6 +751,8 @@ class RegionIdentifier(Analysis):
752
751
  if region is not None:
753
752
  # update region.graph_with_successors
754
753
  if secondary_graph is not None:
754
+ assert region.graph_with_successors is not None
755
+ assert region.successors is not None
755
756
  if self._complete_successors:
756
757
  for nn in list(region.graph_with_successors.nodes):
757
758
  original_successors = secondary_graph.successors(nn)
@@ -782,52 +783,75 @@ class RegionIdentifier(Analysis):
782
783
  graph, region, frontier, dummy_endnode=dummy_endnode, secondary_graph=secondary_graph
783
784
  )
784
785
  # assert dummy_endnode not in graph
785
- return True
786
+ region_created = True
787
+ # we created a new region to replace one or more nodes in the graph.
788
+ replaced_nodes = set(region.graph)
789
+ # update graph_copy; doms and postdoms are updated as well because they hold references to
790
+ # graph_copy internally.
791
+ if graph_copy is not graph:
792
+ self._update_graph(graph_copy, region, replaced_nodes)
793
+ doms.graph_updated(region, replaced_nodes, region.head)
794
+ postdoms.graph_updated(region, replaced_nodes, region.head)
795
+ # break out of the inner loop
796
+ break
786
797
 
787
798
  failed_region_attempts.add((node, postdom_node))
788
- if not dominates(doms, node, postdom_node):
799
+ if not doms.dominates(node, postdom_node):
789
800
  break
790
- if postdom_node is postdoms.get(postdom_node, None):
801
+ if postdom_node is postdoms.idom(postdom_node):
791
802
  break
792
- postdom_node = postdoms.get(postdom_node, None)
803
+ postdom_node = postdoms.idom(postdom_node)
793
804
  levels += 1
794
805
  # l.debug("Walked back %d levels in postdom tree and did not find anything for %r. Next.", levels, node)
795
806
 
796
- return False
807
+ return region_created
797
808
 
798
809
  @staticmethod
799
- def _check_region(graph, start_node, end_node, doms, df):
800
- """
810
+ def _update_graph(graph: networkx.DiGraph, new_region, replaced_nodes: set) -> None:
811
+ region_in_edges = RegionIdentifier._region_in_edges(graph, new_region, data=True)
812
+ region_out_edges = RegionIdentifier._region_out_edges(graph, new_region, data=True)
813
+ for node in replaced_nodes:
814
+ graph.remove_node(node)
815
+ graph.add_node(new_region)
816
+ for src, _, data in region_in_edges:
817
+ graph.add_edge(src, new_region, **data)
818
+ for _, dst, data in region_out_edges:
819
+ graph.add_edge(new_region, dst, **data)
801
820
 
802
- :param graph:
803
- :param start_node:
804
- :param end_node:
805
- :param doms:
806
- :param df:
807
- :return:
821
+ @staticmethod
822
+ def _check_region(graph, start_node, end_node, doms) -> bool:
823
+ """
824
+ Determine the graph slice between start_node and end_node forms a good region.
808
825
  """
809
826
 
810
827
  # if the exit node is the header of a loop that contains the start node, the dominance frontier should only
811
828
  # contain the exit node.
812
- if not dominates(doms, start_node, end_node):
813
- frontier = df.get(start_node, set())
814
- for node in frontier:
829
+ start_node_frontier = None
830
+ end_node_frontier = None
831
+
832
+ if not doms.dominates(start_node, end_node):
833
+ start_node_frontier = doms.df(start_node)
834
+ for node in start_node_frontier:
815
835
  if node is not start_node and node is not end_node:
816
836
  return False
817
837
 
818
838
  # no edges should enter the region.
819
- for node in df.get(end_node, set()):
820
- if dominates(doms, start_node, node) and node is not end_node:
839
+ end_node_frontier = doms.df(end_node)
840
+ for node in end_node_frontier:
841
+ if doms.dominates(start_node, node) and node is not end_node:
821
842
  return False
822
843
 
844
+ if start_node_frontier is None:
845
+ start_node_frontier = doms.df(start_node)
846
+
823
847
  # no edges should leave the region.
824
- for node in df.get(start_node, set()):
848
+ for node in start_node_frontier:
825
849
  if node is start_node or node is end_node:
826
850
  continue
827
- if node not in df.get(end_node, set()):
851
+ if node not in end_node_frontier:
828
852
  return False
829
853
  for pred in graph.predecessors(node):
830
- if dominates(doms, start_node, pred) and not dominates(doms, end_node, pred):
854
+ if doms.dominates(start_node, pred) and not doms.dominates(end_node, pred):
831
855
  return False
832
856
 
833
857
  return True
@@ -978,14 +1002,13 @@ class RegionIdentifier(Analysis):
978
1002
  subgraph_with_exits.add_edge(src, dst)
979
1003
  region.graph = subgraph
980
1004
  region.graph_with_successors = subgraph_with_exits
981
- if normal_exit_node is not None:
982
- region.successors = [normal_exit_node]
983
- else:
984
- region.successors = []
985
- region.successors += list(abnormal_exit_nodes)
1005
+ succs = [normal_exit_node] if normal_exit_node is not None else []
1006
+ succs += list(abnormal_exit_nodes)
1007
+ succs = sorted(set(succs), key=lambda x: x.addr)
1008
+ region.successors = set(succs)
986
1009
 
987
- for succ_0 in region.successors:
988
- for succ_1 in region.successors:
1010
+ for succ_0 in succs:
1011
+ for succ_1 in succs:
989
1012
  if succ_0 is not succ_1 and graph.has_edge(succ_0, succ_1):
990
1013
  region.graph_with_successors.add_edge(succ_0, succ_1)
991
1014
 
@@ -186,6 +186,14 @@ class SequenceWalker:
186
186
  new_condition = (
187
187
  self._handle(node.condition, parent=node, label="condition") if node.condition is not None else None
188
188
  )
189
+
190
+ # note that initializer and iterator are both statements, so they can return empty tuples
191
+ # TODO: Handle the case where multiple statements are returned
192
+ if new_initializer == ():
193
+ new_initializer = None
194
+ if new_iterator == ():
195
+ new_iterator = None
196
+
189
197
  seq_node = self._handle(node.sequence_node, parent=node, label="body", index=0)
190
198
  if seq_node is not None or new_initializer is not None or new_iterator is not None or new_condition is not None:
191
199
  return LoopNode(
@@ -14,10 +14,10 @@ from ailment.statement import Assignment, Label
14
14
  from angr.code_location import CodeLocation
15
15
  from angr.analyses import ForwardAnalysis
16
16
  from angr.analyses.forward_analysis import FunctionGraphVisitor
17
+ from angr.utils.ail import is_head_controlled_loop_block
17
18
  from .rewriting_engine import SimEngineSSARewriting, DefExprType, AT
18
19
  from .rewriting_state import RewritingState
19
20
 
20
-
21
21
  l = logging.getLogger(__name__)
22
22
 
23
23
 
@@ -71,6 +71,14 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
71
71
  self._visited_blocks: set[Any] = set()
72
72
  self.out_blocks = {}
73
73
  self.out_states = {}
74
+ # head_controlled_loop_outstates stores states at the beginning of a loop block *after a loop iteration*, where the block is the
75
+ # following:
76
+ # 0x4036df | t4 = (rcx<8> == 0x0<64>)
77
+ # 0x4036df | if (t4) { Goto 0x4036e2<64> } else { Goto 0x4036df<64> }
78
+ # 0x4036df | STORE(addr=t3, data=t2, size=8, endness=Iend_LE, guard=None)
79
+ # 0x4036df | rdi<8> = t8
80
+ #
81
+ self.head_controlled_loop_outstates = {}
74
82
 
75
83
  self._analyze()
76
84
 
@@ -177,8 +185,12 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
177
185
  else:
178
186
  node.statements = node.statements[:idx] + phi_stmts + node.statements[idx:]
179
187
 
180
- def _reg_predicate(self, node_, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
181
- out_state: RewritingState = self.out_states[(node_.addr, node_.idx)]
188
+ def _reg_predicate(self, node_: Block, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
189
+ out_state: RewritingState = (
190
+ self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
191
+ if is_head_controlled_loop_block(node_)
192
+ else self.out_states[(node_.addr, node_.idx)]
193
+ )
182
194
  if reg_offset in out_state.registers and reg_size in out_state.registers[reg_offset]:
183
195
  existing_var = out_state.registers[reg_offset][reg_size]
184
196
  if existing_var is None:
@@ -189,8 +201,12 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
189
201
  return True, vvar
190
202
  return False, None
191
203
 
192
- def _stack_predicate(self, node_, *, stack_offset: int, stackvar_size: int) -> tuple[bool, Any]:
193
- out_state: RewritingState = self.out_states[(node_.addr, node_.idx)]
204
+ def _stack_predicate(self, node_: Block, *, stack_offset: int, stackvar_size: int) -> tuple[bool, Any]:
205
+ out_state: RewritingState = (
206
+ self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
207
+ if is_head_controlled_loop_block(node_)
208
+ else self.out_states[(node_.addr, node_.idx)]
209
+ )
194
210
  if stack_offset in out_state.stackvars and stackvar_size in out_state.stackvars[stack_offset]:
195
211
  existing_var = out_state.stackvars[stack_offset][stackvar_size]
196
212
  if existing_var is None:
@@ -262,18 +278,32 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
262
278
  )
263
279
 
264
280
  self._visited_blocks.add(block_key)
265
- self.out_states[block_key] = state
266
-
267
- if state.out_block is not None:
268
- assert state.out_block.addr == block.addr
269
-
270
- if self.out_blocks.get(block_key, None) == state.out_block:
271
- return True, state
272
- self.out_blocks[block_key] = state.out_block
273
- state.out_block = None
274
- return True, state
275
-
276
- return True, state
281
+ # get the output state (which is the input state for the successor node)
282
+ # if head_controlled_loop_outstate is set, then it is the output state of the successor node; in this case, the
283
+ # input state for the head-controlled loop block itself is out.state.
284
+ # otherwise (if head_controlled_loop_outstate is not set), engine.state is the input state of the successor
285
+ # node.
286
+ if engine.head_controlled_loop_outstate is None:
287
+ # this is a normal block
288
+ out_state = state
289
+ else:
290
+ # this is a head-controlled loop block
291
+ out_state = engine.head_controlled_loop_outstate
292
+ self.head_controlled_loop_outstates[block_key] = state
293
+ self.out_states[block_key] = out_state
294
+ # the final block is always in state
295
+ out_block = state.out_block
296
+
297
+ if out_block is not None:
298
+ assert out_block.addr == block.addr
299
+
300
+ if self.out_blocks.get(block_key, None) == out_block:
301
+ return True, out_state
302
+ self.out_blocks[block_key] = out_block
303
+ out_state.out_block = None
304
+ return True, out_state
305
+
306
+ return True, out_state
277
307
 
278
308
  def _intra_analysis(self):
279
309
  pass
@@ -4,6 +4,7 @@ from typing import Literal
4
4
  import logging
5
5
 
6
6
  from archinfo import Endness
7
+ from ailment.block import Block
7
8
  from ailment.manager import Manager
8
9
  from ailment.statement import Statement, Assignment, Store, Call, Return, ConditionalJump, DirtyStatement, Jump
9
10
  from ailment.expression import (
@@ -70,6 +71,7 @@ class SimEngineSSARewriting(
70
71
  self.phiid_to_loc = phiid_to_loc
71
72
  self.rewrite_tmps = rewrite_tmps
72
73
  self.ail_manager = ail_manager
74
+ self.head_controlled_loop_outstate: RewritingState | None = None
73
75
 
74
76
  self.secondary_stackvars: set[int] = set()
75
77
 
@@ -87,6 +89,12 @@ class SimEngineSSARewriting(
87
89
  # Handlers
88
90
  #
89
91
 
92
+ def process(
93
+ self, state: RewritingState, *, block: Block | None = None, whitelist: set[int] | None = None, **kwargs
94
+ ) -> None:
95
+ self.head_controlled_loop_outstate = None
96
+ super().process(state, block=block, whitelist=whitelist, **kwargs)
97
+
90
98
  def _top(self, bits):
91
99
  assert False, "Unreachable"
92
100
 
@@ -236,6 +244,11 @@ class SimEngineSSARewriting(
236
244
  new_true_target = self._expr(stmt.true_target) if stmt.true_target is not None else None
237
245
  new_false_target = self._expr(stmt.false_target) if stmt.false_target is not None else None
238
246
 
247
+ if self.stmt_idx != len(self.block.statements) - 1:
248
+ # the conditional jump is in the middle of the block (e.g., the block generated from lifting rep stosq).
249
+ # we need to make a copy of the state and use the state of this point in its successor
250
+ self.head_controlled_loop_outstate = self.state.copy()
251
+
239
252
  if new_cond is not None or new_true_target is not None or new_false_target is not None:
240
253
  return ConditionalJump(
241
254
  stmt.idx,
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+
5
+
6
+ class StackItemType(Enum):
7
+ """
8
+ Enum for the type of stack items.
9
+ """
10
+
11
+ UNKNOWN = 0
12
+ SAVED_BP = 1
13
+ SAVED_REGS = 2
14
+ ARGUMENT = 3
15
+ RET_ADDR = 4
16
+ STACK_CANARY = 5
17
+
18
+
19
+ class StackItem:
20
+ """
21
+ A stack item describes a piece of data that is stored on the stack at a certain offset (usually negative).
22
+ """
23
+
24
+ offset: int
25
+ size: int
26
+ name: str
27
+ item_type: StackItemType
28
+
29
+ def __init__(self, offset: int, size: int, name: str, item_type: StackItemType = StackItemType.UNKNOWN):
30
+ self.offset = offset
31
+ self.size = size
32
+ self.name = name
33
+ self.item_type = item_type
34
+
35
+ def __repr__(self):
36
+ return f"<StackItem {self.name} {self.item_type!s} at {self.offset:#x} ({self.size}b)>"
@@ -40,7 +40,7 @@ from angr.sim_variable import SimVariable, SimTemporaryVariable, SimStackVariabl
40
40
  from angr.utils.constants import is_alignment_mask
41
41
  from angr.utils.library import get_cpp_function_name
42
42
  from angr.utils.loader import is_in_readonly_segment, is_in_readonly_section
43
- from angr.utils.types import unpack_typeref, unpack_pointer
43
+ from angr.utils.types import unpack_typeref, unpack_pointer_and_array
44
44
  from angr.analyses.decompiler.utils import structured_node_is_simple_return
45
45
  from angr.errors import UnsupportedNodeTypeError, AngrRuntimeError
46
46
  from angr.knowledge_plugins.cfg.memory_data import MemoryData, MemoryDataSort
@@ -539,6 +539,8 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
539
539
 
540
540
  if self.codegen.show_externs and self.codegen.cexterns:
541
541
  for v in sorted(self.codegen.cexterns, key=lambda v: str(v.variable.name)):
542
+ if v.variable not in self.variables_in_use:
543
+ continue
542
544
  varname = v.c_repr() if v.type is None else v.variable.name
543
545
  yield "extern ", None
544
546
  yield from type_to_c_repr_chunks(v.type, name=varname, name_type=v, full=False)
@@ -2581,7 +2583,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2581
2583
 
2582
2584
  # TODO store extern fallback size somewhere lol
2583
2585
  self.cexterns = {
2584
- self._variable(v, 1)
2586
+ self._variable(v, 1, mark_used=False)
2585
2587
  for v in self.externs
2586
2588
  if v not in self._inlined_strings and v not in self._function_pointers
2587
2589
  }
@@ -2698,7 +2700,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2698
2700
  return _mapping.get(n)(signed=signed).with_arch(self.project.arch)
2699
2701
  return SimTypeNum(n, signed=signed).with_arch(self.project.arch)
2700
2702
 
2701
- def _variable(self, variable: SimVariable, fallback_type_size: int | None, vvar_id: int | None = None) -> CVariable:
2703
+ def _variable(
2704
+ self, variable: SimVariable, fallback_type_size: int | None, vvar_id: int | None = None, mark_used: bool = True
2705
+ ) -> CVariable:
2702
2706
  # TODO: we need to fucking make sure that variable recovery and type inference actually generates a size
2703
2707
  # TODO: for each variable it links into the fucking ail. then we can remove fallback_type_size.
2704
2708
  unified = self._variable_kb.variables[self._func.addr].unified_variable(variable)
@@ -2710,7 +2714,8 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2710
2714
  (fallback_type_size or self.project.arch.bytes) * self.project.arch.byte_width
2711
2715
  )
2712
2716
  cvar = CVariable(variable, unified_variable=unified, variable_type=variable_type, codegen=self, vvar_id=vvar_id)
2713
- self._variables_in_use[variable] = cvar
2717
+ if mark_used:
2718
+ self._variables_in_use[variable] = cvar
2714
2719
  return cvar
2715
2720
 
2716
2721
  def _get_variable_reference(self, cvar: CVariable) -> CExpression:
@@ -2776,7 +2781,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2776
2781
  # expr must express a POINTER to the base
2777
2782
  # returns a value which has a simtype of data_type as if it were dereferenced out of expr
2778
2783
  data_type = unpack_typeref(data_type)
2779
- base_type = unpack_typeref(unpack_pointer(expr.type))
2784
+ base_type = unpack_typeref(unpack_pointer_and_array(expr.type))
2780
2785
  if base_type is None:
2781
2786
  # well, not much we can do
2782
2787
  if data_type is None:
@@ -2899,7 +2904,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2899
2904
  ) -> CExpression:
2900
2905
  # same rule as _access_constant_offset wrt pointer expressions
2901
2906
  data_type = unpack_typeref(data_type)
2902
- base_type = unpack_pointer(expr.type)
2907
+ base_type = unpack_pointer_and_array(expr.type)
2903
2908
  if base_type is None:
2904
2909
  # use the fallback from above
2905
2910
  return self._access_constant_offset(expr, 0, data_type, lvalue, renegotiate_type)
@@ -2959,7 +2964,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2959
2964
  kernel = None
2960
2965
  while i < len(terms):
2961
2966
  c, t = terms[i]
2962
- if isinstance(unpack_typeref(t.type), SimTypePointer):
2967
+ if isinstance(unpack_typeref(t.type), (SimTypePointer, SimTypeArray)):
2963
2968
  if kernel is not None:
2964
2969
  l.warning("Summing two different pointers together. Uh oh!")
2965
2970
  return bail_out()
@@ -2982,7 +2987,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
2982
2987
 
2983
2988
  # suffering.
2984
2989
  while terms:
2985
- kernel_type = unpack_typeref(unpack_pointer(kernel.type))
2990
+ kernel_type = unpack_typeref(unpack_pointer_and_array(kernel.type))
2986
2991
  assert kernel_type
2987
2992
 
2988
2993
  if kernel_type.size is None:
@@ -3049,7 +3054,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3049
3054
  kernel = inner.operand
3050
3055
  else:
3051
3056
  kernel = CUnaryOp("Reference", inner, codegen=self)
3052
- if unpack_typeref(unpack_pointer(kernel.type)) == kernel_type:
3057
+ if unpack_typeref(unpack_pointer_and_array(kernel.type)) == kernel_type:
3053
3058
  # we are not making progress
3054
3059
  pass
3055
3060
  else: