angr 9.2.141__py3-none-manylinux2014_aarch64.whl → 9.2.143__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +26 -12
- angr/analyses/calling_convention/fact_collector.py +31 -9
- angr/analyses/cfg/cfg_base.py +38 -4
- angr/analyses/cfg/cfg_fast.py +23 -7
- angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +12 -1
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -1
- angr/analyses/class_identifier.py +8 -7
- angr/analyses/complete_calling_conventions.py +19 -6
- angr/analyses/decompiler/ail_simplifier.py +138 -98
- angr/analyses/decompiler/clinic.py +73 -5
- angr/analyses/decompiler/condition_processor.py +7 -7
- angr/analyses/decompiler/decompilation_cache.py +2 -1
- angr/analyses/decompiler/decompiler.py +10 -2
- angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +110 -46
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +8 -0
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +2 -0
- angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
- angr/analyses/decompiler/peephole_optimizations/simplify_pc_relative_loads.py +15 -1
- angr/analyses/decompiler/region_identifier.py +70 -47
- angr/analyses/decompiler/sequence_walker.py +8 -0
- angr/analyses/decompiler/ssailification/rewriting.py +47 -17
- angr/analyses/decompiler/ssailification/rewriting_engine.py +13 -0
- angr/analyses/decompiler/stack_item.py +36 -0
- angr/analyses/decompiler/structured_codegen/c.py +14 -9
- angr/analyses/decompiler/structuring/phoenix.py +3 -3
- angr/analyses/decompiler/utils.py +13 -0
- angr/analyses/find_objects_static.py +2 -1
- angr/analyses/reaching_definitions/engine_vex.py +13 -0
- angr/analyses/reaching_definitions/function_handler.py +24 -10
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
- angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
- angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
- angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
- angr/analyses/reaching_definitions/rd_state.py +11 -7
- angr/analyses/s_liveness.py +44 -6
- angr/analyses/s_propagator.py +40 -29
- angr/analyses/s_reaching_definitions/s_rda_model.py +48 -37
- angr/analyses/s_reaching_definitions/s_rda_view.py +6 -3
- angr/analyses/s_reaching_definitions/s_reaching_definitions.py +21 -21
- angr/analyses/typehoon/simple_solver.py +35 -8
- angr/analyses/typehoon/typehoon.py +3 -1
- angr/analyses/variable_recovery/engine_ail.py +6 -6
- angr/calling_conventions.py +20 -10
- angr/knowledge_plugins/functions/function.py +5 -10
- angr/knowledge_plugins/variables/variable_manager.py +27 -0
- angr/procedures/definitions/__init__.py +3 -10
- angr/procedures/definitions/linux_kernel.py +5 -0
- angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
- angr/procedures/win32_kernel/__fastfail.py +15 -0
- angr/sim_procedure.py +2 -2
- angr/simos/simos.py +14 -10
- angr/simos/windows.py +42 -1
- angr/utils/ail.py +41 -1
- angr/utils/cpp.py +17 -0
- angr/utils/doms.py +149 -0
- angr/utils/library.py +1 -1
- angr/utils/ssa/__init__.py +21 -14
- angr/utils/ssa/vvar_uses_collector.py +2 -2
- angr/utils/types.py +12 -1
- {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/METADATA +7 -7
- {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/RECORD +71 -67
- {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/LICENSE +0 -0
- {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/WHEEL +0 -0
- {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/entry_points.txt +0 -0
- {angr-9.2.141.dist-info → angr-9.2.143.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# pylint:disable=too-many-boolean-expressions
|
|
1
2
|
from __future__ import annotations
|
|
2
3
|
from collections.abc import Iterable
|
|
3
4
|
import logging
|
|
@@ -7,6 +8,7 @@ import ailment
|
|
|
7
8
|
|
|
8
9
|
from angr.calling_conventions import SimRegArg
|
|
9
10
|
from angr.code_location import CodeLocation
|
|
11
|
+
from angr.analyses.decompiler.stack_item import StackItem, StackItemType
|
|
10
12
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
11
13
|
|
|
12
14
|
|
|
@@ -82,6 +84,14 @@ class RegisterSaveAreaSimplifier(OptimizationPass):
|
|
|
82
84
|
# update it
|
|
83
85
|
self._update_block(old_block, new_block)
|
|
84
86
|
|
|
87
|
+
if updated_blocks:
|
|
88
|
+
# update stack_items
|
|
89
|
+
for data in info.values():
|
|
90
|
+
for stack_offset, _ in data["stored"]:
|
|
91
|
+
self.stack_items[stack_offset] = StackItem(
|
|
92
|
+
stack_offset, self.project.arch.bytes, "regs", StackItemType.SAVED_REGS
|
|
93
|
+
)
|
|
94
|
+
|
|
85
95
|
def _find_registers_stored_on_stack(self) -> list[tuple[int, int, CodeLocation]]:
|
|
86
96
|
first_block = self._get_block(self._func.addr)
|
|
87
97
|
if first_block is None:
|
|
@@ -94,14 +104,26 @@ class RegisterSaveAreaSimplifier(OptimizationPass):
|
|
|
94
104
|
isinstance(stmt, ailment.Stmt.Store)
|
|
95
105
|
and isinstance(stmt.addr, ailment.Expr.StackBaseOffset)
|
|
96
106
|
and isinstance(stmt.addr.offset, int)
|
|
97
|
-
and isinstance(stmt.data, ailment.Expr.VirtualVariable)
|
|
98
|
-
and stmt.data.was_reg
|
|
99
107
|
):
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
108
|
+
if isinstance(stmt.data, ailment.Expr.VirtualVariable) and stmt.data.was_reg:
|
|
109
|
+
# it's storing registers to the stack!
|
|
110
|
+
stack_offset = stmt.addr.offset
|
|
111
|
+
reg_offset = stmt.data.reg_offset
|
|
112
|
+
codeloc = CodeLocation(first_block.addr, idx, block_idx=first_block.idx, ins_addr=stmt.ins_addr)
|
|
113
|
+
results.append((reg_offset, stack_offset, codeloc))
|
|
114
|
+
elif (
|
|
115
|
+
self.project.arch.name == "AMD64"
|
|
116
|
+
and isinstance(stmt.data, ailment.Expr.Convert)
|
|
117
|
+
and isinstance(stmt.data.operand, ailment.Expr.VirtualVariable)
|
|
118
|
+
and stmt.data.operand.was_reg
|
|
119
|
+
and stmt.data.from_bits == 256
|
|
120
|
+
and stmt.data.to_bits == 128
|
|
121
|
+
):
|
|
122
|
+
# storing xmm registers to the stack
|
|
123
|
+
stack_offset = stmt.addr.offset
|
|
124
|
+
reg_offset = stmt.data.operand.reg_offset
|
|
125
|
+
codeloc = CodeLocation(first_block.addr, idx, block_idx=first_block.idx, ins_addr=stmt.ins_addr)
|
|
126
|
+
results.append((reg_offset, stack_offset, codeloc))
|
|
105
127
|
|
|
106
128
|
return results
|
|
107
129
|
|
|
@@ -6,6 +6,7 @@ import logging
|
|
|
6
6
|
import ailment
|
|
7
7
|
|
|
8
8
|
from angr.utils.bits import s2u
|
|
9
|
+
from angr.analyses.decompiler.stack_item import StackItem, StackItemType
|
|
9
10
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
10
11
|
|
|
11
12
|
|
|
@@ -168,6 +169,11 @@ class StackCanarySimplifier(OptimizationPass):
|
|
|
168
169
|
first_block_copy.statements.pop(stmt_idx)
|
|
169
170
|
self._update_block(first_block, first_block_copy)
|
|
170
171
|
|
|
172
|
+
# update stack_items
|
|
173
|
+
self.stack_items[store_offset] = StackItem(
|
|
174
|
+
store_offset, canary_init_stmt.dst.size, "canary", StackItemType.STACK_CANARY
|
|
175
|
+
)
|
|
176
|
+
|
|
171
177
|
# Done!
|
|
172
178
|
|
|
173
179
|
def _find_canary_init_stmt(self):
|
|
@@ -7,6 +7,7 @@ import ailment
|
|
|
7
7
|
import cle
|
|
8
8
|
|
|
9
9
|
from angr.utils.funcid import is_function_security_check_cookie
|
|
10
|
+
from angr.analyses.decompiler.stack_item import StackItem, StackItemType
|
|
10
11
|
from .optimization_pass import OptimizationPass, OptimizationPassStage
|
|
11
12
|
|
|
12
13
|
|
|
@@ -62,7 +63,9 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
62
63
|
first_block, canary_init_stmt_ids = init_stmts
|
|
63
64
|
canary_init_stmt = first_block.statements[canary_init_stmt_ids[-1]]
|
|
64
65
|
# where is the stack canary stored?
|
|
65
|
-
if not isinstance(canary_init_stmt
|
|
66
|
+
if not isinstance(canary_init_stmt, ailment.Stmt.Store) or not isinstance(
|
|
67
|
+
canary_init_stmt.addr, ailment.Expr.StackBaseOffset
|
|
68
|
+
):
|
|
66
69
|
_l.debug(
|
|
67
70
|
"Unsupported canary storing location %s. Expects an ailment.Expr.StackBaseOffset.",
|
|
68
71
|
canary_init_stmt.addr,
|
|
@@ -143,6 +146,11 @@ class WinStackCanarySimplifier(OptimizationPass):
|
|
|
143
146
|
first_block_copy.statements.pop(stmt_idx)
|
|
144
147
|
self._update_block(first_block, first_block_copy)
|
|
145
148
|
|
|
149
|
+
# update stack_items
|
|
150
|
+
self.stack_items[store_offset] = StackItem(
|
|
151
|
+
store_offset, canary_init_stmt.size, "canary", StackItemType.STACK_CANARY
|
|
152
|
+
)
|
|
153
|
+
|
|
146
154
|
def _find_canary_init_stmt(self):
|
|
147
155
|
first_block = self._get_block(self._func.addr)
|
|
148
156
|
if first_block is None:
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# pylint:disable=too-many-boolean-expressions
|
|
1
2
|
from __future__ import annotations
|
|
2
3
|
from ailment.expression import BinaryOp, Const, Load
|
|
3
4
|
|
|
@@ -20,10 +21,23 @@ class SimplifyPcRelativeLoads(PeepholeOptimizationExprBase):
|
|
|
20
21
|
if expr.op == "Add" and len(expr.operands) == 2 and isinstance(expr.operands[0], Load):
|
|
21
22
|
op0, op1 = expr.operands
|
|
22
23
|
|
|
24
|
+
assert self.project is not None
|
|
25
|
+
if not hasattr(expr, "ins_addr"):
|
|
26
|
+
return expr
|
|
27
|
+
assert expr.ins_addr is not None
|
|
28
|
+
|
|
23
29
|
# check if op1 is PC
|
|
24
|
-
if
|
|
30
|
+
if (
|
|
31
|
+
isinstance(op1, Const)
|
|
32
|
+
and op1.is_int
|
|
33
|
+
and hasattr(expr, "ins_addr")
|
|
34
|
+
and is_pc(self.project, expr.ins_addr, op1.value) # type: ignore
|
|
35
|
+
and isinstance(op0.addr, Const)
|
|
36
|
+
and op0.addr.is_int
|
|
37
|
+
):
|
|
25
38
|
# check if op0.addr points to a read-only section
|
|
26
39
|
addr = op0.addr.value
|
|
40
|
+
assert isinstance(addr, int)
|
|
27
41
|
if is_in_readonly_section(self.project, addr) or is_in_readonly_segment(self.project, addr):
|
|
28
42
|
# found it!
|
|
29
43
|
# do the load first
|
|
@@ -11,7 +11,8 @@ from ailment.statement import ConditionalJump, Jump
|
|
|
11
11
|
from ailment.expression import Const
|
|
12
12
|
|
|
13
13
|
from angr.utils.graph import GraphUtils
|
|
14
|
-
from angr.utils.graph import dfs_back_edges, subgraph_between_nodes, dominates
|
|
14
|
+
from angr.utils.graph import dfs_back_edges, subgraph_between_nodes, dominates
|
|
15
|
+
from angr.utils.doms import IncrementalDominators
|
|
15
16
|
from angr.errors import AngrRuntimeError
|
|
16
17
|
from angr.analyses import Analysis, register_analysis
|
|
17
18
|
from .structuring.structurer_nodes import MultiNode, ConditionNode, IncompleteSwitchCaseHeadStatement
|
|
@@ -115,11 +116,11 @@ class RegionIdentifier(Analysis):
|
|
|
115
116
|
@return: List of addr lists
|
|
116
117
|
"""
|
|
117
118
|
|
|
118
|
-
work_list = [self.region]
|
|
119
|
+
work_list: list[GraphRegion] = [self.region] # type: ignore
|
|
119
120
|
block_only_regions = []
|
|
120
121
|
seen_regions = set()
|
|
121
122
|
while work_list:
|
|
122
|
-
children_regions = []
|
|
123
|
+
children_regions: list[GraphRegion] = []
|
|
123
124
|
for region in work_list:
|
|
124
125
|
children_blocks = []
|
|
125
126
|
for node in region.graph.nodes:
|
|
@@ -234,7 +235,7 @@ class RegionIdentifier(Analysis):
|
|
|
234
235
|
break
|
|
235
236
|
|
|
236
237
|
def _find_loop_headers(self, graph: networkx.DiGraph) -> list:
|
|
237
|
-
heads = {t for _, t in dfs_back_edges(graph, self._start_node)}
|
|
238
|
+
heads = list({t for _, t in dfs_back_edges(graph, self._start_node)})
|
|
238
239
|
return GraphUtils.quasi_topological_sort_nodes(graph, heads)
|
|
239
240
|
|
|
240
241
|
def _find_initial_loop_nodes(self, graph: networkx.DiGraph, head):
|
|
@@ -392,7 +393,7 @@ class RegionIdentifier(Analysis):
|
|
|
392
393
|
|
|
393
394
|
while True:
|
|
394
395
|
for node in networkx.dfs_postorder_nodes(graph):
|
|
395
|
-
preds = graph.predecessors(node)
|
|
396
|
+
preds = list(graph.predecessors(node))
|
|
396
397
|
if len(preds) == 1:
|
|
397
398
|
# merge the two nodes
|
|
398
399
|
self._absorb_node(graph, preds[0], node)
|
|
@@ -473,7 +474,7 @@ class RegionIdentifier(Analysis):
|
|
|
473
474
|
head = next(iter(n for n in subgraph.nodes() if n.addr == head.addr))
|
|
474
475
|
region.head = head
|
|
475
476
|
|
|
476
|
-
if len(graph
|
|
477
|
+
if len(graph) == 1 and isinstance(next(iter(graph.nodes())), GraphRegion):
|
|
477
478
|
return next(iter(graph.nodes()))
|
|
478
479
|
# create a large graph region
|
|
479
480
|
new_head = self._get_start_node(graph)
|
|
@@ -491,6 +492,7 @@ class RegionIdentifier(Analysis):
|
|
|
491
492
|
l.debug("Initial loop nodes %s", self._dbg_block_list(initial_loop_nodes))
|
|
492
493
|
|
|
493
494
|
# Make sure no other loops are contained in the current loop
|
|
495
|
+
assert self._loop_headers is not None
|
|
494
496
|
if {n for n in initial_loop_nodes if n.addr != head.addr}.intersection(self._loop_headers):
|
|
495
497
|
return None
|
|
496
498
|
|
|
@@ -535,7 +537,7 @@ class RegionIdentifier(Analysis):
|
|
|
535
537
|
region = self._abstract_cyclic_region(
|
|
536
538
|
graph, refined_loop_nodes, head, normal_entries, abnormal_entries, normal_exit_node, abnormal_exit_nodes
|
|
537
539
|
)
|
|
538
|
-
if len(region.successors) > 1 and self._force_loop_single_exit:
|
|
540
|
+
if region.successors is not None and len(region.successors) > 1 and self._force_loop_single_exit:
|
|
539
541
|
# multi-successor region. refinement is required
|
|
540
542
|
self._refine_loop_successors_to_guarded_successors(region, graph)
|
|
541
543
|
|
|
@@ -705,23 +707,20 @@ class RegionIdentifier(Analysis):
|
|
|
705
707
|
else:
|
|
706
708
|
dummy_endnode = None
|
|
707
709
|
|
|
708
|
-
#
|
|
709
|
-
doms =
|
|
710
|
-
|
|
711
|
-
# compute post-dominator tree
|
|
712
|
-
inverted_graph = shallow_reverse(graph_copy)
|
|
713
|
-
postdoms = networkx.immediate_dominators(inverted_graph, endnodes[0])
|
|
714
|
-
|
|
715
|
-
# dominance frontiers
|
|
716
|
-
df = networkx.algorithms.dominance_frontiers(graph_copy, head)
|
|
710
|
+
# dominators and post-dominators, computed incrementally
|
|
711
|
+
doms = IncrementalDominators(graph_copy, head)
|
|
712
|
+
postdoms = IncrementalDominators(graph_copy, endnodes[0], post=True)
|
|
717
713
|
|
|
718
714
|
# visit the nodes in post-order
|
|
719
|
-
|
|
715
|
+
region_created = False
|
|
716
|
+
for node in list(networkx.dfs_postorder_nodes(graph_copy, source=head)):
|
|
720
717
|
if node is dummy_endnode:
|
|
721
718
|
# skip the dummy endnode
|
|
722
719
|
continue
|
|
723
720
|
if cyclic and node is head:
|
|
724
721
|
continue
|
|
722
|
+
if node not in graph_copy:
|
|
723
|
+
continue
|
|
725
724
|
|
|
726
725
|
out_degree = graph_copy.out_degree[node]
|
|
727
726
|
if out_degree == 0:
|
|
@@ -740,10 +739,10 @@ class RegionIdentifier(Analysis):
|
|
|
740
739
|
|
|
741
740
|
# test if this node is an entry to a single-entry, single-successor region
|
|
742
741
|
levels = 0
|
|
743
|
-
postdom_node = postdoms.
|
|
742
|
+
postdom_node = postdoms.idom(node)
|
|
744
743
|
while postdom_node is not None:
|
|
745
744
|
if (node, postdom_node) not in failed_region_attempts and self._check_region(
|
|
746
|
-
graph_copy, node, postdom_node, doms
|
|
745
|
+
graph_copy, node, postdom_node, doms
|
|
747
746
|
):
|
|
748
747
|
frontier = [postdom_node]
|
|
749
748
|
region = self._compute_region(
|
|
@@ -752,6 +751,8 @@ class RegionIdentifier(Analysis):
|
|
|
752
751
|
if region is not None:
|
|
753
752
|
# update region.graph_with_successors
|
|
754
753
|
if secondary_graph is not None:
|
|
754
|
+
assert region.graph_with_successors is not None
|
|
755
|
+
assert region.successors is not None
|
|
755
756
|
if self._complete_successors:
|
|
756
757
|
for nn in list(region.graph_with_successors.nodes):
|
|
757
758
|
original_successors = secondary_graph.successors(nn)
|
|
@@ -782,52 +783,75 @@ class RegionIdentifier(Analysis):
|
|
|
782
783
|
graph, region, frontier, dummy_endnode=dummy_endnode, secondary_graph=secondary_graph
|
|
783
784
|
)
|
|
784
785
|
# assert dummy_endnode not in graph
|
|
785
|
-
|
|
786
|
+
region_created = True
|
|
787
|
+
# we created a new region to replace one or more nodes in the graph.
|
|
788
|
+
replaced_nodes = set(region.graph)
|
|
789
|
+
# update graph_copy; doms and postdoms are updated as well because they hold references to
|
|
790
|
+
# graph_copy internally.
|
|
791
|
+
if graph_copy is not graph:
|
|
792
|
+
self._update_graph(graph_copy, region, replaced_nodes)
|
|
793
|
+
doms.graph_updated(region, replaced_nodes, region.head)
|
|
794
|
+
postdoms.graph_updated(region, replaced_nodes, region.head)
|
|
795
|
+
# break out of the inner loop
|
|
796
|
+
break
|
|
786
797
|
|
|
787
798
|
failed_region_attempts.add((node, postdom_node))
|
|
788
|
-
if not dominates(
|
|
799
|
+
if not doms.dominates(node, postdom_node):
|
|
789
800
|
break
|
|
790
|
-
if postdom_node is postdoms.
|
|
801
|
+
if postdom_node is postdoms.idom(postdom_node):
|
|
791
802
|
break
|
|
792
|
-
postdom_node = postdoms.
|
|
803
|
+
postdom_node = postdoms.idom(postdom_node)
|
|
793
804
|
levels += 1
|
|
794
805
|
# l.debug("Walked back %d levels in postdom tree and did not find anything for %r. Next.", levels, node)
|
|
795
806
|
|
|
796
|
-
return
|
|
807
|
+
return region_created
|
|
797
808
|
|
|
798
809
|
@staticmethod
|
|
799
|
-
def
|
|
800
|
-
|
|
810
|
+
def _update_graph(graph: networkx.DiGraph, new_region, replaced_nodes: set) -> None:
|
|
811
|
+
region_in_edges = RegionIdentifier._region_in_edges(graph, new_region, data=True)
|
|
812
|
+
region_out_edges = RegionIdentifier._region_out_edges(graph, new_region, data=True)
|
|
813
|
+
for node in replaced_nodes:
|
|
814
|
+
graph.remove_node(node)
|
|
815
|
+
graph.add_node(new_region)
|
|
816
|
+
for src, _, data in region_in_edges:
|
|
817
|
+
graph.add_edge(src, new_region, **data)
|
|
818
|
+
for _, dst, data in region_out_edges:
|
|
819
|
+
graph.add_edge(new_region, dst, **data)
|
|
801
820
|
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
:param df:
|
|
807
|
-
:return:
|
|
821
|
+
@staticmethod
|
|
822
|
+
def _check_region(graph, start_node, end_node, doms) -> bool:
|
|
823
|
+
"""
|
|
824
|
+
Determine whether the graph slice between start_node and end_node forms a good region.
|
|
808
825
|
"""
|
|
809
826
|
|
|
810
827
|
# if the exit node is the header of a loop that contains the start node, the dominance frontier should only
|
|
811
828
|
# contain the exit node.
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
829
|
+
start_node_frontier = None
|
|
830
|
+
end_node_frontier = None
|
|
831
|
+
|
|
832
|
+
if not doms.dominates(start_node, end_node):
|
|
833
|
+
start_node_frontier = doms.df(start_node)
|
|
834
|
+
for node in start_node_frontier:
|
|
815
835
|
if node is not start_node and node is not end_node:
|
|
816
836
|
return False
|
|
817
837
|
|
|
818
838
|
# no edges should enter the region.
|
|
819
|
-
|
|
820
|
-
|
|
839
|
+
end_node_frontier = doms.df(end_node)
|
|
840
|
+
for node in end_node_frontier:
|
|
841
|
+
if doms.dominates(start_node, node) and node is not end_node:
|
|
821
842
|
return False
|
|
822
843
|
|
|
844
|
+
if start_node_frontier is None:
|
|
845
|
+
start_node_frontier = doms.df(start_node)
|
|
846
|
+
|
|
823
847
|
# no edges should leave the region.
|
|
824
|
-
for node in
|
|
848
|
+
for node in start_node_frontier:
|
|
825
849
|
if node is start_node or node is end_node:
|
|
826
850
|
continue
|
|
827
|
-
if node not in
|
|
851
|
+
if node not in end_node_frontier:
|
|
828
852
|
return False
|
|
829
853
|
for pred in graph.predecessors(node):
|
|
830
|
-
if dominates(
|
|
854
|
+
if doms.dominates(start_node, pred) and not doms.dominates(end_node, pred):
|
|
831
855
|
return False
|
|
832
856
|
|
|
833
857
|
return True
|
|
@@ -978,14 +1002,13 @@ class RegionIdentifier(Analysis):
|
|
|
978
1002
|
subgraph_with_exits.add_edge(src, dst)
|
|
979
1003
|
region.graph = subgraph
|
|
980
1004
|
region.graph_with_successors = subgraph_with_exits
|
|
981
|
-
if normal_exit_node is not None
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
region.successors += list(abnormal_exit_nodes)
|
|
1005
|
+
succs = [normal_exit_node] if normal_exit_node is not None else []
|
|
1006
|
+
succs += list(abnormal_exit_nodes)
|
|
1007
|
+
succs = sorted(set(succs), key=lambda x: x.addr)
|
|
1008
|
+
region.successors = set(succs)
|
|
986
1009
|
|
|
987
|
-
for succ_0 in
|
|
988
|
-
for succ_1 in
|
|
1010
|
+
for succ_0 in succs:
|
|
1011
|
+
for succ_1 in succs:
|
|
989
1012
|
if succ_0 is not succ_1 and graph.has_edge(succ_0, succ_1):
|
|
990
1013
|
region.graph_with_successors.add_edge(succ_0, succ_1)
|
|
991
1014
|
|
|
@@ -186,6 +186,14 @@ class SequenceWalker:
|
|
|
186
186
|
new_condition = (
|
|
187
187
|
self._handle(node.condition, parent=node, label="condition") if node.condition is not None else None
|
|
188
188
|
)
|
|
189
|
+
|
|
190
|
+
# note that initializer and iterator are both statements, so they can return empty tuples
|
|
191
|
+
# TODO: Handle the case where multiple statements are returned
|
|
192
|
+
if new_initializer == ():
|
|
193
|
+
new_initializer = None
|
|
194
|
+
if new_iterator == ():
|
|
195
|
+
new_iterator = None
|
|
196
|
+
|
|
189
197
|
seq_node = self._handle(node.sequence_node, parent=node, label="body", index=0)
|
|
190
198
|
if seq_node is not None or new_initializer is not None or new_iterator is not None or new_condition is not None:
|
|
191
199
|
return LoopNode(
|
|
@@ -14,10 +14,10 @@ from ailment.statement import Assignment, Label
|
|
|
14
14
|
from angr.code_location import CodeLocation
|
|
15
15
|
from angr.analyses import ForwardAnalysis
|
|
16
16
|
from angr.analyses.forward_analysis import FunctionGraphVisitor
|
|
17
|
+
from angr.utils.ail import is_head_controlled_loop_block
|
|
17
18
|
from .rewriting_engine import SimEngineSSARewriting, DefExprType, AT
|
|
18
19
|
from .rewriting_state import RewritingState
|
|
19
20
|
|
|
20
|
-
|
|
21
21
|
l = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
23
|
|
|
@@ -71,6 +71,14 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
|
|
|
71
71
|
self._visited_blocks: set[Any] = set()
|
|
72
72
|
self.out_blocks = {}
|
|
73
73
|
self.out_states = {}
|
|
74
|
+
# head_controlled_loop_outstates stores states at the beginning of a loop block *after a loop iteration*, where the block is the
|
|
75
|
+
# following:
|
|
76
|
+
# 0x4036df | t4 = (rcx<8> == 0x0<64>)
|
|
77
|
+
# 0x4036df | if (t4) { Goto 0x4036e2<64> } else { Goto 0x4036df<64> }
|
|
78
|
+
# 0x4036df | STORE(addr=t3, data=t2, size=8, endness=Iend_LE, guard=None)
|
|
79
|
+
# 0x4036df | rdi<8> = t8
|
|
80
|
+
#
|
|
81
|
+
self.head_controlled_loop_outstates = {}
|
|
74
82
|
|
|
75
83
|
self._analyze()
|
|
76
84
|
|
|
@@ -177,8 +185,12 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
|
|
|
177
185
|
else:
|
|
178
186
|
node.statements = node.statements[:idx] + phi_stmts + node.statements[idx:]
|
|
179
187
|
|
|
180
|
-
def _reg_predicate(self, node_, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
|
|
181
|
-
out_state: RewritingState =
|
|
188
|
+
def _reg_predicate(self, node_: Block, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
|
|
189
|
+
out_state: RewritingState = (
|
|
190
|
+
self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
|
|
191
|
+
if is_head_controlled_loop_block(node_)
|
|
192
|
+
else self.out_states[(node_.addr, node_.idx)]
|
|
193
|
+
)
|
|
182
194
|
if reg_offset in out_state.registers and reg_size in out_state.registers[reg_offset]:
|
|
183
195
|
existing_var = out_state.registers[reg_offset][reg_size]
|
|
184
196
|
if existing_var is None:
|
|
@@ -189,8 +201,12 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
|
|
|
189
201
|
return True, vvar
|
|
190
202
|
return False, None
|
|
191
203
|
|
|
192
|
-
def _stack_predicate(self, node_, *, stack_offset: int, stackvar_size: int) -> tuple[bool, Any]:
|
|
193
|
-
out_state: RewritingState =
|
|
204
|
+
def _stack_predicate(self, node_: Block, *, stack_offset: int, stackvar_size: int) -> tuple[bool, Any]:
|
|
205
|
+
out_state: RewritingState = (
|
|
206
|
+
self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
|
|
207
|
+
if is_head_controlled_loop_block(node_)
|
|
208
|
+
else self.out_states[(node_.addr, node_.idx)]
|
|
209
|
+
)
|
|
194
210
|
if stack_offset in out_state.stackvars and stackvar_size in out_state.stackvars[stack_offset]:
|
|
195
211
|
existing_var = out_state.stackvars[stack_offset][stackvar_size]
|
|
196
212
|
if existing_var is None:
|
|
@@ -262,18 +278,32 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
|
|
|
262
278
|
)
|
|
263
279
|
|
|
264
280
|
self._visited_blocks.add(block_key)
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
281
|
+
# get the output state (which is the input state for the successor node)
|
|
282
|
+
# if head_controlled_loop_outstate is set, then it is the output state of the successor node; in this case, the
|
|
283
|
+
# input state for the head-controlled loop block itself is out.state.
|
|
284
|
+
# otherwise (if head_controlled_loop_outstate is not set), engine.state is the input state of the successor
|
|
285
|
+
# node.
|
|
286
|
+
if engine.head_controlled_loop_outstate is None:
|
|
287
|
+
# this is a normal block
|
|
288
|
+
out_state = state
|
|
289
|
+
else:
|
|
290
|
+
# this is a head-controlled loop block
|
|
291
|
+
out_state = engine.head_controlled_loop_outstate
|
|
292
|
+
self.head_controlled_loop_outstates[block_key] = state
|
|
293
|
+
self.out_states[block_key] = out_state
|
|
294
|
+
# the final block is always in state
|
|
295
|
+
out_block = state.out_block
|
|
296
|
+
|
|
297
|
+
if out_block is not None:
|
|
298
|
+
assert out_block.addr == block.addr
|
|
299
|
+
|
|
300
|
+
if self.out_blocks.get(block_key, None) == out_block:
|
|
301
|
+
return True, out_state
|
|
302
|
+
self.out_blocks[block_key] = out_block
|
|
303
|
+
out_state.out_block = None
|
|
304
|
+
return True, out_state
|
|
305
|
+
|
|
306
|
+
return True, out_state
|
|
277
307
|
|
|
278
308
|
def _intra_analysis(self):
|
|
279
309
|
pass
|
|
@@ -4,6 +4,7 @@ from typing import Literal
|
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
6
|
from archinfo import Endness
|
|
7
|
+
from ailment.block import Block
|
|
7
8
|
from ailment.manager import Manager
|
|
8
9
|
from ailment.statement import Statement, Assignment, Store, Call, Return, ConditionalJump, DirtyStatement, Jump
|
|
9
10
|
from ailment.expression import (
|
|
@@ -70,6 +71,7 @@ class SimEngineSSARewriting(
|
|
|
70
71
|
self.phiid_to_loc = phiid_to_loc
|
|
71
72
|
self.rewrite_tmps = rewrite_tmps
|
|
72
73
|
self.ail_manager = ail_manager
|
|
74
|
+
self.head_controlled_loop_outstate: RewritingState | None = None
|
|
73
75
|
|
|
74
76
|
self.secondary_stackvars: set[int] = set()
|
|
75
77
|
|
|
@@ -87,6 +89,12 @@ class SimEngineSSARewriting(
|
|
|
87
89
|
# Handlers
|
|
88
90
|
#
|
|
89
91
|
|
|
92
|
+
def process(
|
|
93
|
+
self, state: RewritingState, *, block: Block | None = None, whitelist: set[int] | None = None, **kwargs
|
|
94
|
+
) -> None:
|
|
95
|
+
self.head_controlled_loop_outstate = None
|
|
96
|
+
super().process(state, block=block, whitelist=whitelist, **kwargs)
|
|
97
|
+
|
|
90
98
|
def _top(self, bits):
|
|
91
99
|
assert False, "Unreachable"
|
|
92
100
|
|
|
@@ -236,6 +244,11 @@ class SimEngineSSARewriting(
|
|
|
236
244
|
new_true_target = self._expr(stmt.true_target) if stmt.true_target is not None else None
|
|
237
245
|
new_false_target = self._expr(stmt.false_target) if stmt.false_target is not None else None
|
|
238
246
|
|
|
247
|
+
if self.stmt_idx != len(self.block.statements) - 1:
|
|
248
|
+
# the conditional jump is in the middle of the block (e.g., the block generated from lifting rep stosq).
|
|
249
|
+
# we need to make a copy of the state and use the state at this point for its successor
|
|
250
|
+
self.head_controlled_loop_outstate = self.state.copy()
|
|
251
|
+
|
|
239
252
|
if new_cond is not None or new_true_target is not None or new_false_target is not None:
|
|
240
253
|
return ConditionalJump(
|
|
241
254
|
stmt.idx,
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class StackItemType(Enum):
|
|
7
|
+
"""
|
|
8
|
+
Enum for the type of stack items.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
UNKNOWN = 0
|
|
12
|
+
SAVED_BP = 1
|
|
13
|
+
SAVED_REGS = 2
|
|
14
|
+
ARGUMENT = 3
|
|
15
|
+
RET_ADDR = 4
|
|
16
|
+
STACK_CANARY = 5
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class StackItem:
|
|
20
|
+
"""
|
|
21
|
+
A stack item describes a piece of data that is stored on the stack at a certain offset (usually negative).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
offset: int
|
|
25
|
+
size: int
|
|
26
|
+
name: str
|
|
27
|
+
item_type: StackItemType
|
|
28
|
+
|
|
29
|
+
def __init__(self, offset: int, size: int, name: str, item_type: StackItemType = StackItemType.UNKNOWN):
|
|
30
|
+
self.offset = offset
|
|
31
|
+
self.size = size
|
|
32
|
+
self.name = name
|
|
33
|
+
self.item_type = item_type
|
|
34
|
+
|
|
35
|
+
def __repr__(self):
|
|
36
|
+
return f"<StackItem {self.name} {self.item_type!s} at {self.offset:#x} ({self.size}b)>"
|
|
@@ -40,7 +40,7 @@ from angr.sim_variable import SimVariable, SimTemporaryVariable, SimStackVariabl
|
|
|
40
40
|
from angr.utils.constants import is_alignment_mask
|
|
41
41
|
from angr.utils.library import get_cpp_function_name
|
|
42
42
|
from angr.utils.loader import is_in_readonly_segment, is_in_readonly_section
|
|
43
|
-
from angr.utils.types import unpack_typeref,
|
|
43
|
+
from angr.utils.types import unpack_typeref, unpack_pointer_and_array
|
|
44
44
|
from angr.analyses.decompiler.utils import structured_node_is_simple_return
|
|
45
45
|
from angr.errors import UnsupportedNodeTypeError, AngrRuntimeError
|
|
46
46
|
from angr.knowledge_plugins.cfg.memory_data import MemoryData, MemoryDataSort
|
|
@@ -539,6 +539,8 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
|
|
|
539
539
|
|
|
540
540
|
if self.codegen.show_externs and self.codegen.cexterns:
|
|
541
541
|
for v in sorted(self.codegen.cexterns, key=lambda v: str(v.variable.name)):
|
|
542
|
+
if v.variable not in self.variables_in_use:
|
|
543
|
+
continue
|
|
542
544
|
varname = v.c_repr() if v.type is None else v.variable.name
|
|
543
545
|
yield "extern ", None
|
|
544
546
|
yield from type_to_c_repr_chunks(v.type, name=varname, name_type=v, full=False)
|
|
@@ -2581,7 +2583,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2581
2583
|
|
|
2582
2584
|
# TODO store extern fallback size somewhere lol
|
|
2583
2585
|
self.cexterns = {
|
|
2584
|
-
self._variable(v, 1)
|
|
2586
|
+
self._variable(v, 1, mark_used=False)
|
|
2585
2587
|
for v in self.externs
|
|
2586
2588
|
if v not in self._inlined_strings and v not in self._function_pointers
|
|
2587
2589
|
}
|
|
@@ -2698,7 +2700,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2698
2700
|
return _mapping.get(n)(signed=signed).with_arch(self.project.arch)
|
|
2699
2701
|
return SimTypeNum(n, signed=signed).with_arch(self.project.arch)
|
|
2700
2702
|
|
|
2701
|
-
def _variable(
|
|
2703
|
+
def _variable(
|
|
2704
|
+
self, variable: SimVariable, fallback_type_size: int | None, vvar_id: int | None = None, mark_used: bool = True
|
|
2705
|
+
) -> CVariable:
|
|
2702
2706
|
# TODO: we need to make sure that variable recovery and type inference actually generate a size
|
|
2703
2707
|
# TODO: for each variable it links into the ail. then we can remove fallback_type_size.
|
|
2704
2708
|
unified = self._variable_kb.variables[self._func.addr].unified_variable(variable)
|
|
@@ -2710,7 +2714,8 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2710
2714
|
(fallback_type_size or self.project.arch.bytes) * self.project.arch.byte_width
|
|
2711
2715
|
)
|
|
2712
2716
|
cvar = CVariable(variable, unified_variable=unified, variable_type=variable_type, codegen=self, vvar_id=vvar_id)
|
|
2713
|
-
|
|
2717
|
+
if mark_used:
|
|
2718
|
+
self._variables_in_use[variable] = cvar
|
|
2714
2719
|
return cvar
|
|
2715
2720
|
|
|
2716
2721
|
def _get_variable_reference(self, cvar: CVariable) -> CExpression:
|
|
@@ -2776,7 +2781,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2776
2781
|
# expr must express a POINTER to the base
|
|
2777
2782
|
# returns a value which has a simtype of data_type as if it were dereferenced out of expr
|
|
2778
2783
|
data_type = unpack_typeref(data_type)
|
|
2779
|
-
base_type = unpack_typeref(
|
|
2784
|
+
base_type = unpack_typeref(unpack_pointer_and_array(expr.type))
|
|
2780
2785
|
if base_type is None:
|
|
2781
2786
|
# well, not much we can do
|
|
2782
2787
|
if data_type is None:
|
|
@@ -2899,7 +2904,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2899
2904
|
) -> CExpression:
|
|
2900
2905
|
# same rule as _access_constant_offset wrt pointer expressions
|
|
2901
2906
|
data_type = unpack_typeref(data_type)
|
|
2902
|
-
base_type =
|
|
2907
|
+
base_type = unpack_pointer_and_array(expr.type)
|
|
2903
2908
|
if base_type is None:
|
|
2904
2909
|
# use the fallback from above
|
|
2905
2910
|
return self._access_constant_offset(expr, 0, data_type, lvalue, renegotiate_type)
|
|
@@ -2959,7 +2964,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2959
2964
|
kernel = None
|
|
2960
2965
|
while i < len(terms):
|
|
2961
2966
|
c, t = terms[i]
|
|
2962
|
-
if isinstance(unpack_typeref(t.type), SimTypePointer):
|
|
2967
|
+
if isinstance(unpack_typeref(t.type), (SimTypePointer, SimTypeArray)):
|
|
2963
2968
|
if kernel is not None:
|
|
2964
2969
|
l.warning("Summing two different pointers together. Uh oh!")
|
|
2965
2970
|
return bail_out()
|
|
@@ -2982,7 +2987,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2982
2987
|
|
|
2983
2988
|
# suffering.
|
|
2984
2989
|
while terms:
|
|
2985
|
-
kernel_type = unpack_typeref(
|
|
2990
|
+
kernel_type = unpack_typeref(unpack_pointer_and_array(kernel.type))
|
|
2986
2991
|
assert kernel_type
|
|
2987
2992
|
|
|
2988
2993
|
if kernel_type.size is None:
|
|
@@ -3049,7 +3054,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3049
3054
|
kernel = inner.operand
|
|
3050
3055
|
else:
|
|
3051
3056
|
kernel = CUnaryOp("Reference", inner, codegen=self)
|
|
3052
|
-
if unpack_typeref(
|
|
3057
|
+
if unpack_typeref(unpack_pointer_and_array(kernel.type)) == kernel_type:
|
|
3053
3058
|
# we are not making progress
|
|
3054
3059
|
pass
|
|
3055
3060
|
else:
|