angr 9.2.141__py3-none-win_amd64.whl → 9.2.142__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +17 -3
- angr/analyses/cfg/cfg_base.py +38 -4
- angr/analyses/cfg/cfg_fast.py +23 -7
- angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +4 -0
- angr/analyses/class_identifier.py +8 -7
- angr/analyses/complete_calling_conventions.py +1 -1
- angr/analyses/decompiler/ail_simplifier.py +61 -46
- angr/analyses/decompiler/clinic.py +73 -5
- angr/analyses/decompiler/condition_processor.py +7 -7
- angr/analyses/decompiler/decompilation_cache.py +2 -1
- angr/analyses/decompiler/decompiler.py +10 -2
- angr/analyses/decompiler/dephication/graph_vvar_mapping.py +4 -6
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +8 -2
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +63 -34
- angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +2 -0
- angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +29 -7
- angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +6 -0
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +9 -1
- angr/analyses/decompiler/region_identifier.py +70 -47
- angr/analyses/decompiler/ssailification/rewriting.py +47 -17
- angr/analyses/decompiler/ssailification/rewriting_engine.py +13 -0
- angr/analyses/decompiler/stack_item.py +36 -0
- angr/analyses/decompiler/structured_codegen/c.py +14 -9
- angr/analyses/decompiler/structuring/phoenix.py +3 -3
- angr/analyses/find_objects_static.py +2 -1
- angr/analyses/reaching_definitions/engine_vex.py +13 -0
- angr/analyses/reaching_definitions/function_handler.py +24 -10
- angr/analyses/reaching_definitions/function_handler_library/stdio.py +1 -0
- angr/analyses/reaching_definitions/function_handler_library/stdlib.py +45 -12
- angr/analyses/reaching_definitions/function_handler_library/string.py +77 -21
- angr/analyses/reaching_definitions/function_handler_library/unistd.py +21 -1
- angr/analyses/reaching_definitions/rd_state.py +11 -7
- angr/analyses/s_liveness.py +44 -6
- angr/analyses/s_reaching_definitions/s_rda_model.py +4 -2
- angr/analyses/typehoon/simple_solver.py +35 -8
- angr/analyses/typehoon/typehoon.py +3 -1
- angr/calling_conventions.py +2 -2
- angr/knowledge_plugins/functions/function.py +5 -10
- angr/knowledge_plugins/variables/variable_manager.py +27 -0
- angr/lib/angr_native.dll +0 -0
- angr/procedures/definitions/__init__.py +3 -10
- angr/procedures/definitions/wdk_ntoskrnl.py +2 -0
- angr/procedures/win32_kernel/__fastfail.py +15 -0
- angr/sim_procedure.py +2 -2
- angr/simos/simos.py +14 -10
- angr/simos/windows.py +42 -1
- angr/utils/ail.py +41 -1
- angr/utils/cpp.py +17 -0
- angr/utils/doms.py +142 -0
- angr/utils/library.py +1 -1
- angr/utils/types.py +12 -1
- {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/METADATA +7 -7
- {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/RECORD +59 -55
- {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/LICENSE +0 -0
- {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/WHEEL +0 -0
- {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/entry_points.txt +0 -0
- {angr-9.2.141.dist-info → angr-9.2.142.dist-info}/top_level.txt +0 -0
|
@@ -11,7 +11,8 @@ from ailment.statement import ConditionalJump, Jump
|
|
|
11
11
|
from ailment.expression import Const
|
|
12
12
|
|
|
13
13
|
from angr.utils.graph import GraphUtils
|
|
14
|
-
from angr.utils.graph import dfs_back_edges, subgraph_between_nodes, dominates
|
|
14
|
+
from angr.utils.graph import dfs_back_edges, subgraph_between_nodes, dominates
|
|
15
|
+
from angr.utils.doms import IncrementalDominators
|
|
15
16
|
from angr.errors import AngrRuntimeError
|
|
16
17
|
from angr.analyses import Analysis, register_analysis
|
|
17
18
|
from .structuring.structurer_nodes import MultiNode, ConditionNode, IncompleteSwitchCaseHeadStatement
|
|
@@ -115,11 +116,11 @@ class RegionIdentifier(Analysis):
|
|
|
115
116
|
@return: List of addr lists
|
|
116
117
|
"""
|
|
117
118
|
|
|
118
|
-
work_list = [self.region]
|
|
119
|
+
work_list: list[GraphRegion] = [self.region] # type: ignore
|
|
119
120
|
block_only_regions = []
|
|
120
121
|
seen_regions = set()
|
|
121
122
|
while work_list:
|
|
122
|
-
children_regions = []
|
|
123
|
+
children_regions: list[GraphRegion] = []
|
|
123
124
|
for region in work_list:
|
|
124
125
|
children_blocks = []
|
|
125
126
|
for node in region.graph.nodes:
|
|
@@ -234,7 +235,7 @@ class RegionIdentifier(Analysis):
|
|
|
234
235
|
break
|
|
235
236
|
|
|
236
237
|
def _find_loop_headers(self, graph: networkx.DiGraph) -> list:
|
|
237
|
-
heads = {t for _, t in dfs_back_edges(graph, self._start_node)}
|
|
238
|
+
heads = list({t for _, t in dfs_back_edges(graph, self._start_node)})
|
|
238
239
|
return GraphUtils.quasi_topological_sort_nodes(graph, heads)
|
|
239
240
|
|
|
240
241
|
def _find_initial_loop_nodes(self, graph: networkx.DiGraph, head):
|
|
@@ -392,7 +393,7 @@ class RegionIdentifier(Analysis):
|
|
|
392
393
|
|
|
393
394
|
while True:
|
|
394
395
|
for node in networkx.dfs_postorder_nodes(graph):
|
|
395
|
-
preds = graph.predecessors(node)
|
|
396
|
+
preds = list(graph.predecessors(node))
|
|
396
397
|
if len(preds) == 1:
|
|
397
398
|
# merge the two nodes
|
|
398
399
|
self._absorb_node(graph, preds[0], node)
|
|
@@ -473,7 +474,7 @@ class RegionIdentifier(Analysis):
|
|
|
473
474
|
head = next(iter(n for n in subgraph.nodes() if n.addr == head.addr))
|
|
474
475
|
region.head = head
|
|
475
476
|
|
|
476
|
-
if len(graph
|
|
477
|
+
if len(graph) == 1 and isinstance(next(iter(graph.nodes())), GraphRegion):
|
|
477
478
|
return next(iter(graph.nodes()))
|
|
478
479
|
# create a large graph region
|
|
479
480
|
new_head = self._get_start_node(graph)
|
|
@@ -491,6 +492,7 @@ class RegionIdentifier(Analysis):
|
|
|
491
492
|
l.debug("Initial loop nodes %s", self._dbg_block_list(initial_loop_nodes))
|
|
492
493
|
|
|
493
494
|
# Make sure no other loops are contained in the current loop
|
|
495
|
+
assert self._loop_headers is not None
|
|
494
496
|
if {n for n in initial_loop_nodes if n.addr != head.addr}.intersection(self._loop_headers):
|
|
495
497
|
return None
|
|
496
498
|
|
|
@@ -535,7 +537,7 @@ class RegionIdentifier(Analysis):
|
|
|
535
537
|
region = self._abstract_cyclic_region(
|
|
536
538
|
graph, refined_loop_nodes, head, normal_entries, abnormal_entries, normal_exit_node, abnormal_exit_nodes
|
|
537
539
|
)
|
|
538
|
-
if len(region.successors) > 1 and self._force_loop_single_exit:
|
|
540
|
+
if region.successors is not None and len(region.successors) > 1 and self._force_loop_single_exit:
|
|
539
541
|
# multi-successor region. refinement is required
|
|
540
542
|
self._refine_loop_successors_to_guarded_successors(region, graph)
|
|
541
543
|
|
|
@@ -705,23 +707,20 @@ class RegionIdentifier(Analysis):
|
|
|
705
707
|
else:
|
|
706
708
|
dummy_endnode = None
|
|
707
709
|
|
|
708
|
-
#
|
|
709
|
-
doms =
|
|
710
|
-
|
|
711
|
-
# compute post-dominator tree
|
|
712
|
-
inverted_graph = shallow_reverse(graph_copy)
|
|
713
|
-
postdoms = networkx.immediate_dominators(inverted_graph, endnodes[0])
|
|
714
|
-
|
|
715
|
-
# dominance frontiers
|
|
716
|
-
df = networkx.algorithms.dominance_frontiers(graph_copy, head)
|
|
710
|
+
# dominators and post-dominators, computed incrementally
|
|
711
|
+
doms = IncrementalDominators(graph_copy, head)
|
|
712
|
+
postdoms = IncrementalDominators(graph_copy, endnodes[0], post=True)
|
|
717
713
|
|
|
718
714
|
# visit the nodes in post-order
|
|
719
|
-
|
|
715
|
+
region_created = False
|
|
716
|
+
for node in list(networkx.dfs_postorder_nodes(graph_copy, source=head)):
|
|
720
717
|
if node is dummy_endnode:
|
|
721
718
|
# skip the dummy endnode
|
|
722
719
|
continue
|
|
723
720
|
if cyclic and node is head:
|
|
724
721
|
continue
|
|
722
|
+
if node not in graph_copy:
|
|
723
|
+
continue
|
|
725
724
|
|
|
726
725
|
out_degree = graph_copy.out_degree[node]
|
|
727
726
|
if out_degree == 0:
|
|
@@ -740,10 +739,10 @@ class RegionIdentifier(Analysis):
|
|
|
740
739
|
|
|
741
740
|
# test if this node is an entry to a single-entry, single-successor region
|
|
742
741
|
levels = 0
|
|
743
|
-
postdom_node = postdoms.
|
|
742
|
+
postdom_node = postdoms.idom(node)
|
|
744
743
|
while postdom_node is not None:
|
|
745
744
|
if (node, postdom_node) not in failed_region_attempts and self._check_region(
|
|
746
|
-
graph_copy, node, postdom_node, doms
|
|
745
|
+
graph_copy, node, postdom_node, doms
|
|
747
746
|
):
|
|
748
747
|
frontier = [postdom_node]
|
|
749
748
|
region = self._compute_region(
|
|
@@ -752,6 +751,8 @@ class RegionIdentifier(Analysis):
|
|
|
752
751
|
if region is not None:
|
|
753
752
|
# update region.graph_with_successors
|
|
754
753
|
if secondary_graph is not None:
|
|
754
|
+
assert region.graph_with_successors is not None
|
|
755
|
+
assert region.successors is not None
|
|
755
756
|
if self._complete_successors:
|
|
756
757
|
for nn in list(region.graph_with_successors.nodes):
|
|
757
758
|
original_successors = secondary_graph.successors(nn)
|
|
@@ -782,52 +783,75 @@ class RegionIdentifier(Analysis):
|
|
|
782
783
|
graph, region, frontier, dummy_endnode=dummy_endnode, secondary_graph=secondary_graph
|
|
783
784
|
)
|
|
784
785
|
# assert dummy_endnode not in graph
|
|
785
|
-
|
|
786
|
+
region_created = True
|
|
787
|
+
# we created a new region to replace one or more nodes in the graph.
|
|
788
|
+
replaced_nodes = set(region.graph)
|
|
789
|
+
# update graph_copy; doms and postdoms are updated as well because they hold references to
|
|
790
|
+
# graph_copy internally.
|
|
791
|
+
if graph_copy is not graph:
|
|
792
|
+
self._update_graph(graph_copy, region, replaced_nodes)
|
|
793
|
+
doms.graph_updated(region, replaced_nodes, region.head)
|
|
794
|
+
postdoms.graph_updated(region, replaced_nodes, region.head)
|
|
795
|
+
# break out of the inner loop
|
|
796
|
+
break
|
|
786
797
|
|
|
787
798
|
failed_region_attempts.add((node, postdom_node))
|
|
788
|
-
if not dominates(
|
|
799
|
+
if not doms.dominates(node, postdom_node):
|
|
789
800
|
break
|
|
790
|
-
if postdom_node is postdoms.
|
|
801
|
+
if postdom_node is postdoms.idom(postdom_node):
|
|
791
802
|
break
|
|
792
|
-
postdom_node = postdoms.
|
|
803
|
+
postdom_node = postdoms.idom(postdom_node)
|
|
793
804
|
levels += 1
|
|
794
805
|
# l.debug("Walked back %d levels in postdom tree and did not find anything for %r. Next.", levels, node)
|
|
795
806
|
|
|
796
|
-
return
|
|
807
|
+
return region_created
|
|
797
808
|
|
|
798
809
|
@staticmethod
|
|
799
|
-
def
|
|
800
|
-
|
|
810
|
+
def _update_graph(graph: networkx.DiGraph, new_region, replaced_nodes: set) -> None:
|
|
811
|
+
region_in_edges = RegionIdentifier._region_in_edges(graph, new_region, data=True)
|
|
812
|
+
region_out_edges = RegionIdentifier._region_out_edges(graph, new_region, data=True)
|
|
813
|
+
for node in replaced_nodes:
|
|
814
|
+
graph.remove_node(node)
|
|
815
|
+
graph.add_node(new_region)
|
|
816
|
+
for src, _, data in region_in_edges:
|
|
817
|
+
graph.add_edge(src, new_region, **data)
|
|
818
|
+
for _, dst, data in region_out_edges:
|
|
819
|
+
graph.add_edge(new_region, dst, **data)
|
|
801
820
|
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
:param df:
|
|
807
|
-
:return:
|
|
821
|
+
@staticmethod
|
|
822
|
+
def _check_region(graph, start_node, end_node, doms) -> bool:
|
|
823
|
+
"""
|
|
824
|
+
Determine whether the graph slice between start_node and end_node forms a good region.
|
|
808
825
|
"""
|
|
809
826
|
|
|
810
827
|
# if the exit node is the header of a loop that contains the start node, the dominance frontier should only
|
|
811
828
|
# contain the exit node.
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
829
|
+
start_node_frontier = None
|
|
830
|
+
end_node_frontier = None
|
|
831
|
+
|
|
832
|
+
if not doms.dominates(start_node, end_node):
|
|
833
|
+
start_node_frontier = doms.df(start_node)
|
|
834
|
+
for node in start_node_frontier:
|
|
815
835
|
if node is not start_node and node is not end_node:
|
|
816
836
|
return False
|
|
817
837
|
|
|
818
838
|
# no edges should enter the region.
|
|
819
|
-
|
|
820
|
-
|
|
839
|
+
end_node_frontier = doms.df(end_node)
|
|
840
|
+
for node in end_node_frontier:
|
|
841
|
+
if doms.dominates(start_node, node) and node is not end_node:
|
|
821
842
|
return False
|
|
822
843
|
|
|
844
|
+
if start_node_frontier is None:
|
|
845
|
+
start_node_frontier = doms.df(start_node)
|
|
846
|
+
|
|
823
847
|
# no edges should leave the region.
|
|
824
|
-
for node in
|
|
848
|
+
for node in start_node_frontier:
|
|
825
849
|
if node is start_node or node is end_node:
|
|
826
850
|
continue
|
|
827
|
-
if node not in
|
|
851
|
+
if node not in end_node_frontier:
|
|
828
852
|
return False
|
|
829
853
|
for pred in graph.predecessors(node):
|
|
830
|
-
if dominates(
|
|
854
|
+
if doms.dominates(start_node, pred) and not doms.dominates(end_node, pred):
|
|
831
855
|
return False
|
|
832
856
|
|
|
833
857
|
return True
|
|
@@ -978,14 +1002,13 @@ class RegionIdentifier(Analysis):
|
|
|
978
1002
|
subgraph_with_exits.add_edge(src, dst)
|
|
979
1003
|
region.graph = subgraph
|
|
980
1004
|
region.graph_with_successors = subgraph_with_exits
|
|
981
|
-
if normal_exit_node is not None
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
region.successors += list(abnormal_exit_nodes)
|
|
1005
|
+
succs = [normal_exit_node] if normal_exit_node is not None else []
|
|
1006
|
+
succs += list(abnormal_exit_nodes)
|
|
1007
|
+
succs = sorted(set(succs), key=lambda x: x.addr)
|
|
1008
|
+
region.successors = set(succs)
|
|
986
1009
|
|
|
987
|
-
for succ_0 in
|
|
988
|
-
for succ_1 in
|
|
1010
|
+
for succ_0 in succs:
|
|
1011
|
+
for succ_1 in succs:
|
|
989
1012
|
if succ_0 is not succ_1 and graph.has_edge(succ_0, succ_1):
|
|
990
1013
|
region.graph_with_successors.add_edge(succ_0, succ_1)
|
|
991
1014
|
|
|
@@ -14,10 +14,10 @@ from ailment.statement import Assignment, Label
|
|
|
14
14
|
from angr.code_location import CodeLocation
|
|
15
15
|
from angr.analyses import ForwardAnalysis
|
|
16
16
|
from angr.analyses.forward_analysis import FunctionGraphVisitor
|
|
17
|
+
from angr.utils.ail import is_head_controlled_loop_block
|
|
17
18
|
from .rewriting_engine import SimEngineSSARewriting, DefExprType, AT
|
|
18
19
|
from .rewriting_state import RewritingState
|
|
19
20
|
|
|
20
|
-
|
|
21
21
|
l = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
23
|
|
|
@@ -71,6 +71,14 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
|
|
|
71
71
|
self._visited_blocks: set[Any] = set()
|
|
72
72
|
self.out_blocks = {}
|
|
73
73
|
self.out_states = {}
|
|
74
|
+
# head_controlled_loop_outstates stores states at the beginning of a loop block *after a loop iteration*, where the block is the
|
|
75
|
+
# following:
|
|
76
|
+
# 0x4036df | t4 = (rcx<8> == 0x0<64>)
|
|
77
|
+
# 0x4036df | if (t4) { Goto 0x4036e2<64> } else { Goto 0x4036df<64> }
|
|
78
|
+
# 0x4036df | STORE(addr=t3, data=t2, size=8, endness=Iend_LE, guard=None)
|
|
79
|
+
# 0x4036df | rdi<8> = t8
|
|
80
|
+
#
|
|
81
|
+
self.head_controlled_loop_outstates = {}
|
|
74
82
|
|
|
75
83
|
self._analyze()
|
|
76
84
|
|
|
@@ -177,8 +185,12 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
|
|
|
177
185
|
else:
|
|
178
186
|
node.statements = node.statements[:idx] + phi_stmts + node.statements[idx:]
|
|
179
187
|
|
|
180
|
-
def _reg_predicate(self, node_, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
|
|
181
|
-
out_state: RewritingState =
|
|
188
|
+
def _reg_predicate(self, node_: Block, *, reg_offset: int, reg_size: int) -> tuple[bool, Any]:
|
|
189
|
+
out_state: RewritingState = (
|
|
190
|
+
self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
|
|
191
|
+
if is_head_controlled_loop_block(node_)
|
|
192
|
+
else self.out_states[(node_.addr, node_.idx)]
|
|
193
|
+
)
|
|
182
194
|
if reg_offset in out_state.registers and reg_size in out_state.registers[reg_offset]:
|
|
183
195
|
existing_var = out_state.registers[reg_offset][reg_size]
|
|
184
196
|
if existing_var is None:
|
|
@@ -189,8 +201,12 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
|
|
|
189
201
|
return True, vvar
|
|
190
202
|
return False, None
|
|
191
203
|
|
|
192
|
-
def _stack_predicate(self, node_, *, stack_offset: int, stackvar_size: int) -> tuple[bool, Any]:
|
|
193
|
-
out_state: RewritingState =
|
|
204
|
+
def _stack_predicate(self, node_: Block, *, stack_offset: int, stackvar_size: int) -> tuple[bool, Any]:
|
|
205
|
+
out_state: RewritingState = (
|
|
206
|
+
self.head_controlled_loop_outstates[(node_.addr, node_.idx)]
|
|
207
|
+
if is_head_controlled_loop_block(node_)
|
|
208
|
+
else self.out_states[(node_.addr, node_.idx)]
|
|
209
|
+
)
|
|
194
210
|
if stack_offset in out_state.stackvars and stackvar_size in out_state.stackvars[stack_offset]:
|
|
195
211
|
existing_var = out_state.stackvars[stack_offset][stackvar_size]
|
|
196
212
|
if existing_var is None:
|
|
@@ -262,18 +278,32 @@ class RewritingAnalysis(ForwardAnalysis[RewritingState, ailment.Block, object, o
|
|
|
262
278
|
)
|
|
263
279
|
|
|
264
280
|
self._visited_blocks.add(block_key)
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
281
|
+
# get the output state (which is the input state for the successor node)
|
|
282
|
+
# if head_controlled_loop_outstate is set, then it is the output state of the successor node; in this case, the
|
|
283
|
+
# input state for the head-controlled loop block itself is out.state.
|
|
284
|
+
# otherwise (if head_controlled_loop_outstate is not set), engine.state is the input state of the successor
|
|
285
|
+
# node.
|
|
286
|
+
if engine.head_controlled_loop_outstate is None:
|
|
287
|
+
# this is a normal block
|
|
288
|
+
out_state = state
|
|
289
|
+
else:
|
|
290
|
+
# this is a head-controlled loop block
|
|
291
|
+
out_state = engine.head_controlled_loop_outstate
|
|
292
|
+
self.head_controlled_loop_outstates[block_key] = state
|
|
293
|
+
self.out_states[block_key] = out_state
|
|
294
|
+
# the final block is always in state
|
|
295
|
+
out_block = state.out_block
|
|
296
|
+
|
|
297
|
+
if out_block is not None:
|
|
298
|
+
assert out_block.addr == block.addr
|
|
299
|
+
|
|
300
|
+
if self.out_blocks.get(block_key, None) == out_block:
|
|
301
|
+
return True, out_state
|
|
302
|
+
self.out_blocks[block_key] = out_block
|
|
303
|
+
out_state.out_block = None
|
|
304
|
+
return True, out_state
|
|
305
|
+
|
|
306
|
+
return True, out_state
|
|
277
307
|
|
|
278
308
|
def _intra_analysis(self):
|
|
279
309
|
pass
|
|
@@ -4,6 +4,7 @@ from typing import Literal
|
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
6
|
from archinfo import Endness
|
|
7
|
+
from ailment.block import Block
|
|
7
8
|
from ailment.manager import Manager
|
|
8
9
|
from ailment.statement import Statement, Assignment, Store, Call, Return, ConditionalJump, DirtyStatement, Jump
|
|
9
10
|
from ailment.expression import (
|
|
@@ -70,6 +71,7 @@ class SimEngineSSARewriting(
|
|
|
70
71
|
self.phiid_to_loc = phiid_to_loc
|
|
71
72
|
self.rewrite_tmps = rewrite_tmps
|
|
72
73
|
self.ail_manager = ail_manager
|
|
74
|
+
self.head_controlled_loop_outstate: RewritingState | None = None
|
|
73
75
|
|
|
74
76
|
self.secondary_stackvars: set[int] = set()
|
|
75
77
|
|
|
@@ -87,6 +89,12 @@ class SimEngineSSARewriting(
|
|
|
87
89
|
# Handlers
|
|
88
90
|
#
|
|
89
91
|
|
|
92
|
+
def process(
|
|
93
|
+
self, state: RewritingState, *, block: Block | None = None, whitelist: set[int] | None = None, **kwargs
|
|
94
|
+
) -> None:
|
|
95
|
+
self.head_controlled_loop_outstate = None
|
|
96
|
+
super().process(state, block=block, whitelist=whitelist, **kwargs)
|
|
97
|
+
|
|
90
98
|
def _top(self, bits):
|
|
91
99
|
assert False, "Unreachable"
|
|
92
100
|
|
|
@@ -236,6 +244,11 @@ class SimEngineSSARewriting(
|
|
|
236
244
|
new_true_target = self._expr(stmt.true_target) if stmt.true_target is not None else None
|
|
237
245
|
new_false_target = self._expr(stmt.false_target) if stmt.false_target is not None else None
|
|
238
246
|
|
|
247
|
+
if self.stmt_idx != len(self.block.statements) - 1:
|
|
248
|
+
# the conditional jump is in the middle of the block (e.g., the block generated from lifting rep stosq).
|
|
249
|
+
# we need to make a copy of the state and use the state of this point in its successor
|
|
250
|
+
self.head_controlled_loop_outstate = self.state.copy()
|
|
251
|
+
|
|
239
252
|
if new_cond is not None or new_true_target is not None or new_false_target is not None:
|
|
240
253
|
return ConditionalJump(
|
|
241
254
|
stmt.idx,
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class StackItemType(Enum):
|
|
7
|
+
"""
|
|
8
|
+
Enum for the type of stack items.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
UNKNOWN = 0
|
|
12
|
+
SAVED_BP = 1
|
|
13
|
+
SAVED_REGS = 2
|
|
14
|
+
ARGUMENT = 3
|
|
15
|
+
RET_ADDR = 4
|
|
16
|
+
STACK_CANARY = 5
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class StackItem:
|
|
20
|
+
"""
|
|
21
|
+
A stack item describes a piece of data that is stored on the stack at a certain offset (usually negative).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
offset: int
|
|
25
|
+
size: int
|
|
26
|
+
name: str
|
|
27
|
+
item_type: StackItemType
|
|
28
|
+
|
|
29
|
+
def __init__(self, offset: int, size: int, name: str, item_type: StackItemType = StackItemType.UNKNOWN):
|
|
30
|
+
self.offset = offset
|
|
31
|
+
self.size = size
|
|
32
|
+
self.name = name
|
|
33
|
+
self.item_type = item_type
|
|
34
|
+
|
|
35
|
+
def __repr__(self):
|
|
36
|
+
return f"<StackItem {self.name} {self.item_type!s} at {self.offset:#x} ({self.size}b)>"
|
|
@@ -40,7 +40,7 @@ from angr.sim_variable import SimVariable, SimTemporaryVariable, SimStackVariabl
|
|
|
40
40
|
from angr.utils.constants import is_alignment_mask
|
|
41
41
|
from angr.utils.library import get_cpp_function_name
|
|
42
42
|
from angr.utils.loader import is_in_readonly_segment, is_in_readonly_section
|
|
43
|
-
from angr.utils.types import unpack_typeref,
|
|
43
|
+
from angr.utils.types import unpack_typeref, unpack_pointer_and_array
|
|
44
44
|
from angr.analyses.decompiler.utils import structured_node_is_simple_return
|
|
45
45
|
from angr.errors import UnsupportedNodeTypeError, AngrRuntimeError
|
|
46
46
|
from angr.knowledge_plugins.cfg.memory_data import MemoryData, MemoryDataSort
|
|
@@ -539,6 +539,8 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
|
|
|
539
539
|
|
|
540
540
|
if self.codegen.show_externs and self.codegen.cexterns:
|
|
541
541
|
for v in sorted(self.codegen.cexterns, key=lambda v: str(v.variable.name)):
|
|
542
|
+
if v.variable not in self.variables_in_use:
|
|
543
|
+
continue
|
|
542
544
|
varname = v.c_repr() if v.type is None else v.variable.name
|
|
543
545
|
yield "extern ", None
|
|
544
546
|
yield from type_to_c_repr_chunks(v.type, name=varname, name_type=v, full=False)
|
|
@@ -2581,7 +2583,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2581
2583
|
|
|
2582
2584
|
# TODO store extern fallback size somewhere lol
|
|
2583
2585
|
self.cexterns = {
|
|
2584
|
-
self._variable(v, 1)
|
|
2586
|
+
self._variable(v, 1, mark_used=False)
|
|
2585
2587
|
for v in self.externs
|
|
2586
2588
|
if v not in self._inlined_strings and v not in self._function_pointers
|
|
2587
2589
|
}
|
|
@@ -2698,7 +2700,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2698
2700
|
return _mapping.get(n)(signed=signed).with_arch(self.project.arch)
|
|
2699
2701
|
return SimTypeNum(n, signed=signed).with_arch(self.project.arch)
|
|
2700
2702
|
|
|
2701
|
-
def _variable(
|
|
2703
|
+
def _variable(
|
|
2704
|
+
self, variable: SimVariable, fallback_type_size: int | None, vvar_id: int | None = None, mark_used: bool = True
|
|
2705
|
+
) -> CVariable:
|
|
2702
2706
|
# TODO: we need to fucking make sure that variable recovery and type inference actually generates a size
|
|
2703
2707
|
# TODO: for each variable it links into the fucking ail. then we can remove fallback_type_size.
|
|
2704
2708
|
unified = self._variable_kb.variables[self._func.addr].unified_variable(variable)
|
|
@@ -2710,7 +2714,8 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2710
2714
|
(fallback_type_size or self.project.arch.bytes) * self.project.arch.byte_width
|
|
2711
2715
|
)
|
|
2712
2716
|
cvar = CVariable(variable, unified_variable=unified, variable_type=variable_type, codegen=self, vvar_id=vvar_id)
|
|
2713
|
-
|
|
2717
|
+
if mark_used:
|
|
2718
|
+
self._variables_in_use[variable] = cvar
|
|
2714
2719
|
return cvar
|
|
2715
2720
|
|
|
2716
2721
|
def _get_variable_reference(self, cvar: CVariable) -> CExpression:
|
|
@@ -2776,7 +2781,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2776
2781
|
# expr must express a POINTER to the base
|
|
2777
2782
|
# returns a value which has a simtype of data_type as if it were dereferenced out of expr
|
|
2778
2783
|
data_type = unpack_typeref(data_type)
|
|
2779
|
-
base_type = unpack_typeref(
|
|
2784
|
+
base_type = unpack_typeref(unpack_pointer_and_array(expr.type))
|
|
2780
2785
|
if base_type is None:
|
|
2781
2786
|
# well, not much we can do
|
|
2782
2787
|
if data_type is None:
|
|
@@ -2899,7 +2904,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2899
2904
|
) -> CExpression:
|
|
2900
2905
|
# same rule as _access_constant_offset wrt pointer expressions
|
|
2901
2906
|
data_type = unpack_typeref(data_type)
|
|
2902
|
-
base_type =
|
|
2907
|
+
base_type = unpack_pointer_and_array(expr.type)
|
|
2903
2908
|
if base_type is None:
|
|
2904
2909
|
# use the fallback from above
|
|
2905
2910
|
return self._access_constant_offset(expr, 0, data_type, lvalue, renegotiate_type)
|
|
@@ -2959,7 +2964,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2959
2964
|
kernel = None
|
|
2960
2965
|
while i < len(terms):
|
|
2961
2966
|
c, t = terms[i]
|
|
2962
|
-
if isinstance(unpack_typeref(t.type), SimTypePointer):
|
|
2967
|
+
if isinstance(unpack_typeref(t.type), (SimTypePointer, SimTypeArray)):
|
|
2963
2968
|
if kernel is not None:
|
|
2964
2969
|
l.warning("Summing two different pointers together. Uh oh!")
|
|
2965
2970
|
return bail_out()
|
|
@@ -2982,7 +2987,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
2982
2987
|
|
|
2983
2988
|
# suffering.
|
|
2984
2989
|
while terms:
|
|
2985
|
-
kernel_type = unpack_typeref(
|
|
2990
|
+
kernel_type = unpack_typeref(unpack_pointer_and_array(kernel.type))
|
|
2986
2991
|
assert kernel_type
|
|
2987
2992
|
|
|
2988
2993
|
if kernel_type.size is None:
|
|
@@ -3049,7 +3054,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
|
|
|
3049
3054
|
kernel = inner.operand
|
|
3050
3055
|
else:
|
|
3051
3056
|
kernel = CUnaryOp("Reference", inner, codegen=self)
|
|
3052
|
-
if unpack_typeref(
|
|
3057
|
+
if unpack_typeref(unpack_pointer_and_array(kernel.type)) == kernel_type:
|
|
3053
3058
|
# we are not making progress
|
|
3054
3059
|
pass
|
|
3055
3060
|
else:
|
|
@@ -14,7 +14,7 @@ from ailment.statement import Statement, ConditionalJump, Jump, Label, Return
|
|
|
14
14
|
from ailment.expression import Const, UnaryOp, MultiStatementExpression
|
|
15
15
|
|
|
16
16
|
from angr.utils.graph import GraphUtils
|
|
17
|
-
from angr.utils.ail import is_phi_assignment
|
|
17
|
+
from angr.utils.ail import is_phi_assignment, is_head_controlled_loop_block
|
|
18
18
|
from angr.knowledge_plugins.cfg import IndirectJump, IndirectJumpType
|
|
19
19
|
from angr.utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
|
|
20
20
|
from angr.utils.graph import dominates, to_acyclic_graph, dfs_back_edges
|
|
@@ -312,11 +312,11 @@ class PhoenixStructurer(StructurerBase):
|
|
|
312
312
|
and head_block.nodes
|
|
313
313
|
and isinstance(head_block.nodes[0], Block)
|
|
314
314
|
and head_block.nodes[0].statements
|
|
315
|
-
and
|
|
315
|
+
and is_head_controlled_loop_block(head_block.nodes[0])
|
|
316
316
|
) or (
|
|
317
317
|
isinstance(head_block, Block)
|
|
318
318
|
and head_block.statements
|
|
319
|
-
and
|
|
319
|
+
and is_head_controlled_loop_block(head_block)
|
|
320
320
|
):
|
|
321
321
|
# it's a while loop if the conditional jump (or the head block) is at the beginning of node
|
|
322
322
|
loop_type = "while" if head_block_idx == 0 else "do-while"
|
|
@@ -9,6 +9,7 @@ from angr.analyses.reaching_definitions.function_handler import FunctionHandler
|
|
|
9
9
|
from angr.knowledge_plugins.key_definitions.atoms import Register, MemoryLocation
|
|
10
10
|
from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
|
|
11
11
|
from angr.knowledge_plugins.key_definitions.constants import OP_BEFORE, OP_AFTER
|
|
12
|
+
from angr.utils.cpp import is_cpp_funcname_ctor
|
|
12
13
|
from . import Analysis, VtableFinder, CFGFast, ReachingDefinitionsAnalysis
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
@@ -109,7 +110,7 @@ class NewFunctionHandler(FunctionHandler):
|
|
|
109
110
|
else:
|
|
110
111
|
if self.project.kb.functions.contains_addr(function_address):
|
|
111
112
|
func = self.project.kb.functions.get_by_addr(function_address)
|
|
112
|
-
if func is not None and
|
|
113
|
+
if func is not None and is_cpp_funcname_ctor(func.demangled_name):
|
|
113
114
|
# check if rdi has a possible this pointer/ object address, if so then we can assign this object
|
|
114
115
|
# this class
|
|
115
116
|
# also if the func is a constructor(not stripped binaries)
|
|
@@ -77,12 +77,15 @@ class SimEngineRDVEX(
|
|
|
77
77
|
def _process_block_end(self, stmt_result, whitelist):
|
|
78
78
|
self.stmt_idx = DEFAULT_STATEMENT
|
|
79
79
|
self._set_codeloc()
|
|
80
|
+
|
|
81
|
+
function_handled = False
|
|
80
82
|
if self.block.vex.jumpkind == "Ijk_Call":
|
|
81
83
|
# it has to be a function
|
|
82
84
|
block_next = self.block.vex.next
|
|
83
85
|
assert isinstance(block_next, pyvex.expr.IRExpr)
|
|
84
86
|
addr = self._expr_bv(block_next)
|
|
85
87
|
self._handle_function(addr)
|
|
88
|
+
function_handled = True
|
|
86
89
|
elif self.block.vex.jumpkind == "Ijk_Boring":
|
|
87
90
|
# test if the target addr is a function or not
|
|
88
91
|
block_next = self.block.vex.next
|
|
@@ -94,6 +97,16 @@ class SimEngineRDVEX(
|
|
|
94
97
|
if addr_int in self.functions:
|
|
95
98
|
# yes it's a jump to a function
|
|
96
99
|
self._handle_function(addr)
|
|
100
|
+
function_handled = True
|
|
101
|
+
|
|
102
|
+
# take care of OP_AFTER during statement processing for function calls in a block
|
|
103
|
+
if self.state.analysis and function_handled:
|
|
104
|
+
self.state.analysis.stmt_observe(
|
|
105
|
+
self.stmt_idx, self.block.vex.statements[-1], self.block, self.state, OP_AFTER
|
|
106
|
+
)
|
|
107
|
+
self.state.analysis.insn_observe(
|
|
108
|
+
self.ins_addr, self.block.vex.statements[-1], self.block, self.state, OP_AFTER
|
|
109
|
+
)
|
|
97
110
|
|
|
98
111
|
return self.state
|
|
99
112
|
|
|
@@ -121,9 +121,9 @@ class FunctionCallData:
|
|
|
121
121
|
return False
|
|
122
122
|
if isinstance(dest, MemoryLocation) and isinstance(dest.addr, SpOffset):
|
|
123
123
|
for effect in self.effects:
|
|
124
|
-
if not isinstance(effect.dest, MemoryLocation) or not isinstance(effect.dest.addr, SpOffset):
|
|
125
|
-
continue
|
|
126
124
|
stkarg = effect.dest
|
|
125
|
+
if not isinstance(stkarg, MemoryLocation) or not isinstance(stkarg.addr, SpOffset):
|
|
126
|
+
continue
|
|
127
127
|
if (
|
|
128
128
|
dest.addr.offset + dest.size <= stkarg.addr.offset
|
|
129
129
|
or stkarg.addr.offset + stkarg.size <= dest.addr.offset
|
|
@@ -282,12 +282,20 @@ class FunctionHandler:
|
|
|
282
282
|
A mechanism for summarizing a function call's effect on a program for ReachingDefinitionsAnalysis.
|
|
283
283
|
"""
|
|
284
284
|
|
|
285
|
-
def __init__(self, interfunction_level: int = 0, extra_impls: Iterable[FunctionHandler] | None = None):
|
|
285
|
+
def __init__(self, interfunction_level: int = 0, extra_impls: Iterable[type[FunctionHandler]] | None = None):
|
|
286
|
+
"""
|
|
287
|
+
:param interfunction_level: Maximum depth in to continue local function exploration
|
|
288
|
+
:param extra_impls: FunctionHandler classes to implement beyond what's implemented in function_handler_library
|
|
289
|
+
"""
|
|
290
|
+
|
|
286
291
|
self.interfunction_level: int = interfunction_level
|
|
287
292
|
|
|
288
|
-
if extra_impls is
|
|
289
|
-
|
|
290
|
-
|
|
293
|
+
if extra_impls is None:
|
|
294
|
+
return
|
|
295
|
+
|
|
296
|
+
for extra_handler in extra_impls:
|
|
297
|
+
for cls in extra_handler.__mro__:
|
|
298
|
+
for name, func in vars(cls).items():
|
|
291
299
|
if name.startswith("handle_impl_"):
|
|
292
300
|
setattr(self, name, _mk_wrapper(func, self))
|
|
293
301
|
|
|
@@ -398,9 +406,13 @@ class FunctionHandler:
|
|
|
398
406
|
for typelib_name in prototype_lib.type_collection_names:
|
|
399
407
|
type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
|
|
400
408
|
if type_collections:
|
|
401
|
-
|
|
409
|
+
prototype = dereference_simtype(data.prototype, type_collections).with_arch(state.arch)
|
|
410
|
+
data.prototype = cast(SimTypeFunction, prototype)
|
|
402
411
|
|
|
403
|
-
|
|
412
|
+
if isinstance(data.prototype, SimTypeFunction):
|
|
413
|
+
args_atoms_from_values = data.reset_prototype(data.prototype, state, soft_reset=True)
|
|
414
|
+
else:
|
|
415
|
+
args_atoms_from_values = set()
|
|
404
416
|
|
|
405
417
|
# PROCESS
|
|
406
418
|
state.move_codelocs(data.function_codeloc)
|
|
@@ -506,7 +518,9 @@ class FunctionHandler:
|
|
|
506
518
|
assert data.prototype is not None
|
|
507
519
|
if data.prototype.returnty is not None:
|
|
508
520
|
if not isinstance(data.prototype.returnty, SimTypeBottom):
|
|
509
|
-
data.ret_values = MultiValues(
|
|
521
|
+
data.ret_values = MultiValues(
|
|
522
|
+
state.top(data.prototype.returnty.with_arch(state.arch).size or state.arch.bits)
|
|
523
|
+
)
|
|
510
524
|
else:
|
|
511
525
|
data.ret_values = MultiValues(state.top(state.arch.bits))
|
|
512
526
|
if data.guessed_prototype:
|
|
@@ -567,7 +581,7 @@ class FunctionHandler:
|
|
|
567
581
|
sub_rda = state.analysis.project.analyses.ReachingDefinitions(
|
|
568
582
|
data.function,
|
|
569
583
|
observe_all=state.analysis._observe_all,
|
|
570
|
-
observation_points=(state.analysis._observation_points or [])
|
|
584
|
+
observation_points=list(state.analysis._observation_points or []) + return_observation_points,
|
|
571
585
|
observe_callback=state.analysis._observe_callback,
|
|
572
586
|
dep_graph=state.dep_graph,
|
|
573
587
|
function_handler=self,
|
|
@@ -202,6 +202,7 @@ def handle_printf(
|
|
|
202
202
|
for defn in state.get_definitions(atom):
|
|
203
203
|
top_val = state.annotate_with_def(top_val, defn)
|
|
204
204
|
buf_data = MultiValues(top_val)
|
|
205
|
+
buf_atoms = atom
|
|
205
206
|
elif fmt == "%u":
|
|
206
207
|
buf_atoms = atom
|
|
207
208
|
buf_data = state.get_concrete_value(buf_atoms)
|