angr 9.2.163__cp310-abi3-macosx_11_0_arm64.whl → 9.2.165__cp310-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/ailment/converter_vex.py +1 -1
- angr/ailment/expression.py +5 -1
- angr/analyses/analysis.py +27 -4
- angr/analyses/cfg/cfg_base.py +16 -13
- angr/analyses/cfg/cfg_emulated.py +5 -1
- angr/analyses/cfg/cfg_fast.py +43 -5
- angr/analyses/cfg/indirect_jump_resolvers/arm_elf_fast.py +11 -1
- angr/analyses/cfg/indirect_jump_resolvers/const_resolver.py +194 -41
- angr/analyses/decompiler/ail_simplifier.py +19 -5
- angr/analyses/decompiler/callsite_maker.py +33 -17
- angr/analyses/decompiler/condition_processor.py +9 -8
- angr/analyses/decompiler/graph_region.py +19 -0
- angr/analyses/decompiler/optimization_passes/deadblock_remover.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/__init__.py +2 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_memcpy.py +78 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +67 -10
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +10 -13
- angr/analyses/decompiler/region_identifier.py +22 -1
- angr/analyses/decompiler/structuring/phoenix.py +72 -20
- angr/analyses/decompiler/structuring/recursive_structurer.py +3 -4
- angr/analyses/decompiler/structuring/structurer_nodes.py +3 -0
- angr/analyses/decompiler/utils.py +17 -5
- angr/analyses/deobfuscator/string_obf_finder.py +130 -32
- angr/analyses/s_reaching_definitions/s_rda_view.py +2 -1
- angr/analyses/typehoon/typeconsts.py +3 -1
- angr/blade.py +20 -15
- angr/engines/icicle.py +16 -3
- angr/knowledge_plugins/propagations/propagation_model.py +7 -0
- angr/rustylib.abi3.so +0 -0
- angr/sim_type.py +16 -1
- angr/state_plugins/history.py +16 -0
- angr/unicornlib.dylib +0 -0
- angr/utils/constants.py +1 -1
- angr/utils/graph.py +1 -1
- angr/utils/vex.py +11 -0
- {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/METADATA +5 -5
- {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/RECORD +1409 -1407
- {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/WHEEL +1 -0
- {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/entry_points.txt +0 -0
- {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.163.dist-info → angr-9.2.165.dist-info}/top_level.txt +0 -0
|
@@ -397,9 +397,11 @@ class AILSimplifier(Analysis):
|
|
|
397
397
|
if isinstance(def_.atom, atoms.VirtualVariable) and (def_.atom.was_reg or def_.atom.was_parameter):
|
|
398
398
|
# only do this for general purpose register
|
|
399
399
|
skip_def = False
|
|
400
|
+
reg = None
|
|
400
401
|
for reg in self.project.arch.register_list:
|
|
401
|
-
if
|
|
402
|
-
|
|
402
|
+
if reg.vex_offset == def_.atom.reg_offset:
|
|
403
|
+
if not reg.artificial and not reg.general_purpose and not reg.vector:
|
|
404
|
+
skip_def = True
|
|
403
405
|
break
|
|
404
406
|
|
|
405
407
|
if skip_def:
|
|
@@ -659,6 +661,16 @@ class AILSimplifier(Analysis):
|
|
|
659
661
|
first_op = ops[0]
|
|
660
662
|
if isinstance(first_op, Convert) and first_op.to_bits >= self.project.arch.byte_width:
|
|
661
663
|
# we need at least one byte!
|
|
664
|
+
if (
|
|
665
|
+
len({(op.from_bits, op.to_bits) for op in ops if isinstance(op, Convert) and op.operand.likes(expr)})
|
|
666
|
+
> 1
|
|
667
|
+
):
|
|
668
|
+
# there are more Convert operations; it's probably because there are multiple expressions involving the
|
|
669
|
+
# same core expr. just give up (for now)
|
|
670
|
+
return None, None
|
|
671
|
+
if any(op for op in ops if isinstance(op, BinaryOp) and op.op == "Shr" and op.operands[0].likes(expr)):
|
|
672
|
+
# the expression is right-shifted, which means higher bits might be used.
|
|
673
|
+
return None, None
|
|
662
674
|
return first_op.to_bits // self.project.arch.byte_width, ("convert", (first_op,))
|
|
663
675
|
if isinstance(first_op, BinaryOp):
|
|
664
676
|
second_op = None
|
|
@@ -1816,13 +1828,11 @@ class AILSimplifier(Analysis):
|
|
|
1816
1828
|
if codeloc in self._assignments_to_remove:
|
|
1817
1829
|
# it should be removed
|
|
1818
1830
|
simplified = True
|
|
1819
|
-
self._assignments_to_remove.discard(codeloc)
|
|
1820
1831
|
continue
|
|
1821
1832
|
|
|
1822
1833
|
if self._statement_has_call_exprs(stmt):
|
|
1823
1834
|
if codeloc in self._calls_to_remove:
|
|
1824
1835
|
# it has a call and must be removed
|
|
1825
|
-
self._calls_to_remove.discard(codeloc)
|
|
1826
1836
|
simplified = True
|
|
1827
1837
|
continue
|
|
1828
1838
|
if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
|
|
@@ -1845,7 +1855,6 @@ class AILSimplifier(Analysis):
|
|
|
1845
1855
|
codeloc = CodeLocation(block.addr, idx, ins_addr=stmt.ins_addr, block_idx=block.idx)
|
|
1846
1856
|
if codeloc in self._calls_to_remove:
|
|
1847
1857
|
# this call can be removed
|
|
1848
|
-
self._calls_to_remove.discard(codeloc)
|
|
1849
1858
|
simplified = True
|
|
1850
1859
|
continue
|
|
1851
1860
|
|
|
@@ -1865,6 +1874,11 @@ class AILSimplifier(Analysis):
|
|
|
1865
1874
|
new_block.statements = new_statements
|
|
1866
1875
|
self.blocks[old_block] = new_block
|
|
1867
1876
|
|
|
1877
|
+
# we can only use calls_to_remove and assignments_to_remove once; if any statements in blocks are removed, then
|
|
1878
|
+
# the statement IDs in calls_to_remove and assignments_to_remove no longer match!
|
|
1879
|
+
self._calls_to_remove.clear()
|
|
1880
|
+
self._assignments_to_remove.clear()
|
|
1881
|
+
|
|
1868
1882
|
return simplified
|
|
1869
1883
|
|
|
1870
1884
|
@staticmethod
|
|
@@ -17,7 +17,15 @@ from angr.sim_type import (
|
|
|
17
17
|
SimTypeFunction,
|
|
18
18
|
SimTypeLongLong,
|
|
19
19
|
)
|
|
20
|
-
from angr.calling_conventions import
|
|
20
|
+
from angr.calling_conventions import (
|
|
21
|
+
SimReferenceArgument,
|
|
22
|
+
SimRegArg,
|
|
23
|
+
SimStackArg,
|
|
24
|
+
SimCC,
|
|
25
|
+
SimStructArg,
|
|
26
|
+
SimComboArg,
|
|
27
|
+
SimFunctionArgument,
|
|
28
|
+
)
|
|
21
29
|
from angr.knowledge_plugins.key_definitions.constants import OP_BEFORE
|
|
22
30
|
from angr.analyses import Analysis, register_analysis
|
|
23
31
|
from angr.analyses.s_reaching_definitions import SRDAView
|
|
@@ -137,22 +145,7 @@ class CallSiteMaker(Analysis):
|
|
|
137
145
|
arg_locs = cc.arg_locs(callsite_ty)
|
|
138
146
|
|
|
139
147
|
if arg_locs is not None and cc is not None:
|
|
140
|
-
expanded_arg_locs
|
|
141
|
-
for arg_loc in arg_locs:
|
|
142
|
-
if isinstance(arg_loc, SimComboArg):
|
|
143
|
-
# a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
|
|
144
|
-
# across registers). most importantly, a ComboArg represents one variable, not multiple, but we
|
|
145
|
-
# have no way to know that until later down the pipeline.
|
|
146
|
-
expanded_arg_locs += arg_loc.locations
|
|
147
|
-
elif isinstance(arg_loc, SimStructArg):
|
|
148
|
-
expanded_arg_locs += [ # type: ignore
|
|
149
|
-
arg_loc.locs[field_name] for field_name in arg_loc.struct.fields if field_name in arg_loc.locs
|
|
150
|
-
]
|
|
151
|
-
elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
|
|
152
|
-
expanded_arg_locs.append(arg_loc)
|
|
153
|
-
else:
|
|
154
|
-
raise NotImplementedError("Not implemented yet.")
|
|
155
|
-
|
|
148
|
+
expanded_arg_locs = self._expand_arglocs(arg_locs)
|
|
156
149
|
for arg_loc in expanded_arg_locs:
|
|
157
150
|
if isinstance(arg_loc, SimReferenceArgument):
|
|
158
151
|
if not isinstance(arg_loc.ptr_loc, (SimRegArg, SimStackArg)):
|
|
@@ -548,6 +541,29 @@ class CallSiteMaker(Analysis):
|
|
|
548
541
|
return None
|
|
549
542
|
return len(specifiers)
|
|
550
543
|
|
|
544
|
+
def _expand_arglocs(
|
|
545
|
+
self, arg_locs: list[SimFunctionArgument]
|
|
546
|
+
) -> list[SimStackArg | SimRegArg | SimReferenceArgument]:
|
|
547
|
+
expanded_arg_locs: list[SimStackArg | SimRegArg | SimReferenceArgument] = []
|
|
548
|
+
|
|
549
|
+
for arg_loc in arg_locs:
|
|
550
|
+
if isinstance(arg_loc, SimComboArg):
|
|
551
|
+
# a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
|
|
552
|
+
# across registers). most importantly, a ComboArg represents one variable, not multiple, but we
|
|
553
|
+
# have no way to know that until later down the pipeline.
|
|
554
|
+
expanded_arg_locs += arg_loc.locations
|
|
555
|
+
elif isinstance(arg_loc, SimStructArg):
|
|
556
|
+
for field_name in arg_loc.struct.fields:
|
|
557
|
+
if field_name not in arg_loc.locs:
|
|
558
|
+
continue
|
|
559
|
+
expanded_arg_locs += self._expand_arglocs([arg_loc.locs[field_name]])
|
|
560
|
+
elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
|
|
561
|
+
expanded_arg_locs.append(arg_loc)
|
|
562
|
+
else:
|
|
563
|
+
raise NotImplementedError("Not implemented yet.")
|
|
564
|
+
|
|
565
|
+
return expanded_arg_locs
|
|
566
|
+
|
|
551
567
|
def _atom_idx(self) -> int | None:
|
|
552
568
|
return self._ail_manager.next_atom() if self._ail_manager is not None else None
|
|
553
569
|
|
|
@@ -13,8 +13,6 @@ from unique_log_filter import UniqueLogFilter
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
from angr.utils.graph import GraphUtils
|
|
16
|
-
from angr.utils.lazy_import import lazy_import
|
|
17
|
-
from angr.utils import is_pyinstaller
|
|
18
16
|
from angr.utils.graph import dominates, inverted_idoms
|
|
19
17
|
from angr.utils.ail import is_head_controlled_loop_block
|
|
20
18
|
from angr.block import Block, BlockNode
|
|
@@ -37,12 +35,6 @@ from .structuring.structurer_nodes import (
|
|
|
37
35
|
from .graph_region import GraphRegion
|
|
38
36
|
from .utils import peephole_optimize_expr
|
|
39
37
|
|
|
40
|
-
if is_pyinstaller():
|
|
41
|
-
# PyInstaller is not happy with lazy import
|
|
42
|
-
import sympy
|
|
43
|
-
else:
|
|
44
|
-
sympy = lazy_import("sympy")
|
|
45
|
-
|
|
46
38
|
|
|
47
39
|
l = logging.getLogger(__name__)
|
|
48
40
|
l.addFilter(UniqueLogFilter())
|
|
@@ -953,6 +945,9 @@ class ConditionProcessor:
|
|
|
953
945
|
|
|
954
946
|
@staticmethod
|
|
955
947
|
def claripy_ast_to_sympy_expr(ast, memo=None):
|
|
948
|
+
|
|
949
|
+
import sympy # pylint:disable=import-outside-toplevel
|
|
950
|
+
|
|
956
951
|
if ast.op == "And":
|
|
957
952
|
return sympy.And(*(ConditionProcessor.claripy_ast_to_sympy_expr(arg, memo=memo) for arg in ast.args))
|
|
958
953
|
if ast.op == "Or":
|
|
@@ -974,6 +969,9 @@ class ConditionProcessor:
|
|
|
974
969
|
|
|
975
970
|
@staticmethod
|
|
976
971
|
def sympy_expr_to_claripy_ast(expr, memo: dict):
|
|
972
|
+
|
|
973
|
+
import sympy # pylint:disable=import-outside-toplevel
|
|
974
|
+
|
|
977
975
|
if expr.is_Symbol:
|
|
978
976
|
return memo[expr]
|
|
979
977
|
if isinstance(expr, sympy.Or):
|
|
@@ -990,6 +988,9 @@ class ConditionProcessor:
|
|
|
990
988
|
|
|
991
989
|
@staticmethod
|
|
992
990
|
def simplify_condition(cond, depth_limit=8, variables_limit=8):
|
|
991
|
+
|
|
992
|
+
import sympy # pylint:disable=import-outside-toplevel
|
|
993
|
+
|
|
993
994
|
memo = {}
|
|
994
995
|
if cond.depth > depth_limit or len(cond.variables) > variables_limit:
|
|
995
996
|
return cond
|
|
@@ -271,6 +271,13 @@ class GraphRegion:
|
|
|
271
271
|
else:
|
|
272
272
|
replace_with_graph_with_successors = replace_with.graph_with_successors
|
|
273
273
|
|
|
274
|
+
# if complete_successors is True for RegionIdentifier, replace_with.graph_with_successors may include nodes
|
|
275
|
+
# and edges that are *only* reachable from immediate successors. we will want to remove these nodes and edges,
|
|
276
|
+
# otherwise we may end up structuring the same region twice!
|
|
277
|
+
replace_with_graph_with_successors = self._cleanup_graph_with_successors(
|
|
278
|
+
replace_with.graph, replace_with_graph_with_successors
|
|
279
|
+
)
|
|
280
|
+
|
|
274
281
|
self._replace_node_in_graph_with_subgraph(
|
|
275
282
|
self.graph,
|
|
276
283
|
self.successors,
|
|
@@ -289,6 +296,18 @@ class GraphRegion:
|
|
|
289
296
|
replace_with.head,
|
|
290
297
|
)
|
|
291
298
|
|
|
299
|
+
@staticmethod
|
|
300
|
+
def _cleanup_graph_with_successors(
|
|
301
|
+
graph: networkx.DiGraph, graph_with_successors: networkx.DiGraph
|
|
302
|
+
) -> networkx.DiGraph:
|
|
303
|
+
expected_nodes = set(graph)
|
|
304
|
+
for n in list(expected_nodes):
|
|
305
|
+
for succ in graph_with_successors.successors(n):
|
|
306
|
+
expected_nodes.add(succ)
|
|
307
|
+
if all(n in expected_nodes for n in graph_with_successors):
|
|
308
|
+
return graph_with_successors
|
|
309
|
+
return graph_with_successors.subgraph(expected_nodes).to_directed()
|
|
310
|
+
|
|
292
311
|
@staticmethod
|
|
293
312
|
def _replace_node_in_graph(graph: networkx.DiGraph, node, replace_with, removed_edges: set):
|
|
294
313
|
in_edges = [(src, dst) for src, dst in graph.in_edges(node) if (src, dst) not in removed_edges]
|
|
@@ -60,7 +60,7 @@ class DeadblockRemover(OptimizationPass):
|
|
|
60
60
|
blk
|
|
61
61
|
for blk in self._graph.nodes()
|
|
62
62
|
if (blk.addr != self._func.addr and self._graph.in_degree(blk) == 0)
|
|
63
|
-
or claripy.is_false(cond_proc.reaching_conditions
|
|
63
|
+
or claripy.is_false(cond_proc.reaching_conditions.get(blk, claripy.true()))
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
# fix up predecessors
|
|
@@ -43,6 +43,7 @@ from .sar_to_signed_div import SarToSignedDiv
|
|
|
43
43
|
from .tidy_stack_addr import TidyStackAddr
|
|
44
44
|
from .invert_negated_logical_conjuction_disjunction import InvertNegatedLogicalConjunctionsAndDisjunctions
|
|
45
45
|
from .rol_ror import RolRorRewriter
|
|
46
|
+
from .inlined_memcpy import InlinedMemcpy
|
|
46
47
|
from .inlined_strcpy import InlinedStrcpy
|
|
47
48
|
from .inlined_strcpy_consolidation import InlinedStrcpyConsolidation
|
|
48
49
|
from .inlined_wstrcpy import InlinedWstrcpy
|
|
@@ -99,6 +100,7 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
|
|
|
99
100
|
TidyStackAddr,
|
|
100
101
|
InvertNegatedLogicalConjunctionsAndDisjunctions,
|
|
101
102
|
RolRorRewriter,
|
|
103
|
+
InlinedMemcpy,
|
|
102
104
|
InlinedStrcpy,
|
|
103
105
|
InlinedStrcpyConsolidation,
|
|
104
106
|
InlinedWstrcpy,
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# pylint:disable=arguments-differ
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable, Load, UnaryOp
|
|
5
|
+
from angr.ailment.statement import Call, Assignment, Store
|
|
6
|
+
from angr import SIM_LIBRARIES
|
|
7
|
+
from .base import PeepholeOptimizationStmtBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class InlinedMemcpy(PeepholeOptimizationStmtBase):
|
|
11
|
+
"""
|
|
12
|
+
Simplifies inlined data copying logic into calls to memcpy.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
__slots__ = ()
|
|
16
|
+
|
|
17
|
+
NAME = "Simplifying inlined strcpy"
|
|
18
|
+
stmt_classes = (Assignment, Store)
|
|
19
|
+
|
|
20
|
+
def optimize(self, stmt: Assignment | Store, stmt_idx: int | None = None, block=None, **kwargs):
|
|
21
|
+
should_replace = False
|
|
22
|
+
dst_offset, src_offset, store_size = None, None, None
|
|
23
|
+
if (
|
|
24
|
+
isinstance(stmt, Assignment)
|
|
25
|
+
and isinstance(stmt.dst, VirtualVariable)
|
|
26
|
+
and stmt.dst.was_stack
|
|
27
|
+
and stmt.dst.size == 16
|
|
28
|
+
and isinstance(stmt.src, Load)
|
|
29
|
+
):
|
|
30
|
+
dst_offset = stmt.dst.stack_offset
|
|
31
|
+
store_size = stmt.dst.size
|
|
32
|
+
if (
|
|
33
|
+
isinstance(stmt.src.addr, UnaryOp)
|
|
34
|
+
and stmt.src.addr.op == "Reference"
|
|
35
|
+
and isinstance(stmt.src.addr.operand, VirtualVariable)
|
|
36
|
+
):
|
|
37
|
+
should_replace = True
|
|
38
|
+
src_offset = stmt.src.addr.operand.stack_offset
|
|
39
|
+
elif isinstance(stmt.src.addr, StackBaseOffset):
|
|
40
|
+
should_replace = True
|
|
41
|
+
src_offset = stmt.src.addr.offset
|
|
42
|
+
|
|
43
|
+
if (
|
|
44
|
+
isinstance(stmt, Store)
|
|
45
|
+
and isinstance(stmt.addr, StackBaseOffset)
|
|
46
|
+
and stmt.size == 16
|
|
47
|
+
and isinstance(stmt.data, Load)
|
|
48
|
+
):
|
|
49
|
+
dst_offset = stmt.addr.offset
|
|
50
|
+
store_size = stmt.size
|
|
51
|
+
if (
|
|
52
|
+
isinstance(stmt.data.addr, UnaryOp)
|
|
53
|
+
and stmt.data.addr.op == "Reference"
|
|
54
|
+
and isinstance(stmt.data.addr.operand, VirtualVariable)
|
|
55
|
+
):
|
|
56
|
+
should_replace = True
|
|
57
|
+
src_offset = stmt.data.addr.operand.stack_offset
|
|
58
|
+
elif isinstance(stmt.data.addr, StackBaseOffset):
|
|
59
|
+
should_replace = True
|
|
60
|
+
src_offset = stmt.data.addr.offset
|
|
61
|
+
|
|
62
|
+
if should_replace:
|
|
63
|
+
assert dst_offset is not None and src_offset is not None and store_size is not None
|
|
64
|
+
# replace it with a call to memcpy
|
|
65
|
+
assert self.project is not None
|
|
66
|
+
return Call(
|
|
67
|
+
stmt.idx,
|
|
68
|
+
"memcpy",
|
|
69
|
+
args=[
|
|
70
|
+
StackBaseOffset(None, self.project.arch.bits, dst_offset),
|
|
71
|
+
StackBaseOffset(None, self.project.arch.bits, src_offset),
|
|
72
|
+
Const(None, None, store_size, self.project.arch.bits),
|
|
73
|
+
],
|
|
74
|
+
prototype=SIM_LIBRARIES["libc.so"][0].get_prototype("memcpy"),
|
|
75
|
+
**stmt.tags,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
return None
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
# pylint:disable=arguments-differ
|
|
1
|
+
# pylint:disable=arguments-differ,too-many-boolean-expressions
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
import string
|
|
4
4
|
|
|
5
5
|
from archinfo import Endness
|
|
6
6
|
|
|
7
|
-
from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable
|
|
8
|
-
from angr.ailment.statement import Call, Assignment
|
|
7
|
+
from angr.ailment.expression import Const, StackBaseOffset, VirtualVariable, UnaryOp
|
|
8
|
+
from angr.ailment.statement import Call, Assignment, Store, Statement
|
|
9
9
|
|
|
10
10
|
from angr import SIM_LIBRARIES
|
|
11
11
|
from angr.utils.endness import ail_const_to_be
|
|
@@ -24,24 +24,54 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
|
|
|
24
24
|
__slots__ = ()
|
|
25
25
|
|
|
26
26
|
NAME = "Simplifying inlined strcpy"
|
|
27
|
-
stmt_classes = (Assignment,)
|
|
27
|
+
stmt_classes = (Assignment, Store)
|
|
28
|
+
|
|
29
|
+
def optimize(self, stmt: Assignment | Store, stmt_idx: int | None = None, block=None, **kwargs):
|
|
30
|
+
inlined_strcpy_candidate = False
|
|
31
|
+
src: Const | None = None
|
|
32
|
+
strcpy_dst: StackBaseOffset | UnaryOp | None = None
|
|
33
|
+
|
|
34
|
+
assert self.project is not None
|
|
28
35
|
|
|
29
|
-
def optimize(self, stmt: Assignment, stmt_idx: int | None = None, block=None, **kwargs):
|
|
30
36
|
if (
|
|
31
|
-
isinstance(stmt
|
|
37
|
+
isinstance(stmt, Assignment)
|
|
38
|
+
and isinstance(stmt.dst, VirtualVariable)
|
|
32
39
|
and stmt.dst.was_stack
|
|
33
40
|
and isinstance(stmt.src, Const)
|
|
34
41
|
and isinstance(stmt.src.value, int)
|
|
35
42
|
):
|
|
36
|
-
|
|
43
|
+
inlined_strcpy_candidate = True
|
|
44
|
+
src = stmt.src
|
|
45
|
+
strcpy_dst = StackBaseOffset(None, self.project.arch.bits, stmt.dst.stack_offset)
|
|
46
|
+
elif (
|
|
47
|
+
isinstance(stmt, Store)
|
|
48
|
+
and isinstance(stmt.addr, UnaryOp)
|
|
49
|
+
and stmt.addr.op == "Reference"
|
|
50
|
+
and isinstance(stmt.addr.operand, VirtualVariable)
|
|
51
|
+
and stmt.addr.operand.was_stack
|
|
52
|
+
and isinstance(stmt.data, Const)
|
|
53
|
+
and isinstance(stmt.data.value, int)
|
|
54
|
+
):
|
|
55
|
+
inlined_strcpy_candidate = True
|
|
56
|
+
src = stmt.data
|
|
57
|
+
strcpy_dst = stmt.addr
|
|
58
|
+
|
|
59
|
+
if inlined_strcpy_candidate:
|
|
60
|
+
assert src is not None and strcpy_dst is not None
|
|
61
|
+
assert isinstance(src.value, int)
|
|
62
|
+
assert self.kb is not None
|
|
63
|
+
|
|
64
|
+
r, s = self.is_integer_likely_a_string(src.value, src.size, self.project.arch.memory_endness)
|
|
37
65
|
if r:
|
|
66
|
+
assert s is not None
|
|
67
|
+
|
|
38
68
|
# replace it with a call to strncpy
|
|
39
69
|
str_id = self.kb.custom_strings.allocate(s.encode("ascii"))
|
|
40
70
|
return Call(
|
|
41
71
|
stmt.idx,
|
|
42
72
|
"strncpy",
|
|
43
73
|
args=[
|
|
44
|
-
|
|
74
|
+
strcpy_dst,
|
|
45
75
|
Const(None, None, str_id, self.project.arch.bits, custom_string=True),
|
|
46
76
|
Const(None, None, len(s), self.project.arch.bits),
|
|
47
77
|
],
|
|
@@ -68,9 +98,21 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
|
|
|
68
98
|
next_offset = None
|
|
69
99
|
stride = []
|
|
70
100
|
|
|
101
|
+
if not stride:
|
|
102
|
+
return None
|
|
103
|
+
min_stride_stmt_idx = min(stmt_idx_ for _, stmt_idx_, _ in stride)
|
|
104
|
+
if min_stride_stmt_idx > stmt_idx:
|
|
105
|
+
# the current statement is not involved in the stride. we can't simplify here, otherwise we
|
|
106
|
+
# will incorrectly remove the current statement
|
|
107
|
+
return None
|
|
108
|
+
|
|
71
109
|
integer, size = self.stride_to_int(stride)
|
|
72
|
-
|
|
110
|
+
prev_stmt = None if stmt_idx == 0 else block.statements[stmt_idx - 1]
|
|
111
|
+
min_str_length = 1 if prev_stmt is not None and self.is_inlined_strcpy(prev_stmt) else 4
|
|
112
|
+
r, s = self.is_integer_likely_a_string(integer, size, Endness.BE, min_length=min_str_length)
|
|
73
113
|
if r:
|
|
114
|
+
assert s is not None
|
|
115
|
+
|
|
74
116
|
# we remove all involved statements whose statement IDs are greater than the current one
|
|
75
117
|
for _, stmt_idx_, _ in reversed(stride):
|
|
76
118
|
if stmt_idx_ <= stmt_idx:
|
|
@@ -83,7 +125,7 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
|
|
|
83
125
|
stmt.idx,
|
|
84
126
|
"strncpy",
|
|
85
127
|
args=[
|
|
86
|
-
|
|
128
|
+
strcpy_dst,
|
|
87
129
|
Const(None, None, str_id, self.project.arch.bits, custom_string=True),
|
|
88
130
|
Const(None, None, len(s), self.project.arch.bits),
|
|
89
131
|
],
|
|
@@ -101,10 +143,13 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
|
|
|
101
143
|
for _, _, v in stride:
|
|
102
144
|
size += v.size
|
|
103
145
|
n <<= v.bits
|
|
146
|
+
assert isinstance(v.value, int)
|
|
104
147
|
n |= v.value
|
|
105
148
|
return n, size
|
|
106
149
|
|
|
107
150
|
def collect_constant_stores(self, block, starting_stmt_idx: int) -> dict[int, tuple[int, Const | None]]:
|
|
151
|
+
assert self.project is not None
|
|
152
|
+
|
|
108
153
|
r = {}
|
|
109
154
|
for idx, stmt in enumerate(block.statements):
|
|
110
155
|
if idx < starting_stmt_idx:
|
|
@@ -158,3 +203,15 @@ class InlinedStrcpy(PeepholeOptimizationStmtBase):
|
|
|
158
203
|
return False, None
|
|
159
204
|
return True, "".join(chars)
|
|
160
205
|
return False, None
|
|
206
|
+
|
|
207
|
+
@staticmethod
|
|
208
|
+
def is_inlined_strcpy(stmt: Statement) -> bool:
|
|
209
|
+
return (
|
|
210
|
+
isinstance(stmt, Call)
|
|
211
|
+
and isinstance(stmt.target, str)
|
|
212
|
+
and stmt.target == "strncpy"
|
|
213
|
+
and stmt.args is not None
|
|
214
|
+
and len(stmt.args) == 3
|
|
215
|
+
and isinstance(stmt.args[1], Const)
|
|
216
|
+
and hasattr(stmt.args[1], "custom_string")
|
|
217
|
+
)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# pylint:disable=arguments-differ
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
-
from angr.ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset
|
|
4
|
+
from angr.ailment.expression import Expression, BinaryOp, Const, Register, StackBaseOffset, UnaryOp, VirtualVariable
|
|
5
5
|
from angr.ailment.statement import Call, Store
|
|
6
6
|
|
|
7
7
|
from angr import SIM_LIBRARIES
|
|
@@ -21,12 +21,12 @@ class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):
|
|
|
21
21
|
|
|
22
22
|
def optimize(self, stmts: list[Call], **kwargs):
|
|
23
23
|
last_stmt, stmt = stmts
|
|
24
|
-
if
|
|
24
|
+
if InlinedStrcpy.is_inlined_strcpy(last_stmt):
|
|
25
25
|
s_last: bytes = self.kb.custom_strings[last_stmt.args[1].value]
|
|
26
26
|
addr_last = last_stmt.args[0]
|
|
27
27
|
new_str = None # will be set if consolidation should happen
|
|
28
28
|
|
|
29
|
-
if isinstance(stmt, Call) and
|
|
29
|
+
if isinstance(stmt, Call) and InlinedStrcpy.is_inlined_strcpy(stmt):
|
|
30
30
|
# consolidating two calls
|
|
31
31
|
s_curr: bytes = self.kb.custom_strings[stmt.args[1].value]
|
|
32
32
|
addr_curr = stmt.args[0]
|
|
@@ -74,22 +74,19 @@ class InlinedStrcpyConsolidation(PeepholeOptimizationMultiStmtBase):
|
|
|
74
74
|
|
|
75
75
|
return None
|
|
76
76
|
|
|
77
|
-
@staticmethod
|
|
78
|
-
def _is_inlined_strcpy(stmt: Call):
|
|
79
|
-
return (
|
|
80
|
-
isinstance(stmt.target, str)
|
|
81
|
-
and stmt.target == "strncpy"
|
|
82
|
-
and len(stmt.args) == 3
|
|
83
|
-
and isinstance(stmt.args[1], Const)
|
|
84
|
-
and hasattr(stmt.args[1], "custom_string")
|
|
85
|
-
)
|
|
86
|
-
|
|
87
77
|
@staticmethod
|
|
88
78
|
def _parse_addr(addr: Expression) -> tuple[Expression, int]:
|
|
89
79
|
if isinstance(addr, Register):
|
|
90
80
|
return addr, 0
|
|
91
81
|
if isinstance(addr, StackBaseOffset):
|
|
92
82
|
return StackBaseOffset(None, addr.bits, 0), addr.offset
|
|
83
|
+
if (
|
|
84
|
+
isinstance(addr, UnaryOp)
|
|
85
|
+
and addr.op == "Reference"
|
|
86
|
+
and isinstance(addr.operand, VirtualVariable)
|
|
87
|
+
and addr.operand.was_stack
|
|
88
|
+
):
|
|
89
|
+
return StackBaseOffset(None, addr.bits, 0), addr.operand.stack_offset
|
|
93
90
|
if isinstance(addr, BinaryOp):
|
|
94
91
|
if addr.op == "Add" and isinstance(addr.operands[1], Const):
|
|
95
92
|
base_0, offset_0 = InlinedStrcpyConsolidation._parse_addr(addr.operands[0])
|
|
@@ -99,7 +99,7 @@ class RegionIdentifier(Analysis):
|
|
|
99
99
|
|
|
100
100
|
def _analyze(self):
|
|
101
101
|
# make a copy of the graph
|
|
102
|
-
graph =
|
|
102
|
+
graph = self._pick_one_connected_component(self._graph, as_copy=True)
|
|
103
103
|
|
|
104
104
|
# preprocess: make it a super graph
|
|
105
105
|
self._make_supergraph(graph)
|
|
@@ -113,6 +113,27 @@ class RegionIdentifier(Analysis):
|
|
|
113
113
|
# make regions into block address lists
|
|
114
114
|
self.regions_by_block_addrs = self._make_regions_by_block_addrs()
|
|
115
115
|
|
|
116
|
+
def _pick_one_connected_component(self, digraph: networkx.DiGraph, as_copy: bool = False) -> networkx.DiGraph:
|
|
117
|
+
g = networkx.Graph(digraph)
|
|
118
|
+
components = list(networkx.connected_components(g))
|
|
119
|
+
if len(components) <= 1:
|
|
120
|
+
return networkx.DiGraph(digraph) if as_copy else digraph
|
|
121
|
+
|
|
122
|
+
the_component = None
|
|
123
|
+
largest_component = None
|
|
124
|
+
for component in components:
|
|
125
|
+
if largest_component is None or len(component) > len(largest_component):
|
|
126
|
+
largest_component = component
|
|
127
|
+
if any((block.addr, block.idx) == self.entry_node_addr for block in component):
|
|
128
|
+
the_component = component
|
|
129
|
+
break
|
|
130
|
+
|
|
131
|
+
if the_component is None:
|
|
132
|
+
the_component = largest_component
|
|
133
|
+
|
|
134
|
+
assert the_component is not None
|
|
135
|
+
return digraph.subgraph(the_component).to_directed()
|
|
136
|
+
|
|
116
137
|
@staticmethod
|
|
117
138
|
def _compute_node_order(graph: networkx.DiGraph) -> dict[Any, tuple[int, int]]:
|
|
118
139
|
sorted_nodes = GraphUtils.quasi_topological_sort_nodes(graph)
|