angr 9.2.139__py3-none-manylinux2014_x86_64.whl → 9.2.140__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +48 -21
- angr/analyses/cfg/cfg_base.py +13 -0
- angr/analyses/cfg/cfg_fast.py +11 -0
- angr/analyses/decompiler/ail_simplifier.py +67 -52
- angr/analyses/decompiler/clinic.py +68 -43
- angr/analyses/decompiler/decompiler.py +17 -7
- angr/analyses/decompiler/expression_narrower.py +1 -1
- angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +16 -10
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +2 -2
- angr/analyses/decompiler/region_simplifiers/expr_folding.py +259 -108
- angr/analyses/decompiler/region_simplifiers/region_simplifier.py +27 -12
- angr/analyses/decompiler/structuring/dream.py +21 -17
- angr/analyses/decompiler/structuring/phoenix.py +152 -40
- angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
- angr/analyses/decompiler/structuring/structurer_base.py +36 -10
- angr/analyses/decompiler/structuring/structurer_nodes.py +4 -1
- angr/analyses/decompiler/utils.py +60 -1
- angr/analyses/deobfuscator/api_obf_finder.py +8 -5
- angr/analyses/deobfuscator/api_obf_type2_finder.py +18 -10
- angr/analyses/deobfuscator/string_obf_finder.py +105 -18
- angr/analyses/forward_analysis/forward_analysis.py +1 -1
- angr/analyses/propagator/top_checker_mixin.py +6 -6
- angr/analyses/reaching_definitions/__init__.py +2 -1
- angr/analyses/reaching_definitions/dep_graph.py +1 -12
- angr/analyses/reaching_definitions/engine_vex.py +36 -31
- angr/analyses/reaching_definitions/function_handler.py +15 -2
- angr/analyses/reaching_definitions/rd_state.py +1 -37
- angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
- angr/analyses/s_propagator.py +6 -41
- angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
- angr/analyses/stack_pointer_tracker.py +36 -22
- angr/analyses/typehoon/simple_solver.py +45 -7
- angr/analyses/typehoon/typeconsts.py +18 -5
- angr/analyses/variable_recovery/engine_base.py +7 -5
- angr/block.py +69 -107
- angr/callable.py +14 -7
- angr/calling_conventions.py +15 -1
- angr/distributed/__init__.py +1 -1
- angr/engines/__init__.py +7 -8
- angr/engines/engine.py +1 -120
- angr/engines/failure.py +2 -2
- angr/engines/hook.py +2 -2
- angr/engines/light/engine.py +2 -2
- angr/engines/pcode/engine.py +2 -14
- angr/engines/procedure.py +2 -2
- angr/engines/soot/engine.py +2 -2
- angr/engines/soot/statements/switch.py +1 -1
- angr/engines/successors.py +124 -11
- angr/engines/syscall.py +2 -2
- angr/engines/unicorn.py +3 -3
- angr/engines/vex/heavy/heavy.py +3 -15
- angr/factory.py +4 -19
- angr/knowledge_plugins/key_definitions/atoms.py +8 -4
- angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
- angr/sim_type.py +19 -17
- angr/state_plugins/plugin.py +19 -4
- angr/storage/memory_mixins/memory_mixin.py +1 -1
- angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
- angr/utils/ssa/__init__.py +119 -4
- {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/METADATA +6 -6
- {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/RECORD +68 -68
- {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/LICENSE +0 -0
- {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/WHEEL +0 -0
- {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/entry_points.txt +0 -0
- {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/top_level.txt +0 -0
|
@@ -20,6 +20,7 @@ from angr.analyses.decompiler.utils import (
|
|
|
20
20
|
)
|
|
21
21
|
from angr.analyses.decompiler.label_collector import LabelCollector
|
|
22
22
|
from angr.errors import AngrDecompilationError
|
|
23
|
+
from angr.knowledge_plugins.cfg import IndirectJump
|
|
23
24
|
from .structurer_nodes import (
|
|
24
25
|
MultiNode,
|
|
25
26
|
SequenceNode,
|
|
@@ -33,6 +34,7 @@ from .structurer_nodes import (
|
|
|
33
34
|
BreakNode,
|
|
34
35
|
LoopNode,
|
|
35
36
|
EmptyBlockNotice,
|
|
37
|
+
IncompleteSwitchCaseNode,
|
|
36
38
|
)
|
|
37
39
|
|
|
38
40
|
if TYPE_CHECKING:
|
|
@@ -60,6 +62,7 @@ class StructurerBase(Analysis):
|
|
|
60
62
|
func: Function | None = None,
|
|
61
63
|
case_entry_to_switch_head: dict[int, int] | None = None,
|
|
62
64
|
parent_region=None,
|
|
65
|
+
jump_tables: dict[int, IndirectJump] | None = None,
|
|
63
66
|
**kwargs,
|
|
64
67
|
):
|
|
65
68
|
self._region: GraphRegion = region
|
|
@@ -67,6 +70,7 @@ class StructurerBase(Analysis):
|
|
|
67
70
|
self.function = func
|
|
68
71
|
self._case_entry_to_switch_head = case_entry_to_switch_head
|
|
69
72
|
self._parent_region = parent_region
|
|
73
|
+
self.jump_tables = jump_tables or {}
|
|
70
74
|
|
|
71
75
|
self.cond_proc = (
|
|
72
76
|
condition_processor if condition_processor is not None else ConditionProcessor(self.project.arch)
|
|
@@ -304,6 +308,7 @@ class StructurerBase(Analysis):
|
|
|
304
308
|
jump_stmt = this_node.statements[-1] # type: ignore
|
|
305
309
|
|
|
306
310
|
if isinstance(jump_stmt, ailment.Stmt.Jump):
|
|
311
|
+
assert isinstance(this_node, ailment.Block)
|
|
307
312
|
next_node = node.nodes[i + 1]
|
|
308
313
|
if (
|
|
309
314
|
isinstance(jump_stmt.target, ailment.Expr.Const)
|
|
@@ -312,6 +317,7 @@ class StructurerBase(Analysis):
|
|
|
312
317
|
# this goto is useless
|
|
313
318
|
this_node.statements = this_node.statements[:-1]
|
|
314
319
|
elif isinstance(jump_stmt, ailment.Stmt.ConditionalJump):
|
|
320
|
+
assert isinstance(this_node, ailment.Block)
|
|
315
321
|
next_node = node.nodes[i + 1]
|
|
316
322
|
if (
|
|
317
323
|
isinstance(jump_stmt.true_target, ailment.Expr.Const)
|
|
@@ -365,6 +371,7 @@ class StructurerBase(Analysis):
|
|
|
365
371
|
jump_stmt = this_node.nodes[-1].statements[-1]
|
|
366
372
|
this_node = this_node.nodes[-1]
|
|
367
373
|
|
|
374
|
+
assert isinstance(this_node, ailment.Block)
|
|
368
375
|
if isinstance(jump_stmt, ailment.Stmt.Jump):
|
|
369
376
|
next_node = node.nodes[i + 1]
|
|
370
377
|
if (
|
|
@@ -785,10 +792,6 @@ class StructurerBase(Analysis):
|
|
|
785
792
|
|
|
786
793
|
return _Holder.merged, seq
|
|
787
794
|
|
|
788
|
-
#
|
|
789
|
-
# Util methods
|
|
790
|
-
#
|
|
791
|
-
|
|
792
795
|
def _reorganize_switch_cases(
|
|
793
796
|
self, cases: OrderedDict[int | tuple[int, ...], SequenceNode]
|
|
794
797
|
) -> OrderedDict[int | tuple[int, ...], SequenceNode]:
|
|
@@ -891,12 +894,12 @@ class StructurerBase(Analysis):
|
|
|
891
894
|
if isinstance(last_stmt.false_target, ailment.Expr.Const):
|
|
892
895
|
jump_targets.append((last_stmt.false_target.value, last_stmt.false_target_idx))
|
|
893
896
|
if any(tpl in addr_and_ids for tpl in jump_targets):
|
|
894
|
-
return remove_last_statement(node)
|
|
897
|
+
return remove_last_statement(node) # type: ignore
|
|
895
898
|
return None
|
|
896
899
|
|
|
897
900
|
@staticmethod
|
|
898
901
|
def _remove_last_statement_if_jump(
|
|
899
|
-
node: BaseNode | ailment.Block,
|
|
902
|
+
node: BaseNode | ailment.Block | MultiNode,
|
|
900
903
|
) -> ailment.Stmt.Jump | ailment.Stmt.ConditionalJump | None:
|
|
901
904
|
try:
|
|
902
905
|
last_stmts = ConditionProcessor.get_last_statements(node)
|
|
@@ -904,7 +907,7 @@ class StructurerBase(Analysis):
|
|
|
904
907
|
return None
|
|
905
908
|
|
|
906
909
|
if len(last_stmts) == 1 and isinstance(last_stmts[0], (ailment.Stmt.Jump, ailment.Stmt.ConditionalJump)):
|
|
907
|
-
return remove_last_statement(node)
|
|
910
|
+
return remove_last_statement(node) # type: ignore
|
|
908
911
|
return None
|
|
909
912
|
|
|
910
913
|
@staticmethod
|
|
@@ -994,8 +997,8 @@ class StructurerBase(Analysis):
|
|
|
994
997
|
@staticmethod
|
|
995
998
|
def replace_node_in_node(
|
|
996
999
|
parent_node: BaseNode,
|
|
997
|
-
old_node: BaseNode | ailment.Block,
|
|
998
|
-
new_node: BaseNode | ailment.Block,
|
|
1000
|
+
old_node: BaseNode | ailment.Block | MultiNode,
|
|
1001
|
+
new_node: BaseNode | ailment.Block | MultiNode,
|
|
999
1002
|
) -> None:
|
|
1000
1003
|
if isinstance(parent_node, SequenceNode):
|
|
1001
1004
|
for i in range(len(parent_node.nodes)): # pylint:disable=consider-using-enumerate
|
|
@@ -1018,7 +1021,9 @@ class StructurerBase(Analysis):
|
|
|
1018
1021
|
raise TypeError(f"Unsupported node type {type(parent_node)}")
|
|
1019
1022
|
|
|
1020
1023
|
@staticmethod
|
|
1021
|
-
def is_a_jump_target(
|
|
1024
|
+
def is_a_jump_target(
|
|
1025
|
+
stmt: ailment.Stmt.ConditionalJump | ailment.Stmt.Jump | ailment.Stmt.Statement, addr: int
|
|
1026
|
+
) -> bool:
|
|
1022
1027
|
if isinstance(stmt, ailment.Stmt.ConditionalJump):
|
|
1023
1028
|
if isinstance(stmt.true_target, ailment.Expr.Const) and stmt.true_target.value == addr:
|
|
1024
1029
|
return True
|
|
@@ -1038,3 +1043,24 @@ class StructurerBase(Analysis):
|
|
|
1038
1043
|
if isinstance(node, SequenceNode):
|
|
1039
1044
|
return any(StructurerBase.has_nonlabel_nonphi_statements(nn) for nn in node.nodes)
|
|
1040
1045
|
return False
|
|
1046
|
+
|
|
1047
|
+
def _node_ending_with_jump_table_header(self, node: BaseNode) -> tuple[int | None, IndirectJump | None]:
|
|
1048
|
+
if isinstance(node, (ailment.Block, MultiNode, IncompleteSwitchCaseNode)):
|
|
1049
|
+
assert node.addr is not None
|
|
1050
|
+
return node.addr, self.jump_tables.get(node.addr, None)
|
|
1051
|
+
if isinstance(node, SequenceNode):
|
|
1052
|
+
return node.addr, self._node_ending_with_jump_table_header(node.nodes[-1])[1]
|
|
1053
|
+
return None, None
|
|
1054
|
+
|
|
1055
|
+
@staticmethod
|
|
1056
|
+
def _switch_find_default_node(
|
|
1057
|
+
graph: networkx.DiGraph, head_node: BaseNode, default_node_addr: int
|
|
1058
|
+
) -> BaseNode | None:
|
|
1059
|
+
# it is possible that the default node gets duplicated by other analyses and creates a default node (addr.a)
|
|
1060
|
+
# and a case node (addr.b). The addr.a node is a successor to the head node while the addr.b node is a
|
|
1061
|
+
# successor to node_a
|
|
1062
|
+
default_node_candidates = [nn for nn in graph.nodes if nn.addr == default_node_addr]
|
|
1063
|
+
node_default: BaseNode | None = next(
|
|
1064
|
+
iter(nn for nn in default_node_candidates if graph.has_edge(head_node, nn)), None
|
|
1065
|
+
)
|
|
1066
|
+
return node_default
|
|
@@ -231,7 +231,10 @@ class CascadingConditionNode(BaseNode):
|
|
|
231
231
|
)
|
|
232
232
|
|
|
233
233
|
def __init__(
|
|
234
|
-
self,
|
|
234
|
+
self,
|
|
235
|
+
addr,
|
|
236
|
+
condition_and_nodes: list[tuple[Any, BaseNode | ailment.Block | MultiNode]],
|
|
237
|
+
else_node: BaseNode = None,
|
|
235
238
|
):
|
|
236
239
|
self.addr = addr
|
|
237
240
|
self.condition_and_nodes = condition_and_nodes
|
|
@@ -144,7 +144,9 @@ def extract_jump_targets(stmt):
|
|
|
144
144
|
return targets
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
def switch_extract_cmp_bounds(
|
|
147
|
+
def switch_extract_cmp_bounds(
|
|
148
|
+
last_stmt: ailment.Stmt.ConditionalJump | ailment.Stmt.Statement,
|
|
149
|
+
) -> tuple[Any, int, int] | None:
|
|
148
150
|
"""
|
|
149
151
|
Check the last statement of the switch-case header node, and extract lower+upper bounds for the comparison.
|
|
150
152
|
|
|
@@ -175,6 +177,54 @@ def switch_extract_cmp_bounds(last_stmt: ailment.Stmt.ConditionalJump) -> tuple[
|
|
|
175
177
|
return None
|
|
176
178
|
|
|
177
179
|
|
|
180
|
+
def switch_extract_switch_expr_from_jump_target(target: ailment.Expr.Expression) -> ailment.Expr.Expression | None:
|
|
181
|
+
"""
|
|
182
|
+
Extract the switch expression from the indirect jump target expression.
|
|
183
|
+
|
|
184
|
+
:param target: The target of the indirect jump statement.
|
|
185
|
+
:return: The extracted expression if successful, or None otherwise.
|
|
186
|
+
"""
|
|
187
|
+
|
|
188
|
+
# e.g.: Jump (Conv(32->64, (Load(addr=((0x140000000<64> + (vvar_229{reg 80} * 0x4<64>)) + 0x2290<64>),
|
|
189
|
+
# size=4,
|
|
190
|
+
# endness=Iend_LE
|
|
191
|
+
# ) + 0x140000000<32>)))
|
|
192
|
+
|
|
193
|
+
found_load = False
|
|
194
|
+
while True:
|
|
195
|
+
if isinstance(target, ailment.Expr.Convert):
|
|
196
|
+
if target.from_bits < target.to_bits:
|
|
197
|
+
target = target.operand
|
|
198
|
+
else:
|
|
199
|
+
return None
|
|
200
|
+
elif isinstance(target, ailment.Expr.BinaryOp):
|
|
201
|
+
if target.op == "Add":
|
|
202
|
+
# it must be adding the target expr with a constant
|
|
203
|
+
if isinstance(target.operands[0], ailment.Expr.Const):
|
|
204
|
+
target = target.operands[1]
|
|
205
|
+
elif isinstance(target.operands[1], ailment.Expr.Const):
|
|
206
|
+
target = target.operands[0]
|
|
207
|
+
else:
|
|
208
|
+
return None
|
|
209
|
+
elif target.op == "Mul":
|
|
210
|
+
# it must be multiplying the target expr with a constant
|
|
211
|
+
if isinstance(target.operands[0], ailment.Expr.Const):
|
|
212
|
+
target = target.operands[1]
|
|
213
|
+
elif isinstance(target.operands[1], ailment.Expr.Const):
|
|
214
|
+
target = target.operands[0]
|
|
215
|
+
else:
|
|
216
|
+
return None
|
|
217
|
+
elif isinstance(target, ailment.Expr.Load):
|
|
218
|
+
# we want the address!
|
|
219
|
+
found_load = True
|
|
220
|
+
target = target.addr
|
|
221
|
+
elif isinstance(target, ailment.Expr.VirtualVariable):
|
|
222
|
+
break
|
|
223
|
+
else:
|
|
224
|
+
return None
|
|
225
|
+
return target if found_load else None
|
|
226
|
+
|
|
227
|
+
|
|
178
228
|
def switch_extract_bitwiseand_jumptable_info(last_stmt: ailment.Stmt.Jump) -> tuple[Any, int, int] | None:
|
|
179
229
|
"""
|
|
180
230
|
Check the last statement of the switch-case header node (whose address is loaded from a jump table and computed
|
|
@@ -973,6 +1023,15 @@ def sequence_to_statements(
|
|
|
973
1023
|
return statements
|
|
974
1024
|
|
|
975
1025
|
|
|
1026
|
+
def remove_edges_in_ailgraph(
|
|
1027
|
+
ail_graph: networkx.DiGraph, edges_to_remove: list[tuple[tuple[int, int | None], tuple[int, int | None]]]
|
|
1028
|
+
) -> None:
|
|
1029
|
+
d = {(bb.addr, bb.idx): bb for bb in ail_graph}
|
|
1030
|
+
for src_addr, dst_addr in edges_to_remove:
|
|
1031
|
+
if src_addr in d and dst_addr in d and ail_graph.has_edge(d[src_addr], d[dst_addr]):
|
|
1032
|
+
ail_graph.remove_edge(d[src_addr], d[dst_addr])
|
|
1033
|
+
|
|
1034
|
+
|
|
976
1035
|
# delayed import
|
|
977
1036
|
from .structuring.structurer_nodes import (
|
|
978
1037
|
MultiNode,
|
|
@@ -12,6 +12,7 @@ import claripy
|
|
|
12
12
|
from angr import SIM_LIBRARIES
|
|
13
13
|
from angr.calling_conventions import SimRegArg
|
|
14
14
|
from angr.errors import SimMemoryMissingError
|
|
15
|
+
from angr.knowledge_base import KnowledgeBase
|
|
15
16
|
from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
|
|
16
17
|
from angr.sim_type import SimTypePointer, SimTypeChar
|
|
17
18
|
from angr.analyses import Analysis, AnalysesHub
|
|
@@ -35,7 +36,7 @@ class APIObfuscationType(IntEnum):
|
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
class APIDeobFuncDescriptor:
|
|
38
|
-
def __init__(self, type_: APIObfuscationType, func_addr
|
|
39
|
+
def __init__(self, type_: APIObfuscationType, *, func_addr: int, libname_argidx: int, funcname_argidx: int):
|
|
39
40
|
self.type = type_
|
|
40
41
|
self.func_addr = func_addr
|
|
41
42
|
self.libname_argidx = libname_argidx
|
|
@@ -96,8 +97,9 @@ class APIObfuscationFinder(Analysis):
|
|
|
96
97
|
- Type 2: GetProcAddress(_, "api_name").
|
|
97
98
|
"""
|
|
98
99
|
|
|
99
|
-
def __init__(self):
|
|
100
|
+
def __init__(self, variable_kb: KnowledgeBase | None = None):
|
|
100
101
|
self.type1_candidates = []
|
|
102
|
+
self.variable_kb = variable_kb or self.project.kb
|
|
101
103
|
|
|
102
104
|
self.analyze()
|
|
103
105
|
|
|
@@ -109,7 +111,7 @@ class APIObfuscationFinder(Analysis):
|
|
|
109
111
|
type1_deobfuscated = self._analyze_type1(desc.func_addr, desc)
|
|
110
112
|
self.kb.obfuscations.type1_deobfuscated_apis.update(type1_deobfuscated)
|
|
111
113
|
|
|
112
|
-
APIObfuscationType2Finder(self.project).analyze()
|
|
114
|
+
APIObfuscationType2Finder(self.project, self.variable_kb).analyze()
|
|
113
115
|
|
|
114
116
|
def _find_type1(self):
|
|
115
117
|
cfg = self.kb.cfgs.get_most_accurate()
|
|
@@ -195,6 +197,8 @@ class APIObfuscationFinder(Analysis):
|
|
|
195
197
|
callsite_node.instruction_addrs[-1],
|
|
196
198
|
ObservationPointType.OP_BEFORE,
|
|
197
199
|
)
|
|
200
|
+
if observ is None:
|
|
201
|
+
continue
|
|
198
202
|
args: list[tuple[int, Any]] = []
|
|
199
203
|
for arg_idx, func_arg in enumerate(func.arguments):
|
|
200
204
|
# FIXME: We are ignoring all non-register function arguments until we see a test case where
|
|
@@ -232,9 +236,8 @@ class APIObfuscationFinder(Analysis):
|
|
|
232
236
|
acceptable_args = False
|
|
233
237
|
break
|
|
234
238
|
arg_strs.append((idx, value.decode("utf-8")))
|
|
235
|
-
if acceptable_args:
|
|
239
|
+
if acceptable_args and len(arg_strs) == 2:
|
|
236
240
|
libname_arg_idx, funcname_arg_idx = None, None
|
|
237
|
-
assert len(arg_strs) == 2
|
|
238
241
|
for arg_idx, name in arg_strs:
|
|
239
242
|
if self.is_libname(name):
|
|
240
243
|
libname_arg_idx = arg_idx
|
|
@@ -1,21 +1,24 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
from typing import cast
|
|
2
|
+
from typing import TYPE_CHECKING, cast
|
|
3
3
|
|
|
4
4
|
from collections.abc import Iterator
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
import logging
|
|
7
7
|
|
|
8
8
|
from angr.project import Project
|
|
9
|
-
from angr.
|
|
10
|
-
ReachingDefinitionsAnalysis,
|
|
11
|
-
FunctionCallRelationships,
|
|
12
|
-
)
|
|
9
|
+
from angr.knowledge_base import KnowledgeBase
|
|
13
10
|
from angr.knowledge_plugins.functions.function import Function
|
|
14
11
|
from angr.knowledge_plugins.key_definitions import DerefSize
|
|
15
12
|
from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
|
|
16
13
|
from angr.knowledge_plugins.key_definitions.atoms import MemoryLocation
|
|
17
14
|
from angr.sim_variable import SimMemoryVariable
|
|
18
15
|
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from angr.analyses.reaching_definitions import (
|
|
18
|
+
ReachingDefinitionsAnalysis,
|
|
19
|
+
FunctionCallRelationships,
|
|
20
|
+
)
|
|
21
|
+
|
|
19
22
|
|
|
20
23
|
log = logging.getLogger(__name__)
|
|
21
24
|
|
|
@@ -40,8 +43,9 @@ class APIObfuscationType2Finder:
|
|
|
40
43
|
|
|
41
44
|
results: list[APIObfuscationType2]
|
|
42
45
|
|
|
43
|
-
def __init__(self, project: Project):
|
|
46
|
+
def __init__(self, project: Project, variable_kb: KnowledgeBase | None = None):
|
|
44
47
|
self.project = project
|
|
48
|
+
self.variable_kb = variable_kb or self.project.kb
|
|
45
49
|
self.results = []
|
|
46
50
|
|
|
47
51
|
def analyze(self) -> list[APIObfuscationType2]:
|
|
@@ -91,8 +95,12 @@ class APIObfuscationType2Finder:
|
|
|
91
95
|
log.debug("...Failed to resolve a function name")
|
|
92
96
|
return
|
|
93
97
|
|
|
94
|
-
|
|
95
|
-
|
|
98
|
+
try:
|
|
99
|
+
func_name = result.rstrip(b"\x00").decode("utf-8")
|
|
100
|
+
log.debug("...Resolved concrete function name: %s", func_name)
|
|
101
|
+
except UnicodeDecodeError:
|
|
102
|
+
log.debug("...Failed to decode utf-8 function name")
|
|
103
|
+
return
|
|
96
104
|
|
|
97
105
|
# Examine successor definitions to find where the function pointer is written
|
|
98
106
|
for successor in rda.dep_graph.find_all_successors(callsite_info.ret_defns):
|
|
@@ -121,7 +129,7 @@ class APIObfuscationType2Finder:
|
|
|
121
129
|
|
|
122
130
|
self.results.append(
|
|
123
131
|
APIObfuscationType2(
|
|
124
|
-
resolved_func_name=
|
|
132
|
+
resolved_func_name=func_name,
|
|
125
133
|
resolved_func_ptr=ptr,
|
|
126
134
|
resolved_in=caller,
|
|
127
135
|
resolved_by=callee,
|
|
@@ -139,7 +147,7 @@ class APIObfuscationType2Finder:
|
|
|
139
147
|
log.debug("...Created label %s for address %x", lbl, result.resolved_func_ptr.addr)
|
|
140
148
|
|
|
141
149
|
# Create a variable
|
|
142
|
-
global_variables = self.
|
|
150
|
+
global_variables = self.variable_kb.variables["global"]
|
|
143
151
|
variables = global_variables.get_global_variables(result.resolved_func_ptr.addr)
|
|
144
152
|
if not variables:
|
|
145
153
|
ident = global_variables.next_variable_ident("global")
|
|
@@ -9,11 +9,11 @@ import networkx
|
|
|
9
9
|
|
|
10
10
|
import claripy
|
|
11
11
|
|
|
12
|
-
from angr import sim_options
|
|
13
12
|
from angr.analyses import Analysis, AnalysesHub
|
|
14
|
-
from angr.errors import SimMemoryMissingError, AngrCallableMultistateError, AngrCallableError
|
|
13
|
+
from angr.errors import SimMemoryMissingError, AngrCallableMultistateError, AngrCallableError, AngrAnalysisError
|
|
15
14
|
from angr.calling_conventions import SimRegArg, default_cc
|
|
16
15
|
from angr.state_plugins.sim_action import SimActionData
|
|
16
|
+
from angr.sim_options import ZERO_FILL_UNCONSTRAINED_REGISTERS, ZERO_FILL_UNCONSTRAINED_MEMORY, TRACK_MEMORY_ACTIONS
|
|
17
17
|
from angr.sim_type import SimTypeFunction, SimTypeBottom, SimTypePointer
|
|
18
18
|
from angr.analyses.reaching_definitions import ObservationPointType
|
|
19
19
|
from angr.utils.graph import GraphUtils
|
|
@@ -23,12 +23,23 @@ from .irsb_reg_collector import IRSBRegisterCollector
|
|
|
23
23
|
_l = logging.getLogger(__name__)
|
|
24
24
|
|
|
25
25
|
|
|
26
|
+
STEP_LIMIT_FIND = 500
|
|
27
|
+
STEP_LIMIT_ANALYSIS = 5000
|
|
28
|
+
|
|
29
|
+
|
|
26
30
|
class StringDeobFuncDescriptor:
|
|
31
|
+
"""
|
|
32
|
+
Describes a string deobfuscation function.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
string_input_arg_idx: int
|
|
36
|
+
string_output_arg_idx: int
|
|
37
|
+
string_length_arg_idx: int | None
|
|
38
|
+
string_null_terminating: bool | None
|
|
39
|
+
|
|
27
40
|
def __init__(self):
|
|
28
|
-
self.string_input_arg_idx = None
|
|
29
|
-
self.string_output_arg_idx = None
|
|
30
41
|
self.string_length_arg_idx = None
|
|
31
|
-
self.string_null_terminating
|
|
42
|
+
self.string_null_terminating = None
|
|
32
43
|
|
|
33
44
|
|
|
34
45
|
class StringObfuscationFinder(Analysis):
|
|
@@ -89,6 +100,9 @@ class StringObfuscationFinder(Analysis):
|
|
|
89
100
|
# Type 1 string deobfuscation functions will decrypt each string once and for good.
|
|
90
101
|
|
|
91
102
|
cfg = self.kb.cfgs.get_most_accurate()
|
|
103
|
+
if cfg is None:
|
|
104
|
+
raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
|
|
105
|
+
|
|
92
106
|
arch = self.project.arch
|
|
93
107
|
|
|
94
108
|
type1_candidates: list[tuple[int, StringDeobFuncDescriptor]] = []
|
|
@@ -100,6 +114,10 @@ class StringObfuscationFinder(Analysis):
|
|
|
100
114
|
if func.prototype is None or len(func.prototype.args) < 1:
|
|
101
115
|
continue
|
|
102
116
|
|
|
117
|
+
if len(func.arguments) != len(func.prototype.args):
|
|
118
|
+
# function argument locations and function prototype arguments do not match
|
|
119
|
+
continue
|
|
120
|
+
|
|
103
121
|
if self.project.kb.functions.callgraph.out_degree[func.addr] != 0:
|
|
104
122
|
continue
|
|
105
123
|
|
|
@@ -123,14 +141,22 @@ class StringObfuscationFinder(Analysis):
|
|
|
123
141
|
dec = self.project.analyses.Decompiler(func, cfg=cfg)
|
|
124
142
|
except Exception: # pylint:disable=broad-exception-caught
|
|
125
143
|
continue
|
|
126
|
-
if
|
|
144
|
+
if (
|
|
145
|
+
dec.codegen is None
|
|
146
|
+
or not dec.codegen.text
|
|
147
|
+
or not self._like_type1_deobfuscation_function(dec.codegen.text)
|
|
148
|
+
):
|
|
149
|
+
continue
|
|
150
|
+
|
|
151
|
+
func_node = cfg.get_any_node(func.addr)
|
|
152
|
+
if func_node is None:
|
|
127
153
|
continue
|
|
128
154
|
|
|
129
155
|
args_list = []
|
|
130
156
|
for caller in callers:
|
|
131
157
|
callsite_nodes = [
|
|
132
158
|
pred
|
|
133
|
-
for pred in cfg.get_predecessors(
|
|
159
|
+
for pred in cfg.get_predecessors(func_node)
|
|
134
160
|
if pred.function_address == caller and pred.instruction_addrs
|
|
135
161
|
]
|
|
136
162
|
observation_points = []
|
|
@@ -148,15 +174,21 @@ class StringObfuscationFinder(Analysis):
|
|
|
148
174
|
callsite_node.instruction_addrs[-1],
|
|
149
175
|
ObservationPointType.OP_BEFORE,
|
|
150
176
|
)
|
|
177
|
+
if observ is None:
|
|
178
|
+
continue
|
|
151
179
|
# load values for each function argument
|
|
152
180
|
args: list[tuple[int, Any]] = []
|
|
153
181
|
for arg_idx, func_arg in enumerate(func.arguments):
|
|
154
182
|
# FIXME: We are ignoring all non-register function arguments until we see a test case where
|
|
155
183
|
# FIXME: stack-passing arguments are used
|
|
184
|
+
real_arg = func.prototype.args[arg_idx]
|
|
156
185
|
if isinstance(func_arg, SimRegArg):
|
|
157
186
|
reg_offset, reg_size = arch.registers[func_arg.reg_name]
|
|
187
|
+
arg_size = (
|
|
188
|
+
real_arg.size if real_arg.size is not None else reg_size
|
|
189
|
+
) // self.project.arch.byte_width
|
|
158
190
|
try:
|
|
159
|
-
mv = observ.registers.load(reg_offset, size=
|
|
191
|
+
mv = observ.registers.load(reg_offset, size=arg_size)
|
|
160
192
|
except SimMemoryMissingError:
|
|
161
193
|
args.append((arg_idx, claripy.BVV(0xDEADBEEF, self.project.arch.bits)))
|
|
162
194
|
continue
|
|
@@ -185,7 +217,15 @@ class StringObfuscationFinder(Analysis):
|
|
|
185
217
|
# now that we have good arguments, let's test the function!
|
|
186
218
|
for args in args_list:
|
|
187
219
|
func_call = self.project.factory.callable(
|
|
188
|
-
func.addr,
|
|
220
|
+
func.addr,
|
|
221
|
+
concrete_only=True,
|
|
222
|
+
cc=func.calling_convention,
|
|
223
|
+
prototype=func.prototype,
|
|
224
|
+
add_options={
|
|
225
|
+
ZERO_FILL_UNCONSTRAINED_MEMORY,
|
|
226
|
+
ZERO_FILL_UNCONSTRAINED_REGISTERS,
|
|
227
|
+
},
|
|
228
|
+
step_limit=STEP_LIMIT_FIND,
|
|
189
229
|
)
|
|
190
230
|
|
|
191
231
|
# before calling the function, let's record the crime scene
|
|
@@ -202,6 +242,9 @@ class StringObfuscationFinder(Analysis):
|
|
|
202
242
|
except (AngrCallableMultistateError, AngrCallableError):
|
|
203
243
|
continue
|
|
204
244
|
|
|
245
|
+
if func_call.result_state is None:
|
|
246
|
+
continue
|
|
247
|
+
|
|
205
248
|
# let's see what this amazing function has done
|
|
206
249
|
# TODO: Support cases where input and output are using different function arguments
|
|
207
250
|
for arg_idx, addr, old_value in values:
|
|
@@ -240,6 +283,9 @@ class StringObfuscationFinder(Analysis):
|
|
|
240
283
|
|
|
241
284
|
arch = self.project.arch
|
|
242
285
|
cfg = self.kb.cfgs.get_most_accurate()
|
|
286
|
+
if cfg is None:
|
|
287
|
+
raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
|
|
288
|
+
|
|
243
289
|
func = self.kb.functions.get_by_addr(func_addr)
|
|
244
290
|
func_node = cfg.get_any_node(func_addr)
|
|
245
291
|
assert func_node is not None
|
|
@@ -260,14 +306,20 @@ class StringObfuscationFinder(Analysis):
|
|
|
260
306
|
callsite_node.instruction_addrs[-1],
|
|
261
307
|
ObservationPointType.OP_BEFORE,
|
|
262
308
|
)
|
|
309
|
+
if observ is None:
|
|
310
|
+
continue
|
|
263
311
|
args = []
|
|
264
|
-
|
|
312
|
+
assert func.prototype is not None and len(func.arguments) == len(func.prototype.args)
|
|
313
|
+
for func_arg, real_arg in zip(func.arguments, func.prototype.args):
|
|
265
314
|
# FIXME: We are ignoring all non-register function arguments until we see a test case where
|
|
266
315
|
# FIXME: stack-passing arguments are used
|
|
267
316
|
if isinstance(func_arg, SimRegArg):
|
|
268
317
|
reg_offset, reg_size = arch.registers[func_arg.reg_name]
|
|
318
|
+
arg_size = (
|
|
319
|
+
real_arg.size if real_arg.size is not None else reg_size
|
|
320
|
+
) // self.project.arch.byte_width
|
|
269
321
|
try:
|
|
270
|
-
mv = observ.registers.load(reg_offset, size=
|
|
322
|
+
mv = observ.registers.load(reg_offset, size=arg_size)
|
|
271
323
|
except SimMemoryMissingError:
|
|
272
324
|
args.append(claripy.BVV(0xDEADBEEF, self.project.arch.bits))
|
|
273
325
|
continue
|
|
@@ -286,7 +338,12 @@ class StringObfuscationFinder(Analysis):
|
|
|
286
338
|
|
|
287
339
|
# call the function
|
|
288
340
|
func_call = self.project.factory.callable(
|
|
289
|
-
func.addr,
|
|
341
|
+
func.addr,
|
|
342
|
+
concrete_only=True,
|
|
343
|
+
cc=func.calling_convention,
|
|
344
|
+
prototype=func.prototype,
|
|
345
|
+
add_options={ZERO_FILL_UNCONSTRAINED_MEMORY, ZERO_FILL_UNCONSTRAINED_REGISTERS},
|
|
346
|
+
step_limit=STEP_LIMIT_ANALYSIS,
|
|
290
347
|
)
|
|
291
348
|
try:
|
|
292
349
|
func_call(*args)
|
|
@@ -303,6 +360,9 @@ class StringObfuscationFinder(Analysis):
|
|
|
303
360
|
)
|
|
304
361
|
continue
|
|
305
362
|
|
|
363
|
+
if func_call.result_state is None:
|
|
364
|
+
continue
|
|
365
|
+
|
|
306
366
|
# dump the decrypted string!
|
|
307
367
|
output_addr = args[desc.string_output_arg_idx]
|
|
308
368
|
length = args[desc.string_length_arg_idx].concrete_value if desc.string_length_arg_idx is not None else 256
|
|
@@ -322,6 +382,8 @@ class StringObfuscationFinder(Analysis):
|
|
|
322
382
|
xref_set = xrefs.get_xrefs_by_dst(str_addr)
|
|
323
383
|
block_addrs = {xref.block_addr for xref in xref_set}
|
|
324
384
|
for block_addr in block_addrs:
|
|
385
|
+
if block_addr is None:
|
|
386
|
+
continue
|
|
325
387
|
node = cfg.get_any_node(block_addr)
|
|
326
388
|
if node is not None:
|
|
327
389
|
callees = list(self.kb.functions.callgraph.successors(node.function_address))
|
|
@@ -340,6 +402,8 @@ class StringObfuscationFinder(Analysis):
|
|
|
340
402
|
# Type 2 string deobfuscation functions will decrypt each string once and for good.
|
|
341
403
|
|
|
342
404
|
cfg = self.kb.cfgs.get_most_accurate()
|
|
405
|
+
if cfg is None:
|
|
406
|
+
raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
|
|
343
407
|
|
|
344
408
|
type2_candidates: list[tuple[int, StringDeobFuncDescriptor, list[tuple[int, int, bytes]]]] = []
|
|
345
409
|
|
|
@@ -374,7 +438,11 @@ class StringObfuscationFinder(Analysis):
|
|
|
374
438
|
dec = self.project.analyses.Decompiler(func, cfg=cfg, expr_collapse_depth=64)
|
|
375
439
|
except Exception: # pylint:disable=broad-exception-caught
|
|
376
440
|
continue
|
|
377
|
-
if
|
|
441
|
+
if (
|
|
442
|
+
dec.codegen is None
|
|
443
|
+
or not dec.codegen.text
|
|
444
|
+
or not self._like_type2_deobfuscation_function(dec.codegen.text)
|
|
445
|
+
):
|
|
378
446
|
continue
|
|
379
447
|
|
|
380
448
|
desc = StringDeobFuncDescriptor()
|
|
@@ -384,7 +452,8 @@ class StringObfuscationFinder(Analysis):
|
|
|
384
452
|
concrete_only=True,
|
|
385
453
|
cc=func.calling_convention,
|
|
386
454
|
prototype=func.prototype,
|
|
387
|
-
add_options={
|
|
455
|
+
add_options={TRACK_MEMORY_ACTIONS, ZERO_FILL_UNCONSTRAINED_MEMORY, ZERO_FILL_UNCONSTRAINED_REGISTERS},
|
|
456
|
+
step_limit=STEP_LIMIT_FIND,
|
|
388
457
|
)
|
|
389
458
|
|
|
390
459
|
try:
|
|
@@ -392,6 +461,9 @@ class StringObfuscationFinder(Analysis):
|
|
|
392
461
|
except (AngrCallableMultistateError, AngrCallableError):
|
|
393
462
|
continue
|
|
394
463
|
|
|
464
|
+
if func_call.result_state is None:
|
|
465
|
+
continue
|
|
466
|
+
|
|
395
467
|
# where are the reads and writes?
|
|
396
468
|
all_global_reads = []
|
|
397
469
|
all_global_writes = []
|
|
@@ -399,7 +471,7 @@ class StringObfuscationFinder(Analysis):
|
|
|
399
471
|
if not isinstance(action, SimActionData):
|
|
400
472
|
continue
|
|
401
473
|
if not action.actual_addrs:
|
|
402
|
-
if not action.addr.ast.concrete:
|
|
474
|
+
if action.addr is None or not action.addr.ast.concrete:
|
|
403
475
|
continue
|
|
404
476
|
actual_addrs = [action.addr.ast.concrete_value]
|
|
405
477
|
else:
|
|
@@ -469,6 +541,8 @@ class StringObfuscationFinder(Analysis):
|
|
|
469
541
|
"""
|
|
470
542
|
|
|
471
543
|
cfg = self.kb.cfgs.get_most_accurate()
|
|
544
|
+
if cfg is None:
|
|
545
|
+
raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
|
|
472
546
|
|
|
473
547
|
# for each string table address, we find its string loader function
|
|
474
548
|
# an obvious candidate function is 0x140001b20
|
|
@@ -478,6 +552,8 @@ class StringObfuscationFinder(Analysis):
|
|
|
478
552
|
xref_set = xrefs.get_xrefs_by_dst(table_addr)
|
|
479
553
|
block_addrs = {xref.block_addr for xref in xref_set}
|
|
480
554
|
for block_addr in block_addrs:
|
|
555
|
+
if block_addr is None:
|
|
556
|
+
continue
|
|
481
557
|
node = cfg.get_any_node(block_addr)
|
|
482
558
|
if node is not None:
|
|
483
559
|
callees = list(self.kb.functions.callgraph.successors(node.function_address))
|
|
@@ -496,6 +572,9 @@ class StringObfuscationFinder(Analysis):
|
|
|
496
572
|
# not have a SimProcedure for)
|
|
497
573
|
|
|
498
574
|
cfg = self.kb.cfgs.get_most_accurate()
|
|
575
|
+
if cfg is None:
|
|
576
|
+
raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
|
|
577
|
+
|
|
499
578
|
functions = self.kb.functions
|
|
500
579
|
callgraph_digraph = networkx.DiGraph(functions.callgraph)
|
|
501
580
|
|
|
@@ -554,7 +633,7 @@ class StringObfuscationFinder(Analysis):
|
|
|
554
633
|
except Exception: # pylint:disable=broad-exception-caught
|
|
555
634
|
# catch all exceptions
|
|
556
635
|
continue
|
|
557
|
-
if dec.codegen is None:
|
|
636
|
+
if dec.codegen is None or not dec.codegen.text:
|
|
558
637
|
continue
|
|
559
638
|
if not self._like_type3_deobfuscation_function(dec.codegen.text):
|
|
560
639
|
continue
|
|
@@ -605,6 +684,8 @@ class StringObfuscationFinder(Analysis):
|
|
|
605
684
|
"""
|
|
606
685
|
|
|
607
686
|
cfg = self.kb.cfgs.get_most_accurate()
|
|
687
|
+
if cfg is None:
|
|
688
|
+
raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
|
|
608
689
|
|
|
609
690
|
call_sites = cfg.get_predecessors(cfg.get_any_node(func_addr))
|
|
610
691
|
callinsn2content = {}
|
|
@@ -687,7 +768,7 @@ class StringObfuscationFinder(Analysis):
|
|
|
687
768
|
# execute the block at the call site
|
|
688
769
|
state = self.project.factory.blank_state(
|
|
689
770
|
addr=call_site_addr,
|
|
690
|
-
add_options={
|
|
771
|
+
add_options={ZERO_FILL_UNCONSTRAINED_REGISTERS, ZERO_FILL_UNCONSTRAINED_MEMORY},
|
|
691
772
|
)
|
|
692
773
|
# setup sp and bp, just in case
|
|
693
774
|
state.regs._sp = 0x7FFF0000
|
|
@@ -728,7 +809,13 @@ class StringObfuscationFinder(Analysis):
|
|
|
728
809
|
self.project.arch
|
|
729
810
|
)
|
|
730
811
|
callable_0 = self.project.factory.callable(
|
|
731
|
-
func_addr,
|
|
812
|
+
func_addr,
|
|
813
|
+
concrete_only=True,
|
|
814
|
+
base_state=in_state,
|
|
815
|
+
cc=cc,
|
|
816
|
+
prototype=prototype_0,
|
|
817
|
+
add_options={ZERO_FILL_UNCONSTRAINED_MEMORY, ZERO_FILL_UNCONSTRAINED_REGISTERS},
|
|
818
|
+
step_limit=STEP_LIMIT_ANALYSIS,
|
|
732
819
|
)
|
|
733
820
|
|
|
734
821
|
try:
|
|
@@ -181,7 +181,7 @@ class ForwardAnalysis(Generic[AnalysisState, NodeType, JobType, JobKey]):
|
|
|
181
181
|
"""
|
|
182
182
|
return node
|
|
183
183
|
|
|
184
|
-
def _run_on_node(self, node: NodeType, state: AnalysisState) -> tuple[bool, AnalysisState]:
|
|
184
|
+
def _run_on_node(self, node: NodeType, state: AnalysisState) -> tuple[bool | None, AnalysisState]:
|
|
185
185
|
"""
|
|
186
186
|
The analysis routine that runs on each node in the graph.
|
|
187
187
|
|