angr 9.2.138__py3-none-macosx_11_0_arm64.whl → 9.2.139__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/fact_collector.py +59 -12
- angr/analyses/calling_convention/utils.py +2 -2
- angr/analyses/cfg/cfg_fast.py +12 -4
- angr/analyses/decompiler/ail_simplifier.py +14 -3
- angr/analyses/decompiler/block_simplifier.py +0 -2
- angr/analyses/decompiler/callsite_maker.py +80 -14
- angr/analyses/decompiler/clinic.py +31 -37
- angr/analyses/decompiler/condition_processor.py +2 -2
- angr/analyses/decompiler/decompiler.py +2 -0
- angr/analyses/decompiler/dephication/rewriting_engine.py +16 -7
- angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
- angr/analyses/decompiler/optimization_passes/condition_constprop.py +149 -0
- angr/analyses/decompiler/optimization_passes/deadblock_remover.py +12 -3
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +1 -1
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -2
- angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +15 -7
- angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +7 -10
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +12 -1
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +61 -25
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_shifts.py +50 -1
- angr/analyses/decompiler/presets/fast.py +2 -0
- angr/analyses/decompiler/presets/full.py +2 -0
- angr/analyses/decompiler/region_simplifiers/region_simplifier.py +4 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +20 -2
- angr/analyses/decompiler/ssailification/traversal_engine.py +4 -3
- angr/analyses/decompiler/structured_codegen/c.py +10 -3
- angr/analyses/decompiler/structuring/dream.py +7 -2
- angr/analyses/decompiler/structuring/phoenix.py +101 -49
- angr/analyses/decompiler/structuring/structurer_base.py +85 -36
- angr/analyses/decompiler/structuring/structurer_nodes.py +3 -1
- angr/analyses/deobfuscator/api_obf_finder.py +6 -1
- angr/analyses/deobfuscator/api_obf_type2_finder.py +158 -0
- angr/analyses/s_propagator.py +127 -50
- angr/analyses/s_reaching_definitions/s_rda_view.py +2 -2
- angr/analyses/s_reaching_definitions/s_reaching_definitions.py +3 -1
- angr/analyses/variable_recovery/engine_ail.py +1 -1
- angr/analyses/variable_recovery/engine_base.py +55 -62
- angr/analyses/variable_recovery/engine_vex.py +1 -1
- angr/analyses/variable_recovery/irsb_scanner.py +2 -2
- angr/calling_conventions.py +66 -9
- angr/engines/engine.py +2 -18
- angr/engines/light/engine.py +3 -8
- angr/engines/pcode/emulate.py +2 -2
- angr/engines/pcode/lifter.py +2 -2
- angr/engines/successors.py +1 -8
- angr/engines/vex/lifter.py +2 -2
- angr/engines/vex/light/light.py +2 -2
- angr/knowledge_plugins/cfg/cfg_model.py +3 -2
- angr/knowledge_plugins/labels.py +2 -2
- angr/knowledge_plugins/obfuscations.py +1 -0
- angr/knowledge_plugins/xrefs/xref_manager.py +4 -0
- angr/lib/angr_native.dylib +0 -0
- {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/METADATA +6 -6
- {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/RECORD +59 -57
- {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/LICENSE +0 -0
- {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/WHEEL +0 -0
- {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/entry_points.txt +0 -0
- {angr-9.2.138.dist-info → angr-9.2.139.dist-info}/top_level.txt +0 -0
|
@@ -19,6 +19,7 @@ from angr.analyses.decompiler.utils import (
|
|
|
19
19
|
has_nonlabel_nonphi_statements,
|
|
20
20
|
)
|
|
21
21
|
from angr.analyses.decompiler.label_collector import LabelCollector
|
|
22
|
+
from angr.errors import AngrDecompilationError
|
|
22
23
|
from .structurer_nodes import (
|
|
23
24
|
MultiNode,
|
|
24
25
|
SequenceNode,
|
|
@@ -49,7 +50,7 @@ class StructurerBase(Analysis):
|
|
|
49
50
|
longer exist due to empty node removal during structuring or prior steps.
|
|
50
51
|
"""
|
|
51
52
|
|
|
52
|
-
NAME: str =
|
|
53
|
+
NAME: str = "StructurerBase"
|
|
53
54
|
|
|
54
55
|
def __init__(
|
|
55
56
|
self,
|
|
@@ -132,16 +133,9 @@ class StructurerBase(Analysis):
|
|
|
132
133
|
return seq
|
|
133
134
|
|
|
134
135
|
@staticmethod
|
|
135
|
-
def
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
:param dict cases: A dict of switch-cases.
|
|
140
|
-
:param default: The default node.
|
|
141
|
-
:param int|None node_b_addr: Address of the end of the switch.
|
|
142
|
-
:return: None
|
|
143
|
-
"""
|
|
144
|
-
|
|
136
|
+
def _switch_find_switch_end_addr(
|
|
137
|
+
cases: dict[int, BaseNode], default: BaseNode | ailment.Block | None, region_node_addrs: set[int]
|
|
138
|
+
) -> int | None:
|
|
145
139
|
goto_addrs = defaultdict(int)
|
|
146
140
|
|
|
147
141
|
def _find_gotos(block, **kwargs):
|
|
@@ -155,20 +149,54 @@ class StructurerBase(Analysis):
|
|
|
155
149
|
continue
|
|
156
150
|
goto_addrs[t] += 1
|
|
157
151
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
handlers = {ailment.Block: _find_gotos}
|
|
152
|
+
# we need to figure this out
|
|
153
|
+
handlers = {ailment.Block: _find_gotos}
|
|
161
154
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
155
|
+
walker = SequenceWalker(handlers=handlers)
|
|
156
|
+
for case_node in cases.values():
|
|
157
|
+
walker.walk(case_node)
|
|
158
|
+
if default is not None:
|
|
159
|
+
walker.walk(default)
|
|
167
160
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
161
|
+
if not goto_addrs:
|
|
162
|
+
# there is no Goto statement - perfect, we don't need a switch-end node
|
|
163
|
+
return None
|
|
164
|
+
if len(goto_addrs) > 1 and any(a in region_node_addrs for a in goto_addrs):
|
|
165
|
+
goto_addrs = {a: times for a, times in goto_addrs.items() if a in region_node_addrs}
|
|
166
|
+
return sorted(goto_addrs.items(), key=lambda x: x[1], reverse=True)[0][0]
|
|
167
|
+
|
|
168
|
+
def _switch_handle_gotos(self, cases: dict[int, BaseNode], default, switch_end_addr: int) -> None:
|
|
169
|
+
"""
|
|
170
|
+
For each case, convert the goto that goes outside of the switch-case to a break statement.
|
|
171
|
+
|
|
172
|
+
:param cases: A dict of switch-cases.
|
|
173
|
+
:param default: The default node.
|
|
174
|
+
:param switch_end_addr: Address of the end of the switch.
|
|
175
|
+
:return: None
|
|
176
|
+
"""
|
|
177
|
+
|
|
178
|
+
# ensure every case node ends with a control-flow transition statement
|
|
179
|
+
# FIXME: The following logic only handles one case. are there other cases?
|
|
180
|
+
for case_addr in cases:
|
|
181
|
+
case_node = cases[case_addr]
|
|
182
|
+
if (
|
|
183
|
+
isinstance(case_node, SequenceNode)
|
|
184
|
+
and case_node.nodes
|
|
185
|
+
and isinstance(case_node.nodes[-1], ConditionNode)
|
|
186
|
+
):
|
|
187
|
+
cond_node = case_node.nodes[-1]
|
|
188
|
+
if (cond_node.true_node is None and cond_node.false_node is not None) or (
|
|
189
|
+
cond_node.false_node is None and cond_node.true_node is not None
|
|
190
|
+
):
|
|
191
|
+
# the last node is a condition node and only has one branch - we need a goto statement to ensure it
|
|
192
|
+
# does not fall through to the next branch
|
|
193
|
+
goto_stmt = ailment.Stmt.Jump(
|
|
194
|
+
None,
|
|
195
|
+
ailment.Expr.Const(None, None, switch_end_addr, self.project.arch.bits),
|
|
196
|
+
target_idx=None,
|
|
197
|
+
ins_addr=cond_node.addr,
|
|
198
|
+
)
|
|
199
|
+
case_node.nodes.append(ailment.Block(cond_node.addr, 0, statements=[goto_stmt], idx=None))
|
|
172
200
|
|
|
173
201
|
# rewrite all _goto switch_end_addr_ to _break_
|
|
174
202
|
|
|
@@ -262,7 +290,7 @@ class StructurerBase(Analysis):
|
|
|
262
290
|
and this_node.statements
|
|
263
291
|
and isinstance(this_node.statements[-1], (ailment.Stmt.Jump, ailment.Stmt.ConditionalJump))
|
|
264
292
|
):
|
|
265
|
-
jump_stmt = this_node.statements[-1]
|
|
293
|
+
jump_stmt = this_node.statements[-1] # type: ignore
|
|
266
294
|
elif (
|
|
267
295
|
isinstance(this_node, MultiNode)
|
|
268
296
|
and this_node.nodes
|
|
@@ -273,7 +301,7 @@ class StructurerBase(Analysis):
|
|
|
273
301
|
)
|
|
274
302
|
):
|
|
275
303
|
this_node = this_node.nodes[-1]
|
|
276
|
-
jump_stmt = this_node.statements[-1]
|
|
304
|
+
jump_stmt = this_node.statements[-1] # type: ignore
|
|
277
305
|
|
|
278
306
|
if isinstance(jump_stmt, ailment.Stmt.Jump):
|
|
279
307
|
next_node = node.nodes[i + 1]
|
|
@@ -387,7 +415,7 @@ class StructurerBase(Analysis):
|
|
|
387
415
|
return seq
|
|
388
416
|
|
|
389
417
|
def _rewrite_conditional_jumps_to_breaks(self, loop_node, successor_addrs):
|
|
390
|
-
def _rewrite_conditional_jump_to_break(node: ailment.Block, parent
|
|
418
|
+
def _rewrite_conditional_jump_to_break(node: ailment.Block, *, parent, index: int, label=None, **kwargs):
|
|
391
419
|
if not node.statements:
|
|
392
420
|
return
|
|
393
421
|
|
|
@@ -481,7 +509,7 @@ class StructurerBase(Analysis):
|
|
|
481
509
|
):
|
|
482
510
|
continue_node_addr = loop_node.condition.ins_addr
|
|
483
511
|
|
|
484
|
-
def _rewrite_jump_to_continue(node, parent
|
|
512
|
+
def _rewrite_jump_to_continue(node, *, parent, index: int, label=None, **kwargs):
|
|
485
513
|
if not node.statements:
|
|
486
514
|
return
|
|
487
515
|
stmt = node.statements[-1]
|
|
@@ -593,6 +621,7 @@ class StructurerBase(Analysis):
|
|
|
593
621
|
if (true_target_value is not None and true_target_value in loop_successor_addrs) and (
|
|
594
622
|
false_target_value is None or false_target_value not in loop_successor_addrs
|
|
595
623
|
):
|
|
624
|
+
assert last_stmt.true_target is not None
|
|
596
625
|
cond = last_stmt.condition
|
|
597
626
|
target = last_stmt.true_target.value
|
|
598
627
|
new_node = ConditionalBreakNode(
|
|
@@ -601,6 +630,7 @@ class StructurerBase(Analysis):
|
|
|
601
630
|
elif (false_target_value is not None and false_target_value in loop_successor_addrs) and (
|
|
602
631
|
true_target_value is None or true_target_value not in loop_successor_addrs
|
|
603
632
|
):
|
|
633
|
+
assert last_stmt.false_target is not None
|
|
604
634
|
cond = ailment.Expr.UnaryOp(last_stmt.condition.idx, "Not", last_stmt.condition)
|
|
605
635
|
target = last_stmt.false_target.value
|
|
606
636
|
new_node = ConditionalBreakNode(
|
|
@@ -611,10 +641,11 @@ class StructurerBase(Analysis):
|
|
|
611
641
|
):
|
|
612
642
|
# both targets are pointing outside the loop
|
|
613
643
|
# we should just add a break node
|
|
644
|
+
assert last_stmt.false_target is not None
|
|
614
645
|
new_node = BreakNode(last_stmt.ins_addr, last_stmt.false_target.value)
|
|
615
646
|
else:
|
|
616
647
|
_l.warning("None of the branches is jumping to outside of the loop")
|
|
617
|
-
raise
|
|
648
|
+
raise AngrDecompilationError("Unexpected: None of the branches is jumping to outside of the loop")
|
|
618
649
|
|
|
619
650
|
return new_node
|
|
620
651
|
|
|
@@ -622,6 +653,13 @@ class StructurerBase(Analysis):
|
|
|
622
653
|
def _merge_conditional_breaks(seq):
|
|
623
654
|
# Find consecutive ConditionalBreakNodes and merge their conditions
|
|
624
655
|
|
|
656
|
+
class _Holder:
|
|
657
|
+
"""
|
|
658
|
+
Holds values so that handlers can access them directly.
|
|
659
|
+
"""
|
|
660
|
+
|
|
661
|
+
merged = False
|
|
662
|
+
|
|
625
663
|
def _handle_SequenceNode(seq_node, parent=None, index=0, label=None):
|
|
626
664
|
new_nodes = []
|
|
627
665
|
i = 0
|
|
@@ -642,7 +680,7 @@ class StructurerBase(Analysis):
|
|
|
642
680
|
claripy.Or(node.condition, prev_node.condition)
|
|
643
681
|
)
|
|
644
682
|
new_node = ConditionalBreakNode(node.addr, merged_condition, node.target)
|
|
645
|
-
|
|
683
|
+
_Holder.merged = True
|
|
646
684
|
else:
|
|
647
685
|
walker._handle(node, parent=seq_node, index=i)
|
|
648
686
|
|
|
@@ -659,13 +697,20 @@ class StructurerBase(Analysis):
|
|
|
659
697
|
}
|
|
660
698
|
|
|
661
699
|
walker = SequenceWalker(handlers=handlers)
|
|
662
|
-
|
|
700
|
+
_Holder.merged = False # this is just a hack
|
|
663
701
|
walker.walk(seq)
|
|
664
|
-
return
|
|
702
|
+
return _Holder.merged, seq
|
|
665
703
|
|
|
666
704
|
def _merge_nesting_conditionals(self, seq):
|
|
667
705
|
# find if(A) { if(B) { ... } } and simplify them to if( A && B ) { ... }
|
|
668
706
|
|
|
707
|
+
class _Holder:
|
|
708
|
+
"""
|
|
709
|
+
Holds values so that handlers can access them directly.
|
|
710
|
+
"""
|
|
711
|
+
|
|
712
|
+
merged = False
|
|
713
|
+
|
|
669
714
|
def _condnode_truenode_only(node):
|
|
670
715
|
if type(node) is CodeNode:
|
|
671
716
|
# unpack
|
|
@@ -693,9 +738,11 @@ class StructurerBase(Analysis):
|
|
|
693
738
|
node = seq_node.nodes[i]
|
|
694
739
|
r, cond_node = _condnode_truenode_only(node)
|
|
695
740
|
if r:
|
|
741
|
+
assert cond_node is not None
|
|
696
742
|
r, cond_node_inner = _condnode_truenode_only(cond_node.true_node)
|
|
697
743
|
if r:
|
|
698
744
|
# amazing!
|
|
745
|
+
assert cond_node_inner is not None
|
|
699
746
|
merged_cond = ConditionProcessor.simplify_condition(
|
|
700
747
|
claripy.And(
|
|
701
748
|
self.cond_proc.claripy_ast_from_ail_condition(cond_node.condition),
|
|
@@ -704,13 +751,14 @@ class StructurerBase(Analysis):
|
|
|
704
751
|
)
|
|
705
752
|
new_node = ConditionNode(cond_node.addr, None, merged_cond, cond_node_inner.true_node, None)
|
|
706
753
|
seq_node.nodes[i] = new_node
|
|
707
|
-
|
|
754
|
+
_Holder.merged = True
|
|
708
755
|
i += 1
|
|
709
756
|
continue
|
|
710
757
|
# else:
|
|
711
758
|
r, condbreak_node = _condbreaknode(cond_node.true_node)
|
|
712
759
|
if r:
|
|
713
760
|
# amazing!
|
|
761
|
+
assert condbreak_node is not None
|
|
714
762
|
merged_cond = ConditionProcessor.simplify_condition(
|
|
715
763
|
claripy.And(
|
|
716
764
|
self.cond_proc.claripy_ast_from_ail_condition(cond_node.condition),
|
|
@@ -719,7 +767,7 @@ class StructurerBase(Analysis):
|
|
|
719
767
|
)
|
|
720
768
|
new_node = ConditionalBreakNode(condbreak_node.addr, merged_cond, condbreak_node.target)
|
|
721
769
|
seq_node.nodes[i] = new_node
|
|
722
|
-
|
|
770
|
+
_Holder.merged = True
|
|
723
771
|
i += 1
|
|
724
772
|
continue
|
|
725
773
|
|
|
@@ -732,10 +780,10 @@ class StructurerBase(Analysis):
|
|
|
732
780
|
}
|
|
733
781
|
|
|
734
782
|
walker = SequenceWalker(handlers=handlers)
|
|
735
|
-
|
|
783
|
+
_Holder.merged = False # this is just a hack
|
|
736
784
|
walker.walk(seq)
|
|
737
785
|
|
|
738
|
-
return
|
|
786
|
+
return _Holder.merged, seq
|
|
739
787
|
|
|
740
788
|
#
|
|
741
789
|
# Util methods
|
|
@@ -747,10 +795,11 @@ class StructurerBase(Analysis):
|
|
|
747
795
|
new_cases = OrderedDict()
|
|
748
796
|
|
|
749
797
|
caseid2gotoaddrs = {}
|
|
750
|
-
addr2caseids: dict[int, list[int
|
|
798
|
+
addr2caseids: dict[int, list[int | tuple[int, ...]]] = defaultdict(list)
|
|
751
799
|
|
|
752
800
|
# collect goto locations
|
|
753
801
|
for idx, case_node in cases.items():
|
|
802
|
+
assert case_node.addr is not None
|
|
754
803
|
addr2caseids[case_node.addr].append(idx)
|
|
755
804
|
try:
|
|
756
805
|
last_stmt = self.cond_proc.get_last_statement(case_node)
|
|
@@ -230,7 +230,9 @@ class CascadingConditionNode(BaseNode):
|
|
|
230
230
|
"else_node",
|
|
231
231
|
)
|
|
232
232
|
|
|
233
|
-
def __init__(
|
|
233
|
+
def __init__(
|
|
234
|
+
self, addr, condition_and_nodes: list[tuple[Any, BaseNode | ailment.Block]], else_node: BaseNode = None
|
|
235
|
+
):
|
|
234
236
|
self.addr = addr
|
|
235
237
|
self.condition_and_nodes = condition_and_nodes
|
|
236
238
|
self.else_node = else_node
|
|
@@ -25,6 +25,8 @@ from angr.analyses.decompiler.structured_codegen.c import (
|
|
|
25
25
|
CVariable,
|
|
26
26
|
)
|
|
27
27
|
|
|
28
|
+
from .api_obf_type2_finder import APIObfuscationType2Finder
|
|
29
|
+
|
|
28
30
|
_l = logging.getLogger(name=__name__)
|
|
29
31
|
|
|
30
32
|
|
|
@@ -90,7 +92,8 @@ class APIObfuscationFinder(Analysis):
|
|
|
90
92
|
|
|
91
93
|
Currently, we support the following API "obfuscation" styles:
|
|
92
94
|
|
|
93
|
-
- sub_A("dll_name", "api_name) where
|
|
95
|
+
- Type 1: sub_A("dll_name", "api_name") where sub_A ends up calling LoadLibrary.
|
|
96
|
+
- Type 2: GetProcAddress(_, "api_name").
|
|
94
97
|
"""
|
|
95
98
|
|
|
96
99
|
def __init__(self):
|
|
@@ -106,6 +109,8 @@ class APIObfuscationFinder(Analysis):
|
|
|
106
109
|
type1_deobfuscated = self._analyze_type1(desc.func_addr, desc)
|
|
107
110
|
self.kb.obfuscations.type1_deobfuscated_apis.update(type1_deobfuscated)
|
|
108
111
|
|
|
112
|
+
APIObfuscationType2Finder(self.project).analyze()
|
|
113
|
+
|
|
109
114
|
def _find_type1(self):
|
|
110
115
|
cfg = self.kb.cfgs.get_most_accurate()
|
|
111
116
|
load_library_funcs = []
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import cast
|
|
3
|
+
|
|
4
|
+
from collections.abc import Iterator
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
from angr.project import Project
|
|
9
|
+
from angr.analyses.reaching_definitions.reaching_definitions import (
|
|
10
|
+
ReachingDefinitionsAnalysis,
|
|
11
|
+
FunctionCallRelationships,
|
|
12
|
+
)
|
|
13
|
+
from angr.knowledge_plugins.functions.function import Function
|
|
14
|
+
from angr.knowledge_plugins.key_definitions import DerefSize
|
|
15
|
+
from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
|
|
16
|
+
from angr.knowledge_plugins.key_definitions.atoms import MemoryLocation
|
|
17
|
+
from angr.sim_variable import SimMemoryVariable
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Module-level logger named after this module; the finder below reports its progress through it at debug level.
log = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class APIObfuscationType2:
    """
    API Obfuscation Type 2 result.

    One record describes a single global function pointer that was found to be initialized from the return value
    of a resolver call (GetProcAddress/dlsym) whose function-name argument could be resolved to a concrete string.
    """

    # The API name recovered from the resolver call's string argument.
    resolved_func_name: str
    # The memory location that the resolved function pointer is written to.
    resolved_func_ptr: MemoryLocation
    # The function that contains the resolver callsite (the caller).
    resolved_in: Function
    # The resolver function that was called (e.g. GetProcAddress or dlsym).
    resolved_by: Function
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class APIObfuscationType2Finder:
    """
    Finds global function pointers initialized by calls to dlsym/GetProcAddress and names
    them accordingly.

    Typical use is ``APIObfuscationType2Finder(project).analyze()``. Besides returning the findings, the analysis
    records them in the project's knowledge base: a label and a global variable are created (or an existing
    variable is renamed) for every resolved pointer, and the pointer address is mapped to the API name in
    ``kb.obfuscations.type2_deobfuscated_apis``.
    """

    results: list[APIObfuscationType2]

    def __init__(self, project: Project):
        """
        :param project: The project whose knowledge base is searched and updated.
        """
        self.project = project
        self.results = []

    def analyze(self) -> list[APIObfuscationType2]:
        """
        Run the search over every (caller, resolver) pair and publish the findings to the knowledge base.

        Results from any previous run are discarded first.

        :return: The list of resolved function pointers (also available as ``self.results``).
        """
        self.results = []
        for caller, callee in self._get_candidates():
            # One reaching-definitions run per caller/resolver pair; every callsite to the resolver inside that
            # caller is then examined against the same analysis result.
            rda = self.project.analyses.ReachingDefinitions(caller, observe_all=True)
            for info in rda.callsites_to(callee):
                self._process_callsite(caller, callee, rda, info)
        self._mark_globals()
        return self.results

    def _get_candidates(self) -> Iterator[tuple[Function, Function]]:
        """
        Yield (caller, callee) tuples where callee is GetProcAddress (Win32) or dlsym/dlvsym (everything else),
        and caller is a predecessor of callee in the knowledge base's call graph.
        """
        targets = ["GetProcAddress"] if self.project.simos.name == "Win32" else ["dlsym", "dlvsym"]
        for callee in self.project.kb.functions.values():
            if callee.name not in targets:
                continue
            for caller_addr in self.project.kb.callgraph.predecessors(callee.addr):
                caller = self.project.kb.functions[caller_addr]
                yield (caller, callee)

    def _process_callsite(
        self,
        caller: Function,
        callee: Function,
        rda: ReachingDefinitionsAnalysis,
        callsite_info: FunctionCallRelationships,
    ) -> None:
        """
        Process a resolver function callsite looking for function name concrete string argument.

        When the name can be resolved, every qualifying memory location that the call's return value flows into is
        appended to ``self.results``.

        :param caller:          The function containing the callsite.
        :param callee:          The resolver function being called.
        :param rda:             The reaching-definitions result computed for ``caller``.
        :param callsite_info:   The callsite to examine.
        :return:                None
        """
        # The function name is the second argument of the resolver (e.g. GetProcAddress(_, "api_name")).
        func_name_arg_idx = 1
        if len(callsite_info.args_defns) <= func_name_arg_idx:
            return

        log.debug("Examining call to %r from %r at %r", callee, caller, callsite_info.callsite.ins_addr)
        ld = rda.model.get_observation_by_insn(callsite_info.callsite, ObservationPointType.OP_BEFORE)
        if ld is None:
            return

        # Attempt resolving static function name from callsite
        string_atom = ld.deref(callsite_info.args_defns[func_name_arg_idx], DerefSize.NULL_TERMINATE)
        result = ld.get_concrete_value(string_atom, cast_to=bytes)
        if result is None:
            log.debug("...Failed to resolve a function name")
            return

        try:
            proc_name = result.rstrip(b"\x00").decode("utf-8")
        except UnicodeDecodeError:
            # Bytes that are not valid text cannot be used as a name; skip this callsite instead of letting the
            # exception abort the analysis of every remaining callsite.
            log.debug("...Resolved bytes are not valid UTF-8: %r", result)
            return
        log.debug("...Resolved concrete function name: %s", proc_name)

        # Examine successor definitions to find where the function pointer is written
        for successor in rda.dep_graph.find_all_successors(callsite_info.ret_defns):
            # Only pointer-sized writes to a concrete memory address are candidates.
            if not (
                isinstance(successor.atom, MemoryLocation)
                and isinstance(successor.atom.addr, int)
                and successor.atom.size == self.project.arch.bytes
            ):
                continue

            ptr = successor.atom
            ptr_addr: int = cast(int, ptr.addr)
            log.debug("...Found function pointer %r", ptr)

            # Do not override names that are already known for this address.
            sym = self.project.loader.find_symbol(ptr_addr)
            if sym is not None:
                log.debug("...Already have pointer symbol: %r. Skipping.", sym)
                continue
            if ptr_addr in self.project.kb.labels:
                log.debug("...Already have pointer label. Skipping.")
                continue
            # The pointer must live in a known, writable section.
            sec = self.project.loader.find_section_containing(ptr_addr)
            if not sec or not sec.is_writable:
                log.debug("...Bogus section. Skipping.")
                continue

            self.results.append(
                APIObfuscationType2(
                    resolved_func_name=proc_name,
                    resolved_func_ptr=ptr,
                    resolved_in=caller,
                    resolved_by=callee,
                )
            )

    def _mark_globals(self):
        """
        Create function pointer labels/variables.

        For every entry in ``self.results`` this creates a unique ``p_<name>`` label at the pointer address, creates
        a global variable with that name (or renames the existing one when exactly one is already there), and
        records the address -> API name mapping in ``kb.obfuscations.type2_deobfuscated_apis``.
        """
        for result in self.results:
            ptr = result.resolved_func_ptr

            # Create a label
            lbl = self.project.kb.labels.get_unique_label(f"p_{result.resolved_func_name}")
            self.project.kb.labels[ptr.addr] = lbl
            log.debug("...Created label %s for address %x", lbl, ptr.addr)

            # Create a variable
            global_variables = self.project.kb.variables["global"]
            variables = global_variables.get_global_variables(ptr.addr)
            if not variables:
                ident = global_variables.next_variable_ident("global")
                var = SimMemoryVariable(ptr.addr, ptr.size, name=lbl, ident=ident)
                global_variables.set_variable("global", ptr.addr, var)
                log.debug("...Created variable %r", var)
            elif len(variables) == 1:
                (var,) = variables
                log.debug("...Renaming variable %r -> %s", var, lbl)
                var.name = lbl
            # With more than one existing variable at the address, none of them is renamed.

            self.project.kb.obfuscations.type2_deobfuscated_apis[ptr.addr] = result.resolved_func_name
|