angr 9.2.146__py3-none-macosx_10_9_x86_64.whl → 9.2.147__py3-none-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/bindiff.py +343 -68
- angr/analyses/cfg/cfg_arch_options.py +10 -0
- angr/analyses/cfg/cfg_base.py +39 -15
- angr/analyses/cfg/cfg_fast.py +19 -3
- angr/analyses/flirt/__init__.py +47 -0
- angr/analyses/flirt/consts.py +160 -0
- angr/analyses/{flirt.py → flirt/flirt.py} +99 -38
- angr/analyses/flirt/flirt_function.py +20 -0
- angr/analyses/flirt/flirt_matcher.py +351 -0
- angr/analyses/flirt/flirt_module.py +32 -0
- angr/analyses/flirt/flirt_node.py +23 -0
- angr/analyses/flirt/flirt_sig.py +356 -0
- angr/analyses/flirt/flirt_utils.py +31 -0
- angr/analyses/stack_pointer_tracker.py +34 -0
- angr/block.py +6 -6
- angr/engines/vex/heavy/concretizers.py +10 -0
- angr/flirt/__init__.py +15 -44
- angr/knowledge_plugins/functions/function.py +2 -2
- angr/lib/angr_native.dylib +0 -0
- {angr-9.2.146.dist-info → angr-9.2.147.dist-info}/METADATA +6 -7
- {angr-9.2.146.dist-info → angr-9.2.147.dist-info}/RECORD +26 -18
- {angr-9.2.146.dist-info → angr-9.2.147.dist-info}/WHEEL +1 -1
- {angr-9.2.146.dist-info → angr-9.2.147.dist-info}/LICENSE +0 -0
- {angr-9.2.146.dist-info → angr-9.2.147.dist-info}/entry_points.txt +0 -0
- {angr-9.2.146.dist-info → angr-9.2.147.dist-info}/top_level.txt +0 -0
angr/__init__.py
CHANGED
angr/analyses/bindiff.py
CHANGED
|
@@ -4,11 +4,13 @@ import math
|
|
|
4
4
|
import types
|
|
5
5
|
from collections import deque, defaultdict
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
|
+
from functools import partial
|
|
7
8
|
|
|
8
9
|
import networkx
|
|
9
10
|
|
|
10
|
-
from angr.analyses import AnalysesHub, Analysis,
|
|
11
|
+
from angr.analyses import AnalysesHub, Analysis, CFGFast
|
|
11
12
|
from angr.errors import SimEngineError, SimMemoryError
|
|
13
|
+
from angr.knowledge_plugins.cfg.memory_data import MemoryDataSort
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
if TYPE_CHECKING:
|
|
@@ -140,6 +142,9 @@ def _is_better_match(x, y, matched_a, matched_b, attributes_dict_a, attributes_d
|
|
|
140
142
|
:param attributes_dict_b: The attributes for each element in the second set.
|
|
141
143
|
:returns: True/False
|
|
142
144
|
"""
|
|
145
|
+
if x not in attributes_dict_a or y not in attributes_dict_b:
|
|
146
|
+
return False
|
|
147
|
+
|
|
143
148
|
attributes_x = attributes_dict_a[x]
|
|
144
149
|
attributes_y = attributes_dict_b[y]
|
|
145
150
|
if x in matched_a:
|
|
@@ -162,6 +167,11 @@ def differing_constants(block_a, block_b):
|
|
|
162
167
|
:returns: Returns a list of differing constants in the form of ConstantChange, which has the offset in the
|
|
163
168
|
block and the respective constants.
|
|
164
169
|
"""
|
|
170
|
+
if block_a.size == 0 or block_b.size == 0:
|
|
171
|
+
return []
|
|
172
|
+
if not block_a.instruction_addrs or not block_b.instruction_addrs:
|
|
173
|
+
return []
|
|
174
|
+
|
|
165
175
|
statements_a = [s for s in block_a.vex.statements if s.tag != "Ist_IMark"] + [block_a.vex.next]
|
|
166
176
|
statements_b = [s for s in block_b.vex.statements if s.tag != "Ist_IMark"] + [block_b.vex.next]
|
|
167
177
|
if len(statements_a) != len(statements_b):
|
|
@@ -858,36 +868,20 @@ class BinDiff(Analysis):
|
|
|
858
868
|
This class computes a diff between two binaries represented by angr Projects
|
|
859
869
|
"""
|
|
860
870
|
|
|
861
|
-
def __init__(self, other_project,
|
|
871
|
+
def __init__(self, other_project, cfg_a=None, cfg_b=None):
|
|
862
872
|
"""
|
|
863
873
|
:param other_project: The second project to diff
|
|
864
874
|
"""
|
|
865
|
-
l.debug("Computing cfg's")
|
|
866
|
-
|
|
867
|
-
back_traversal = not enable_advanced_backward_slicing
|
|
868
|
-
|
|
869
875
|
if cfg_a is None:
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
self.
|
|
873
|
-
|
|
874
|
-
keep_state=True,
|
|
875
|
-
enable_symbolic_back_traversal=back_traversal,
|
|
876
|
-
enable_advanced_backward_slicing=enable_advanced_backward_slicing,
|
|
877
|
-
)
|
|
878
|
-
|
|
879
|
-
self.cfg_b = other_project.analyses[CFGEmulated].prep()(
|
|
880
|
-
context_sensitivity_level=1,
|
|
881
|
-
keep_state=True,
|
|
882
|
-
enable_symbolic_back_traversal=back_traversal,
|
|
883
|
-
enable_advanced_backward_slicing=enable_advanced_backward_slicing,
|
|
884
|
-
)
|
|
885
|
-
|
|
876
|
+
l.debug("Computing cfg's")
|
|
877
|
+
self.cfg_a = self.project.analyses[CFGFast].prep(fail_fast=self._fail_fast)().model
|
|
878
|
+
self.cfg_b = other_project.analyses[CFGFast].prep(fail_fast=self._fail_fast)().model
|
|
879
|
+
l.debug("Done computing cfg's")
|
|
886
880
|
else:
|
|
887
881
|
self.cfg_a = cfg_a
|
|
888
882
|
self.cfg_b = cfg_b
|
|
889
|
-
|
|
890
|
-
|
|
883
|
+
self.funcs_a = self.kb.functions
|
|
884
|
+
self.funcs_b = other_project.kb.functions
|
|
891
885
|
|
|
892
886
|
self._p2 = other_project
|
|
893
887
|
self._attributes_a = {}
|
|
@@ -908,8 +902,8 @@ class BinDiff(Analysis):
|
|
|
908
902
|
:param func_b_addr: The address of the second function (in the second binary).
|
|
909
903
|
:returns: Whether or not the functions appear to be identical.
|
|
910
904
|
"""
|
|
911
|
-
if self.
|
|
912
|
-
return self.
|
|
905
|
+
if self.project.is_hooked(func_a_addr) and self._p2.is_hooked(func_b_addr):
|
|
906
|
+
return self.project._sim_procedures[func_a_addr] == self._p2._sim_procedures[func_b_addr]
|
|
913
907
|
|
|
914
908
|
func_diff = self.get_function_diff(func_a_addr, func_b_addr)
|
|
915
909
|
if check_consts:
|
|
@@ -992,36 +986,33 @@ class BinDiff(Analysis):
|
|
|
992
986
|
"""
|
|
993
987
|
pair = (function_addr_a, function_addr_b)
|
|
994
988
|
if pair not in self._function_diffs:
|
|
995
|
-
function_a = self.
|
|
996
|
-
function_b = self.
|
|
989
|
+
function_a = self.funcs_a.function(function_addr_a)
|
|
990
|
+
function_b = self.funcs_b.function(function_addr_b)
|
|
997
991
|
self._function_diffs[pair] = FunctionDiff(function_a, function_b, self)
|
|
998
992
|
return self._function_diffs[pair]
|
|
999
993
|
|
|
1000
994
|
@staticmethod
|
|
1001
|
-
def _compute_function_attributes(
|
|
995
|
+
def _compute_function_attributes(funcs, exclude_func_addrs: set[int] | None = None):
|
|
1002
996
|
"""
|
|
1003
|
-
:param cfg: An angr CFG object
|
|
1004
997
|
:returns: a dictionary of function addresses to tuples of attributes
|
|
1005
998
|
"""
|
|
1006
999
|
# the attributes we use are the number of basic blocks, number of edges, and number of subfunction calls
|
|
1007
1000
|
attributes = {}
|
|
1008
|
-
all_funcs = set(
|
|
1009
|
-
for function_addr in
|
|
1001
|
+
all_funcs = set(funcs.callgraph)
|
|
1002
|
+
for function_addr in funcs:
|
|
1003
|
+
if not funcs.contains_addr(function_addr):
|
|
1004
|
+
continue
|
|
1005
|
+
if exclude_func_addrs and function_addr in exclude_func_addrs:
|
|
1006
|
+
continue
|
|
1007
|
+
func = funcs.get_by_addr(function_addr)
|
|
1010
1008
|
# skip syscalls and functions which are None in the cfg
|
|
1011
|
-
if
|
|
1009
|
+
if func.is_syscall or func.is_alignment or func.is_plt:
|
|
1012
1010
|
continue
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
number_of_basic_blocks = 0
|
|
1019
|
-
number_of_edges = 0
|
|
1020
|
-
if function_addr in all_funcs:
|
|
1021
|
-
number_of_subfunction_calls = len(list(cfg.kb.callgraph.successors(function_addr)))
|
|
1022
|
-
else:
|
|
1023
|
-
number_of_subfunction_calls = 0
|
|
1024
|
-
attributes[function_addr] = (number_of_basic_blocks, number_of_edges, number_of_subfunction_calls)
|
|
1011
|
+
normalized_function = NormalizedFunction(func)
|
|
1012
|
+
number_of_basic_blocks = len(normalized_function.graph.nodes())
|
|
1013
|
+
number_of_edges = len(normalized_function.graph.edges())
|
|
1014
|
+
number_of_subfunction_calls = funcs.callgraph.out_degree[function_addr] if function_addr in all_funcs else 0
|
|
1015
|
+
attributes[function_addr] = number_of_basic_blocks, number_of_edges, number_of_subfunction_calls
|
|
1025
1016
|
|
|
1026
1017
|
return attributes
|
|
1027
1018
|
|
|
@@ -1029,8 +1020,8 @@ class BinDiff(Analysis):
|
|
|
1029
1020
|
possible_matches = set()
|
|
1030
1021
|
|
|
1031
1022
|
# Make sure those functions are not SimProcedures
|
|
1032
|
-
f_a = self.
|
|
1033
|
-
f_b = self.
|
|
1023
|
+
f_a = self.funcs_a.function(func_a)
|
|
1024
|
+
f_b = self.funcs_b.function(func_b)
|
|
1034
1025
|
if f_a.startpoint is None or f_b.startpoint is None:
|
|
1035
1026
|
return possible_matches
|
|
1036
1027
|
|
|
@@ -1052,6 +1043,8 @@ class BinDiff(Analysis):
|
|
|
1052
1043
|
|
|
1053
1044
|
def _get_plt_matches(self):
|
|
1054
1045
|
plt_matches = []
|
|
1046
|
+
if not hasattr(self.project.loader.main_object, "plt") or not hasattr(self._p2.loader.main_object, "plt"):
|
|
1047
|
+
return []
|
|
1055
1048
|
for name, addr in self.project.loader.main_object.plt.items():
|
|
1056
1049
|
if name in self._p2.loader.main_object.plt:
|
|
1057
1050
|
plt_matches.append((addr, self._p2.loader.main_object.plt[name]))
|
|
@@ -1072,18 +1065,18 @@ class BinDiff(Analysis):
|
|
|
1072
1065
|
plt_matches.append((addr, func_to_addr_b[name]))
|
|
1073
1066
|
|
|
1074
1067
|
# remove ones that aren't in the interfunction graph, because these seem to not be consistent
|
|
1075
|
-
all_funcs_a = set(self.
|
|
1076
|
-
all_funcs_b = set(self.
|
|
1068
|
+
all_funcs_a = set(self.funcs_a.callgraph.nodes())
|
|
1069
|
+
all_funcs_b = set(self.funcs_b.callgraph.nodes())
|
|
1077
1070
|
return [x for x in plt_matches if x[0] in all_funcs_a and x[1] in all_funcs_b]
|
|
1078
1071
|
|
|
1079
1072
|
def _get_name_matches(self):
|
|
1080
1073
|
names_to_addrs_a = defaultdict(list)
|
|
1081
|
-
for f in self.
|
|
1074
|
+
for f in self.funcs_a.values():
|
|
1082
1075
|
if not f.name.startswith("sub_"):
|
|
1083
1076
|
names_to_addrs_a[f.name].append(f.addr)
|
|
1084
1077
|
|
|
1085
1078
|
names_to_addrs_b = defaultdict(list)
|
|
1086
|
-
for f in self.
|
|
1079
|
+
for f in self.funcs_b.values():
|
|
1087
1080
|
if not f.name.startswith("sub_"):
|
|
1088
1081
|
names_to_addrs_b[f.name].append(f.addr)
|
|
1089
1082
|
|
|
@@ -1097,23 +1090,305 @@ class BinDiff(Analysis):
|
|
|
1097
1090
|
|
|
1098
1091
|
return name_matches
|
|
1099
1092
|
|
|
1100
|
-
def
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1093
|
+
def _get_string_reference_matches(self) -> list[tuple[int, int]]:
    """
    Match functions through string literals that both binaries reference exactly once.

    A string yields a match only if it is recorded at a single address in ``self.cfg_a`` and in ``self.cfg_b``,
    each of those addresses has exactly one cross-reference, and both referencing blocks resolve to a CFG node.

    :returns: A sorted, de-duplicated list of (main function address, secondary function address) pairs.
    """

    def _unique_string_addrs(cfg) -> dict[bytes, int | None]:
        # Map string content to its address; content seen at more than one address maps to None so that it is
        # ignored later. NOTE(review): content is presumably bytes (it is collected into a set[bytes] elsewhere
        # in this class) -- confirm against MemoryData.
        addrs: dict[bytes, int | None] = {}
        for mem_data in cfg.memory_data.values():
            if mem_data.sort == MemoryDataSort.String:
                addrs[mem_data.content] = None if mem_data.content in addrs else mem_data.addr
        return addrs

    strs_main = _unique_string_addrs(self.cfg_a)
    strs_secondary = _unique_string_addrs(self.cfg_b)

    matches = set()
    # check cross-references of every string shared by both binaries
    for s in strs_main.keys() & strs_secondary.keys():
        addr_main = strs_main[s]
        addr_secondary = strs_secondary[s]
        if addr_main is None or addr_secondary is None:
            # ambiguous on at least one side
            continue
        xrefs_main = self.kb.xrefs.get_xrefs_by_dst(addr_main)
        xrefs_secondary = self._p2.kb.xrefs.get_xrefs_by_dst(addr_secondary)
        if not len(xrefs_main) == len(xrefs_secondary) == 1:
            continue
        xref_main = next(iter(xrefs_main))
        xref_secondary = next(iter(xrefs_secondary))
        cfgnode_main = self.cfg_a.get_any_node(xref_main.block_addr)
        cfgnode_secondary = self.cfg_b.get_any_node(xref_secondary.block_addr)
        if cfgnode_main is not None and cfgnode_secondary is not None:
            matches.add((cfgnode_main.function_address, cfgnode_secondary.function_address))

    return sorted(matches)
|
|
1132
|
+
|
|
1133
|
+
@staticmethod
|
|
1134
|
+
def _approximate_matcher_func_block_and_edge_count(
    main_funcs, secondary_funcs, new_matches: list, size_tolerance=0.1
) -> None:
    """
    Pair up functions, in address order, that have the same number of blocks and the same number of edges.

    Both lists are walked in lockstep. When the counts agree and the relative size difference is below
    ``size_tolerance``, the pair is appended to ``new_matches``. When the counts disagree, the cursor of the longer
    remaining list is advanced (both cursors if the remainders are equally long).

    :param main_funcs:      Candidate functions of the main binary.
    :param secondary_funcs: Candidate functions of the secondary binary.
    :param new_matches:     Output list; (main address, secondary address) tuples are appended in place.
    :param size_tolerance:  Maximum allowed relative difference between the two function sizes.
    """
    main_funcs = sorted(main_funcs, key=lambda x: x.addr)
    secondary_funcs = sorted(secondary_funcs, key=lambda x: x.addr)
    m, s = 0, 0
    while m < len(main_funcs) and s < len(secondary_funcs):
        mf = main_funcs[m]
        sf = secondary_funcs[s]

        # best case: there is a direct match
        if len(mf.block_addrs_set) == len(sf.block_addrs_set) and len(mf.graph.edges) == len(sf.graph.edges):
            # ensure function sizes are roughly the same; two zero-sized functions carry no evidence and would
            # otherwise divide by zero
            larger_size = max(mf.size, sf.size)
            if larger_size > 0 and abs(mf.size - sf.size) / larger_size < size_tolerance:
                l.info(
                    "Approximate matcher (block&edge count) found %#x (%s) and %#x (%s).",
                    mf.addr,
                    mf.name,
                    sf.addr,
                    sf.name,
                )
                new_matches.append((mf.addr, sf.addr))
            m += 1
            s += 1
            continue

        remaining_main = len(main_funcs) - m
        remaining_secondary = len(secondary_funcs) - s
        if remaining_main > remaining_secondary:
            # more main funcs than secondary funcs; we increment m in case a function in the main binary
            # is removed
            m += 1
        elif remaining_main < remaining_secondary:
            # more secondary funcs than main funcs; we increment s in case a function in the secondary
            # binary is removed
            s += 1
        else:
            m += 1
            s += 1
|
|
1170
|
+
|
|
1171
|
+
@staticmethod
|
|
1172
|
+
def _get_function_max_addr(func) -> int | None:
    """
    Return the end address of the block with the highest start address in ``func``.

    :param func: A function object exposing ``block_addrs_set`` and ``get_block_size()``.
    :returns:    Start address of the last block plus its size, or None if the function has no blocks.
    """
    if not func.block_addrs_set:
        return None
    last_block_addr = max(func.block_addrs_set)
    block_size = func.get_block_size(last_block_addr)
    if block_size is None:
        # NOTE(review): get_block_size() presumably returns None for a block whose size was never recorded;
        # fall back to the block's start address instead of raising TypeError on the addition.
        block_size = 0
    return last_block_addr + block_size
|
|
1178
|
+
|
|
1179
|
+
def _get_function_string_refs(self, proj, cfg, func) -> set[bytes]:
    """
    Collect the contents of all strings referenced from within ``func``.

    :param proj: The project whose cross-reference store (``proj.kb.xrefs``) is queried.
    :param cfg:  The CFG model providing ``memory_data``.
    :param func: The function to inspect.
    :returns:    The set of string contents referenced between ``func.addr`` and the function's maximum address;
                 empty if the function has no blocks.
    """
    strs = set()
    func_max_addr = self._get_function_max_addr(func)
    if func_max_addr is None:
        return strs
    for xref in proj.kb.xrefs.get_xrefs_by_ins_addr_region(func.addr, func_max_addr):
        # single lookup instead of a membership test followed by indexing
        md = cfg.memory_data.get(xref.dst)
        if md is not None and md.sort == MemoryDataSort.String:
            strs.add(md.content)
    return strs
|
|
1191
|
+
|
|
1192
|
+
def _approximate_matcher_func_string_refs(self, main_funcs, secondary_funcs, new_matches: list) -> None:
    """
    Pair up functions that reference exactly the same set of strings.

    A pair is reported only when the string set is non-empty and identifies exactly one function on each side.

    :param main_funcs:      Candidate functions of the main binary.
    :param secondary_funcs: Candidate functions of the secondary binary.
    :param new_matches:     Output list; (main address, secondary address) tuples are appended in place.
    """
    strs_to_funcs_main = defaultdict(list)
    strs_to_funcs_secondary = defaultdict(list)

    for func in main_funcs:
        strs = self._get_function_string_refs(self.project, self.cfg_a, func)
        if strs:
            strs_to_funcs_main[frozenset(strs)].append(func)

    for func in secondary_funcs:
        strs = self._get_function_string_refs(self._p2, self.cfg_b, func)
        if strs:
            strs_to_funcs_secondary[frozenset(strs)].append(func)

    for strs_main, funcs_main in strs_to_funcs_main.items():
        # .get() does not insert a default entry, unlike indexing a defaultdict
        funcs_secondary = strs_to_funcs_secondary.get(strs_main)
        if funcs_secondary is None or len(funcs_main) != 1 or len(funcs_secondary) != 1:
            # no counterpart, or the string set is ambiguous on one side
            continue
        # found a match!
        mf = funcs_main[0]
        sf = funcs_secondary[0]
        l.info(
            "Approximate matcher (string refs) found %#x (%s) and %#x (%s).",
            mf.addr,
            mf.name,
            sf.addr,
            sf.name,
        )
        new_matches.append((mf.addr, sf.addr))
|
|
1104
1222
|
|
|
1223
|
+
@staticmethod
|
|
1224
|
+
def _get_function_callees(
    proj, funcs, func, main2secondary: dict[int, int] | None = None, funcs_secondary=None
) -> tuple[str, ...]:
    """
    Describe the callees of ``func`` as a sorted tuple of names.

    A call to ``func`` itself is reported as ``"!self"``. If both ``main2secondary`` and ``funcs_secondary`` are
    given, a callee that has an entry in ``main2secondary`` is named after its counterpart in the secondary
    binary. Any other callee is looked up in ``funcs`` and only counts if it does not have a default name.

    :param proj:            The project whose call graph (``proj.kb.functions.callgraph``) is queried.
    :param funcs:           Function manager used to resolve callees of ``func``.
    :param func:            The function whose callees are described.
    :param main2secondary:  Optional mapping from main-binary function addresses to secondary-binary addresses.
    :param funcs_secondary: Optional function manager of the secondary binary.
    :returns:               The sorted callee names, or an empty tuple if at least one callee is unnamed (or if
                            there are no callees).
    """
    can_translate = main2secondary is not None and funcs_secondary is not None

    # convert callees to meaningful function names
    callee_names = []
    for callee in proj.kb.functions.callgraph.successors(func.addr):
        if callee == func.addr:
            callee_names.append("!self")
            continue

        # Resolve into a separate local: rebinding ``func`` here would make the self-call check above compare
        # against the previously resolved callee on later iterations.
        if can_translate and callee in main2secondary:
            name = funcs_secondary.get_by_addr(main2secondary[callee]).name
        else:
            callee_func = funcs.get_by_addr(callee)
            name = None if callee_func.is_default_name else callee_func.name

        if name is None:
            # has at least one unknown/unmatched callee
            return ()
        callee_names.append(name)

    return tuple(sorted(callee_names))
|
|
1247
|
+
|
|
1248
|
+
def _approximate_matcher_func_callees(
    self, main2secondary: dict[int, int], main_funcs, secondary_funcs, new_matches: list
) -> None:
    """
    Pair up functions that call exactly the same set of (named) callees.

    Callees of main-binary functions are translated through ``main2secondary`` so that both sides are described
    with secondary-binary names where a match is already known. A pair is reported only when the callee
    description is non-empty and identifies exactly one function on each side.

    :param main2secondary:  Mapping from main-binary function addresses to secondary-binary addresses.
    :param main_funcs:      Candidate functions of the main binary.
    :param secondary_funcs: Candidate functions of the secondary binary.
    :param new_matches:     Output list; (main address, secondary address) tuples are appended in place.
    """
    callees_to_funcs_main = defaultdict(list)
    callees_to_funcs_secondary = defaultdict(list)

    for func in main_funcs:
        callees = self._get_function_callees(
            self.project, self.funcs_a, func, main2secondary=main2secondary, funcs_secondary=self.funcs_b
        )
        if callees:
            callees_to_funcs_main[callees].append(func)

    for func in secondary_funcs:
        callees = self._get_function_callees(self._p2, self.funcs_b, func)
        if callees:
            callees_to_funcs_secondary[callees].append(func)

    for callees_main, funcs_main in callees_to_funcs_main.items():
        # .get() does not insert a default entry, unlike indexing a defaultdict
        funcs_secondary = callees_to_funcs_secondary.get(callees_main)
        if funcs_secondary is None or len(funcs_main) != 1 or len(funcs_secondary) != 1:
            # no counterpart, or the callee set is ambiguous on one side
            continue
        # found a match!
        mf = funcs_main[0]
        sf = funcs_secondary[0]
        l.info(
            "Approximate matcher (callees) found %#x (%s) and %#x (%s).",
            mf.addr,
            mf.name,
            sf.addr,
            sf.name,
        )
        new_matches.append((mf.addr, sf.addr))
|
|
1282
|
+
|
|
1283
|
+
def _get_approximate_matches_between_matched_pairs(self, matches: list[tuple[int, int]], matcher):
    """
    Look for additional matches among the functions located between two neighbouring, already matched pairs.

    ``matches`` is sorted by main address and walked pairwise. A pair of neighbours is considered only if the
    secondary addresses are in increasing order as well, and if both functions are named on the main side or
    both are named on the secondary side. The functions strictly between the neighbours (excluding syscalls,
    SimProcedures, alignment functions and PLT stubs) are then handed to ``matcher``.

    :param matches: Known (main address, secondary address) matches.
    :param matcher: Callable invoked as ``matcher(main_funcs, secondary_funcs, new_matches)``; it appends the
                    matches it finds to ``new_matches``.
    :returns:       The list of newly found matches.
    """
    # gaps holding this many functions or more (on either side) are not examined
    max_funcs_in_gap = 100

    def _candidates_between(funcs, low: int, high: int) -> list:
        # functions whose address lies strictly between low and high, minus the ones that are never matched
        between = (funcs.get_by_addr(addr) for addr in funcs._function_map.irange(minimum=low + 1, maximum=high - 1))
        return [f for f in between if not (f.is_syscall or f.is_simprocedure or f.is_alignment or f.is_plt)]

    sorted_matches = sorted(matches, key=lambda x: x[0])
    new_matches = []

    for (addr1_main, addr1_secondary), (addr2_main, addr2_secondary) in zip(sorted_matches, sorted_matches[1:]):
        if addr1_secondary >= addr2_secondary:
            continue

        # either two main functions are named, or two secondary functions are named
        f1_main = self.funcs_a.get_by_addr(addr1_main)
        f2_main = self.funcs_a.get_by_addr(addr2_main)
        f1_secondary = self.funcs_b.get_by_addr(addr1_secondary)
        f2_secondary = self.funcs_b.get_by_addr(addr2_secondary)
        main_named = not f1_main.is_default_name and not f2_main.is_default_name
        secondary_named = not f1_secondary.is_default_name and not f2_secondary.is_default_name
        if not (main_named or secondary_named):
            continue

        # are there any functions in between?
        main_funcs = _candidates_between(self.funcs_a, addr1_main, addr2_main)
        secondary_funcs = _candidates_between(self.funcs_b, addr1_secondary, addr2_secondary)
        if 0 < len(main_funcs) < max_funcs_in_gap and 0 < len(secondary_funcs) < max_funcs_in_gap:
            matcher(main_funcs, secondary_funcs, new_matches)

    return new_matches
|
|
1335
|
+
|
|
1336
|
+
def _compute_diff(self):
|
|
1105
1337
|
# get the initial matches
|
|
1338
|
+
l.info("Getting PLT-based matches...")
|
|
1106
1339
|
initial_matches = self._get_plt_matches()
|
|
1340
|
+
l.info("... initial matches: %d", len(initial_matches))
|
|
1341
|
+
|
|
1342
|
+
l.info("Getting function name-based matches...")
|
|
1107
1343
|
initial_matches += self._get_name_matches()
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1344
|
+
l.info("... initial matches: %d", len(initial_matches))
|
|
1345
|
+
|
|
1346
|
+
l.info("Getting string reference-based matches...")
|
|
1347
|
+
initial_matches += self._get_string_reference_matches()
|
|
1348
|
+
l.info("... initial matches: %d", len(initial_matches))
|
|
1349
|
+
|
|
1350
|
+
l.info("Getting adjacent function matches based on function block and edge counts...")
|
|
1351
|
+
initial_matches += self._get_approximate_matches_between_matched_pairs(
|
|
1352
|
+
initial_matches, self._approximate_matcher_func_block_and_edge_count
|
|
1353
|
+
)
|
|
1354
|
+
l.info("... initial matches: %d", len(initial_matches))
|
|
1355
|
+
|
|
1356
|
+
l.info("Getting adjacent function matches based on string references...")
|
|
1357
|
+
initial_matches += self._get_approximate_matches_between_matched_pairs(
|
|
1358
|
+
initial_matches, self._approximate_matcher_func_string_refs
|
|
1359
|
+
)
|
|
1360
|
+
l.info("... initial matches: %d", len(initial_matches))
|
|
1361
|
+
|
|
1362
|
+
l.info("Getting adjacent function matches based on callees...")
|
|
1363
|
+
main2secondary = dict(initial_matches)
|
|
1364
|
+
initial_matches += self._get_approximate_matches_between_matched_pairs(
|
|
1365
|
+
initial_matches, partial(self._approximate_matcher_func_callees, main2secondary)
|
|
1366
|
+
)
|
|
1367
|
+
l.info("... initial matches: %d", len(initial_matches))
|
|
1368
|
+
|
|
1369
|
+
# dedup
|
|
1370
|
+
initial_matches = sorted(set(initial_matches))
|
|
1371
|
+
l.info("We got %d initial matches so far. Time to get busy...", len(initial_matches))
|
|
1372
|
+
|
|
1373
|
+
# get the attributes for all functions
|
|
1374
|
+
l.info("Computing function attributes for main project...")
|
|
1375
|
+
self.attributes_a = self._compute_function_attributes(
|
|
1376
|
+
self.funcs_a, exclude_func_addrs={a for a, _ in initial_matches}
|
|
1377
|
+
)
|
|
1378
|
+
l.info("Computing function attributes for secondary project...")
|
|
1379
|
+
self.attributes_b = self._compute_function_attributes(
|
|
1380
|
+
self.funcs_b, exclude_func_addrs={a for _, a in initial_matches}
|
|
1381
|
+
)
|
|
1382
|
+
|
|
1383
|
+
l.info("Getting function attribute-based matches...")
|
|
1384
|
+
attribute_based_matches = self._get_function_matches(self.attributes_a, self.attributes_b)
|
|
1385
|
+
l.info("Got %d attribute-based matches.", len(attribute_based_matches))
|
|
1111
1386
|
|
|
1112
1387
|
# Use a queue so we process matches in the order that they are found
|
|
1113
|
-
to_process = deque(
|
|
1388
|
+
to_process = deque(attribute_based_matches)
|
|
1114
1389
|
|
|
1115
1390
|
# Keep track of which matches we've already added to the queue
|
|
1116
|
-
processed_matches = set(initial_matches)
|
|
1391
|
+
processed_matches = set(initial_matches + attribute_based_matches)
|
|
1117
1392
|
|
|
1118
1393
|
# Keep a dict of current matches, which will be updated if better matches are found
|
|
1119
1394
|
matched_a = {}
|
|
@@ -1122,8 +1397,8 @@ class BinDiff(Analysis):
|
|
|
1122
1397
|
matched_a[x] = y
|
|
1123
1398
|
matched_b[y] = x
|
|
1124
1399
|
|
|
1125
|
-
callgraph_a_nodes = set(self.
|
|
1126
|
-
callgraph_b_nodes = set(self.
|
|
1400
|
+
callgraph_a_nodes = set(self.funcs_a.callgraph.nodes())
|
|
1401
|
+
callgraph_b_nodes = set(self.funcs_b.callgraph.nodes())
|
|
1127
1402
|
|
|
1128
1403
|
# while queue is not empty
|
|
1129
1404
|
while to_process:
|
|
@@ -1136,10 +1411,10 @@ class BinDiff(Analysis):
|
|
|
1136
1411
|
if not self._p2.loader.main_object.contains_addr(func_b):
|
|
1137
1412
|
continue
|
|
1138
1413
|
|
|
1139
|
-
func_a_succ = self.
|
|
1140
|
-
func_b_succ = self.
|
|
1141
|
-
func_a_pred = self.
|
|
1142
|
-
func_b_pred = self.
|
|
1414
|
+
func_a_succ = self.funcs_a.callgraph.successors(func_a) if func_a in callgraph_a_nodes else []
|
|
1415
|
+
func_b_succ = self.funcs_b.callgraph.successors(func_b) if func_b in callgraph_b_nodes else []
|
|
1416
|
+
func_a_pred = self.funcs_a.callgraph.predecessors(func_a) if func_a in callgraph_a_nodes else []
|
|
1417
|
+
func_b_pred = self.funcs_b.callgraph.predecessors(func_b) if func_b in callgraph_b_nodes else []
|
|
1143
1418
|
|
|
1144
1419
|
# get possible new matches
|
|
1145
1420
|
new_matches = set(
|
|
@@ -1155,10 +1430,10 @@ class BinDiff(Analysis):
|
|
|
1155
1430
|
# for each of the possible new matches add it if it improves the matching
|
|
1156
1431
|
for x, y in new_matches:
|
|
1157
1432
|
# skip none functions and syscalls
|
|
1158
|
-
func_a = self.
|
|
1433
|
+
func_a = self.funcs_a.function(x)
|
|
1159
1434
|
if func_a is None or func_a.is_simprocedure or func_a.is_syscall:
|
|
1160
1435
|
continue
|
|
1161
|
-
func_b = self.
|
|
1436
|
+
func_b = self.funcs_b.function(y)
|
|
1162
1437
|
if func_b is None or func_b.is_simprocedure or func_b.is_syscall:
|
|
1163
1438
|
continue
|
|
1164
1439
|
|
|
@@ -25,16 +25,20 @@ class CFGArchOptions:
|
|
|
25
25
|
"switch_mode_on_nodecode": (bool, True),
|
|
26
26
|
# Whether we should use byte-based pattern-matching to identify ifuncs
|
|
27
27
|
"pattern_match_ifuncs": (bool, True),
|
|
28
|
+
# Do we consider ARM-mode code at all
|
|
29
|
+
"has_arm_code": (bool, True),
|
|
28
30
|
},
|
|
29
31
|
"ARMHF": {
|
|
30
32
|
"ret_jumpkind_heuristics": (bool, True),
|
|
31
33
|
"switch_mode_on_nodecode": (bool, True),
|
|
32
34
|
"pattern_match_ifuncs": (bool, True),
|
|
35
|
+
"has_arm_code": (bool, True),
|
|
33
36
|
},
|
|
34
37
|
"ARMCortexM": {
|
|
35
38
|
"ret_jumpkind_heuristics": (bool, True),
|
|
36
39
|
"switch_mode_on_nodecode": (bool, False),
|
|
37
40
|
"pattern_match_ifuncs": (bool, True),
|
|
41
|
+
"has_arm_code": (bool, False),
|
|
38
42
|
},
|
|
39
43
|
}
|
|
40
44
|
|
|
@@ -83,3 +87,9 @@ class CFGArchOptions:
|
|
|
83
87
|
|
|
84
88
|
else:
|
|
85
89
|
super().__setattr__(option_name, option_value)
|
|
90
|
+
|
|
91
|
+
def __getitem__(self, option_name: str):
    """
    Return the value stored for ``option_name`` in ``self._options``.

    Lookup errors raised by the underlying container propagate unchanged.
    """
    return self._options[option_name]
|
|
93
|
+
|
|
94
|
+
def __contains__(self, option_name: str) -> bool:
    """Report whether ``option_name`` is present in ``self._options``."""
    return option_name in self._options
|
angr/analyses/cfg/cfg_base.py
CHANGED
|
@@ -1550,7 +1550,9 @@ class CFGBase(Analysis):
|
|
|
1550
1550
|
block = next((b for b in function.blocks), None)
|
|
1551
1551
|
if block is None:
|
|
1552
1552
|
continue
|
|
1553
|
-
if
|
|
1553
|
+
if self._is_noop_block(self.project.arch, block) or all(
|
|
1554
|
+
self._is_noop_insn(insn) for insn in block.capstone.insns
|
|
1555
|
+
):
|
|
1554
1556
|
# all nops. mark this function as a function alignment
|
|
1555
1557
|
l.debug("Function chunk %#x is probably used as a function alignment (all nops).", func_addr)
|
|
1556
1558
|
self.kb.functions[func_addr].alignment = True
|
|
@@ -2205,10 +2207,6 @@ class CFGBase(Analysis):
|
|
|
2205
2207
|
out_edges = [e for e in g.out_edges(node_) if g.get_edge_data(*e)["jumpkind"] != "Ijk_FakeRet"]
|
|
2206
2208
|
return len(out_edges) > 1
|
|
2207
2209
|
|
|
2208
|
-
if len(src_function.block_addrs_set) > 10:
|
|
2209
|
-
# ignore functions unless they are extremely small
|
|
2210
|
-
return False
|
|
2211
|
-
|
|
2212
2210
|
if len(all_edges) == 1 and dst_addr != src_addr:
|
|
2213
2211
|
the_edge = next(iter(all_edges))
|
|
2214
2212
|
_, dst, data = the_edge
|
|
@@ -2251,15 +2249,41 @@ class CFGBase(Analysis):
|
|
|
2251
2249
|
candidate = True
|
|
2252
2250
|
|
|
2253
2251
|
if candidate:
|
|
2254
|
-
|
|
2255
|
-
|
|
2256
|
-
|
|
2257
|
-
|
|
2258
|
-
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
|
|
2252
|
+
# we have two strategies; for small functions, we run SPTracker on the entire function and see if the
|
|
2253
|
+
# stack pointer changes or not; for large functions, we simply detect how far away we jump as well as
|
|
2254
|
+
# if there are any other functions identified between the source and the destination.
|
|
2255
|
+
if len(src_function.block_addrs_set) <= 10:
|
|
2256
|
+
regs = {self.project.arch.sp_offset}
|
|
2257
|
+
if hasattr(self.project.arch, "bp_offset") and self.project.arch.bp_offset is not None:
|
|
2258
|
+
regs.add(self.project.arch.bp_offset)
|
|
2259
|
+
sptracker = self.project.analyses[StackPointerTracker].prep()(
|
|
2260
|
+
src_function, regs, track_memory=self._sp_tracking_track_memory
|
|
2261
|
+
)
|
|
2262
|
+
sp_delta = sptracker.offset_after_block(src_addr, self.project.arch.sp_offset)
|
|
2263
|
+
if sp_delta == 0:
|
|
2264
|
+
return True
|
|
2265
|
+
else:
|
|
2266
|
+
# large function; to speed things up, we don't track sp
|
|
2267
|
+
minaddr, maxaddr = None, None
|
|
2268
|
+
if dst_addr - src_addr >= 0x100:
|
|
2269
|
+
minaddr = src_addr
|
|
2270
|
+
maxaddr = dst_addr
|
|
2271
|
+
elif dst_addr < src_addr:
|
|
2272
|
+
# jumping back; is it jumping beyond the function header?
|
|
2273
|
+
src_func = blockaddr_to_function[src_addr]
|
|
2274
|
+
if dst_addr < src_func.addr and src_func.addr - dst_addr >= 0x100:
|
|
2275
|
+
minaddr = dst_addr
|
|
2276
|
+
maxaddr = src_func.addr
|
|
2277
|
+
|
|
2278
|
+
if minaddr is not None and maxaddr is not None:
|
|
2279
|
+
# are there other functions in between?
|
|
2280
|
+
funcaddrs_in_between = list(
|
|
2281
|
+
known_functions._function_map.irange(minimum=minaddr + 1, maximum=maxaddr - 1)
|
|
2282
|
+
)
|
|
2283
|
+
funcs_in_between = [known_functions.get_by_addr(a) for a in funcaddrs_in_between]
|
|
2284
|
+
funcs_in_between = [func for func in funcs_in_between if not func.is_alignment]
|
|
2285
|
+
if len(funcs_in_between) >= 3:
|
|
2286
|
+
return True
|
|
2263
2287
|
|
|
2264
2288
|
return False
|
|
2265
2289
|
|
|
@@ -2639,7 +2663,7 @@ class CFGBase(Analysis):
|
|
|
2639
2663
|
:return: True if the instruction does no-op, False otherwise.
|
|
2640
2664
|
"""
|
|
2641
2665
|
|
|
2642
|
-
insn_name = insn.
|
|
2666
|
+
insn_name = insn.mnemonic
|
|
2643
2667
|
|
|
2644
2668
|
if insn_name == "nop":
|
|
2645
2669
|
# nops
|