angr 9.2.148__py3-none-manylinux2014_x86_64.whl → 9.2.150__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/__main__.py +100 -37
- angr/analyses/calling_convention/calling_convention.py +42 -2
- angr/analyses/cfg/cfg_emulated.py +5 -2
- angr/analyses/cfg/cfg_fast.py +48 -46
- angr/analyses/decompiler/ail_simplifier.py +65 -32
- angr/analyses/decompiler/block_simplifier.py +20 -6
- angr/analyses/decompiler/clinic.py +80 -13
- angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
- angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
- angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
- angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
- angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +2 -1
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
- angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
- angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
- angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
- angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
- angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
- angr/analyses/decompiler/presets/fast.py +2 -0
- angr/analyses/decompiler/presets/full.py +2 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
- angr/analyses/decompiler/ssailification/ssailification.py +23 -3
- angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
- angr/analyses/decompiler/structured_codegen/c.py +141 -10
- angr/analyses/decompiler/utils.py +23 -1
- angr/analyses/disassembly.py +2 -1
- angr/analyses/patchfinder.py +1 -1
- angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
- angr/analyses/typehoon/lifter.py +20 -0
- angr/analyses/typehoon/simple_solver.py +42 -9
- angr/analyses/typehoon/translator.py +4 -1
- angr/analyses/typehoon/typeconsts.py +17 -6
- angr/analyses/typehoon/typehoon.py +25 -6
- angr/analyses/variable_recovery/engine_ail.py +44 -5
- angr/analyses/variable_recovery/engine_base.py +35 -12
- angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
- angr/calling_conventions.py +23 -5
- angr/engines/light/engine.py +7 -0
- angr/engines/pcode/lifter.py +7 -0
- angr/knowledge_plugins/functions/function.py +68 -0
- angr/knowledge_plugins/propagations/states.py +5 -2
- angr/knowledge_plugins/variables/variable_manager.py +3 -3
- angr/procedures/definitions/__init__.py +1 -1
- angr/procedures/definitions/types_stl.py +22 -0
- angr/sim_type.py +251 -130
- angr/utils/graph.py +51 -27
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/METADATA +7 -7
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/RECORD +61 -55
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/WHEEL +1 -1
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/licenses/LICENSE +3 -0
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/entry_points.txt +0 -0
- {angr-9.2.148.dist-info → angr-9.2.150.dist-info}/top_level.txt +0 -0
|
angr/analyses/decompiler/utils.py
CHANGED
|
@@ -723,6 +723,23 @@ def structured_node_is_simple_return(
|
|
|
723
723
|
return False
|
|
724
724
|
|
|
725
725
|
|
|
726
|
+
def structured_node_is_simple_return_strict(node: BaseNode | SequenceNode | MultiNode | ailment.Block) -> bool:
|
|
727
|
+
"""
|
|
728
|
+
Returns True iff the node exclusively contains a return statement.
|
|
729
|
+
"""
|
|
730
|
+
if isinstance(node, (SequenceNode, MultiNode)) and node.nodes:
|
|
731
|
+
flat_blocks = _flatten_structured_node(node)
|
|
732
|
+
if len(flat_blocks) != 1:
|
|
733
|
+
return False
|
|
734
|
+
node = flat_blocks[-1]
|
|
735
|
+
|
|
736
|
+
return (
|
|
737
|
+
isinstance(node, ailment.Block)
|
|
738
|
+
and len(node.statements) == 1
|
|
739
|
+
and isinstance(node.statements[0], ailment.Stmt.Return)
|
|
740
|
+
)
|
|
741
|
+
|
|
742
|
+
|
|
726
743
|
def is_statement_terminating(stmt: ailment.statement.Statement, functions) -> bool:
|
|
727
744
|
if isinstance(stmt, ailment.Stmt.Return):
|
|
728
745
|
return True
|
|
@@ -851,11 +868,16 @@ def peephole_optimize_stmts(block, stmt_opts):
|
|
|
851
868
|
r = opt.optimize(stmt, stmt_idx=stmt_idx, block=block)
|
|
852
869
|
if r is not None and r is not stmt:
|
|
853
870
|
stmt = r
|
|
871
|
+
if r == ():
|
|
872
|
+
# the statement is gone; no more redo
|
|
873
|
+
redo = False
|
|
874
|
+
break
|
|
854
875
|
redo = True
|
|
855
876
|
break
|
|
856
877
|
|
|
857
878
|
if stmt is not None and stmt is not old_stmt:
|
|
858
|
-
|
|
879
|
+
if stmt != ():
|
|
880
|
+
statements.append(stmt)
|
|
859
881
|
any_update = True
|
|
860
882
|
else:
|
|
861
883
|
statements.append(old_stmt)
|
angr/analyses/disassembly.py
CHANGED
|
@@ -1159,6 +1159,7 @@ class Disassembly(Analysis):
|
|
|
1159
1159
|
show_bytes: bool = False,
|
|
1160
1160
|
ascii_only: bool | None = None,
|
|
1161
1161
|
color: bool = True,
|
|
1162
|
+
min_edge_depth: int = 0,
|
|
1162
1163
|
) -> str:
|
|
1163
1164
|
"""
|
|
1164
1165
|
Render the disassembly to a string, with optional edges and addresses.
|
|
@@ -1288,7 +1289,7 @@ class Disassembly(Analysis):
|
|
|
1288
1289
|
for f, t in sorted(edges_by_line, key=lambda e: abs(e[0] - e[1])):
|
|
1289
1290
|
add_edge_to_buffer(edge_buf, ref_buf, f, t, lambda s: ansi_color(s, edge_col), ascii_only=ascii_only)
|
|
1290
1291
|
add_edge_to_buffer(ref_buf, ref_buf, f, t, ascii_only=ascii_only)
|
|
1291
|
-
max_edge_depth = max(map(len, ref_buf))
|
|
1292
|
+
max_edge_depth = max(*map(len, ref_buf), min_edge_depth)
|
|
1292
1293
|
|
|
1293
1294
|
# Justify edge and combine with disassembly
|
|
1294
1295
|
for i, line in enumerate(buf):
|
angr/analyses/patchfinder.py
CHANGED
|
@@ -97,7 +97,7 @@ class PatchFinderAnalysis(Analysis):
|
|
|
97
97
|
# - Looking for instruction partials broken by a patch (nodecode)
|
|
98
98
|
# - Unusual stack manipulation
|
|
99
99
|
|
|
100
|
-
atypical_alignments: list[
|
|
100
|
+
atypical_alignments: list[AtypicallyAlignedFunction]
|
|
101
101
|
possibly_patched_out: list[PatchedOutFunctionality]
|
|
102
102
|
|
|
103
103
|
def __init__(self):
|
angr/analyses/typehoon/lifter.py
CHANGED
|
@@ -13,6 +13,7 @@ from angr.sim_type import (
|
|
|
13
13
|
SimTypeArray,
|
|
14
14
|
SimTypeFloat,
|
|
15
15
|
SimTypeDouble,
|
|
16
|
+
SimCppClass,
|
|
16
17
|
)
|
|
17
18
|
from .typeconsts import BottomType, Int8, Int16, Int32, Int64, Pointer32, Pointer64, Struct, Array, Float32, Float64
|
|
18
19
|
|
|
@@ -77,6 +78,24 @@ class TypeLifter:
|
|
|
77
78
|
obj.field_names = field_names
|
|
78
79
|
return obj
|
|
79
80
|
|
|
81
|
+
def _lift_SimCppClass(self, ty: SimCppClass) -> TypeConstant | BottomType:
|
|
82
|
+
if ty in self.memo:
|
|
83
|
+
return BottomType()
|
|
84
|
+
|
|
85
|
+
obj = Struct(fields={}, name=ty.name, is_cppclass=True)
|
|
86
|
+
self.memo[ty] = obj
|
|
87
|
+
converted_fields = {}
|
|
88
|
+
field_names = {}
|
|
89
|
+
ty_offsets = ty.offsets
|
|
90
|
+
for field_name, simtype in ty.members.items():
|
|
91
|
+
if field_name not in ty_offsets:
|
|
92
|
+
return BottomType()
|
|
93
|
+
converted_fields[ty_offsets[field_name]] = self.lift(simtype)
|
|
94
|
+
field_names[ty_offsets[field_name]] = field_name
|
|
95
|
+
obj.fields = converted_fields
|
|
96
|
+
obj.field_names = field_names
|
|
97
|
+
return obj
|
|
98
|
+
|
|
80
99
|
def _lift_SimTypeArray(self, ty: SimTypeArray) -> Array:
|
|
81
100
|
elem_type = self.lift(ty.elem_type)
|
|
82
101
|
return Array(elem_type, count=ty.length)
|
|
@@ -96,6 +115,7 @@ _mapping = {
|
|
|
96
115
|
SimTypeLongLong: TypeLifter._lift_SimTypeLongLong,
|
|
97
116
|
SimTypePointer: TypeLifter._lift_SimTypePointer,
|
|
98
117
|
SimStruct: TypeLifter._lift_SimStruct,
|
|
118
|
+
SimCppClass: TypeLifter._lift_SimCppClass,
|
|
99
119
|
SimTypeArray: TypeLifter._lift_SimTypeArray,
|
|
100
120
|
SimTypeFloat: TypeLifter._lift_SimTypeFloat,
|
|
101
121
|
SimTypeDouble: TypeLifter._lift_SimTypeDouble,
|
|
angr/analyses/typehoon/simple_solver.py
CHANGED
|
@@ -181,7 +181,7 @@ class Sketch:
|
|
|
181
181
|
|
|
182
182
|
def __init__(self, solver: SimpleSolver, root: TypeVariable):
|
|
183
183
|
self.root: SketchNode = SketchNode(root)
|
|
184
|
-
self.graph = networkx.
|
|
184
|
+
self.graph = networkx.MultiDiGraph()
|
|
185
185
|
self.node_mapping: dict[TypeVariable | DerivedTypeVariable, SketchNodeBase] = {}
|
|
186
186
|
self.solver = solver
|
|
187
187
|
|
|
@@ -200,7 +200,7 @@ class Sketch:
|
|
|
200
200
|
for label in typevar.labels:
|
|
201
201
|
succs = []
|
|
202
202
|
for _, dst, data in self.graph.out_edges(node, data=True):
|
|
203
|
-
if "label" in data and data["label"] == label:
|
|
203
|
+
if "label" in data and data["label"] == label and dst not in succs:
|
|
204
204
|
succs.append(dst)
|
|
205
205
|
if len(succs) > 1:
|
|
206
206
|
_l.warning(
|
|
@@ -215,6 +215,11 @@ class Sketch:
|
|
|
215
215
|
return node
|
|
216
216
|
|
|
217
217
|
def add_edge(self, src: SketchNodeBase, dst: SketchNodeBase, label) -> None:
|
|
218
|
+
# ensure the label does not already exist in existing edges
|
|
219
|
+
if self.graph.has_edge(src, dst):
|
|
220
|
+
for data in self.graph.get_edge_data(src, dst).values():
|
|
221
|
+
if "label" in data and data["label"] == label:
|
|
222
|
+
return
|
|
218
223
|
self.graph.add_edge(src, dst, label=label)
|
|
219
224
|
|
|
220
225
|
def add_constraint(self, constraint: TypeConstraint) -> None:
|
|
@@ -315,7 +320,7 @@ class ConstraintGraphNode:
|
|
|
315
320
|
tag_str = "R"
|
|
316
321
|
else:
|
|
317
322
|
tag_str = "U"
|
|
318
|
-
forgotten_str = "PRE" if FORGOTTEN.PRE_FORGOTTEN else "POST"
|
|
323
|
+
forgotten_str = "PRE" if self.forgotten == FORGOTTEN.PRE_FORGOTTEN else "POST"
|
|
319
324
|
s = f"{self.typevar}#{variance_str}.{tag_str}.{forgotten_str}"
|
|
320
325
|
if ":" in s:
|
|
321
326
|
return '"' + s + '"'
|
|
@@ -820,6 +825,7 @@ class SimpleSolver:
|
|
|
820
825
|
"""
|
|
821
826
|
|
|
822
827
|
graph = networkx.DiGraph()
|
|
828
|
+
constraints = self._get_transitive_subtype_constraints(constraints)
|
|
823
829
|
for constraint in constraints:
|
|
824
830
|
if isinstance(constraint, Subtype):
|
|
825
831
|
self._constraint_graph_add_edges(
|
|
@@ -830,6 +836,33 @@ class SimpleSolver:
|
|
|
830
836
|
self._constraint_graph_recall_forget_split(graph)
|
|
831
837
|
return graph
|
|
832
838
|
|
|
839
|
+
@staticmethod
|
|
840
|
+
def _get_transitive_subtype_constraints(constraints: set[TypeConstraint]) -> set[TypeConstraint]:
|
|
841
|
+
"""
|
|
842
|
+
Apply the S-Trans rule: a <: b, b <: c => a <: c
|
|
843
|
+
"""
|
|
844
|
+
tv2supertypes = defaultdict(set)
|
|
845
|
+
for constraint in constraints:
|
|
846
|
+
if isinstance(constraint, Subtype):
|
|
847
|
+
tv2supertypes[constraint.sub_type].add(constraint.super_type)
|
|
848
|
+
|
|
849
|
+
new_constraints = set()
|
|
850
|
+
while True:
|
|
851
|
+
changed = False
|
|
852
|
+
for subtype, supertypes in tv2supertypes.items():
|
|
853
|
+
supertypes_copy = set(supertypes)
|
|
854
|
+
for supertype in supertypes_copy:
|
|
855
|
+
if supertype in tv2supertypes:
|
|
856
|
+
for supertype_ in tv2supertypes[supertype]:
|
|
857
|
+
if supertype_ not in supertypes_copy:
|
|
858
|
+
changed = True
|
|
859
|
+
supertypes.add(supertype_)
|
|
860
|
+
new_constraints.add(Subtype(subtype, supertype_))
|
|
861
|
+
if not changed:
|
|
862
|
+
break
|
|
863
|
+
|
|
864
|
+
return constraints | new_constraints
|
|
865
|
+
|
|
833
866
|
@staticmethod
|
|
834
867
|
def _constraint_graph_add_recall_edges(graph: networkx.DiGraph, node: ConstraintGraphNode) -> None:
|
|
835
868
|
while True:
|
|
@@ -1234,21 +1267,21 @@ class SimpleSolver:
|
|
|
1234
1267
|
offset_to_maxsize[base] = max(offset_to_maxsize[base], (last_label.offset - base) + access_size)
|
|
1235
1268
|
offset_to_sizes[base].add(access_size)
|
|
1236
1269
|
|
|
1237
|
-
|
|
1270
|
+
idx_to_base = {}
|
|
1238
1271
|
|
|
1239
|
-
for labels,
|
|
1272
|
+
for idx, (labels, _) in enumerate(path_and_successors):
|
|
1240
1273
|
last_label = labels[-1] if labels else None
|
|
1241
1274
|
if isinstance(last_label, HasField):
|
|
1242
1275
|
prev_offset = next(offset_to_base.irange(maximum=last_label.offset, reverse=True))
|
|
1243
|
-
|
|
1276
|
+
idx_to_base[idx] = offset_to_base[prev_offset]
|
|
1244
1277
|
|
|
1245
1278
|
node_by_offset = defaultdict(set)
|
|
1246
1279
|
|
|
1247
|
-
for labels, succ in path_and_successors:
|
|
1280
|
+
for idx, (labels, succ) in enumerate(path_and_successors):
|
|
1248
1281
|
last_label = labels[-1] if labels else None
|
|
1249
1282
|
if isinstance(last_label, HasField):
|
|
1250
|
-
if
|
|
1251
|
-
node_by_offset[
|
|
1283
|
+
if idx in idx_to_base:
|
|
1284
|
+
node_by_offset[idx_to_base[idx]].add(succ)
|
|
1252
1285
|
else:
|
|
1253
1286
|
node_by_offset[last_label.offset].add(succ)
|
|
1254
1287
|
|
|
angr/analyses/typehoon/translator.py
CHANGED
|
@@ -105,7 +105,10 @@ class TypeTranslator:
|
|
|
105
105
|
|
|
106
106
|
name = tc.name if tc.name else self.struct_name()
|
|
107
107
|
|
|
108
|
-
|
|
108
|
+
if tc.is_cppclass:
|
|
109
|
+
s = sim_type.SimCppClass(name=name).with_arch(self.arch)
|
|
110
|
+
else:
|
|
111
|
+
s = sim_type.SimStruct({}, name=name).with_arch(self.arch)
|
|
109
112
|
self.structs[tc] = s
|
|
110
113
|
|
|
111
114
|
next_offset = 0
|
|
angr/analyses/typehoon/typeconsts.py
CHANGED
|
@@ -114,6 +114,18 @@ class Int512(Int):
|
|
|
114
114
|
return "int512"
|
|
115
115
|
|
|
116
116
|
|
|
117
|
+
class IntVar(Int):
|
|
118
|
+
def __init__(self, size):
|
|
119
|
+
self._size = size
|
|
120
|
+
|
|
121
|
+
@property
|
|
122
|
+
def size(self) -> int:
|
|
123
|
+
return self._size
|
|
124
|
+
|
|
125
|
+
def __repr__(self, memo=None):
|
|
126
|
+
return "intvar"
|
|
127
|
+
|
|
128
|
+
|
|
117
129
|
class Float(TypeConstant):
|
|
118
130
|
def __repr__(self, memo=None) -> str:
|
|
119
131
|
return "floatbase"
|
|
@@ -211,10 +223,11 @@ class Array(TypeConstant):
|
|
|
211
223
|
|
|
212
224
|
|
|
213
225
|
class Struct(TypeConstant):
|
|
214
|
-
def __init__(self, fields=None, name=None, field_names=None):
|
|
226
|
+
def __init__(self, fields=None, name=None, field_names=None, is_cppclass: bool = False):
|
|
215
227
|
self.fields = {} if fields is None else fields # offset to type
|
|
216
228
|
self.name = name
|
|
217
229
|
self.field_names = field_names
|
|
230
|
+
self.is_cppclass = is_cppclass
|
|
218
231
|
|
|
219
232
|
def _hash(self, visited: set[int]):
|
|
220
233
|
if id(self) in visited:
|
|
@@ -236,9 +249,9 @@ class Struct(TypeConstant):
|
|
|
236
249
|
|
|
237
250
|
@memoize
|
|
238
251
|
def __repr__(self, memo=None):
|
|
239
|
-
prefix = "struct"
|
|
252
|
+
prefix = "CppClass" if self.is_cppclass else "struct"
|
|
240
253
|
if self.name:
|
|
241
|
-
prefix = f"
|
|
254
|
+
prefix = f"{prefix} {self.name}"
|
|
242
255
|
return (
|
|
243
256
|
prefix
|
|
244
257
|
+ "{"
|
|
@@ -312,9 +325,7 @@ def int_type(bits: int) -> Int:
|
|
|
312
325
|
256: Int256,
|
|
313
326
|
512: Int512,
|
|
314
327
|
}
|
|
315
|
-
if bits in mapping
|
|
316
|
-
return mapping[bits]()
|
|
317
|
-
raise TypeError(f"Not a known size of int: {bits}")
|
|
328
|
+
return mapping[bits]() if bits in mapping else IntVar(bits)
|
|
318
329
|
|
|
319
330
|
|
|
320
331
|
def float_type(bits: int) -> Float | None:
|
|
angr/analyses/typehoon/typehoon.py
CHANGED
|
@@ -10,7 +10,7 @@ from angr.sim_variable import SimVariable, SimStackVariable
|
|
|
10
10
|
from .simple_solver import SimpleSolver
|
|
11
11
|
from .translator import TypeTranslator
|
|
12
12
|
from .typeconsts import Struct, Pointer, TypeConstant, Array, TopType
|
|
13
|
-
from .typevars import Equivalence, Subtype, TypeVariable
|
|
13
|
+
from .typevars import Equivalence, Subtype, TypeVariable, DerivedTypeVariable
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
16
|
from angr.sim_type import SimType
|
|
@@ -187,6 +187,10 @@ class Typehoon(Analysis):
|
|
|
187
187
|
if self._ground_truth and self.simtypes_solution is not None:
|
|
188
188
|
self.simtypes_solution.update(self._ground_truth)
|
|
189
189
|
|
|
190
|
+
@staticmethod
|
|
191
|
+
def _resolve_derived(tv):
|
|
192
|
+
return tv.type_var if isinstance(tv, DerivedTypeVariable) else tv
|
|
193
|
+
|
|
190
194
|
def _solve(self):
|
|
191
195
|
typevars = set()
|
|
192
196
|
if self._var_mapping:
|
|
@@ -198,9 +202,10 @@ class Typehoon(Analysis):
|
|
|
198
202
|
for constraint in self._constraints[self.func_var]:
|
|
199
203
|
if isinstance(constraint, Subtype):
|
|
200
204
|
if isinstance(constraint.sub_type, TypeVariable):
|
|
201
|
-
typevars.add(constraint.sub_type)
|
|
205
|
+
typevars.add(self._resolve_derived(constraint.sub_type))
|
|
202
206
|
if isinstance(constraint.super_type, TypeVariable):
|
|
203
|
-
typevars.add(constraint.super_type)
|
|
207
|
+
typevars.add(self._resolve_derived(constraint.super_type))
|
|
208
|
+
|
|
204
209
|
solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
|
|
205
210
|
self.solution = solver.solution
|
|
206
211
|
|
|
@@ -214,13 +219,16 @@ class Typehoon(Analysis):
|
|
|
214
219
|
if not self.solution:
|
|
215
220
|
return
|
|
216
221
|
|
|
222
|
+
memo = set()
|
|
217
223
|
for tv in list(self.solution.keys()):
|
|
218
224
|
if self._must_struct and tv in self._must_struct:
|
|
219
225
|
continue
|
|
220
226
|
sol = self.solution[tv]
|
|
221
|
-
specialized = self._specialize_struct(sol)
|
|
227
|
+
specialized = self._specialize_struct(sol, memo=memo)
|
|
222
228
|
if specialized is not None:
|
|
223
229
|
self.solution[tv] = specialized
|
|
230
|
+
else:
|
|
231
|
+
memo.add(sol)
|
|
224
232
|
|
|
225
233
|
def _specialize_struct(self, tc, memo: set | None = None):
|
|
226
234
|
if isinstance(tc, Pointer):
|
|
@@ -240,7 +248,11 @@ class Typehoon(Analysis):
|
|
|
240
248
|
return field0
|
|
241
249
|
|
|
242
250
|
# are all fields the same?
|
|
243
|
-
if
|
|
251
|
+
if (
|
|
252
|
+
len(tc.fields) > 1
|
|
253
|
+
and not self._is_pointer_to(field0, tc)
|
|
254
|
+
and all(tc.fields[off] == field0 for off in offsets)
|
|
255
|
+
):
|
|
244
256
|
# are all fields aligned properly?
|
|
245
257
|
try:
|
|
246
258
|
alignment = field0.size
|
|
@@ -251,12 +263,19 @@ class Typehoon(Analysis):
|
|
|
251
263
|
max_offset = offsets[-1]
|
|
252
264
|
field0_size = 1
|
|
253
265
|
if not isinstance(field0, TopType):
|
|
254
|
-
|
|
266
|
+
try:
|
|
267
|
+
field0_size = field0.size
|
|
268
|
+
except NotImplementedError:
|
|
269
|
+
field0_size = 1
|
|
255
270
|
count = (max_offset + field0_size) // alignment
|
|
256
271
|
return Array(field0, count=count)
|
|
257
272
|
|
|
258
273
|
return None
|
|
259
274
|
|
|
275
|
+
@staticmethod
|
|
276
|
+
def _is_pointer_to(pointer_to: TypeConstant, base_type: TypeConstant) -> bool:
|
|
277
|
+
return isinstance(pointer_to, Pointer) and pointer_to.basetype == base_type
|
|
278
|
+
|
|
260
279
|
def _translate_to_simtypes(self):
|
|
261
280
|
"""
|
|
262
281
|
Translate solutions in type variables to solutions in SimTypes.
|
|
angr/analyses/variable_recovery/engine_ail.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, cast
|
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
6
|
import ailment
|
|
7
|
+
from ailment.constant import UNDETERMINED_SIZE
|
|
7
8
|
import claripy
|
|
8
9
|
from unique_log_filter import UniqueLogFilter
|
|
9
10
|
|
|
@@ -30,8 +31,15 @@ class SimEngineVRAIL(
|
|
|
30
31
|
The engine for variable recovery on AIL.
|
|
31
32
|
"""
|
|
32
33
|
|
|
33
|
-
def __init__(
|
|
34
|
-
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
*args,
|
|
37
|
+
call_info=None,
|
|
38
|
+
vvar_to_vvar: dict[int, int] | None,
|
|
39
|
+
vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None,
|
|
40
|
+
**kwargs,
|
|
41
|
+
):
|
|
42
|
+
super().__init__(*args, vvar_type_hints=vvar_type_hints, **kwargs)
|
|
35
43
|
|
|
36
44
|
self._reference_spoffset: bool = False
|
|
37
45
|
self.call_info = call_info or {}
|
|
@@ -100,6 +108,13 @@ class SimEngineVRAIL(
|
|
|
100
108
|
else:
|
|
101
109
|
l.warning("Unsupported dst type %s.", dst_type)
|
|
102
110
|
|
|
111
|
+
def _handle_stmt_WeakAssignment(self, stmt) -> None:
|
|
112
|
+
src = self._expr(stmt.src)
|
|
113
|
+
dst = self._expr(stmt.dst)
|
|
114
|
+
if isinstance(src, RichR) and isinstance(dst, RichR) and src.typevar is not None and dst.typevar is not None:
|
|
115
|
+
tc = typevars.Subtype(src.typevar, dst.typevar)
|
|
116
|
+
self.state.add_type_constraint(tc)
|
|
117
|
+
|
|
103
118
|
def _handle_stmt_Store(self, stmt: ailment.Stmt.Store):
|
|
104
119
|
addr_r = self._expr_bv(stmt.addr)
|
|
105
120
|
data = self._expr(stmt.data)
|
|
@@ -325,7 +340,9 @@ class SimEngineVRAIL(
|
|
|
325
340
|
addr_r = self._expr_bv(expr.addr)
|
|
326
341
|
size = expr.size
|
|
327
342
|
|
|
328
|
-
|
|
343
|
+
if size != UNDETERMINED_SIZE:
|
|
344
|
+
return self._load(addr_r, size, expr=expr)
|
|
345
|
+
return self._top(8)
|
|
329
346
|
|
|
330
347
|
def _handle_expr_VirtualVariable(self, expr: ailment.Expr.VirtualVariable):
|
|
331
348
|
return self._read_from_vvar(expr, expr=expr, vvar_id=self._mapped_vvarid(expr.varid))
|
|
@@ -419,6 +436,29 @@ class SimEngineVRAIL(
|
|
|
419
436
|
self._reference(richr, codeloc, src=expr)
|
|
420
437
|
return richr
|
|
421
438
|
|
|
439
|
+
def _handle_unop_Reference(self, expr: ailment.Expr.UnaryOp):
|
|
440
|
+
if isinstance(expr.operand, ailment.Expr.VirtualVariable) and expr.operand.was_stack:
|
|
441
|
+
off = expr.operand.stack_offset
|
|
442
|
+
refbase_typevar = self.state.stack_offset_typevars.get(off, None)
|
|
443
|
+
if refbase_typevar is None:
|
|
444
|
+
# allocate a new type variable
|
|
445
|
+
refbase_typevar = typevars.TypeVariable()
|
|
446
|
+
self.state.stack_offset_typevars[off] = refbase_typevar
|
|
447
|
+
|
|
448
|
+
ref_typevar = typevars.TypeVariable()
|
|
449
|
+
access_derived_typevar = self._create_access_typevar(ref_typevar, False, None, 0)
|
|
450
|
+
load_constraint = typevars.Subtype(refbase_typevar, access_derived_typevar)
|
|
451
|
+
self.state.add_type_constraint(load_constraint)
|
|
452
|
+
|
|
453
|
+
value_v = self.state.stack_address(off)
|
|
454
|
+
richr = RichR(value_v, typevar=ref_typevar)
|
|
455
|
+
codeloc = self._codeloc()
|
|
456
|
+
self._ensure_variable_existence(richr, codeloc, src_expr=expr.operand)
|
|
457
|
+
if self._reference_spoffset:
|
|
458
|
+
self._reference(richr, codeloc, src=expr.operand)
|
|
459
|
+
return richr
|
|
460
|
+
return RichR(self.state.top(expr.bits))
|
|
461
|
+
|
|
422
462
|
def _handle_expr_BasePointerOffset(self, expr):
|
|
423
463
|
# TODO
|
|
424
464
|
return self._top(expr.bits)
|
|
@@ -433,7 +473,7 @@ class SimEngineVRAIL(
|
|
|
433
473
|
def _handle_binop_Add(self, expr):
|
|
434
474
|
arg0, arg1 = expr.operands
|
|
435
475
|
r0, r1 = self._expr_pair(arg0, arg1)
|
|
436
|
-
compute = r0.data + r1.data # type: ignore
|
|
476
|
+
compute = r0.data + r1.data if r0.data.size() == r1.data.size() else self.state.top(expr.bits) # type: ignore
|
|
437
477
|
|
|
438
478
|
type_constraints = set()
|
|
439
479
|
# create a new type variable and add constraints accordingly
|
|
@@ -844,7 +884,6 @@ class SimEngineVRAIL(
|
|
|
844
884
|
self._expr(expr.operands[0])
|
|
845
885
|
return RichR(self.state.top(expr.bits))
|
|
846
886
|
|
|
847
|
-
_handle_unop_Reference = _handle_unop_Default
|
|
848
887
|
_handle_unop_Dereference = _handle_unop_Default
|
|
849
888
|
_handle_unop_Clz = _handle_unop_Default
|
|
850
889
|
_handle_unop_Ctz = _handle_unop_Default
|
|
angr/analyses/variable_recovery/engine_base.py
CHANGED
|
@@ -70,9 +70,12 @@ class SimEngineVRBase(
|
|
|
70
70
|
and storing data.
|
|
71
71
|
"""
|
|
72
72
|
|
|
73
|
-
def __init__(self, project, kb):
|
|
73
|
+
def __init__(self, project, kb, vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None):
|
|
74
74
|
super().__init__(project)
|
|
75
75
|
|
|
76
|
+
self.vvar_type_hints: dict[int, typeconsts.TypeConstant] = (
|
|
77
|
+
vvar_type_hints if vvar_type_hints is not None else {}
|
|
78
|
+
)
|
|
76
79
|
self.kb = kb
|
|
77
80
|
self.vvar_region: dict[int, Any] = {}
|
|
78
81
|
|
|
@@ -453,13 +456,19 @@ class SimEngineVRBase(
|
|
|
453
456
|
# assign a new type variable to it
|
|
454
457
|
typevar = typevars.TypeVariable()
|
|
455
458
|
self.state.typevars.add_type_variable(variable, typevar)
|
|
456
|
-
# create constraints
|
|
457
459
|
else:
|
|
458
460
|
typevar = self.state.typevars.get_type_variable(variable)
|
|
461
|
+
|
|
462
|
+
# create constraints accordingly
|
|
463
|
+
|
|
459
464
|
self.state.add_type_constraint(typevars.Subtype(richr.typevar, typevar))
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
465
|
+
if vvar.varid in self.vvar_type_hints:
|
|
466
|
+
# handle type hints
|
|
467
|
+
self.state.add_type_constraint(typevars.Subtype(typevar, self.vvar_type_hints[vvar.varid]))
|
|
468
|
+
else:
|
|
469
|
+
# the constraint below is a default constraint that may conflict with more specific ones with different
|
|
470
|
+
# sizes; we post-process at the very end of VRA to remove conflicting default constraints.
|
|
471
|
+
self.state.add_type_constraint(typevars.Subtype(typevar, typeconsts.int_type(variable.size * 8)))
|
|
463
472
|
|
|
464
473
|
return variable
|
|
465
474
|
|
|
@@ -978,14 +987,22 @@ class SimEngineVRBase(
|
|
|
978
987
|
value = self.state.top(size * self.project.arch.byte_width)
|
|
979
988
|
if create_variable:
|
|
980
989
|
# create a new variable if necessary
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
990
|
+
|
|
991
|
+
# check if there is an existing variable for the atom at this location already
|
|
992
|
+
existing_vars: set[tuple[SimVariable, int]] = self.state.variable_manager[
|
|
993
|
+
self.func_addr
|
|
994
|
+
].find_variables_by_atom(self.block.addr, self.stmt_idx, expr)
|
|
995
|
+
if not existing_vars:
|
|
996
|
+
variable = SimRegisterVariable(
|
|
997
|
+
offset,
|
|
998
|
+
size if force_variable_size is None else force_variable_size,
|
|
999
|
+
ident=self.state.variable_manager[self.func_addr].next_variable_ident("register"),
|
|
1000
|
+
region=self.func_addr,
|
|
1001
|
+
)
|
|
1002
|
+
self.state.variable_manager[self.func_addr].add_variable("register", offset, variable)
|
|
1003
|
+
else:
|
|
1004
|
+
variable = next(iter(existing_vars))[0]
|
|
987
1005
|
value = self.state.annotate_with_variables(value, [(0, variable)])
|
|
988
|
-
self.state.variable_manager[self.func_addr].add_variable("register", offset, variable)
|
|
989
1006
|
self.state.register_region.store(offset, value)
|
|
990
1007
|
value_list = [{value}]
|
|
991
1008
|
else:
|
|
@@ -1131,6 +1148,12 @@ class SimEngineVRBase(
|
|
|
1131
1148
|
if var is not None and var.size != vvar.size:
|
|
1132
1149
|
# ignore the variable and the associated type if we are only reading part of the variable
|
|
1133
1150
|
return RichR(value, variable=var)
|
|
1151
|
+
|
|
1152
|
+
# handle type hints
|
|
1153
|
+
if vvar.varid in self.vvar_type_hints:
|
|
1154
|
+
assert isinstance(typevar, typevars.TypeVariable)
|
|
1155
|
+
self.state.add_type_constraint(typevars.Subtype(typevar, self.vvar_type_hints[vvar.varid]))
|
|
1156
|
+
|
|
1134
1157
|
return RichR(value, variable=var, typevar=typevar)
|
|
1135
1158
|
|
|
1136
1159
|
def _create_access_typevar(
|
|
angr/analyses/variable_recovery/variable_recovery_fast.py
CHANGED
|
@@ -12,21 +12,25 @@ import ailment
|
|
|
12
12
|
from ailment.expression import VirtualVariable
|
|
13
13
|
|
|
14
14
|
import angr.errors
|
|
15
|
+
from angr import SIM_TYPE_COLLECTIONS
|
|
15
16
|
from angr.analyses import AnalysesHub
|
|
16
17
|
from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
|
|
17
18
|
from angr.block import Block
|
|
18
19
|
from angr.errors import AngrVariableRecoveryError, SimEngineError
|
|
19
20
|
from angr.knowledge_plugins import Function
|
|
21
|
+
from angr.knowledge_plugins.key_definitions import atoms
|
|
20
22
|
from angr.sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
|
|
21
23
|
from angr.engines.vex.claripy.irop import vexop_to_simop
|
|
22
24
|
from angr.analyses import ForwardAnalysis, visitors
|
|
23
25
|
from angr.analyses.typehoon.typevars import Equivalence, TypeVariable, TypeVariables, Subtype, DerivedTypeVariable
|
|
24
|
-
from angr.analyses.typehoon.typeconsts import Int
|
|
26
|
+
from angr.analyses.typehoon.typeconsts import Int, TypeConstant, BottomType, TopType
|
|
27
|
+
from angr.analyses.typehoon.lifter import TypeLifter
|
|
25
28
|
from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
|
|
26
29
|
from .engine_vex import SimEngineVRVEX
|
|
27
30
|
from .engine_ail import SimEngineVRAIL
|
|
28
31
|
import contextlib
|
|
29
32
|
|
|
33
|
+
|
|
30
34
|
if TYPE_CHECKING:
|
|
31
35
|
from angr.analyses.typehoon.typevars import TypeConstraint
|
|
32
36
|
|
|
@@ -241,6 +245,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
241
245
|
unify_variables=True,
|
|
242
246
|
func_arg_vvars: dict[int, tuple[VirtualVariable, SimVariable]] | None = None,
|
|
243
247
|
vvar_to_vvar: dict[int, int] | None = None,
|
|
248
|
+
type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
|
|
244
249
|
):
|
|
245
250
|
if not isinstance(func, Function):
|
|
246
251
|
func = self.kb.functions[func]
|
|
@@ -269,8 +274,17 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
269
274
|
self._func_arg_vvars = func_arg_vvars
|
|
270
275
|
self._unify_variables = unify_variables
|
|
271
276
|
|
|
277
|
+
# handle type hints
|
|
278
|
+
self.vvar_type_hints = {}
|
|
279
|
+
if type_hints:
|
|
280
|
+
self._parse_type_hints(type_hints)
|
|
281
|
+
|
|
272
282
|
self._ail_engine: SimEngineVRAIL = SimEngineVRAIL(
|
|
273
|
-
self.project,
|
|
283
|
+
self.project,
|
|
284
|
+
self.kb,
|
|
285
|
+
call_info=call_info,
|
|
286
|
+
vvar_to_vvar=self.vvar_to_vvar,
|
|
287
|
+
vvar_type_hints=self.vvar_type_hints,
|
|
274
288
|
)
|
|
275
289
|
self._vex_engine: SimEngineVRVEX = SimEngineVRVEX(self.project, self.kb, call_info=call_info)
|
|
276
290
|
|
|
@@ -617,5 +631,22 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
617
631
|
if adjusted:
|
|
618
632
|
state.register_region.store(self.project.arch.sp_offset, sp_v)
|
|
619
633
|
|
|
634
|
+
def _parse_type_hints(self, type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]]) -> None:
|
|
635
|
+
self.vvar_type_hints = {}
|
|
636
|
+
for loc, type_hint_str in type_hints:
|
|
637
|
+
if isinstance(loc, atoms.VirtualVariable):
|
|
638
|
+
type_hint = self._parse_type_hint(type_hint_str)
|
|
639
|
+
if type_hint is not None:
|
|
640
|
+
self.vvar_type_hints[loc.varid] = type_hint
|
|
641
|
+
# TODO: Handle other types of locations
|
|
642
|
+
|
|
643
|
+
def _parse_type_hint(self, type_hint_str: str) -> TypeConstant | None:
|
|
644
|
+
ty = SIM_TYPE_COLLECTIONS["cpp::std"].get(type_hint_str)
|
|
645
|
+
if ty is None:
|
|
646
|
+
return None
|
|
647
|
+
ty = ty.with_arch(self.project.arch)
|
|
648
|
+
lifted = TypeLifter(self.project.arch.bits).lift(ty)
|
|
649
|
+
return None if isinstance(lifted, (BottomType, TopType)) else lifted
|
|
650
|
+
|
|
620
651
|
|
|
621
652
|
AnalysesHub.register_default("VariableRecoveryFast", VariableRecoveryFast)
|