angr 9.2.148__py3-none-macosx_11_0_arm64.whl → 9.2.149__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/calling_convention/calling_convention.py +42 -2
- angr/analyses/cfg/cfg_emulated.py +5 -2
- angr/analyses/cfg/cfg_fast.py +48 -46
- angr/analyses/decompiler/ail_simplifier.py +65 -32
- angr/analyses/decompiler/block_simplifier.py +20 -6
- angr/analyses/decompiler/clinic.py +80 -13
- angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
- angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
- angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
- angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
- angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
- angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
- angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
- angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
- angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
- angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
- angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
- angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
- angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
- angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
- angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
- angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
- angr/analyses/decompiler/presets/fast.py +2 -0
- angr/analyses/decompiler/presets/full.py +2 -0
- angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
- angr/analyses/decompiler/ssailification/ssailification.py +23 -3
- angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
- angr/analyses/decompiler/structured_codegen/c.py +141 -10
- angr/analyses/decompiler/utils.py +6 -1
- angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
- angr/analyses/typehoon/lifter.py +20 -0
- angr/analyses/typehoon/simple_solver.py +42 -9
- angr/analyses/typehoon/translator.py +4 -1
- angr/analyses/typehoon/typeconsts.py +17 -6
- angr/analyses/typehoon/typehoon.py +21 -5
- angr/analyses/variable_recovery/engine_ail.py +44 -5
- angr/analyses/variable_recovery/engine_base.py +35 -12
- angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
- angr/calling_conventions.py +23 -5
- angr/engines/light/engine.py +7 -0
- angr/knowledge_plugins/functions/function.py +68 -0
- angr/knowledge_plugins/propagations/states.py +5 -2
- angr/knowledge_plugins/variables/variable_manager.py +3 -3
- angr/lib/angr_native.dylib +0 -0
- angr/procedures/definitions/__init__.py +1 -1
- angr/procedures/definitions/types_stl.py +22 -0
- angr/sim_type.py +251 -130
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/METADATA +7 -7
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/RECORD +56 -50
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/licenses/LICENSE +3 -0
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
- {angr-9.2.148.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
angr/analyses/typehoon/lifter.py
CHANGED
|
@@ -13,6 +13,7 @@ from angr.sim_type import (
|
|
|
13
13
|
SimTypeArray,
|
|
14
14
|
SimTypeFloat,
|
|
15
15
|
SimTypeDouble,
|
|
16
|
+
SimCppClass,
|
|
16
17
|
)
|
|
17
18
|
from .typeconsts import BottomType, Int8, Int16, Int32, Int64, Pointer32, Pointer64, Struct, Array, Float32, Float64
|
|
18
19
|
|
|
@@ -77,6 +78,24 @@ class TypeLifter:
|
|
|
77
78
|
obj.field_names = field_names
|
|
78
79
|
return obj
|
|
79
80
|
|
|
81
|
+
def _lift_SimCppClass(self, ty: SimCppClass) -> TypeConstant | BottomType:
|
|
82
|
+
if ty in self.memo:
|
|
83
|
+
return BottomType()
|
|
84
|
+
|
|
85
|
+
obj = Struct(fields={}, name=ty.name, is_cppclass=True)
|
|
86
|
+
self.memo[ty] = obj
|
|
87
|
+
converted_fields = {}
|
|
88
|
+
field_names = {}
|
|
89
|
+
ty_offsets = ty.offsets
|
|
90
|
+
for field_name, simtype in ty.members.items():
|
|
91
|
+
if field_name not in ty_offsets:
|
|
92
|
+
return BottomType()
|
|
93
|
+
converted_fields[ty_offsets[field_name]] = self.lift(simtype)
|
|
94
|
+
field_names[ty_offsets[field_name]] = field_name
|
|
95
|
+
obj.fields = converted_fields
|
|
96
|
+
obj.field_names = field_names
|
|
97
|
+
return obj
|
|
98
|
+
|
|
80
99
|
def _lift_SimTypeArray(self, ty: SimTypeArray) -> Array:
|
|
81
100
|
elem_type = self.lift(ty.elem_type)
|
|
82
101
|
return Array(elem_type, count=ty.length)
|
|
@@ -96,6 +115,7 @@ _mapping = {
|
|
|
96
115
|
SimTypeLongLong: TypeLifter._lift_SimTypeLongLong,
|
|
97
116
|
SimTypePointer: TypeLifter._lift_SimTypePointer,
|
|
98
117
|
SimStruct: TypeLifter._lift_SimStruct,
|
|
118
|
+
SimCppClass: TypeLifter._lift_SimCppClass,
|
|
99
119
|
SimTypeArray: TypeLifter._lift_SimTypeArray,
|
|
100
120
|
SimTypeFloat: TypeLifter._lift_SimTypeFloat,
|
|
101
121
|
SimTypeDouble: TypeLifter._lift_SimTypeDouble,
|
|
angr/analyses/typehoon/simple_solver.py
CHANGED
|
@@ -181,7 +181,7 @@ class Sketch:
|
|
|
181
181
|
|
|
182
182
|
def __init__(self, solver: SimpleSolver, root: TypeVariable):
|
|
183
183
|
self.root: SketchNode = SketchNode(root)
|
|
184
|
-
self.graph = networkx.DiGraph()
|
|
184
|
+
self.graph = networkx.MultiDiGraph()
|
|
185
185
|
self.node_mapping: dict[TypeVariable | DerivedTypeVariable, SketchNodeBase] = {}
|
|
186
186
|
self.solver = solver
|
|
187
187
|
|
|
@@ -200,7 +200,7 @@ class Sketch:
|
|
|
200
200
|
for label in typevar.labels:
|
|
201
201
|
succs = []
|
|
202
202
|
for _, dst, data in self.graph.out_edges(node, data=True):
|
|
203
|
-
if "label" in data and data["label"] == label:
|
|
203
|
+
if "label" in data and data["label"] == label and dst not in succs:
|
|
204
204
|
succs.append(dst)
|
|
205
205
|
if len(succs) > 1:
|
|
206
206
|
_l.warning(
|
|
@@ -215,6 +215,11 @@ class Sketch:
|
|
|
215
215
|
return node
|
|
216
216
|
|
|
217
217
|
def add_edge(self, src: SketchNodeBase, dst: SketchNodeBase, label) -> None:
|
|
218
|
+
# ensure the label does not already exist in existing edges
|
|
219
|
+
if self.graph.has_edge(src, dst):
|
|
220
|
+
for data in self.graph.get_edge_data(src, dst).values():
|
|
221
|
+
if "label" in data and data["label"] == label:
|
|
222
|
+
return
|
|
218
223
|
self.graph.add_edge(src, dst, label=label)
|
|
219
224
|
|
|
220
225
|
def add_constraint(self, constraint: TypeConstraint) -> None:
|
|
@@ -315,7 +320,7 @@ class ConstraintGraphNode:
|
|
|
315
320
|
tag_str = "R"
|
|
316
321
|
else:
|
|
317
322
|
tag_str = "U"
|
|
318
|
-
forgotten_str = "PRE" if FORGOTTEN.PRE_FORGOTTEN else "POST"
|
|
323
|
+
forgotten_str = "PRE" if self.forgotten == FORGOTTEN.PRE_FORGOTTEN else "POST"
|
|
319
324
|
s = f"{self.typevar}#{variance_str}.{tag_str}.{forgotten_str}"
|
|
320
325
|
if ":" in s:
|
|
321
326
|
return '"' + s + '"'
|
|
@@ -820,6 +825,7 @@ class SimpleSolver:
|
|
|
820
825
|
"""
|
|
821
826
|
|
|
822
827
|
graph = networkx.DiGraph()
|
|
828
|
+
constraints = self._get_transitive_subtype_constraints(constraints)
|
|
823
829
|
for constraint in constraints:
|
|
824
830
|
if isinstance(constraint, Subtype):
|
|
825
831
|
self._constraint_graph_add_edges(
|
|
@@ -830,6 +836,33 @@ class SimpleSolver:
|
|
|
830
836
|
self._constraint_graph_recall_forget_split(graph)
|
|
831
837
|
return graph
|
|
832
838
|
|
|
839
|
+
@staticmethod
|
|
840
|
+
def _get_transitive_subtype_constraints(constraints: set[TypeConstraint]) -> set[TypeConstraint]:
|
|
841
|
+
"""
|
|
842
|
+
Apply the S-Trans rule: a <: b, b <: c => a <: c
|
|
843
|
+
"""
|
|
844
|
+
tv2supertypes = defaultdict(set)
|
|
845
|
+
for constraint in constraints:
|
|
846
|
+
if isinstance(constraint, Subtype):
|
|
847
|
+
tv2supertypes[constraint.sub_type].add(constraint.super_type)
|
|
848
|
+
|
|
849
|
+
new_constraints = set()
|
|
850
|
+
while True:
|
|
851
|
+
changed = False
|
|
852
|
+
for subtype, supertypes in tv2supertypes.items():
|
|
853
|
+
supertypes_copy = set(supertypes)
|
|
854
|
+
for supertype in supertypes_copy:
|
|
855
|
+
if supertype in tv2supertypes:
|
|
856
|
+
for supertype_ in tv2supertypes[supertype]:
|
|
857
|
+
if supertype_ not in supertypes_copy:
|
|
858
|
+
changed = True
|
|
859
|
+
supertypes.add(supertype_)
|
|
860
|
+
new_constraints.add(Subtype(subtype, supertype_))
|
|
861
|
+
if not changed:
|
|
862
|
+
break
|
|
863
|
+
|
|
864
|
+
return constraints | new_constraints
|
|
865
|
+
|
|
833
866
|
@staticmethod
|
|
834
867
|
def _constraint_graph_add_recall_edges(graph: networkx.DiGraph, node: ConstraintGraphNode) -> None:
|
|
835
868
|
while True:
|
|
@@ -1234,21 +1267,21 @@ class SimpleSolver:
|
|
|
1234
1267
|
offset_to_maxsize[base] = max(offset_to_maxsize[base], (last_label.offset - base) + access_size)
|
|
1235
1268
|
offset_to_sizes[base].add(access_size)
|
|
1236
1269
|
|
|
1237
|
-
|
|
1270
|
+
idx_to_base = {}
|
|
1238
1271
|
|
|
1239
|
-
for labels,
|
|
1272
|
+
for idx, (labels, _) in enumerate(path_and_successors):
|
|
1240
1273
|
last_label = labels[-1] if labels else None
|
|
1241
1274
|
if isinstance(last_label, HasField):
|
|
1242
1275
|
prev_offset = next(offset_to_base.irange(maximum=last_label.offset, reverse=True))
|
|
1243
|
-
|
|
1276
|
+
idx_to_base[idx] = offset_to_base[prev_offset]
|
|
1244
1277
|
|
|
1245
1278
|
node_by_offset = defaultdict(set)
|
|
1246
1279
|
|
|
1247
|
-
for labels, succ in path_and_successors:
|
|
1280
|
+
for idx, (labels, succ) in enumerate(path_and_successors):
|
|
1248
1281
|
last_label = labels[-1] if labels else None
|
|
1249
1282
|
if isinstance(last_label, HasField):
|
|
1250
|
-
if
|
|
1251
|
-
node_by_offset[
|
|
1283
|
+
if idx in idx_to_base:
|
|
1284
|
+
node_by_offset[idx_to_base[idx]].add(succ)
|
|
1252
1285
|
else:
|
|
1253
1286
|
node_by_offset[last_label.offset].add(succ)
|
|
1254
1287
|
|
|
angr/analyses/typehoon/translator.py
CHANGED
|
@@ -105,7 +105,10 @@ class TypeTranslator:
|
|
|
105
105
|
|
|
106
106
|
name = tc.name if tc.name else self.struct_name()
|
|
107
107
|
|
|
108
|
-
|
|
108
|
+
if tc.is_cppclass:
|
|
109
|
+
s = sim_type.SimCppClass(name=name).with_arch(self.arch)
|
|
110
|
+
else:
|
|
111
|
+
s = sim_type.SimStruct({}, name=name).with_arch(self.arch)
|
|
109
112
|
self.structs[tc] = s
|
|
110
113
|
|
|
111
114
|
next_offset = 0
|
|
angr/analyses/typehoon/typeconsts.py
CHANGED
|
@@ -114,6 +114,18 @@ class Int512(Int):
|
|
|
114
114
|
return "int512"
|
|
115
115
|
|
|
116
116
|
|
|
117
|
+
class IntVar(Int):
|
|
118
|
+
def __init__(self, size):
|
|
119
|
+
self._size = size
|
|
120
|
+
|
|
121
|
+
@property
|
|
122
|
+
def size(self) -> int:
|
|
123
|
+
return self._size
|
|
124
|
+
|
|
125
|
+
def __repr__(self, memo=None):
|
|
126
|
+
return "intvar"
|
|
127
|
+
|
|
128
|
+
|
|
117
129
|
class Float(TypeConstant):
|
|
118
130
|
def __repr__(self, memo=None) -> str:
|
|
119
131
|
return "floatbase"
|
|
@@ -211,10 +223,11 @@ class Array(TypeConstant):
|
|
|
211
223
|
|
|
212
224
|
|
|
213
225
|
class Struct(TypeConstant):
|
|
214
|
-
def __init__(self, fields=None, name=None, field_names=None):
|
|
226
|
+
def __init__(self, fields=None, name=None, field_names=None, is_cppclass: bool = False):
|
|
215
227
|
self.fields = {} if fields is None else fields # offset to type
|
|
216
228
|
self.name = name
|
|
217
229
|
self.field_names = field_names
|
|
230
|
+
self.is_cppclass = is_cppclass
|
|
218
231
|
|
|
219
232
|
def _hash(self, visited: set[int]):
|
|
220
233
|
if id(self) in visited:
|
|
@@ -236,9 +249,9 @@ class Struct(TypeConstant):
|
|
|
236
249
|
|
|
237
250
|
@memoize
|
|
238
251
|
def __repr__(self, memo=None):
|
|
239
|
-
prefix = "struct"
|
|
252
|
+
prefix = "CppClass" if self.is_cppclass else "struct"
|
|
240
253
|
if self.name:
|
|
241
|
-
prefix = f"
|
|
254
|
+
prefix = f"{prefix} {self.name}"
|
|
242
255
|
return (
|
|
243
256
|
prefix
|
|
244
257
|
+ "{"
|
|
@@ -312,9 +325,7 @@ def int_type(bits: int) -> Int:
|
|
|
312
325
|
256: Int256,
|
|
313
326
|
512: Int512,
|
|
314
327
|
}
|
|
315
|
-
if bits in mapping:
|
|
316
|
-
return mapping[bits]()
|
|
317
|
-
raise TypeError(f"Not a known size of int: {bits}")
|
|
328
|
+
return mapping[bits]() if bits in mapping else IntVar(bits)
|
|
318
329
|
|
|
319
330
|
|
|
320
331
|
def float_type(bits: int) -> Float | None:
|
|
angr/analyses/typehoon/typehoon.py
CHANGED
|
@@ -10,7 +10,7 @@ from angr.sim_variable import SimVariable, SimStackVariable
|
|
|
10
10
|
from .simple_solver import SimpleSolver
|
|
11
11
|
from .translator import TypeTranslator
|
|
12
12
|
from .typeconsts import Struct, Pointer, TypeConstant, Array, TopType
|
|
13
|
-
from .typevars import Equivalence, Subtype, TypeVariable
|
|
13
|
+
from .typevars import Equivalence, Subtype, TypeVariable, DerivedTypeVariable
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
16
|
from angr.sim_type import SimType
|
|
@@ -187,6 +187,10 @@ class Typehoon(Analysis):
|
|
|
187
187
|
if self._ground_truth and self.simtypes_solution is not None:
|
|
188
188
|
self.simtypes_solution.update(self._ground_truth)
|
|
189
189
|
|
|
190
|
+
@staticmethod
|
|
191
|
+
def _resolve_derived(tv):
|
|
192
|
+
return tv.type_var if isinstance(tv, DerivedTypeVariable) else tv
|
|
193
|
+
|
|
190
194
|
def _solve(self):
|
|
191
195
|
typevars = set()
|
|
192
196
|
if self._var_mapping:
|
|
@@ -198,9 +202,10 @@ class Typehoon(Analysis):
|
|
|
198
202
|
for constraint in self._constraints[self.func_var]:
|
|
199
203
|
if isinstance(constraint, Subtype):
|
|
200
204
|
if isinstance(constraint.sub_type, TypeVariable):
|
|
201
|
-
typevars.add(constraint.sub_type)
|
|
205
|
+
typevars.add(self._resolve_derived(constraint.sub_type))
|
|
202
206
|
if isinstance(constraint.super_type, TypeVariable):
|
|
203
|
-
typevars.add(constraint.super_type)
|
|
207
|
+
typevars.add(self._resolve_derived(constraint.super_type))
|
|
208
|
+
|
|
204
209
|
solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
|
|
205
210
|
self.solution = solver.solution
|
|
206
211
|
|
|
@@ -214,13 +219,16 @@ class Typehoon(Analysis):
|
|
|
214
219
|
if not self.solution:
|
|
215
220
|
return
|
|
216
221
|
|
|
222
|
+
memo = set()
|
|
217
223
|
for tv in list(self.solution.keys()):
|
|
218
224
|
if self._must_struct and tv in self._must_struct:
|
|
219
225
|
continue
|
|
220
226
|
sol = self.solution[tv]
|
|
221
|
-
specialized = self._specialize_struct(sol)
|
|
227
|
+
specialized = self._specialize_struct(sol, memo=memo)
|
|
222
228
|
if specialized is not None:
|
|
223
229
|
self.solution[tv] = specialized
|
|
230
|
+
else:
|
|
231
|
+
memo.add(sol)
|
|
224
232
|
|
|
225
233
|
def _specialize_struct(self, tc, memo: set | None = None):
|
|
226
234
|
if isinstance(tc, Pointer):
|
|
@@ -240,7 +248,11 @@ class Typehoon(Analysis):
|
|
|
240
248
|
return field0
|
|
241
249
|
|
|
242
250
|
# are all fields the same?
|
|
243
|
-
if
|
|
251
|
+
if (
|
|
252
|
+
len(tc.fields) > 1
|
|
253
|
+
and not self._is_pointer_to(field0, tc)
|
|
254
|
+
and all(tc.fields[off] == field0 for off in offsets)
|
|
255
|
+
):
|
|
244
256
|
# are all fields aligned properly?
|
|
245
257
|
try:
|
|
246
258
|
alignment = field0.size
|
|
@@ -257,6 +269,10 @@ class Typehoon(Analysis):
|
|
|
257
269
|
|
|
258
270
|
return None
|
|
259
271
|
|
|
272
|
+
@staticmethod
|
|
273
|
+
def _is_pointer_to(pointer_to: TypeConstant, base_type: TypeConstant) -> bool:
|
|
274
|
+
return isinstance(pointer_to, Pointer) and pointer_to.basetype == base_type
|
|
275
|
+
|
|
260
276
|
def _translate_to_simtypes(self):
|
|
261
277
|
"""
|
|
262
278
|
Translate solutions in type variables to solutions in SimTypes.
|
|
angr/analyses/variable_recovery/engine_ail.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, cast
|
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
6
|
import ailment
|
|
7
|
+
from ailment.constant import UNDETERMINED_SIZE
|
|
7
8
|
import claripy
|
|
8
9
|
from unique_log_filter import UniqueLogFilter
|
|
9
10
|
|
|
@@ -30,8 +31,15 @@ class SimEngineVRAIL(
|
|
|
30
31
|
The engine for variable recovery on AIL.
|
|
31
32
|
"""
|
|
32
33
|
|
|
33
|
-
def __init__(
|
|
34
|
-
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
*args,
|
|
37
|
+
call_info=None,
|
|
38
|
+
vvar_to_vvar: dict[int, int] | None,
|
|
39
|
+
vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None,
|
|
40
|
+
**kwargs,
|
|
41
|
+
):
|
|
42
|
+
super().__init__(*args, vvar_type_hints=vvar_type_hints, **kwargs)
|
|
35
43
|
|
|
36
44
|
self._reference_spoffset: bool = False
|
|
37
45
|
self.call_info = call_info or {}
|
|
@@ -100,6 +108,13 @@ class SimEngineVRAIL(
|
|
|
100
108
|
else:
|
|
101
109
|
l.warning("Unsupported dst type %s.", dst_type)
|
|
102
110
|
|
|
111
|
+
def _handle_stmt_WeakAssignment(self, stmt) -> None:
|
|
112
|
+
src = self._expr(stmt.src)
|
|
113
|
+
dst = self._expr(stmt.dst)
|
|
114
|
+
if isinstance(src, RichR) and isinstance(dst, RichR) and src.typevar is not None and dst.typevar is not None:
|
|
115
|
+
tc = typevars.Subtype(src.typevar, dst.typevar)
|
|
116
|
+
self.state.add_type_constraint(tc)
|
|
117
|
+
|
|
103
118
|
def _handle_stmt_Store(self, stmt: ailment.Stmt.Store):
|
|
104
119
|
addr_r = self._expr_bv(stmt.addr)
|
|
105
120
|
data = self._expr(stmt.data)
|
|
@@ -325,7 +340,9 @@ class SimEngineVRAIL(
|
|
|
325
340
|
addr_r = self._expr_bv(expr.addr)
|
|
326
341
|
size = expr.size
|
|
327
342
|
|
|
328
|
-
|
|
343
|
+
if size != UNDETERMINED_SIZE:
|
|
344
|
+
return self._load(addr_r, size, expr=expr)
|
|
345
|
+
return self._top(8)
|
|
329
346
|
|
|
330
347
|
def _handle_expr_VirtualVariable(self, expr: ailment.Expr.VirtualVariable):
|
|
331
348
|
return self._read_from_vvar(expr, expr=expr, vvar_id=self._mapped_vvarid(expr.varid))
|
|
@@ -419,6 +436,29 @@ class SimEngineVRAIL(
|
|
|
419
436
|
self._reference(richr, codeloc, src=expr)
|
|
420
437
|
return richr
|
|
421
438
|
|
|
439
|
+
def _handle_unop_Reference(self, expr: ailment.Expr.UnaryOp):
|
|
440
|
+
if isinstance(expr.operand, ailment.Expr.VirtualVariable) and expr.operand.was_stack:
|
|
441
|
+
off = expr.operand.stack_offset
|
|
442
|
+
refbase_typevar = self.state.stack_offset_typevars.get(off, None)
|
|
443
|
+
if refbase_typevar is None:
|
|
444
|
+
# allocate a new type variable
|
|
445
|
+
refbase_typevar = typevars.TypeVariable()
|
|
446
|
+
self.state.stack_offset_typevars[off] = refbase_typevar
|
|
447
|
+
|
|
448
|
+
ref_typevar = typevars.TypeVariable()
|
|
449
|
+
access_derived_typevar = self._create_access_typevar(ref_typevar, False, None, 0)
|
|
450
|
+
load_constraint = typevars.Subtype(refbase_typevar, access_derived_typevar)
|
|
451
|
+
self.state.add_type_constraint(load_constraint)
|
|
452
|
+
|
|
453
|
+
value_v = self.state.stack_address(off)
|
|
454
|
+
richr = RichR(value_v, typevar=ref_typevar)
|
|
455
|
+
codeloc = self._codeloc()
|
|
456
|
+
self._ensure_variable_existence(richr, codeloc, src_expr=expr.operand)
|
|
457
|
+
if self._reference_spoffset:
|
|
458
|
+
self._reference(richr, codeloc, src=expr.operand)
|
|
459
|
+
return richr
|
|
460
|
+
return RichR(self.state.top(expr.bits))
|
|
461
|
+
|
|
422
462
|
def _handle_expr_BasePointerOffset(self, expr):
|
|
423
463
|
# TODO
|
|
424
464
|
return self._top(expr.bits)
|
|
@@ -433,7 +473,7 @@ class SimEngineVRAIL(
|
|
|
433
473
|
def _handle_binop_Add(self, expr):
|
|
434
474
|
arg0, arg1 = expr.operands
|
|
435
475
|
r0, r1 = self._expr_pair(arg0, arg1)
|
|
436
|
-
compute = r0.data + r1.data # type: ignore
|
|
476
|
+
compute = r0.data + r1.data if r0.data.size() == r1.data.size() else self.state.top(expr.bits) # type: ignore
|
|
437
477
|
|
|
438
478
|
type_constraints = set()
|
|
439
479
|
# create a new type variable and add constraints accordingly
|
|
@@ -844,7 +884,6 @@ class SimEngineVRAIL(
|
|
|
844
884
|
self._expr(expr.operands[0])
|
|
845
885
|
return RichR(self.state.top(expr.bits))
|
|
846
886
|
|
|
847
|
-
_handle_unop_Reference = _handle_unop_Default
|
|
848
887
|
_handle_unop_Dereference = _handle_unop_Default
|
|
849
888
|
_handle_unop_Clz = _handle_unop_Default
|
|
850
889
|
_handle_unop_Ctz = _handle_unop_Default
|
|
angr/analyses/variable_recovery/engine_base.py
CHANGED
|
@@ -70,9 +70,12 @@ class SimEngineVRBase(
|
|
|
70
70
|
and storing data.
|
|
71
71
|
"""
|
|
72
72
|
|
|
73
|
-
def __init__(self, project, kb):
|
|
73
|
+
def __init__(self, project, kb, vvar_type_hints: dict[int, typeconsts.TypeConstant] | None = None):
|
|
74
74
|
super().__init__(project)
|
|
75
75
|
|
|
76
|
+
self.vvar_type_hints: dict[int, typeconsts.TypeConstant] = (
|
|
77
|
+
vvar_type_hints if vvar_type_hints is not None else {}
|
|
78
|
+
)
|
|
76
79
|
self.kb = kb
|
|
77
80
|
self.vvar_region: dict[int, Any] = {}
|
|
78
81
|
|
|
@@ -453,13 +456,19 @@ class SimEngineVRBase(
|
|
|
453
456
|
# assign a new type variable to it
|
|
454
457
|
typevar = typevars.TypeVariable()
|
|
455
458
|
self.state.typevars.add_type_variable(variable, typevar)
|
|
456
|
-
# create constraints
|
|
457
459
|
else:
|
|
458
460
|
typevar = self.state.typevars.get_type_variable(variable)
|
|
461
|
+
|
|
462
|
+
# create constraints accordingly
|
|
463
|
+
|
|
459
464
|
self.state.add_type_constraint(typevars.Subtype(richr.typevar, typevar))
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
465
|
+
if vvar.varid in self.vvar_type_hints:
|
|
466
|
+
# handle type hints
|
|
467
|
+
self.state.add_type_constraint(typevars.Subtype(typevar, self.vvar_type_hints[vvar.varid]))
|
|
468
|
+
else:
|
|
469
|
+
# the constraint below is a default constraint that may conflict with more specific ones with different
|
|
470
|
+
# sizes; we post-process at the very end of VRA to remove conflicting default constraints.
|
|
471
|
+
self.state.add_type_constraint(typevars.Subtype(typevar, typeconsts.int_type(variable.size * 8)))
|
|
463
472
|
|
|
464
473
|
return variable
|
|
465
474
|
|
|
@@ -978,14 +987,22 @@ class SimEngineVRBase(
|
|
|
978
987
|
value = self.state.top(size * self.project.arch.byte_width)
|
|
979
988
|
if create_variable:
|
|
980
989
|
# create a new variable if necessary
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
990
|
+
|
|
991
|
+
# check if there is an existing variable for the atom at this location already
|
|
992
|
+
existing_vars: set[tuple[SimVariable, int]] = self.state.variable_manager[
|
|
993
|
+
self.func_addr
|
|
994
|
+
].find_variables_by_atom(self.block.addr, self.stmt_idx, expr)
|
|
995
|
+
if not existing_vars:
|
|
996
|
+
variable = SimRegisterVariable(
|
|
997
|
+
offset,
|
|
998
|
+
size if force_variable_size is None else force_variable_size,
|
|
999
|
+
ident=self.state.variable_manager[self.func_addr].next_variable_ident("register"),
|
|
1000
|
+
region=self.func_addr,
|
|
1001
|
+
)
|
|
1002
|
+
self.state.variable_manager[self.func_addr].add_variable("register", offset, variable)
|
|
1003
|
+
else:
|
|
1004
|
+
variable = next(iter(existing_vars))[0]
|
|
987
1005
|
value = self.state.annotate_with_variables(value, [(0, variable)])
|
|
988
|
-
self.state.variable_manager[self.func_addr].add_variable("register", offset, variable)
|
|
989
1006
|
self.state.register_region.store(offset, value)
|
|
990
1007
|
value_list = [{value}]
|
|
991
1008
|
else:
|
|
@@ -1131,6 +1148,12 @@ class SimEngineVRBase(
|
|
|
1131
1148
|
if var is not None and var.size != vvar.size:
|
|
1132
1149
|
# ignore the variable and the associated type if we are only reading part of the variable
|
|
1133
1150
|
return RichR(value, variable=var)
|
|
1151
|
+
|
|
1152
|
+
# handle type hints
|
|
1153
|
+
if vvar.varid in self.vvar_type_hints:
|
|
1154
|
+
assert isinstance(typevar, typevars.TypeVariable)
|
|
1155
|
+
self.state.add_type_constraint(typevars.Subtype(typevar, self.vvar_type_hints[vvar.varid]))
|
|
1156
|
+
|
|
1134
1157
|
return RichR(value, variable=var, typevar=typevar)
|
|
1135
1158
|
|
|
1136
1159
|
def _create_access_typevar(
|
|
angr/analyses/variable_recovery/variable_recovery_fast.py
CHANGED
|
@@ -12,21 +12,25 @@ import ailment
|
|
|
12
12
|
from ailment.expression import VirtualVariable
|
|
13
13
|
|
|
14
14
|
import angr.errors
|
|
15
|
+
from angr import SIM_TYPE_COLLECTIONS
|
|
15
16
|
from angr.analyses import AnalysesHub
|
|
16
17
|
from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
|
|
17
18
|
from angr.block import Block
|
|
18
19
|
from angr.errors import AngrVariableRecoveryError, SimEngineError
|
|
19
20
|
from angr.knowledge_plugins import Function
|
|
21
|
+
from angr.knowledge_plugins.key_definitions import atoms
|
|
20
22
|
from angr.sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
|
|
21
23
|
from angr.engines.vex.claripy.irop import vexop_to_simop
|
|
22
24
|
from angr.analyses import ForwardAnalysis, visitors
|
|
23
25
|
from angr.analyses.typehoon.typevars import Equivalence, TypeVariable, TypeVariables, Subtype, DerivedTypeVariable
|
|
24
|
-
from angr.analyses.typehoon.typeconsts import Int
|
|
26
|
+
from angr.analyses.typehoon.typeconsts import Int, TypeConstant, BottomType, TopType
|
|
27
|
+
from angr.analyses.typehoon.lifter import TypeLifter
|
|
25
28
|
from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
|
|
26
29
|
from .engine_vex import SimEngineVRVEX
|
|
27
30
|
from .engine_ail import SimEngineVRAIL
|
|
28
31
|
import contextlib
|
|
29
32
|
|
|
33
|
+
|
|
30
34
|
if TYPE_CHECKING:
|
|
31
35
|
from angr.analyses.typehoon.typevars import TypeConstraint
|
|
32
36
|
|
|
@@ -241,6 +245,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
241
245
|
unify_variables=True,
|
|
242
246
|
func_arg_vvars: dict[int, tuple[VirtualVariable, SimVariable]] | None = None,
|
|
243
247
|
vvar_to_vvar: dict[int, int] | None = None,
|
|
248
|
+
type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
|
|
244
249
|
):
|
|
245
250
|
if not isinstance(func, Function):
|
|
246
251
|
func = self.kb.functions[func]
|
|
@@ -269,8 +274,17 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
269
274
|
self._func_arg_vvars = func_arg_vvars
|
|
270
275
|
self._unify_variables = unify_variables
|
|
271
276
|
|
|
277
|
+
# handle type hints
|
|
278
|
+
self.vvar_type_hints = {}
|
|
279
|
+
if type_hints:
|
|
280
|
+
self._parse_type_hints(type_hints)
|
|
281
|
+
|
|
272
282
|
self._ail_engine: SimEngineVRAIL = SimEngineVRAIL(
|
|
273
|
-
self.project,
|
|
283
|
+
self.project,
|
|
284
|
+
self.kb,
|
|
285
|
+
call_info=call_info,
|
|
286
|
+
vvar_to_vvar=self.vvar_to_vvar,
|
|
287
|
+
vvar_type_hints=self.vvar_type_hints,
|
|
274
288
|
)
|
|
275
289
|
self._vex_engine: SimEngineVRVEX = SimEngineVRVEX(self.project, self.kb, call_info=call_info)
|
|
276
290
|
|
|
@@ -617,5 +631,22 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
617
631
|
if adjusted:
|
|
618
632
|
state.register_region.store(self.project.arch.sp_offset, sp_v)
|
|
619
633
|
|
|
634
|
+
def _parse_type_hints(self, type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]]) -> None:
|
|
635
|
+
self.vvar_type_hints = {}
|
|
636
|
+
for loc, type_hint_str in type_hints:
|
|
637
|
+
if isinstance(loc, atoms.VirtualVariable):
|
|
638
|
+
type_hint = self._parse_type_hint(type_hint_str)
|
|
639
|
+
if type_hint is not None:
|
|
640
|
+
self.vvar_type_hints[loc.varid] = type_hint
|
|
641
|
+
# TODO: Handle other types of locations
|
|
642
|
+
|
|
643
|
+
def _parse_type_hint(self, type_hint_str: str) -> TypeConstant | None:
|
|
644
|
+
ty = SIM_TYPE_COLLECTIONS["cpp::std"].get(type_hint_str)
|
|
645
|
+
if ty is None:
|
|
646
|
+
return None
|
|
647
|
+
ty = ty.with_arch(self.project.arch)
|
|
648
|
+
lifted = TypeLifter(self.project.arch.bits).lift(ty)
|
|
649
|
+
return None if isinstance(lifted, (BottomType, TopType)) else lifted
|
|
650
|
+
|
|
620
651
|
|
|
621
652
|
AnalysesHub.register_default("VariableRecoveryFast", VariableRecoveryFast)
|
angr/calling_conventions.py
CHANGED
|
@@ -1196,7 +1196,7 @@ class SimCC:
|
|
|
1196
1196
|
|
|
1197
1197
|
@staticmethod
|
|
1198
1198
|
def find_cc(
|
|
1199
|
-
arch: archinfo.Arch, args: list[SimRegArg | SimStackArg], sp_delta: int, platform: str = "Linux"
|
|
1199
|
+
arch: archinfo.Arch, args: list[SimRegArg | SimStackArg], sp_delta: int, platform: str | None = "Linux"
|
|
1200
1200
|
) -> SimCC | None:
|
|
1201
1201
|
"""
|
|
1202
1202
|
Pinpoint the best-fit calling convention and return the corresponding SimCC instance, or None if no fit is
|
|
@@ -1335,6 +1335,21 @@ class SimCCMicrosoftCdecl(SimCCCdecl):
|
|
|
1335
1335
|
STRUCT_RETURN_THRESHOLD = 64
|
|
1336
1336
|
|
|
1337
1337
|
|
|
1338
|
+
class SimCCMicrosoftThiscall(SimCCCdecl):
|
|
1339
|
+
CALLEE_CLEANUP = True
|
|
1340
|
+
ARG_REGS = ["ecx"]
|
|
1341
|
+
CALLER_SAVED_REGS = ["eax", "ecx", "edx"]
|
|
1342
|
+
STRUCT_RETURN_THRESHOLD = 64
|
|
1343
|
+
|
|
1344
|
+
def arg_locs(self, prototype) -> list[SimFunctionArgument]:
|
|
1345
|
+
if prototype._arch is None:
|
|
1346
|
+
prototype = prototype.with_arch(self.arch)
|
|
1347
|
+
session = self.arg_session(prototype.returnty)
|
|
1348
|
+
if not prototype.args:
|
|
1349
|
+
return []
|
|
1350
|
+
return [SimRegArg("ecx", self.arch.bytes)] + [self.next_arg(session, arg_ty) for arg_ty in prototype.args[1:]]
|
|
1351
|
+
|
|
1352
|
+
|
|
1338
1353
|
class SimCCStdcall(SimCCMicrosoftCdecl):
|
|
1339
1354
|
CALLEE_CLEANUP = True
|
|
1340
1355
|
|
|
@@ -1469,7 +1484,7 @@ class SimCCSyscall(SimCC):
|
|
|
1469
1484
|
self.ERROR_REG.set_value(state, error_reg_val)
|
|
1470
1485
|
return expr
|
|
1471
1486
|
|
|
1472
|
-
def set_return_val(self, state, val, ty, **kwargs): # pylint:disable=arguments-differ
|
|
1487
|
+
def set_return_val(self, state, val, ty, **kwargs): # type:ignore # pylint:disable=arguments-differ
|
|
1473
1488
|
if self.ERROR_REG is not None:
|
|
1474
1489
|
val = self.linux_syscall_update_error_reg(state, val)
|
|
1475
1490
|
super().set_return_val(state, val, ty, **kwargs)
|
|
@@ -1607,6 +1622,7 @@ class SimCCSystemVAMD64(SimCC):
|
|
|
1607
1622
|
classification = self._classify(ty)
|
|
1608
1623
|
if any(cls == "MEMORY" for cls in classification):
|
|
1609
1624
|
assert all(cls == "MEMORY" for cls in classification)
|
|
1625
|
+
assert ty.size is not None
|
|
1610
1626
|
byte_size = ty.size // self.arch.byte_width
|
|
1611
1627
|
referenced_locs = [SimStackArg(offset, self.arch.bytes) for offset in range(0, byte_size, self.arch.bytes)]
|
|
1612
1628
|
referenced_loc = refine_locs_with_struct_type(self.arch, referenced_locs, ty)
|
|
@@ -1645,6 +1661,7 @@ class SimCCSystemVAMD64(SimCC):
|
|
|
1645
1661
|
if isinstance(ty, (SimTypeFloat,)):
|
|
1646
1662
|
return ["SSE"] + ["SSEUP"] * (nchunks - 1)
|
|
1647
1663
|
if isinstance(ty, (SimStruct, SimTypeFixedSizeArray, SimUnion)):
|
|
1664
|
+
assert ty.size is not None
|
|
1648
1665
|
if ty.size > 512:
|
|
1649
1666
|
return ["MEMORY"] * nchunks
|
|
1650
1667
|
flattened = self._flatten(ty)
|
|
@@ -1723,7 +1740,7 @@ class SimCCAMD64LinuxSyscall(SimCCSyscall):
|
|
|
1723
1740
|
CALLER_SAVED_REGS = ["rax", "rcx", "r11"]
|
|
1724
1741
|
|
|
1725
1742
|
@staticmethod
|
|
1726
|
-
def _match(arch, args, sp_delta): # pylint: disable=unused-argument
|
|
1743
|
+
def _match(arch, args, sp_delta): # type:ignore # pylint: disable=unused-argument
|
|
1727
1744
|
# doesn't appear anywhere but syscalls
|
|
1728
1745
|
return False
|
|
1729
1746
|
|
|
@@ -1855,6 +1872,7 @@ class SimCCARM(SimCC):
|
|
|
1855
1872
|
for suboffset, subsubty_list in subresult.items():
|
|
1856
1873
|
result[offset + suboffset] += subsubty_list
|
|
1857
1874
|
elif isinstance(ty, SimTypeFixedSizeArray):
|
|
1875
|
+
assert ty.elem_type.size is not None
|
|
1858
1876
|
subresult = self._flatten(ty.elem_type)
|
|
1859
1877
|
if subresult is None:
|
|
1860
1878
|
return None
|
|
@@ -2273,7 +2291,7 @@ class SimCCUnknown(SimCC):
|
|
|
2273
2291
|
"""
|
|
2274
2292
|
|
|
2275
2293
|
@staticmethod
|
|
2276
|
-
def _match(arch, args, sp_delta): # pylint: disable=unused-argument
|
|
2294
|
+
def _match(arch, args, sp_delta): # type:ignore # pylint: disable=unused-argument
|
|
2277
2295
|
# It always returns True
|
|
2278
2296
|
return True
|
|
2279
2297
|
|
|
@@ -2317,7 +2335,7 @@ CC: dict[str, dict[str, list[type[SimCC]]]] = {
|
|
|
2317
2335
|
"default": [SimCCCdecl],
|
|
2318
2336
|
"Linux": [SimCCCdecl],
|
|
2319
2337
|
"CGC": [SimCCCdecl],
|
|
2320
|
-
"Win32": [SimCCMicrosoftCdecl, SimCCMicrosoftFastcall],
|
|
2338
|
+
"Win32": [SimCCMicrosoftCdecl, SimCCMicrosoftFastcall, SimCCMicrosoftThiscall],
|
|
2321
2339
|
},
|
|
2322
2340
|
"ARMEL": {
|
|
2323
2341
|
"default": [SimCCARM],
|
angr/engines/light/engine.py
CHANGED
|
@@ -533,6 +533,7 @@ class SimEngineLightAIL(
|
|
|
533
533
|
def __init__(self, *args, **kwargs):
|
|
534
534
|
self._stmt_handlers: dict[str, Callable[[Any], StmtDataType]] = {
|
|
535
535
|
"Assignment": self._handle_stmt_Assignment,
|
|
536
|
+
"WeakAssignment": self._handle_stmt_WeakAssignment,
|
|
536
537
|
"Store": self._handle_stmt_Store,
|
|
537
538
|
"Jump": self._handle_stmt_Jump,
|
|
538
539
|
"ConditionalJump": self._handle_stmt_ConditionalJump,
|
|
@@ -697,6 +698,9 @@ class SimEngineLightAIL(
|
|
|
697
698
|
@abstractmethod
|
|
698
699
|
def _handle_stmt_Assignment(self, stmt: ailment.statement.Assignment) -> StmtDataType: ...
|
|
699
700
|
|
|
701
|
+
@abstractmethod
|
|
702
|
+
def _handle_stmt_WeakAssignment(self, stmt: ailment.statement.WeakAssignment) -> StmtDataType: ...
|
|
703
|
+
|
|
700
704
|
@abstractmethod
|
|
701
705
|
def _handle_stmt_Store(self, stmt: ailment.statement.Store) -> StmtDataType: ...
|
|
702
706
|
|
|
@@ -1006,6 +1010,9 @@ class SimEngineNostmtAIL(
|
|
|
1006
1010
|
def _handle_stmt_Assignment(self, stmt) -> StmtDataType | None:
|
|
1007
1011
|
pass
|
|
1008
1012
|
|
|
1013
|
+
def _handle_stmt_WeakAssignment(self, stmt) -> StmtDataType | None:
|
|
1014
|
+
pass
|
|
1015
|
+
|
|
1009
1016
|
def _handle_stmt_Store(self, stmt) -> StmtDataType | None:
|
|
1010
1017
|
pass
|
|
1011
1018
|
|