angr 9.2.159__cp310-abi3-manylinux2014_aarch64.whl → 9.2.161__cp310-abi3-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +4 -1
- angr/analyses/decompiler/ail_simplifier.py +81 -1
- angr/analyses/decompiler/block_simplifier.py +7 -5
- angr/analyses/decompiler/clinic.py +5 -1
- angr/analyses/decompiler/decompiler.py +12 -9
- angr/analyses/decompiler/peephole_optimizations/__init__.py +4 -4
- angr/analyses/decompiler/peephole_optimizations/eager_eval.py +53 -0
- angr/analyses/decompiler/peephole_optimizations/modulo_simplifier.py +89 -0
- angr/analyses/decompiler/peephole_optimizations/{const_mull_a_shift.py → optimized_div_simplifier.py} +139 -25
- angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +18 -9
- angr/analyses/decompiler/structuring/phoenix.py +19 -32
- angr/analyses/s_reaching_definitions/s_rda_model.py +1 -0
- angr/analyses/s_reaching_definitions/s_reaching_definitions.py +5 -2
- angr/analyses/typehoon/simple_solver.py +231 -29
- angr/analyses/typehoon/typehoon.py +10 -2
- angr/analyses/variable_recovery/engine_ail.py +8 -20
- angr/analyses/variable_recovery/engine_base.py +9 -1
- angr/analyses/variable_recovery/variable_recovery_base.py +30 -2
- angr/analyses/variable_recovery/variable_recovery_fast.py +11 -2
- angr/emulator.py +143 -0
- angr/engines/concrete.py +66 -0
- angr/engines/icicle.py +66 -30
- angr/exploration_techniques/driller_core.py +2 -2
- angr/project.py +7 -0
- angr/rustylib.abi3.so +0 -0
- angr/sim_type.py +16 -8
- angr/state_plugins/unicorn_engine.py +4 -4
- angr/utils/graph.py +20 -1
- angr/utils/ssa/__init__.py +3 -3
- {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/METADATA +5 -6
- {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/RECORD +36 -34
- angr/analyses/decompiler/peephole_optimizations/a_sub_a_div_const_mul_const.py +0 -57
- /angr/{lib/angr_native.so → unicornlib.so} +0 -0
- {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/WHEEL +0 -0
- {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/entry_points.txt +0 -0
- {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/licenses/LICENSE +0 -0
- {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/top_level.txt +0 -0
|
@@ -33,6 +33,7 @@ class RemoveRedundantBitmasks(PeepholeOptimizationExprBase):
|
|
|
33
33
|
def _optimize_BinaryOp(self, expr: BinaryOp):
|
|
34
34
|
# And(expr, full_N_bitmask) ==> expr
|
|
35
35
|
# And(SHR(expr, N), bitmask) ==> SHR(expr, N)
|
|
36
|
+
# And(Div(Conv(M->N, expr), P), 2 ** M - 1) ==> Div(Conv(M->N, expr), P) where M < N
|
|
36
37
|
# And(Conv(1->N, expr), bitmask) ==> Conv(1->N, expr)
|
|
37
38
|
# And(Conv(1->N, bool_expr), bitmask) ==> Conv(1->N, bool_expr)
|
|
38
39
|
# And(ITE(?, const_expr, const_expr), bitmask) ==> ITE(?, const_expr, const_expr)
|
|
@@ -41,15 +42,23 @@ class RemoveRedundantBitmasks(PeepholeOptimizationExprBase):
|
|
|
41
42
|
if expr.operands[1].value == _MASKS.get(inner_expr.bits, None):
|
|
42
43
|
return inner_expr
|
|
43
44
|
|
|
44
|
-
if isinstance(inner_expr, BinaryOp)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
45
|
+
if isinstance(inner_expr, BinaryOp):
|
|
46
|
+
if inner_expr.op == "Shr":
|
|
47
|
+
mask = expr.operands[1]
|
|
48
|
+
shift_val = inner_expr.operands[1]
|
|
49
|
+
if (
|
|
50
|
+
isinstance(shift_val, Const)
|
|
51
|
+
and shift_val.value in _MASKS
|
|
52
|
+
and mask.value == _MASKS.get(int(64 - shift_val.value), None)
|
|
53
|
+
):
|
|
54
|
+
return inner_expr
|
|
55
|
+
if inner_expr.op == "Div" and isinstance(inner_expr.operands[0], Convert):
|
|
56
|
+
from_bits = inner_expr.operands[0].from_bits
|
|
57
|
+
to_bits = inner_expr.operands[0].to_bits
|
|
58
|
+
if from_bits < to_bits:
|
|
59
|
+
mask = expr.operands[1]
|
|
60
|
+
if mask.value == _MASKS.get(from_bits):
|
|
61
|
+
return inner_expr
|
|
53
62
|
|
|
54
63
|
if isinstance(inner_expr, Convert) and self.is_bool_expr(inner_expr.operand):
|
|
55
64
|
# useless masking
|
|
@@ -11,7 +11,7 @@ import networkx
|
|
|
11
11
|
import claripy
|
|
12
12
|
from angr.ailment.block import Block
|
|
13
13
|
from angr.ailment.statement import Statement, ConditionalJump, Jump, Label, Return
|
|
14
|
-
from angr.ailment.expression import Const, UnaryOp, MultiStatementExpression
|
|
14
|
+
from angr.ailment.expression import Const, UnaryOp, MultiStatementExpression, BinaryOp
|
|
15
15
|
|
|
16
16
|
from angr.utils.graph import GraphUtils
|
|
17
17
|
from angr.utils.ail import is_phi_assignment, is_head_controlled_loop_block
|
|
@@ -2174,20 +2174,17 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2174
2174
|
if r is not None:
|
|
2175
2175
|
left, left_cond, right, left_right_cond, succ = r
|
|
2176
2176
|
# create the condition node
|
|
2177
|
-
|
|
2177
|
+
left_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_cond)
|
|
2178
|
+
left_cond_expr_neg = UnaryOp(None, "Not", left_cond_expr, ins_addr=start_node.addr)
|
|
2179
|
+
left_right_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_right_cond)
|
|
2178
2180
|
if not self._is_single_statement_block(left):
|
|
2179
2181
|
if not self._should_use_multistmtexprs(left):
|
|
2180
2182
|
return False
|
|
2181
2183
|
# create a MultiStatementExpression for left_right_cond
|
|
2182
2184
|
stmts = self._build_multistatementexpr_statements(left)
|
|
2183
2185
|
assert stmts is not None
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
)
|
|
2187
|
-
memo[left_right_cond._hash] = mstmt_expr
|
|
2188
|
-
cond = self.cond_proc.convert_claripy_bool_ast(
|
|
2189
|
-
claripy.Or(claripy.Not(left_cond), left_right_cond), memo=memo
|
|
2190
|
-
)
|
|
2186
|
+
left_right_cond_expr = MultiStatementExpression(None, stmts, left_right_cond_expr, ins_addr=left.addr)
|
|
2187
|
+
cond = BinaryOp(None, "LogicalOr", [left_cond_expr_neg, left_right_cond_expr], ins_addr=start_node.addr)
|
|
2191
2188
|
cond_jump = ConditionalJump(
|
|
2192
2189
|
None,
|
|
2193
2190
|
cond,
|
|
@@ -2212,18 +2209,16 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2212
2209
|
if r is not None:
|
|
2213
2210
|
left, left_cond, right, right_left_cond, else_node = r
|
|
2214
2211
|
# create the condition node
|
|
2215
|
-
|
|
2212
|
+
left_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_cond)
|
|
2213
|
+
right_left_cond_expr = self.cond_proc.convert_claripy_bool_ast(right_left_cond)
|
|
2216
2214
|
if not self._is_single_statement_block(right):
|
|
2217
2215
|
if not self._should_use_multistmtexprs(right):
|
|
2218
2216
|
return False
|
|
2219
2217
|
# create a MultiStatementExpression for right_left_cond
|
|
2220
2218
|
stmts = self._build_multistatementexpr_statements(right)
|
|
2221
2219
|
assert stmts is not None
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
)
|
|
2225
|
-
memo[right_left_cond._hash] = mstmt_expr
|
|
2226
|
-
cond = self.cond_proc.convert_claripy_bool_ast(claripy.Or(left_cond, right_left_cond), memo=memo)
|
|
2220
|
+
right_left_cond_expr = MultiStatementExpression(None, stmts, right_left_cond_expr, ins_addr=left.addr)
|
|
2221
|
+
cond = BinaryOp(None, "LogicalOr", [left_cond_expr, right_left_cond_expr], ins_addr=start_node.addr)
|
|
2227
2222
|
cond_jump = ConditionalJump(
|
|
2228
2223
|
None,
|
|
2229
2224
|
cond,
|
|
@@ -2248,20 +2243,17 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2248
2243
|
if r is not None:
|
|
2249
2244
|
left, left_cond, succ, left_succ_cond, right = r
|
|
2250
2245
|
# create the condition node
|
|
2251
|
-
|
|
2246
|
+
left_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_cond)
|
|
2247
|
+
left_succ_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_succ_cond)
|
|
2252
2248
|
if not self._is_single_statement_block(left):
|
|
2253
2249
|
if not self._should_use_multistmtexprs(left):
|
|
2254
2250
|
return False
|
|
2255
2251
|
# create a MultiStatementExpression for left_succ_cond
|
|
2256
2252
|
stmts = self._build_multistatementexpr_statements(left)
|
|
2257
2253
|
assert stmts is not None
|
|
2258
|
-
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
memo[left_succ_cond._hash] = mstmt_expr
|
|
2262
|
-
cond = self.cond_proc.convert_claripy_bool_ast(
|
|
2263
|
-
claripy.And(left_cond, claripy.Not(left_succ_cond)), memo=memo
|
|
2264
|
-
)
|
|
2254
|
+
left_succ_cond_expr = MultiStatementExpression(None, stmts, left_succ_cond_expr, ins_addr=left.addr)
|
|
2255
|
+
left_succ_cond_expr_neg = UnaryOp(None, "Not", left_succ_cond_expr, ins_addr=start_node.addr)
|
|
2256
|
+
cond = BinaryOp(None, "LogicalAnd", [left_cond_expr, left_succ_cond_expr_neg], ins_addr=start_node.addr)
|
|
2265
2257
|
cond_jump = ConditionalJump(
|
|
2266
2258
|
None,
|
|
2267
2259
|
cond,
|
|
@@ -2285,21 +2277,16 @@ class PhoenixStructurer(StructurerBase):
|
|
|
2285
2277
|
if r is not None:
|
|
2286
2278
|
left, left_cond, right, right_left_cond, else_node = r
|
|
2287
2279
|
# create the condition node
|
|
2288
|
-
|
|
2280
|
+
left_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_cond)
|
|
2281
|
+
left_right_cond_expr = self.cond_proc.convert_claripy_bool_ast(right_left_cond)
|
|
2289
2282
|
if not self._is_single_statement_block(left):
|
|
2290
2283
|
if not self._should_use_multistmtexprs(left):
|
|
2291
2284
|
return False
|
|
2292
2285
|
# create a MultiStatementExpression for left_right_cond
|
|
2293
2286
|
stmts = self._build_multistatementexpr_statements(left)
|
|
2294
2287
|
assert stmts is not None
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
)
|
|
2298
|
-
memo[right_left_cond._hash] = mstmt_expr
|
|
2299
|
-
cond = self.cond_proc.convert_claripy_bool_ast(
|
|
2300
|
-
claripy.And(left_cond, right_left_cond),
|
|
2301
|
-
memo=memo,
|
|
2302
|
-
)
|
|
2288
|
+
left_right_cond_expr = MultiStatementExpression(None, stmts, left_right_cond_expr, ins_addr=left.addr)
|
|
2289
|
+
cond = BinaryOp(None, "LogicalAnd", [left_cond_expr, left_right_cond_expr], ins_addr=start_node.addr)
|
|
2303
2290
|
cond_jump = ConditionalJump(
|
|
2304
2291
|
None,
|
|
2305
2292
|
cond,
|
|
@@ -25,6 +25,7 @@ class SRDAModel:
|
|
|
25
25
|
self.all_tmp_definitions: dict[CodeLocation, dict[atoms.Tmp, int]] = defaultdict(dict)
|
|
26
26
|
self.all_tmp_uses: dict[CodeLocation, dict[atoms.Tmp, set[tuple[Tmp, int]]]] = defaultdict(dict)
|
|
27
27
|
self.phi_vvar_ids: set[int] = set()
|
|
28
|
+
self.phivarid_to_varids_with_unknown: dict[int, set[int | None]] = {}
|
|
28
29
|
self.phivarid_to_varids: dict[int, set[int]] = {}
|
|
29
30
|
self.vvar_uses_by_loc: dict[CodeLocation, list[int]] = {}
|
|
30
31
|
|
|
@@ -63,7 +63,7 @@ class SReachingDefinitionsAnalysis(Analysis):
|
|
|
63
63
|
case _:
|
|
64
64
|
raise NotImplementedError
|
|
65
65
|
|
|
66
|
-
phi_vvars: dict[int, set[int]] = {}
|
|
66
|
+
phi_vvars: dict[int, set[int | None]] = {}
|
|
67
67
|
# find all vvar definitions
|
|
68
68
|
vvar_deflocs = get_vvar_deflocs(blocks.values(), phi_vvars=phi_vvars)
|
|
69
69
|
# find all explicit vvar uses
|
|
@@ -87,7 +87,10 @@ class SReachingDefinitionsAnalysis(Analysis):
|
|
|
87
87
|
self.model.phi_vvar_ids = set(phi_vvars)
|
|
88
88
|
self.model.phivarid_to_varids = {}
|
|
89
89
|
for vvar_id, src_vvars in phi_vvars.items():
|
|
90
|
-
self.model.
|
|
90
|
+
self.model.phivarid_to_varids_with_unknown[vvar_id] = src_vvars
|
|
91
|
+
self.model.phivarid_to_varids[vvar_id] = ( # type: ignore
|
|
92
|
+
{vvar_id for vvar_id in src_vvars if vvar_id is not None} if None in src_vvars else src_vvars
|
|
93
|
+
)
|
|
91
94
|
|
|
92
95
|
if self.mode == "function":
|
|
93
96
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# pylint:disable=missing-class-docstring
|
|
1
|
+
# pylint:disable=missing-class-docstring,too-many-boolean-expressions
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
import enum
|
|
4
4
|
from collections import defaultdict
|
|
@@ -432,7 +432,14 @@ class SimpleSolver:
|
|
|
432
432
|
improvements.
|
|
433
433
|
"""
|
|
434
434
|
|
|
435
|
-
def __init__(
|
|
435
|
+
def __init__(
|
|
436
|
+
self,
|
|
437
|
+
bits: int,
|
|
438
|
+
constraints,
|
|
439
|
+
typevars,
|
|
440
|
+
constraint_set_degradation_threshold: int = 150,
|
|
441
|
+
stackvar_max_sizes: dict[TypeVariable, int] | None = None,
|
|
442
|
+
):
|
|
436
443
|
if bits not in (32, 64):
|
|
437
444
|
raise ValueError(f"Pointer size {bits} is not supported. Expect 32 or 64.")
|
|
438
445
|
|
|
@@ -440,6 +447,7 @@ class SimpleSolver:
|
|
|
440
447
|
self._constraints: dict[TypeVariable, set[TypeConstraint]] = constraints
|
|
441
448
|
self._typevars: set[TypeVariable] = typevars
|
|
442
449
|
self.stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
|
|
450
|
+
self._constraint_set_degradation_threshold = constraint_set_degradation_threshold
|
|
443
451
|
self._base_lattice = BASE_LATTICES[bits]
|
|
444
452
|
self._base_lattice_inverted = networkx.DiGraph()
|
|
445
453
|
for src, dst in self._base_lattice.edges:
|
|
@@ -459,7 +467,11 @@ class SimpleSolver:
|
|
|
459
467
|
self.processed_constraints_count += len(self._constraints[typevar])
|
|
460
468
|
|
|
461
469
|
self._constraints[typevar] |= self._eq_constraints_from_add(typevar)
|
|
462
|
-
self._constraints[typevar]
|
|
470
|
+
self._constraints[typevar] |= self._discover_equivalence(self._constraints[typevar])
|
|
471
|
+
new_constraints, replacements = self._handle_equivalence(self._constraints[typevar])
|
|
472
|
+
self._equivalence |= replacements
|
|
473
|
+
self._constraints[typevar] = new_constraints
|
|
474
|
+
self._constraints[typevar] = self._filter_constraints(self._constraints[typevar])
|
|
463
475
|
|
|
464
476
|
self.simplified_constraints_count += len(self._constraints[typevar])
|
|
465
477
|
|
|
@@ -517,11 +529,18 @@ class SimpleSolver:
|
|
|
517
529
|
|
|
518
530
|
_, sketches = self.infer_shapes(typevars, constraints)
|
|
519
531
|
constraintset2tvs = defaultdict(set)
|
|
532
|
+
tvs_seen = set()
|
|
520
533
|
for idx, tv in enumerate(constrained_typevars):
|
|
521
534
|
_l.debug("Collecting constraints for type variable %r (%d/%d)", tv, idx + 1, len(constrained_typevars))
|
|
535
|
+
if tv in tvs_seen:
|
|
536
|
+
continue
|
|
522
537
|
# build a sub constraint set for the type variable
|
|
523
|
-
constraint_subset =
|
|
524
|
-
|
|
538
|
+
constraint_subset, related_tvs = self._generate_constraint_subset(constraints, {tv})
|
|
539
|
+
# drop all type vars outside constrained_typevars
|
|
540
|
+
related_tvs = related_tvs.intersection(constrained_typevars)
|
|
541
|
+
tvs_seen |= related_tvs
|
|
542
|
+
frozen_constraint_subset = frozenset(constraint_subset)
|
|
543
|
+
constraintset2tvs[frozen_constraint_subset] = related_tvs
|
|
525
544
|
|
|
526
545
|
for idx, (constraint_subset, tvs) in enumerate(constraintset2tvs.items()):
|
|
527
546
|
_l.debug(
|
|
@@ -534,8 +553,31 @@ class SimpleSolver:
|
|
|
534
553
|
)
|
|
535
554
|
self.eqclass_constraints_count.append(len(constraint_subset))
|
|
536
555
|
|
|
537
|
-
|
|
538
|
-
|
|
556
|
+
if len(constraint_subset) > self._constraint_set_degradation_threshold:
|
|
557
|
+
_l.debug(
|
|
558
|
+
"Constraint subset contains %d constraints, which is over the limit of %d. Enter degradation.",
|
|
559
|
+
len(constraint_subset),
|
|
560
|
+
self._constraint_set_degradation_threshold,
|
|
561
|
+
)
|
|
562
|
+
constraint_subset = self._degrade_constraint_set(constraint_subset)
|
|
563
|
+
_l.debug("Degraded constraint subset to %d constraints.", len(constraint_subset))
|
|
564
|
+
|
|
565
|
+
while constraint_subset:
|
|
566
|
+
|
|
567
|
+
_l.debug("Working with %d constraints.", len(constraint_subset))
|
|
568
|
+
|
|
569
|
+
# remove constraints that are a <: b where a only appears once; in this case, the solution of a is
|
|
570
|
+
# entirely determined by the solution of b (which is the upper bound of a)
|
|
571
|
+
filtered_constraint_subset, ub_subtypes = self._filter_leaf_typevars(constraint_subset, tvs)
|
|
572
|
+
_l.debug(
|
|
573
|
+
"Filtered %d leaf typevars; %d constraints remain.",
|
|
574
|
+
len(ub_subtypes),
|
|
575
|
+
len(filtered_constraint_subset),
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
base_constraint_graph = self._generate_constraint_graph(
|
|
579
|
+
filtered_constraint_subset, tvs | PRIMITIVE_TYPES
|
|
580
|
+
)
|
|
539
581
|
primitive_constraints = self._generate_primitive_constraints(tvs, base_constraint_graph)
|
|
540
582
|
tvs_with_primitive_constraints = set()
|
|
541
583
|
for primitive_constraint in primitive_constraints:
|
|
@@ -546,12 +588,22 @@ class SimpleSolver:
|
|
|
546
588
|
solutions = {}
|
|
547
589
|
self.determine(sketches, tvs_with_primitive_constraints, solutions)
|
|
548
590
|
_l.debug("Determined solutions for %d type variable(s).", len(tvs_with_primitive_constraints))
|
|
591
|
+
|
|
592
|
+
leaf_solutions = 0
|
|
593
|
+
for tv_, ub_tv in ub_subtypes.items():
|
|
594
|
+
if ub_tv in solutions:
|
|
595
|
+
solutions[tv_] = solutions[ub_tv]
|
|
596
|
+
leaf_solutions += 1
|
|
597
|
+
elif isinstance(ub_tv, TypeConstant):
|
|
598
|
+
solutions[tv_] = ub_tv
|
|
599
|
+
leaf_solutions += 1
|
|
600
|
+
_l.debug("Determined solutions for %d leaf type variable(s).", leaf_solutions)
|
|
601
|
+
|
|
549
602
|
if not solutions:
|
|
550
603
|
break
|
|
551
|
-
|
|
552
604
|
self.solution |= solutions
|
|
553
605
|
|
|
554
|
-
tvs = {tv for tv in tvs if tv not in
|
|
606
|
+
tvs = {tv for tv in tvs if tv not in solutions}
|
|
555
607
|
if not tvs:
|
|
556
608
|
break
|
|
557
609
|
# rewrite existing constraints
|
|
@@ -559,7 +611,7 @@ class SimpleSolver:
|
|
|
559
611
|
for constraint in constraint_subset:
|
|
560
612
|
rewritten = self._rewrite_constraint(constraint, solutions)
|
|
561
613
|
new_constraint_subset.add(rewritten)
|
|
562
|
-
constraint_subset = new_constraint_subset
|
|
614
|
+
constraint_subset = self._filter_constraints(new_constraint_subset)
|
|
563
615
|
|
|
564
616
|
# set the solution for missing type vars to TOP
|
|
565
617
|
self.determine(sketches, set(sketches).difference(set(self.solution)), self.solution)
|
|
@@ -775,14 +827,45 @@ class SimpleSolver:
|
|
|
775
827
|
new_constraints.add(Equivalence(constraint.type_1, constraint.type_r))
|
|
776
828
|
return new_constraints
|
|
777
829
|
|
|
778
|
-
|
|
830
|
+
@staticmethod
|
|
831
|
+
def _discover_equivalence(constraints: set[TypeConstraint]) -> set[Equivalence]:
|
|
832
|
+
"""
|
|
833
|
+
a <: b && b <: a ==> a == b
|
|
834
|
+
"""
|
|
835
|
+
|
|
836
|
+
new_eq_constraints: set[Equivalence] = set()
|
|
837
|
+
subtypes = defaultdict(set)
|
|
838
|
+
for constraint in constraints:
|
|
839
|
+
if isinstance(constraint, Subtype):
|
|
840
|
+
sub_type = constraint.sub_type
|
|
841
|
+
super_type = constraint.super_type
|
|
842
|
+
subtypes[sub_type].add(super_type)
|
|
843
|
+
|
|
844
|
+
# check everything
|
|
845
|
+
seen = set()
|
|
846
|
+
for tv, tv_supers in subtypes.items():
|
|
847
|
+
for tv_super in tv_supers:
|
|
848
|
+
if tv_super in subtypes and tv in subtypes[tv_super]: # noqa: SIM102
|
|
849
|
+
# we have a pair of subtypes that are equivalent
|
|
850
|
+
if (tv, tv_super) not in seen and (tv_super, tv) not in seen:
|
|
851
|
+
new_eq_constraints.add(Equivalence(tv, tv_super))
|
|
852
|
+
seen.add((tv, tv_super))
|
|
853
|
+
|
|
854
|
+
_l.debug(
|
|
855
|
+
"Discovered %d equivalence constraints from %d constraints.", len(new_eq_constraints), len(constraints)
|
|
856
|
+
)
|
|
857
|
+
return new_eq_constraints
|
|
858
|
+
|
|
859
|
+
@staticmethod
|
|
860
|
+
def _handle_equivalence(
|
|
861
|
+
constraint_set: set[TypeConstraint],
|
|
862
|
+
) -> tuple[set[TypeConstraint], dict[TypeVariable, TypeVariable | TypeConstant]]:
|
|
779
863
|
graph = networkx.Graph()
|
|
780
864
|
|
|
781
|
-
replacements = {}
|
|
782
|
-
constraints = set()
|
|
865
|
+
replacements: dict[TypeVariable, TypeVariable | TypeConstant] = {}
|
|
783
866
|
|
|
784
867
|
# collect equivalence relations
|
|
785
|
-
for constraint in
|
|
868
|
+
for constraint in constraint_set:
|
|
786
869
|
if isinstance(constraint, Equivalence):
|
|
787
870
|
# | type_a == type_b
|
|
788
871
|
# we apply unification and remove one of them
|
|
@@ -803,15 +886,30 @@ class SimpleSolver:
|
|
|
803
886
|
for tv in components_lst[1:]:
|
|
804
887
|
replacements[tv] = representative
|
|
805
888
|
|
|
806
|
-
|
|
807
|
-
|
|
889
|
+
constraints = SimpleSolver._rewrite_constraints_with_replacements(constraint_set, replacements)
|
|
890
|
+
|
|
891
|
+
# import pprint
|
|
892
|
+
# print("Replacements")
|
|
893
|
+
# pprint.pprint(replacements)
|
|
894
|
+
# print("Constraints (after replacement)")
|
|
895
|
+
# pprint.pprint(constraints)
|
|
896
|
+
|
|
897
|
+
return constraints, replacements
|
|
898
|
+
|
|
899
|
+
@staticmethod
|
|
900
|
+
def _rewrite_constraints_with_replacements(
|
|
901
|
+
constraints: set[TypeConstraint], replacements: dict[TypeVariable, TypeVariable]
|
|
902
|
+
) -> set[TypeConstraint]:
|
|
903
|
+
# replace constraints according to a dictionary of type variable replacements
|
|
904
|
+
replaced_constraints = set()
|
|
905
|
+
for constraint in constraints:
|
|
808
906
|
if isinstance(constraint, Existence):
|
|
809
907
|
replaced, new_constraint = constraint.replace(replacements)
|
|
810
908
|
|
|
811
909
|
if replaced:
|
|
812
|
-
|
|
910
|
+
replaced_constraints.add(new_constraint)
|
|
813
911
|
else:
|
|
814
|
-
|
|
912
|
+
replaced_constraints.add(constraint)
|
|
815
913
|
|
|
816
914
|
elif isinstance(constraint, Subtype):
|
|
817
915
|
# subtype <: supertype
|
|
@@ -819,18 +917,122 @@ class SimpleSolver:
|
|
|
819
917
|
replaced, new_constraint = constraint.replace(replacements)
|
|
820
918
|
|
|
821
919
|
if replaced:
|
|
822
|
-
|
|
920
|
+
replaced_constraints.add(new_constraint)
|
|
823
921
|
else:
|
|
824
|
-
|
|
922
|
+
replaced_constraints.add(constraint)
|
|
923
|
+
return replaced_constraints
|
|
825
924
|
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
925
|
+
@staticmethod
|
|
926
|
+
def _filter_constraints(constraints: set[TypeConstraint]) -> set[TypeConstraint]:
|
|
927
|
+
"""
|
|
928
|
+
Filter out constraints that we don't yet support.
|
|
929
|
+
"""
|
|
930
|
+
|
|
931
|
+
filtered_constraints = set()
|
|
932
|
+
for constraint in constraints:
|
|
933
|
+
dropped = False
|
|
934
|
+
if isinstance(constraint, Subtype) and (
|
|
935
|
+
(isinstance(constraint.sub_type, TypeConstant) and isinstance(constraint.super_type, TypeConstant))
|
|
936
|
+
or (
|
|
937
|
+
isinstance(constraint.sub_type, DerivedTypeVariable)
|
|
938
|
+
and isinstance(constraint.sub_type.labels[-1], ConvertTo)
|
|
939
|
+
)
|
|
940
|
+
or (
|
|
941
|
+
isinstance(constraint.sub_type, TypeVariable)
|
|
942
|
+
and isinstance(constraint.super_type, TypeVariable)
|
|
943
|
+
and constraint.sub_type == constraint.super_type
|
|
944
|
+
)
|
|
945
|
+
):
|
|
946
|
+
dropped = True
|
|
831
947
|
|
|
832
|
-
|
|
833
|
-
|
|
948
|
+
if not dropped:
|
|
949
|
+
filtered_constraints.add(constraint)
|
|
950
|
+
|
|
951
|
+
return filtered_constraints
|
|
952
|
+
|
|
953
|
+
@staticmethod
|
|
954
|
+
def _filter_leaf_typevars(
|
|
955
|
+
constraints: set[TypeConstraint], tvs_to_solve: set[TypeVariable]
|
|
956
|
+
) -> tuple[set[TypeConstraint], dict[TypeVariable, TypeVariable]]:
|
|
957
|
+
"""
|
|
958
|
+
Filter out leaf type variables that only appear once in the constraints. These type variables are not
|
|
959
|
+
interesting and can be removed from the constraints.
|
|
960
|
+
"""
|
|
961
|
+
|
|
962
|
+
sub_typevars = defaultdict(set)
|
|
963
|
+
tv_to_dtvs: dict[TypeVariable, set[TypeVariable | DerivedTypeVariable]] = defaultdict(set)
|
|
964
|
+
for constraint in constraints:
|
|
965
|
+
if isinstance(constraint, Subtype):
|
|
966
|
+
if isinstance(constraint.sub_type, TypeVariable):
|
|
967
|
+
sub_typevars[constraint.sub_type].add(constraint.super_type)
|
|
968
|
+
for tv in [constraint.sub_type, constraint.super_type]:
|
|
969
|
+
if isinstance(tv, DerivedTypeVariable):
|
|
970
|
+
tv_to_dtvs[tv.type_var].add(constraint.sub_type)
|
|
971
|
+
elif isinstance(tv, TypeVariable):
|
|
972
|
+
tv_to_dtvs[tv].add(constraint.sub_type)
|
|
973
|
+
|
|
974
|
+
ub_subtypes: dict[TypeVariable, TypeVariable] = {}
|
|
975
|
+
for tv, dtvs in tv_to_dtvs.items():
|
|
976
|
+
if len(dtvs) == 1 and tv in sub_typevars and len(sub_typevars[tv]) == 1:
|
|
977
|
+
ub = next(iter(sub_typevars[tv]))
|
|
978
|
+
if ub in tvs_to_solve:
|
|
979
|
+
ub_subtypes[tv] = ub
|
|
980
|
+
|
|
981
|
+
filtered_constraints = set()
|
|
982
|
+
for constraint in constraints:
|
|
983
|
+
if isinstance(constraint, Subtype) and constraint.sub_type in ub_subtypes:
|
|
984
|
+
continue
|
|
985
|
+
filtered_constraints.add(constraint)
|
|
986
|
+
|
|
987
|
+
return filtered_constraints, ub_subtypes
|
|
988
|
+
|
|
989
|
+
def _degrade_constraint_set(self, constraints: set[TypeConstraint]) -> set[TypeConstraint]:
|
|
990
|
+
"""
|
|
991
|
+
Degrade the constraint set to a smaller set of constraints to speed up the DFA generation process.
|
|
992
|
+
"""
|
|
993
|
+
|
|
994
|
+
tv_with_ls = defaultdict(set) # tv_with_ls are type variables with Loads or Stores
|
|
995
|
+
graph = networkx.Graph()
|
|
996
|
+
|
|
997
|
+
for constraint in constraints:
|
|
998
|
+
if isinstance(constraint, Subtype):
|
|
999
|
+
if isinstance(constraint.sub_type, DerivedTypeVariable) and isinstance(
|
|
1000
|
+
constraint.sub_type.labels[0], (Load, Store)
|
|
1001
|
+
):
|
|
1002
|
+
tv_with_ls[constraint.sub_type.type_var].add(constraint.sub_type)
|
|
1003
|
+
if type(constraint.sub_type) is TypeVariable and type(constraint.super_type) is TypeVariable:
|
|
1004
|
+
graph.add_edge(constraint.sub_type, constraint.super_type)
|
|
1005
|
+
|
|
1006
|
+
tv_to_degrade = set()
|
|
1007
|
+
for tv, dtvs in tv_with_ls.items():
|
|
1008
|
+
if len(dtvs) > 5:
|
|
1009
|
+
# degrade all subtype relationships involving this type variable to equivalence
|
|
1010
|
+
tv_to_degrade.add(tv)
|
|
1011
|
+
|
|
1012
|
+
replacements = {}
|
|
1013
|
+
for components in networkx.connected_components(graph):
|
|
1014
|
+
if len(components) == 1:
|
|
1015
|
+
continue
|
|
1016
|
+
if any(tv in tv_to_degrade for tv in components):
|
|
1017
|
+
components_lst = sorted(components, key=str)
|
|
1018
|
+
representative = components_lst[0]
|
|
1019
|
+
for tv in components_lst[1:]:
|
|
1020
|
+
replacements[tv] = representative
|
|
1021
|
+
|
|
1022
|
+
degraded_constraints = self._rewrite_constraints_with_replacements(constraints, replacements)
|
|
1023
|
+
|
|
1024
|
+
# discover more equivalence relations
|
|
1025
|
+
eq_constraints = self._discover_equivalence(degraded_constraints)
|
|
1026
|
+
_l.debug("Discovered %d equivalence constraints from degraded constraints.", len(eq_constraints))
|
|
1027
|
+
if eq_constraints:
|
|
1028
|
+
degraded_constraints, eq_replacements = self._handle_equivalence(degraded_constraints | eq_constraints)
|
|
1029
|
+
self._equivalence |= eq_replacements
|
|
1030
|
+
|
|
1031
|
+
# filter them
|
|
1032
|
+
degraded_constraints = self._filter_constraints(degraded_constraints)
|
|
1033
|
+
|
|
1034
|
+
self._equivalence |= replacements
|
|
1035
|
+
return degraded_constraints
|
|
834
1036
|
|
|
835
1037
|
def _convert_arrays(self, constraints):
|
|
836
1038
|
for constraint in constraints:
|
|
@@ -860,7 +1062,7 @@ class SimpleSolver:
|
|
|
860
1062
|
@staticmethod
|
|
861
1063
|
def _generate_constraint_subset(
|
|
862
1064
|
constraints: set[TypeConstraint], typevars: set[TypeVariable]
|
|
863
|
-
) -> set[TypeConstraint]:
|
|
1065
|
+
) -> tuple[set[TypeConstraint], set[TypeVariable]]:
|
|
864
1066
|
subset = set()
|
|
865
1067
|
related_typevars = set(typevars)
|
|
866
1068
|
while True:
|
|
@@ -890,7 +1092,7 @@ class SimpleSolver:
|
|
|
890
1092
|
if not new:
|
|
891
1093
|
break
|
|
892
1094
|
subset |= new
|
|
893
|
-
return subset
|
|
1095
|
+
return subset, related_typevars
|
|
894
1096
|
|
|
895
1097
|
def _generate_constraint_graph(
|
|
896
1098
|
self, constraints: set[TypeConstraint], interesting_variables: set[DerivedTypeVariable]
|
|
@@ -40,6 +40,7 @@ class Typehoon(Analysis):
|
|
|
40
40
|
must_struct: set[TypeVariable] | None = None,
|
|
41
41
|
stackvar_max_sizes: dict[TypeVariable, int] | None = None,
|
|
42
42
|
stack_offset_tvs: dict[int, TypeVariable] | None = None,
|
|
43
|
+
constraint_set_degradation_threshold: int = 150,
|
|
43
44
|
):
|
|
44
45
|
"""
|
|
45
46
|
|
|
@@ -57,6 +58,7 @@ class Typehoon(Analysis):
|
|
|
57
58
|
self._must_struct = must_struct
|
|
58
59
|
self._stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
|
|
59
60
|
self._stack_offset_tvs = stack_offset_tvs if stack_offset_tvs is not None else {}
|
|
61
|
+
self._constraint_set_degradation_threshold = constraint_set_degradation_threshold
|
|
60
62
|
|
|
61
63
|
self.bits = self.project.arch.bits
|
|
62
64
|
self.solution = None
|
|
@@ -193,7 +195,7 @@ class Typehoon(Analysis):
|
|
|
193
195
|
self.simtypes_solution.update(self._ground_truth)
|
|
194
196
|
|
|
195
197
|
@staticmethod
|
|
196
|
-
def _resolve_derived(tv):
|
|
198
|
+
def _resolve_derived(tv: TypeVariable | DerivedTypeVariable) -> TypeVariable:
|
|
197
199
|
return tv.type_var if isinstance(tv, DerivedTypeVariable) else tv
|
|
198
200
|
|
|
199
201
|
def _solve(self):
|
|
@@ -211,7 +213,13 @@ class Typehoon(Analysis):
|
|
|
211
213
|
if isinstance(constraint.super_type, TypeVariable):
|
|
212
214
|
typevars.add(self._resolve_derived(constraint.super_type))
|
|
213
215
|
|
|
214
|
-
solver = SimpleSolver(
|
|
216
|
+
solver = SimpleSolver(
|
|
217
|
+
self.bits,
|
|
218
|
+
self._constraints,
|
|
219
|
+
typevars,
|
|
220
|
+
stackvar_max_sizes=self._stackvar_max_sizes,
|
|
221
|
+
constraint_set_degradation_threshold=self._constraint_set_degradation_threshold,
|
|
222
|
+
)
|
|
215
223
|
self.solution = solver.solution
|
|
216
224
|
self.processed_constraints_count = solver.processed_constraints_count
|
|
217
225
|
self.eqclass_constraints_count = solver.eqclass_constraints_count
|
|
@@ -633,10 +633,7 @@ class SimEngineVRAIL(
|
|
|
633
633
|
if not r1.data.concrete:
|
|
634
634
|
# we don't support symbolic shiftamount
|
|
635
635
|
r = self.state.top(result_size)
|
|
636
|
-
return RichR(
|
|
637
|
-
r,
|
|
638
|
-
typevar=r0.typevar,
|
|
639
|
-
)
|
|
636
|
+
return RichR(r)
|
|
640
637
|
|
|
641
638
|
shiftamount = r1.data.concrete_value
|
|
642
639
|
return RichR(r0.data << shiftamount, typevar=typeconsts.int_type(result_size), type_constraints=None)
|
|
@@ -651,10 +648,7 @@ class SimEngineVRAIL(
|
|
|
651
648
|
if not r1.data.concrete:
|
|
652
649
|
# we don't support symbolic shiftamount
|
|
653
650
|
r = self.state.top(result_size)
|
|
654
|
-
return RichR(
|
|
655
|
-
r,
|
|
656
|
-
typevar=r0.typevar,
|
|
657
|
-
)
|
|
651
|
+
return RichR(r)
|
|
658
652
|
|
|
659
653
|
shiftamount = r1.data.concrete_value
|
|
660
654
|
|
|
@@ -672,10 +666,7 @@ class SimEngineVRAIL(
|
|
|
672
666
|
if not r1.data.concrete:
|
|
673
667
|
# we don't support symbolic shiftamount
|
|
674
668
|
r = self.state.top(result_size)
|
|
675
|
-
return RichR(
|
|
676
|
-
r,
|
|
677
|
-
typevar=r0.typevar,
|
|
678
|
-
)
|
|
669
|
+
return RichR(r)
|
|
679
670
|
|
|
680
671
|
shiftamount = r1.data.concrete_value
|
|
681
672
|
|
|
@@ -691,10 +682,7 @@ class SimEngineVRAIL(
|
|
|
691
682
|
if not r1.data.concrete:
|
|
692
683
|
# we don't support symbolic shiftamount
|
|
693
684
|
r = self.state.top(result_size)
|
|
694
|
-
return RichR(
|
|
695
|
-
r,
|
|
696
|
-
typevar=r0.typevar,
|
|
697
|
-
)
|
|
685
|
+
return RichR(r)
|
|
698
686
|
|
|
699
687
|
shiftamount = r1.data.concrete_value
|
|
700
688
|
|
|
@@ -761,22 +749,22 @@ class SimEngineVRAIL(
|
|
|
761
749
|
def _handle_binop_Rol(self, expr):
|
|
762
750
|
arg0, arg1 = expr.operands
|
|
763
751
|
|
|
764
|
-
|
|
752
|
+
_ = self._expr_bv(arg0)
|
|
765
753
|
_ = self._expr_bv(arg1)
|
|
766
754
|
result_size = arg0.bits
|
|
767
755
|
|
|
768
756
|
r = self.state.top(result_size)
|
|
769
|
-
return RichR(r
|
|
757
|
+
return RichR(r)
|
|
770
758
|
|
|
771
759
|
def _handle_binop_Ror(self, expr):
|
|
772
760
|
arg0, arg1 = expr.operands
|
|
773
761
|
|
|
774
|
-
|
|
762
|
+
_ = self._expr_bv(arg0)
|
|
775
763
|
_ = self._expr_bv(arg1)
|
|
776
764
|
result_size = arg0.bits
|
|
777
765
|
|
|
778
766
|
r = self.state.top(result_size)
|
|
779
|
-
return RichR(r
|
|
767
|
+
return RichR(r)
|
|
780
768
|
|
|
781
769
|
def _handle_binop_Concat(self, expr):
|
|
782
770
|
arg0, arg1 = expr.operands
|