angr 9.2.159__cp310-abi3-manylinux2014_aarch64.whl → 9.2.161__cp310-abi3-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)
  1. angr/__init__.py +4 -1
  2. angr/analyses/decompiler/ail_simplifier.py +81 -1
  3. angr/analyses/decompiler/block_simplifier.py +7 -5
  4. angr/analyses/decompiler/clinic.py +5 -1
  5. angr/analyses/decompiler/decompiler.py +12 -9
  6. angr/analyses/decompiler/peephole_optimizations/__init__.py +4 -4
  7. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +53 -0
  8. angr/analyses/decompiler/peephole_optimizations/modulo_simplifier.py +89 -0
  9. angr/analyses/decompiler/peephole_optimizations/{const_mull_a_shift.py → optimized_div_simplifier.py} +139 -25
  10. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +18 -9
  11. angr/analyses/decompiler/structuring/phoenix.py +19 -32
  12. angr/analyses/s_reaching_definitions/s_rda_model.py +1 -0
  13. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +5 -2
  14. angr/analyses/typehoon/simple_solver.py +231 -29
  15. angr/analyses/typehoon/typehoon.py +10 -2
  16. angr/analyses/variable_recovery/engine_ail.py +8 -20
  17. angr/analyses/variable_recovery/engine_base.py +9 -1
  18. angr/analyses/variable_recovery/variable_recovery_base.py +30 -2
  19. angr/analyses/variable_recovery/variable_recovery_fast.py +11 -2
  20. angr/emulator.py +143 -0
  21. angr/engines/concrete.py +66 -0
  22. angr/engines/icicle.py +66 -30
  23. angr/exploration_techniques/driller_core.py +2 -2
  24. angr/project.py +7 -0
  25. angr/rustylib.abi3.so +0 -0
  26. angr/sim_type.py +16 -8
  27. angr/state_plugins/unicorn_engine.py +4 -4
  28. angr/utils/graph.py +20 -1
  29. angr/utils/ssa/__init__.py +3 -3
  30. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/METADATA +5 -6
  31. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/RECORD +36 -34
  32. angr/analyses/decompiler/peephole_optimizations/a_sub_a_div_const_mul_const.py +0 -57
  33. /angr/{lib/angr_native.so → unicornlib.so} +0 -0
  34. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/WHEEL +0 -0
  35. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/entry_points.txt +0 -0
  36. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/licenses/LICENSE +0 -0
  37. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/top_level.txt +0 -0
angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py

@@ -33,6 +33,7 @@ class RemoveRedundantBitmasks(PeepholeOptimizationExprBase):
     def _optimize_BinaryOp(self, expr: BinaryOp):
         # And(expr, full_N_bitmask) ==> expr
         # And(SHR(expr, N), bitmask)) ==> SHR(expr, N)
+        # And(Div(Conv(M->N, expr), P), 2 ** M - 1) ==> Div(Conv(M->N, expr), P) where M < N
         # And(Conv(1->N, expr), bitmask) ==> Conv(1->N, expr)
         # And(Conv(1->N, bool_expr), bitmask) ==> Conv(1->N, bool_expr)
         # And(ITE(?, const_expr, const_expr), bitmask) ==> ITE(?, const_expr, const_expr)
@@ -41,15 +42,23 @@ class RemoveRedundantBitmasks(PeepholeOptimizationExprBase):
             if expr.operands[1].value == _MASKS.get(inner_expr.bits, None):
                 return inner_expr
 
-            if isinstance(inner_expr, BinaryOp) and inner_expr.op == "Shr":
-                mask = expr.operands[1]
-                shift_val = inner_expr.operands[1]
-                if (
-                    isinstance(shift_val, Const)
-                    and shift_val.value in _MASKS
-                    and mask.value == _MASKS.get(int(64 - shift_val.value), None)
-                ):
-                    return inner_expr
+            if isinstance(inner_expr, BinaryOp):
+                if inner_expr.op == "Shr":
+                    mask = expr.operands[1]
+                    shift_val = inner_expr.operands[1]
+                    if (
+                        isinstance(shift_val, Const)
+                        and shift_val.value in _MASKS
+                        and mask.value == _MASKS.get(int(64 - shift_val.value), None)
+                    ):
+                        return inner_expr
+                if inner_expr.op == "Div" and isinstance(inner_expr.operands[0], Convert):
+                    from_bits = inner_expr.operands[0].from_bits
+                    to_bits = inner_expr.operands[0].to_bits
+                    if from_bits < to_bits:
+                        mask = expr.operands[1]
+                        if mask.value == _MASKS.get(from_bits):
+                            return inner_expr
 
             if isinstance(inner_expr, Convert) and self.is_bool_expr(inner_expr.operand):
                 # useless masking
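
The added rule drops a mask that cannot clear any bits: when an M-bit value is zero-extended to N bits (M < N) and then divided by a constant, the quotient still fits in M bits, so And-ing it with 2 ** M - 1 changes nothing. A standalone sketch of that arithmetic fact in plain Python, not angr's AIL classes (the sample values and the divisor here are made up):

# Illustration only: why masking the quotient of a zero-extended value is redundant.
M = 32                       # width before the Conv(32->64, ...) zero-extension
P = 7                        # the constant divisor from the Div
mask = (1 << M) - 1          # the "2 ** M - 1" bitmask matched by the rule

for x in (0, 1, 12345, (1 << M) - 1):   # sample unsigned 32-bit values
    widened = x              # zero-extension does not change the value
    quotient = widened // P  # Div(Conv(M->N, expr), P)
    assert quotient & mask == quotient   # the And removes no bits
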
angr/analyses/decompiler/structuring/phoenix.py

@@ -11,7 +11,7 @@ import networkx
 import claripy
 from angr.ailment.block import Block
 from angr.ailment.statement import Statement, ConditionalJump, Jump, Label, Return
-from angr.ailment.expression import Const, UnaryOp, MultiStatementExpression
+from angr.ailment.expression import Const, UnaryOp, MultiStatementExpression, BinaryOp
 
 from angr.utils.graph import GraphUtils
 from angr.utils.ail import is_phi_assignment, is_head_controlled_loop_block
@@ -2174,20 +2174,17 @@ class PhoenixStructurer(StructurerBase):
         if r is not None:
             left, left_cond, right, left_right_cond, succ = r
             # create the condition node
-            memo = {}
+            left_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_cond)
+            left_cond_expr_neg = UnaryOp(None, "Not", left_cond_expr, ins_addr=start_node.addr)
+            left_right_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_right_cond)
             if not self._is_single_statement_block(left):
                 if not self._should_use_multistmtexprs(left):
                     return False
                 # create a MultiStatementExpression for left_right_cond
                 stmts = self._build_multistatementexpr_statements(left)
                 assert stmts is not None
-                mstmt_expr = MultiStatementExpression(
-                    None, stmts, self.cond_proc.convert_claripy_bool_ast(left_right_cond), ins_addr=left.addr
-                )
-                memo[left_right_cond._hash] = mstmt_expr
-            cond = self.cond_proc.convert_claripy_bool_ast(
-                claripy.Or(claripy.Not(left_cond), left_right_cond), memo=memo
-            )
+                left_right_cond_expr = MultiStatementExpression(None, stmts, left_right_cond_expr, ins_addr=left.addr)
+            cond = BinaryOp(None, "LogicalOr", [left_cond_expr_neg, left_right_cond_expr], ins_addr=start_node.addr)
             cond_jump = ConditionalJump(
                 None,
                 cond,
@@ -2212,18 +2209,16 @@ class PhoenixStructurer(StructurerBase):
         if r is not None:
             left, left_cond, right, right_left_cond, else_node = r
             # create the condition node
-            memo = {}
+            left_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_cond)
+            right_left_cond_expr = self.cond_proc.convert_claripy_bool_ast(right_left_cond)
             if not self._is_single_statement_block(right):
                 if not self._should_use_multistmtexprs(right):
                     return False
                 # create a MultiStatementExpression for left_right_cond
                 stmts = self._build_multistatementexpr_statements(right)
                 assert stmts is not None
-                mstmt_expr = MultiStatementExpression(
-                    None, stmts, self.cond_proc.convert_claripy_bool_ast(right_left_cond), ins_addr=left.addr
-                )
-                memo[right_left_cond._hash] = mstmt_expr
-            cond = self.cond_proc.convert_claripy_bool_ast(claripy.Or(left_cond, right_left_cond), memo=memo)
+                right_left_cond_expr = MultiStatementExpression(None, stmts, right_left_cond_expr, ins_addr=left.addr)
+            cond = BinaryOp(None, "LogicalOr", [left_cond_expr, right_left_cond_expr], ins_addr=start_node.addr)
             cond_jump = ConditionalJump(
                 None,
                 cond,
@@ -2248,20 +2243,17 @@ class PhoenixStructurer(StructurerBase):
         if r is not None:
             left, left_cond, succ, left_succ_cond, right = r
             # create the condition node
-            memo = {}
+            left_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_cond)
+            left_succ_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_succ_cond)
             if not self._is_single_statement_block(left):
                 if not self._should_use_multistmtexprs(left):
                     return False
                 # create a MultiStatementExpression for left_right_cond
                 stmts = self._build_multistatementexpr_statements(left)
                 assert stmts is not None
-                mstmt_expr = MultiStatementExpression(
-                    None, stmts, self.cond_proc.convert_claripy_bool_ast(left_succ_cond), ins_addr=left.addr
-                )
-                memo[left_succ_cond._hash] = mstmt_expr
-            cond = self.cond_proc.convert_claripy_bool_ast(
-                claripy.And(left_cond, claripy.Not(left_succ_cond)), memo=memo
-            )
+                left_succ_cond_expr = MultiStatementExpression(None, stmts, left_succ_cond_expr, ins_addr=left.addr)
+            left_succ_cond_expr_neg = UnaryOp(None, "Not", left_succ_cond_expr, ins_addr=start_node.addr)
+            cond = BinaryOp(None, "LogicalAnd", [left_cond_expr, left_succ_cond_expr_neg], ins_addr=start_node.addr)
             cond_jump = ConditionalJump(
                 None,
                 cond,
@@ -2285,21 +2277,16 @@ class PhoenixStructurer(StructurerBase):
         if r is not None:
             left, left_cond, right, right_left_cond, else_node = r
             # create the condition node
-            memo = {}
+            left_cond_expr = self.cond_proc.convert_claripy_bool_ast(left_cond)
+            left_right_cond_expr = self.cond_proc.convert_claripy_bool_ast(right_left_cond)
             if not self._is_single_statement_block(left):
                 if not self._should_use_multistmtexprs(left):
                     return False
                 # create a MultiStatementExpression for left_right_cond
                 stmts = self._build_multistatementexpr_statements(left)
                 assert stmts is not None
-                mstmt_expr = MultiStatementExpression(
-                    None, stmts, self.cond_proc.convert_claripy_bool_ast(right_left_cond), ins_addr=left.addr
-                )
-                memo[right_left_cond._hash] = mstmt_expr
-            cond = self.cond_proc.convert_claripy_bool_ast(
-                claripy.And(left_cond, right_left_cond),
-                memo=memo,
-            )
+                left_right_cond_expr = MultiStatementExpression(None, stmts, left_right_cond_expr, ins_addr=left.addr)
+            cond = BinaryOp(None, "LogicalAnd", [left_cond_expr, left_right_cond_expr], ins_addr=start_node.addr)
             cond_jump = ConditionalJump(
                 None,
                 cond,
angr/analyses/s_reaching_definitions/s_rda_model.py

@@ -25,6 +25,7 @@ class SRDAModel:
         self.all_tmp_definitions: dict[CodeLocation, dict[atoms.Tmp, int]] = defaultdict(dict)
         self.all_tmp_uses: dict[CodeLocation, dict[atoms.Tmp, set[tuple[Tmp, int]]]] = defaultdict(dict)
         self.phi_vvar_ids: set[int] = set()
+        self.phivarid_to_varids_with_unknown: dict[int, set[int | None]] = {}
         self.phivarid_to_varids: dict[int, set[int]] = {}
         self.vvar_uses_by_loc: dict[CodeLocation, list[int]] = {}
 
angr/analyses/s_reaching_definitions/s_reaching_definitions.py

@@ -63,7 +63,7 @@ class SReachingDefinitionsAnalysis(Analysis):
             case _:
                 raise NotImplementedError
 
-        phi_vvars: dict[int, set[int]] = {}
+        phi_vvars: dict[int, set[int | None]] = {}
         # find all vvar definitions
         vvar_deflocs = get_vvar_deflocs(blocks.values(), phi_vvars=phi_vvars)
         # find all explicit vvar uses
@@ -87,7 +87,10 @@ class SReachingDefinitionsAnalysis(Analysis):
         self.model.phi_vvar_ids = set(phi_vvars)
         self.model.phivarid_to_varids = {}
         for vvar_id, src_vvars in phi_vvars.items():
-            self.model.phivarid_to_varids[vvar_id] = src_vvars
+            self.model.phivarid_to_varids_with_unknown[vvar_id] = src_vvars
+            self.model.phivarid_to_varids[vvar_id] = (  # type: ignore
+                {vvar_id for vvar_id in src_vvars if vvar_id is not None} if None in src_vvars else src_vvars
+            )
 
         if self.mode == "function":
 
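The hunk above keeps two views of each phi variable's sources: the raw set, which may contain None when a source cannot be resolved, and a cleaned set with the None entries stripped. A standalone sketch of that bookkeeping with made-up variable IDs (plain Python, no angr imports):

# Illustration only: raw phi sources vs. the None-free view stored alongside them.
phi_vvars: dict[int, set[int | None]] = {
    10: {1, 2},      # phi vvar 10 merges vvars 1 and 2
    11: {3, None},   # one source of phi vvar 11 is unknown
}

phivarid_to_varids_with_unknown = dict(phi_vvars)
phivarid_to_varids = {
    phi_id: ({src for src in srcs if src is not None} if None in srcs else srcs)
    for phi_id, srcs in phi_vvars.items()
}

assert phivarid_to_varids == {10: {1, 2}, 11: {3}}
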
angr/analyses/typehoon/simple_solver.py

@@ -1,4 +1,4 @@
-# pylint:disable=missing-class-docstring
+# pylint:disable=missing-class-docstring,too-many-boolean-expressions
 from __future__ import annotations
 import enum
 from collections import defaultdict
@@ -432,7 +432,14 @@ class SimpleSolver:
     improvements.
     """
 
-    def __init__(self, bits: int, constraints, typevars, stackvar_max_sizes: dict[TypeVariable, int] | None = None):
+    def __init__(
+        self,
+        bits: int,
+        constraints,
+        typevars,
+        constraint_set_degradation_threshold: int = 150,
+        stackvar_max_sizes: dict[TypeVariable, int] | None = None,
+    ):
         if bits not in (32, 64):
             raise ValueError(f"Pointer size {bits} is not supported. Expect 32 or 64.")
 
@@ -440,6 +447,7 @@ class SimpleSolver:
         self._constraints: dict[TypeVariable, set[TypeConstraint]] = constraints
         self._typevars: set[TypeVariable] = typevars
         self.stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
+        self._constraint_set_degradation_threshold = constraint_set_degradation_threshold
         self._base_lattice = BASE_LATTICES[bits]
         self._base_lattice_inverted = networkx.DiGraph()
         for src, dst in self._base_lattice.edges:
@@ -459,7 +467,11 @@ class SimpleSolver:
            self.processed_constraints_count += len(self._constraints[typevar])
 
            self._constraints[typevar] |= self._eq_constraints_from_add(typevar)
-            self._constraints[typevar] = self._handle_equivalence(typevar)
+            self._constraints[typevar] |= self._discover_equivalence(self._constraints[typevar])
+            new_constraints, replacements = self._handle_equivalence(self._constraints[typevar])
+            self._equivalence |= replacements
+            self._constraints[typevar] = new_constraints
+            self._constraints[typevar] = self._filter_constraints(self._constraints[typevar])
 
            self.simplified_constraints_count += len(self._constraints[typevar])
 
@@ -517,11 +529,18 @@ class SimpleSolver:
 
         _, sketches = self.infer_shapes(typevars, constraints)
         constraintset2tvs = defaultdict(set)
+        tvs_seen = set()
         for idx, tv in enumerate(constrained_typevars):
             _l.debug("Collecting constraints for type variable %r (%d/%d)", tv, idx + 1, len(constrained_typevars))
+            if tv in tvs_seen:
+                continue
             # build a sub constraint set for the type variable
-            constraint_subset = frozenset(self._generate_constraint_subset(constraints, {tv}))
-            constraintset2tvs[constraint_subset].add(tv)
+            constraint_subset, related_tvs = self._generate_constraint_subset(constraints, {tv})
+            # drop all type vars outside constrained_typevars
+            related_tvs = related_tvs.intersection(constrained_typevars)
+            tvs_seen |= related_tvs
+            frozen_constraint_subset = frozenset(constraint_subset)
+            constraintset2tvs[frozen_constraint_subset] = related_tvs
 
         for idx, (constraint_subset, tvs) in enumerate(constraintset2tvs.items()):
             _l.debug(
@@ -534,8 +553,31 @@ class SimpleSolver:
             )
             self.eqclass_constraints_count.append(len(constraint_subset))
 
-            while True:
-                base_constraint_graph = self._generate_constraint_graph(constraint_subset, tvs | PRIMITIVE_TYPES)
+            if len(constraint_subset) > self._constraint_set_degradation_threshold:
+                _l.debug(
+                    "Constraint subset contains %d constraints, which is over the limit of %d. Enter degradation.",
+                    len(constraint_subset),
+                    self._constraint_set_degradation_threshold,
+                )
+                constraint_subset = self._degrade_constraint_set(constraint_subset)
+                _l.debug("Degraded constraint subset to %d constraints.", len(constraint_subset))
+
+            while constraint_subset:
+
+                _l.debug("Working with %d constraints.", len(constraint_subset))
+
+                # remove constraints that are a <: b where a only appears once; in this case, the solution fo a is
+                # entirely determined by the solution of b (which is the upper bound of a)
+                filtered_constraint_subset, ub_subtypes = self._filter_leaf_typevars(constraint_subset, tvs)
+                _l.debug(
+                    "Filtered %d leaf typevars; %d constraints remain.",
+                    len(ub_subtypes),
+                    len(filtered_constraint_subset),
+                )
+
+                base_constraint_graph = self._generate_constraint_graph(
+                    filtered_constraint_subset, tvs | PRIMITIVE_TYPES
+                )
                 primitive_constraints = self._generate_primitive_constraints(tvs, base_constraint_graph)
                 tvs_with_primitive_constraints = set()
                 for primitive_constraint in primitive_constraints:
@@ -546,12 +588,22 @@ class SimpleSolver:
                 solutions = {}
                 self.determine(sketches, tvs_with_primitive_constraints, solutions)
                 _l.debug("Determined solutions for %d type variable(s).", len(tvs_with_primitive_constraints))
+
+                leaf_solutions = 0
+                for tv_, ub_tv in ub_subtypes.items():
+                    if ub_tv in solutions:
+                        solutions[tv_] = solutions[ub_tv]
+                        leaf_solutions += 1
+                    elif isinstance(ub_tv, TypeConstant):
+                        solutions[tv_] = ub_tv
+                        leaf_solutions += 1
+                _l.debug("Determined solutions for %d leaf type variable(s).", leaf_solutions)
+
                 if not solutions:
                     break
-
                 self.solution |= solutions
 
-                tvs = {tv for tv in tvs if tv not in tvs_with_primitive_constraints}
+                tvs = {tv for tv in tvs if tv not in solutions}
                 if not tvs:
                     break
                 # rewrite existing constraints
@@ -559,7 +611,7 @@ class SimpleSolver:
                 for constraint in constraint_subset:
                     rewritten = self._rewrite_constraint(constraint, solutions)
                     new_constraint_subset.add(rewritten)
-                constraint_subset = new_constraint_subset
+                constraint_subset = self._filter_constraints(new_constraint_subset)
 
         # set the solution for missing type vars to TOP
         self.determine(sketches, set(sketches).difference(set(self.solution)), self.solution)
@@ -775,14 +827,45 @@ class SimpleSolver:
                 new_constraints.add(Equivalence(constraint.type_1, constraint.type_r))
         return new_constraints
 
-    def _handle_equivalence(self, typevar: TypeVariable):
+    @staticmethod
+    def _discover_equivalence(constraints: set[TypeConstraint]) -> set[Equivalence]:
+        """
+        a <:b && b <: a ==> a == b
+        """
+
+        new_eq_constraints: set[Equivalence] = set()
+        subtypes = defaultdict(set)
+        for constraint in constraints:
+            if isinstance(constraint, Subtype):
+                sub_type = constraint.sub_type
+                super_type = constraint.super_type
+                subtypes[sub_type].add(super_type)
+
+        # check everything
+        seen = set()
+        for tv, tv_supers in subtypes.items():
+            for tv_super in tv_supers:
+                if tv_super in subtypes and tv in subtypes[tv_super]:  # noqa: SIM102
+                    # we have a pair of subtypes that are equivalent
+                    if (tv, tv_super) not in seen and (tv_super, tv) not in seen:
+                        new_eq_constraints.add(Equivalence(tv, tv_super))
+                        seen.add((tv, tv_super))
+
+        _l.debug(
+            "Discovered %d equivalence constraints from %d constraints.", len(new_eq_constraints), len(constraints)
+        )
+        return new_eq_constraints
+
+    @staticmethod
+    def _handle_equivalence(
+        constraint_set: set[TypeConstraint],
+    ) -> tuple[set[TypeConstraint], dict[TypeVariable, TypeVariable | TypeConstant]]:
         graph = networkx.Graph()
 
-        replacements = {}
-        constraints = set()
+        replacements: dict[TypeVariable, TypeVariable | TypeConstant] = {}
 
         # collect equivalence relations
-        for constraint in self._constraints[typevar]:
+        for constraint in constraint_set:
             if isinstance(constraint, Equivalence):
                 # | type_a == type_b
                 # we apply unification and removes one of them
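
The new _discover_equivalence pass turns mutual subtyping into equivalence: whenever both a <: b and b <: a are present, a and b must denote the same type. A standalone sketch of the same scan using plain strings instead of angr's TypeVariable/Subtype/Equivalence classes (the constraint set below is made up):

from collections import defaultdict

# Illustration only: (sub, super) pairs standing in for Subtype constraints.
subtype_pairs = [("a", "b"), ("b", "a"), ("b", "c")]

supers = defaultdict(set)
for sub, sup in subtype_pairs:
    supers[sub].add(sup)

equivalences = set()
seen = set()
for tv, tv_supers in supers.items():
    for tv_super in tv_supers:
        if tv_super in supers and tv in supers[tv_super]:
            # a <: b and b <: a both hold; record the pair only once
            if (tv, tv_super) not in seen and (tv_super, tv) not in seen:
                equivalences.add((tv, tv_super))
                seen.add((tv, tv_super))

assert equivalences == {("a", "b")}
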
@@ -803,15 +886,30 @@ class SimpleSolver:
             for tv in components_lst[1:]:
                 replacements[tv] = representative
 
-        # replace
-        for constraint in self._constraints[typevar]:
+        constraints = SimpleSolver._rewrite_constraints_with_replacements(constraint_set, replacements)
+
+        # import pprint
+        # print("Replacements")
+        # pprint.pprint(replacements)
+        # print("Constraints (after replacement)")
+        # pprint.pprint(constraints)
+
+        return constraints, replacements
+
+    @staticmethod
+    def _rewrite_constraints_with_replacements(
+        constraints: set[TypeConstraint], replacements: dict[TypeVariable, TypeVariable]
+    ) -> set[TypeConstraint]:
+        # replace constraints according to a dictionary of type variable replacements
+        replaced_constraints = set()
+        for constraint in constraints:
             if isinstance(constraint, Existence):
                 replaced, new_constraint = constraint.replace(replacements)
 
                 if replaced:
-                    constraints.add(new_constraint)
+                    replaced_constraints.add(new_constraint)
                 else:
-                    constraints.add(constraint)
+                    replaced_constraints.add(constraint)
 
             elif isinstance(constraint, Subtype):
                 # subtype <: supertype
@@ -819,18 +917,122 @@ class SimpleSolver:
                 replaced, new_constraint = constraint.replace(replacements)
 
                 if replaced:
-                    constraints.add(new_constraint)
+                    replaced_constraints.add(new_constraint)
                 else:
-                    constraints.add(constraint)
+                    replaced_constraints.add(constraint)
+        return replaced_constraints
 
-        # import pprint
-        # print("Replacements")
-        # pprint.pprint(replacements)
-        # print("Constraints (after replacement)")
-        # pprint.pprint(constraints)
+    @staticmethod
+    def _filter_constraints(constraints: set[TypeConstraint]) -> set[TypeConstraint]:
+        """
+        Filter out constraints that we don't yet support.
+        """
+
+        filtered_constraints = set()
+        for constraint in constraints:
+            dropped = False
+            if isinstance(constraint, Subtype) and (
+                (isinstance(constraint.sub_type, TypeConstant) and isinstance(constraint.super_type, TypeConstant))
+                or (
+                    isinstance(constraint.sub_type, DerivedTypeVariable)
+                    and isinstance(constraint.sub_type.labels[-1], ConvertTo)
+                )
+                or (
+                    isinstance(constraint.sub_type, TypeVariable)
+                    and isinstance(constraint.super_type, TypeVariable)
+                    and constraint.sub_type == constraint.super_type
+                )
+            ):
+                dropped = True
 
-        self._equivalence = replacements
-        return constraints
+            if not dropped:
+                filtered_constraints.add(constraint)
+
+        return filtered_constraints
+
+    @staticmethod
+    def _filter_leaf_typevars(
+        constraints: set[TypeConstraint], tvs_to_solve: set[TypeVariable]
+    ) -> tuple[set[TypeConstraint], dict[TypeVariable, TypeVariable]]:
+        """
+        Filter out leaf type variables that only appear once in the constraints. These type variables are not
+        interesting and can be removed from the constraints.
+        """
+
+        sub_typevars = defaultdict(set)
+        tv_to_dtvs: dict[TypeVariable, set[TypeVariable | DerivedTypeVariable]] = defaultdict(set)
+        for constraint in constraints:
+            if isinstance(constraint, Subtype):
+                if isinstance(constraint.sub_type, TypeVariable):
+                    sub_typevars[constraint.sub_type].add(constraint.super_type)
+                for tv in [constraint.sub_type, constraint.super_type]:
+                    if isinstance(tv, DerivedTypeVariable):
+                        tv_to_dtvs[tv.type_var].add(constraint.sub_type)
+                    elif isinstance(tv, TypeVariable):
+                        tv_to_dtvs[tv].add(constraint.sub_type)
+
+        ub_subtypes: dict[TypeVariable, TypeVariable] = {}
+        for tv, dtvs in tv_to_dtvs.items():
+            if len(dtvs) == 1 and tv in sub_typevars and len(sub_typevars[tv]) == 1:
+                ub = next(iter(sub_typevars[tv]))
+                if ub in tvs_to_solve:
+                    ub_subtypes[tv] = ub
+
+        filtered_constraints = set()
+        for constraint in constraints:
+            if isinstance(constraint, Subtype) and constraint.sub_type in ub_subtypes:
+                continue
+            filtered_constraints.add(constraint)
+
+        return filtered_constraints, ub_subtypes
+
+    def _degrade_constraint_set(self, constraints: set[TypeConstraint]) -> set[TypeConstraint]:
+        """
+        Degrade the constraint set to a smaller set of constraints to speed up the DFA generation process.
+        """
+
+        tv_with_ls = defaultdict(set)  # tv_with_ls are type variables with Loads or Stores
+        graph = networkx.Graph()
+
+        for constraint in constraints:
+            if isinstance(constraint, Subtype):
+                if isinstance(constraint.sub_type, DerivedTypeVariable) and isinstance(
+                    constraint.sub_type.labels[0], (Load, Store)
+                ):
+                    tv_with_ls[constraint.sub_type.type_var].add(constraint.sub_type)
+                if type(constraint.sub_type) is TypeVariable and type(constraint.super_type) is TypeVariable:
+                    graph.add_edge(constraint.sub_type, constraint.super_type)
+
+        tv_to_degrade = set()
+        for tv, dtvs in tv_with_ls.items():
+            if len(dtvs) > 5:
+                # degrade all subtype relationships involving this type variable to equivalence
+                tv_to_degrade.add(tv)
+
+        replacements = {}
+        for components in networkx.connected_components(graph):
+            if len(components) == 1:
+                continue
+            if any(tv in tv_to_degrade for tv in components):
+                components_lst = sorted(components, key=str)
+                representative = components_lst[0]
+                for tv in components_lst[1:]:
+                    replacements[tv] = representative
+
+        degraded_constraints = self._rewrite_constraints_with_replacements(constraints, replacements)
+
+        # discover more equivalence relations
+        eq_constraints = self._discover_equivalence(degraded_constraints)
+        _l.debug("Discovered %d equivalence constraints from degraded constraints.", len(eq_constraints))
+        if eq_constraints:
+            degraded_constraints, eq_replacements = self._handle_equivalence(degraded_constraints | eq_constraints)
+            self._equivalence |= eq_replacements
+
+        # filter them
+        degraded_constraints = self._filter_constraints(degraded_constraints)
+
+        self._equivalence |= replacements
+        return degraded_constraints
 
     def _convert_arrays(self, constraints):
         for constraint in constraints:
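
The new _filter_leaf_typevars pass removes type variables that occur exactly once, and only on the left of a single x <: ub constraint; their solution is later copied from ub after the smaller set has been solved. A simplified standalone sketch of the idea with strings in place of angr's type variables (the constraint set is made up, and the real implementation also tracks derived type variables):

from collections import defaultdict

# Illustration only: (sub, super) pairs standing in for Subtype constraints.
constraints = {("leaf", "hub"), ("hub", "other"), ("other", "hub")}

occurrences = defaultdict(int)
upper_bounds = defaultdict(set)
for sub, sup in constraints:
    occurrences[sub] += 1
    occurrences[sup] += 1
    upper_bounds[sub].add(sup)

# a "leaf": appears once overall and has exactly one upper bound
ub_subtypes = {
    tv: next(iter(ubs))
    for tv, ubs in upper_bounds.items()
    if occurrences[tv] == 1 and len(ubs) == 1
}
filtered = {(sub, sup) for sub, sup in constraints if sub not in ub_subtypes}

assert ub_subtypes == {"leaf": "hub"}
assert filtered == {("hub", "other"), ("other", "hub")}
# After solving the filtered set, "leaf" simply inherits whatever solution "hub" received.
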
@@ -860,7 +1062,7 @@ class SimpleSolver:
     @staticmethod
     def _generate_constraint_subset(
         constraints: set[TypeConstraint], typevars: set[TypeVariable]
-    ) -> set[TypeConstraint]:
+    ) -> tuple[set[TypeConstraint], set[TypeVariable]]:
         subset = set()
         related_typevars = set(typevars)
         while True:
@@ -890,7 +1092,7 @@ class SimpleSolver:
             if not new:
                 break
             subset |= new
-        return subset
+        return subset, related_typevars
 
     def _generate_constraint_graph(
         self, constraints: set[TypeConstraint], interesting_variables: set[DerivedTypeVariable]
angr/analyses/typehoon/typehoon.py

@@ -40,6 +40,7 @@ class Typehoon(Analysis):
         must_struct: set[TypeVariable] | None = None,
         stackvar_max_sizes: dict[TypeVariable, int] | None = None,
         stack_offset_tvs: dict[int, TypeVariable] | None = None,
+        constraint_set_degradation_threshold: int = 150,
     ):
         """
 
@@ -57,6 +58,7 @@ class Typehoon(Analysis):
         self._must_struct = must_struct
         self._stackvar_max_sizes = stackvar_max_sizes if stackvar_max_sizes is not None else {}
         self._stack_offset_tvs = stack_offset_tvs if stack_offset_tvs is not None else {}
+        self._constraint_set_degradation_threshold = constraint_set_degradation_threshold
 
         self.bits = self.project.arch.bits
         self.solution = None
@@ -193,7 +195,7 @@ class Typehoon(Analysis):
         self.simtypes_solution.update(self._ground_truth)
 
     @staticmethod
-    def _resolve_derived(tv):
+    def _resolve_derived(tv: TypeVariable | DerivedTypeVariable) -> TypeVariable:
         return tv.type_var if isinstance(tv, DerivedTypeVariable) else tv
 
     def _solve(self):
@@ -211,7 +213,13 @@ class Typehoon(Analysis):
             if isinstance(constraint.super_type, TypeVariable):
                 typevars.add(self._resolve_derived(constraint.super_type))
 
-        solver = SimpleSolver(self.bits, self._constraints, typevars, stackvar_max_sizes=self._stackvar_max_sizes)
+        solver = SimpleSolver(
+            self.bits,
+            self._constraints,
+            typevars,
+            stackvar_max_sizes=self._stackvar_max_sizes,
+            constraint_set_degradation_threshold=self._constraint_set_degradation_threshold,
+        )
         self.solution = solver.solution
         self.processed_constraints_count = solver.processed_constraints_count
         self.eqclass_constraints_count = solver.eqclass_constraints_count
angr/analyses/variable_recovery/engine_ail.py

@@ -633,10 +633,7 @@ class SimEngineVRAIL(
         if not r1.data.concrete:
             # we don't support symbolic shiftamount
             r = self.state.top(result_size)
-            return RichR(
-                r,
-                typevar=r0.typevar,
-            )
+            return RichR(r)
 
         shiftamount = r1.data.concrete_value
         return RichR(r0.data << shiftamount, typevar=typeconsts.int_type(result_size), type_constraints=None)
@@ -651,10 +648,7 @@ class SimEngineVRAIL(
         if not r1.data.concrete:
             # we don't support symbolic shiftamount
             r = self.state.top(result_size)
-            return RichR(
-                r,
-                typevar=r0.typevar,
-            )
+            return RichR(r)
 
         shiftamount = r1.data.concrete_value
 
@@ -672,10 +666,7 @@ class SimEngineVRAIL(
         if not r1.data.concrete:
             # we don't support symbolic shiftamount
             r = self.state.top(result_size)
-            return RichR(
-                r,
-                typevar=r0.typevar,
-            )
+            return RichR(r)
 
         shiftamount = r1.data.concrete_value
 
@@ -691,10 +682,7 @@ class SimEngineVRAIL(
         if not r1.data.concrete:
             # we don't support symbolic shiftamount
             r = self.state.top(result_size)
-            return RichR(
-                r,
-                typevar=r0.typevar,
-            )
+            return RichR(r)
 
         shiftamount = r1.data.concrete_value
 
@@ -761,22 +749,22 @@ class SimEngineVRAIL(
     def _handle_binop_Rol(self, expr):
         arg0, arg1 = expr.operands
 
-        r0 = self._expr_bv(arg0)
+        _ = self._expr_bv(arg0)
         _ = self._expr_bv(arg1)
         result_size = arg0.bits
 
         r = self.state.top(result_size)
-        return RichR(r, typevar=r0.typevar)
+        return RichR(r)
 
     def _handle_binop_Ror(self, expr):
         arg0, arg1 = expr.operands
 
-        r0 = self._expr_bv(arg0)
+        _ = self._expr_bv(arg0)
         _ = self._expr_bv(arg1)
         result_size = arg0.bits
 
         r = self.state.top(result_size)
-        return RichR(r, typevar=r0.typevar)
+        return RichR(r)
 
     def _handle_binop_Concat(self, expr):
         arg0, arg1 = expr.operands