angr 9.2.159__cp310-abi3-macosx_11_0_arm64.whl → 9.2.161__cp310-abi3-macosx_11_0_arm64.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of angr might be problematic; further details are linked from the original diff page.

Files changed (37)
  1. angr/__init__.py +4 -1
  2. angr/analyses/decompiler/ail_simplifier.py +81 -1
  3. angr/analyses/decompiler/block_simplifier.py +7 -5
  4. angr/analyses/decompiler/clinic.py +5 -1
  5. angr/analyses/decompiler/decompiler.py +12 -9
  6. angr/analyses/decompiler/peephole_optimizations/__init__.py +4 -4
  7. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +53 -0
  8. angr/analyses/decompiler/peephole_optimizations/modulo_simplifier.py +89 -0
  9. angr/analyses/decompiler/peephole_optimizations/{const_mull_a_shift.py → optimized_div_simplifier.py} +139 -25
  10. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +18 -9
  11. angr/analyses/decompiler/structuring/phoenix.py +19 -32
  12. angr/analyses/s_reaching_definitions/s_rda_model.py +1 -0
  13. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +5 -2
  14. angr/analyses/typehoon/simple_solver.py +231 -29
  15. angr/analyses/typehoon/typehoon.py +10 -2
  16. angr/analyses/variable_recovery/engine_ail.py +8 -20
  17. angr/analyses/variable_recovery/engine_base.py +9 -1
  18. angr/analyses/variable_recovery/variable_recovery_base.py +30 -2
  19. angr/analyses/variable_recovery/variable_recovery_fast.py +11 -2
  20. angr/emulator.py +143 -0
  21. angr/engines/concrete.py +66 -0
  22. angr/engines/icicle.py +66 -30
  23. angr/exploration_techniques/driller_core.py +2 -2
  24. angr/project.py +7 -0
  25. angr/rustylib.abi3.so +0 -0
  26. angr/sim_type.py +16 -8
  27. angr/state_plugins/unicorn_engine.py +4 -4
  28. angr/{lib/angr_native.dylib → unicornlib.dylib} +0 -0
  29. angr/utils/graph.py +20 -1
  30. angr/utils/ssa/__init__.py +3 -3
  31. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/METADATA +5 -6
  32. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/RECORD +36 -34
  33. angr/analyses/decompiler/peephole_optimizations/a_sub_a_div_const_mul_const.py +0 -57
  34. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/WHEEL +0 -0
  35. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/entry_points.txt +0 -0
  36. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/licenses/LICENSE +0 -0
  37. {angr-9.2.159.dist-info → angr-9.2.161.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.159"
5
+ __version__ = "9.2.161"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
@@ -192,6 +192,7 @@ from . import concretization_strategies
192
192
  from .distributed import Server
193
193
  from .knowledge_base import KnowledgeBase
194
194
  from .procedures.definitions import load_external_definitions
195
+ from .emulator import Emulator, EmulatorStopReason
195
196
 
196
197
  # for compatibility reasons
197
198
  from . import sim_manager as manager
@@ -259,6 +260,8 @@ __all__ = (
259
260
  "AngrVaultError",
260
261
  "Blade",
261
262
  "Block",
263
+ "Emulator",
264
+ "EmulatorStopReason",
262
265
  "ExplorationTechnique",
263
266
  "KnowledgeBase",
264
267
  "PTChunk",
angr/analyses/decompiler/ail_simplifier.py CHANGED
@@ -226,6 +226,15 @@ class AILSimplifier(Analysis):
226
226
  # reaching definition analysis results are no longer reliable
227
227
  self._clear_cache()
228
228
 
229
+ _l.debug("Rewriting constant expressions with phi variables")
230
+ phi_const_rewritten = self._rewrite_phi_const_exprs()
231
+ self.simplified |= phi_const_rewritten
232
+ if phi_const_rewritten:
233
+ _l.debug("... constant expressions with phi variables rewritten")
234
+ self._rebuild_func_graph()
235
+ # reaching definition analysis results are no longer reliable
236
+ self._clear_cache()
237
+
229
238
  if self._only_consts:
230
239
  return
231
240
 
@@ -698,6 +707,11 @@ class AILSimplifier(Analysis):
698
707
  if not replacements_by_block_addrs_and_idx:
699
708
  return False
700
709
 
710
+ return self._replace_exprs_in_blocks(replacements_by_block_addrs_and_idx)
711
+
712
+ def _replace_exprs_in_blocks(
713
+ self, replacements: dict[tuple[int, int | None], dict[CodeLocation, dict[Expression, Expression]]]
714
+ ) -> bool:
701
715
  blocks_by_addr_and_idx = {(node.addr, node.idx): node for node in self.func_graph.nodes()}
702
716
 
703
717
  if self._stack_arg_offsets:
@@ -706,7 +720,7 @@ class AILSimplifier(Analysis):
706
720
  insn_addrs_using_stack_args = None
707
721
 
708
722
  replaced = False
709
- for (block_addr, block_idx), reps in replacements_by_block_addrs_and_idx.items():
723
+ for (block_addr, block_idx), reps in replacements.items():
710
724
  block = blocks_by_addr_and_idx[(block_addr, block_idx)]
711
725
 
712
726
  # only replace loads if there are stack arguments in this block
@@ -787,6 +801,72 @@ class AILSimplifier(Analysis):
787
801
 
788
802
  return changed
789
803
 
804
+ #
805
+ # Rewriting constant expressions with phi variables
806
+ #
807
+
808
+ def _rewrite_phi_const_exprs(self) -> bool:
809
+ """
810
+ Rewrite phi variables that are definitely constant expressions to constants.
811
+ """
812
+
813
+ # gather constant assignments
814
+
815
+ vvar_values: dict[int, tuple[int, int]] = {}
816
+ for block in self.func_graph:
817
+ for stmt in block.statements:
818
+ if (
819
+ isinstance(stmt, Assignment)
820
+ and isinstance(stmt.dst, VirtualVariable)
821
+ and isinstance(stmt.src, Const)
822
+ and isinstance(stmt.src.value, int)
823
+ ):
824
+ vvar_values[stmt.dst.varid] = stmt.src.value, stmt.src.bits
825
+
826
+ srda = self._compute_reaching_definitions()
827
+ # compute vvar reachability for phi variables
828
+ # ensure that each phi variable is fully defined, i.e., all its source variables are defined
829
+ g = networkx.Graph()
830
+ for phi_vvar_id, vvar_ids in srda.phivarid_to_varids_with_unknown.items():
831
+ for vvar_id in vvar_ids:
832
+ # we cannot store None to networkx graph, so we use -1 to represent unknown source vvars
833
+ g.add_edge(phi_vvar_id, vvar_id if vvar_id is not None else -1)
834
+
835
+ phi_vvar_ids = srda.phi_vvar_ids
836
+ to_replace = {}
837
+ for cc in networkx.algorithms.connected_components(g):
838
+ if -1 in cc:
839
+ continue
840
+ normal_vvar_ids = cc.difference(phi_vvar_ids)
841
+ # ensure there is at least one phi variable and all remaining vvars are constant non-phi variables
842
+ if len(normal_vvar_ids) < len(cc) and len(normal_vvar_ids.intersection(vvar_values)) == len(
843
+ normal_vvar_ids
844
+ ):
845
+ all_values = {vvar_values[vvar_id] for vvar_id in normal_vvar_ids}
846
+ if len(all_values) == 1:
847
+ # found it!
848
+ value, bits = next(iter(all_values))
849
+ for var_id in cc:
850
+ to_replace[var_id] = value, bits
851
+
852
+ # build the replacement dictionary
853
+ blocks_dict = {(node.addr, node.idx): node for node in self.func_graph.nodes()}
854
+ replacements: dict[tuple[int, int | None], dict[CodeLocation, dict[Expression, Expression]]] = defaultdict(dict)
855
+ for vvar_id, (value, bits) in to_replace.items():
856
+ for expr, use_loc in srda.all_vvar_uses[vvar_id]:
857
+ if expr is None:
858
+ continue
859
+ assert use_loc.block_addr is not None
860
+ key = use_loc.block_addr, use_loc.block_idx
861
+ stmt = blocks_dict[key].statements[use_loc.stmt_idx]
862
+ if is_phi_assignment(stmt):
863
+ continue
864
+ if use_loc not in replacements[key]:
865
+ replacements[key][use_loc] = {}
866
+ replacements[key][use_loc][expr] = Const(None, None, value, bits, **expr.tags)
867
+
868
+ return self._replace_exprs_in_blocks(replacements) if replacements else False
869
+
790
870
  #
791
871
  # Unifying local variables
792
872
  #
angr/analyses/decompiler/block_simplifier.py CHANGED
@@ -330,18 +330,20 @@ class BlockSimplifier(Analysis):
330
330
  for idx, stmt in enumerate(block.statements):
331
331
  if type(stmt) is Assignment:
332
332
  # tmps can't execute new code
333
- if type(stmt.dst) is Tmp and stmt.dst.tmp_idx not in used_tmps:
334
- continue
333
+ if (type(stmt.dst) is Tmp and stmt.dst.tmp_idx not in used_tmps) or idx in dead_defs_stmt_idx:
334
+ # is it assigning to an unused tmp or a dead virgin?
335
335
 
336
- # is it a dead virgin?
337
- if idx in dead_defs_stmt_idx:
338
336
  # does .src involve any Call expressions? if so, we cannot remove it
339
337
  walker = HasCallExprWalker()
340
338
  walker.walk_expression(stmt.src)
341
339
  if not walker.has_call_expr:
342
340
  continue
343
341
 
344
- if stmt.src == stmt.dst:
342
+ if type(stmt.dst) is Tmp and isinstance(stmt.src, Call):
343
+ # eliminate the assignment and replace it with the call
344
+ stmt = stmt.src
345
+
346
+ if isinstance(stmt, Assignment) and stmt.src == stmt.dst:
345
347
  continue
346
348
 
347
349
  new_statements.append(stmt)
angr/analyses/decompiler/clinic.py CHANGED
@@ -143,7 +143,8 @@ class Clinic(Analysis):
143
143
  desired_variables: set[str] | None = None,
144
144
  force_loop_single_exit: bool = True,
145
145
  complete_successors: bool = False,
146
- max_type_constraints: int = 4000,
146
+ max_type_constraints: int = 100_000,
147
+ type_constraint_set_degradation_threshold: int = 150,
147
148
  ail_graph: networkx.DiGraph | None = None,
148
149
  arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
149
150
  start_stage: ClinicStage | None = ClinicStage.INITIALIZATION,
@@ -185,6 +186,7 @@ class Clinic(Analysis):
185
186
  self._cache = cache
186
187
  self._mode = mode
187
188
  self._max_type_constraints = max_type_constraints
189
+ self._type_constraint_set_degradation_threshold = type_constraint_set_degradation_threshold
188
190
  self.vvar_id_start = vvar_id_start
189
191
  self.vvar_to_vvar: dict[int, int] | None = None
190
192
  # during SSA conversion, we create secondary stack variables because they overlap and are larger than the
@@ -1814,6 +1816,7 @@ class Clinic(Analysis):
1814
1816
  self.function, # pylint:disable=unused-variable
1815
1817
  fail_fast=self._fail_fast, # type:ignore
1816
1818
  func_graph=ail_graph,
1819
+ entry_node_addr=self.entry_node_addr,
1817
1820
  kb=tmp_kb, # type:ignore
1818
1821
  track_sp=False,
1819
1822
  func_args=arg_list,
@@ -1871,6 +1874,7 @@ class Clinic(Analysis):
1871
1874
  must_struct=must_struct,
1872
1875
  ground_truth=groundtruth,
1873
1876
  stackvar_max_sizes=tv_max_sizes,
1877
+ constraint_set_degradation_threshold=self._type_constraint_set_degradation_threshold,
1874
1878
  )
1875
1879
  # tp.pp_constraints()
1876
1880
  # tp.pp_solution()
angr/analyses/decompiler/decompiler.py CHANGED
@@ -23,7 +23,7 @@ from .ailgraph_walker import AILGraphWalker
23
23
  from .condition_processor import ConditionProcessor
24
24
  from .decompilation_options import DecompilationOption
25
25
  from .decompilation_cache import DecompilationCache
26
- from .utils import remove_labels, remove_edges_in_ailgraph
26
+ from .utils import remove_edges_in_ailgraph
27
27
  from .sequence_walker import SequenceWalker
28
28
  from .structuring.structurer_nodes import SequenceNode
29
29
  from .presets import DECOMPILATION_PRESETS, DecompilationPreset
@@ -319,12 +319,8 @@ class Decompiler(Analysis):
319
319
  # removed!
320
320
  remove_edges_in_ailgraph(clinic.graph, clinic.edges_to_remove)
321
321
 
322
- # Rewrite the graph to remove phi expressions
323
- # this is probably optional if we do not pretty-print clinic.graph
324
- clinic.graph = self._transform_graph_from_ssa(clinic.graph)
325
-
326
322
  # save the graph before structuring happens (for AIL view)
327
- clinic.cc_graph = remove_labels(clinic.copy_graph())
323
+ clinic.cc_graph = clinic.copy_graph()
328
324
 
329
325
  codegen = None
330
326
  seq_node = None
@@ -357,7 +353,7 @@ class Decompiler(Analysis):
357
353
  )
358
354
 
359
355
  # rewrite the sequence node to remove phi expressions
360
- seq_node = self._transform_seqnode_from_ssa(seq_node)
356
+ seq_node = self.transform_seqnode_from_ssa(seq_node)
361
357
 
362
358
  # update memory data
363
359
  if self._cfg is not None and self._update_memory_data:
@@ -670,14 +666,21 @@ class Decompiler(Analysis):
670
666
  memory_data_addrs=added_memory_data_addrs,
671
667
  )
672
668
 
673
- def _transform_graph_from_ssa(self, ail_graph: networkx.DiGraph) -> networkx.DiGraph:
669
+ def transform_graph_from_ssa(self, ail_graph: networkx.DiGraph) -> networkx.DiGraph:
670
+ """
671
+ Translate an SSA AIL graph out of SSA form. This is useful for producing a non-SSA AIL graph for displaying in
672
+ angr management.
673
+
674
+ :param ail_graph: The AIL graph to transform out of SSA form.
675
+ :return: The translated AIL graph.
676
+ """
674
677
  variable_kb = self._variable_kb
675
678
  dephication = self.project.analyses.GraphDephication(
676
679
  self.func, ail_graph, rewrite=True, variable_kb=variable_kb, kb=self.kb, fail_fast=self._fail_fast
677
680
  )
678
681
  return dephication.output
679
682
 
680
- def _transform_seqnode_from_ssa(self, seq_node: SequenceNode) -> SequenceNode:
683
+ def transform_seqnode_from_ssa(self, seq_node: SequenceNode) -> SequenceNode:
681
684
  variable_kb = self._variable_kb
682
685
  dephication = self.project.analyses.SeqNodeDephication(
683
686
  self.func, seq_node, rewrite=True, variable_kb=variable_kb, kb=self.kb, fail_fast=self._fail_fast
angr/analyses/decompiler/peephole_optimizations/__init__.py CHANGED
@@ -4,14 +4,14 @@ from .a_div_const_add_a_mul_n_div_const import ADivConstAddAMulNDivConst
4
4
  from .a_mul_const_div_shr_const import AMulConstDivShrConst
5
5
  from .a_shl_const_sub_a import AShlConstSubA
6
6
  from .a_sub_a_div import ASubADiv
7
- from .a_sub_a_div_const_mul_const import ASubADivConstMulConst
7
+ from .modulo_simplifier import ModuloSimplifier
8
8
  from .a_sub_a_shr_const_shr_const import ASubAShrConstShrConst
9
9
  from .arm_cmpf import ARMCmpF
10
10
  from .bswap import Bswap
11
11
  from .cas_intrinsics import CASIntrinsics
12
12
  from .coalesce_same_cascading_ifs import CoalesceSameCascadingIfs
13
13
  from .constant_derefs import ConstantDereferences
14
- from .const_mull_a_shift import ConstMullAShift
14
+ from .optimized_div_simplifier import OptimizedDivisionSimplifier
15
15
  from .extended_byte_and_mask import ExtendedByteAndMask
16
16
  from .remove_empty_if_body import RemoveEmptyIfBody
17
17
  from .remove_redundant_ite_branch import RemoveRedundantITEBranches
@@ -61,14 +61,14 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
61
61
  AShlConstSubA,
62
62
  AMulConstSubA,
63
63
  ASubADiv,
64
- ASubADivConstMulConst,
64
+ ModuloSimplifier,
65
65
  ASubAShrConstShrConst,
66
66
  ARMCmpF,
67
67
  Bswap,
68
68
  CASIntrinsics,
69
69
  CoalesceSameCascadingIfs,
70
70
  ConstantDereferences,
71
- ConstMullAShift,
71
+ OptimizedDivisionSimplifier,
72
72
  ExtendedByteAndMask,
73
73
  RemoveEmptyIfBody,
74
74
  RemoveRedundantITEBranches,
angr/analyses/decompiler/peephole_optimizations/eager_eval.py CHANGED
@@ -170,6 +170,10 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
170
170
  if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
171
171
  return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)
172
172
 
173
+ r = EagerEvaluation._combine_like_terms(expr)
174
+ if r is not None:
175
+ return r
176
+
173
177
  if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
174
178
  assert isinstance(expr.operands[0].offset, int) and isinstance(expr.operands[1].offset, int)
175
179
  return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)
@@ -354,6 +358,55 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
354
358
 
355
359
  return None
356
360
 
361
+ @staticmethod
362
+ def _combine_like_terms(expr: BinaryOp) -> BinaryOp | None:
363
+ """
364
+ Combine like terms for binary operations.
365
+ """
366
+
367
+ op = expr.op
368
+ assert op in {"Add", "Sub"}
369
+
370
+ expr0, expr1 = expr.operands
371
+
372
+ conv = None
373
+ if isinstance(expr0, Convert) and expr0.from_bits < expr0.to_bits:
374
+ conv = expr0.from_bits, expr0.to_bits, expr0.is_signed
375
+ expr0 = expr0.operand
376
+
377
+ if isinstance(expr0, BinaryOp) and expr0.op == "Mul" and isinstance(expr0.operands[1], Const):
378
+ n = expr0.operands[0]
379
+
380
+ if isinstance(n, Convert) and n.from_bits > n.to_bits:
381
+ if conv is not None and (n.to_bits, n.from_bits, n.is_signed) != conv:
382
+ return None
383
+ n = n.operand
384
+
385
+ if n.likes(expr1):
386
+ # (n * C) - n ==> (C - 1) * n
387
+ coeff_0 = expr0.operands[1]
388
+ coeff = Const(coeff_0.idx, None, coeff_0.value - 1, expr.bits, **coeff_0.tags)
389
+ return BinaryOp(
390
+ expr.idx, "Mul", [n, coeff], expr.signed, variable=expr.variable, bits=expr.bits, **expr.tags
391
+ )
392
+ if isinstance(expr1, BinaryOp) and expr1.op == "Mul" and isinstance(expr.operands[1].operands[1], Const):
393
+ n1 = expr.operands[1].operands[0]
394
+ if n.likes(n1):
395
+ # (n * C) - (n1 * C1) ==> n * (C - C1)
396
+ coeff_0 = expr0.operands[1]
397
+ coeff_1 = expr1.operands[1]
398
+ coeff = Const(coeff_0.idx, None, coeff_0.value - coeff_1.value, expr.bits, **coeff_0.tags)
399
+ return BinaryOp(
400
+ expr.idx,
401
+ "Mul",
402
+ [n, coeff],
403
+ expr.signed,
404
+ variable=expr.variable,
405
+ bits=expr.bits,
406
+ **expr.tags,
407
+ )
408
+ return None
409
+
357
410
  @staticmethod
358
411
  def _optimize_unaryop(expr: UnaryOp):
359
412
  if expr.op == "Neg" and isinstance(expr.operand, Const) and isinstance(expr.operand.value, int):
angr/analyses/decompiler/peephole_optimizations/modulo_simplifier.py ADDED
@@ -0,0 +1,89 @@
1
+ # pylint:disable=too-many-boolean-expressions
2
+ from __future__ import annotations
3
+ from angr.ailment.expression import BinaryOp, Const, Convert
4
+
5
+ from .base import PeepholeOptimizationExprBase
6
+
7
+
8
+ class ModuloSimplifier(PeepholeOptimizationExprBase):
9
+ """
10
+ Simplify division and multiplication expressions that can be reduced to a modulo operation.
11
+ """
12
+
13
+ __slots__ = ()
14
+
15
+ NAME = "a - (a / N) * N => a % N"
16
+ expr_classes = (BinaryOp,)
17
+
18
+ def optimize( # pylint:disable=unused-argument
19
+ self, expr: BinaryOp, stmt_idx: int | None = None, block=None, **kwargs
20
+ ):
21
+ if expr.op == "Sub" and len(expr.operands) == 2:
22
+ sub0, sub1 = expr.operands
23
+ # unpack Conversions
24
+ outer_conv_expr = None
25
+ if (
26
+ isinstance(sub0, Convert)
27
+ and isinstance(sub1, Convert)
28
+ and sub0.to_bits == sub1.to_bits
29
+ and sub0.from_bits == sub1.from_bits
30
+ and sub0.to_bits > sub0.from_bits
31
+ and sub0.is_signed == sub1.is_signed
32
+ ):
33
+ # Convert(a) - Convert(a / N * N) ==> Convert(a % N)
34
+ outer_conv_expr = sub0
35
+ sub0 = sub0.operand
36
+ sub1 = sub1.operand
37
+
38
+ if isinstance(sub1, BinaryOp) and sub1.op == "Mul" and isinstance(sub1.operands[1], Const):
39
+ a0, op1 = sub0, sub1
40
+ op1_left = op1.operands[0]
41
+ mul_const = sub1.operands[1]
42
+
43
+ if (
44
+ isinstance(op1_left, Convert)
45
+ and isinstance(a0, Convert)
46
+ and op1_left.to_bits == a0.to_bits
47
+ and op1_left.from_bits == a0.from_bits
48
+ ):
49
+ # Convert(a) - (Convert(a / N)) * N ==> Convert(a) % N
50
+ inner_conv_expr = a0
51
+ a0 = a0.operand
52
+ op1_left = op1_left.operand
53
+ else:
54
+ inner_conv_expr = None
55
+
56
+ if isinstance(op1_left, BinaryOp) and op1_left.op == "Div" and isinstance(op1_left.operands[1], Const):
57
+ # a - (a / N) * N ==> a % N
58
+ a1 = op1_left.operands[0]
59
+ div_const = op1_left.operands[1]
60
+
61
+ if a0.likes(a1) and mul_const.value == div_const.value:
62
+ operands = [a0, div_const]
63
+ mod = BinaryOp(expr.idx, "Mod", operands, False, bits=a0.bits, **expr.tags)
64
+ if inner_conv_expr is not None:
65
+ conv_from_bits = inner_conv_expr.from_bits
66
+ conv_to_bits = (
67
+ inner_conv_expr.to_bits if outer_conv_expr is None else outer_conv_expr.to_bits
68
+ )
69
+ conv_signed = inner_conv_expr.is_signed
70
+ conv_expr = inner_conv_expr
71
+ elif outer_conv_expr is not None:
72
+ conv_from_bits = outer_conv_expr.from_bits
73
+ conv_to_bits = outer_conv_expr.to_bits
74
+ conv_signed = outer_conv_expr.is_signed
75
+ conv_expr = outer_conv_expr
76
+ else:
77
+ # no conversion necessary
78
+ return mod
79
+
80
+ return Convert(
81
+ conv_expr.idx,
82
+ conv_from_bits,
83
+ conv_to_bits,
84
+ conv_signed,
85
+ mod,
86
+ **conv_expr.tags,
87
+ )
88
+
89
+ return None
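angr/analyses/decompiler/peephole_optimizations/{const_mull_a_shift.py → optimized_div_simplifier.py} CHANGED
@@ -1,22 +1,25 @@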
1
1
  # pylint:disable=too-many-boolean-expressions
2
2
  from __future__ import annotations
3
+ import math
3
4
 
4
5
  from angr.ailment.expression import Convert, BinaryOp, Const, Expression
5
6
 
6
7
  from .base import PeepholeOptimizationExprBase
7
8
 
8
9
 
9
- class ConstMullAShift(PeepholeOptimizationExprBase):
10
+ class OptimizedDivisionSimplifier(PeepholeOptimizationExprBase):
10
11
  """
11
12
  Convert expressions with right shifts into expressions with divisions.
12
13
  """
13
14
 
14
15
  __slots__ = ()
15
16
 
16
- NAME = "Conv(64->32, (N * a) >> M) => a / N1"
17
+ NAME = "Simplify optimized division expressions, e.g., (N * a) >> M => a / N1"
17
18
  expr_classes = (Convert, BinaryOp)
18
19
 
19
- def optimize(self, expr: Convert | BinaryOp, **kwargs):
20
+ def optimize( # pylint:disable=unused-argument
21
+ self, expr: Convert | BinaryOp, stmt_idx: int | None = None, block=None, **kwargs
22
+ ):
20
23
  r = None
21
24
 
22
25
  if isinstance(expr, Convert):
@@ -37,28 +40,16 @@ class ConstMullAShift(PeepholeOptimizationExprBase):
37
40
  # try to unify if both operands are wrapped with Convert()
38
41
  conv_expr = self._unify_conversion(original_expr)
39
42
  expr = original_expr if conv_expr is None else conv_expr.operand
43
+ assert isinstance(expr, BinaryOp)
40
44
 
41
45
  if expr.op == "Shr" and isinstance(expr.operands[1], Const):
42
- # (N * a) >> M ==> a / N1
43
- inner = expr.operands[0]
44
- if isinstance(inner, BinaryOp) and inner.op in {"Mull", "Mul"} and not inner.signed:
45
- if isinstance(inner.operands[0], Const) and not isinstance(inner.operands[1], Const):
46
- C = inner.operands[0].value
47
- X = inner.operands[1]
48
- elif isinstance(inner.operands[1], Const) and not isinstance(inner.operands[0], Const):
49
- C = inner.operands[1].value
50
- X = inner.operands[0]
51
- else:
52
- C = X = None
53
-
54
- if C is not None and X is not None:
55
- V = expr.operands[1].value
56
- ndigits = 5 if V == 32 else 6
57
- divisor = self._check_divisor(pow(2, V), C, ndigits)
58
- if divisor is not None:
59
- new_const = Const(None, None, divisor, X.bits)
60
- r = BinaryOp(inner.idx, "Div", [X, new_const], inner.signed, **inner.tags)
61
- return self._reconvert(r, conv_expr) if conv_expr is not None else r
46
+ r = self._match_case_b(expr)
47
+ if r is not None:
48
+ return self._reconvert(r, conv_expr) if conv_expr is not None else r
49
+ assert isinstance(expr.operands[1].value, int)
50
+ r = self._match_case_c(expr.operands[0], expr.operands[1].value)
51
+ if r is not None:
52
+ return self._reconvert(r, conv_expr) if conv_expr is not None else r
62
53
 
63
54
  elif expr.op in {"Add", "Sub"}:
64
55
  expr0, expr1 = expr.operands
@@ -119,13 +110,13 @@ class ConstMullAShift(PeepholeOptimizationExprBase):
119
110
 
120
111
  return None
121
112
 
122
- def _match_case_a(self, expr0: Expression, expr1_op: Convert) -> BinaryOp | None:
113
+ def _match_case_a(self, expr0: Expression, expr1: Convert) -> BinaryOp | None:
123
114
  # (
124
115
  # (((Conv(32->64, vvar_44{reg 32}) * 0x4325c53f<64>) >>a 0x24<8>) & 0xffffffff<64>) -
125
116
  # Conv(32->s64, (vvar_44{reg 32} >>a 0x1f<8>))
126
117
  # )
127
118
 
128
- expr1_op = expr1_op.operand
119
+ expr1_op = expr1.operand
129
120
 
130
121
  if (
131
122
  isinstance(expr0, BinaryOp)
@@ -187,6 +178,129 @@ class ConstMullAShift(PeepholeOptimizationExprBase):
187
178
 
188
179
  return None
189
180
 
181
+ @staticmethod
182
+ def _match_case_b(expr: BinaryOp) -> BinaryOp | Convert | None:
183
+ """
184
+ A more complex (but general) case for unsigned 32-bit division by a constant integer.
185
+
186
+ Ref: https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
187
+
188
+ Given n and d, n//d (unsigned) can be rewritten to t >> (p - 1) where
189
+ - p = ceiling(log2(d))
190
+ - m = ceiling((2 ** (32 + p) / d))
191
+ - q = (m * n) >> 32
192
+ - t = q + ((n - q) >> 1)
193
+
194
+ We can match the expression against t >> (p - 1).
195
+ """
196
+
197
+ # t >> (p - 1)
198
+ if not (isinstance(expr, BinaryOp) and expr.op == "Shr"):
199
+ return None
200
+ if not (isinstance(expr.operands[1], Const) and expr.operands[1].value > 0):
201
+ return None
202
+ p_minus_1 = expr.operands[1].value
203
+ p = p_minus_1 + 1
204
+ t = expr.operands[0]
205
+
206
+ # unmask
207
+ if isinstance(t, BinaryOp) and t.op == "And":
208
+ if isinstance(t.operands[1], Const) and t.operands[1].value == 0xFFFFFFFF:
209
+ t = t.operands[0]
210
+ elif isinstance(t.operands[0], Const) and t.operands[0].value == 0xFFFFFFFF:
211
+ t = t.operands[1]
212
+ else:
213
+ return None
214
+
215
+ # t = q + ((n - q) >> 1)
216
+ if not (isinstance(t, BinaryOp) and t.op == "Add"):
217
+ return None
218
+
219
+ if (
220
+ isinstance(t.operands[0], BinaryOp)
221
+ and t.operands[0].op == "Shr"
222
+ and isinstance(t.operands[0].operands[1], Const)
223
+ and t.operands[0].operands[1].value == 1
224
+ ):
225
+ q = t.operands[1]
226
+ n_minus_q = t.operands[0].operands[0]
227
+ elif (
228
+ isinstance(t.operands[1], BinaryOp)
229
+ and t.operands[1].op == "Shr"
230
+ and isinstance(t.operands[1].operands[1], Const)
231
+ and t.operands[1].operands[1].value == 1
232
+ ):
233
+ q = t.operands[0]
234
+ n_minus_q = t.operands[1]
235
+ else:
236
+ return None
237
+ if isinstance(q, Convert) and q.from_bits == 64 and q.to_bits == 32:
238
+ q = q.operand
239
+ if isinstance(n_minus_q, Convert) and n_minus_q.from_bits == 64 and n_minus_q.to_bits == 32:
240
+ n_minus_q = n_minus_q.operand
241
+
242
+ # unmask
243
+ if isinstance(n_minus_q, BinaryOp) and n_minus_q.op == "And":
244
+ if isinstance(n_minus_q.operands[1], Const) and n_minus_q.operands[1].value == 0xFFFFFFFF:
245
+ n_minus_q = n_minus_q.operands[0]
246
+ elif isinstance(n_minus_q.operands[0], Const) and n_minus_q.operands[0].value == 0xFFFFFFFF:
247
+ n_minus_q = n_minus_q.operands[1]
248
+ else:
249
+ return None
250
+
251
+ if not (isinstance(n_minus_q, BinaryOp) and n_minus_q.op == "Sub"):
252
+ return None
253
+ if not q.likes(n_minus_q.operands[1]):
254
+ return None
255
+
256
+ # q = (m * n) >> 32
257
+ if not (
258
+ isinstance(q, BinaryOp) and q.op == "Shr" and isinstance(q.operands[1], Const) and q.operands[1].value == 32
259
+ ):
260
+ return None
261
+ if not (isinstance(q.operands[0], BinaryOp) and q.operands[0].op in {"Mull", "Mul"}):
262
+ return None
263
+ if isinstance(q.operands[0].operands[1], Const):
264
+ n = q.operands[0].operands[0]
265
+ m = q.operands[0].operands[1].value
266
+ elif isinstance(q.operands[0].operands[0], Const):
267
+ n = q.operands[0].operands[1]
268
+ m = q.operands[0].operands[0].value
269
+ else:
270
+ # this should never happen, because multiplication of two constants are eagerly evaluated
271
+ return None
272
+
273
+ assert isinstance(m, int) and isinstance(p, int)
274
+ divisor = math.ceil((2 ** (32 + p)) / (m + 0x1_0000_0000))
275
+ if divisor == 0:
276
+ return None
277
+ divisor_expr = Const(None, None, divisor, n.bits)
278
+ div = BinaryOp(expr.idx, "Div", [n, divisor_expr], signed=False, **expr.tags)
279
+ if expr.bits != div.bits:
280
+ div = Convert(expr.idx, div.bits, expr.bits, False, div, **expr.tags)
281
+ return div
282
+
283
+ def _match_case_c(self, inner, m: int) -> BinaryOp | None:
284
+ # (N * a) >> M ==> a / N1
285
+ if isinstance(inner, BinaryOp) and inner.op in {"Mull", "Mul"} and not inner.signed:
286
+ if isinstance(inner.operands[0], Const) and not isinstance(inner.operands[1], Const):
287
+ C = inner.operands[0].value
288
+ X = inner.operands[1]
289
+ elif isinstance(inner.operands[1], Const) and not isinstance(inner.operands[0], Const):
290
+ C = inner.operands[1].value
291
+ X = inner.operands[0]
292
+ else:
293
+ C = X = None
294
+
295
+ if C is not None and X is not None:
296
+ V = m
297
+ ndigits = 5 if V == 32 else 6
298
+ divisor = self._check_divisor(pow(2, V), C, ndigits)
299
+ if divisor is not None:
300
+ new_const = Const(None, None, divisor, X.bits)
301
+ return BinaryOp(inner.idx, "Div", [X, new_const], inner.signed, **inner.tags)
302
+ return None
303
+
190
304
  @staticmethod
191
305
  def _check_divisor(a, b, ndigits=6):
192
306
  if b == 0: