angr 9.2.160__cp310-abi3-macosx_10_9_x86_64.whl → 9.2.162__cp310-abi3-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (58)
  1. angr/__init__.py +4 -1
  2. angr/analyses/analysis.py +0 -1
  3. angr/analyses/cfg/cfg_base.py +5 -1
  4. angr/analyses/decompiler/ail_simplifier.py +101 -2
  5. angr/analyses/decompiler/block_simplifier.py +13 -8
  6. angr/analyses/decompiler/clinic.py +1 -0
  7. angr/analyses/decompiler/condition_processor.py +24 -0
  8. angr/analyses/decompiler/counters/call_counter.py +11 -1
  9. angr/analyses/decompiler/decompiler.py +3 -1
  10. angr/analyses/decompiler/graph_region.py +11 -2
  11. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
  12. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -0
  13. angr/analyses/decompiler/optimization_passes/optimization_pass.py +31 -11
  14. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +2 -0
  15. angr/analyses/decompiler/peephole_optimizations/__init__.py +4 -4
  16. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +53 -0
  17. angr/analyses/decompiler/peephole_optimizations/modulo_simplifier.py +89 -0
  18. angr/analyses/decompiler/peephole_optimizations/{const_mull_a_shift.py → optimized_div_simplifier.py} +139 -25
  19. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +18 -9
  20. angr/analyses/decompiler/region_simplifiers/goto.py +3 -3
  21. angr/analyses/decompiler/region_simplifiers/if_.py +2 -2
  22. angr/analyses/decompiler/region_simplifiers/loop.py +2 -2
  23. angr/analyses/decompiler/structured_codegen/c.py +3 -3
  24. angr/analyses/decompiler/structuring/dream.py +1 -1
  25. angr/analyses/decompiler/structuring/phoenix.py +138 -99
  26. angr/analyses/decompiler/structuring/recursive_structurer.py +3 -2
  27. angr/analyses/decompiler/structuring/sailr.py +51 -43
  28. angr/analyses/decompiler/structuring/structurer_base.py +2 -3
  29. angr/analyses/deobfuscator/string_obf_opt_passes.py +1 -1
  30. angr/analyses/disassembly.py +1 -1
  31. angr/analyses/reaching_definitions/function_handler.py +1 -0
  32. angr/analyses/s_propagator.py +2 -2
  33. angr/analyses/s_reaching_definitions/s_rda_model.py +1 -0
  34. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +5 -2
  35. angr/analyses/variable_recovery/engine_base.py +17 -1
  36. angr/analyses/variable_recovery/variable_recovery_base.py +30 -2
  37. angr/analyses/variable_recovery/variable_recovery_fast.py +11 -2
  38. angr/emulator.py +143 -0
  39. angr/engines/concrete.py +66 -0
  40. angr/engines/icicle.py +66 -30
  41. angr/exploration_techniques/driller_core.py +2 -2
  42. angr/knowledge_plugins/functions/function.py +1 -1
  43. angr/knowledge_plugins/functions/function_manager.py +1 -2
  44. angr/project.py +7 -0
  45. angr/rustylib.abi3.so +0 -0
  46. angr/sim_type.py +16 -8
  47. angr/simos/javavm.py +1 -1
  48. angr/unicornlib.dylib +0 -0
  49. angr/utils/graph.py +48 -13
  50. angr/utils/library.py +13 -12
  51. angr/utils/ssa/__init__.py +57 -5
  52. {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/METADATA +5 -5
  53. {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/RECORD +57 -55
  54. angr/analyses/decompiler/peephole_optimizations/a_sub_a_div_const_mul_const.py +0 -57
  55. {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/WHEEL +0 -0
  56. {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/entry_points.txt +0 -0
  57. {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/licenses/LICENSE +0 -0
  58. {angr-9.2.160.dist-info → angr-9.2.162.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  # pylint: disable=wrong-import-position
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "9.2.160"
5
+ __version__ = "9.2.162"
6
6
 
7
7
  if bytes is str:
8
8
  raise Exception(
@@ -192,6 +192,7 @@ from . import concretization_strategies
192
192
  from .distributed import Server
193
193
  from .knowledge_base import KnowledgeBase
194
194
  from .procedures.definitions import load_external_definitions
195
+ from .emulator import Emulator, EmulatorStopReason
195
196
 
196
197
  # for compatibility reasons
197
198
  from . import sim_manager as manager
@@ -259,6 +260,8 @@ __all__ = (
259
260
  "AngrVaultError",
260
261
  "Blade",
261
262
  "Block",
263
+ "Emulator",
264
+ "EmulatorStopReason",
262
265
  "ExplorationTechnique",
263
266
  "KnowledgeBase",
264
267
  "PTChunk",
angr/analyses/analysis.py CHANGED
@@ -1,4 +1,3 @@
1
- # ruff: noqa: F401
2
1
  from __future__ import annotations
3
2
  import functools
4
3
  import sys
@@ -2566,7 +2566,11 @@ class CFGBase(Analysis):
2566
2566
  """
2567
2567
 
2568
2568
  if arch.name == "X86" or arch.name == "AMD64":
2569
- if set(block.bytes) == {0x90}:
2569
+ block_bytes_set = set(block.bytes)
2570
+ if block_bytes_set == {0x90}:
2571
+ return True
2572
+ if block_bytes_set == {0xCC}:
2573
+ # technically this is not a no-op, but for our purposes we can settle for now
2570
2574
  return True
2571
2575
  elif arch.name == "MIPS32":
2572
2576
  if arch.memory_endness == "Iend_BE":
@@ -10,7 +10,16 @@ import networkx
10
10
 
11
11
  from angr.ailment import AILBlockWalker
12
12
  from angr.ailment.block import Block
13
- from angr.ailment.statement import Statement, Assignment, Store, Call, ConditionalJump, DirtyStatement, WeakAssignment
13
+ from angr.ailment.statement import (
14
+ Statement,
15
+ Assignment,
16
+ Store,
17
+ Call,
18
+ ConditionalJump,
19
+ DirtyStatement,
20
+ WeakAssignment,
21
+ Return,
22
+ )
14
23
  from angr.ailment.expression import (
15
24
  Register,
16
25
  Convert,
@@ -226,6 +235,15 @@ class AILSimplifier(Analysis):
226
235
  # reaching definition analysis results are no longer reliable
227
236
  self._clear_cache()
228
237
 
238
+ _l.debug("Rewriting constant expressions with phi variables")
239
+ phi_const_rewritten = self._rewrite_phi_const_exprs()
240
+ self.simplified |= phi_const_rewritten
241
+ if phi_const_rewritten:
242
+ _l.debug("... constant expressions with phi variables rewritten")
243
+ self._rebuild_func_graph()
244
+ # reaching definition analysis results are no longer reliable
245
+ self._clear_cache()
246
+
229
247
  if self._only_consts:
230
248
  return
231
249
 
@@ -698,6 +716,11 @@ class AILSimplifier(Analysis):
698
716
  if not replacements_by_block_addrs_and_idx:
699
717
  return False
700
718
 
719
+ return self._replace_exprs_in_blocks(replacements_by_block_addrs_and_idx)
720
+
721
+ def _replace_exprs_in_blocks(
722
+ self, replacements: dict[tuple[int, int | None], dict[CodeLocation, dict[Expression, Expression]]]
723
+ ) -> bool:
701
724
  blocks_by_addr_and_idx = {(node.addr, node.idx): node for node in self.func_graph.nodes()}
702
725
 
703
726
  if self._stack_arg_offsets:
@@ -706,7 +729,7 @@ class AILSimplifier(Analysis):
706
729
  insn_addrs_using_stack_args = None
707
730
 
708
731
  replaced = False
709
- for (block_addr, block_idx), reps in replacements_by_block_addrs_and_idx.items():
732
+ for (block_addr, block_idx), reps in replacements.items():
710
733
  block = blocks_by_addr_and_idx[(block_addr, block_idx)]
711
734
 
712
735
  # only replace loads if there are stack arguments in this block
@@ -787,6 +810,72 @@ class AILSimplifier(Analysis):
787
810
 
788
811
  return changed
789
812
 
813
+ #
814
+ # Rewriting constant expressions with phi variables
815
+ #
816
+
817
+ def _rewrite_phi_const_exprs(self) -> bool:
818
+ """
819
+ Rewrite phi variables that are definitely constant expressions to constants.
820
+ """
821
+
822
+ # gather constant assignments
823
+
824
+ vvar_values: dict[int, tuple[int, int]] = {}
825
+ for block in self.func_graph:
826
+ for stmt in block.statements:
827
+ if (
828
+ isinstance(stmt, Assignment)
829
+ and isinstance(stmt.dst, VirtualVariable)
830
+ and isinstance(stmt.src, Const)
831
+ and isinstance(stmt.src.value, int)
832
+ ):
833
+ vvar_values[stmt.dst.varid] = stmt.src.value, stmt.src.bits
834
+
835
+ srda = self._compute_reaching_definitions()
836
+ # compute vvar reachability for phi variables
837
+ # ensure that each phi variable is fully defined, i.e., all its source variables are defined
838
+ g = networkx.Graph()
839
+ for phi_vvar_id, vvar_ids in srda.phivarid_to_varids_with_unknown.items():
840
+ for vvar_id in vvar_ids:
841
+ # we cannot store None to networkx graph, so we use -1 to represent unknown source vvars
842
+ g.add_edge(phi_vvar_id, vvar_id if vvar_id is not None else -1)
843
+
844
+ phi_vvar_ids = srda.phi_vvar_ids
845
+ to_replace = {}
846
+ for cc in networkx.algorithms.connected_components(g):
847
+ if -1 in cc:
848
+ continue
849
+ normal_vvar_ids = cc.difference(phi_vvar_ids)
850
+ # ensure there is at least one phi variable and all remaining vvars are constant non-phi variables
851
+ if len(normal_vvar_ids) < len(cc) and len(normal_vvar_ids.intersection(vvar_values)) == len(
852
+ normal_vvar_ids
853
+ ):
854
+ all_values = {vvar_values[vvar_id] for vvar_id in normal_vvar_ids}
855
+ if len(all_values) == 1:
856
+ # found it!
857
+ value, bits = next(iter(all_values))
858
+ for var_id in cc:
859
+ to_replace[var_id] = value, bits
860
+
861
+ # build the replacement dictionary
862
+ blocks_dict = {(node.addr, node.idx): node for node in self.func_graph.nodes()}
863
+ replacements: dict[tuple[int, int | None], dict[CodeLocation, dict[Expression, Expression]]] = defaultdict(dict)
864
+ for vvar_id, (value, bits) in to_replace.items():
865
+ for expr, use_loc in srda.all_vvar_uses[vvar_id]:
866
+ if expr is None:
867
+ continue
868
+ assert use_loc.block_addr is not None
869
+ key = use_loc.block_addr, use_loc.block_idx
870
+ stmt = blocks_dict[key].statements[use_loc.stmt_idx]
871
+ if is_phi_assignment(stmt):
872
+ continue
873
+ if use_loc not in replacements[key]:
874
+ replacements[key][use_loc] = {}
875
+ replacements[key][use_loc][expr] = Const(None, None, value, bits, **expr.tags)
876
+
877
+ return self._replace_exprs_in_blocks(replacements) if replacements else False
878
+
790
879
  #
791
880
  # Unifying local variables
792
881
  #
@@ -1563,6 +1652,11 @@ class AILSimplifier(Analysis):
1563
1652
  stackarg_offsets = (
1564
1653
  {(tpl[1] & mask) for tpl in self._stack_arg_offsets} if self._stack_arg_offsets is not None else None
1565
1654
  )
1655
+ retpoints: set[tuple[int, int]] = {
1656
+ (node.addr, node.idx)
1657
+ for node in self.func_graph
1658
+ if node.statements and isinstance(node.statements[-1], Return) and self.func_graph.out_degree[node] == 0
1659
+ }
1566
1660
 
1567
1661
  while True:
1568
1662
  new_dead_vars_found = False
@@ -1596,6 +1690,11 @@ class AILSimplifier(Analysis):
1596
1690
  elif vvar_id in self._secondary_stackvars:
1597
1691
  # secondary stack variables are potentially removable
1598
1692
  pass
1693
+ elif (def_codeloc.block_addr, def_codeloc.block_idx) in retpoints:
1694
+ # slack variable assignments in endpoint blocks are potentially removable.
1695
+ # note that this is a hack! we should rely on more reliable stack variable
1696
+ # eliminatability detection.
1697
+ pass
1599
1698
  elif stackarg_offsets is not None:
1600
1699
  # we always remove definitions for stack arguments
1601
1700
  assert vvar.stack_offset is not None
@@ -5,15 +5,14 @@ from typing import TYPE_CHECKING
5
5
  from collections.abc import Iterable, Mapping
6
6
 
7
7
  from angr.ailment.statement import Statement, Assignment, Call, Store, Jump
8
- from angr.ailment.expression import Tmp, Load, Const, Register, Convert, Expression
8
+ from angr.ailment.expression import Tmp, Load, Const, Register, Convert, Expression, VirtualVariable
9
9
  from angr.ailment import AILBlockWalkerBase
10
-
11
10
  from angr.code_location import ExternalCodeLocation, CodeLocation
12
-
13
11
  from angr.knowledge_plugins.key_definitions import atoms
14
12
  from angr.analyses.s_propagator import SPropagatorAnalysis
15
13
  from angr.analyses.s_reaching_definitions import SReachingDefinitionsAnalysis, SRDAModel
16
14
  from angr.analyses import Analysis, register_analysis
15
+ from angr.utils.ssa import has_reference_to_vvar
17
16
  from .peephole_optimizations import (
18
17
  MULTI_STMT_OPTS,
19
18
  STMT_OPTS,
@@ -247,6 +246,10 @@ class BlockSimplifier(Analysis):
247
246
  # don't replace
248
247
  r = False
249
248
  new_stmt = None
249
+ elif isinstance(old, VirtualVariable) and has_reference_to_vvar(stmt, old.varid):
250
+ # never replace an l-value with an r-value
251
+ r = False
252
+ new_stmt = None
250
253
  elif isinstance(stmt, Call) and isinstance(new, Call) and old == stmt.ret_expr:
251
254
  # special case: do not replace the ret_expr of a call statement to another call statement
252
255
  r = False
@@ -330,18 +333,20 @@ class BlockSimplifier(Analysis):
330
333
  for idx, stmt in enumerate(block.statements):
331
334
  if type(stmt) is Assignment:
332
335
  # tmps can't execute new code
333
- if type(stmt.dst) is Tmp and stmt.dst.tmp_idx not in used_tmps:
334
- continue
336
+ if (type(stmt.dst) is Tmp and stmt.dst.tmp_idx not in used_tmps) or idx in dead_defs_stmt_idx:
337
+ # is it assigning to an unused tmp or a dead virgin?
335
338
 
336
- # is it a dead virgin?
337
- if idx in dead_defs_stmt_idx:
338
339
  # does .src involve any Call expressions? if so, we cannot remove it
339
340
  walker = HasCallExprWalker()
340
341
  walker.walk_expression(stmt.src)
341
342
  if not walker.has_call_expr:
342
343
  continue
343
344
 
344
- if stmt.src == stmt.dst:
345
+ if type(stmt.dst) is Tmp and isinstance(stmt.src, Call):
346
+ # eliminate the assignment and replace it with the call
347
+ stmt = stmt.src
348
+
349
+ if isinstance(stmt, Assignment) and stmt.src == stmt.dst:
345
350
  continue
346
351
 
347
352
  new_statements.append(stmt)
@@ -1816,6 +1816,7 @@ class Clinic(Analysis):
1816
1816
  self.function, # pylint:disable=unused-variable
1817
1817
  fail_fast=self._fail_fast, # type:ignore
1818
1818
  func_graph=ail_graph,
1819
+ entry_node_addr=self.entry_node_addr,
1819
1820
  kb=tmp_kb, # type:ignore
1820
1821
  track_sp=False,
1821
1822
  func_args=arg_list,
@@ -241,6 +241,30 @@ class ConditionProcessor:
241
241
  self.guarding_conditions = {}
242
242
  self._ast2annotations = {}
243
243
 
244
+ def have_opposite_edge_conditions(self, graph: networkx.DiGraph, src, dst0, dst1) -> bool:
245
+ """
246
+ Check if the edge conditions of two edges (src, dst0) and (src, dst1) are opposite to each other. Try to avoid
247
+ condition translation if possible.
248
+ """
249
+
250
+ if src in graph and graph.out_degree[src] == 2 and graph.has_edge(src, dst0) and graph.has_edge(src, dst1):
251
+ # sometimes the last statement is the conditional jump. sometimes it's the first statement of the block
252
+ if isinstance(src, ailment.Block) and src.statements and is_head_controlled_loop_block(src):
253
+ last_stmt = next(
254
+ iter(stmt for stmt in src.statements[:-1] if isinstance(stmt, ailment.Stmt.ConditionalJump)), None
255
+ )
256
+ assert last_stmt is not None
257
+ else:
258
+ last_stmt = self.get_last_statement(src)
259
+
260
+ if isinstance(last_stmt, ailment.Stmt.ConditionalJump):
261
+ return True
262
+
263
+ # fallback
264
+ edge_cond_left = self.recover_edge_condition(graph, src, dst0)
265
+ edge_cond_right = self.recover_edge_condition(graph, src, dst1)
266
+ return claripy.is_true(claripy.Not(edge_cond_left) == edge_cond_right) # type: ignore
267
+
244
268
  def recover_edge_condition(self, graph: networkx.DiGraph, src, dst):
245
269
  edge = src, dst
246
270
  edge_data = graph.get_edge_data(*edge)
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
  from typing import TYPE_CHECKING
3
3
 
4
4
  from angr.ailment import Block
5
- from angr.ailment.statement import Label
5
+ from angr.ailment.statement import Label, ConditionalJump
6
6
  from angr.ailment.block_walker import AILBlockWalkerBase
7
7
 
8
8
  from angr.analyses.decompiler.sequence_walker import SequenceWalker
@@ -18,6 +18,9 @@ class AILBlockCallCounter(AILBlockWalkerBase):
18
18
 
19
19
  calls = 0
20
20
 
21
+ def _handle_ConditionalJump(self, stmt_idx: int, stmt: ConditionalJump, block: Block | None):
22
+ return
23
+
21
24
  def _handle_CallExpr(self, expr_idx: int, expr: Call, stmt_idx: int, stmt, block: Block | None):
22
25
  self.calls += 1
23
26
  super()._handle_CallExpr(expr_idx, expr, stmt_idx, stmt, block)
@@ -40,6 +43,13 @@ class AILCallCounter(SequenceWalker):
40
43
  self.calls = 0
41
44
  self.non_label_stmts = 0
42
45
 
46
+ def _handle_Condition(self, node, **kwargs):
47
+ # do not count calls in conditions
48
+ if node.true_node is not None:
49
+ super()._handle(node.true_node, **kwargs)
50
+ if node.false_node is not None:
51
+ super()._handle(node.false_node, **kwargs)
52
+
43
53
  def _handle_Block(self, node: Block, **kwargs): # pylint:disable=unused-argument
44
54
  ctr = AILBlockCallCounter()
45
55
  ctr.walk(node)
@@ -618,7 +618,9 @@ class Decompiler(Analysis):
618
618
  new_type = var_manager.get_variable_type(var)
619
619
  if new_type is not None:
620
620
  self.func.prototype.args = (
621
- self.func.prototype.args[:i] + (new_type,) + self.func.prototype.args[i + 1 :]
621
+ *self.func.prototype.args[:i],
622
+ new_type,
623
+ *self.func.prototype.args[i + 1 :],
622
624
  )
623
625
  except Exception: # pylint:disable=broad-except
624
626
  if self._fail_fast:
@@ -324,6 +324,15 @@ class GraphRegion:
324
324
  out_edges = list(graph.out_edges(node))
325
325
 
326
326
  graph.remove_node(node)
327
+
328
+ # FIXME: this is a giant hack to work around the problem that the graph region might have been restructured
329
+ # but not updated in *all* other regions whose .graph_with_successors references this graph region (we only
330
+ # update the parent_region graph right now).
331
+ existing_graph_regions: dict[int, GraphRegion] = {r.addr: r for r in graph if isinstance(r, GraphRegion)}
332
+ for r in sub_graph:
333
+ if isinstance(r, GraphRegion) and r not in graph and r.addr in existing_graph_regions:
334
+ self._replaced_regions[r] = existing_graph_regions[r.addr]
335
+
327
336
  sub_graph_nodes = [self._replaced_regions.get(nn, nn) for nn in sub_graph.nodes]
328
337
  sub_graph_edges = [
329
338
  (self._replaced_regions.get(src, src), self._replaced_regions.get(dst, dst)) for src, dst in sub_graph.edges
@@ -376,11 +385,11 @@ class GraphRegion:
376
385
  else:
377
386
  if dst_in_subgraph in sub_graph:
378
387
  for src in sub_graph.predecessors(dst_in_subgraph):
379
- graph.add_edge(src, dst)
388
+ graph.add_edge(self._replaced_regions.get(src, src), dst)
380
389
  elif reference_full_graph is not None and dst_in_subgraph in reference_full_graph:
381
390
  for src in reference_full_graph.predecessors(dst_in_subgraph):
382
391
  if src in graph:
383
- graph.add_edge(src, dst)
392
+ graph.add_edge(self._replaced_regions.get(src, src), dst)
384
393
  else:
385
394
  # it may happen that the dst node no longer exists in sub_graph or its successors
386
395
  # this is because we have deemed that the dst node is no longer a valid successor for sub_graph
@@ -313,7 +313,7 @@ class ConstPropOptReverter(OptimizationPass):
313
313
 
314
314
  # construct new constant block
315
315
  new_const_block = const_block.copy()
316
- new_const_block.statements = new_const_block.statements[:-1] + [reg_assign] + [symb_return_stmt.copy()]
316
+ new_const_block.statements = [*new_const_block.statements[:-1], reg_assign, symb_return_stmt.copy()]
317
317
  self._update_block(const_block, new_const_block)
318
318
  self.resolution = True
319
319
  else:
@@ -159,6 +159,7 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
159
159
  def __init__(self, func, min_distinct_cases=2, **kwargs):
160
160
  super().__init__(
161
161
  func,
162
+ require_structurable_graph=False,
162
163
  require_gotos=False,
163
164
  prevent_new_gotos=False,
164
165
  simplify_ail=False,
@@ -15,7 +15,7 @@ from angr.analyses.decompiler.ailgraph_walker import AILGraphWalker
15
15
  from angr.analyses.decompiler.condition_processor import ConditionProcessor
16
16
  from angr.analyses.decompiler.goto_manager import Goto, GotoManager
17
17
  from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
18
- from angr.analyses.decompiler.utils import add_labels, remove_edges_in_ailgraph
18
+ from angr.analyses.decompiler.utils import add_labels, remove_edges_in_ailgraph, is_empty_node
19
19
  from angr.analyses.decompiler.counters import ControlFlowStructureCounter
20
20
  from angr.project import Project
21
21
 
@@ -432,12 +432,13 @@ class StructuringOptimizationPass(OptimizationPass):
432
432
  STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
433
433
 
434
434
  _initial_gotos: set[Goto]
435
- _goto_manager: GotoManager
435
+ _goto_manager: GotoManager | None
436
436
  _prev_graph: networkx.DiGraph
437
437
 
438
438
  def __init__(
439
439
  self,
440
440
  func,
441
+ require_structurable_graph: bool = True,
441
442
  prevent_new_gotos: bool = True,
442
443
  strictly_less_gotos: bool = False,
443
444
  recover_structure_fails: bool = True,
@@ -450,6 +451,7 @@ class StructuringOptimizationPass(OptimizationPass):
450
451
  **kwargs,
451
452
  ):
452
453
  super().__init__(func, **kwargs)
454
+ self._require_structurable_graph = require_structurable_graph
453
455
  self._prevent_new_gotos = prevent_new_gotos
454
456
  self._strictly_less_gotos = strictly_less_gotos
455
457
  self._recover_structure_fails = recover_structure_fails
@@ -459,6 +461,8 @@ class StructuringOptimizationPass(OptimizationPass):
459
461
  self._must_improve_rel_quality = must_improve_rel_quality
460
462
  self._readd_labels = readd_labels
461
463
  self._edges_to_remove = edges_to_remove or []
464
+ self._goto_manager = None
465
+ self._initial_gotos = set()
462
466
 
463
467
  # relative quality metrics (excludes gotos)
464
468
  self._initial_structure_counter = None
@@ -476,13 +480,20 @@ class StructuringOptimizationPass(OptimizationPass):
476
480
  if not ret:
477
481
  return
478
482
 
479
- if not self._graph_is_structurable(self._graph, initial=True):
480
- return
483
+ # only initialize self._goto_manager if this optimization requires a structurable graph or gotos
484
+ initial_structurable: bool | None = None
485
+ if self._require_structurable_graph or self._require_gotos or self._prevent_new_gotos:
486
+ initial_structurable = self._graph_is_structurable(self._graph, initial=True)
481
487
 
482
- self._initial_gotos = self._goto_manager.gotos.copy()
483
- if self._require_gotos and not self._initial_gotos:
488
+ if self._require_structurable_graph and initial_structurable is False:
484
489
  return
485
490
 
491
+ if self._require_gotos:
492
+ assert self._goto_manager is not None
493
+ self._initial_gotos = self._goto_manager.gotos.copy()
494
+ if not self._initial_gotos:
495
+ return
496
+
486
497
  # setup for the very first analysis
487
498
  self.out_graph = networkx.DiGraph(self._graph)
488
499
  if self._max_opt_iters > 1:
@@ -500,7 +511,13 @@ class StructuringOptimizationPass(OptimizationPass):
500
511
  if self._readd_labels:
501
512
  self.out_graph = add_labels(self.out_graph)
502
513
 
503
- if not self._graph_is_structurable(self.out_graph, readd_labels=False):
514
+ if (
515
+ self._require_structurable_graph
516
+ and self._max_opt_iters <= 1
517
+ and not self._graph_is_structurable(self.out_graph, readd_labels=False)
518
+ ):
519
+ # fixed-point analysis ensures that the output graph is always structurable, otherwise it clears the output
520
+ # graph. so we only check the structurability of the graph when fixed-point analysis did not run.
504
521
  self.out_graph = None
505
522
  return
506
523
 
@@ -523,13 +540,16 @@ class StructuringOptimizationPass(OptimizationPass):
523
540
  return
524
541
 
525
542
  def _get_new_gotos(self):
543
+ assert self._goto_manager is not None
526
544
  return self._goto_manager.gotos
527
545
 
528
546
  def _fixed_point_analyze(self, cache=None):
529
547
  had_any_changes = False
530
548
  for _ in range(self._max_opt_iters):
531
- if self._require_gotos and not self._goto_manager.gotos:
532
- break
549
+ if self._require_gotos:
550
+ assert self._goto_manager is not None
551
+ if not self._goto_manager.gotos:
552
+ break
533
553
 
534
554
  # backup the graph before the optimization
535
555
  if self._recover_structure_fails and self.out_graph is not None:
@@ -590,7 +610,7 @@ class StructuringOptimizationPass(OptimizationPass):
590
610
  _l.warning("Internal structuring failed for OptimizationPass on %s", self._func.name)
591
611
  rs = None
592
612
 
593
- if not rs or not rs.result or not rs.result.nodes or rs.result_incomplete:
613
+ if not rs or not rs.result or is_empty_node(rs.result) or rs.result_incomplete:
594
614
  return False
595
615
 
596
616
  rs = self.project.analyses.RegionSimplifier(self._func, rs.result, arg_vvars=self._arg_vvars, kb=self.kb)
@@ -648,7 +668,7 @@ class StructuringOptimizationPass(OptimizationPass):
648
668
  # Gotos play an important part in readability and control flow structure. We already count gotos in other parts
649
669
  # of the analysis, so we don't need to count them here. However, some gotos are worse than others. Much
650
670
  # like loops, trading gotos (keeping the same total, but getting worse types), is bad for decompilation.
651
- if len(self._initial_gotos) == len(self._goto_manager.gotos) != 0:
671
+ if self._goto_manager is not None and len(self._initial_gotos) == len(self._goto_manager.gotos) != 0:
652
672
  prev_labels = self._initial_structure_counter.goto_targets
653
673
  curr_labels = self._current_structure_counter.goto_targets
654
674
 
@@ -55,6 +55,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
55
55
  region_identifier=None,
56
56
  vvar_id_start: int | None = None,
57
57
  scratch: dict[str, Any] | None = None,
58
+ max_func_blocks: int = 500,
58
59
  **kwargs,
59
60
  ):
60
61
  StructuringOptimizationPass.__init__(
@@ -76,6 +77,7 @@ class ReturnDuplicatorLow(StructuringOptimizationPass, ReturnDuplicatorBase):
76
77
  ri=region_identifier,
77
78
  vvar_id_start=vvar_id_start,
78
79
  scratch=scratch,
80
+ max_func_blocks=max_func_blocks,
79
81
  )
80
82
  self.analyze()
81
83
 
@@ -4,14 +4,14 @@ from .a_div_const_add_a_mul_n_div_const import ADivConstAddAMulNDivConst
4
4
  from .a_mul_const_div_shr_const import AMulConstDivShrConst
5
5
  from .a_shl_const_sub_a import AShlConstSubA
6
6
  from .a_sub_a_div import ASubADiv
7
- from .a_sub_a_div_const_mul_const import ASubADivConstMulConst
7
+ from .modulo_simplifier import ModuloSimplifier
8
8
  from .a_sub_a_shr_const_shr_const import ASubAShrConstShrConst
9
9
  from .arm_cmpf import ARMCmpF
10
10
  from .bswap import Bswap
11
11
  from .cas_intrinsics import CASIntrinsics
12
12
  from .coalesce_same_cascading_ifs import CoalesceSameCascadingIfs
13
13
  from .constant_derefs import ConstantDereferences
14
- from .const_mull_a_shift import ConstMullAShift
14
+ from .optimized_div_simplifier import OptimizedDivisionSimplifier
15
15
  from .extended_byte_and_mask import ExtendedByteAndMask
16
16
  from .remove_empty_if_body import RemoveEmptyIfBody
17
17
  from .remove_redundant_ite_branch import RemoveRedundantITEBranches
@@ -61,14 +61,14 @@ ALL_PEEPHOLE_OPTS: list[type[PeepholeOptimizationExprBase]] = [
61
61
  AShlConstSubA,
62
62
  AMulConstSubA,
63
63
  ASubADiv,
64
- ASubADivConstMulConst,
64
+ ModuloSimplifier,
65
65
  ASubAShrConstShrConst,
66
66
  ARMCmpF,
67
67
  Bswap,
68
68
  CASIntrinsics,
69
69
  CoalesceSameCascadingIfs,
70
70
  ConstantDereferences,
71
- ConstMullAShift,
71
+ OptimizedDivisionSimplifier,
72
72
  ExtendedByteAndMask,
73
73
  RemoveEmptyIfBody,
74
74
  RemoveRedundantITEBranches,
@@ -170,6 +170,10 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
170
170
  if isinstance(expr.operands[0], Const) and expr.operands[0].value == 0:
171
171
  return UnaryOp(expr.idx, "Neg", expr.operands[1], **expr.tags)
172
172
 
173
+ r = EagerEvaluation._combine_like_terms(expr)
174
+ if r is not None:
175
+ return r
176
+
173
177
  if isinstance(expr.operands[0], StackBaseOffset) and isinstance(expr.operands[1], StackBaseOffset):
174
178
  assert isinstance(expr.operands[0].offset, int) and isinstance(expr.operands[1].offset, int)
175
179
  return Const(expr.idx, None, expr.operands[0].offset - expr.operands[1].offset, expr.bits, **expr.tags)
@@ -354,6 +358,55 @@ class EagerEvaluation(PeepholeOptimizationExprBase):
354
358
 
355
359
  return None
356
360
 
361
+ @staticmethod
362
+ def _combine_like_terms(expr: BinaryOp) -> BinaryOp | None:
363
+ """
364
+ Combine like terms for binary operations.
365
+ """
366
+
367
+ op = expr.op
368
+ assert op in {"Add", "Sub"}
369
+
370
+ expr0, expr1 = expr.operands
371
+
372
+ conv = None
373
+ if isinstance(expr0, Convert) and expr0.from_bits < expr0.to_bits:
374
+ conv = expr0.from_bits, expr0.to_bits, expr0.is_signed
375
+ expr0 = expr0.operand
376
+
377
+ if isinstance(expr0, BinaryOp) and expr0.op == "Mul" and isinstance(expr0.operands[1], Const):
378
+ n = expr0.operands[0]
379
+
380
+ if isinstance(n, Convert) and n.from_bits > n.to_bits:
381
+ if conv is not None and (n.to_bits, n.from_bits, n.is_signed) != conv:
382
+ return None
383
+ n = n.operand
384
+
385
+ if n.likes(expr1):
386
+ # (n * C) - n ==> (C - 1) * n
387
+ coeff_0 = expr0.operands[1]
388
+ coeff = Const(coeff_0.idx, None, coeff_0.value - 1, expr.bits, **coeff_0.tags)
389
+ return BinaryOp(
390
+ expr.idx, "Mul", [n, coeff], expr.signed, variable=expr.variable, bits=expr.bits, **expr.tags
391
+ )
392
+ if isinstance(expr1, BinaryOp) and expr1.op == "Mul" and isinstance(expr.operands[1].operands[1], Const):
393
+ n1 = expr.operands[1].operands[0]
394
+ if n.likes(n1):
395
+ # (n * C) - (n1 * C1) ==> n * (C - C1)
396
+ coeff_0 = expr0.operands[1]
397
+ coeff_1 = expr1.operands[1]
398
+ coeff = Const(coeff_0.idx, None, coeff_0.value - coeff_1.value, expr.bits, **coeff_0.tags)
399
+ return BinaryOp(
400
+ expr.idx,
401
+ "Mul",
402
+ [n, coeff],
403
+ expr.signed,
404
+ variable=expr.variable,
405
+ bits=expr.bits,
406
+ **expr.tags,
407
+ )
408
+ return None
409
+
357
410
  @staticmethod
358
411
  def _optimize_unaryop(expr: UnaryOp):
359
412
  if expr.op == "Neg" and isinstance(expr.operand, Const) and isinstance(expr.operand.value, int):