angr 9.2.114__py3-none-win_amd64.whl → 9.2.116__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (50) hide show
  1. angr/__init__.py +1 -1
  2. angr/__main__.py +2 -2
  3. angr/analyses/cfg/cfg_fast.py +1 -1
  4. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +2 -2
  5. angr/analyses/decompiler/decompilation_options.py +2 -12
  6. angr/analyses/decompiler/decompiler.py +14 -3
  7. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +3 -0
  8. angr/analyses/decompiler/optimization_passes/cross_jump_reverter.py +0 -3
  9. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +2 -3
  10. angr/analyses/decompiler/optimization_passes/optimization_pass.py +9 -2
  11. angr/analyses/decompiler/optimization_passes/ret_deduplicator.py +2 -0
  12. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +2 -0
  13. angr/analyses/decompiler/optimization_passes/stack_canary_simplifier.py +5 -1
  14. angr/analyses/decompiler/structured_codegen/c.py +10 -13
  15. angr/analyses/decompiler/structuring/__init__.py +6 -2
  16. angr/analyses/decompiler/structuring/dream.py +3 -4
  17. angr/analyses/decompiler/structuring/phoenix.py +29 -93
  18. angr/analyses/decompiler/structuring/recursive_structurer.py +0 -3
  19. angr/analyses/decompiler/structuring/sailr.py +111 -0
  20. angr/analyses/decompiler/structuring/structurer_base.py +2 -5
  21. angr/analyses/decompiler/structuring/structurer_nodes.py +3 -3
  22. angr/analyses/reaching_definitions/dep_graph.py +62 -5
  23. angr/analyses/reaching_definitions/function_handler.py +11 -1
  24. angr/analyses/reaching_definitions/function_handler_library/__init__.py +11 -0
  25. angr/analyses/reaching_definitions/function_handler_library/stdio.py +262 -0
  26. angr/analyses/reaching_definitions/function_handler_library/stdlib.py +157 -0
  27. angr/analyses/reaching_definitions/function_handler_library/string.py +93 -0
  28. angr/analyses/reaching_definitions/function_handler_library/unistd.py +23 -0
  29. angr/analyses/reaching_definitions/rd_state.py +28 -29
  30. angr/analyses/variable_recovery/engine_vex.py +0 -9
  31. angr/analyses/vfg.py +13 -14
  32. angr/code_location.py +4 -4
  33. angr/engines/pcode/cc.py +2 -0
  34. angr/engines/vex/heavy/heavy.py +1 -1
  35. angr/knowledge_plugins/key_definitions/live_definitions.py +12 -13
  36. angr/lib/angr_native.dll +0 -0
  37. angr/procedures/libc/strlen.py +5 -2
  38. angr/sim_variable.py +3 -18
  39. angr/state_plugins/solver.py +3 -9
  40. angr/storage/memory_mixins/address_concretization_mixin.py +1 -1
  41. angr/storage/memory_mixins/paged_memory/pages/cooperation.py +2 -1
  42. angr/storage/memory_mixins/regioned_memory/abstract_merger_mixin.py +4 -2
  43. angr/storage/memory_mixins/regioned_memory/regioned_memory_mixin.py +5 -5
  44. angr/storage/memory_mixins/regioned_memory/static_find_mixin.py +3 -3
  45. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/METADATA +7 -7
  46. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/RECORD +50 -44
  47. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/WHEEL +1 -1
  48. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/LICENSE +0 -0
  49. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/entry_points.txt +0 -0
  50. {angr-9.2.114.dist-info → angr-9.2.116.dist-info}/top_level.txt +0 -0
angr/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # pylint: disable=wildcard-import
2
2
  # pylint: disable=wrong-import-position
3
3
 
4
- __version__ = "9.2.114"
4
+ __version__ = "9.2.116"
5
5
 
6
6
  if bytes is str:
7
7
  raise Exception(
angr/__main__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import argparse
2
2
 
3
- from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES
3
+ from angr.analyses.decompiler.structuring import STRUCTURER_CLASSES, DEFAULT_STRUCTURER
4
4
  from angr.analyses.decompiler.utils import decompile_functions
5
5
 
6
6
 
@@ -42,7 +42,7 @@ def main():
42
42
  "--structurer",
43
43
  help="The structuring algorithm to use for decompilation.",
44
44
  choices=STRUCTURER_CLASSES.keys(),
45
- default="phoenix",
45
+ default=DEFAULT_STRUCTURER.NAME,
46
46
  )
47
47
 
48
48
  args = parser.parse_args()
@@ -2981,7 +2981,7 @@ class CFGFast(ForwardAnalysis[CFGNode, CFGNode, CFGJob, int], CFGBase): # pylin
2981
2981
  simsucc = self.project.factory.default_engine.process(self._initial_state, irsb, force_addr=addr)
2982
2982
  if len(simsucc.successors) == 1:
2983
2983
  ip = simsucc.successors[0].ip
2984
- if ip._model_concrete is not ip:
2984
+ if claripy.backends.concrete.convert(ip) is not ip:
2985
2985
  target_addr = ip.concrete_value
2986
2986
  obj = self.project.loader.find_object_containing(target_addr, membership_check=False)
2987
2987
  if (obj is not None and obj is not self.project.loader.main_object) or self.project.is_hooked(
@@ -1734,7 +1734,7 @@ class JumpTableResolver(IndirectJumpResolver):
1734
1734
  # full-function data propagation before performing jump table recovery.
1735
1735
  l.debug("Multiple statements adding bases, not supported yet") # FIXME: Just check the addresses?
1736
1736
 
1737
- jumptable_addr_vsa = jumptable_addr._model_vsa
1737
+ jumptable_addr_vsa = claripy.backends.vsa.convert(jumptable_addr)
1738
1738
 
1739
1739
  if not isinstance(jumptable_addr_vsa, claripy.vsa.StridedInterval):
1740
1740
  return None
@@ -2103,7 +2103,7 @@ class JumpTableResolver(IndirectJumpResolver):
2103
2103
 
2104
2104
  read_length = state.inspect.mem_read_length
2105
2105
  if not isinstance(read_length, int):
2106
- read_length = read_length._model_vsa.upper_bound
2106
+ read_length = claripy.backends.vsa.convert(read_length).upper_bound
2107
2107
  if read_length > 16:
2108
2108
  return
2109
2109
  new_read_addr = state.solver.BVV(UninitReadMeta.uninit_read_base, state.arch.bits)
@@ -193,21 +193,11 @@ options = [
193
193
  "recursive_structurer",
194
194
  "structurer_cls",
195
195
  category="Structuring",
196
- default_value="Phoenix",
197
- candidate_values=["Dream", "Phoenix"],
196
+ default_value="SAILR",
197
+ candidate_values=["SAILR", "Phoenix", "DREAM"],
198
198
  clears_cache=True,
199
199
  convert=structurer_class_from_name,
200
200
  ),
201
- O(
202
- "Improve structuring algorithm",
203
- "If applicable in deeper structurer, like Phoenix, improves decompilation output",
204
- bool,
205
- "recursive_structurer",
206
- "improve_structurer",
207
- category="Structuring",
208
- default_value=True,
209
- clears_cache=True,
210
- ),
211
201
  O(
212
202
  "C-style null compares",
213
203
  "Rewrites the (x == 0) => (!x) && (x != 0) => (x)",
@@ -14,7 +14,7 @@ from ...knowledge_base import KnowledgeBase
14
14
  from ...sim_variable import SimMemoryVariable, SimRegisterVariable, SimStackVariable
15
15
  from ...utils import timethis
16
16
  from .. import Analysis, AnalysesHub
17
- from .structuring import RecursiveStructurer, PhoenixStructurer
17
+ from .structuring import RecursiveStructurer, PhoenixStructurer, DEFAULT_STRUCTURER
18
18
  from .region_identifier import RegionIdentifier
19
19
  from .optimization_passes.optimization_pass import OptimizationPassStage
20
20
  from .optimization_passes import get_default_optimization_passes
@@ -146,8 +146,9 @@ class Decompiler(Analysis):
146
146
  self._complete_successors = False
147
147
  self._recursive_structurer_params = self.options_to_params(self.options_by_class["recursive_structurer"])
148
148
  if "structurer_cls" not in self._recursive_structurer_params:
149
- self._recursive_structurer_params["structurer_cls"] = PhoenixStructurer
150
- if self._recursive_structurer_params["structurer_cls"] == PhoenixStructurer:
149
+ self._recursive_structurer_params["structurer_cls"] = DEFAULT_STRUCTURER
150
+ # is the algorithm based on Phoenix (a schema-based algorithm)?
151
+ if issubclass(self._recursive_structurer_params["structurer_cls"], PhoenixStructurer):
151
152
  self._force_loop_single_exit = False
152
153
  self._complete_successors = True
153
154
  fold_callexprs_into_conditions = True
@@ -316,6 +317,11 @@ class Decompiler(Analysis):
316
317
  continue
317
318
  if pass_.STRUCTURING:
318
319
  if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
320
+ l.warning(
321
+ "Skipping %s because it does not support structuring algorithm: %s",
322
+ pass_,
323
+ self._recursive_structurer_params["structurer_cls"].NAME,
324
+ )
319
325
  continue
320
326
 
321
327
  a = pass_(
@@ -367,6 +373,11 @@ class Decompiler(Analysis):
367
373
  continue
368
374
  if pass_.STRUCTURING:
369
375
  if self._recursive_structurer_params["structurer_cls"].NAME not in pass_.STRUCTURING:
376
+ l.warning(
377
+ "Skipping %s because it does not support structuring algorithm: %s",
378
+ pass_,
379
+ self._recursive_structurer_params["structurer_cls"].NAME,
380
+ )
370
381
  continue
371
382
 
372
383
  a = pass_(
@@ -10,6 +10,7 @@ from ailment.statement import Call, Statement, ConditionalJump, Assignment, Stor
10
10
  from ailment.expression import Convert, Register, Expression
11
11
 
12
12
  from .optimization_pass import OptimizationPass, OptimizationPassStage
13
+ from ..structuring import SAILRStructurer, DreamStructurer
13
14
  from ....knowledge_plugins.key_definitions.atoms import MemoryLocation
14
15
  from ....knowledge_plugins.key_definitions.constants import OP_BEFORE
15
16
 
@@ -140,6 +141,8 @@ class ConstPropOptReverter(OptimizationPass):
140
141
 
141
142
  ARCHES = None
142
143
  PLATFORMS = None
144
+ # allow DREAM since it's useful for return merging
145
+ STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
143
146
  STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
144
147
  NAME = "Revert Constant Propagation Optimizations"
145
148
  DESCRIPTION = __doc__.strip()
@@ -21,11 +21,8 @@ class CrossJumpReverter(StructuringOptimizationPass):
21
21
  a max of max_opt_iters times. Second, it will not duplicate a block with too many calls.
22
22
  """
23
23
 
24
- ARCHES = None
25
- PLATFORMS = None
26
24
  STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
27
25
  NAME = "Duplicate linear blocks with gotos"
28
- STRUCTURING = ["phoenix"]
29
26
  DESCRIPTION = inspect.cleandoc(__doc__).strip()
30
27
 
31
28
  def __init__(
@@ -11,7 +11,7 @@ from ailment.expression import Expression, BinaryOp, Const, Load
11
11
  from angr.utils.graph import GraphUtils
12
12
  from ..utils import first_nonlabel_statement, remove_last_statement
13
13
  from ..structuring.structurer_nodes import IncompleteSwitchCaseHeadStatement, SequenceNode, MultiNode
14
- from .optimization_pass import OptimizationPassStage, MultipleBlocksException, StructuringOptimizationPass
14
+ from .optimization_pass import MultipleBlocksException, StructuringOptimizationPass
15
15
  from ..region_simplifiers.switch_cluster_simplifier import SwitchClusterFinder
16
16
 
17
17
  if TYPE_CHECKING:
@@ -143,14 +143,13 @@ class LoweredSwitchSimplifier(StructuringOptimizationPass):
143
143
  As a hack for now, we only run this deoptimization on Linux binaries.
144
144
  """
145
145
 
146
+ # TODO: this needs to be updated to support Windows, but detect and disable on MSVC
146
147
  PLATFORMS = ["linux"]
147
- STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
148
148
  NAME = "Convert lowered switch-cases (if-else) to switch-cases"
149
149
  DESCRIPTION = (
150
150
  "Convert lowered switch-cases (if-else) to switch-cases. Only works when the Phoenix structuring "
151
151
  "algorithm is in use."
152
152
  )
153
- STRUCTURING = ["phoenix"]
154
153
 
155
154
  def __init__(self, func, min_distinct_cases=2, **kwargs):
156
155
  super().__init__(
@@ -10,7 +10,7 @@ import ailment
10
10
  from angr.analyses.decompiler import RegionIdentifier
11
11
  from angr.analyses.decompiler.condition_processor import ConditionProcessor
12
12
  from angr.analyses.decompiler.goto_manager import GotoManager
13
- from angr.analyses.decompiler.structuring import RecursiveStructurer, PhoenixStructurer
13
+ from angr.analyses.decompiler.structuring import RecursiveStructurer, SAILRStructurer
14
14
  from angr.analyses.decompiler.utils import add_labels
15
15
  from angr.analyses.decompiler.seq_cf_structure_counter import ControlFlowStructureCounter
16
16
 
@@ -266,10 +266,17 @@ class StructuringOptimizationPass(OptimizationPass):
266
266
  The base class for any optimization pass that requires structuring. Optimization passes that inherit from this class
267
267
  should directly depend on structuring artifacts, such as regions and gotos. Otherwise, they should use
268
268
  OptimizationPass. This is the heaviest (computation time) optimization pass class.
269
+
270
+ By default this type of optimization should work:
271
+ - on any architecture
272
+ - on any platform
273
+ - during region identification (to have iterative structuring)
274
+ - only with the SAILR structuring algorithm
269
275
  """
270
276
 
271
277
  ARCHES = None
272
278
  PLATFORMS = None
279
+ STRUCTURING = [SAILRStructurer.NAME]
273
280
  STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
274
281
 
275
282
  def __init__(
@@ -401,7 +408,7 @@ class StructuringOptimizationPass(OptimizationPass):
401
408
  self._ri.region,
402
409
  cond_proc=self._ri.cond_proc,
403
410
  func=self._func,
404
- structurer_cls=PhoenixStructurer,
411
+ structurer_cls=SAILRStructurer,
405
412
  )
406
413
  # pylint:disable=broad-except
407
414
  except Exception:
@@ -4,6 +4,7 @@ import logging
4
4
  from ailment import Block
5
5
  from ailment.statement import ConditionalJump, Return
6
6
 
7
+ from ..structuring import SAILRStructurer, DreamStructurer
7
8
  from ....utils.graph import subgraph_between_nodes
8
9
  from ..utils import remove_labels, to_ail_supergraph, update_labels
9
10
  from .optimization_pass import OptimizationPass, OptimizationPassStage
@@ -28,6 +29,7 @@ class ReturnDeduplicator(OptimizationPass):
28
29
  STAGE = OptimizationPassStage.DURING_REGION_IDENTIFICATION
29
30
  NAME = "Deduplicates return statements that may have been duplicated"
30
31
  DESCRIPTION = __doc__.strip()
32
+ STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
31
33
 
32
34
  def __init__(self, func, **kwargs):
33
35
  super().__init__(func, **kwargs)
@@ -4,6 +4,7 @@ import networkx
4
4
 
5
5
  from .return_duplicator_base import ReturnDuplicatorBase
6
6
  from .optimization_pass import OptimizationPass, OptimizationPassStage
7
+ from ..structuring import SAILRStructurer, DreamStructurer
7
8
 
8
9
  _l = logging.getLogger(name=__name__)
9
10
 
@@ -19,6 +20,7 @@ class ReturnDuplicatorHigh(OptimizationPass, ReturnDuplicatorBase):
19
20
  STAGE = OptimizationPassStage.AFTER_VARIABLE_RECOVERY
20
21
  NAME = "Duplicate return-only blocks (high)"
21
22
  DESCRIPTION = __doc__
23
+ STRUCTURING = [SAILRStructurer.NAME, DreamStructurer.NAME]
22
24
 
23
25
  def __init__(
24
26
  self,
@@ -182,7 +182,11 @@ class StackCanarySimplifier(OptimizationPass):
182
182
 
183
183
  while True:
184
184
  traversed.add(block_addr)
185
- first_block = next(self._get_blocks(block_addr))
185
+ try:
186
+ first_block = next(self._get_blocks(block_addr))
187
+ except StopIteration:
188
+ break
189
+
186
190
  if first_block is None:
187
191
  break
188
192
 
@@ -1,7 +1,7 @@
1
1
  # pylint:disable=missing-class-docstring,too-many-boolean-expressions,unused-argument,no-self-use
2
2
  from typing import Optional, Any, TYPE_CHECKING
3
3
  from collections.abc import Callable
4
- from collections import defaultdict
4
+ from collections import defaultdict, Counter
5
5
  import logging
6
6
  import struct
7
7
 
@@ -491,19 +491,16 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
491
491
  else:
492
492
  name = str(variable)
493
493
 
494
- # sort by number of occurrences, with a preference of non-basic types
494
+ # sort by the following:
495
+ # * if it's a a non-basic type
496
+ # * the number of occurrences
497
+ # * the repr of the type itself
495
498
  # TODO: The type selection should actually happen during variable unification
496
499
  vartypes = [x[1] for x in cvar_and_vartypes]
497
- nonprimitive_vartypes = [
498
- vt for vt in vartypes if not isinstance(vt, (SimTypeChar, SimTypeInt, SimTypeFloat))
499
- ]
500
- vartypes = list(dict.fromkeys(sorted(vartypes, key=vartypes.count, reverse=True)))
501
- if nonprimitive_vartypes:
502
- nonprimitive_vartypes = list(
503
- dict.fromkeys(sorted(nonprimitive_vartypes, key=nonprimitive_vartypes.count, reverse=True))
504
- )
505
- vartypes.remove(nonprimitive_vartypes[0])
506
- vartypes.insert(0, nonprimitive_vartypes[0])
500
+ count = Counter(vartypes)
501
+ vartypes = sorted(
502
+ count.copy(), key=lambda x: (isinstance(x, (SimTypeChar, SimTypeInt, SimTypeFloat)), count[x], repr(x))
503
+ )
507
504
 
508
505
  for i, var_type in enumerate(vartypes):
509
506
  if i == 0:
@@ -2177,8 +2174,8 @@ class CConstant(CExpression):
2177
2174
  v = refval.content.decode("utf-8")
2178
2175
  else:
2179
2176
  # it's a string
2180
- assert isinstance(v, str)
2181
2177
  v = refval
2178
+ assert isinstance(v, str)
2182
2179
  yield CConstant.str_to_c_str(v), self
2183
2180
  elif isinstance(self._type, SimTypePointer) and isinstance(self._type.pts_to, SimTypeWideChar):
2184
2181
  refval = self.reference_values[self._type]
@@ -2,14 +2,18 @@ from typing import Optional, Type
2
2
 
3
3
  from .dream import DreamStructurer
4
4
  from .phoenix import PhoenixStructurer
5
+ from .sailr import SAILRStructurer
5
6
  from .recursive_structurer import RecursiveStructurer
6
7
 
7
8
 
8
9
  STRUCTURER_CLASSES = {
9
- "dream": DreamStructurer,
10
- "phoenix": PhoenixStructurer,
10
+ SAILRStructurer.NAME: SAILRStructurer,
11
+ PhoenixStructurer.NAME: PhoenixStructurer,
12
+ DreamStructurer.NAME: DreamStructurer,
11
13
  }
12
14
 
15
+ DEFAULT_STRUCTURER = SAILRStructurer
16
+
13
17
 
14
18
  def structurer_class_from_name(name: str) -> type | None:
15
19
  return STRUCTURER_CLASSES.get(name.lower(), None)
@@ -1,6 +1,5 @@
1
1
  # pylint:disable=multiple-statements,line-too-long,consider-using-enumerate
2
2
  from typing import Optional, Any, TYPE_CHECKING
3
- from collections import OrderedDict as ODict
4
3
  import logging
5
4
  from collections import defaultdict, OrderedDict
6
5
 
@@ -661,7 +660,7 @@ class DreamStructurer(StructurerBase):
661
660
  i,
662
661
  node,
663
662
  cmp_expr,
664
- cases: ODict,
663
+ cases: OrderedDict,
665
664
  node_default,
666
665
  addr,
667
666
  addr2nodes,
@@ -909,7 +908,7 @@ class DreamStructurer(StructurerBase):
909
908
  head_node_idx: int,
910
909
  node_b_addr: int,
911
910
  addr2nodes: dict[int, set[CodeNode]],
912
- ) -> tuple[ODict, Any, Any]:
911
+ ) -> tuple[OrderedDict, Any, Any]:
913
912
  """
914
913
  Discover all cases for the switch-case structure and build the switch-cases dict.
915
914
 
@@ -922,7 +921,7 @@ class DreamStructurer(StructurerBase):
922
921
  :return: A tuple of (dict of cases, the default node if exists, nodes to remove).
923
922
  """
924
923
 
925
- cases: ODict[int | tuple[int, ...], SequenceNode] = OrderedDict()
924
+ cases: OrderedDict[int | tuple[int, ...], SequenceNode] = OrderedDict()
926
925
  to_remove = set()
927
926
  node_default = addr2nodes.get(node_b_addr, None)
928
927
  if node_default is not None:
@@ -1,6 +1,5 @@
1
1
  # pylint:disable=line-too-long,import-outside-toplevel,import-error,multiple-statements,too-many-boolean-expressions
2
2
  from typing import Any, DefaultDict, Optional, TYPE_CHECKING
3
- from collections import OrderedDict as ODict
4
3
  from collections import defaultdict, OrderedDict
5
4
  from enum import Enum
6
5
  import logging
@@ -12,7 +11,7 @@ from ailment.block import Block
12
11
  from ailment.statement import Statement, ConditionalJump, Jump, Label, Return
13
12
  from ailment.expression import Const, UnaryOp, MultiStatementExpression
14
13
 
15
- from angr.utils.graph import GraphUtils, TemporaryNode, PostDominators
14
+ from angr.utils.graph import GraphUtils
16
15
  from ....knowledge_plugins.cfg import IndirectJumpType
17
16
  from ....utils.constants import SWITCH_MISSING_DEFAULT_NODE_ADDR
18
17
  from ....utils.graph import dominates, to_acyclic_graph, dfs_back_edges
@@ -24,7 +23,6 @@ from ..utils import (
24
23
  is_empty_or_label_only_node,
25
24
  has_nonlabel_statements,
26
25
  first_nonlabel_statement,
27
- structured_node_is_simple_return,
28
26
  )
29
27
  from ..call_counter import AILCallCounter
30
28
  from .structurer_nodes import (
@@ -84,7 +82,7 @@ class PhoenixStructurer(StructurerBase):
84
82
  func: Optional["Function"] = None,
85
83
  case_entry_to_switch_head: dict[int, int] | None = None,
86
84
  parent_region=None,
87
- improve_structurer=True,
85
+ improve_algorithm=False,
88
86
  use_multistmtexprs: MultiStmtExprMode = MultiStmtExprMode.MAX_ONE_CALL,
89
87
  **kwargs,
90
88
  ):
@@ -95,7 +93,6 @@ class PhoenixStructurer(StructurerBase):
95
93
  func=func,
96
94
  case_entry_to_switch_head=case_entry_to_switch_head,
97
95
  parent_region=parent_region,
98
- improve_structurer=improve_structurer,
99
96
  **kwargs,
100
97
  )
101
98
 
@@ -112,13 +109,17 @@ class PhoenixStructurer(StructurerBase):
112
109
  # absorbed into other SequenceNodes
113
110
  self.dowhile_known_tail_nodes: set = set()
114
111
 
115
- self._phoenix_improved = self._improve_structurer
112
+ # in reimplementing the core phoenix algorithm from the phoenix decompiler paper, two types of changes were
113
+ # made to the algorithm:
114
+ # 1. Mandatory fixes to correct flaws we found in the algorithm
115
+ # 2. Optional fixes to improve the results of already correct choices
116
+ #
117
+ # the improve_algorithm flag controls whether the optional fixes are applied. these are disabled by default
118
+ # to be as close to the original algorithm as possible. for best results, enable this flag.
119
+ self._improve_algorithm = improve_algorithm
116
120
  self._edge_virtualization_hints = []
117
121
 
118
122
  self._use_multistmtexprs = use_multistmtexprs
119
- if not self._phoenix_improved:
120
- self._use_multistmtexprs = MultiStmtExprMode.NEVER
121
-
122
123
  self._analyze()
123
124
 
124
125
  @staticmethod
@@ -246,7 +247,7 @@ class PhoenixStructurer(StructurerBase):
246
247
  self._rewrite_jumps_to_continues(loop_node.sequence_node, loop_node=loop_node)
247
248
  return True
248
249
 
249
- if self._phoenix_improved:
250
+ if self._improve_algorithm:
250
251
  matched, loop_node, successor_node = self._match_cyclic_while_with_single_successor(
251
252
  node, head, graph, full_graph
252
253
  )
@@ -379,7 +380,7 @@ class PhoenixStructurer(StructurerBase):
379
380
 
380
381
  return True, loop_node, right
381
382
 
382
- if self._phoenix_improved:
383
+ if self._improve_algorithm:
383
384
  if full_graph.out_degree[node] == 1:
384
385
  # while (true) { ...; if (...) break; }
385
386
  _, _, head_block = self._find_node_going_to_dst(node, left, condjump_only=True)
@@ -498,7 +499,7 @@ class PhoenixStructurer(StructurerBase):
498
499
  self._remove_last_statement_if_jump(succ)
499
500
  drop_succ = False
500
501
 
501
- if self._phoenix_improved:
502
+ if self._improve_algorithm:
502
503
  # absorb the entire succ block if possible
503
504
  if self._is_sequential_statement_block(succ) and self._should_use_multistmtexprs(succ):
504
505
  stmts = self._build_multistatementexpr_statements(succ)
@@ -1004,7 +1005,7 @@ class PhoenixStructurer(StructurerBase):
1004
1005
  any_matches |= matched
1005
1006
  if matched:
1006
1007
  break
1007
- if self._phoenix_improved:
1008
+ if self._improve_algorithm:
1008
1009
  l.debug("... matching acyclic ITE with short-circuit conditions at %r", node)
1009
1010
  matched = self._match_acyclic_short_circuit_conditions(graph, full_graph, node)
1010
1011
  l.debug("... matched: %s", matched)
@@ -1306,8 +1307,8 @@ class PhoenixStructurer(StructurerBase):
1306
1307
  node_b_addr,
1307
1308
  graph,
1308
1309
  full_graph,
1309
- ) -> tuple[ODict, Any, set[Any]]:
1310
- cases: ODict[int | tuple[int], SequenceNode] = OrderedDict()
1310
+ ) -> tuple[OrderedDict, Any, set[Any]]:
1311
+ cases: OrderedDict[int | tuple[int], SequenceNode] = OrderedDict()
1311
1312
  to_remove = set()
1312
1313
 
1313
1314
  # it is possible that the default node gets duplicated by other analyses and creates a default node (addr.a)
@@ -1416,7 +1417,7 @@ class PhoenixStructurer(StructurerBase):
1416
1417
  self,
1417
1418
  head,
1418
1419
  cmp_expr,
1419
- cases: ODict,
1420
+ cases: OrderedDict,
1420
1421
  node_default_addr: int,
1421
1422
  node_default,
1422
1423
  addr,
@@ -2108,7 +2109,7 @@ class PhoenixStructurer(StructurerBase):
2108
2109
  return None
2109
2110
 
2110
2111
  def _last_resort_refinement(self, head, graph: networkx.DiGraph, full_graph: networkx.DiGraph | None) -> bool:
2111
- if self._phoenix_improved:
2112
+ if self._improve_algorithm:
2112
2113
  while self._edge_virtualization_hints:
2113
2114
  src, dst = self._edge_virtualization_hints.pop(0)
2114
2115
  if graph.has_edge(src, dst):
@@ -2229,6 +2230,15 @@ class PhoenixStructurer(StructurerBase):
2229
2230
  remove_last_statement(src)
2230
2231
 
2231
2232
  def _should_use_multistmtexprs(self, node: Block | BaseNode) -> bool:
2233
+ """
2234
+ The original Phoenix algorithm had no support for multi-stmt expressions, such as the following:
2235
+ if ((x = y) && z) { ... }
2236
+
2237
+ There are multiple levels at which multi-stmt expressions can be used. If the Phoenix algorithm is not
2238
+ set to be in improved mode, then we should not use multi-stmt expressions at all.
2239
+ """
2240
+ if not self._improve_algorithm:
2241
+ return False
2232
2242
  if self._use_multistmtexprs == MultiStmtExprMode.NEVER:
2233
2243
  return False
2234
2244
  if self._use_multistmtexprs == MultiStmtExprMode.ALWAYS:
@@ -2313,7 +2323,6 @@ class PhoenixStructurer(StructurerBase):
2313
2323
  walker.block_id += 1
2314
2324
  if _check(block.nodes[-1].statements[-1]):
2315
2325
  walker.parent_and_block.append((walker.block_id, parent, block))
2316
- return
2317
2326
 
2318
2327
  def _handle_BreakNode(break_node: BreakNode, parent=None, **kwargs): # pylint:disable=unused-argument
2319
2328
  walker.block_id += 1
@@ -2324,7 +2333,6 @@ class PhoenixStructurer(StructurerBase):
2324
2333
  ):
2325
2334
  # FIXME: idx is ignored
2326
2335
  walker.parent_and_block.append((walker.block_id, parent, break_node))
2327
- return
2328
2336
 
2329
2337
  walker = SequenceWalker(
2330
2338
  handlers={
@@ -2502,84 +2510,12 @@ class PhoenixStructurer(StructurerBase):
2502
2510
  break
2503
2511
  return None
2504
2512
 
2513
+ # pylint: disable=unused-argument,no-self-use
2505
2514
  def _order_virtualizable_edges(self, graph: networkx.DiGraph, edges: list, node_seq: dict[Any, int]) -> list:
2506
2515
  """
2507
2516
  Returns a list of edges that are ordered by the best edges to virtualize first.
2508
- The criteria for "best" is defined by a variety of heuristics described below.
2509
2517
  """
2510
- if len(edges) <= 1:
2511
- return edges
2512
-
2513
- # TODO: the graph we have here is not an accurate graph and can have no "entry node". We need a better graph.
2514
- try:
2515
- entry_node = [node for node in graph.nodes if graph.in_degree(node) == 0][0]
2516
- except IndexError:
2517
- entry_node = None
2518
-
2519
- best_edges = edges
2520
- if self._phoenix_improved and entry_node is not None:
2521
- # the first few heuristics are based on the post-dominator count of the edge
2522
- # so we collect them for each candidate edge
2523
- edge_postdom_count = {}
2524
- edge_sibling_count = {}
2525
- for edge in edges:
2526
- _, dst = edge
2527
- graph_copy = networkx.DiGraph(graph)
2528
- graph_copy.remove_edge(*edge)
2529
- sibling_cnt = graph_copy.in_degree(dst)
2530
- if sibling_cnt == 0:
2531
- continue
2532
-
2533
- edge_sibling_count[edge] = sibling_cnt
2534
- post_dom_graph = PostDominators(graph_copy, entry_node).post_dom
2535
- post_doms = set()
2536
- for postdom_node, dominatee in post_dom_graph.edges():
2537
- if not isinstance(postdom_node, TemporaryNode) and not isinstance(dominatee, TemporaryNode):
2538
- post_doms.add((postdom_node, dominatee))
2539
- edge_postdom_count[edge] = len(post_doms)
2540
-
2541
- # H1: the edge that has the least amount of sibling edges should be virtualized first
2542
- # this is believed to reduce the amount of virtualization needed in future rounds and increase
2543
- # the edges that enter a single outer-scope if-stmt
2544
- if edge_sibling_count:
2545
- min_sibling_count = min(edge_sibling_count.values())
2546
- best_edges = [edge for edge, cnt in edge_sibling_count.items() if cnt == min_sibling_count]
2547
- if len(best_edges) == 1:
2548
- return best_edges
2549
-
2550
- # create the next heuristic based on the best edges from the previous heuristic
2551
- filtered_edge_postdom_count = edge_postdom_count.copy()
2552
- for edge in list(edge_postdom_count.keys()):
2553
- if edge not in best_edges:
2554
- del filtered_edge_postdom_count[edge]
2555
- if filtered_edge_postdom_count:
2556
- edge_postdom_count = filtered_edge_postdom_count
2557
-
2558
- # H2: the edge, when removed, that causes the most post-dominators of the graph should be virtualized
2559
- # first. this is believed to make the code more linear looking be reducing the amount of scopes.
2560
- # informally, we believe post-dominators to be an inverse indicator of the number of scopes present
2561
- if edge_postdom_count:
2562
- max_postdom_count = max(edge_postdom_count.values())
2563
- best_edges = [edge for edge, cnt in edge_postdom_count.items() if cnt == max_postdom_count]
2564
- if len(best_edges) == 1:
2565
- return best_edges
2566
-
2567
- # H3: the edge that goes directly to a return statement should be virtualized first
2568
- # this is believed to be good because it can be corrected in later optimization by duplicating
2569
- # the return
2570
- candidate_edges = best_edges
2571
- best_edges = []
2572
- for src, dst in candidate_edges:
2573
- if graph.has_node(dst) and structured_node_is_simple_return(dst, graph):
2574
- best_edges.append((src, dst))
2575
-
2576
- if len(best_edges) == 1:
2577
- return best_edges
2578
- elif not best_edges:
2579
- best_edges = candidate_edges
2580
-
2581
- # if we have another tie, or we never used improved heuristics, then we do the chick_order.
2582
- return PhoenixStructurer._chick_order_edges(best_edges, node_seq)
2518
+ return PhoenixStructurer._chick_order_edges(edges, node_seq)
2583
2519
 
2584
2520
  @staticmethod
2585
2521
  def _chick_order_edges(edges: list, node_seq: dict[Any, int]) -> list:
@@ -34,14 +34,12 @@ class RecursiveStructurer(Analysis):
34
34
  cond_proc=None,
35
35
  func: Optional["Function"] = None,
36
36
  structurer_cls: type | None = None,
37
- improve_structurer=True,
38
37
  **kwargs,
39
38
  ):
40
39
  self._region = region
41
40
  self.cond_proc = cond_proc if cond_proc is not None else ConditionProcessor(self.project.arch)
42
41
  self.function = func
43
42
  self.structurer_cls = structurer_cls if structurer_cls is not None else DreamStructurer
44
- self.improve_structurer = improve_structurer
45
43
  self.structurer_options = kwargs
46
44
 
47
45
  self.result = None
@@ -91,7 +89,6 @@ class RecursiveStructurer(Analysis):
91
89
  case_entry_to_switch_head=self._case_entry_to_switch_head,
92
90
  func=self.function,
93
91
  parent_region=parent_region,
94
- improve_structurer=self.improve_structurer,
95
92
  **self.structurer_options,
96
93
  )
97
94
  # replace this region with the resulting node in its parent region... if it's not an orphan