angr 9.2.177__cp310-abi3-win_amd64.whl → 9.2.179__cp310-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (41):
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfb.py +11 -0
  3. angr/analyses/cfg/cfg_fast.py +15 -0
  4. angr/analyses/decompiler/ail_simplifier.py +69 -1
  5. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +45 -7
  6. angr/analyses/decompiler/clinic.py +24 -10
  7. angr/analyses/decompiler/dirty_rewriters/__init__.py +7 -0
  8. angr/analyses/decompiler/dirty_rewriters/amd64_dirty.py +69 -0
  9. angr/analyses/decompiler/dirty_rewriters/rewriter_base.py +27 -0
  10. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  11. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -8
  12. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier.py +44 -6
  13. angr/analyses/decompiler/optimization_passes/register_save_area_simplifier_adv.py +198 -0
  14. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +111 -55
  15. angr/analyses/decompiler/peephole_optimizations/remove_redundant_shifts_around_comparators.py +72 -1
  16. angr/analyses/decompiler/presets/basic.py +2 -0
  17. angr/analyses/decompiler/presets/fast.py +2 -0
  18. angr/analyses/decompiler/presets/full.py +2 -0
  19. angr/analyses/decompiler/region_simplifiers/expr_folding.py +38 -18
  20. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +10 -4
  21. angr/analyses/decompiler/structured_codegen/c.py +54 -12
  22. angr/analyses/decompiler/structuring/phoenix.py +129 -64
  23. angr/analyses/decompiler/utils.py +26 -8
  24. angr/analyses/disassembly.py +108 -52
  25. angr/analyses/proximity_graph.py +20 -19
  26. angr/analyses/s_propagator.py +23 -21
  27. angr/analyses/smc.py +2 -3
  28. angr/flirt/__init__.py +69 -42
  29. angr/knowledge_plugins/key_definitions/live_definitions.py +2 -1
  30. angr/knowledge_plugins/labels.py +4 -4
  31. angr/rustylib.pyd +0 -0
  32. angr/unicornlib.dll +0 -0
  33. angr/utils/funcid.py +85 -0
  34. angr/utils/ssa/__init__.py +2 -6
  35. angr/utils/types.py +2 -0
  36. {angr-9.2.177.dist-info → angr-9.2.179.dist-info}/METADATA +9 -8
  37. {angr-9.2.177.dist-info → angr-9.2.179.dist-info}/RECORD +41 -37
  38. {angr-9.2.177.dist-info → angr-9.2.179.dist-info}/WHEEL +0 -0
  39. {angr-9.2.177.dist-info → angr-9.2.179.dist-info}/entry_points.txt +0 -0
  40. {angr-9.2.177.dist-info → angr-9.2.179.dist-info}/licenses/LICENSE +0 -0
  41. {angr-9.2.177.dist-info → angr-9.2.179.dist-info}/top_level.txt +0 -0
@@ -125,10 +125,16 @@ class RegionSimplifier(Analysis):
125
125
  # before the definition site and the use site.
126
126
  var_with_loads = {}
127
127
  single_use_variables = []
128
- for var, uses in expr_counter.uses.items():
129
- if len(uses) == 1 and var in expr_counter.assignments and len(expr_counter.assignments[var]) == 1:
128
+ for var, outerscope_uses in expr_counter.outerscope_uses.items():
129
+ all_uses = expr_counter.all_uses[var]
130
+ if (
131
+ len(outerscope_uses) == 1
132
+ and len(all_uses) == 1
133
+ and var in expr_counter.assignments
134
+ and len(expr_counter.assignments[var]) == 1
135
+ ):
130
136
  definition, deps, loc, has_loads = next(iter(expr_counter.assignments[var]))
131
- _, use_expr_loc = next(iter(uses))
137
+ _, use_expr_loc = next(iter(outerscope_uses))
132
138
  if isinstance(use_expr_loc, ExpressionLocation) and use_expr_loc.phi_stmt:
133
139
  # we cannot fold expressions that are used in phi statements
134
140
  continue
@@ -169,7 +175,7 @@ class RegionSimplifier(Analysis):
169
175
  definition.ret_expr = definition.ret_expr.copy()
170
176
  definition.ret_expr.variable = None
171
177
  variable_assignments[var] = definition, loc
172
- variable_uses[var] = next(iter(expr_counter.uses[var]))
178
+ variable_uses[var] = next(iter(expr_counter.outerscope_uses[var]))
173
179
  variable_assignment_dependencies[var] = deps
174
180
 
175
181
  # any variable definition that uses an existing to-be-removed variable cannot be folded
@@ -561,7 +561,10 @@ class CFunction(CConstruct): # pylint:disable=abstract-method
561
561
  continue
562
562
  varname = v.c_repr() if v.type is None else v.variable.name
563
563
  yield "extern ", None
564
- yield from type_to_c_repr_chunks(v.type, name=varname, name_type=v, full=False)
564
+ if v.type is None:
565
+ yield "<unknown-type>", None
566
+ else:
567
+ yield from type_to_c_repr_chunks(v.type, name=varname, name_type=v, full=False)
565
568
  yield ";\n", None
566
569
  yield "\n", None
567
570
 
@@ -1327,9 +1330,10 @@ class CFunctionCall(CStatement, CExpression):
1327
1330
  return True
1328
1331
 
1329
1332
  # FIXME: Handle name mangle
1330
- for func in self.codegen.kb.functions.get_by_name(callee.name):
1331
- if func is not callee and (caller.binary is not callee.binary or func.binary is callee.binary):
1332
- return True
1333
+ if callee is not None:
1334
+ for func in self.codegen.kb.functions.get_by_name(callee.name):
1335
+ if func is not callee and (caller.binary is not callee.binary or func.binary is callee.binary):
1336
+ return True
1333
1337
 
1334
1338
  return False
1335
1339
 
@@ -3194,7 +3198,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3194
3198
  # Handlers
3195
3199
  #
3196
3200
 
3197
- def _handle(self, node, is_expr: bool = True, lvalue: bool = False, likely_signed=False):
3201
+ def _handle(
3202
+ self, node, is_expr: bool = True, lvalue: bool = False, likely_signed=False, type_: SimType | None = None
3203
+ ):
3198
3204
  if (node, is_expr) in self.ailexpr2cnode:
3199
3205
  return self.ailexpr2cnode[(node, is_expr)]
3200
3206
 
@@ -3204,7 +3210,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3204
3210
  converted = (
3205
3211
  handler(node, is_expr=is_expr)
3206
3212
  if isinstance(node, Stmt.Call)
3207
- else handler(node, lvalue=lvalue, likely_signed=likely_signed)
3213
+ else handler(node, lvalue=lvalue, likely_signed=likely_signed, type_=type_)
3208
3214
  )
3209
3215
  self.ailexpr2cnode[(node, is_expr)] = converted
3210
3216
  return converted
@@ -3483,6 +3489,8 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3483
3489
  and i < len(target_func.prototype.args)
3484
3490
  ):
3485
3491
  type_ = target_func.prototype.args[i].with_arch(self.project.arch)
3492
+ if target_func.prototype_libname is not None:
3493
+ type_ = dereference_simtype_by_lib(type_, target_func.prototype_libname)
3486
3494
 
3487
3495
  if isinstance(arg, Expr.Const):
3488
3496
  if type_ is None or is_machine_word_size_type(type_, self.project.arch):
@@ -3490,7 +3498,7 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3490
3498
 
3491
3499
  new_arg = self._handle_Expr_Const(arg, type_=type_)
3492
3500
  else:
3493
- new_arg = self._handle(arg)
3501
+ new_arg = self._handle(arg, type_=type_)
3494
3502
  args.append(new_arg)
3495
3503
 
3496
3504
  ret_expr = None
@@ -3737,10 +3745,19 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3737
3745
  reference_values["offset"] = var_access
3738
3746
  return CConstant(expr.value, type_, reference_values=reference_values, tags=expr.tags, codegen=self)
3739
3747
 
3740
- def _handle_Expr_UnaryOp(self, expr, **kwargs):
3748
+ def _handle_Expr_UnaryOp(self, expr, type_: SimType | None = None, **kwargs):
3749
+ data_type = None
3750
+ if expr.op == "Reference" and isinstance(type_, SimTypePointer) and not isinstance(type_.pts_to, SimTypeBottom):
3751
+ data_type = type_.pts_to
3752
+
3753
+ operand = self._handle(expr.operand, lvalue=expr.op == "Reference", type_=data_type)
3754
+
3755
+ if expr.op == "Reference" and isinstance(operand, CUnaryOp) and operand.op == "Dereference":
3756
+ # cancel out
3757
+ return operand.operand
3741
3758
  return CUnaryOp(
3742
3759
  expr.op,
3743
- self._handle(expr.operand),
3760
+ operand,
3744
3761
  tags=expr.tags,
3745
3762
  codegen=self,
3746
3763
  )
@@ -3847,7 +3864,9 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3847
3864
  cexpr = self._handle(expr.expr)
3848
3865
  return CMultiStatementExpression(cstmts, cexpr, tags=expr.tags, codegen=self)
3849
3866
 
3850
- def _handle_VirtualVariable(self, expr: Expr.VirtualVariable, **kwargs):
3867
+ def _handle_VirtualVariable(
3868
+ self, expr: Expr.VirtualVariable, lvalue: bool = False, type_: SimType | None = None, **kwargs
3869
+ ):
3851
3870
  def negotiate(old_ty: SimType, proposed_ty: SimType) -> SimType:
3852
3871
  # we do not allow returning a struct for a primitive type
3853
3872
  if old_ty.size == proposed_ty.size and (
@@ -3860,13 +3879,29 @@ class CStructuredCodeGenerator(BaseStructuredCodeGenerator, Analysis):
3860
3879
  if "struct_member_info" in expr.tags:
3861
3880
  offset, var, _ = expr.struct_member_info
3862
3881
  cbasevar = self._variable(var, expr.size, vvar_id=expr.varid)
3882
+ data_type = type_
3883
+ if data_type is None:
3884
+ # try to determine the type of this variable read
3885
+ data_type = cbasevar.type
3886
+ if data_type.size // self.project.arch.byte_width > expr.size:
3887
+ # fallback to a more suitable type
3888
+ data_type = (
3889
+ {
3890
+ 64: SimTypeLongLong(signed=False),
3891
+ 32: SimTypeInt(signed=False),
3892
+ 16: SimTypeShort(signed=False),
3893
+ 8: SimTypeChar(signed=False),
3894
+ }
3895
+ .get(expr.bits, data_type)
3896
+ .with_arch(self.project.arch)
3897
+ )
3863
3898
  cvar = self._access_constant_offset(
3864
- self._get_variable_reference(cbasevar), offset, cbasevar.type, False, negotiate
3899
+ self._get_variable_reference(cbasevar), offset, data_type, lvalue, negotiate
3865
3900
  )
3866
3901
  else:
3867
3902
  cvar = self._variable(expr.variable, None, vvar_id=expr.varid)
3868
3903
 
3869
- if expr.variable.size != expr.size:
3904
+ if not lvalue and expr.variable.size != expr.size:
3870
3905
  l.warning(
3871
3906
  "VirtualVariable size (%d) and variable size (%d) do not match. Force a type cast.",
3872
3907
  expr.size,
@@ -4097,6 +4132,13 @@ class PointerArithmeticFixer(CStructuredCodeWalker):
4097
4132
  a_ptr = a_ptr + 1.
4098
4133
  """
4099
4134
 
4135
+ def handle_CAssignment(self, obj: CAssignment):
4136
+ if "type" in obj.tags and "dst" in obj.tags["type"] and "src" in obj.tags["type"]:
4137
+ # HACK: do not attempt to fix pointer arithmetic if dst and src types are explicitly given
4138
+ # FIXME: Properly propagate dst and src types to lhs and rhs
4139
+ return obj
4140
+ return super().handle_CAssignment(obj)
4141
+
4100
4142
  def handle_CBinaryOp(self, obj: CBinaryOp): # type: ignore
4101
4143
  obj: CBinaryOp = super().handle_CBinaryOp(obj)
4102
4144
  if (
@@ -1464,6 +1464,8 @@ class PhoenixStructurer(StructurerBase):
1464
1464
  switch_head_addr: int = 0
1465
1465
 
1466
1466
  # case 1: the last block is a ConditionNode with two goto statements
1467
+ cond_expr_or_stmt = None
1468
+ cond_case = None
1467
1469
  if isinstance(node, SequenceNode) and node.nodes and isinstance(node.nodes[-1], ConditionNode):
1468
1470
  cond_node = node.nodes[-1]
1469
1471
  assert isinstance(cond_node, ConditionNode)
@@ -1480,14 +1482,8 @@ class PhoenixStructurer(StructurerBase):
1480
1482
  if len(successor_addrs) != 2 or None in successor_addrs:
1481
1483
  return False
1482
1484
 
1483
- # extract the comparison expression, lower-, and upper-bounds from the last statement
1484
- cmp = switch_extract_cmp_bounds_from_condition(
1485
- self.cond_proc.convert_claripy_bool_ast(cond_node.condition)
1486
- )
1487
- if not cmp:
1488
- return False
1489
- cmp_expr, cmp_lb, _cmp_ub = cmp
1490
-
1485
+ cond_expr_or_stmt = cond_node.condition
1486
+ cond_case = 1
1491
1487
  assert cond_node.addr is not None
1492
1488
  switch_head_addr = cond_node.addr
1493
1489
 
@@ -1505,14 +1501,22 @@ class PhoenixStructurer(StructurerBase):
1505
1501
  if len(successor_addrs) != 2:
1506
1502
  return False
1507
1503
 
1508
- # extract the comparison expression, lower-, and upper-bounds from the last statement
1509
- cmp = switch_extract_cmp_bounds(last_stmt)
1510
- if not cmp:
1511
- return False
1512
- cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
1513
-
1504
+ cond_expr_or_stmt = last_stmt
1505
+ cond_case = 2
1514
1506
  switch_head_addr = last_stmt.ins_addr
1515
1507
 
1508
+ graph = _f(graph_raw)
1509
+ full_graph = _f(full_graph_raw)
1510
+
1511
+ # special fix
1512
+ if (
1513
+ len(successor_addrs) == 2
1514
+ and graph.out_degree[node] == 2
1515
+ and len(set(successor_addrs).intersection({succ.addr for succ in graph.successors(node)})) == 1
1516
+ ):
1517
+ # there is an unmatched successor addr! fix it
1518
+ successor_addrs = [succ.addr for succ in graph.successors(node)]
1519
+
1516
1520
  for t in successor_addrs:
1517
1521
  if t in self.jump_tables:
1518
1522
  # this is a candidate!
@@ -1521,13 +1525,29 @@ class PhoenixStructurer(StructurerBase):
1521
1525
  else:
1522
1526
  return False
1523
1527
 
1528
+ # extract the comparison expression, lower-, and upper-bounds from the last statement
1529
+ match cond_case:
1530
+ case 1:
1531
+ cmp = switch_extract_cmp_bounds_from_condition(
1532
+ self.cond_proc.convert_claripy_bool_ast(cond_expr_or_stmt)
1533
+ )
1534
+ if not cmp:
1535
+ return False
1536
+ case 2:
1537
+ # extract the comparison expression, lower-, and upper-bounds from the last statement
1538
+ cmp = switch_extract_cmp_bounds(cond_expr_or_stmt)
1539
+ if not cmp:
1540
+ return False
1541
+ case _:
1542
+ # unreachable!
1543
+ return False
1544
+
1545
+ cmp_expr, cmp_lb, _cmp_ub = cmp # pylint:disable=unused-variable
1546
+
1524
1547
  jump_table = self.jump_tables[target]
1525
1548
  if jump_table.type != IndirectJumpType.Jumptable_AddressLoadedFromMemory:
1526
1549
  return False
1527
1550
 
1528
- graph = _f(graph_raw)
1529
- full_graph = _f(full_graph_raw)
1530
-
1531
1551
  node_a = next(iter(nn for nn in graph.nodes if nn.addr == target), None)
1532
1552
  if node_a is None:
1533
1553
  return False
@@ -1585,10 +1605,24 @@ class PhoenixStructurer(StructurerBase):
1585
1605
  # update node_a
1586
1606
  node_a = next(iter(nn for nn in graph.nodes if nn.addr == target))
1587
1607
  if isinstance(node_a, IncompleteSwitchCaseNode):
1588
- r = self._unpack_incompleteswitchcasenode(graph_raw, node_a)
1608
+ # special case: if node_default is None, node_a has a missing case, and node_a has a successor in the full
1609
+ # graph that is not the default node, then we know
1610
+ # 1. there is a default node (instead of the successor of the entire switch-case construct).
1611
+ # 2. the default node is in a parent region.
1612
+ # as a result, we cannot structure this switch-case right now
1613
+ if (
1614
+ len(node_a.cases) == len(set(jump_table.jumptable_entries)) - 1
1615
+ and node_default is None
1616
+ and len([succ for succ in full_graph.successors(node_a) if succ.addr != node_b_addr]) > 0
1617
+ ):
1618
+ return False
1619
+
1620
+ r = self._unpack_incompleteswitchcasenode(graph_raw, node_a, jump_table.jumptable_entries)
1589
1621
  if not r:
1590
1622
  return False
1591
- self._unpack_incompleteswitchcasenode(full_graph_raw, node_a) # this shall not fail
1623
+ self._unpack_incompleteswitchcasenode(
1624
+ full_graph_raw, node_a, jump_table.jumptable_entries
1625
+ ) # this shall not fail
1592
1626
  # update node_a
1593
1627
  node_a = next(iter(nn for nn in graph.nodes if nn.addr == target))
1594
1628
  if self._node_order is not None:
@@ -1721,10 +1755,16 @@ class PhoenixStructurer(StructurerBase):
1721
1755
 
1722
1756
  # un-structure IncompleteSwitchCaseNode
1723
1757
  if isinstance(node, IncompleteSwitchCaseNode):
1724
- r = self._unpack_incompleteswitchcasenode(graph_raw, node)
1758
+ if len(set(jump_table.jumptable_entries)) > len(node.cases):
1759
+ # it has a missing default case node! we cannot structure it as a no-default switch-case
1760
+ return False
1761
+
1762
+ r = self._unpack_incompleteswitchcasenode(graph_raw, node, jump_table.jumptable_entries)
1725
1763
  if not r:
1726
1764
  return False
1727
- self._unpack_incompleteswitchcasenode(full_graph_raw, node) # this shall not fail
1765
+ self._unpack_incompleteswitchcasenode(
1766
+ full_graph_raw, node, jump_table.jumptable_entries
1767
+ ) # this shall not fail
1728
1768
  # update node
1729
1769
  node = next(iter(nn for nn in graph.nodes if nn.addr == jump_table.addr))
1730
1770
 
@@ -1934,31 +1974,35 @@ class PhoenixStructurer(StructurerBase):
1934
1974
 
1935
1975
  jump_table = self.jump_tables[node.addr]
1936
1976
  assert jump_table.jumptable_entries is not None
1937
- if (
1938
- successors
1939
- and {succ.addr for succ in successors} == set(jump_table.jumptable_entries)
1940
- and all(graph.in_degree[succ] == 1 for succ in successors)
1941
- ):
1942
- out_nodes = set()
1943
- for succ in successors:
1944
- out_nodes |= {
1945
- succ for succ in full_graph.successors(succ) if succ is not node and succ not in successors
1946
- }
1947
- out_nodes = list(out_nodes)
1948
- if len(out_nodes) <= 1 and node.addr not in self._matched_incomplete_switch_case_addrs:
1949
- self._matched_incomplete_switch_case_addrs.add(node.addr)
1950
- new_node = IncompleteSwitchCaseNode(node.addr, node, successors)
1951
- graph_raw.remove_nodes_from(successors)
1952
- self.replace_nodes(graph_raw, node, new_node)
1953
- if out_nodes and out_nodes[0] in graph:
1954
- graph_raw.add_edge(new_node, out_nodes[0])
1955
- full_graph_raw.remove_nodes_from(successors)
1956
- self.replace_nodes(full_graph_raw, node, new_node, update_node_order=True)
1957
- if out_nodes:
1958
- full_graph_raw.add_edge(new_node, out_nodes[0])
1959
- if self._node_order:
1960
- self._node_order[new_node] = self._node_order[node]
1961
- return True
1977
+
1978
+ if successors and all(graph.in_degree[succ] == 1 for succ in successors):
1979
+ succ_addrs = {succ.addr for succ in successors}
1980
+ expected_entry_addrs = set(jump_table.jumptable_entries)
1981
+ # test if we have found all entries or all but one entry (where the one missing entry is likely the default
1982
+ # case).
1983
+ if succ_addrs == expected_entry_addrs or (
1984
+ succ_addrs.issubset(expected_entry_addrs) and len(expected_entry_addrs - succ_addrs) == 1
1985
+ ):
1986
+ out_nodes = set()
1987
+ for succ in successors:
1988
+ out_nodes |= {
1989
+ succ for succ in full_graph.successors(succ) if succ is not node and succ not in successors
1990
+ }
1991
+ out_nodes = list(out_nodes)
1992
+ if len(out_nodes) <= 1 and node.addr not in self._matched_incomplete_switch_case_addrs:
1993
+ self._matched_incomplete_switch_case_addrs.add(node.addr)
1994
+ new_node = IncompleteSwitchCaseNode(node.addr, node, successors)
1995
+ graph_raw.remove_nodes_from(successors)
1996
+ self.replace_nodes(graph_raw, node, new_node)
1997
+ if out_nodes and out_nodes[0] in graph:
1998
+ graph_raw.add_edge(new_node, out_nodes[0])
1999
+ full_graph_raw.remove_nodes_from(successors)
2000
+ self.replace_nodes(full_graph_raw, node, new_node, update_node_order=True)
2001
+ if out_nodes:
2002
+ full_graph_raw.add_edge(new_node, out_nodes[0])
2003
+ if self._node_order:
2004
+ self._node_order[new_node] = self._node_order[node]
2005
+ return True
1962
2006
  return False
1963
2007
 
1964
2008
  def _switch_build_cases(
@@ -2195,9 +2239,11 @@ class PhoenixStructurer(StructurerBase):
2195
2239
  out_dst_succs_fullgraph = []
2196
2240
  for _, o in other_out_edges:
2197
2241
  if o in graph:
2198
- out_dst_succs.append(o)
2242
+ if o not in out_dst_succs:
2243
+ out_dst_succs.append(o)
2199
2244
  elif o in full_graph:
2200
- out_dst_succs_fullgraph.append(o)
2245
+ if o not in out_dst_succs_fullgraph:
2246
+ out_dst_succs_fullgraph.append(o)
2201
2247
  out_dst_succ = sorted(out_dst_succs, key=lambda o: o.addr)[0] if out_dst_succs else None
2202
2248
  out_dst_succ_fullgraph = (
2203
2249
  sorted(out_dst_succs_fullgraph, key=lambda o: o.addr)[0] if out_dst_succs_fullgraph else None
@@ -2283,6 +2329,8 @@ class PhoenixStructurer(StructurerBase):
2283
2329
  def _is_switch_cases_address_loaded_from_memory_head_or_jumpnode(self, graph, node) -> bool:
2284
2330
  if self._is_node_unstructured_switch_case_head(node):
2285
2331
  return True
2332
+ if isinstance(node, IncompleteSwitchCaseNode):
2333
+ return True
2286
2334
  for succ in graph.successors(node):
2287
2335
  if self._is_node_unstructured_switch_case_head(succ):
2288
2336
  return True
@@ -2299,20 +2347,31 @@ class PhoenixStructurer(StructurerBase):
2299
2347
  graph = _f(graph_raw)
2300
2348
 
2301
2349
  succs = list(graph.successors(start_node))
2302
- if len(succs) == 1:
2303
- end_node = succs[0]
2304
- if (
2305
- full_graph.out_degree[start_node] == 1
2306
- and full_graph.in_degree[end_node] == 1
2307
- and not full_graph.has_edge(end_node, start_node)
2308
- and not self._is_switch_cases_address_loaded_from_memory_head_or_jumpnode(full_graph, end_node)
2309
- and not self._is_switch_cases_address_loaded_from_memory_head_or_jumpnode(full_graph, start_node)
2310
- and end_node not in self.dowhile_known_tail_nodes
2311
- and not isinstance(end_node, IncompleteSwitchCaseNode)
2312
- ):
2350
+ if len(succs) != 1:
2351
+ return False
2352
+ end_node = succs[0]
2353
+ if (
2354
+ full_graph.out_degree[start_node] == 1
2355
+ and full_graph.in_degree[end_node] == 1
2356
+ and not full_graph.has_edge(end_node, start_node)
2357
+ and not self._is_switch_cases_address_loaded_from_memory_head_or_jumpnode(full_graph, start_node)
2358
+ and end_node not in self.dowhile_known_tail_nodes
2359
+ ):
2360
+ new_seq = None
2361
+ if not self._is_switch_cases_address_loaded_from_memory_head_or_jumpnode(full_graph, end_node):
2313
2362
  # merge two blocks
2314
2363
  new_seq = self._merge_nodes(start_node, end_node)
2315
-
2364
+ elif isinstance(end_node, IncompleteSwitchCaseNode):
2365
+ # a special case where there is a node between the actual switch-case head and the jump table
2366
+ # head
2367
+ # binary 7995a0325b446c462bdb6ae10b692eee2ecadd8e888e9d7729befe4412007afb, function 0x1400326C0
2368
+ # keep the IncompleteSwitchCaseNode, and merge two blocks into the head of the IncompleteSwitchCaseNode.
2369
+ new_seq = self._merge_nodes(start_node, end_node.head)
2370
+ new_seq.addr = end_node.addr
2371
+ end_node.head = new_seq
2372
+ new_seq = end_node
2373
+
2374
+ if new_seq is not None:
2316
2375
  # on the original graph
2317
2376
  self.replace_nodes(graph_raw, start_node, new_seq, old_node_1=end_node if end_node in graph else None)
2318
2377
  # on the graph with successors
@@ -3267,17 +3326,23 @@ class PhoenixStructurer(StructurerBase):
3267
3326
  return True, new_seq
3268
3327
 
3269
3328
  @staticmethod
3270
- def _unpack_incompleteswitchcasenode(graph: networkx.DiGraph, incscnode: IncompleteSwitchCaseNode) -> bool:
3329
+ def _unpack_incompleteswitchcasenode(
3330
+ graph: networkx.DiGraph, incscnode: IncompleteSwitchCaseNode, jumptable_entries: list[int]
3331
+ ) -> bool:
3271
3332
  preds = list(graph.predecessors(incscnode))
3272
3333
  succs = list(graph.successors(incscnode))
3273
- if len(succs) <= 1:
3334
+ non_case_succs = [succ for succ in succs if succ.addr not in jumptable_entries]
3335
+ if len(non_case_succs) <= 1:
3274
3336
  graph.remove_node(incscnode)
3275
3337
  for pred in preds:
3276
3338
  graph.add_edge(pred, incscnode.head)
3339
+ for succ in succs:
3340
+ if succ not in non_case_succs:
3341
+ graph.add_edge(incscnode.head, succ)
3277
3342
  for case_node in incscnode.cases:
3278
3343
  graph.add_edge(incscnode.head, case_node)
3279
- if succs:
3280
- graph.add_edge(case_node, succs[0])
3344
+ if non_case_succs:
3345
+ graph.add_edge(case_node, non_case_succs[0])
3281
3346
  return True
3282
3347
  return False
3283
3348
 
@@ -163,12 +163,30 @@ def switch_extract_cmp_bounds(
163
163
  def switch_extract_cmp_bounds_from_condition(cond: ailment.Expr.Expression) -> tuple[Any, int, int] | None:
164
164
  # TODO: Add more operations
165
165
  if isinstance(cond, ailment.Expr.BinaryOp):
166
- if cond.op in {"CmpLE", "CmpLT"}:
167
- if not (isinstance(cond.operands[1], ailment.Expr.Const) and isinstance(cond.operands[1].value, int)):
166
+ op = cond.op
167
+ op0, op1 = cond.operands
168
+ if not isinstance(op1, ailment.Expr.Const):
169
+ # swap them
170
+ match op:
171
+ case "CmpLE":
172
+ op = "CmpGE"
173
+ case "CmpLT":
174
+ op = "CmpGT"
175
+ case "CmpGE":
176
+ op = "CmpLE"
177
+ case "CmpGT":
178
+ op = "CmpLT"
179
+ case _:
180
+ # unsupported
181
+ return None
182
+ op0, op1 = op1, op0
183
+
184
+ if op in {"CmpLE", "CmpLT"}:
185
+ if not (isinstance(op1, ailment.Expr.Const) and isinstance(op1.value, int)):
168
186
  return None
169
- cmp_ub = cond.operands[1].value if cond.op == "CmpLE" else cond.operands[1].value - 1
187
+ cmp_ub = op1.value if op == "CmpLE" else op1.value - 1
170
188
  cmp_lb = 0
171
- cmp = cond.operands[0]
189
+ cmp = op0
172
190
  if (
173
191
  isinstance(cmp, ailment.Expr.BinaryOp)
174
192
  and cmp.op == "Sub"
@@ -180,15 +198,15 @@ def switch_extract_cmp_bounds_from_condition(cond: ailment.Expr.Expression) -> t
180
198
  cmp = cmp.operands[0]
181
199
  return cmp, cmp_lb, cmp_ub
182
200
 
183
- if cond.op in {"CmpGE", "CmpGT"}:
201
+ if op in {"CmpGE", "CmpGT"}:
184
202
  # We got the negated condition here
185
203
  # CmpGE -> CmpLT
186
204
  # CmpGT -> CmpLE
187
- if not (isinstance(cond.operands[1], ailment.Expr.Const) and isinstance(cond.operands[1].value, int)):
205
+ if not (isinstance(op1, ailment.Expr.Const) and isinstance(op1.value, int)):
188
206
  return None
189
- cmp_ub = cond.operands[1].value if cond.op == "CmpGT" else cond.operands[1].value - 1
207
+ cmp_ub = op1.value if op == "CmpGT" else op1.value - 1
190
208
  cmp_lb = 0
191
- cmp = cond.operands[0]
209
+ cmp = op0
192
210
  if (
193
211
  isinstance(cmp, ailment.Expr.BinaryOp)
194
212
  and cmp.op == "Sub"