angr 9.2.166__cp310-abi3-macosx_11_0_arm64.whl → 9.2.168__cp310-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of angr might be problematic.

Files changed (36):
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfb.py +7 -7
  3. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +8 -8
  4. angr/analyses/decompiler/clinic.py +8 -0
  5. angr/analyses/decompiler/condition_processor.py +44 -1
  6. angr/analyses/decompiler/decompilation_cache.py +2 -0
  7. angr/analyses/decompiler/decompilation_options.py +10 -0
  8. angr/analyses/decompiler/decompiler.py +26 -2
  9. angr/analyses/decompiler/node_replacer.py +42 -0
  10. angr/analyses/decompiler/notes/__init__.py +9 -0
  11. angr/analyses/decompiler/notes/decompilation_note.py +48 -0
  12. angr/analyses/decompiler/notes/deobfuscated_strings.py +56 -0
  13. angr/analyses/decompiler/optimization_passes/lowered_switch_simplifier.py +1 -1
  14. angr/analyses/decompiler/optimization_passes/optimization_pass.py +5 -0
  15. angr/analyses/decompiler/optimization_passes/return_duplicator_low.py +5 -76
  16. angr/analyses/decompiler/region_identifier.py +12 -3
  17. angr/analyses/decompiler/sequence_walker.py +11 -7
  18. angr/analyses/decompiler/structured_codegen/base.py +34 -1
  19. angr/analyses/decompiler/structured_codegen/c.py +44 -10
  20. angr/analyses/decompiler/structuring/phoenix.py +645 -305
  21. angr/analyses/decompiler/structuring/structurer_base.py +75 -1
  22. angr/analyses/decompiler/utils.py +71 -28
  23. angr/analyses/deobfuscator/string_obf_finder.py +19 -16
  24. angr/analyses/deobfuscator/string_obf_opt_passes.py +6 -3
  25. angr/analyses/reaching_definitions/engine_vex.py +3 -2
  26. angr/procedures/glibc/scanf.py +8 -0
  27. angr/procedures/glibc/sscanf.py +4 -0
  28. angr/rustylib.abi3.so +0 -0
  29. angr/unicornlib.dylib +0 -0
  30. angr/utils/graph.py +62 -24
  31. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/METADATA +5 -5
  32. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/RECORD +36 -32
  33. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/WHEEL +0 -0
  34. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/entry_points.txt +0 -0
  35. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/licenses/LICENSE +0 -0
  36. {angr-9.2.166.dist-info → angr-9.2.168.dist-info}/top_level.txt +0 -0
@@ -35,6 +35,7 @@ from .structurer_nodes import (
35
35
  LoopNode,
36
36
  EmptyBlockNotice,
37
37
  IncompleteSwitchCaseNode,
38
+ IncompleteSwitchCaseHeadStatement,
38
39
  )
39
40
 
40
41
  if TYPE_CHECKING:
@@ -900,8 +901,24 @@ class StructurerBase(Analysis):
900
901
 
901
902
  @staticmethod
902
903
  def _remove_last_statement_if_jump(
903
- node: BaseNode | ailment.Block | MultiNode,
904
+ node: BaseNode | ailment.Block | MultiNode | SequenceNode,
904
905
  ) -> ailment.Stmt.Jump | ailment.Stmt.ConditionalJump | None:
906
+ if isinstance(node, SequenceNode) and node.nodes and isinstance(node.nodes[-1], ConditionNode):
907
+ cond_node = node.nodes[-1]
908
+ the_stmt: ailment.Stmt.Jump | None = None
909
+ for block in [cond_node.true_node, cond_node.false_node]:
910
+ if (
911
+ isinstance(block, ailment.Block)
912
+ and block.statements
913
+ and isinstance(block.statements[-1], ailment.Stmt.Jump)
914
+ ):
915
+ the_stmt = block.statements[-1] # type: ignore
916
+ break
917
+
918
+ if the_stmt is not None:
919
+ node.nodes = node.nodes[:-1]
920
+ return the_stmt
921
+
905
922
  try:
906
923
  last_stmts = ConditionProcessor.get_last_statements(node)
907
924
  except EmptyBlockNotice:
@@ -911,6 +928,63 @@ class StructurerBase(Analysis):
911
928
  return remove_last_statement(node) # type: ignore
912
929
  return None
913
930
 
931
+ @staticmethod
932
+ def _remove_last_statement_if_jump_or_schead(
933
+ node: BaseNode | ailment.Block | MultiNode | SequenceNode,
934
+ ) -> ailment.Stmt.Jump | ailment.Stmt.ConditionalJump | IncompleteSwitchCaseHeadStatement | None:
935
+ if isinstance(node, SequenceNode) and node.nodes and isinstance(node.nodes[-1], ConditionNode):
936
+ cond_node = node.nodes[-1]
937
+ the_stmt: ailment.Stmt.Jump | None = None
938
+ for block in [cond_node.true_node, cond_node.false_node]:
939
+ if (
940
+ isinstance(block, ailment.Block)
941
+ and block.statements
942
+ and isinstance(block.statements[-1], ailment.Stmt.Jump)
943
+ ):
944
+ the_stmt = block.statements[-1] # type: ignore
945
+ break
946
+
947
+ if the_stmt is not None:
948
+ node.nodes = node.nodes[:-1]
949
+ return the_stmt
950
+
951
+ try:
952
+ last_stmts = ConditionProcessor.get_last_statements(node)
953
+ except EmptyBlockNotice:
954
+ return None
955
+
956
+ if len(last_stmts) == 1 and isinstance(
957
+ last_stmts[0], (ailment.Stmt.Jump, ailment.Stmt.ConditionalJump, IncompleteSwitchCaseHeadStatement)
958
+ ):
959
+ return remove_last_statement(node) # type: ignore
960
+ return None
961
+
962
+ @staticmethod
963
+ def _copy_and_remove_last_statement_if_jump(
964
+ node: ailment.Block | MultiNode | SequenceNode,
965
+ ) -> ailment.Block | MultiNode | SequenceNode:
966
+ if isinstance(node, SequenceNode):
967
+ if node.nodes and isinstance(node.nodes[-1], ConditionNode):
968
+ # copy the node and remove the last condition node
969
+ return SequenceNode(node.addr, nodes=node.nodes[:-1])
970
+ return node.copy()
971
+
972
+ if isinstance(node, MultiNode):
973
+ if node.nodes:
974
+ last_block = StructurerBase._copy_and_remove_last_statement_if_jump(node.nodes[-1])
975
+ nodes = [*node.nodes[:-1], last_block]
976
+ else:
977
+ nodes = []
978
+ return MultiNode(nodes, addr=node.addr, idx=node.idx)
979
+
980
+ assert isinstance(node, ailment.Block)
981
+ if node.statements and isinstance(node.statements[-1], (ailment.Stmt.Jump, ailment.Stmt.ConditionalJump)):
982
+ # copy the block and remove the last statement
983
+ stmts = node.statements[:-1]
984
+ else:
985
+ stmts = node.statements[::]
986
+ return ailment.Block(node.addr, node.original_size, statements=stmts, idx=node.idx)
987
+
914
988
  @staticmethod
915
989
  def _merge_nodes(node_0, node_1):
916
990
  addr = node_0.addr if node_0.addr is not None else node_1.addr
@@ -156,27 +156,48 @@ def switch_extract_cmp_bounds(
156
156
 
157
157
  if not isinstance(last_stmt, ailment.Stmt.ConditionalJump):
158
158
  return None
159
+ return switch_extract_cmp_bounds_from_condition(last_stmt.condition)
159
160
 
161
+
162
+ def switch_extract_cmp_bounds_from_condition(cond: ailment.Expr.Expression) -> tuple[Any, int, int] | None:
160
163
  # TODO: Add more operations
161
- if isinstance(last_stmt.condition, ailment.Expr.BinaryOp) and last_stmt.condition.op in {"CmpLE", "CmpLT"}:
162
- if not isinstance(last_stmt.condition.operands[1], ailment.Expr.Const):
163
- return None
164
- cmp_ub = (
165
- last_stmt.condition.operands[1].value
166
- if last_stmt.condition.op == "CmpLE"
167
- else last_stmt.condition.operands[1].value - 1
168
- )
169
- cmp_lb = 0
170
- cmp = last_stmt.condition.operands[0]
171
- if (
172
- isinstance(cmp, ailment.Expr.BinaryOp)
173
- and cmp.op == "Sub"
174
- and isinstance(cmp.operands[1], ailment.Expr.Const)
175
- ):
176
- cmp_ub += cmp.operands[1].value
177
- cmp_lb += cmp.operands[1].value
178
- cmp = cmp.operands[0]
179
- return cmp, cmp_lb, cmp_ub
164
+ if isinstance(cond, ailment.Expr.BinaryOp):
165
+ if cond.op in {"CmpLE", "CmpLT"}:
166
+ if not (isinstance(cond.operands[1], ailment.Expr.Const) and isinstance(cond.operands[1].value, int)):
167
+ return None
168
+ cmp_ub = cond.operands[1].value if cond.op == "CmpLE" else cond.operands[1].value - 1
169
+ cmp_lb = 0
170
+ cmp = cond.operands[0]
171
+ if (
172
+ isinstance(cmp, ailment.Expr.BinaryOp)
173
+ and cmp.op == "Sub"
174
+ and isinstance(cmp.operands[1], ailment.Expr.Const)
175
+ and isinstance(cmp.operands[1].value, int)
176
+ ):
177
+ cmp_ub += cmp.operands[1].value
178
+ cmp_lb += cmp.operands[1].value
179
+ cmp = cmp.operands[0]
180
+ return cmp, cmp_lb, cmp_ub
181
+
182
+ if cond.op in {"CmpGE", "CmpGT"}:
183
+ # We got the negated condition here
184
+ # CmpGE -> CmpLT
185
+ # CmpGT -> CmpLE
186
+ if not (isinstance(cond.operands[1], ailment.Expr.Const) and isinstance(cond.operands[1].value, int)):
187
+ return None
188
+ cmp_ub = cond.operands[1].value if cond.op == "CmpGT" else cond.operands[1].value - 1
189
+ cmp_lb = 0
190
+ cmp = cond.operands[0]
191
+ if (
192
+ isinstance(cmp, ailment.Expr.BinaryOp)
193
+ and cmp.op == "Sub"
194
+ and isinstance(cmp.operands[1], ailment.Expr.Const)
195
+ and isinstance(cmp.operands[1].value, int)
196
+ ):
197
+ cmp_ub += cmp.operands[1].value
198
+ cmp_lb += cmp.operands[1].value
199
+ cmp = cmp.operands[0]
200
+ return cmp, cmp_lb, cmp_ub
180
201
 
181
202
  return None
182
203
 
@@ -315,7 +336,7 @@ def switch_extract_bitwiseand_jumptable_info(last_stmt: ailment.Stmt.Jump) -> tu
315
336
  coeff = None
316
337
  index_expr = None
317
338
  lb = None
318
- ub = None
339
+ ub: int | None = None
319
340
  while expr is not None:
320
341
  if isinstance(expr, ailment.Expr.BinaryOp):
321
342
  if expr.op == "Mul":
@@ -331,12 +352,12 @@ def switch_extract_bitwiseand_jumptable_info(last_stmt: ailment.Stmt.Jump) -> tu
331
352
  masks = {0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF}
332
353
  if isinstance(expr.operands[1], ailment.Expr.Const) and expr.operands[1].value in masks:
333
354
  lb = 0
334
- ub = expr.operands[1].value
355
+ ub = expr.operands[1].value # type:ignore
335
356
  index_expr = expr
336
357
  break
337
358
  if isinstance(expr.operands[0], ailment.Expr.Const) and expr.operands[1].value in masks:
338
359
  lb = 0
339
- ub = expr.operands[0].value
360
+ ub = expr.operands[0].value # type:ignore
340
361
  index_expr = expr
341
362
  break
342
363
  return None
@@ -366,7 +387,7 @@ def get_ast_subexprs(claripy_ast):
366
387
  yield ast
367
388
 
368
389
 
369
- def insert_node(parent, insert_location: str, node, node_idx: int | tuple[int] | None, label=None):
390
+ def insert_node(parent, insert_location: str, node, node_idx: int, label=None):
370
391
  if insert_location not in {"before", "after"}:
371
392
  raise ValueError('"insert_location" must be either "before" or "after"')
372
393
 
@@ -538,12 +559,13 @@ def is_empty_or_label_only_node(node) -> bool:
538
559
 
539
560
 
540
561
  def has_nonlabel_statements(block: ailment.Block) -> bool:
541
- return block.statements and any(not isinstance(stmt, ailment.Stmt.Label) for stmt in block.statements)
562
+ return bool(block.statements and any(not isinstance(stmt, ailment.Stmt.Label) for stmt in block.statements))
542
563
 
543
564
 
544
565
  def has_nonlabel_nonphi_statements(block: ailment.Block) -> bool:
545
- return block.statements and any(
546
- not (isinstance(stmt, ailment.Stmt.Label) or is_phi_assignment(stmt)) for stmt in block.statements
566
+ return bool(
567
+ block.statements
568
+ and any(not (isinstance(stmt, ailment.Stmt.Label) or is_phi_assignment(stmt)) for stmt in block.statements)
547
569
  )
548
570
 
549
571
 
@@ -589,6 +611,19 @@ def last_nonlabel_statement(block: ailment.Block) -> ailment.Stmt.Statement | No
589
611
  return None
590
612
 
591
613
 
614
+ def last_node(node: BaseNode) -> BaseNode | ailment.Block | None:
615
+ """
616
+ Get the last node in a sequence or code node.
617
+ """
618
+ if isinstance(node, CodeNode):
619
+ return last_node(node.node)
620
+ if isinstance(node, SequenceNode):
621
+ if not node.nodes:
622
+ return None
623
+ return last_node(node.nodes[-1])
624
+ return node
625
+
626
+
592
627
  def first_nonlabel_node(seq: SequenceNode) -> BaseNode | ailment.Block | None:
593
628
  for node in seq.nodes:
594
629
  inner_node = node.node if isinstance(node, CodeNode) else node
@@ -672,7 +707,9 @@ def _find_node_in_graph(node: ailment.Block, graph: networkx.DiGraph) -> ailment
672
707
  return None
673
708
 
674
709
 
675
- def structured_node_has_multi_predecessors(node: SequenceNode | MultiNode, graph: networkx.DiGraph) -> bool:
710
+ def structured_node_has_multi_predecessors(
711
+ node: SequenceNode | MultiNode | ailment.Block, graph: networkx.DiGraph
712
+ ) -> bool:
676
713
  if graph is None:
677
714
  return False
678
715
 
@@ -728,6 +765,7 @@ def structured_node_is_simple_return(
728
765
  if valid_last_stmt:
729
766
  # note that the block may not be the same block in the AIL graph post dephication. we must find the block again
730
767
  # in the graph.
768
+ assert isinstance(last_block, ailment.Block)
731
769
  last_graph_block = _find_node_in_graph(last_block, graph)
732
770
  if last_graph_block is not None:
733
771
  succs = list(graph.successors(last_graph_block))
@@ -927,6 +965,7 @@ def peephole_optimize_multistmts(block, stmt_opts):
927
965
  break
928
966
 
929
967
  if matched:
968
+ assert stmt_seq_len is not None
930
969
  matched_stmts = statements[stmt_idx : stmt_idx + stmt_seq_len]
931
970
  r = opt.optimize(matched_stmts, stmt_idx=stmt_idx, block=block)
932
971
  if r is not None:
@@ -1036,7 +1075,11 @@ def decompile_functions(
1036
1075
  _l.critical("Failed to decompile %s because %s", repr(f), exception_string)
1037
1076
  decompilation += f"// [error: {func} | {exception_string}]\n"
1038
1077
  else:
1039
- decompilation += dec.codegen.text + "\n"
1078
+ if dec is not None and dec.codegen is not None and dec.codegen.text is not None:
1079
+ decompilation += dec.codegen.text
1080
+ else:
1081
+ decompilation += "Invalid decompilation output"
1082
+ decompilation += "\n"
1040
1083
 
1041
1084
  return decompilation
1042
1085
 
@@ -173,10 +173,8 @@ class StringObfuscationFinder(Analysis):
173
173
  continue
174
174
 
175
175
  # decompile this function and see if it "looks like" a deobfuscation function
176
- try:
177
- dec = self.project.analyses.Decompiler(func, cfg=cfg)
178
- except Exception: # pylint:disable=broad-exception-caught
179
- continue
176
+ with self._resilience():
177
+ dec = self.project.analyses.Decompiler(func, cfg=cfg, fail_fast=self._fail_fast) # type:ignore
180
178
  if (
181
179
  dec.codegen is None
182
180
  or not dec.codegen.text
@@ -470,10 +468,13 @@ class StringObfuscationFinder(Analysis):
470
468
  continue
471
469
 
472
470
  # decompile this function and see if it "looks like" a deobfuscation function
473
- try:
474
- dec = self.project.analyses.Decompiler(func, cfg=cfg, expr_collapse_depth=64)
475
- except Exception: # pylint:disable=broad-exception-caught
476
- continue
471
+ with self._resilience():
472
+ dec = self.project.analyses.Decompiler(
473
+ func,
474
+ cfg=cfg,
475
+ expr_collapse_depth=64,
476
+ fail_fast=self._fail_fast, # type:ignore
477
+ )
477
478
  if (
478
479
  dec.codegen is None
479
480
  or not dec.codegen.text
@@ -666,11 +667,9 @@ class StringObfuscationFinder(Analysis):
666
667
  continue
667
668
 
668
669
  # take a look at the content
669
- try:
670
- dec = self.project.analyses.Decompiler(func, cfg=cfg)
671
- except Exception: # pylint:disable=broad-exception-caught
670
+ with self._resilience():
672
671
  # catch all exceptions
673
- continue
672
+ dec = self.project.analyses.Decompiler(func, cfg=cfg, fail_fast=self._fail_fast) # type:ignore
674
673
  if dec.codegen is None or not dec.codegen.text:
675
674
  continue
676
675
 
@@ -736,7 +735,7 @@ class StringObfuscationFinder(Analysis):
736
735
  callinsn2content = {}
737
736
  for idx, call_site in enumerate(call_sites):
738
737
  _l.debug("Analyzing type 3 candidate call site %#x (%d/%d)...", call_site.addr, idx + 1, len(call_sites))
739
- assert call_site.function_address is not None
738
+ assert call_site.function_address is not None and isinstance(call_site.function_address, int)
740
739
  data, _ = self._type3_prepare_and_execute(func_addr, call_site.addr, call_site.function_address, cfg)
741
740
  if data:
742
741
  callinsn2content[call_site.instruction_addrs[-1]] = data
@@ -831,7 +830,7 @@ class StringObfuscationFinder(Analysis):
831
830
  # at least a byte
832
831
  continue
833
832
  con = prop_state.load_register(reg_offset, reg_width // 8)
834
- if isinstance(con, claripy.ast.Base) and con.op == "BVV":
833
+ if isinstance(con, claripy.ast.Base) and con.op == "BVV" and isinstance(con.concrete_value, int):
835
834
  state.registers.store(reg_offset, claripy.BVV(con.concrete_value, reg_width))
836
835
  if reg_offset == state.arch.bp_offset:
837
836
  bp_set = True
@@ -851,7 +850,11 @@ class StringObfuscationFinder(Analysis):
851
850
 
852
851
  in_state = simgr.active[0]
853
852
 
854
- cc = default_cc(self.project.arch.name, self.project.simos.name)(self.project.arch)
853
+ cc_cls = default_cc(self.project.arch.name, self.project.simos.name)
854
+ assert (
855
+ cc_cls is not None
856
+ ), f"Failed to obtain the default calling convention for {self.project.arch.name}-{self.project.simos.name}."
857
+ cc = cc_cls(self.project.arch)
855
858
  cc.STACKARG_SP_BUFF = 0 # disable shadow stack space because the binary code already sets it if needed
856
859
  cc.STACK_ALIGNMENT = 1 # disable stack address aligning because the binary code already sets it if needed
857
860
  prototype_0 = SimTypeFunction([], SimTypePointer(pts_to=SimTypeBottom(label="void"))).with_arch(
@@ -875,7 +878,7 @@ class StringObfuscationFinder(Analysis):
875
878
  out_state = callable_0.result_state
876
879
 
877
880
  # figure out what was written
878
- assert out_state is not None
881
+ assert out_state is not None and ret_value is not None
879
882
  ptr = out_state.memory.load(ret_value, size=self.project.arch.bytes, endness=self.project.arch.memory_endness)
880
883
  if out_state.memory.load(ptr, size=4).concrete_value == 0:
881
884
  # fall back to using the return value as the pointer
@@ -7,6 +7,7 @@ from angr.ailment import Block
7
7
  from angr.ailment.statement import Statement, Call, Assignment
8
8
  from angr.ailment.expression import Const, Register, VirtualVariable
9
9
 
10
+ from angr.analyses.decompiler.notes.deobfuscated_strings import DeobfuscatedStringsNote
10
11
  from angr.analyses.decompiler.optimization_passes.optimization_pass import OptimizationPass, OptimizationPassStage
11
12
  from angr.analyses.decompiler.optimization_passes import register_optimization_pass
12
13
 
@@ -57,9 +58,11 @@ class StringObfType3Rewriter(OptimizationPass):
57
58
  self.is_call_or_call_assignment(last_stmt)
58
59
  and last_stmt.ins_addr in self.kb.obfuscations.type3_deobfuscated_strings
59
60
  ):
60
- new_block = self._process_block(
61
- block, self.kb.obfuscations.type3_deobfuscated_strings[block.statements[-1].ins_addr]
62
- )
61
+ the_str = self.kb.obfuscations.type3_deobfuscated_strings[block.statements[-1].ins_addr]
62
+ if "deobfuscated_strings" not in self.kb.notes:
63
+ self.kb.notes["deobfuscated_strings"] = DeobfuscatedStringsNote("deobfuscated_strings")
64
+ self.kb.notes["deobfuscated_strings"].add_string("3", the_str, ref_addr=last_stmt.ins_addr)
65
+ new_block = self._process_block(block, the_str)
63
66
  if new_block is not None:
64
67
  self._update_block(block, new_block)
65
68
 
@@ -434,8 +434,9 @@ class SimEngineRDVEX(
434
434
  size = bits // self.arch.byte_width
435
435
 
436
436
  # convert addr from MultiValues to a list of valid addresses
437
- if (one_addr := addr.one_value()) is not None:
438
- return self._load_core([one_addr], size, expr.endness)
437
+ if addr.count() == 1 and 0 in addr:
438
+ addrs = list(addr[0])
439
+ return self._load_core(addrs, size, expr.endness)
439
440
 
440
441
  top = self.state.top(bits)
441
442
  # annotate it
@@ -7,5 +7,13 @@ class __isoc99_scanf(scanf):
7
7
  pass
8
8
 
9
9
 
10
+ class __isoc23_scanf(scanf):
11
+ pass
12
+
13
+
10
14
  class __isoc99_fscanf(fscanf):
11
15
  pass
16
+
17
+
18
+ class __isoc23_fscanf(fscanf):
19
+ pass
@@ -4,3 +4,7 @@ from angr.procedures.libc.sscanf import sscanf
4
4
 
5
5
  class __isoc99_sscanf(sscanf):
6
6
  pass
7
+
8
+
9
+ class __isoc23_sscanf(sscanf):
10
+ pass
angr/rustylib.abi3.so CHANGED
Binary file
angr/unicornlib.dylib CHANGED
Binary file
angr/utils/graph.py CHANGED
@@ -85,7 +85,7 @@ def to_acyclic_graph(
85
85
  return acyclic_graph
86
86
 
87
87
 
88
- def dfs_back_edges(graph, start_node):
88
+ def dfs_back_edges(graph, start_node, *, visit_all_nodes: bool = False, visited: set | None = None):
89
89
  """
90
90
  Perform an iterative DFS traversal of the graph, returning back edges.
91
91
 
@@ -96,9 +96,9 @@ def dfs_back_edges(graph, start_node):
96
96
  if start_node not in graph:
97
97
  return # Ensures that the start node is in the graph
98
98
 
99
- visited = set() # Tracks visited nodes
99
+ visited = set() if visited is None else visited # Tracks visited nodes
100
100
  finished = set() # Tracks nodes whose descendants are fully explored
101
- stack = [(start_node, iter(graph[start_node]))]
101
+ stack = [(start_node, iter(sorted(graph[start_node], key=GraphUtils._sort_node)))]
102
102
 
103
103
  while stack:
104
104
  node, children = stack[-1]
@@ -110,11 +110,17 @@ def dfs_back_edges(graph, start_node):
110
110
  if child not in finished:
111
111
  yield node, child # Found a back edge
112
112
  elif child not in finished: # Check if the child has not been finished
113
- stack.append((child, iter(graph[child])))
113
+ stack.append((child, iter(sorted(graph[child], key=GraphUtils._sort_node))))
114
114
  except StopIteration:
115
115
  stack.pop() # Done with this node's children
116
116
  finished.add(node) # Mark this node as finished
117
117
 
118
+ if visit_all_nodes:
119
+ while len(visited) < len(graph):
120
+ # If we need to visit all nodes, we can start from unvisited nodes
121
+ node = sorted(set(graph) - visited, key=GraphUtils._sort_node)[0]
122
+ yield from dfs_back_edges(graph, node, visited=visited)
123
+
118
124
 
119
125
  def subgraph_between_nodes(graph, source, frontier, include_frontier=False):
120
126
  """
@@ -594,17 +600,24 @@ class SCCPlaceholder:
594
600
  Describes a placeholder for strongly-connected-components in a graph.
595
601
  """
596
602
 
597
- __slots__ = ("scc_id",)
603
+ __slots__ = (
604
+ "addr",
605
+ "scc_id",
606
+ )
598
607
 
599
- def __init__(self, scc_id):
608
+ def __init__(self, scc_id, addr):
600
609
  self.scc_id = scc_id
610
+ self.addr = addr
601
611
 
602
612
  def __eq__(self, other):
603
- return isinstance(other, SCCPlaceholder) and other.scc_id == self.scc_id
613
+ return isinstance(other, SCCPlaceholder) and other.scc_id == self.scc_id and other.addr == self.addr
604
614
 
605
615
  def __hash__(self):
606
616
  return hash(f"scc_placeholder_{self.scc_id}")
607
617
 
618
+ def __repr__(self):
619
+ return f"SCCPlaceholder({self.scc_id}, addr={self.addr:#x})"
620
+
608
621
 
609
622
  class GraphUtils:
610
623
  """
@@ -676,17 +689,17 @@ class GraphUtils:
676
689
  @staticmethod
677
690
  def dfs_postorder_nodes_deterministic(graph: networkx.DiGraph, source):
678
691
  visited = set()
679
- stack = [source]
692
+ stack: list[tuple[Any, bool]] = [(source, True)] # NodeType, is_pre_visit
680
693
  while stack:
681
- node = stack[-1]
682
- if node not in visited:
694
+ node, pre_visit = stack.pop()
695
+ if pre_visit and node not in visited:
683
696
  visited.add(node)
697
+ stack.append((node, False))
684
698
  for succ in sorted(graph.successors(node), key=GraphUtils._sort_node):
685
699
  if succ not in visited:
686
- stack.append(succ)
687
- else:
700
+ stack.append((succ, True))
701
+ elif not pre_visit:
688
702
  yield node
689
- stack.pop()
690
703
 
691
704
  @staticmethod
692
705
  def reverse_post_order_sort_nodes(graph, nodes=None):
@@ -759,7 +772,10 @@ class GraphUtils:
759
772
  """
760
773
 
761
774
  # fast path for single node graphs
762
- if graph.number_of_nodes() == 1:
775
+ number_of_nodes = graph.number_of_nodes()
776
+ if number_of_nodes == 0:
777
+ return []
778
+ if number_of_nodes == 1:
763
779
  if nodes is None:
764
780
  return list(graph.nodes)
765
781
  return [n for n in graph.nodes() if n in nodes]
@@ -768,21 +784,25 @@ class GraphUtils:
768
784
  graph_copy = networkx.DiGraph()
769
785
 
770
786
  # find all strongly connected components in the graph
771
- sccs = [scc for scc in networkx.strongly_connected_components(graph) if len(scc) > 1]
787
+ sccs = sorted(
788
+ (scc for scc in networkx.strongly_connected_components(graph) if len(scc) > 1),
789
+ key=lambda x: (len(x), min(node.addr if hasattr(node, "addr") else node for node in x)),
790
+ )
772
791
  comp_indices = {}
773
792
  for i, scc in enumerate(sccs):
793
+ scc_addr = min(node.addr if hasattr(node, "addr") else node for node in scc)
774
794
  for node in scc:
775
795
  if node not in comp_indices:
776
- comp_indices[node] = i
796
+ comp_indices[node] = (i, scc_addr)
777
797
 
778
798
  # collapse all strongly connected components
779
799
  for src, dst in sorted(graph.edges(), key=GraphUtils._sort_edge):
780
- scc_index = comp_indices.get(src)
800
+ scc_index, scc_addr = comp_indices.get(src, (None, None))
781
801
  if scc_index is not None:
782
- src = SCCPlaceholder(scc_index)
783
- scc_index = comp_indices.get(dst)
802
+ src = SCCPlaceholder(scc_index, scc_addr)
803
+ scc_index, scc_addr = comp_indices.get(dst, (None, None))
784
804
  if scc_index is not None:
785
- dst = SCCPlaceholder(scc_index)
805
+ dst = SCCPlaceholder(scc_index, scc_addr)
786
806
 
787
807
  if isinstance(src, SCCPlaceholder) and isinstance(dst, SCCPlaceholder) and src == dst:
788
808
  if src not in graph_copy:
@@ -801,11 +821,28 @@ class GraphUtils:
801
821
  if graph.in_degree(node) == 0:
802
822
  graph_copy.add_node(node)
803
823
 
804
- # topological sort on acyclic graph `graph_copy`
805
- tmp_nodes = networkx.topological_sort(graph_copy)
824
+ class NodeWithAddr:
825
+ """
826
+ Temporary node class.
827
+ """
806
828
 
829
+ def __init__(self, addr: int):
830
+ self.addr = addr
831
+
832
+ # topological sort on acyclic graph `graph_copy`
833
+ heads = [nn for nn in graph_copy if graph_copy.in_degree[nn] == 0]
834
+ if len(heads) > 1:
835
+ head = NodeWithAddr(-1)
836
+ for real_head in heads:
837
+ graph_copy.add_edge(head, real_head)
838
+ else:
839
+ assert heads
840
+ head = heads[0]
841
+ tmp_nodes = reversed(list(GraphUtils.dfs_postorder_nodes_deterministic(graph_copy, head)))
807
842
  ordered_nodes = []
808
843
  for n in tmp_nodes:
844
+ if isinstance(n, NodeWithAddr):
845
+ continue
809
846
  if isinstance(n, SCCPlaceholder):
810
847
  GraphUtils._append_scc(
811
848
  graph,
@@ -860,9 +897,10 @@ class GraphUtils:
860
897
  if len(scc_succs) > 1:
861
898
  # calculate the distance between each pair of nodes within scc_succs, pick the one with the
862
899
  # shortest total distance
900
+ sorted_scc_succs = sorted(scc_succs, key=GraphUtils._sort_node)
863
901
  scc_node_distance = defaultdict(int)
864
- for scc_succ in scc_succs:
865
- for other_node in scc_succs:
902
+ for scc_succ in sorted_scc_succs:
903
+ for other_node in sorted_scc_succs:
866
904
  if other_node is scc_succ:
867
905
  continue
868
906
  scc_node_distance[scc_succ] += networkx.algorithms.shortest_path_length(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: angr
3
- Version: 9.2.166
3
+ Version: 9.2.168
4
4
  Summary: A multi-architecture binary analysis toolkit, with the ability to perform dynamic symbolic execution and various static analyses on binaries
5
5
  License: BSD-2-Clause
6
6
  Project-URL: Homepage, https://angr.io/
@@ -16,12 +16,12 @@ Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
17
  Requires-Dist: cxxheaderparser
18
18
  Requires-Dist: GitPython
19
- Requires-Dist: archinfo==9.2.166
19
+ Requires-Dist: archinfo==9.2.168
20
20
  Requires-Dist: cachetools
21
21
  Requires-Dist: capstone==5.0.3
22
22
  Requires-Dist: cffi>=1.14.0
23
- Requires-Dist: claripy==9.2.166
24
- Requires-Dist: cle==9.2.166
23
+ Requires-Dist: claripy==9.2.168
24
+ Requires-Dist: cle==9.2.168
25
25
  Requires-Dist: mulpyplexer
26
26
  Requires-Dist: networkx!=2.8.1,>=2.0
27
27
  Requires-Dist: protobuf>=5.28.2
@@ -30,7 +30,7 @@ Requires-Dist: pycparser>=2.18
30
30
  Requires-Dist: pydemumble
31
31
  Requires-Dist: pyformlang
32
32
  Requires-Dist: pypcode<4.0,>=3.2.1
33
- Requires-Dist: pyvex==9.2.166
33
+ Requires-Dist: pyvex==9.2.168
34
34
  Requires-Dist: rich>=13.1.0
35
35
  Requires-Dist: sortedcontainers
36
36
  Requires-Dist: sympy