angr 9.2.139__py3-none-manylinux2014_x86_64.whl → 9.2.140__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (68)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +48 -21
  3. angr/analyses/cfg/cfg_base.py +13 -0
  4. angr/analyses/cfg/cfg_fast.py +11 -0
  5. angr/analyses/decompiler/ail_simplifier.py +67 -52
  6. angr/analyses/decompiler/clinic.py +68 -43
  7. angr/analyses/decompiler/decompiler.py +17 -7
  8. angr/analyses/decompiler/expression_narrower.py +1 -1
  9. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
  10. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
  11. angr/analyses/decompiler/optimization_passes/optimization_pass.py +16 -10
  12. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +2 -2
  13. angr/analyses/decompiler/region_simplifiers/expr_folding.py +259 -108
  14. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +27 -12
  15. angr/analyses/decompiler/structuring/dream.py +21 -17
  16. angr/analyses/decompiler/structuring/phoenix.py +152 -40
  17. angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
  18. angr/analyses/decompiler/structuring/structurer_base.py +36 -10
  19. angr/analyses/decompiler/structuring/structurer_nodes.py +4 -1
  20. angr/analyses/decompiler/utils.py +60 -1
  21. angr/analyses/deobfuscator/api_obf_finder.py +8 -5
  22. angr/analyses/deobfuscator/api_obf_type2_finder.py +18 -10
  23. angr/analyses/deobfuscator/string_obf_finder.py +105 -18
  24. angr/analyses/forward_analysis/forward_analysis.py +1 -1
  25. angr/analyses/propagator/top_checker_mixin.py +6 -6
  26. angr/analyses/reaching_definitions/__init__.py +2 -1
  27. angr/analyses/reaching_definitions/dep_graph.py +1 -12
  28. angr/analyses/reaching_definitions/engine_vex.py +36 -31
  29. angr/analyses/reaching_definitions/function_handler.py +15 -2
  30. angr/analyses/reaching_definitions/rd_state.py +1 -37
  31. angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
  32. angr/analyses/s_propagator.py +6 -41
  33. angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
  34. angr/analyses/stack_pointer_tracker.py +36 -22
  35. angr/analyses/typehoon/simple_solver.py +45 -7
  36. angr/analyses/typehoon/typeconsts.py +18 -5
  37. angr/analyses/variable_recovery/engine_base.py +7 -5
  38. angr/block.py +69 -107
  39. angr/callable.py +14 -7
  40. angr/calling_conventions.py +15 -1
  41. angr/distributed/__init__.py +1 -1
  42. angr/engines/__init__.py +7 -8
  43. angr/engines/engine.py +1 -120
  44. angr/engines/failure.py +2 -2
  45. angr/engines/hook.py +2 -2
  46. angr/engines/light/engine.py +2 -2
  47. angr/engines/pcode/engine.py +2 -14
  48. angr/engines/procedure.py +2 -2
  49. angr/engines/soot/engine.py +2 -2
  50. angr/engines/soot/statements/switch.py +1 -1
  51. angr/engines/successors.py +124 -11
  52. angr/engines/syscall.py +2 -2
  53. angr/engines/unicorn.py +3 -3
  54. angr/engines/vex/heavy/heavy.py +3 -15
  55. angr/factory.py +4 -19
  56. angr/knowledge_plugins/key_definitions/atoms.py +8 -4
  57. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
  58. angr/sim_type.py +19 -17
  59. angr/state_plugins/plugin.py +19 -4
  60. angr/storage/memory_mixins/memory_mixin.py +1 -1
  61. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
  62. angr/utils/ssa/__init__.py +119 -4
  63. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/METADATA +6 -6
  64. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/RECORD +68 -68
  65. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/LICENSE +0 -0
  66. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/WHEEL +0 -0
  67. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/entry_points.txt +0 -0
  68. {angr-9.2.139.dist-info → angr-9.2.140.dist-info}/top_level.txt +0 -0
@@ -20,6 +20,7 @@ from angr.analyses.decompiler.utils import (
20
20
  )
21
21
  from angr.analyses.decompiler.label_collector import LabelCollector
22
22
  from angr.errors import AngrDecompilationError
23
+ from angr.knowledge_plugins.cfg import IndirectJump
23
24
  from .structurer_nodes import (
24
25
  MultiNode,
25
26
  SequenceNode,
@@ -33,6 +34,7 @@ from .structurer_nodes import (
33
34
  BreakNode,
34
35
  LoopNode,
35
36
  EmptyBlockNotice,
37
+ IncompleteSwitchCaseNode,
36
38
  )
37
39
 
38
40
  if TYPE_CHECKING:
@@ -60,6 +62,7 @@ class StructurerBase(Analysis):
60
62
  func: Function | None = None,
61
63
  case_entry_to_switch_head: dict[int, int] | None = None,
62
64
  parent_region=None,
65
+ jump_tables: dict[int, IndirectJump] | None = None,
63
66
  **kwargs,
64
67
  ):
65
68
  self._region: GraphRegion = region
@@ -67,6 +70,7 @@ class StructurerBase(Analysis):
67
70
  self.function = func
68
71
  self._case_entry_to_switch_head = case_entry_to_switch_head
69
72
  self._parent_region = parent_region
73
+ self.jump_tables = jump_tables or {}
70
74
 
71
75
  self.cond_proc = (
72
76
  condition_processor if condition_processor is not None else ConditionProcessor(self.project.arch)
@@ -304,6 +308,7 @@ class StructurerBase(Analysis):
304
308
  jump_stmt = this_node.statements[-1] # type: ignore
305
309
 
306
310
  if isinstance(jump_stmt, ailment.Stmt.Jump):
311
+ assert isinstance(this_node, ailment.Block)
307
312
  next_node = node.nodes[i + 1]
308
313
  if (
309
314
  isinstance(jump_stmt.target, ailment.Expr.Const)
@@ -312,6 +317,7 @@ class StructurerBase(Analysis):
312
317
  # this goto is useless
313
318
  this_node.statements = this_node.statements[:-1]
314
319
  elif isinstance(jump_stmt, ailment.Stmt.ConditionalJump):
320
+ assert isinstance(this_node, ailment.Block)
315
321
  next_node = node.nodes[i + 1]
316
322
  if (
317
323
  isinstance(jump_stmt.true_target, ailment.Expr.Const)
@@ -365,6 +371,7 @@ class StructurerBase(Analysis):
365
371
  jump_stmt = this_node.nodes[-1].statements[-1]
366
372
  this_node = this_node.nodes[-1]
367
373
 
374
+ assert isinstance(this_node, ailment.Block)
368
375
  if isinstance(jump_stmt, ailment.Stmt.Jump):
369
376
  next_node = node.nodes[i + 1]
370
377
  if (
@@ -785,10 +792,6 @@ class StructurerBase(Analysis):
785
792
 
786
793
  return _Holder.merged, seq
787
794
 
788
- #
789
- # Util methods
790
- #
791
-
792
795
  def _reorganize_switch_cases(
793
796
  self, cases: OrderedDict[int | tuple[int, ...], SequenceNode]
794
797
  ) -> OrderedDict[int | tuple[int, ...], SequenceNode]:
@@ -891,12 +894,12 @@ class StructurerBase(Analysis):
891
894
  if isinstance(last_stmt.false_target, ailment.Expr.Const):
892
895
  jump_targets.append((last_stmt.false_target.value, last_stmt.false_target_idx))
893
896
  if any(tpl in addr_and_ids for tpl in jump_targets):
894
- return remove_last_statement(node)
897
+ return remove_last_statement(node) # type: ignore
895
898
  return None
896
899
 
897
900
  @staticmethod
898
901
  def _remove_last_statement_if_jump(
899
- node: BaseNode | ailment.Block,
902
+ node: BaseNode | ailment.Block | MultiNode,
900
903
  ) -> ailment.Stmt.Jump | ailment.Stmt.ConditionalJump | None:
901
904
  try:
902
905
  last_stmts = ConditionProcessor.get_last_statements(node)
@@ -904,7 +907,7 @@ class StructurerBase(Analysis):
904
907
  return None
905
908
 
906
909
  if len(last_stmts) == 1 and isinstance(last_stmts[0], (ailment.Stmt.Jump, ailment.Stmt.ConditionalJump)):
907
- return remove_last_statement(node)
910
+ return remove_last_statement(node) # type: ignore
908
911
  return None
909
912
 
910
913
  @staticmethod
@@ -994,8 +997,8 @@ class StructurerBase(Analysis):
994
997
  @staticmethod
995
998
  def replace_node_in_node(
996
999
  parent_node: BaseNode,
997
- old_node: BaseNode | ailment.Block,
998
- new_node: BaseNode | ailment.Block,
1000
+ old_node: BaseNode | ailment.Block | MultiNode,
1001
+ new_node: BaseNode | ailment.Block | MultiNode,
999
1002
  ) -> None:
1000
1003
  if isinstance(parent_node, SequenceNode):
1001
1004
  for i in range(len(parent_node.nodes)): # pylint:disable=consider-using-enumerate
@@ -1018,7 +1021,9 @@ class StructurerBase(Analysis):
1018
1021
  raise TypeError(f"Unsupported node type {type(parent_node)}")
1019
1022
 
1020
1023
  @staticmethod
1021
- def is_a_jump_target(stmt: ailment.Stmt.ConditionalJump | ailment.Stmt.Jump, addr: int) -> bool:
1024
+ def is_a_jump_target(
1025
+ stmt: ailment.Stmt.ConditionalJump | ailment.Stmt.Jump | ailment.Stmt.Statement, addr: int
1026
+ ) -> bool:
1022
1027
  if isinstance(stmt, ailment.Stmt.ConditionalJump):
1023
1028
  if isinstance(stmt.true_target, ailment.Expr.Const) and stmt.true_target.value == addr:
1024
1029
  return True
@@ -1038,3 +1043,24 @@ class StructurerBase(Analysis):
1038
1043
  if isinstance(node, SequenceNode):
1039
1044
  return any(StructurerBase.has_nonlabel_nonphi_statements(nn) for nn in node.nodes)
1040
1045
  return False
1046
+
1047
+ def _node_ending_with_jump_table_header(self, node: BaseNode) -> tuple[int | None, IndirectJump | None]:
1048
+ if isinstance(node, (ailment.Block, MultiNode, IncompleteSwitchCaseNode)):
1049
+ assert node.addr is not None
1050
+ return node.addr, self.jump_tables.get(node.addr, None)
1051
+ if isinstance(node, SequenceNode):
1052
+ return node.addr, self._node_ending_with_jump_table_header(node.nodes[-1])[1]
1053
+ return None, None
1054
+
1055
+ @staticmethod
1056
+ def _switch_find_default_node(
1057
+ graph: networkx.DiGraph, head_node: BaseNode, default_node_addr: int
1058
+ ) -> BaseNode | None:
1059
+ # it is possible that the default node gets duplicated by other analyses and creates a default node (addr.a)
1060
+ # and a case node (addr.b). The addr.a node is a successor to the head node while the addr.b node is a
1061
+ # successor to node_a
1062
+ default_node_candidates = [nn for nn in graph.nodes if nn.addr == default_node_addr]
1063
+ node_default: BaseNode | None = next(
1064
+ iter(nn for nn in default_node_candidates if graph.has_edge(head_node, nn)), None
1065
+ )
1066
+ return node_default
@@ -231,7 +231,10 @@ class CascadingConditionNode(BaseNode):
231
231
  )
232
232
 
233
233
  def __init__(
234
- self, addr, condition_and_nodes: list[tuple[Any, BaseNode | ailment.Block]], else_node: BaseNode = None
234
+ self,
235
+ addr,
236
+ condition_and_nodes: list[tuple[Any, BaseNode | ailment.Block | MultiNode]],
237
+ else_node: BaseNode = None,
235
238
  ):
236
239
  self.addr = addr
237
240
  self.condition_and_nodes = condition_and_nodes
@@ -144,7 +144,9 @@ def extract_jump_targets(stmt):
144
144
  return targets
145
145
 
146
146
 
147
- def switch_extract_cmp_bounds(last_stmt: ailment.Stmt.ConditionalJump) -> tuple[Any, int, int] | None:
147
+ def switch_extract_cmp_bounds(
148
+ last_stmt: ailment.Stmt.ConditionalJump | ailment.Stmt.Statement,
149
+ ) -> tuple[Any, int, int] | None:
148
150
  """
149
151
  Check the last statement of the switch-case header node, and extract lower+upper bounds for the comparison.
150
152
 
@@ -175,6 +177,54 @@ def switch_extract_cmp_bounds(last_stmt: ailment.Stmt.ConditionalJump) -> tuple[
175
177
  return None
176
178
 
177
179
 
180
+ def switch_extract_switch_expr_from_jump_target(target: ailment.Expr.Expression) -> ailment.Expr.Expression | None:
181
+ """
182
+ Extract the switch expression from the indirect jump target expression.
183
+
184
+ :param target: The target of the indirect jump statement.
185
+ :return: The extracted expression if successful, or None otherwise.
186
+ """
187
+
188
+ # e.g.: Jump (Conv(32->64, (Load(addr=((0x140000000<64> + (vvar_229{reg 80} * 0x4<64>)) + 0x2290<64>),
189
+ # size=4,
190
+ # endness=Iend_LE
191
+ # ) + 0x140000000<32>)))
192
+
193
+ found_load = False
194
+ while True:
195
+ if isinstance(target, ailment.Expr.Convert):
196
+ if target.from_bits < target.to_bits:
197
+ target = target.operand
198
+ else:
199
+ return None
200
+ elif isinstance(target, ailment.Expr.BinaryOp):
201
+ if target.op == "Add":
202
+ # it must be adding the target expr with a constant
203
+ if isinstance(target.operands[0], ailment.Expr.Const):
204
+ target = target.operands[1]
205
+ elif isinstance(target.operands[1], ailment.Expr.Const):
206
+ target = target.operands[0]
207
+ else:
208
+ return None
209
+ elif target.op == "Mul":
210
+ # it must be multiplying the target expr with a constant
211
+ if isinstance(target.operands[0], ailment.Expr.Const):
212
+ target = target.operands[1]
213
+ elif isinstance(target.operands[1], ailment.Expr.Const):
214
+ target = target.operands[0]
215
+ else:
216
+ return None
217
+ elif isinstance(target, ailment.Expr.Load):
218
+ # we want the address!
219
+ found_load = True
220
+ target = target.addr
221
+ elif isinstance(target, ailment.Expr.VirtualVariable):
222
+ break
223
+ else:
224
+ return None
225
+ return target if found_load else None
226
+
227
+
178
228
  def switch_extract_bitwiseand_jumptable_info(last_stmt: ailment.Stmt.Jump) -> tuple[Any, int, int] | None:
179
229
  """
180
230
  Check the last statement of the switch-case header node (whose address is loaded from a jump table and computed
@@ -973,6 +1023,15 @@ def sequence_to_statements(
973
1023
  return statements
974
1024
 
975
1025
 
1026
+ def remove_edges_in_ailgraph(
1027
+ ail_graph: networkx.DiGraph, edges_to_remove: list[tuple[tuple[int, int | None], tuple[int, int | None]]]
1028
+ ) -> None:
1029
+ d = {(bb.addr, bb.idx): bb for bb in ail_graph}
1030
+ for src_addr, dst_addr in edges_to_remove:
1031
+ if src_addr in d and dst_addr in d and ail_graph.has_edge(d[src_addr], d[dst_addr]):
1032
+ ail_graph.remove_edge(d[src_addr], d[dst_addr])
1033
+
1034
+
976
1035
  # delayed import
977
1036
  from .structuring.structurer_nodes import (
978
1037
  MultiNode,
@@ -12,6 +12,7 @@ import claripy
12
12
  from angr import SIM_LIBRARIES
13
13
  from angr.calling_conventions import SimRegArg
14
14
  from angr.errors import SimMemoryMissingError
15
+ from angr.knowledge_base import KnowledgeBase
15
16
  from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
16
17
  from angr.sim_type import SimTypePointer, SimTypeChar
17
18
  from angr.analyses import Analysis, AnalysesHub
@@ -35,7 +36,7 @@ class APIObfuscationType(IntEnum):
35
36
 
36
37
 
37
38
  class APIDeobFuncDescriptor:
38
- def __init__(self, type_: APIObfuscationType, func_addr=None, libname_argidx=None, funcname_argidx=None):
39
+ def __init__(self, type_: APIObfuscationType, *, func_addr: int, libname_argidx: int, funcname_argidx: int):
39
40
  self.type = type_
40
41
  self.func_addr = func_addr
41
42
  self.libname_argidx = libname_argidx
@@ -96,8 +97,9 @@ class APIObfuscationFinder(Analysis):
96
97
  - Type 2: GetProcAddress(_, "api_name").
97
98
  """
98
99
 
99
- def __init__(self):
100
+ def __init__(self, variable_kb: KnowledgeBase | None = None):
100
101
  self.type1_candidates = []
102
+ self.variable_kb = variable_kb or self.project.kb
101
103
 
102
104
  self.analyze()
103
105
 
@@ -109,7 +111,7 @@ class APIObfuscationFinder(Analysis):
109
111
  type1_deobfuscated = self._analyze_type1(desc.func_addr, desc)
110
112
  self.kb.obfuscations.type1_deobfuscated_apis.update(type1_deobfuscated)
111
113
 
112
- APIObfuscationType2Finder(self.project).analyze()
114
+ APIObfuscationType2Finder(self.project, self.variable_kb).analyze()
113
115
 
114
116
  def _find_type1(self):
115
117
  cfg = self.kb.cfgs.get_most_accurate()
@@ -195,6 +197,8 @@ class APIObfuscationFinder(Analysis):
195
197
  callsite_node.instruction_addrs[-1],
196
198
  ObservationPointType.OP_BEFORE,
197
199
  )
200
+ if observ is None:
201
+ continue
198
202
  args: list[tuple[int, Any]] = []
199
203
  for arg_idx, func_arg in enumerate(func.arguments):
200
204
  # FIXME: We are ignoring all non-register function arguments until we see a test case where
@@ -232,9 +236,8 @@ class APIObfuscationFinder(Analysis):
232
236
  acceptable_args = False
233
237
  break
234
238
  arg_strs.append((idx, value.decode("utf-8")))
235
- if acceptable_args:
239
+ if acceptable_args and len(arg_strs) == 2:
236
240
  libname_arg_idx, funcname_arg_idx = None, None
237
- assert len(arg_strs) == 2
238
241
  for arg_idx, name in arg_strs:
239
242
  if self.is_libname(name):
240
243
  libname_arg_idx = arg_idx
@@ -1,21 +1,24 @@
1
1
  from __future__ import annotations
2
- from typing import cast
2
+ from typing import TYPE_CHECKING, cast
3
3
 
4
4
  from collections.abc import Iterator
5
5
  from dataclasses import dataclass
6
6
  import logging
7
7
 
8
8
  from angr.project import Project
9
- from angr.analyses.reaching_definitions.reaching_definitions import (
10
- ReachingDefinitionsAnalysis,
11
- FunctionCallRelationships,
12
- )
9
+ from angr.knowledge_base import KnowledgeBase
13
10
  from angr.knowledge_plugins.functions.function import Function
14
11
  from angr.knowledge_plugins.key_definitions import DerefSize
15
12
  from angr.knowledge_plugins.key_definitions.constants import ObservationPointType
16
13
  from angr.knowledge_plugins.key_definitions.atoms import MemoryLocation
17
14
  from angr.sim_variable import SimMemoryVariable
18
15
 
16
+ if TYPE_CHECKING:
17
+ from angr.analyses.reaching_definitions import (
18
+ ReachingDefinitionsAnalysis,
19
+ FunctionCallRelationships,
20
+ )
21
+
19
22
 
20
23
  log = logging.getLogger(__name__)
21
24
 
@@ -40,8 +43,9 @@ class APIObfuscationType2Finder:
40
43
 
41
44
  results: list[APIObfuscationType2]
42
45
 
43
- def __init__(self, project: Project):
46
+ def __init__(self, project: Project, variable_kb: KnowledgeBase | None = None):
44
47
  self.project = project
48
+ self.variable_kb = variable_kb or self.project.kb
45
49
  self.results = []
46
50
 
47
51
  def analyze(self) -> list[APIObfuscationType2]:
@@ -91,8 +95,12 @@ class APIObfuscationType2Finder:
91
95
  log.debug("...Failed to resolve a function name")
92
96
  return
93
97
 
94
- proc_name = result.rstrip(b"\x00").decode("utf-8")
95
- log.debug("...Resolved concrete function name: %s", proc_name)
98
+ try:
99
+ func_name = result.rstrip(b"\x00").decode("utf-8")
100
+ log.debug("...Resolved concrete function name: %s", func_name)
101
+ except UnicodeDecodeError:
102
+ log.debug("...Failed to decode utf-8 function name")
103
+ return
96
104
 
97
105
  # Examine successor definitions to find where the function pointer is written
98
106
  for successor in rda.dep_graph.find_all_successors(callsite_info.ret_defns):
@@ -121,7 +129,7 @@ class APIObfuscationType2Finder:
121
129
 
122
130
  self.results.append(
123
131
  APIObfuscationType2(
124
- resolved_func_name=proc_name,
132
+ resolved_func_name=func_name,
125
133
  resolved_func_ptr=ptr,
126
134
  resolved_in=caller,
127
135
  resolved_by=callee,
@@ -139,7 +147,7 @@ class APIObfuscationType2Finder:
139
147
  log.debug("...Created label %s for address %x", lbl, result.resolved_func_ptr.addr)
140
148
 
141
149
  # Create a variable
142
- global_variables = self.project.kb.variables["global"]
150
+ global_variables = self.variable_kb.variables["global"]
143
151
  variables = global_variables.get_global_variables(result.resolved_func_ptr.addr)
144
152
  if not variables:
145
153
  ident = global_variables.next_variable_ident("global")
@@ -9,11 +9,11 @@ import networkx
9
9
 
10
10
  import claripy
11
11
 
12
- from angr import sim_options
13
12
  from angr.analyses import Analysis, AnalysesHub
14
- from angr.errors import SimMemoryMissingError, AngrCallableMultistateError, AngrCallableError
13
+ from angr.errors import SimMemoryMissingError, AngrCallableMultistateError, AngrCallableError, AngrAnalysisError
15
14
  from angr.calling_conventions import SimRegArg, default_cc
16
15
  from angr.state_plugins.sim_action import SimActionData
16
+ from angr.sim_options import ZERO_FILL_UNCONSTRAINED_REGISTERS, ZERO_FILL_UNCONSTRAINED_MEMORY, TRACK_MEMORY_ACTIONS
17
17
  from angr.sim_type import SimTypeFunction, SimTypeBottom, SimTypePointer
18
18
  from angr.analyses.reaching_definitions import ObservationPointType
19
19
  from angr.utils.graph import GraphUtils
@@ -23,12 +23,23 @@ from .irsb_reg_collector import IRSBRegisterCollector
23
23
  _l = logging.getLogger(__name__)
24
24
 
25
25
 
26
+ STEP_LIMIT_FIND = 500
27
+ STEP_LIMIT_ANALYSIS = 5000
28
+
29
+
26
30
  class StringDeobFuncDescriptor:
31
+ """
32
+ Describes a string deobfuscation function.
33
+ """
34
+
35
+ string_input_arg_idx: int
36
+ string_output_arg_idx: int
37
+ string_length_arg_idx: int | None
38
+ string_null_terminating: bool | None
39
+
27
40
  def __init__(self):
28
- self.string_input_arg_idx = None
29
- self.string_output_arg_idx = None
30
41
  self.string_length_arg_idx = None
31
- self.string_null_terminating: bool | None = None
42
+ self.string_null_terminating = None
32
43
 
33
44
 
34
45
  class StringObfuscationFinder(Analysis):
@@ -89,6 +100,9 @@ class StringObfuscationFinder(Analysis):
89
100
  # Type 1 string deobfuscation functions will decrypt each string once and for good.
90
101
 
91
102
  cfg = self.kb.cfgs.get_most_accurate()
103
+ if cfg is None:
104
+ raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
105
+
92
106
  arch = self.project.arch
93
107
 
94
108
  type1_candidates: list[tuple[int, StringDeobFuncDescriptor]] = []
@@ -100,6 +114,10 @@ class StringObfuscationFinder(Analysis):
100
114
  if func.prototype is None or len(func.prototype.args) < 1:
101
115
  continue
102
116
 
117
+ if len(func.arguments) != len(func.prototype.args):
118
+ # function argument locations and function prototype arguments do not match
119
+ continue
120
+
103
121
  if self.project.kb.functions.callgraph.out_degree[func.addr] != 0:
104
122
  continue
105
123
 
@@ -123,14 +141,22 @@ class StringObfuscationFinder(Analysis):
123
141
  dec = self.project.analyses.Decompiler(func, cfg=cfg)
124
142
  except Exception: # pylint:disable=broad-exception-caught
125
143
  continue
126
- if dec.codegen is None or not self._like_type1_deobfuscation_function(dec.codegen.text):
144
+ if (
145
+ dec.codegen is None
146
+ or not dec.codegen.text
147
+ or not self._like_type1_deobfuscation_function(dec.codegen.text)
148
+ ):
149
+ continue
150
+
151
+ func_node = cfg.get_any_node(func.addr)
152
+ if func_node is None:
127
153
  continue
128
154
 
129
155
  args_list = []
130
156
  for caller in callers:
131
157
  callsite_nodes = [
132
158
  pred
133
- for pred in cfg.get_predecessors(cfg.get_any_node(func.addr))
159
+ for pred in cfg.get_predecessors(func_node)
134
160
  if pred.function_address == caller and pred.instruction_addrs
135
161
  ]
136
162
  observation_points = []
@@ -148,15 +174,21 @@ class StringObfuscationFinder(Analysis):
148
174
  callsite_node.instruction_addrs[-1],
149
175
  ObservationPointType.OP_BEFORE,
150
176
  )
177
+ if observ is None:
178
+ continue
151
179
  # load values for each function argument
152
180
  args: list[tuple[int, Any]] = []
153
181
  for arg_idx, func_arg in enumerate(func.arguments):
154
182
  # FIXME: We are ignoring all non-register function arguments until we see a test case where
155
183
  # FIXME: stack-passing arguments are used
184
+ real_arg = func.prototype.args[arg_idx]
156
185
  if isinstance(func_arg, SimRegArg):
157
186
  reg_offset, reg_size = arch.registers[func_arg.reg_name]
187
+ arg_size = (
188
+ real_arg.size if real_arg.size is not None else reg_size
189
+ ) // self.project.arch.byte_width
158
190
  try:
159
- mv = observ.registers.load(reg_offset, size=reg_size)
191
+ mv = observ.registers.load(reg_offset, size=arg_size)
160
192
  except SimMemoryMissingError:
161
193
  args.append((arg_idx, claripy.BVV(0xDEADBEEF, self.project.arch.bits)))
162
194
  continue
@@ -185,7 +217,15 @@ class StringObfuscationFinder(Analysis):
185
217
  # now that we have good arguments, let's test the function!
186
218
  for args in args_list:
187
219
  func_call = self.project.factory.callable(
188
- func.addr, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
220
+ func.addr,
221
+ concrete_only=True,
222
+ cc=func.calling_convention,
223
+ prototype=func.prototype,
224
+ add_options={
225
+ ZERO_FILL_UNCONSTRAINED_MEMORY,
226
+ ZERO_FILL_UNCONSTRAINED_REGISTERS,
227
+ },
228
+ step_limit=STEP_LIMIT_FIND,
189
229
  )
190
230
 
191
231
  # before calling the function, let's record the crime scene
@@ -202,6 +242,9 @@ class StringObfuscationFinder(Analysis):
202
242
  except (AngrCallableMultistateError, AngrCallableError):
203
243
  continue
204
244
 
245
+ if func_call.result_state is None:
246
+ continue
247
+
205
248
  # let's see what this amazing function has done
206
249
  # TODO: Support cases where input and output are using different function arguments
207
250
  for arg_idx, addr, old_value in values:
@@ -240,6 +283,9 @@ class StringObfuscationFinder(Analysis):
240
283
 
241
284
  arch = self.project.arch
242
285
  cfg = self.kb.cfgs.get_most_accurate()
286
+ if cfg is None:
287
+ raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
288
+
243
289
  func = self.kb.functions.get_by_addr(func_addr)
244
290
  func_node = cfg.get_any_node(func_addr)
245
291
  assert func_node is not None
@@ -260,14 +306,20 @@ class StringObfuscationFinder(Analysis):
260
306
  callsite_node.instruction_addrs[-1],
261
307
  ObservationPointType.OP_BEFORE,
262
308
  )
309
+ if observ is None:
310
+ continue
263
311
  args = []
264
- for func_arg in func.arguments:
312
+ assert func.prototype is not None and len(func.arguments) == len(func.prototype.args)
313
+ for func_arg, real_arg in zip(func.arguments, func.prototype.args):
265
314
  # FIXME: We are ignoring all non-register function arguments until we see a test case where
266
315
  # FIXME: stack-passing arguments are used
267
316
  if isinstance(func_arg, SimRegArg):
268
317
  reg_offset, reg_size = arch.registers[func_arg.reg_name]
318
+ arg_size = (
319
+ real_arg.size if real_arg.size is not None else reg_size
320
+ ) // self.project.arch.byte_width
269
321
  try:
270
- mv = observ.registers.load(reg_offset, size=reg_size)
322
+ mv = observ.registers.load(reg_offset, size=arg_size)
271
323
  except SimMemoryMissingError:
272
324
  args.append(claripy.BVV(0xDEADBEEF, self.project.arch.bits))
273
325
  continue
@@ -286,7 +338,12 @@ class StringObfuscationFinder(Analysis):
286
338
 
287
339
  # call the function
288
340
  func_call = self.project.factory.callable(
289
- func.addr, concrete_only=True, cc=func.calling_convention, prototype=func.prototype
341
+ func.addr,
342
+ concrete_only=True,
343
+ cc=func.calling_convention,
344
+ prototype=func.prototype,
345
+ add_options={ZERO_FILL_UNCONSTRAINED_MEMORY, ZERO_FILL_UNCONSTRAINED_REGISTERS},
346
+ step_limit=STEP_LIMIT_ANALYSIS,
290
347
  )
291
348
  try:
292
349
  func_call(*args)
@@ -303,6 +360,9 @@ class StringObfuscationFinder(Analysis):
303
360
  )
304
361
  continue
305
362
 
363
+ if func_call.result_state is None:
364
+ continue
365
+
306
366
  # dump the decrypted string!
307
367
  output_addr = args[desc.string_output_arg_idx]
308
368
  length = args[desc.string_length_arg_idx].concrete_value if desc.string_length_arg_idx is not None else 256
@@ -322,6 +382,8 @@ class StringObfuscationFinder(Analysis):
322
382
  xref_set = xrefs.get_xrefs_by_dst(str_addr)
323
383
  block_addrs = {xref.block_addr for xref in xref_set}
324
384
  for block_addr in block_addrs:
385
+ if block_addr is None:
386
+ continue
325
387
  node = cfg.get_any_node(block_addr)
326
388
  if node is not None:
327
389
  callees = list(self.kb.functions.callgraph.successors(node.function_address))
@@ -340,6 +402,8 @@ class StringObfuscationFinder(Analysis):
340
402
  # Type 2 string deobfuscation functions will decrypt each string once and for good.
341
403
 
342
404
  cfg = self.kb.cfgs.get_most_accurate()
405
+ if cfg is None:
406
+ raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
343
407
 
344
408
  type2_candidates: list[tuple[int, StringDeobFuncDescriptor, list[tuple[int, int, bytes]]]] = []
345
409
 
@@ -374,7 +438,11 @@ class StringObfuscationFinder(Analysis):
374
438
  dec = self.project.analyses.Decompiler(func, cfg=cfg, expr_collapse_depth=64)
375
439
  except Exception: # pylint:disable=broad-exception-caught
376
440
  continue
377
- if dec.codegen is None or not self._like_type2_deobfuscation_function(dec.codegen.text):
441
+ if (
442
+ dec.codegen is None
443
+ or not dec.codegen.text
444
+ or not self._like_type2_deobfuscation_function(dec.codegen.text)
445
+ ):
378
446
  continue
379
447
 
380
448
  desc = StringDeobFuncDescriptor()
@@ -384,7 +452,8 @@ class StringObfuscationFinder(Analysis):
384
452
  concrete_only=True,
385
453
  cc=func.calling_convention,
386
454
  prototype=func.prototype,
387
- add_options={sim_options.TRACK_MEMORY_ACTIONS},
455
+ add_options={TRACK_MEMORY_ACTIONS, ZERO_FILL_UNCONSTRAINED_MEMORY, ZERO_FILL_UNCONSTRAINED_REGISTERS},
456
+ step_limit=STEP_LIMIT_FIND,
388
457
  )
389
458
 
390
459
  try:
@@ -392,6 +461,9 @@ class StringObfuscationFinder(Analysis):
392
461
  except (AngrCallableMultistateError, AngrCallableError):
393
462
  continue
394
463
 
464
+ if func_call.result_state is None:
465
+ continue
466
+
395
467
  # where are the reads and writes?
396
468
  all_global_reads = []
397
469
  all_global_writes = []
@@ -399,7 +471,7 @@ class StringObfuscationFinder(Analysis):
399
471
  if not isinstance(action, SimActionData):
400
472
  continue
401
473
  if not action.actual_addrs:
402
- if not action.addr.ast.concrete:
474
+ if action.addr is None or not action.addr.ast.concrete:
403
475
  continue
404
476
  actual_addrs = [action.addr.ast.concrete_value]
405
477
  else:
@@ -469,6 +541,8 @@ class StringObfuscationFinder(Analysis):
469
541
  """
470
542
 
471
543
  cfg = self.kb.cfgs.get_most_accurate()
544
+ if cfg is None:
545
+ raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
472
546
 
473
547
  # for each string table address, we find its string loader function
474
548
  # an obvious candidate function is 0x140001b20
@@ -478,6 +552,8 @@ class StringObfuscationFinder(Analysis):
478
552
  xref_set = xrefs.get_xrefs_by_dst(table_addr)
479
553
  block_addrs = {xref.block_addr for xref in xref_set}
480
554
  for block_addr in block_addrs:
555
+ if block_addr is None:
556
+ continue
481
557
  node = cfg.get_any_node(block_addr)
482
558
  if node is not None:
483
559
  callees = list(self.kb.functions.callgraph.successors(node.function_address))
@@ -496,6 +572,9 @@ class StringObfuscationFinder(Analysis):
496
572
  # not have a SimProcedure for)
497
573
 
498
574
  cfg = self.kb.cfgs.get_most_accurate()
575
+ if cfg is None:
576
+ raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
577
+
499
578
  functions = self.kb.functions
500
579
  callgraph_digraph = networkx.DiGraph(functions.callgraph)
501
580
 
@@ -554,7 +633,7 @@ class StringObfuscationFinder(Analysis):
554
633
  except Exception: # pylint:disable=broad-exception-caught
555
634
  # catch all exceptions
556
635
  continue
557
- if dec.codegen is None:
636
+ if dec.codegen is None or not dec.codegen.text:
558
637
  continue
559
638
  if not self._like_type3_deobfuscation_function(dec.codegen.text):
560
639
  continue
@@ -605,6 +684,8 @@ class StringObfuscationFinder(Analysis):
605
684
  """
606
685
 
607
686
  cfg = self.kb.cfgs.get_most_accurate()
687
+ if cfg is None:
688
+ raise AngrAnalysisError("StringObfuscationFinder needs a CFG for the analysis")
608
689
 
609
690
  call_sites = cfg.get_predecessors(cfg.get_any_node(func_addr))
610
691
  callinsn2content = {}
@@ -687,7 +768,7 @@ class StringObfuscationFinder(Analysis):
687
768
  # execute the block at the call site
688
769
  state = self.project.factory.blank_state(
689
770
  addr=call_site_addr,
690
- add_options={sim_options.ZERO_FILL_UNCONSTRAINED_REGISTERS, sim_options.ZERO_FILL_UNCONSTRAINED_MEMORY},
771
+ add_options={ZERO_FILL_UNCONSTRAINED_REGISTERS, ZERO_FILL_UNCONSTRAINED_MEMORY},
691
772
  )
692
773
  # setup sp and bp, just in case
693
774
  state.regs._sp = 0x7FFF0000
@@ -728,7 +809,13 @@ class StringObfuscationFinder(Analysis):
728
809
  self.project.arch
729
810
  )
730
811
  callable_0 = self.project.factory.callable(
731
- func_addr, concrete_only=True, base_state=in_state, cc=cc, prototype=prototype_0
812
+ func_addr,
813
+ concrete_only=True,
814
+ base_state=in_state,
815
+ cc=cc,
816
+ prototype=prototype_0,
817
+ add_options={ZERO_FILL_UNCONSTRAINED_MEMORY, ZERO_FILL_UNCONSTRAINED_REGISTERS},
818
+ step_limit=STEP_LIMIT_ANALYSIS,
732
819
  )
733
820
 
734
821
  try:
@@ -181,7 +181,7 @@ class ForwardAnalysis(Generic[AnalysisState, NodeType, JobType, JobKey]):
181
181
  """
182
182
  return node
183
183
 
184
- def _run_on_node(self, node: NodeType, state: AnalysisState) -> tuple[bool, AnalysisState]:
184
+ def _run_on_node(self, node: NodeType, state: AnalysisState) -> tuple[bool | None, AnalysisState]:
185
185
  """
186
186
  The analysis routine that runs on each node in the graph.
187
187