angr 9.2.147__py3-none-win_amd64.whl → 9.2.149__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (91)
  1. angr/__init__.py +1 -1
  2. angr/analyses/analysis.py +3 -11
  3. angr/analyses/calling_convention/calling_convention.py +42 -2
  4. angr/analyses/calling_convention/fact_collector.py +5 -4
  5. angr/analyses/calling_convention/utils.py +1 -0
  6. angr/analyses/cfg/cfg_base.py +3 -59
  7. angr/analyses/cfg/cfg_emulated.py +17 -14
  8. angr/analyses/cfg/cfg_fast.py +68 -63
  9. angr/analyses/cfg/cfg_fast_soot.py +3 -3
  10. angr/analyses/decompiler/ail_simplifier.py +65 -32
  11. angr/analyses/decompiler/block_simplifier.py +20 -6
  12. angr/analyses/decompiler/callsite_maker.py +28 -18
  13. angr/analyses/decompiler/clinic.py +84 -17
  14. angr/analyses/decompiler/condition_processor.py +0 -21
  15. angr/analyses/decompiler/counters/call_counter.py +3 -0
  16. angr/analyses/decompiler/dephication/rewriting_engine.py +24 -2
  17. angr/analyses/decompiler/optimization_passes/__init__.py +5 -0
  18. angr/analyses/decompiler/optimization_passes/base_ptr_save_simplifier.py +15 -13
  19. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/determine_load_sizes.py +64 -0
  21. angr/analyses/decompiler/optimization_passes/eager_std_string_concatenation.py +165 -0
  22. angr/analyses/decompiler/optimization_passes/engine_base.py +11 -2
  23. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +17 -2
  24. angr/analyses/decompiler/optimization_passes/optimization_pass.py +10 -6
  25. angr/analyses/decompiler/optimization_passes/win_stack_canary_simplifier.py +99 -30
  26. angr/analyses/decompiler/peephole_optimizations/__init__.py +6 -0
  27. angr/analyses/decompiler/peephole_optimizations/base.py +43 -3
  28. angr/analyses/decompiler/peephole_optimizations/constant_derefs.py +1 -1
  29. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy.py +3 -0
  30. angr/analyses/decompiler/peephole_optimizations/inlined_strcpy_consolidation.py +4 -1
  31. angr/analyses/decompiler/peephole_optimizations/remove_cxx_destructor_calls.py +32 -0
  32. angr/analyses/decompiler/peephole_optimizations/remove_redundant_bitmasks.py +69 -2
  33. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +14 -0
  34. angr/analyses/decompiler/peephole_optimizations/rewrite_conv_mul.py +40 -0
  35. angr/analyses/decompiler/peephole_optimizations/rewrite_cxx_operator_calls.py +90 -0
  36. angr/analyses/decompiler/presets/fast.py +2 -0
  37. angr/analyses/decompiler/presets/full.py +2 -0
  38. angr/analyses/decompiler/ssailification/rewriting_engine.py +51 -4
  39. angr/analyses/decompiler/ssailification/ssailification.py +23 -3
  40. angr/analyses/decompiler/ssailification/traversal_engine.py +15 -1
  41. angr/analyses/decompiler/structured_codegen/c.py +146 -15
  42. angr/analyses/decompiler/structuring/phoenix.py +11 -3
  43. angr/analyses/decompiler/utils.py +6 -1
  44. angr/analyses/deobfuscator/api_obf_finder.py +5 -1
  45. angr/analyses/deobfuscator/api_obf_peephole_optimizer.py +1 -1
  46. angr/analyses/forward_analysis/visitors/graph.py +0 -8
  47. angr/analyses/identifier/runner.py +1 -1
  48. angr/analyses/reaching_definitions/function_handler.py +4 -4
  49. angr/analyses/reassembler.py +1 -1
  50. angr/analyses/s_reaching_definitions/s_rda_view.py +1 -0
  51. angr/analyses/stack_pointer_tracker.py +1 -1
  52. angr/analyses/static_hooker.py +11 -9
  53. angr/analyses/typehoon/lifter.py +20 -0
  54. angr/analyses/typehoon/simple_solver.py +42 -9
  55. angr/analyses/typehoon/translator.py +4 -1
  56. angr/analyses/typehoon/typeconsts.py +17 -6
  57. angr/analyses/typehoon/typehoon.py +21 -5
  58. angr/analyses/variable_recovery/engine_ail.py +52 -13
  59. angr/analyses/variable_recovery/engine_base.py +37 -12
  60. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -2
  61. angr/calling_conventions.py +96 -27
  62. angr/engines/light/engine.py +7 -0
  63. angr/exploration_techniques/director.py +1 -1
  64. angr/knowledge_plugins/functions/function.py +109 -38
  65. angr/knowledge_plugins/functions/function_manager.py +9 -0
  66. angr/knowledge_plugins/functions/function_parser.py +9 -1
  67. angr/knowledge_plugins/functions/soot_function.py +1 -1
  68. angr/knowledge_plugins/key_definitions/key_definition_manager.py +1 -1
  69. angr/knowledge_plugins/propagations/states.py +5 -2
  70. angr/knowledge_plugins/variables/variable_manager.py +3 -3
  71. angr/lib/angr_native.dll +0 -0
  72. angr/procedures/definitions/__init__.py +15 -12
  73. angr/procedures/definitions/types_stl.py +22 -0
  74. angr/procedures/stubs/format_parser.py +1 -1
  75. angr/project.py +23 -29
  76. angr/protos/cfg_pb2.py +14 -25
  77. angr/protos/function_pb2.py +11 -22
  78. angr/protos/primitives_pb2.py +36 -47
  79. angr/protos/variables_pb2.py +28 -39
  80. angr/protos/xrefs_pb2.py +8 -19
  81. angr/sim_type.py +251 -146
  82. angr/simos/cgc.py +1 -1
  83. angr/simos/linux.py +5 -5
  84. angr/simos/windows.py +5 -5
  85. angr/storage/memory_mixins/paged_memory/paged_memory_mixin.py +1 -1
  86. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/METADATA +9 -8
  87. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/RECORD +91 -85
  88. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/WHEEL +1 -1
  89. {angr-9.2.147.dist-info → angr-9.2.149.dist-info/licenses}/LICENSE +3 -0
  90. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/entry_points.txt +0 -0
  91. {angr-9.2.147.dist-info → angr-9.2.149.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,7 @@ import networkx
9
9
 
10
10
  from ailment import AILBlockWalker
11
11
  from ailment.block import Block
12
- from ailment.statement import Statement, Assignment, Store, Call, ConditionalJump, DirtyStatement
12
+ from ailment.statement import Statement, Assignment, Store, Call, ConditionalJump, DirtyStatement, WeakAssignment
13
13
  from ailment.expression import (
14
14
  Register,
15
15
  Convert,
@@ -247,6 +247,9 @@ class AILSimplifier(Analysis):
247
247
  ):
248
248
  codeloc = CodeLocation(block.addr, stmt_idx, block_idx=block.idx, ins_addr=stmt.ins_addr)
249
249
  equivalence.add(Equivalence(codeloc, stmt.dst, stmt.src))
250
+ elif isinstance(stmt, WeakAssignment):
251
+ codeloc = CodeLocation(block.addr, stmt_idx, block_idx=block.idx, ins_addr=stmt.ins_addr)
252
+ equivalence.add(Equivalence(codeloc, stmt.dst, stmt.src, is_weakassignment=True))
250
253
  elif isinstance(stmt, Call):
251
254
  if isinstance(stmt.ret_expr, (VirtualVariable, Load)):
252
255
  codeloc = CodeLocation(block.addr, stmt_idx, block_idx=block.idx, ins_addr=stmt.ins_addr)
@@ -1172,6 +1175,9 @@ class AILSimplifier(Analysis):
1172
1175
  # register variable = Convert(Call)
1173
1176
  call = eq.atom1
1174
1177
  # call_addr = call.operand.target.value if isinstance(call.operand.target, Const) else None
1178
+ elif eq.is_weakassignment:
1179
+ # variable =w something else
1180
+ call = eq.atom1
1175
1181
  else:
1176
1182
  continue
1177
1183
 
@@ -1196,6 +1202,9 @@ class AILSimplifier(Analysis):
1196
1202
  assert the_def.codeloc.stmt_idx is not None
1197
1203
 
1198
1204
  all_uses: set[tuple[Any, CodeLocation]] = rd.get_vvar_uses_with_expr(the_def.atom)
1205
+ if eq.is_weakassignment:
1206
+ # eliminate the "use" at the weak assignment site
1207
+ all_uses = {use for use in all_uses if use[1] != eq.codeloc}
1199
1208
 
1200
1209
  if len(all_uses) != 1:
1201
1210
  continue
@@ -1218,10 +1227,13 @@ class AILSimplifier(Analysis):
1218
1227
  continue
1219
1228
 
1220
1229
  # check if the use and the definition is within the same supernode
1221
- super_node_blocks = self._get_super_node_blocks(
1222
- addr_and_idx_to_block[(the_def.codeloc.block_addr, the_def.codeloc.block_idx)]
1223
- )
1224
- if u.block_addr not in {b.addr for b in super_node_blocks}:
1230
+ # also we do not allow any calls between the def site and the use site
1231
+ if not self._loc_within_superblock(
1232
+ addr_and_idx_to_block[(the_def.codeloc.block_addr, the_def.codeloc.block_idx)],
1233
+ u.block_addr,
1234
+ u.block_idx,
1235
+ terminate_with_calls=True,
1236
+ ):
1225
1237
  continue
1226
1238
 
1227
1239
  # ensure there are no other calls between the def site and the use site.
@@ -1247,10 +1259,6 @@ class AILSimplifier(Analysis):
1247
1259
  ):
1248
1260
  continue
1249
1261
 
1250
- # check if there are any calls in between the def site and the use site
1251
- if self._count_calls_in_supernodeblocks(super_node_blocks, the_def.codeloc, u) > 0:
1252
- continue
1253
-
1254
1262
  # replace all uses
1255
1263
  old_block = addr_and_idx_to_block.get((u.block_addr, u.block_idx), None)
1256
1264
  if old_block is None:
@@ -1262,7 +1270,7 @@ class AILSimplifier(Analysis):
1262
1270
 
1263
1271
  if isinstance(eq.atom0, VirtualVariable):
1264
1272
  src = used_expr
1265
- dst: Call | Convert = call.copy()
1273
+ dst: Expression = call.copy()
1266
1274
 
1267
1275
  if isinstance(dst, Call) and dst.ret_expr is not None:
1268
1276
  dst_bits = dst.ret_expr.bits
@@ -1272,7 +1280,7 @@ class AILSimplifier(Analysis):
1272
1280
  dst.fp_ret_expr = None
1273
1281
  dst.bits = dst_bits
1274
1282
 
1275
- if src.bits != dst.bits:
1283
+ if src.bits != dst.bits and not eq.is_weakassignment:
1276
1284
  dst = Convert(None, dst.bits, src.bits, False, dst)
1277
1285
  else:
1278
1286
  continue
@@ -1320,6 +1328,42 @@ class AILSimplifier(Analysis):
1320
1328
  break
1321
1329
  return lst
1322
1330
 
1331
+ def _loc_within_superblock(
1332
+ self, start_node: Block, block_addr: int, block_idx: int | None, terminate_with_calls=False
1333
+ ) -> bool:
1334
+ b = start_node
1335
+ if block_addr == b.addr and block_idx == b.idx:
1336
+ return True
1337
+
1338
+ encountered_block_addrs: set[tuple[int, int | None]] = {(b.addr, b.idx)}
1339
+ while True:
1340
+ if terminate_with_calls and b.statements and isinstance(b.statements[-1], Call):
1341
+ return False
1342
+
1343
+ encountered_block_addrs.add((b.addr, b.idx))
1344
+ successors = list(self.func_graph.successors(b))
1345
+ if len(successors) == 0:
1346
+ # did not encounter the block before running out of successors
1347
+ return False
1348
+ if len(successors) == 1:
1349
+ succ = successors[0]
1350
+ # check its predecessors
1351
+ succ_predecessors = list(self.func_graph.predecessors(succ))
1352
+ if len(succ_predecessors) == 1:
1353
+ if (succ.addr, succ.idx) in encountered_block_addrs:
1354
+ # we are about to form a loop - bad!
1355
+ # example: binary ce1897b492c80bf94083dd783aefb413ab1f6d8d4981adce8420f6669d0cb3e1, block
1356
+ # 0x2976EF7.
1357
+ return False
1358
+ if block_addr == succ.addr and block_idx == succ.idx:
1359
+ return True
1360
+ b = succ
1361
+ else:
1362
+ return False
1363
+ else:
1364
+ # too many successors
1365
+ return False
1366
+
1323
1367
  @staticmethod
1324
1368
  def _replace_expr_and_update_block(block, stmt_idx, stmt, src_expr, dst_expr) -> tuple[bool, Block | None]:
1325
1369
  replaced, new_stmt = stmt.replace(src_expr, dst_expr)
@@ -1492,9 +1536,18 @@ class AILSimplifier(Analysis):
1492
1536
  simplified = True
1493
1537
 
1494
1538
  if idx in stmts_to_remove and idx not in stmts_to_keep and not isinstance(stmt, DirtyStatement):
1495
- if isinstance(stmt, (Assignment, Store)):
1539
+ if isinstance(stmt, (Assignment, WeakAssignment, Store)):
1496
1540
  # Special logic for Assignment and Store statements
1497
1541
 
1542
+ # if this statement writes to a virtual variable that must be preserved, we ignore it
1543
+ if (
1544
+ isinstance(stmt, Assignment)
1545
+ and isinstance(stmt.dst, VirtualVariable)
1546
+ and stmt.dst.varid in self._avoid_vvar_ids
1547
+ ):
1548
+ new_statements.append(stmt)
1549
+ continue
1550
+
1498
1551
  # if this statement triggers a call, it should only be removed if it's in self._calls_to_remove
1499
1552
  codeloc = CodeLocation(block.addr, idx, ins_addr=stmt.ins_addr, block_idx=block.idx)
1500
1553
  if codeloc in self._assignments_to_remove:
@@ -1716,26 +1769,6 @@ class AILSimplifier(Analysis):
1716
1769
 
1717
1770
  return False
1718
1771
 
1719
- @staticmethod
1720
- def _count_calls_in_supernodeblocks(blocks: list[Block], start: CodeLocation, end: CodeLocation) -> int:
1721
- """
1722
- Count the number of call statements in a list of blocks for a single super block between two given code
1723
- locations (exclusive).
1724
- """
1725
- calls = 0
1726
- started = False
1727
- for b in blocks:
1728
- if b.addr == start.block_addr:
1729
- started = True
1730
- continue
1731
- if b.addr == end.block_addr:
1732
- started = False
1733
- continue
1734
-
1735
- if started and b.statements and isinstance(b.statements[-1], Call):
1736
- calls += 1
1737
- return calls
1738
-
1739
1772
  @staticmethod
1740
1773
  def _exprs_contain_vvar(exprs: Iterable[Expression], vvar_ids: set[int]) -> bool:
1741
1774
  def _handle_VirtualVariable(expr_idx, expr, stmt_idx, stmt, block): # pylint:disable=unused-argument
@@ -10,6 +10,7 @@ from ailment import AILBlockWalkerBase
10
10
 
11
11
  from angr.code_location import ExternalCodeLocation, CodeLocation
12
12
 
13
+ from angr.knowledge_plugins.key_definitions import atoms
13
14
  from angr.analyses.s_propagator import SPropagatorAnalysis
14
15
  from angr.analyses.s_reaching_definitions import SReachingDefinitionsAnalysis, SRDAModel
15
16
  from angr.analyses import Analysis, register_analysis
@@ -62,6 +63,8 @@ class BlockSimplifier(Analysis):
62
63
  peephole_optimizations: None | (
63
64
  Iterable[type[PeepholeOptimizationStmtBase] | type[PeepholeOptimizationExprBase]]
64
65
  ) = None,
66
+ preserve_vvar_ids: set[int] | None = None,
67
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
65
68
  cached_reaching_definitions=None,
66
69
  cached_propagator=None,
67
70
  ):
@@ -74,24 +77,35 @@ class BlockSimplifier(Analysis):
74
77
  self.func_addr = func_addr
75
78
 
76
79
  self._stack_pointer_tracker = stack_pointer_tracker
80
+ self._preserve_vvar_ids = preserve_vvar_ids
81
+ self._type_hints = type_hints
77
82
 
78
83
  if peephole_optimizations is None:
79
- self._expr_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in EXPR_OPTS]
80
- self._stmt_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in STMT_OPTS]
81
- self._multistmt_peephole_opts = [cls(self.project, self.kb, self.func_addr) for cls in MULTI_STMT_OPTS]
84
+ self._expr_peephole_opts = [
85
+ cls(self.project, self.kb, self.func_addr, self._preserve_vvar_ids, self._type_hints)
86
+ for cls in EXPR_OPTS
87
+ ]
88
+ self._stmt_peephole_opts = [
89
+ cls(self.project, self.kb, self.func_addr, self._preserve_vvar_ids, self._type_hints)
90
+ for cls in STMT_OPTS
91
+ ]
92
+ self._multistmt_peephole_opts = [
93
+ cls(self.project, self.kb, self.func_addr, self._preserve_vvar_ids, self._type_hints)
94
+ for cls in MULTI_STMT_OPTS
95
+ ]
82
96
  else:
83
97
  self._expr_peephole_opts = [
84
- cls(self.project, self.kb, self.func_addr)
98
+ cls(self.project, self.kb, self.func_addr, self._preserve_vvar_ids, self._type_hints)
85
99
  for cls in peephole_optimizations
86
100
  if issubclass(cls, PeepholeOptimizationExprBase)
87
101
  ]
88
102
  self._stmt_peephole_opts = [
89
- cls(self.project, self.kb, self.func_addr)
103
+ cls(self.project, self.kb, self.func_addr, self._preserve_vvar_ids, self._type_hints)
90
104
  for cls in peephole_optimizations
91
105
  if issubclass(cls, PeepholeOptimizationStmtBase)
92
106
  ]
93
107
  self._multistmt_peephole_opts = [
94
- cls(self.project, self.kb, self.func_addr)
108
+ cls(self.project, self.kb, self.func_addr, self._preserve_vvar_ids, self._type_hints)
95
109
  for cls in peephole_optimizations
96
110
  if issubclass(cls, PeepholeOptimizationMultiStmtBase)
97
111
  ]
@@ -18,7 +18,7 @@ from angr.sim_type import (
18
18
  SimTypeFunction,
19
19
  SimTypeLongLong,
20
20
  )
21
- from angr.calling_conventions import SimRegArg, SimStackArg, SimCC, SimStructArg, SimComboArg
21
+ from angr.calling_conventions import SimReferenceArgument, SimRegArg, SimStackArg, SimCC, SimStructArg, SimComboArg
22
22
  from angr.knowledge_plugins.key_definitions.constants import OP_BEFORE
23
23
  from angr.analyses import Analysis, register_analysis
24
24
  from angr.analyses.s_reaching_definitions import SRDAView
@@ -111,10 +111,10 @@ class CallSiteMaker(Analysis):
111
111
  prototype_libname = func.prototype_libname
112
112
  type_collections = []
113
113
  if prototype_libname is not None:
114
- prototype_lib = SIM_LIBRARIES[prototype_libname]
115
- if prototype_lib.type_collection_names:
116
- for typelib_name in prototype_lib.type_collection_names:
117
- type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
114
+ for prototype_lib in SIM_LIBRARIES[prototype_libname]:
115
+ if prototype_lib.type_collection_names:
116
+ for typelib_name in prototype_lib.type_collection_names:
117
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
118
118
  if type_collections:
119
119
  prototype = dereference_simtype(prototype, type_collections).with_arch( # type: ignore
120
120
  self.project.arch
@@ -144,17 +144,30 @@ class CallSiteMaker(Analysis):
144
144
  arg_locs = cc.arg_locs(callsite_ty)
145
145
 
146
146
  if arg_locs is not None and cc is not None:
147
- expanded_arg_locs = []
147
+ expanded_arg_locs: list[SimStackArg | SimRegArg | SimReferenceArgument] = []
148
148
  for arg_loc in arg_locs:
149
149
  if isinstance(arg_loc, SimComboArg):
150
150
  # a ComboArg spans across multiple locations (mostly stack but *in theory* can also be spanning
151
151
  # across registers). most importantly, a ComboArg represents one variable, not multiple, but we
152
152
  # have no way to know that until later down the pipeline.
153
153
  expanded_arg_locs += arg_loc.locations
154
- else:
154
+ elif isinstance(arg_loc, (SimRegArg, SimStackArg, SimReferenceArgument)):
155
155
  expanded_arg_locs.append(arg_loc)
156
+ else:
157
+ raise NotImplementedError("Not implemented yet.")
156
158
 
157
159
  for arg_loc in expanded_arg_locs:
160
+ if isinstance(arg_loc, SimReferenceArgument):
161
+ if not isinstance(arg_loc.ptr_loc, (SimRegArg, SimStackArg)):
162
+ raise NotImplementedError("Why would a calling convention produce this?")
163
+ if isinstance(arg_loc.main_loc, SimStructArg):
164
+ dereference_size = arg_loc.main_loc.struct.size // self.project.arch.byte_width
165
+ else:
166
+ dereference_size = arg_loc.main_loc.size
167
+ arg_loc = arg_loc.ptr_loc
168
+ else:
169
+ dereference_size = None
170
+
158
171
  if isinstance(arg_loc, SimRegArg):
159
172
  size = arg_loc.size
160
173
  offset = arg_loc.check_offset(cc.arch)
@@ -202,7 +215,7 @@ class CallSiteMaker(Analysis):
202
215
  vvar_use,
203
216
  **vvar_use.tags,
204
217
  )
205
- args.append(vvar_use)
218
+ arg_expr = vvar_use
206
219
  else:
207
220
  reg = Expr.Register(
208
221
  self._atom_idx(),
@@ -212,20 +225,17 @@ class CallSiteMaker(Analysis):
212
225
  reg_name=arg_loc.reg_name,
213
226
  ins_addr=last_stmt.ins_addr,
214
227
  )
215
- args.append(reg)
228
+ arg_expr = reg
216
229
  elif isinstance(arg_loc, SimStackArg):
217
230
  stack_arg_locs.append(arg_loc)
218
231
  _, the_arg = self._resolve_stack_argument(call_stmt, arg_loc)
219
-
220
- if the_arg is not None:
221
- args.append(the_arg)
222
- else:
223
- args.append(None)
224
- elif isinstance(arg_loc, SimStructArg):
225
- l.warning("SimStructArg is not yet supported")
226
-
232
+ arg_expr = the_arg if the_arg is not None else None
227
233
  else:
228
- raise NotImplementedError("Not implemented yet.")
234
+ assert False, "Unreachable"
235
+
236
+ if arg_expr is not None and dereference_size is not None:
237
+ arg_expr = Expr.Load(self._atom_idx(), arg_expr, dereference_size, endness=archinfo.Endness.BE)
238
+ args.append(arg_expr)
229
239
 
230
240
  # Remove the old call statement
231
241
  new_stmts = self.block.statements[:-1]
@@ -18,6 +18,7 @@ from angr.errors import AngrDecompilationError
18
18
  from angr.knowledge_base import KnowledgeBase
19
19
  from angr.knowledge_plugins.functions import Function
20
20
  from angr.knowledge_plugins.cfg.memory_data import MemoryDataSort
21
+ from angr.knowledge_plugins.key_definitions import atoms
21
22
  from angr.codenode import BlockNode
22
23
  from angr.utils import timethis
23
24
  from angr.utils.graph import GraphUtils
@@ -122,7 +123,7 @@ class Clinic(Analysis):
122
123
  desired_variables: set[str] | None = None,
123
124
  force_loop_single_exit: bool = True,
124
125
  complete_successors: bool = False,
125
- max_type_constraints: int = 750,
126
+ max_type_constraints: int = 4000,
126
127
  ):
127
128
  if not func.normalized and mode == ClinicMode.DECOMPILE:
128
129
  raise ValueError("Decompilation must work on normalized function graphs.")
@@ -505,17 +506,29 @@ class Clinic(Analysis):
505
506
  self._update_progress(37.0, text="Tracking stack pointers")
506
507
  spt = self._track_stack_pointers()
507
508
 
509
+ preserve_vvar_ids: set[int] = set()
510
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] = []
511
+
508
512
  # Simplify blocks
509
513
  # we never remove dead memory definitions before making callsites. otherwise stack arguments may go missing
510
514
  # before they are recognized as stack arguments.
511
515
  self._update_progress(38.0, text="Simplifying blocks 1")
512
- ail_graph = self._simplify_blocks(ail_graph, stack_pointer_tracker=spt, cache=block_simplification_cache)
516
+ ail_graph = self._simplify_blocks(
517
+ ail_graph,
518
+ stack_pointer_tracker=spt,
519
+ cache=block_simplification_cache,
520
+ preserve_vvar_ids=preserve_vvar_ids,
521
+ type_hints=type_hints,
522
+ )
513
523
  self._rewrite_alloca(ail_graph)
514
524
 
515
525
  # Run simplification passes
516
526
  self._update_progress(40.0, text="Running simplifications 1")
517
527
  ail_graph = self._run_simplification_passes(
518
- ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION
528
+ ail_graph,
529
+ stack_pointer_tracker=spt,
530
+ stack_items=self.stack_items,
531
+ stage=OptimizationPassStage.AFTER_SINGLE_BLOCK_SIMPLIFICATION,
519
532
  )
520
533
 
521
534
  # Simplify the entire function for the first time
@@ -532,7 +545,19 @@ class Clinic(Analysis):
532
545
  # Run simplification passes again. there might be more chances for peephole optimizations after function-level
533
546
  # simplification
534
547
  self._update_progress(48.0, text="Simplifying blocks 2")
535
- ail_graph = self._simplify_blocks(ail_graph, stack_pointer_tracker=spt, cache=block_simplification_cache)
548
+ ail_graph = self._simplify_blocks(
549
+ ail_graph,
550
+ stack_pointer_tracker=spt,
551
+ cache=block_simplification_cache,
552
+ preserve_vvar_ids=preserve_vvar_ids,
553
+ type_hints=type_hints,
554
+ )
555
+
556
+ # Run simplification passes
557
+ self._update_progress(49.0, text="Running simplifications 2")
558
+ ail_graph = self._run_simplification_passes(
559
+ ail_graph, stage=OptimizationPassStage.BEFORE_SSA_LEVEL1_TRANSFORMATION
560
+ )
536
561
 
537
562
  # rewrite (qualified) stack variables into SSA form
538
563
  ail_graph = self._transform_to_ssa_level1(ail_graph, func_args)
@@ -544,11 +569,13 @@ class Clinic(Analysis):
544
569
  # Rust-specific; only call this on Rust binaries when we can identify language and compiler
545
570
  ail_graph = self._rewrite_rust_probestack_call(ail_graph)
546
571
  # Windows-specific
547
- ail_graph = self._rewrite_windows_stkchk_call(ail_graph)
572
+ ail_graph = self._rewrite_windows_chkstk_call(ail_graph)
548
573
 
549
574
  # Make call-sites
550
575
  self._update_progress(50.0, text="Making callsites")
551
- _, stackarg_offsets, removed_vvar_ids = self._make_callsites(ail_graph, func_args, stack_pointer_tracker=spt)
576
+ _, stackarg_offsets, removed_vvar_ids = self._make_callsites(
577
+ ail_graph, func_args, stack_pointer_tracker=spt, preserve_vvar_ids=preserve_vvar_ids
578
+ )
552
579
 
553
580
  # Run simplification passes
554
581
  self._update_progress(53.0, text="Running simplifications 2")
@@ -565,6 +592,7 @@ class Clinic(Analysis):
565
592
  fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
566
593
  removed_vvar_ids=removed_vvar_ids,
567
594
  arg_vvars=arg_vvars,
595
+ preserve_vvar_ids=preserve_vvar_ids,
568
596
  )
569
597
 
570
598
  # After global optimization, there might be more chances for peephole optimizations.
@@ -574,10 +602,12 @@ class Clinic(Analysis):
574
602
  ail_graph,
575
603
  stack_pointer_tracker=spt,
576
604
  cache=block_simplification_cache,
605
+ preserve_vvar_ids=preserve_vvar_ids,
606
+ type_hints=type_hints,
577
607
  )
578
608
 
579
609
  # Run simplification passes
580
- self._update_progress(65.0, text="Running simplifications 3 ")
610
+ self._update_progress(65.0, text="Running simplifications 3")
581
611
  ail_graph = self._run_simplification_passes(
582
612
  ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION
583
613
  )
@@ -592,6 +622,7 @@ class Clinic(Analysis):
592
622
  narrow_expressions=True,
593
623
  fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
594
624
  arg_vvars=arg_vvars,
625
+ preserve_vvar_ids=preserve_vvar_ids,
595
626
  )
596
627
 
597
628
  self._update_progress(75.0, text="Simplifying blocks 4")
@@ -599,6 +630,8 @@ class Clinic(Analysis):
599
630
  ail_graph,
600
631
  stack_pointer_tracker=spt,
601
632
  cache=block_simplification_cache,
633
+ preserve_vvar_ids=preserve_vvar_ids,
634
+ type_hints=type_hints,
602
635
  )
603
636
 
604
637
  # Simplify the entire function for the fourth time
@@ -611,6 +644,12 @@ class Clinic(Analysis):
611
644
  narrow_expressions=True,
612
645
  fold_callexprs_into_conditions=self._fold_callexprs_into_conditions,
613
646
  arg_vvars=arg_vvars,
647
+ preserve_vvar_ids=preserve_vvar_ids,
648
+ )
649
+
650
+ self._update_progress(79.0, text="Running simplifications 4")
651
+ ail_graph = self._run_simplification_passes(
652
+ ail_graph, stack_items=self.stack_items, stage=OptimizationPassStage.BEFORE_VARIABLE_RECOVERY
614
653
  )
615
654
 
616
655
  # update arg_list
@@ -623,7 +662,7 @@ class Clinic(Analysis):
623
662
 
624
663
  # Recover variables on AIL blocks
625
664
  self._update_progress(80.0, text="Recovering variables")
626
- variable_kb = self._recover_and_link_variables(ail_graph, arg_list, arg_vvars, vvar2vvar)
665
+ variable_kb = self._recover_and_link_variables(ail_graph, arg_list, arg_vvars, vvar2vvar, type_hints)
627
666
 
628
667
  # Run simplification passes
629
668
  self._update_progress(85.0, text="Running simplifications 4")
@@ -1197,10 +1236,10 @@ class Clinic(Analysis):
1197
1236
  prototype_libname = func.prototype_libname
1198
1237
  type_collections = []
1199
1238
  if prototype_libname is not None:
1200
- prototype_lib = SIM_LIBRARIES[prototype_libname]
1201
- if prototype_lib.type_collection_names:
1202
- for typelib_name in prototype_lib.type_collection_names:
1203
- type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
1239
+ for prototype_lib in SIM_LIBRARIES[prototype_libname]:
1240
+ if prototype_lib.type_collection_names:
1241
+ for typelib_name in prototype_lib.type_collection_names:
1242
+ type_collections.append(SIM_TYPE_COLLECTIONS[typelib_name])
1204
1243
  if type_collections:
1205
1244
  prototype = dereference_simtype(prototype, type_collections).with_arch( # type: ignore
1206
1245
  self.project.arch
@@ -1226,6 +1265,8 @@ class Clinic(Analysis):
1226
1265
  ail_graph: networkx.DiGraph,
1227
1266
  stack_pointer_tracker=None,
1228
1267
  cache: dict[ailment.Block, NamedTuple] | None = None,
1268
+ preserve_vvar_ids: set[int] | None = None,
1269
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
1229
1270
  ):
1230
1271
  """
1231
1272
  Simplify all blocks in self._blocks.
@@ -1244,6 +1285,8 @@ class Clinic(Analysis):
1244
1285
  ail_block,
1245
1286
  stack_pointer_tracker=stack_pointer_tracker,
1246
1287
  cache=cache,
1288
+ preserve_vvar_ids=preserve_vvar_ids,
1289
+ type_hints=type_hints,
1247
1290
  )
1248
1291
  key = ail_block.addr, ail_block.idx
1249
1292
  blocks_by_addr_and_idx[key] = simplified
@@ -1259,7 +1302,14 @@ class Clinic(Analysis):
1259
1302
 
1260
1303
  return ail_graph
1261
1304
 
1262
- def _simplify_block(self, ail_block, stack_pointer_tracker=None, cache=None):
1305
+ def _simplify_block(
1306
+ self,
1307
+ ail_block,
1308
+ stack_pointer_tracker=None,
1309
+ cache=None,
1310
+ preserve_vvar_ids: set[int] | None = None,
1311
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]] | None = None,
1312
+ ):
1263
1313
  """
1264
1314
  Simplify a single AIL block.
1265
1315
 
@@ -1286,6 +1336,8 @@ class Clinic(Analysis):
1286
1336
  peephole_optimizations=self.peephole_optimizations,
1287
1337
  cached_reaching_definitions=cached_rd,
1288
1338
  cached_propagator=cached_prop,
1339
+ preserve_vvar_ids=preserve_vvar_ids,
1340
+ type_hints=type_hints,
1289
1341
  )
1290
1342
  # update the cache
1291
1343
  if cache is not None:
@@ -1308,6 +1360,7 @@ class Clinic(Analysis):
1308
1360
  rewrite_ccalls=True,
1309
1361
  removed_vvar_ids: set[int] | None = None,
1310
1362
  arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
1363
+ preserve_vvar_ids: set[int] | None = None,
1311
1364
  ) -> None:
1312
1365
  """
1313
1366
  Simplify the entire function until it reaches a fixed point.
@@ -1326,6 +1379,7 @@ class Clinic(Analysis):
1326
1379
  rewrite_ccalls=rewrite_ccalls,
1327
1380
  removed_vvar_ids=removed_vvar_ids,
1328
1381
  arg_vvars=arg_vvars,
1382
+ preserve_vvar_ids=preserve_vvar_ids,
1329
1383
  )
1330
1384
  if not simplified:
1331
1385
  break
@@ -1343,6 +1397,7 @@ class Clinic(Analysis):
1343
1397
  rewrite_ccalls=True,
1344
1398
  removed_vvar_ids: set[int] | None = None,
1345
1399
  arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]] | None = None,
1400
+ preserve_vvar_ids: set[int] | None = None,
1346
1401
  ):
1347
1402
  """
1348
1403
  Simplify the entire function once.
@@ -1367,6 +1422,7 @@ class Clinic(Analysis):
1367
1422
  removed_vvar_ids=removed_vvar_ids,
1368
1423
  arg_vvars=arg_vvars,
1369
1424
  secondary_stackvars=self.secondary_stackvars,
1425
+ avoid_vvar_ids=preserve_vvar_ids,
1370
1426
  )
1371
1427
  # cache the simplifier's RDA analysis
1372
1428
  self.reaching_definitions = simp._reaching_definitions
@@ -1381,6 +1437,7 @@ class Clinic(Analysis):
1381
1437
  stage: OptimizationPassStage = OptimizationPassStage.AFTER_GLOBAL_SIMPLIFICATION,
1382
1438
  variable_kb=None,
1383
1439
  stack_items: dict[int, StackItem] | None = None,
1440
+ stack_pointer_tracker=None,
1384
1441
  **kwargs,
1385
1442
  ):
1386
1443
  addr_and_idx_to_blocks: dict[tuple[int, int | None], ailment.Block] = {}
@@ -1415,6 +1472,7 @@ class Clinic(Analysis):
1415
1472
  scratch=self.optimization_scratch,
1416
1473
  force_loop_single_exit=self._force_loop_single_exit,
1417
1474
  complete_successors=self._complete_successors,
1475
+ stack_pointer_tracker=stack_pointer_tracker,
1418
1476
  **kwargs,
1419
1477
  )
1420
1478
  if a.out_graph:
@@ -1550,7 +1608,13 @@ class Clinic(Analysis):
1550
1608
  return []
1551
1609
 
1552
1610
  @timethis
1553
- def _make_callsites(self, ail_graph, func_args: set[ailment.Expr.VirtualVariable], stack_pointer_tracker=None):
1611
+ def _make_callsites(
1612
+ self,
1613
+ ail_graph,
1614
+ func_args: set[ailment.Expr.VirtualVariable],
1615
+ stack_pointer_tracker=None,
1616
+ preserve_vvar_ids: set[int] | None = None,
1617
+ ):
1554
1618
  """
1555
1619
  Simplify all function call statements.
1556
1620
  """
@@ -1588,6 +1652,7 @@ class Clinic(Analysis):
1588
1652
  fail_fast=self._fail_fast,
1589
1653
  stack_pointer_tracker=stack_pointer_tracker,
1590
1654
  peephole_optimizations=self.peephole_optimizations,
1655
+ preserve_vvar_ids=preserve_vvar_ids,
1591
1656
  )
1592
1657
  return simp.result_block
1593
1658
  return None
@@ -1663,6 +1728,7 @@ class Clinic(Analysis):
1663
1728
  arg_list: list,
1664
1729
  arg_vvars: dict[int, tuple[ailment.Expr.VirtualVariable, SimVariable]],
1665
1730
  vvar2vvar: dict[int, int],
1731
+ type_hints: list[tuple[atoms.VirtualVariable | atoms.MemoryLocation, str]],
1666
1732
  ):
1667
1733
  # variable recovery
1668
1734
  tmp_kb = KnowledgeBase(self.project) if self.variable_kb is None else self.variable_kb
@@ -1677,6 +1743,7 @@ class Clinic(Analysis):
1677
1743
  unify_variables=False,
1678
1744
  func_arg_vvars=arg_vvars,
1679
1745
  vvar_to_vvar=vvar2vvar,
1746
+ type_hints=type_hints,
1680
1747
  )
1681
1748
  # get ground-truth types
1682
1749
  var_manager = tmp_kb.variables[self.function.addr]
@@ -1710,7 +1777,7 @@ class Clinic(Analysis):
1710
1777
  must_struct = None
1711
1778
  total_type_constraints = sum(len(tc) for tc in vr.type_constraints.values()) if vr.type_constraints else 0
1712
1779
  if total_type_constraints > self._max_type_constraints:
1713
- l.info(
1780
+ l.warning(
1714
1781
  "The number of type constraints (%d) is greater than the threshold (%d). Skipping type inference.",
1715
1782
  total_type_constraints,
1716
1783
  self._max_type_constraints,
@@ -1821,7 +1888,7 @@ class Clinic(Analysis):
1821
1888
  if off in variable_manager.stack_offset_to_struct_member_info:
1822
1889
  stmt.tags["struct_member_info"] = variable_manager.stack_offset_to_struct_member_info[off]
1823
1890
 
1824
- elif stmt_type is ailment.Stmt.Assignment:
1891
+ elif stmt_type is ailment.Stmt.Assignment or stmt_type is ailment.Stmt.WeakAssignment:
1825
1892
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.dst)
1826
1893
  self._link_variables_on_expr(variable_manager, global_variables, block, stmt_idx, stmt, stmt.src)
1827
1894
 
@@ -2937,7 +3004,7 @@ class Clinic(Analysis):
2937
3004
  break
2938
3005
  return ail_graph
2939
3006
 
2940
- def _rewrite_windows_stkchk_call(self, ail_graph) -> networkx.DiGraph:
3007
+ def _rewrite_windows_chkstk_call(self, ail_graph) -> networkx.DiGraph:
2941
3008
  if not (self.project.simos is not None and self.project.simos.name == "Win32"):
2942
3009
  return ail_graph
2943
3010
 
@@ -961,27 +961,6 @@ class ConditionProcessor:
961
961
  sympy_expr = ConditionProcessor.claripy_ast_to_sympy_expr(cond, memo=memo)
962
962
  return ConditionProcessor.sympy_expr_to_claripy_ast(sympy.simplify_logic(sympy_expr, deep=False), memo)
963
963
 
964
- @staticmethod
965
- def simplify_condition_deprecated(cond):
966
- # Z3's simplification may yield weird and unreadable results
967
- # hence we mostly rely on our own simplification. we only use Z3's simplification results when it returns a
968
- # concrete value.
969
- claripy_simplified = claripy.simplify(cond)
970
- if not claripy_simplified.symbolic:
971
- return claripy_simplified
972
-
973
- simplified = ConditionProcessor._fold_double_negations(cond)
974
- cond = simplified if simplified is not None else cond
975
- simplified = ConditionProcessor._revert_short_circuit_conditions(cond)
976
- cond = simplified if simplified is not None else cond
977
- simplified = ConditionProcessor._extract_common_subexpressions(cond)
978
- cond = simplified if simplified is not None else cond
979
- # simplified = ConditionProcessor._remove_redundant_terms(cond)
980
- # cond = simplified if simplified is not None else cond
981
- # in the end, use claripy's simplification to handle really easy cases again
982
- simplified = ConditionProcessor._simplify_trivial_cases(cond)
983
- return simplified if simplified is not None else cond
984
-
985
964
  @staticmethod
986
965
  def _simplify_trivial_cases(cond):
987
966
  if cond.op == "And":