angr 9.2.138__py3-none-manylinux2014_x86_64.whl → 9.2.140__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic; see the registry's advisory page for more details.

Files changed (100)
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +48 -21
  3. angr/analyses/calling_convention/fact_collector.py +59 -12
  4. angr/analyses/calling_convention/utils.py +2 -2
  5. angr/analyses/cfg/cfg_base.py +13 -0
  6. angr/analyses/cfg/cfg_fast.py +23 -4
  7. angr/analyses/decompiler/ail_simplifier.py +79 -53
  8. angr/analyses/decompiler/block_simplifier.py +0 -2
  9. angr/analyses/decompiler/callsite_maker.py +80 -14
  10. angr/analyses/decompiler/clinic.py +99 -80
  11. angr/analyses/decompiler/condition_processor.py +2 -2
  12. angr/analyses/decompiler/decompiler.py +19 -7
  13. angr/analyses/decompiler/dephication/rewriting_engine.py +16 -7
  14. angr/analyses/decompiler/expression_narrower.py +1 -1
  15. angr/analyses/decompiler/optimization_passes/__init__.py +3 -0
  16. angr/analyses/decompiler/optimization_passes/condition_constprop.py +149 -0
  17. angr/analyses/decompiler/optimization_passes/const_prop_reverter.py +8 -7
  18. angr/analyses/decompiler/optimization_passes/deadblock_remover.py +12 -3
  19. angr/analyses/decompiler/optimization_passes/inlined_string_transformation_simplifier.py +1 -1
  20. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +21 -13
  21. angr/analyses/decompiler/optimization_passes/optimization_pass.py +21 -12
  22. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +17 -9
  23. angr/analyses/decompiler/optimization_passes/return_duplicator_high.py +7 -10
  24. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +12 -1
  25. angr/analyses/decompiler/peephole_optimizations/remove_redundant_conversions.py +61 -25
  26. angr/analyses/decompiler/peephole_optimizations/remove_redundant_shifts.py +50 -1
  27. angr/analyses/decompiler/presets/fast.py +2 -0
  28. angr/analyses/decompiler/presets/full.py +2 -0
  29. angr/analyses/decompiler/region_simplifiers/expr_folding.py +259 -108
  30. angr/analyses/decompiler/region_simplifiers/region_simplifier.py +28 -9
  31. angr/analyses/decompiler/ssailification/rewriting_engine.py +20 -2
  32. angr/analyses/decompiler/ssailification/traversal_engine.py +4 -3
  33. angr/analyses/decompiler/structured_codegen/c.py +10 -3
  34. angr/analyses/decompiler/structuring/dream.py +28 -19
  35. angr/analyses/decompiler/structuring/phoenix.py +253 -89
  36. angr/analyses/decompiler/structuring/recursive_structurer.py +1 -0
  37. angr/analyses/decompiler/structuring/structurer_base.py +121 -46
  38. angr/analyses/decompiler/structuring/structurer_nodes.py +6 -1
  39. angr/analyses/decompiler/utils.py +60 -1
  40. angr/analyses/deobfuscator/api_obf_finder.py +13 -5
  41. angr/analyses/deobfuscator/api_obf_type2_finder.py +166 -0
  42. angr/analyses/deobfuscator/string_obf_finder.py +105 -18
  43. angr/analyses/forward_analysis/forward_analysis.py +1 -1
  44. angr/analyses/propagator/top_checker_mixin.py +6 -6
  45. angr/analyses/reaching_definitions/__init__.py +2 -1
  46. angr/analyses/reaching_definitions/dep_graph.py +1 -12
  47. angr/analyses/reaching_definitions/engine_vex.py +36 -31
  48. angr/analyses/reaching_definitions/function_handler.py +15 -2
  49. angr/analyses/reaching_definitions/rd_state.py +1 -37
  50. angr/analyses/reaching_definitions/reaching_definitions.py +13 -24
  51. angr/analyses/s_propagator.py +129 -87
  52. angr/analyses/s_reaching_definitions/s_rda_model.py +7 -1
  53. angr/analyses/s_reaching_definitions/s_rda_view.py +2 -2
  54. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +3 -1
  55. angr/analyses/stack_pointer_tracker.py +36 -22
  56. angr/analyses/typehoon/simple_solver.py +45 -7
  57. angr/analyses/typehoon/typeconsts.py +18 -5
  58. angr/analyses/variable_recovery/engine_ail.py +1 -1
  59. angr/analyses/variable_recovery/engine_base.py +62 -67
  60. angr/analyses/variable_recovery/engine_vex.py +1 -1
  61. angr/analyses/variable_recovery/irsb_scanner.py +2 -2
  62. angr/block.py +69 -107
  63. angr/callable.py +14 -7
  64. angr/calling_conventions.py +81 -10
  65. angr/distributed/__init__.py +1 -1
  66. angr/engines/__init__.py +7 -8
  67. angr/engines/engine.py +3 -138
  68. angr/engines/failure.py +2 -2
  69. angr/engines/hook.py +2 -2
  70. angr/engines/light/engine.py +5 -10
  71. angr/engines/pcode/emulate.py +2 -2
  72. angr/engines/pcode/engine.py +2 -14
  73. angr/engines/pcode/lifter.py +2 -2
  74. angr/engines/procedure.py +2 -2
  75. angr/engines/soot/engine.py +2 -2
  76. angr/engines/soot/statements/switch.py +1 -1
  77. angr/engines/successors.py +123 -17
  78. angr/engines/syscall.py +2 -2
  79. angr/engines/unicorn.py +3 -3
  80. angr/engines/vex/heavy/heavy.py +3 -15
  81. angr/engines/vex/lifter.py +2 -2
  82. angr/engines/vex/light/light.py +2 -2
  83. angr/factory.py +4 -19
  84. angr/knowledge_plugins/cfg/cfg_model.py +3 -2
  85. angr/knowledge_plugins/key_definitions/atoms.py +8 -4
  86. angr/knowledge_plugins/key_definitions/live_definitions.py +41 -103
  87. angr/knowledge_plugins/labels.py +2 -2
  88. angr/knowledge_plugins/obfuscations.py +1 -0
  89. angr/knowledge_plugins/xrefs/xref_manager.py +4 -0
  90. angr/sim_type.py +19 -17
  91. angr/state_plugins/plugin.py +19 -4
  92. angr/storage/memory_mixins/memory_mixin.py +1 -1
  93. angr/storage/memory_mixins/paged_memory/pages/multi_values.py +10 -5
  94. angr/utils/ssa/__init__.py +119 -4
  95. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/METADATA +6 -6
  96. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/RECORD +100 -98
  97. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/LICENSE +0 -0
  98. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/WHEEL +0 -0
  99. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/entry_points.txt +0 -0
  100. {angr-9.2.138.dist-info → angr-9.2.140.dist-info}/top_level.txt +0 -0
@@ -16,7 +16,7 @@ from ailment.expression import (
16
16
  Convert,
17
17
  Expression,
18
18
  )
19
- from ailment.statement import Assignment, Store, Return, Jump
19
+ from ailment.statement import Assignment, Store, Return, Jump, ConditionalJump
20
20
 
21
21
  from angr.knowledge_plugins.functions import Function
22
22
  from angr.code_location import CodeLocation, ExternalCodeLocation
@@ -35,6 +35,7 @@ from angr.utils.ssa import (
35
35
  get_tmp_uselocs,
36
36
  get_tmp_deflocs,
37
37
  phi_assignment_get_src,
38
+ has_store_stmt_in_between_stmts,
38
39
  )
39
40
 
40
41
 
@@ -45,6 +46,8 @@ class SPropagatorModel:
45
46
 
46
47
  def __init__(self):
47
48
  self.replacements: Mapping[CodeLocation, Mapping[Expression, Expression]] = {}
49
+ # store vvars that are definitely dead (but usually not removed by default because they are stack variables)
50
+ self.dead_vvar_ids: set[int] = set()
48
51
 
49
52
 
50
53
  class SPropagatorAnalysis(Analysis):
@@ -90,6 +93,7 @@ class SPropagatorAnalysis(Analysis):
90
93
  bp_as_gpr = the_func.info.get("bp_as_gpr", False)
91
94
  self._bp_as_gpr = bp_as_gpr
92
95
 
96
+ # output
93
97
  self.model = SPropagatorModel()
94
98
 
95
99
  self._analyze()
@@ -98,6 +102,10 @@ class SPropagatorAnalysis(Analysis):
98
102
  def replacements(self):
99
103
  return self.model.replacements
100
104
 
105
+ @property
106
+ def dead_vvar_ids(self):
107
+ return self.model.dead_vvar_ids
108
+
101
109
  def _analyze(self):
102
110
  blocks: dict[tuple[int, int | None], Block]
103
111
  match self.mode:
@@ -132,7 +140,7 @@ class SPropagatorAnalysis(Analysis):
132
140
 
133
141
  replacements = defaultdict(dict)
134
142
 
135
- # find constant assignments
143
+ # find constant and other propagatable assignments
136
144
  vvarid_to_vvar = {}
137
145
  const_vvars: dict[int, Const] = {}
138
146
  for vvar, defloc in vvar_deflocs.items():
@@ -140,7 +148,6 @@ class SPropagatorAnalysis(Analysis):
140
148
  continue
141
149
 
142
150
  vvarid_to_vvar[vvar.varid] = vvar
143
- defloc = vvar_deflocs[vvar]
144
151
  if isinstance(defloc, ExternalCodeLocation):
145
152
  continue
146
153
 
@@ -178,8 +185,29 @@ class SPropagatorAnalysis(Analysis):
178
185
  for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
179
186
  replacements[useloc][vvar_at_use] = const_value
180
187
 
181
- if self.mode == "function" and vvar.varid in vvar_uselocs:
182
- if len(vvar_uselocs[vvar.varid]) <= 2 and isinstance(stmt, Assignment) and isinstance(stmt.src, Load):
188
+ # function mode only
189
+ if self.mode == "function":
190
+ assert self.func_graph is not None
191
+
192
+ for vvar, defloc in vvar_deflocs.items():
193
+ if vvar.varid not in vvar_uselocs:
194
+ continue
195
+ if vvar.varid in const_vvars:
196
+ continue
197
+ if isinstance(defloc, ExternalCodeLocation):
198
+ continue
199
+
200
+ assert defloc.block_addr is not None
201
+ assert defloc.stmt_idx is not None
202
+
203
+ block = blocks[(defloc.block_addr, defloc.block_idx)]
204
+ stmt = block.statements[defloc.stmt_idx]
205
+ if (
206
+ (vvar.was_reg or vvar.was_parameter)
207
+ and len(vvar_uselocs[vvar.varid]) <= 2
208
+ and isinstance(stmt, Assignment)
209
+ and isinstance(stmt.src, Load)
210
+ ):
183
211
  # do we want to propagate this Load expression if it's used for less than twice?
184
212
  # it's often seen in the following pattern, where propagation will be beneficial:
185
213
  # v0 = Load(...)
@@ -188,7 +216,7 @@ class SPropagatorAnalysis(Analysis):
188
216
  # }
189
217
  can_replace = True
190
218
  for _, vvar_useloc in vvar_uselocs[vvar.varid]:
191
- if self.has_store_stmt_in_between(blocks, defloc, vvar_useloc):
219
+ if has_store_stmt_in_between_stmts(self.func_graph, blocks, defloc, vvar_useloc):
192
220
  can_replace = False
193
221
 
194
222
  if can_replace:
@@ -197,63 +225,80 @@ class SPropagatorAnalysis(Analysis):
197
225
  replacements[vvar_useloc][vvar_used] = stmt.src
198
226
  continue
199
227
 
200
- if len(vvar_uselocs[vvar.varid]) == 1:
201
- vvar_used, vvar_useloc = next(iter(vvar_uselocs[vvar.varid]))
202
- if is_const_vvar_load_assignment(stmt) and not self.has_store_stmt_in_between(
203
- blocks, defloc, vvar_useloc
204
- ):
205
- # we can propagate this load because there is no store between its def and use
206
- replacements[vvar_useloc][vvar_used] = stmt.src
228
+ if (
229
+ (vvar.was_reg or vvar.was_stack)
230
+ and len(vvar_uselocs[vvar.varid]) == 2
231
+ and not is_phi_assignment(stmt)
232
+ ):
233
+ # a special case: in a typical switch-case construct, a variable may be used once for comparison
234
+ # for the default case and then used again for constructing the jump target. we can propagate this
235
+ # variable for such cases.
236
+ uselocs = {loc for _, loc in vvar_uselocs[vvar.varid]}
237
+ if self.is_vvar_used_for_addr_loading_switch_case(uselocs, blocks):
238
+ for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
239
+ replacements[vvar_useloc][vvar_used] = stmt.src
240
+ # mark the vvar as dead and should be removed
241
+ self.model.dead_vvar_ids.add(vvar.varid)
207
242
  continue
208
243
 
209
- if is_const_and_vvar_assignment(stmt):
210
- # if the useloc is a phi assignment statement, ensure that stmt.src is the same as the phi
211
- # variable
212
- assert vvar_useloc.block_addr is not None
213
- assert vvar_useloc.stmt_idx is not None
214
- useloc_stmt = blocks[(vvar_useloc.block_addr, vvar_useloc.block_idx)].statements[
215
- vvar_useloc.stmt_idx
216
- ]
217
- if is_phi_assignment(useloc_stmt):
218
- if (
219
- isinstance(stmt.src, VirtualVariable)
220
- and stmt.src.oident == useloc_stmt.dst.oident
221
- and stmt.src.category == useloc_stmt.dst.category
222
- ):
223
- replacements[vvar_useloc][vvar_used] = stmt.src
224
- else:
244
+ if vvar.was_reg or vvar.was_parameter:
245
+ if len(vvar_uselocs[vvar.varid]) == 1:
246
+ vvar_used, vvar_useloc = next(iter(vvar_uselocs[vvar.varid]))
247
+ if is_const_vvar_load_assignment(stmt) and not has_store_stmt_in_between_stmts(
248
+ self.func_graph, blocks, defloc, vvar_useloc
249
+ ):
250
+ # we can propagate this load because there is no store between its def and use
225
251
  replacements[vvar_useloc][vvar_used] = stmt.src
226
- continue
252
+ continue
227
253
 
228
- else:
229
- non_exitsite_uselocs = [
230
- loc
231
- for _, loc in vvar_uselocs[vvar.varid]
232
- if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
233
- ]
234
- if is_const_and_vvar_assignment(stmt):
235
- if len(non_exitsite_uselocs) == 1:
236
- # this vvar is used once if we exclude its uses at ret sites or jump sites. we can
237
- # propagate it
238
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
254
+ if is_const_and_vvar_assignment(stmt):
255
+ # if the useloc is a phi assignment statement, ensure that stmt.src is the same as the phi
256
+ # variable
257
+ assert vvar_useloc.block_addr is not None
258
+ assert vvar_useloc.stmt_idx is not None
259
+ useloc_stmt = blocks[(vvar_useloc.block_addr, vvar_useloc.block_idx)].statements[
260
+ vvar_useloc.stmt_idx
261
+ ]
262
+ if is_phi_assignment(useloc_stmt):
263
+ if (
264
+ isinstance(stmt.src, VirtualVariable)
265
+ and stmt.src.oident == useloc_stmt.dst.oident
266
+ and stmt.src.category == useloc_stmt.dst.category
267
+ ):
268
+ replacements[vvar_useloc][vvar_used] = stmt.src
269
+ else:
239
270
  replacements[vvar_useloc][vvar_used] = stmt.src
240
271
  continue
241
272
 
242
- if len(set(non_exitsite_uselocs)) == 1 and not has_ite_expr(stmt.src):
243
- useloc = non_exitsite_uselocs[0]
244
- assert useloc.block_addr is not None
245
- assert useloc.stmt_idx is not None
246
- useloc_stmt = blocks[(useloc.block_addr, useloc.block_idx)].statements[useloc.stmt_idx]
247
- if stmt.src.depth <= 3 and not has_ite_stmt(useloc_stmt):
248
- # remove duplicate use locs (e.g., if the variable is used multiple times by the same
249
- # statement) - but ensure stmt is simple enough
273
+ else:
274
+ non_exitsite_uselocs = [
275
+ loc
276
+ for _, loc in vvar_uselocs[vvar.varid]
277
+ if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
278
+ ]
279
+ if is_const_and_vvar_assignment(stmt):
280
+ if len(non_exitsite_uselocs) == 1:
281
+ # this vvar is used once if we exclude its uses at ret sites or jump sites. we can
282
+ # propagate it
250
283
  for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
251
284
  replacements[vvar_useloc][vvar_used] = stmt.src
252
285
  continue
253
286
 
287
+ if len(set(non_exitsite_uselocs)) == 1 and not has_ite_expr(stmt.src):
288
+ useloc = non_exitsite_uselocs[0]
289
+ assert useloc.block_addr is not None
290
+ assert useloc.stmt_idx is not None
291
+ useloc_stmt = blocks[(useloc.block_addr, useloc.block_idx)].statements[useloc.stmt_idx]
292
+ if stmt.src.depth <= 3 and not has_ite_stmt(useloc_stmt):
293
+ # remove duplicate use locs (e.g., if the variable is used multiple times by the
294
+ # same statement) - but ensure stmt is simple enough
295
+ for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
296
+ replacements[vvar_useloc][vvar_used] = stmt.src
297
+ continue
298
+
254
299
  # special logic for global variables: if it's used once or multiple times, and the variable is never
255
300
  # updated before it's used, we will propagate the load
256
- if isinstance(stmt, Assignment):
301
+ if (vvar.was_reg or vvar.was_parameter) and isinstance(stmt, Assignment):
257
302
  stmt_src = stmt.src
258
303
  # unpack conversions
259
304
  while isinstance(stmt_src, Convert):
@@ -420,43 +465,40 @@ class SPropagatorAnalysis(Analysis):
420
465
 
421
466
  return False
422
467
 
423
- def has_store_stmt_in_between(
424
- self, blocks: dict[tuple[int, int | None], Block], defloc: CodeLocation, useloc: CodeLocation
425
- ) -> bool:
426
- assert defloc.block_addr is not None
427
- assert defloc.stmt_idx is not None
428
- assert useloc.block_addr is not None
429
- assert useloc.stmt_idx is not None
430
- assert self.func_graph is not None
431
-
432
- use_block = blocks[(useloc.block_addr, useloc.block_idx)]
433
- def_block = blocks[(defloc.block_addr, defloc.block_idx)]
434
-
435
- # traverse the graph, go from use_block until we reach def_block, and look for Store statements
436
- seen = {use_block}
437
- queue = [use_block]
438
- while queue:
439
- block = queue.pop(0)
440
-
441
- starting_stmt_idx, ending_stmt_idx = 0, len(block.statements)
442
- if block is def_block:
443
- starting_stmt_idx = defloc.stmt_idx + 1
444
- if block is use_block:
445
- ending_stmt_idx = useloc.stmt_idx + 1
446
-
447
- for i in range(starting_stmt_idx, ending_stmt_idx):
448
- if isinstance(block.statements[i], Store):
449
- return True
450
-
451
- if block is def_block:
452
- continue
453
-
454
- for pred in self.func_graph.predecessors(block):
455
- if pred not in seen:
456
- seen.add(pred)
457
- queue.append(pred)
458
-
459
- return False
468
+ @staticmethod
469
+ def is_vvar_used_for_addr_loading_switch_case(uselocs: set[CodeLocation], blocks) -> bool:
470
+ """
471
+ Check if a virtual variable is used for loading an address in a switch-case construct.
472
+
473
+ :param uselocs: The use locations of the virtual variable.
474
+ :param blocks: All blocks of the current function.
475
+ :return: True if the virtual variable is used for loading an address in a switch-case construct, False
476
+ otherwise.
477
+ """
478
+
479
+ if len(uselocs) != 2:
480
+ return False
481
+
482
+ useloc_0, useloc_1 = list(uselocs)
483
+ block_0 = blocks[(useloc_0.block_addr, useloc_0.block_idx)]
484
+ stmt_0 = block_0.statements[useloc_0.stmt_idx]
485
+ block_1 = blocks[(useloc_1.block_addr, useloc_1.block_idx)]
486
+ stmt_1 = block_1.statements[useloc_1.stmt_idx]
487
+
488
+ if isinstance(stmt_0, Jump):
489
+ stmt_0, stmt_1 = stmt_1, stmt_0
490
+ block_0, block_1 = block_1, block_0
491
+ if not isinstance(stmt_0, ConditionalJump) or not isinstance(stmt_1, Jump):
492
+ return False
493
+
494
+ # check if stmt_0 jumps to block_1
495
+ if not isinstance(stmt_0.true_target, Const) or not isinstance(stmt_0.false_target, Const):
496
+ return False
497
+ stmt_0_targets = {
498
+ (stmt_0.true_target.value, stmt_0.true_target_idx),
499
+ (stmt_0.false_target.value, stmt_0.false_target_idx),
500
+ }
501
+ return (block_1.addr, block_1.idx) in stmt_0_targets
460
502
 
461
503
 
462
504
  register_analysis(SPropagatorAnalysis, "SPropagator")
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from collections import defaultdict
4
4
  from collections.abc import Generator
5
- from typing import Any
5
+ from typing import Any, Literal, overload
6
6
 
7
7
  from ailment.expression import VirtualVariable, Tmp
8
8
 
@@ -48,6 +48,12 @@ class SRDAModel:
48
48
  s.add(Definition(tmp_atom, CodeLocation(block_loc.block_addr, stmt_idx, block_idx=block_loc.block_idx)))
49
49
  return s
50
50
 
51
+ @overload
52
+ def get_uses_by_location(self, loc: CodeLocation, exprs: Literal[True]) -> set[tuple[Definition, Any | None]]: ...
53
+
54
+ @overload
55
+ def get_uses_by_location(self, loc: CodeLocation, exprs: Literal[False] = ...) -> set[Definition]: ...
56
+
51
57
  def get_uses_by_location(
52
58
  self, loc: CodeLocation, exprs: bool = False
53
59
  ) -> set[Definition] | set[tuple[Definition, Any | None]]:
@@ -79,8 +79,8 @@ class StackVVarPredicate:
79
79
  isinstance(stmt, Assignment)
80
80
  and isinstance(stmt.dst, VirtualVariable)
81
81
  and stmt.dst.was_stack
82
- and stmt.dst.stack_offset == self.stack_offset
83
- and stmt.dst.size == self.size
82
+ and stmt.dst.stack_offset <= self.stack_offset < stmt.dst.stack_offset + stmt.dst.size
83
+ and stmt.dst.stack_offset <= self.stack_offset + self.size <= stmt.dst.stack_offset + stmt.dst.size
84
84
  ):
85
85
  self.vvars.add(stmt.dst)
86
86
  return True
@@ -143,7 +143,9 @@ class SReachingDefinitionsAnalysis(Analysis):
143
143
  cc = cc_cls(self.project.arch)
144
144
 
145
145
  codeloc = CodeLocation(block_addr, stmt_idx, block_idx=block_idx, ins_addr=stmt.ins_addr)
146
- arg_locs = cc.ARG_REGS
146
+ arg_locs = list(cc.ARG_REGS)
147
+ if cc.FP_ARG_REGS:
148
+ arg_locs += [r_name for r_name in cc.FP_ARG_REGS if r_name not in arg_locs]
147
149
 
148
150
  for arg_reg_name in arg_locs:
149
151
  reg_offset = self.project.arch.registers[arg_reg_name][0]
@@ -22,6 +22,7 @@ try:
22
22
  from angr.engines import pcode
23
23
  except ImportError:
24
24
  pypcode = None
25
+ pcode = None
25
26
 
26
27
  if TYPE_CHECKING:
27
28
  from angr.block import Block
@@ -93,6 +94,11 @@ class Register:
93
94
  return self.offset == other.offset
94
95
  return False
95
96
 
97
+ def __add__(self, other) -> OffsetVal:
98
+ if type(other) is Constant:
99
+ return OffsetVal(self, other.val)
100
+ raise CouldNotResolveException
101
+
96
102
  def __repr__(self):
97
103
  return str(self.offset)
98
104
 
@@ -232,6 +238,7 @@ class StackPointerTrackerState:
232
238
  def give_up_on_memory_tracking(self):
233
239
  self.memory = {}
234
240
  self.is_tracking_memory = False
241
+ return self
235
242
 
236
243
  def store(self, addr, val):
237
244
  # strong update
@@ -370,7 +377,8 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
370
377
  self._mem_merge_cache = {}
371
378
 
372
379
  if initial_reg_values:
373
- self._reg_value_at_block_start[func.addr if func is not None else block.addr] = initial_reg_values
380
+ block_start_addr = func.addr if func is not None else block.addr # type: ignore
381
+ self._reg_value_at_block_start[block_start_addr] = initial_reg_values
374
382
 
375
383
  _l.debug("Running on function %r", self._func)
376
384
  self._analyze()
@@ -461,9 +469,13 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
461
469
  return any(self.inconsistent_for(r) for r in self.reg_offsets)
462
470
 
463
471
  def inconsistent_for(self, reg):
472
+ if self._func is None:
473
+ raise ValueError("inconsistent_for() is only supported in function mode")
464
474
  return any(self.offset_after_block(endpoint.addr, reg) is TOP for endpoint in self._func.endpoints)
465
475
 
466
476
  def offsets_for(self, reg):
477
+ if self._func is None:
478
+ raise ValueError("offsets_for() is only supported in function mode")
467
479
  return [
468
480
  o for block in self._func.blocks if (o := self.offset_after_block(block.addr, reg)) not in (TOP, BOTTOM)
469
481
  ]
@@ -481,7 +493,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
481
493
  def _post_analysis(self):
482
494
  pass
483
495
 
484
- def _get_register(self, offset):
496
+ def _get_register(self, offset) -> Register:
485
497
  name = self.project.arch.register_names[offset]
486
498
  size = self.project.arch.registers[name][1]
487
499
  return Register(offset, size * self.project.arch.byte_width)
@@ -557,7 +569,7 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
557
569
  output_state = state.freeze()
558
570
  return None, output_state
559
571
 
560
- def _process_vex_irsb(self, node, vex_block: pyvex.IRSB, state: StackPointerTrackerState) -> int:
572
+ def _process_vex_irsb(self, node, vex_block: pyvex.IRSB, state: StackPointerTrackerState) -> int | None:
561
573
  tmps = {}
562
574
  curr_stmt_start_addr = None
563
575
 
@@ -704,21 +716,16 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
704
716
  if callees:
705
717
  if len(callees) == 1:
706
718
  callee = callees[0]
719
+ track_rax = False
720
+ if (
721
+ (callee.info.get("is_rust_probestack", False) and self.project.arch.name == "AMD64")
722
+ or (callee.info.get("is_alloca_probe", False) and self.project.arch.name == "AMD64")
723
+ or callee.name == "__chkstk"
724
+ ):
725
+ # sp = sp - rax right after returning from the call
726
+ track_rax = True
707
727
 
708
- if callee.info.get("is_rust_probestack", False) is True and self.project.arch.name == "AMD64":
709
- # special-case for rust_probestack: sp = sp - rax right after returning from the call, so we
710
- # need to keep track of rax
711
- for stmt in reversed(vex_block.statements):
712
- if (
713
- isinstance(stmt, pyvex.IRStmt.Put)
714
- and stmt.offset == self.project.arch.registers["rax"][0]
715
- and isinstance(stmt.data, pyvex.IRExpr.Const)
716
- ):
717
- state.put(stmt.offset, Constant(stmt.data.con.value), force=True)
718
- break
719
- elif callee.name == "__chkstk":
720
- # special-case for __chkstk: sp = sp - rax right after returning from the call, so we need to
721
- # keep track of rax
728
+ if track_rax:
722
729
  for stmt in reversed(vex_block.statements):
723
730
  if (
724
731
  isinstance(stmt, pyvex.IRStmt.Put)
@@ -737,18 +744,20 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
737
744
  # found callee clean-up cases...
738
745
  try:
739
746
  v = state.get(self.project.arch.sp_offset)
747
+ incremented = None
740
748
  if v is BOTTOM:
741
749
  incremented = BOTTOM
742
750
  elif callee_cleanups[0].prototype is not None:
743
751
  num_args = len(callee_cleanups[0].prototype.args)
744
752
  incremented = v + Constant(self.project.arch.bytes * num_args)
745
- state.put(self.project.arch.sp_offset, incremented)
753
+ if incremented is not None:
754
+ state.put(self.project.arch.sp_offset, incremented)
746
755
  except CouldNotResolveException:
747
756
  pass
748
757
 
749
758
  return curr_stmt_start_addr
750
759
 
751
- def _process_pcode_irsb(self, node, pcode_irsb: pcode.lifter.IRSB, state: StackPointerTrackerState) -> int:
760
+ def _process_pcode_irsb(self, node, pcode_irsb: pcode.lifter.IRSB, state: StackPointerTrackerState) -> int | None:
752
761
  unique = {}
753
762
  curr_stmt_start_addr = None
754
763
 
@@ -830,18 +839,20 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
830
839
  # found callee clean-up cases...
831
840
  try:
832
841
  v = state.get(self.project.arch.sp_offset)
842
+ incremented = None
833
843
  if v is BOTTOM:
834
844
  incremented = BOTTOM
835
845
  elif callee_cleanups[0].prototype is not None:
836
846
  num_args = len(callee_cleanups[0].prototype.args)
837
847
  incremented = v + Constant(self.project.arch.bytes * num_args)
838
- state.put(self.project.arch.sp_offset, incremented)
848
+ if incremented is not None:
849
+ state.put(self.project.arch.sp_offset, incremented)
839
850
  except CouldNotResolveException:
840
851
  pass
841
852
 
842
853
  return curr_stmt_start_addr
843
854
 
844
- def _widen_states(self, *states):
855
+ def _widen_states(self, *states: FrozenStackPointerTrackerState):
845
856
  assert len(states) == 2
846
857
  merged, _ = self._merge_states(None, *states)
847
858
  if len(merged.memory) > 5:
@@ -849,13 +860,16 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
849
860
  merged = merged.unfreeze().give_up_on_memory_tracking().freeze()
850
861
  return merged
851
862
 
852
- def _merge_states(self, node, *states: StackPointerTrackerState):
863
+ def _merge_states(self, node, *states: FrozenStackPointerTrackerState):
853
864
  merged_state = states[0]
854
865
  for other in states[1:]:
855
866
  merged_state = merged_state.merge(other, node.addr, self._reg_merge_cache, self._mem_merge_cache)
856
867
  return merged_state, merged_state == states[0]
857
868
 
858
869
  def _find_callees(self, node) -> list[Function]:
870
+ if self._func is None:
871
+ raise ValueError("find_callees() is only supported in function mode")
872
+
859
873
  callees: list[Function] = []
860
874
  for _, dst, data in self._func.transition_graph.out_edges(node, data=True):
861
875
  if data.get("type") == "call" and isinstance(dst, Function):
@@ -5,6 +5,7 @@ from collections import defaultdict
5
5
  import logging
6
6
 
7
7
  import networkx
8
+ from sortedcontainers import SortedDict
8
9
 
9
10
  from angr.utils.constants import MAX_POINTSTO_BITS
10
11
  from .typevars import (
@@ -1165,25 +1166,45 @@ class SimpleSolver:
1165
1166
  # this might be a struct
1166
1167
  fields = {}
1167
1168
 
1168
- candidate_bases = defaultdict(set)
1169
+ candidate_bases = SortedDict()
1169
1170
 
1170
1171
  for labels, _succ in path_and_successors:
1171
1172
  last_label = labels[-1] if labels else None
1172
1173
  if isinstance(last_label, HasField):
1173
1174
  # TODO: Really determine the maximum possible size of the field when MAX_POINTSTO_BITS is in use
1175
+ if last_label.offset not in candidate_bases:
1176
+ candidate_bases[last_label.offset] = set()
1174
1177
  candidate_bases[last_label.offset].add(
1175
1178
  1 if last_label.bits == MAX_POINTSTO_BITS else (last_label.bits // 8)
1176
1179
  )
1177
1180
 
1181
+ # determine possible bases and map each offset to its base
1182
+ offset_to_base = SortedDict()
1183
+ for start_offset, sizes in candidate_bases.items():
1184
+ for size in sizes:
1185
+ for i in range(size):
1186
+ access_off = start_offset + i
1187
+ if access_off not in offset_to_base:
1188
+ offset_to_base[access_off] = start_offset
1189
+
1190
+ # determine again the maximum size of each field (at each offset)
1191
+ offset_to_maxsize = defaultdict(int)
1192
+ offset_to_sizes = defaultdict(set) # we do not consider offsets to each base offset
1193
+ for labels, _succ in path_and_successors:
1194
+ last_label = labels[-1] if labels else None
1195
+ if isinstance(last_label, HasField):
1196
+ base = offset_to_base[last_label.offset]
1197
+ access_size = 1 if last_label.bits == MAX_POINTSTO_BITS else (last_label.bits // 8)
1198
+ offset_to_maxsize[base] = max(offset_to_maxsize[base], (last_label.offset - base) + access_size)
1199
+ offset_to_sizes[base].add(access_size)
1200
+
1178
1201
  node_to_base = {}
1179
1202
 
1180
1203
  for labels, succ in path_and_successors:
1181
1204
  last_label = labels[-1] if labels else None
1182
1205
  if isinstance(last_label, HasField):
1183
- for start_offset, sizes in candidate_bases.items():
1184
- for size in sizes:
1185
- if last_label.offset > start_offset and last_label.offset < start_offset + size: # ???
1186
- node_to_base[succ] = start_offset
1206
+ prev_offset = next(offset_to_base.irange(maximum=last_label.offset, reverse=True))
1207
+ node_to_base[succ] = offset_to_base[prev_offset]
1187
1208
 
1188
1209
  node_by_offset = defaultdict(set)
1189
1210
 
@@ -1195,16 +1216,33 @@ class SimpleSolver:
1195
1216
  else:
1196
1217
  node_by_offset[last_label.offset].add(succ)
1197
1218
 
1198
- for offset, child_nodes in node_by_offset.items():
1219
+ sorted_offsets: list[int] = sorted(node_by_offset)
1220
+ for i in range(len(sorted_offsets)): # pylint:disable=consider-using-enumerate
1221
+ offset = sorted_offsets[i]
1222
+
1223
+ child_nodes = node_by_offset[offset]
1199
1224
  sol = self._determine(equivalent_classes, the_typevar, sketch, solution, nodes=child_nodes)
1200
1225
  if isinstance(sol, TopType):
1201
- sol = int_type(min(candidate_bases[offset]) * 8)
1226
+ # make it an array if possible
1227
+ elem_size = min(offset_to_sizes[offset])
1228
+ array_size = offset_to_maxsize[offset]
1229
+ if array_size % elem_size != 0:
1230
+ # fall back to byte_t
1231
+ elem_size = 1
1232
+ elem_type = int_type(elem_size * 8)
1233
+ sol = elem_type if array_size == elem_size else Array(elem_type, array_size // elem_size)
1202
1234
  fields[offset] = sol
1203
1235
 
1204
1236
  if not fields:
1205
1237
  result = Top_
1206
1238
  for node in nodes:
1207
1239
  self._solution_cache[node.typevar] = result
1240
+ solution[node.typevar] = result
1241
+ elif any(off < 0 for off in fields):
1242
+ result = self._pointer_class()(Bottom_)
1243
+ for node in nodes:
1244
+ self._solution_cache[node.typevar] = result
1245
+ solution[node.typevar] = result
1208
1246
  else:
1209
1247
  # back-patch
1210
1248
  struct_type.fields = fields