angr 9.2.136__py3-none-macosx_11_0_arm64.whl → 9.2.138__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (65) hide show
  1. angr/__init__.py +1 -1
  2. angr/analyses/calling_convention/calling_convention.py +2 -1
  3. angr/analyses/calling_convention/fact_collector.py +10 -2
  4. angr/analyses/cfg/cfg_base.py +3 -33
  5. angr/analyses/cfg/cfg_emulated.py +0 -103
  6. angr/analyses/cfg/cfg_fast.py +31 -12
  7. angr/analyses/cfg/indirect_jump_resolvers/jumptable.py +15 -0
  8. angr/analyses/class_identifier.py +1 -2
  9. angr/analyses/complete_calling_conventions.py +6 -3
  10. angr/analyses/decompiler/ail_simplifier.py +12 -1
  11. angr/analyses/decompiler/block_simplifier.py +2 -2
  12. angr/analyses/decompiler/ccall_rewriters/__init__.py +2 -0
  13. angr/analyses/decompiler/ccall_rewriters/amd64_ccalls.py +1 -1
  14. angr/analyses/decompiler/ccall_rewriters/x86_ccalls.py +69 -0
  15. angr/analyses/decompiler/clinic.py +77 -65
  16. angr/analyses/decompiler/condition_processor.py +2 -0
  17. angr/analyses/decompiler/decompilation_options.py +10 -0
  18. angr/analyses/decompiler/decompiler.py +1 -0
  19. angr/analyses/decompiler/dephication/dephication_base.py +2 -0
  20. angr/analyses/decompiler/dephication/rewriting_engine.py +8 -6
  21. angr/analyses/decompiler/dephication/seqnode_dephication.py +10 -1
  22. angr/analyses/decompiler/optimization_passes/flip_boolean_cmp.py +2 -2
  23. angr/analyses/decompiler/optimization_passes/return_duplicator_base.py +1 -2
  24. angr/analyses/decompiler/peephole_optimizations/remove_redundant_nots.py +21 -3
  25. angr/analyses/decompiler/sequence_walker.py +6 -2
  26. angr/analyses/decompiler/ssailification/rewriting.py +11 -1
  27. angr/analyses/decompiler/ssailification/rewriting_engine.py +56 -19
  28. angr/analyses/decompiler/ssailification/ssailification.py +13 -3
  29. angr/analyses/decompiler/ssailification/traversal.py +28 -2
  30. angr/analyses/decompiler/ssailification/traversal_state.py +6 -1
  31. angr/analyses/decompiler/structured_codegen/c.py +44 -21
  32. angr/analyses/decompiler/structuring/phoenix.py +117 -14
  33. angr/analyses/decompiler/utils.py +113 -8
  34. angr/analyses/reaching_definitions/function_handler.py +1 -1
  35. angr/analyses/s_liveness.py +5 -1
  36. angr/analyses/s_propagator.py +127 -28
  37. angr/analyses/s_reaching_definitions/s_rda_model.py +2 -1
  38. angr/analyses/s_reaching_definitions/s_rda_view.py +20 -1
  39. angr/analyses/s_reaching_definitions/s_reaching_definitions.py +11 -1
  40. angr/analyses/stack_pointer_tracker.py +26 -16
  41. angr/analyses/variable_recovery/engine_ail.py +19 -7
  42. angr/analyses/variable_recovery/engine_base.py +16 -14
  43. angr/analyses/variable_recovery/engine_vex.py +2 -2
  44. angr/analyses/variable_recovery/variable_recovery_fast.py +22 -1
  45. angr/block.py +59 -20
  46. angr/engines/pcode/emulate.py +1 -1
  47. angr/engines/pcode/lifter.py +31 -18
  48. angr/engines/soot/expressions/__init__.py +2 -4
  49. angr/engines/soot/statements/__init__.py +1 -2
  50. angr/engines/soot/values/__init__.py +1 -2
  51. angr/engines/successors.py +11 -6
  52. angr/engines/vex/lifter.py +9 -6
  53. angr/flirt/build_sig.py +8 -15
  54. angr/knowledge_plugins/functions/function.py +0 -6
  55. angr/knowledge_plugins/functions/soot_function.py +5 -8
  56. angr/knowledge_plugins/variables/variable_manager.py +16 -10
  57. angr/lib/angr_native.dylib +0 -0
  58. angr/procedures/glibc/__libc_start_main.py +10 -3
  59. angr/utils/ssa/__init__.py +14 -1
  60. {angr-9.2.136.dist-info → angr-9.2.138.dist-info}/METADATA +7 -7
  61. {angr-9.2.136.dist-info → angr-9.2.138.dist-info}/RECORD +65 -64
  62. {angr-9.2.136.dist-info → angr-9.2.138.dist-info}/WHEEL +1 -1
  63. {angr-9.2.136.dist-info → angr-9.2.138.dist-info}/LICENSE +0 -0
  64. {angr-9.2.136.dist-info → angr-9.2.138.dist-info}/entry_points.txt +0 -0
  65. {angr-9.2.136.dist-info → angr-9.2.138.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import networkx
4
4
  from ailment.expression import VirtualVariable
5
- from ailment.statement import Assignment
5
+ from ailment.statement import Assignment, Call
6
6
 
7
7
  from angr.analyses import Analysis, register_analysis
8
8
  from angr.utils.ssa import VVarUsesCollector, phi_assignment_get_src
@@ -86,6 +86,8 @@ class SLivenessAnalysis(Analysis):
86
86
  # handle assignments: a defined vvar is not live before the assignment
87
87
  if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
88
88
  live.discard(stmt.dst.varid)
89
+ elif isinstance(stmt, Call) and isinstance(stmt.ret_expr, VirtualVariable):
90
+ live.discard(stmt.ret_expr.varid)
89
91
 
90
92
  phi_expr = phi_assignment_get_src(stmt)
91
93
  if phi_expr is not None:
@@ -136,6 +138,8 @@ class SLivenessAnalysis(Analysis):
136
138
  for stmt in reversed(block.statements):
137
139
  if isinstance(stmt, Assignment) and isinstance(stmt.dst, VirtualVariable):
138
140
  def_vvar = stmt.dst.varid
141
+ elif isinstance(stmt, Call) and isinstance(stmt.ret_expr, VirtualVariable):
142
+ def_vvar = stmt.ret_expr.varid
139
143
  else:
140
144
  def_vvar = None
141
145
 
@@ -4,6 +4,8 @@ import contextlib
4
4
  from collections.abc import Mapping
5
5
  from collections import defaultdict
6
6
 
7
+ import networkx
8
+
7
9
  from ailment.block import Block
8
10
  from ailment.expression import (
9
11
  Const,
@@ -17,11 +19,13 @@ from ailment.expression import (
17
19
  from ailment.statement import Assignment, Store, Return, Jump
18
20
 
19
21
  from angr.knowledge_plugins.functions import Function
20
- from angr.code_location import CodeLocation
22
+ from angr.code_location import CodeLocation, ExternalCodeLocation
21
23
  from angr.analyses import Analysis, register_analysis
22
24
  from angr.utils.ssa import (
23
25
  get_vvar_uselocs,
24
26
  get_vvar_deflocs,
27
+ has_ite_expr,
28
+ has_ite_stmt,
25
29
  is_phi_assignment,
26
30
  is_const_assignment,
27
31
  is_const_and_vvar_assignment,
@@ -51,9 +55,10 @@ class SPropagatorAnalysis(Analysis):
51
55
  def __init__( # pylint: disable=too-many-positional-arguments
52
56
  self,
53
57
  subject: Block | Function,
54
- func_graph=None,
58
+ func_graph: networkx.DiGraph | None = None,
55
59
  only_consts: bool = True,
56
60
  stack_pointer_tracker=None,
61
+ func_args: set[VirtualVariable] | None = None,
57
62
  func_addr: int | None = None,
58
63
  ):
59
64
  if isinstance(subject, Block):
@@ -69,6 +74,7 @@ class SPropagatorAnalysis(Analysis):
69
74
 
70
75
  self.func_graph = func_graph
71
76
  self.func_addr = func_addr
77
+ self.func_args = func_args
72
78
  self.only_consts = only_consts
73
79
  self._sp_tracker = stack_pointer_tracker
74
80
 
@@ -109,6 +115,11 @@ class SPropagatorAnalysis(Analysis):
109
115
  # find all vvar uses
110
116
  vvar_uselocs = get_vvar_uselocs(blocks.values())
111
117
 
118
+ # update vvar_deflocs using function arguments
119
+ if self.func_args:
120
+ for func_arg in self.func_args:
121
+ vvar_deflocs[func_arg] = ExternalCodeLocation()
122
+
112
123
  # find all ret sites and indirect jump sites
113
124
  retsites: set[tuple[int, int | None, int]] = set()
114
125
  jumpsites: set[tuple[int, int | None, int]] = set()
@@ -130,8 +141,12 @@ class SPropagatorAnalysis(Analysis):
130
141
 
131
142
  vvarid_to_vvar[vvar.varid] = vvar
132
143
  defloc = vvar_deflocs[vvar]
144
+ if isinstance(defloc, ExternalCodeLocation):
145
+ continue
146
+
133
147
  assert defloc.block_addr is not None
134
148
  assert defloc.stmt_idx is not None
149
+
135
150
  block = blocks[(defloc.block_addr, defloc.block_idx)]
136
151
  stmt = block.statements[defloc.stmt_idx]
137
152
  r, v = is_const_assignment(stmt)
@@ -147,56 +162,94 @@ class SPropagatorAnalysis(Analysis):
147
162
  if v is not None:
148
163
  src_varids = {vvar.varid if vvar is not None else None for _, vvar in v.src_and_vvars}
149
164
  if None not in src_varids and all(varid in const_vvars for varid in src_varids):
165
+ all_int_src_varids: set[int] = {varid for varid in src_varids if varid is not None}
150
166
  src_values = {
151
167
  (
152
168
  (const_vvars[varid].value, const_vvars[varid].bits)
153
169
  if isinstance(const_vvars[varid], Const)
154
170
  else const_vvars[varid]
155
171
  )
156
- for varid in src_varids
172
+ for varid in all_int_src_varids
157
173
  }
158
174
  if len(src_values) == 1:
159
175
  # replace it!
160
- const_value = const_vvars[next(iter(src_varids))]
176
+ const_value = const_vvars[next(iter(all_int_src_varids))]
161
177
  const_vvars[vvar.varid] = const_value
162
178
  for vvar_at_use, useloc in vvar_uselocs[vvar.varid]:
163
179
  replacements[useloc][vvar_at_use] = const_value
164
180
 
165
181
  if self.mode == "function" and vvar.varid in vvar_uselocs:
182
+ if len(vvar_uselocs[vvar.varid]) <= 2 and isinstance(stmt, Assignment) and isinstance(stmt.src, Load):
183
+ # do we want to propagate this Load expression if it's used for less than twice?
184
+ # it's often seen in the following pattern, where propagation will be beneficial:
185
+ # v0 = Load(...)
186
+ # if (!v0) {
187
+ # v1 = v0 + 1;
188
+ # }
189
+ can_replace = True
190
+ for _, vvar_useloc in vvar_uselocs[vvar.varid]:
191
+ if self.has_store_stmt_in_between(blocks, defloc, vvar_useloc):
192
+ can_replace = False
193
+
194
+ if can_replace:
195
+ # we can propagate this load because there is no store between its def and use
196
+ for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
197
+ replacements[vvar_useloc][vvar_used] = stmt.src
198
+ continue
199
+
166
200
  if len(vvar_uselocs[vvar.varid]) == 1:
167
201
  vvar_used, vvar_useloc = next(iter(vvar_uselocs[vvar.varid]))
168
- if (
169
- is_const_vvar_load_assignment(stmt)
170
- and vvar_useloc.block_addr == defloc.block_addr
171
- and vvar_useloc.block_idx == defloc.block_idx
172
- and not any(
173
- isinstance(stmt_, Store)
174
- for stmt_ in block.statements[defloc.stmt_idx + 1 : vvar_useloc.stmt_idx]
175
- )
202
+ if is_const_vvar_load_assignment(stmt) and not self.has_store_stmt_in_between(
203
+ blocks, defloc, vvar_useloc
176
204
  ):
177
205
  # we can propagate this load because there is no store between its def and use
178
206
  replacements[vvar_useloc][vvar_used] = stmt.src
179
207
  continue
180
208
 
181
209
  if is_const_and_vvar_assignment(stmt):
182
- replacements[vvar_useloc][vvar_used] = stmt.src
210
+ # if the useloc is a phi assignment statement, ensure that stmt.src is the same as the phi
211
+ # variable
212
+ assert vvar_useloc.block_addr is not None
213
+ assert vvar_useloc.stmt_idx is not None
214
+ useloc_stmt = blocks[(vvar_useloc.block_addr, vvar_useloc.block_idx)].statements[
215
+ vvar_useloc.stmt_idx
216
+ ]
217
+ if is_phi_assignment(useloc_stmt):
218
+ if (
219
+ isinstance(stmt.src, VirtualVariable)
220
+ and stmt.src.oident == useloc_stmt.dst.oident
221
+ and stmt.src.category == useloc_stmt.dst.category
222
+ ):
223
+ replacements[vvar_useloc][vvar_used] = stmt.src
224
+ else:
225
+ replacements[vvar_useloc][vvar_used] = stmt.src
183
226
  continue
184
227
 
185
- elif (
186
- len(
187
- {
188
- loc
189
- for _, loc in vvar_uselocs[vvar.varid]
190
- if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
191
- }
192
- )
193
- == 1
194
- ):
228
+ else:
229
+ non_exitsite_uselocs = [
230
+ loc
231
+ for _, loc in vvar_uselocs[vvar.varid]
232
+ if (loc.block_addr, loc.block_idx, loc.stmt_idx) not in (retsites | jumpsites)
233
+ ]
195
234
  if is_const_and_vvar_assignment(stmt):
196
- # this vvar is used once if we exclude its uses at ret sites or jump sites. we can propagate it
197
- for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
198
- replacements[vvar_useloc][vvar_used] = stmt.src
199
- continue
235
+ if len(non_exitsite_uselocs) == 1:
236
+ # this vvar is used once if we exclude its uses at ret sites or jump sites. we can
237
+ # propagate it
238
+ for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
239
+ replacements[vvar_useloc][vvar_used] = stmt.src
240
+ continue
241
+
242
+ if len(set(non_exitsite_uselocs)) == 1 and not has_ite_expr(stmt.src):
243
+ useloc = non_exitsite_uselocs[0]
244
+ assert useloc.block_addr is not None
245
+ assert useloc.stmt_idx is not None
246
+ useloc_stmt = blocks[(useloc.block_addr, useloc.block_idx)].statements[useloc.stmt_idx]
247
+ if stmt.src.depth <= 3 and not has_ite_stmt(useloc_stmt):
248
+ # remove duplicate use locs (e.g., if the variable is used multiple times by the same
249
+ # statement) - but ensure stmt is simple enough
250
+ for vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
251
+ replacements[vvar_useloc][vvar_used] = stmt.src
252
+ continue
200
253
 
201
254
  # special logic for global variables: if it's used once or multiple times, and the variable is never
202
255
  # updated before it's used, we will propagate the load
@@ -205,7 +258,11 @@ class SPropagatorAnalysis(Analysis):
205
258
  # unpack conversions
206
259
  while isinstance(stmt_src, Convert):
207
260
  stmt_src = stmt_src.operand
208
- if isinstance(stmt_src, Load) and isinstance(stmt_src.addr, Const):
261
+ if (
262
+ isinstance(stmt_src, Load)
263
+ and isinstance(stmt_src.addr, Const)
264
+ and isinstance(stmt_src.addr.value, int)
265
+ ):
209
266
  gv_updated = False
210
267
  for _vvar_used, vvar_useloc in vvar_uselocs[vvar.varid]:
211
268
  gv_updated |= self.is_global_variable_updated(
@@ -265,6 +322,8 @@ class SPropagatorAnalysis(Analysis):
265
322
  for block_loc, tmp_and_uses in tmp_uselocs.items():
266
323
  for tmp_atom, tmp_uses in tmp_and_uses.items():
267
324
  # take a look at the definition and propagate the definition if supported
325
+ assert block_loc.block_addr is not None
326
+
268
327
  block = blocks[(block_loc.block_addr, block_loc.block_idx)]
269
328
  tmp_def_stmtidx = tmp_deflocs[block_loc][tmp_atom]
270
329
 
@@ -329,6 +388,8 @@ class SPropagatorAnalysis(Analysis):
329
388
 
330
389
  start_stmt_idx = defloc.stmt_idx if block is defblock else 0 # inclusive
331
390
  end_stmt_idx = useloc.stmt_idx if block is useblock else len(block.statements) # exclusive
391
+ assert start_stmt_idx is not None
392
+ assert end_stmt_idx is not None
332
393
 
333
394
  for idx in range(start_stmt_idx, end_stmt_idx):
334
395
  stmt = block.statements[idx]
@@ -359,5 +420,43 @@ class SPropagatorAnalysis(Analysis):
359
420
 
360
421
  return False
361
422
 
423
+ def has_store_stmt_in_between(
424
+ self, blocks: dict[tuple[int, int | None], Block], defloc: CodeLocation, useloc: CodeLocation
425
+ ) -> bool:
426
+ assert defloc.block_addr is not None
427
+ assert defloc.stmt_idx is not None
428
+ assert useloc.block_addr is not None
429
+ assert useloc.stmt_idx is not None
430
+ assert self.func_graph is not None
431
+
432
+ use_block = blocks[(useloc.block_addr, useloc.block_idx)]
433
+ def_block = blocks[(defloc.block_addr, defloc.block_idx)]
434
+
435
+ # traverse the graph, go from use_block until we reach def_block, and look for Store statements
436
+ seen = {use_block}
437
+ queue = [use_block]
438
+ while queue:
439
+ block = queue.pop(0)
440
+
441
+ starting_stmt_idx, ending_stmt_idx = 0, len(block.statements)
442
+ if block is def_block:
443
+ starting_stmt_idx = defloc.stmt_idx + 1
444
+ if block is use_block:
445
+ ending_stmt_idx = useloc.stmt_idx + 1
446
+
447
+ for i in range(starting_stmt_idx, ending_stmt_idx):
448
+ if isinstance(block.statements[i], Store):
449
+ return True
450
+
451
+ if block is def_block:
452
+ continue
453
+
454
+ for pred in self.func_graph.predecessors(block):
455
+ if pred not in seen:
456
+ seen.add(pred)
457
+ queue.append(pred)
458
+
459
+ return False
460
+
362
461
 
363
462
  register_analysis(SPropagatorAnalysis, "SPropagator")
@@ -15,8 +15,9 @@ class SRDAModel:
15
15
  The model for SRDA.
16
16
  """
17
17
 
18
- def __init__(self, func_graph, arch):
18
+ def __init__(self, func_graph, func_args, arch):
19
19
  self.func_graph = func_graph
20
+ self.func_args = func_args
20
21
  self.arch = arch
21
22
  self.varid_to_vvar: dict[int, VirtualVariable] = {}
22
23
  self.all_vvar_definitions: dict[VirtualVariable, CodeLocation] = {}
@@ -4,7 +4,7 @@ import logging
4
4
  from collections import defaultdict
5
5
 
6
6
  from ailment.statement import Statement, Assignment, Call, Label
7
- from ailment.expression import VirtualVariable, Expression
7
+ from ailment.expression import VirtualVariable, VirtualVariableCategory, Expression
8
8
 
9
9
  from angr.utils.ail import is_phi_assignment
10
10
  from angr.utils.graph import GraphUtils
@@ -133,6 +133,16 @@ class SRDAView:
133
133
  predicater = RegVVarPredicate(reg_offset, vvars, self.model.arch)
134
134
  self._get_vvar_by_stmt(block_addr, block_idx, stmt_idx, op_type, predicater.predicate)
135
135
 
136
+ if not vvars:
137
+ # not found - check function arguments
138
+ for func_arg in self.model.func_args:
139
+ if isinstance(func_arg, VirtualVariable):
140
+ func_arg_category = func_arg.oident[0]
141
+ if func_arg_category == VirtualVariableCategory.REGISTER:
142
+ func_arg_regoff = func_arg.oident[1]
143
+ if func_arg_regoff == reg_offset:
144
+ vvars.add(func_arg)
145
+
136
146
  assert len(vvars) <= 1
137
147
  return next(iter(vvars), None)
138
148
 
@@ -149,6 +159,15 @@ class SRDAView:
149
159
  predicater = StackVVarPredicate(stack_offset, size, vvars)
150
160
  self._get_vvar_by_stmt(block_addr, block_idx, stmt_idx, op_type, predicater.predicate)
151
161
 
162
+ if not vvars:
163
+ # not found - check function arguments
164
+ for func_arg in self.model.func_args:
165
+ if isinstance(func_arg, VirtualVariable):
166
+ func_arg_category = func_arg.oident[0]
167
+ if func_arg_category == VirtualVariableCategory.STACK:
168
+ func_arg_stackoff = func_arg.oident[1]
169
+ if func_arg_stackoff == stack_offset and func_arg.size == size:
170
+ vvars.add(func_arg)
152
171
  assert len(vvars) <= 1
153
172
  return next(iter(vvars), None)
154
173
 
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from ailment.block import Block
4
4
  from ailment.statement import Assignment, Call, Return
5
+ from ailment.expression import VirtualVariable
5
6
  import networkx
6
7
 
7
8
  from angr.knowledge_plugins.functions import Function
@@ -24,6 +25,7 @@ class SReachingDefinitionsAnalysis(Analysis):
24
25
  subject,
25
26
  func_addr: int | None = None,
26
27
  func_graph: networkx.DiGraph[Block] | None = None,
28
+ func_args: set[VirtualVariable] | None = None,
27
29
  track_tmps: bool = False,
28
30
  ):
29
31
  if isinstance(subject, Block):
@@ -39,13 +41,14 @@ class SReachingDefinitionsAnalysis(Analysis):
39
41
 
40
42
  self.func_graph = func_graph
41
43
  self.func_addr = func_addr if func_addr is not None else self.func.addr if self.func is not None else None
44
+ self.func_args = func_args
42
45
  self._track_tmps = track_tmps
43
46
 
44
47
  self._bp_as_gpr = False
45
48
  if self.func is not None:
46
49
  self._bp_as_gpr = self.func.info.get("bp_as_gpr", False)
47
50
 
48
- self.model = SRDAModel(func_graph, self.project.arch)
51
+ self.model = SRDAModel(func_graph, func_args, self.project.arch)
49
52
 
50
53
  self._analyze()
51
54
 
@@ -66,6 +69,13 @@ class SReachingDefinitionsAnalysis(Analysis):
66
69
  # find all explicit vvar uses
67
70
  vvar_uselocs = get_vvar_uselocs(blocks.values())
68
71
 
72
+ # update vvar definitions using function arguments
73
+ if self.func_args:
74
+ for vvar in self.func_args:
75
+ if vvar not in vvar_deflocs:
76
+ vvar_deflocs[vvar] = ExternalCodeLocation()
77
+ self.model.func_args = self.func_args
78
+
69
79
  # update model
70
80
  for vvar, defloc in vvar_deflocs.items():
71
81
  self.model.varid_to_vvar[vvar.varid] = vvar
@@ -2,6 +2,7 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  from typing import Any, TYPE_CHECKING
5
+ import contextlib
5
6
  import re
6
7
  import logging
7
8
  from collections import defaultdict
@@ -15,7 +16,6 @@ from angr.knowledge_plugins import Function
15
16
  from angr.block import BlockNode
16
17
  from angr.errors import SimTranslationError
17
18
  from .analysis import Analysis
18
- import contextlib
19
19
 
20
20
  try:
21
21
  import pypcode
@@ -702,21 +702,31 @@ class StackPointerTracker(Analysis, ForwardAnalysis):
702
702
  # who are we calling?
703
703
  callees = [] if self._func is None else self._find_callees(node)
704
704
  if callees:
705
- if (
706
- len(callees) == 1
707
- and callees[0].info.get("is_rust_probestack", False) is True
708
- and self.project.arch.name == "AMD64"
709
- ):
710
- # special-case for rust_probestack: sp = sp - rax right after returning from the call, so we need
711
- # to keep track of rax
712
- for stmt in reversed(vex_block.statements):
713
- if (
714
- isinstance(stmt, pyvex.IRStmt.Put)
715
- and stmt.offset == self.project.arch.registers["rax"][0]
716
- and isinstance(stmt.data, pyvex.IRExpr.Const)
717
- ):
718
- state.put(stmt.offset, Constant(stmt.data.con.value), force=True)
719
- break
705
+ if len(callees) == 1:
706
+ callee = callees[0]
707
+
708
+ if callee.info.get("is_rust_probestack", False) is True and self.project.arch.name == "AMD64":
709
+ # special-case for rust_probestack: sp = sp - rax right after returning from the call, so we
710
+ # need to keep track of rax
711
+ for stmt in reversed(vex_block.statements):
712
+ if (
713
+ isinstance(stmt, pyvex.IRStmt.Put)
714
+ and stmt.offset == self.project.arch.registers["rax"][0]
715
+ and isinstance(stmt.data, pyvex.IRExpr.Const)
716
+ ):
717
+ state.put(stmt.offset, Constant(stmt.data.con.value), force=True)
718
+ break
719
+ elif callee.name == "__chkstk":
720
+ # special-case for __chkstk: sp = sp - rax right after returning from the call, so we need to
721
+ # keep track of rax
722
+ for stmt in reversed(vex_block.statements):
723
+ if (
724
+ isinstance(stmt, pyvex.IRStmt.Put)
725
+ and stmt.offset == self.project.arch.registers["rax"][0]
726
+ and isinstance(stmt.data, pyvex.IRExpr.Const)
727
+ ):
728
+ state.put(stmt.offset, Constant(stmt.data.con.value), force=True)
729
+ break
720
730
 
721
731
  callee_cleanups = [
722
732
  callee
@@ -74,17 +74,28 @@ class SimEngineVRAIL(
74
74
 
75
75
  elif dst_type is ailment.Expr.VirtualVariable:
76
76
  data = self._expr(stmt.src)
77
- self._assign_to_vvar(
77
+ variable = self._assign_to_vvar(
78
78
  stmt.dst, data, src=stmt.src, dst=stmt.dst, vvar_id=self._mapped_vvarid(stmt.dst.varid)
79
79
  )
80
80
 
81
+ if variable is not None and isinstance(stmt.src, ailment.Expr.Phi):
82
+ # this is a phi node - we update variable manager's phi variable tracking
83
+ for _, vvar in stmt.src.src_and_vvars:
84
+ if vvar is not None:
85
+ r = self._read_from_vvar(vvar, expr=stmt.src, vvar_id=self._mapped_vvarid(vvar.varid))
86
+ if r.variable is not None:
87
+ pv = self.variable_manager[self.func_addr]._phi_variables
88
+ if variable not in pv:
89
+ pv[variable] = set()
90
+ pv[variable].add(r.variable)
91
+
81
92
  if stmt.dst.was_stack and isinstance(stmt.dst.stack_offset, int):
82
93
  # store it to the stack region in case it's directly referenced later
83
94
  self._store(
84
95
  RichR(self.state.stack_address(stmt.dst.stack_offset)),
85
96
  data,
86
97
  stmt.dst.bits // self.arch.byte_width,
87
- stmt=stmt,
98
+ atom=stmt.dst,
88
99
  )
89
100
 
90
101
  else:
@@ -94,10 +105,11 @@ class SimEngineVRAIL(
94
105
  addr_r = self._expr_bv(stmt.addr)
95
106
  data = self._expr(stmt.data)
96
107
  size = stmt.size
97
- self._store(addr_r, data, size, stmt=stmt)
108
+ self._store(addr_r, data, size, atom=stmt)
98
109
 
99
- def _handle_stmt_Jump(self, stmt):
100
- pass
110
+ def _handle_stmt_Jump(self, stmt: ailment.Stmt.Jump):
111
+ if not isinstance(stmt.target, ailment.Expr.Const):
112
+ self._expr(stmt.target)
101
113
 
102
114
  def _handle_stmt_ConditionalJump(self, stmt):
103
115
  self._expr(stmt.condition)
@@ -149,7 +161,7 @@ class SimEngineVRAIL(
149
161
  prototype_libname = func.prototype_libname
150
162
 
151
163
  # dump the type of the return value
152
- ret_ty = typevars.TypeVariable() if prototype is not None else typevars.TypeVariable()
164
+ ret_ty = typevars.TypeVariable()
153
165
  if isinstance(ret_ty, typeconsts.BottomType):
154
166
  ret_ty = typevars.TypeVariable()
155
167
 
@@ -218,7 +230,7 @@ class SimEngineVRAIL(
218
230
  prototype_libname = func.prototype_libname
219
231
 
220
232
  # dump the type of the return value
221
- ret_ty = typevars.TypeVariable() if prototype is not None else typevars.TypeVariable()
233
+ ret_ty = typevars.TypeVariable()
222
234
  if isinstance(ret_ty, typeconsts.BottomType):
223
235
  ret_ty = typevars.TypeVariable()
224
236
 
@@ -387,7 +387,7 @@ class SimEngineVRBase(
387
387
  ) or not create_variable:
388
388
  # only store the value. don't worry about variables.
389
389
  self.vvar_region[vvar_id] = richr.data
390
- return
390
+ return None
391
391
 
392
392
  codeloc: CodeLocation = self._codeloc()
393
393
  data = richr.data
@@ -463,10 +463,14 @@ class SimEngineVRBase(
463
463
  else:
464
464
  typevar = self.state.typevars.get_type_variable(variable, codeloc)
465
465
  self.state.add_type_constraint(typevars.Subtype(richr.typevar, typevar))
466
+ # the constraint below is a default constraint that may conflict with more specific ones with different
467
+ # sizes; we post-process at the very end of VRA to remove conflicting default constraints.
466
468
  self.state.add_type_constraint(typevars.Subtype(typevar, typeconsts.int_type(variable.size * 8)))
467
469
 
470
+ return variable
471
+
468
472
  def _store(
469
- self, richr_addr: RichR[claripy.ast.BV], data: RichR[claripy.ast.BV | claripy.ast.FP], size, stmt=None
473
+ self, richr_addr: RichR[claripy.ast.BV], data: RichR[claripy.ast.BV | claripy.ast.FP], size, atom=None
470
474
  ): # pylint:disable=unused-argument
471
475
  """
472
476
 
@@ -481,19 +485,19 @@ class SimEngineVRBase(
481
485
 
482
486
  if addr.concrete:
483
487
  # fully concrete. this is a global address
484
- self._store_to_global(addr.concrete_value, data, size, stmt=stmt)
488
+ self._store_to_global(addr.concrete_value, data, size, stmt=atom)
485
489
  stored = True
486
490
  elif self._addr_has_concrete_base(addr) and (parsed := self._parse_offsetted_addr(addr)) is not None:
487
491
  # we are storing to a concrete global address with an offset
488
492
  base_addr, offset, elem_size = parsed
489
- self._store_to_global(base_addr.concrete_value, data, size, stmt=stmt, offset=offset, elem_size=elem_size)
493
+ self._store_to_global(base_addr.concrete_value, data, size, stmt=atom, offset=offset, elem_size=elem_size)
490
494
  stored = True
491
495
  else:
492
496
  if self.state.is_stack_address(addr):
493
497
  stack_offset = self.state.get_stack_offset(addr)
494
498
  if stack_offset is not None:
495
499
  # fast path: Storing data to stack
496
- self._store_to_stack(stack_offset, data, size, stmt=stmt)
500
+ self._store_to_stack(stack_offset, data, size, atom=atom)
497
501
  stored = True
498
502
 
499
503
  if not stored:
@@ -504,21 +508,21 @@ class SimEngineVRBase(
504
508
  codeloc = self._codeloc()
505
509
  if existing_vars:
506
510
  for existing_var, _ in list(existing_vars):
507
- self.variable_manager[self.func_addr].remove_variable_by_atom(codeloc, existing_var, stmt)
511
+ self.variable_manager[self.func_addr].remove_variable_by_atom(codeloc, existing_var, atom)
508
512
 
509
513
  # storing to a location specified by a pointer whose value cannot be determined at this point
510
- self._store_to_variable(richr_addr, size, stmt=stmt)
514
+ self._store_to_variable(richr_addr, size)
511
515
 
512
516
  def _store_to_stack(
513
- self, stack_offset, data: RichR[claripy.ast.BV | claripy.ast.FP], size, offset=0, stmt=None, endness=None
517
+ self, stack_offset, data: RichR[claripy.ast.BV | claripy.ast.FP], size, offset=0, atom=None, endness=None
514
518
  ):
515
- if stmt is None:
519
+ if atom is None:
516
520
  existing_vars = self.variable_manager[self.func_addr].find_variables_by_stmt(
517
521
  self.block.addr, self.stmt_idx, "memory"
518
522
  )
519
523
  else:
520
524
  existing_vars = self.variable_manager[self.func_addr].find_variables_by_atom(
521
- self.block.addr, self.stmt_idx, stmt
525
+ self.block.addr, self.stmt_idx, atom
522
526
  )
523
527
  if not existing_vars:
524
528
  variable = SimStackVariable(
@@ -562,7 +566,7 @@ class SimEngineVRBase(
562
566
  var,
563
567
  offset_into_var,
564
568
  codeloc,
565
- atom=stmt,
569
+ atom=atom,
566
570
  )
567
571
 
568
572
  # create type constraints
@@ -673,9 +677,7 @@ class SimEngineVRBase(
673
677
  self.state.add_type_constraint(typevars.Subtype(store_typevar, typeconsts.TopType()))
674
678
  self.state.add_type_constraint(typevars.Subtype(data.typevar, store_typevar))
675
679
 
676
- def _store_to_variable(
677
- self, richr_addr: RichR[claripy.ast.BV], size: int, stmt=None
678
- ): # pylint:disable=unused-argument
680
+ def _store_to_variable(self, richr_addr: RichR[claripy.ast.BV], size: int):
679
681
  addr_variable = richr_addr.variable
680
682
  codeloc = self._codeloc()
681
683
 
@@ -74,7 +74,7 @@ class SimEngineVRVEX(
74
74
  size = stmt.data.result_size(self.tyenv) // 8
75
75
  r = self._expr(stmt.data)
76
76
 
77
- self._store(addr_r, r, size, stmt=stmt)
77
+ self._store(addr_r, r, size, atom=stmt)
78
78
 
79
79
  def _handle_stmt_StoreG(self, stmt):
80
80
  guard = self._expr(stmt.guard)
@@ -82,7 +82,7 @@ class SimEngineVRVEX(
82
82
  addr = self._expr_bv(stmt.addr)
83
83
  size = stmt.data.result_size(self.tyenv) // 8
84
84
  data = self._expr(stmt.data)
85
- self._store(addr, data, size, stmt=stmt)
85
+ self._store(addr, data, size, atom=stmt)
86
86
 
87
87
  def _handle_stmt_LoadG(self, stmt):
88
88
  guard = self._expr(stmt.guard)
@@ -20,7 +20,8 @@ from angr.knowledge_plugins import Function
20
20
  from angr.sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
21
21
  from angr.engines.vex.claripy.irop import vexop_to_simop
22
22
  from angr.analyses import ForwardAnalysis, visitors
23
- from angr.analyses.typehoon.typevars import Equivalence, TypeVariable, TypeVariables
23
+ from angr.analyses.typehoon.typevars import Equivalence, TypeVariable, TypeVariables, Subtype, DerivedTypeVariable
24
+ from angr.analyses.typehoon.typeconsts import Int
24
25
  from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
25
26
  from .engine_vex import SimEngineVRVEX
26
27
  from .engine_ail import SimEngineVRAIL
@@ -500,6 +501,26 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
500
501
  for tv in sorted_typevars[1:]:
501
502
  self.type_constraints[self.func_typevar].add(Equivalence(sorted_typevars[0], tv))
502
503
 
504
+ # remove default constraints with size conflicts
505
+ for func_var in self.type_constraints:
506
+ var_to_subtyping: dict[TypeVariable, list[Subtype]] = defaultdict(list)
507
+ for constraint in self.type_constraints[func_var]:
508
+ if isinstance(constraint, Subtype) and isinstance(constraint.sub_type, TypeVariable):
509
+ var_to_subtyping[constraint.sub_type].append(constraint)
510
+
511
+ for constraints in var_to_subtyping.values():
512
+ if len(constraints) <= 1:
513
+ continue
514
+ default_subtyping_constraints = set()
515
+ has_nondefault_subtyping_constraints = False
516
+ for constraint in constraints:
517
+ if isinstance(constraint.super_type, Int):
518
+ default_subtyping_constraints.add(constraint)
519
+ elif isinstance(constraint.super_type, DerivedTypeVariable) and constraint.super_type.labels:
520
+ has_nondefault_subtyping_constraints = True
521
+ if has_nondefault_subtyping_constraints:
522
+ self.type_constraints[func_var].difference_update(default_subtyping_constraints)
523
+
503
524
  self.variable_manager[self.function.addr].ret_val_size = self.ret_val_size
504
525
 
505
526
  self.delayed_type_constraints = None