angr 9.2.61__py3-none-win_amd64.whl → 9.2.63__py3-none-win_amd64.whl

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (37)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_fast.py +18 -20
  3. angr/analyses/decompiler/ail_simplifier.py +15 -30
  4. angr/analyses/decompiler/block_simplifier.py +11 -7
  5. angr/analyses/decompiler/clinic.py +144 -0
  6. angr/analyses/decompiler/condition_processor.py +4 -2
  7. angr/analyses/decompiler/peephole_optimizations/eager_eval.py +2 -0
  8. angr/analyses/decompiler/structured_codegen/c.py +20 -16
  9. angr/analyses/decompiler/structuring/phoenix.py +6 -10
  10. angr/analyses/disassembly.py +7 -11
  11. angr/analyses/propagator/engine_ail.py +14 -42
  12. angr/analyses/propagator/outdated_definition_walker.py +17 -53
  13. angr/analyses/propagator/propagator.py +1 -0
  14. angr/analyses/reaching_definitions/engine_ail.py +20 -9
  15. angr/analyses/reaching_definitions/engine_vex.py +20 -5
  16. angr/analyses/reaching_definitions/function_handler.py +9 -3
  17. angr/analyses/reaching_definitions/rd_state.py +34 -13
  18. angr/analyses/reaching_definitions/reaching_definitions.py +92 -16
  19. angr/analyses/variable_recovery/engine_vex.py +20 -0
  20. angr/analyses/variable_recovery/variable_recovery_fast.py +5 -0
  21. angr/calling_conventions.py +28 -0
  22. angr/knowledge_plugins/key_definitions/live_definitions.py +15 -5
  23. angr/knowledge_plugins/key_definitions/liveness.py +94 -0
  24. angr/knowledge_plugins/key_definitions/rd_model.py +90 -9
  25. angr/knowledge_plugins/propagations/states.py +3 -0
  26. angr/knowledge_plugins/variables/variable_manager.py +26 -4
  27. angr/lib/angr_native.dll +0 -0
  28. angr/project.py +2 -2
  29. angr/sim_type.py +2 -2
  30. angr/state_plugins/unicorn_engine.py +9 -1
  31. angr/storage/memory_mixins/multi_value_merger_mixin.py +7 -2
  32. angr/utils/timing.py +12 -5
  33. {angr-9.2.61.dist-info → angr-9.2.63.dist-info}/METADATA +15 -15
  34. {angr-9.2.61.dist-info → angr-9.2.63.dist-info}/RECORD +37 -36
  35. {angr-9.2.61.dist-info → angr-9.2.63.dist-info}/WHEEL +1 -1
  36. {angr-9.2.61.dist-info → angr-9.2.63.dist-info}/LICENSE +0 -0
  37. {angr-9.2.61.dist-info → angr-9.2.63.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,16 @@
1
+ # pylint:disable=consider-using-in
1
2
  from typing import Optional, Callable, TYPE_CHECKING
2
3
 
3
4
  from ailment import Block, Stmt, Expr, AILBlockWalker
4
5
 
5
- from ...errors import SimMemoryMissingError
6
6
  from ...code_location import CodeLocation
7
+ from ...knowledge_plugins.key_definitions.constants import OP_BEFORE, OP_AFTER
8
+ from ...knowledge_plugins.key_definitions import atoms
7
9
 
8
10
  if TYPE_CHECKING:
9
11
  from archinfo import Arch
10
12
  from .propagator import PropagatorAILState
11
- from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues
12
- from angr.knowledge_plugins.key_definitions import LiveDefinitions
13
+ from angr.analyses.reaching_definitions import ReachingDefinitionsModel
13
14
 
14
15
 
15
16
  class OutdatedDefinitionWalker(AILBlockWalker):
@@ -21,20 +22,17 @@ class OutdatedDefinitionWalker(AILBlockWalker):
21
22
  self,
22
23
  expr,
23
24
  expr_defat: CodeLocation,
24
- livedefs_defat: "LiveDefinitions",
25
25
  current_loc: CodeLocation,
26
- livedefs_currentloc: "LiveDefinitions",
27
26
  state: "PropagatorAILState",
28
27
  arch: "Arch",
29
28
  avoid: Optional[Expr.Expression] = None,
30
29
  extract_offset_to_sp: Callable = None,
30
+ rda: "ReachingDefinitionsModel" = None,
31
31
  ):
32
32
  super().__init__()
33
33
  self.expr = expr
34
34
  self.expr_defat = expr_defat
35
- self.livedefs_defat = livedefs_defat
36
35
  self.current_loc = current_loc
37
- self.livedefs_currentloc = livedefs_currentloc
38
36
  self.state = state
39
37
  self.avoid = avoid
40
38
  self.arch = arch
@@ -45,6 +43,7 @@ class OutdatedDefinitionWalker(AILBlockWalker):
45
43
  self.expr_handlers[Expr.VEXCCallExpression] = self._handle_VEXCCallExpression
46
44
  self.out_dated = False
47
45
  self.has_avoid = False
46
+ self.rda = rda
48
47
 
49
48
  # pylint:disable=unused-argument
50
49
  def _handle_Tmp(self, expr_idx: int, expr: Expr.Tmp, stmt_idx: int, stmt: Stmt.Assignment, block: Optional[Block]):
@@ -63,19 +62,8 @@ class OutdatedDefinitionWalker(AILBlockWalker):
63
62
  self.has_avoid = True
64
63
 
65
64
  # is the used register still alive at this point?
66
- try:
67
- reg_vals: "MultiValues" = self.livedefs_defat.register_definitions.load(expr.reg_offset, size=expr.size)
68
- defs_defat = list(self.livedefs_defat.extract_defs_from_mv(reg_vals))
69
- except SimMemoryMissingError:
70
- defs_defat = []
71
-
72
- try:
73
- reg_vals: "MultiValues" = self.livedefs_currentloc.register_definitions.load(
74
- expr.reg_offset, size=expr.size
75
- )
76
- defs_currentloc = list(self.livedefs_currentloc.extract_defs_from_mv(reg_vals))
77
- except SimMemoryMissingError:
78
- defs_currentloc = []
65
+ defs_defat = self.rda.get_defs(atoms.Register(expr.reg_offset, expr.size), self.expr_defat, OP_AFTER)
66
+ defs_currentloc = self.rda.get_defs(atoms.Register(expr.reg_offset, expr.size), self.current_loc, OP_BEFORE)
79
67
 
80
68
  codelocs_defat = {def_.codeloc for def_ in defs_defat}
81
69
  codelocs_currentloc = {def_.codeloc for def_ in defs_currentloc}
@@ -83,31 +71,19 @@ class OutdatedDefinitionWalker(AILBlockWalker):
83
71
  self.out_dated = True
84
72
 
85
73
  def _handle_Load(self, expr_idx: int, expr: Expr.Load, stmt_idx: int, stmt: Stmt.Statement, block: Optional[Block]):
86
- if self.avoid is not None and ( # pylint:disable=consider-using-in
87
- expr == self.avoid or expr.addr == self.avoid
88
- ):
74
+ if self.avoid is not None and (expr == self.avoid or expr.addr == self.avoid):
89
75
  self.has_avoid = True
90
76
 
91
77
  if isinstance(expr.addr, Expr.StackBaseOffset):
92
78
  sp_offset = self.extract_offset_to_sp(expr.addr)
93
79
 
94
80
  if sp_offset is not None:
95
- stack_addr = self.livedefs_defat.stack_offset_to_stack_addr(sp_offset)
96
- try:
97
- mem_vals: "MultiValues" = self.livedefs_defat.stack_definitions.load(
98
- stack_addr, size=expr.size, endness=expr.endness
99
- )
100
- defs_defat = list(self.livedefs_defat.extract_defs_from_mv(mem_vals))
101
- except SimMemoryMissingError:
102
- defs_defat = []
103
-
104
- try:
105
- mem_vals: "MultiValues" = self.livedefs_currentloc.stack_definitions.load(
106
- stack_addr, size=expr.size, endness=expr.endness
107
- )
108
- defs_currentloc = list(self.livedefs_defat.extract_defs_from_mv(mem_vals))
109
- except SimMemoryMissingError:
110
- defs_currentloc = []
81
+ defs_defat = self.rda.get_defs(
82
+ atoms.MemoryLocation(atoms.SpOffset(expr.bits, sp_offset), expr.size), self.expr_defat, OP_AFTER
83
+ )
84
+ defs_currentloc = self.rda.get_defs(
85
+ atoms.MemoryLocation(atoms.SpOffset(expr.bits, sp_offset), expr.size), self.current_loc, OP_BEFORE
86
+ )
111
87
 
112
88
  codelocs_defat = {def_.codeloc for def_ in defs_defat}
113
89
  codelocs_currentloc = {def_.codeloc for def_ in defs_currentloc}
@@ -126,21 +102,9 @@ class OutdatedDefinitionWalker(AILBlockWalker):
126
102
 
127
103
  elif isinstance(expr.addr, Expr.Const):
128
104
  mem_addr = expr.addr.value
129
- try:
130
- mem_vals: "MultiValues" = self.livedefs_defat.memory_definitions.load(
131
- mem_addr, size=expr.size, endness=expr.endness
132
- )
133
- defs_defat = list(self.livedefs_defat.extract_defs_from_mv(mem_vals))
134
- except SimMemoryMissingError:
135
- defs_defat = []
136
105
 
137
- try:
138
- mem_vals: "MultiValues" = self.livedefs_currentloc.memory_definitions.load(
139
- mem_addr, size=expr.size, endness=expr.endness
140
- )
141
- defs_currentloc = list(self.livedefs_defat.extract_defs_from_mv(mem_vals))
142
- except SimMemoryMissingError:
143
- defs_currentloc = []
106
+ defs_defat = self.rda.get_defs(atoms.MemoryLocation(mem_addr, expr.size), self.expr_defat, OP_AFTER)
107
+ defs_currentloc = self.rda.get_defs(atoms.MemoryLocation(mem_addr, expr.size), self.current_loc, OP_BEFORE)
144
108
 
145
109
  codelocs_defat = {def_.codeloc for def_ in defs_defat}
146
110
  codelocs_currentloc = {def_.codeloc for def_ in defs_currentloc}
@@ -257,6 +257,7 @@ class PropagatorAnalysis(ForwardAnalysis, Analysis): # pylint:disable=abstract-
257
257
  # make a copy of the state if it's not the initial state
258
258
  state = state.copy()
259
259
  state._equivalence.clear()
260
+ state.init_replacements()
260
261
  else:
261
262
  # clear self._initial_state so that we *do not* run this optimization again!
262
263
  self._initial_state = None
@@ -100,20 +100,26 @@ class SimEngineRDAIL(
100
100
 
101
101
  def _set_codeloc(self):
102
102
  # TODO do we want a better mechanism to specify context updates?
103
- self.state.move_codelocs(
104
- CodeLocation(
105
- self.block.addr,
106
- self.stmt_idx,
107
- ins_addr=self.ins_addr,
108
- block_idx=self.block.idx,
109
- context=self.state.codeloc.context,
110
- )
103
+ new_codeloc = CodeLocation(
104
+ self.block.addr,
105
+ self.stmt_idx,
106
+ ins_addr=self.ins_addr,
107
+ block_idx=self.block.idx,
108
+ context=self.state.codeloc.context,
111
109
  )
110
+ self.state.move_codelocs(new_codeloc)
111
+ self.state.analysis.model.at_new_stmt(new_codeloc)
112
112
 
113
113
  #
114
114
  # AIL statement handlers
115
115
  #
116
116
 
117
+ def _process_Stmt(self, whitelist=None):
118
+ super()._process_Stmt(whitelist=whitelist)
119
+
120
+ if self.state.analysis:
121
+ self.state.analysis.model.complete_loc()
122
+
117
123
  def _handle_Stmt(self, stmt):
118
124
  if self.state.analysis:
119
125
  self.state.analysis.stmt_observe(self.stmt_idx, stmt, self.block, self.state, OP_BEFORE)
@@ -250,6 +256,11 @@ class SimEngineRDAIL(
250
256
  ip = Register(self.arch.ip_offset, self.arch.bytes)
251
257
  self.state.kill_definitions(ip)
252
258
 
259
+ statement = self.block.statements[self.stmt_idx]
260
+ caller_will_handle_single_ret = True
261
+ if hasattr(statement, "dst") and statement.dst != stmt.ret_expr:
262
+ caller_will_handle_single_ret = False
263
+
253
264
  data = FunctionCallData(
254
265
  self.state.codeloc,
255
266
  self._function_handler.make_function_codeloc(
@@ -261,7 +272,7 @@ class SimEngineRDAIL(
261
272
  name=func_name,
262
273
  args_values=[self._expr(arg) for arg in stmt.args] if stmt.args is not None else None,
263
274
  redefine_locals=stmt.args is None and not is_expr,
264
- caller_will_handle_single_ret=True,
275
+ caller_will_handle_single_ret=caller_will_handle_single_ret,
265
276
  ret_atoms={Atom.from_ail_expr(stmt.ret_expr, self.arch)} if stmt.ret_expr is not None else None,
266
277
  )
267
278
 
@@ -90,9 +90,11 @@ class SimEngineRDVEX(
90
90
 
91
91
  def _set_codeloc(self):
92
92
  # TODO do we want a better mechanism to specify context updates?
93
- self.state.move_codelocs(
94
- CodeLocation(self.block.addr, self.stmt_idx, ins_addr=self.ins_addr, context=self.state.codeloc.context)
93
+ new_codeloc = CodeLocation(
94
+ self.block.addr, self.stmt_idx, ins_addr=self.ins_addr, context=self.state.codeloc.context
95
95
  )
96
+ self.state.move_codelocs(new_codeloc)
97
+ self.state.analysis.model.at_new_stmt(new_codeloc)
96
98
 
97
99
  #
98
100
  # VEX statement handlers
@@ -211,7 +213,10 @@ class SimEngineRDVEX(
211
213
  atom = MemoryLocation(a, size)
212
214
  tags = None
213
215
  elif self.state.is_stack_address(a):
214
- atom = MemoryLocation(SpOffset(self.arch.bits, self.state.get_stack_offset(a)), size)
216
+ offset = self.state.get_stack_offset(a)
217
+ if offset is None:
218
+ continue
219
+ atom = MemoryLocation(SpOffset(self.arch.bits, offset), size)
215
220
  function_address = None # we cannot get the function address in the middle of a store if a CFG
216
221
  # does not exist. you should backpatch the function address later using
217
222
  # the 'ins_addr' metadata entry.
@@ -267,6 +272,14 @@ class SimEngineRDVEX(
267
272
  _ = self._expr(stmt.guard)
268
273
  target = stmt.dst.value
269
274
  self.state.mark_guard(target)
275
+ if self.state.analysis is not None:
276
+ self.state.analysis.exit_observe(
277
+ self.block.addr,
278
+ self.stmt_idx,
279
+ self.block,
280
+ self.state,
281
+ node_idx=self.block.block_idx if hasattr(self.block, "block_idx") else None,
282
+ )
270
283
  if (
271
284
  self.block.instruction_addrs
272
285
  and self.ins_addr in self.block.instruction_addrs
@@ -669,12 +682,14 @@ class SimEngineRDVEX(
669
682
  # we do not support division between two real multivalues
670
683
  r = MultiValues(self.state.top(bits))
671
684
  elif expr0_v is None and expr1_v is not None:
672
- if expr0.count() == 1 and 0 in expr0:
685
+ if expr1_v == 0:
686
+ r = MultiValues(self.state.top(bits))
687
+ elif expr0.count() == 1 and 0 in expr0:
673
688
  vs = {v / expr1_v for v in expr0[0]}
674
689
  r = MultiValues(offset_to_values={0: vs})
675
690
  elif expr0_v is not None and expr1_v is None:
676
691
  if expr1.count() == 1 and 0 in expr1:
677
- vs = {v / expr0_v for v in expr1[0]}
692
+ vs = {expr0_v / v for v in expr1[0] if (not v.concrete) or v.concrete_value != 0}
678
693
  r = MultiValues(offset_to_values={0: vs})
679
694
  else:
680
695
  if expr0_v.concrete and expr1_v.concrete:
@@ -52,7 +52,7 @@ class FunctionCallData:
52
52
 
53
53
  Function handler contract:
54
54
 
55
- - If redefine_locals is unset, do not adjust any artifacts of the function call abstration, such as the stack
55
+ - If redefine_locals is unset, do not adjust any artifacts of the function call abstraction, such as the stack
56
56
  pointer, the caller saved registers, etc.
57
57
  - If caller_will_handle_single_ret is set, and there is a single entry in `ret_atoms`, do not apply to the state
58
58
  effects modifying this atom. Instead, set `ret_values` and `ret_values_deps` to the values and deps which are
@@ -137,7 +137,13 @@ class FunctionCallData:
137
137
  )
138
138
  else:
139
139
  self.effects.append(
140
- FunctionEffect(dest, set(sources), value=value, apply_at_callsite=apply_at_callsite, tags=tags)
140
+ FunctionEffect(
141
+ dest,
142
+ set(sources),
143
+ value=value,
144
+ apply_at_callsite=apply_at_callsite,
145
+ tags=tags,
146
+ )
141
147
  )
142
148
 
143
149
 
@@ -333,7 +339,7 @@ class FunctionHandler:
333
339
  mv, defs = state.kill_and_add_definition(
334
340
  effect.dest,
335
341
  value,
336
- endness=state.arch.memory_endness,
342
+ endness=None,
337
343
  uses=effect.sources_defns or set(),
338
344
  tags=effect.tags,
339
345
  )
@@ -318,6 +318,10 @@ class ReachingDefinitionsState:
318
318
  sp = self.annotate_with_def(self._initial_stack_pointer(), sp_def)
319
319
  self.register_definitions.store(self.arch.sp_offset, sp)
320
320
 
321
+ ex_loc = ExternalCodeLocation(call_string)
322
+ if self.analysis is not None:
323
+ self.analysis.model.at_new_stmt(ex_loc)
324
+
321
325
  if cc is not None:
322
326
  prototype = self.analysis.kb.functions[func_addr].prototype
323
327
  if prototype is not None:
@@ -328,20 +332,20 @@ class ReachingDefinitionsState:
328
332
  # FIXME: implement reg_offset handling in SimRegArg
329
333
  reg_offset = self.arch.registers[arg.reg_name][0]
330
334
  reg_atom = Register(reg_offset, self.arch.bytes)
331
- reg_def = Definition(
332
- reg_atom, ExternalCodeLocation(call_string), tags={ParameterTag(function=func_addr)}
333
- )
335
+ reg_def = Definition(reg_atom, ex_loc, tags={ParameterTag(function=func_addr)})
334
336
  self.all_definitions.add(reg_def)
337
+ if self.analysis is not None:
338
+ self.analysis.model.add_def(reg_def, ex_loc)
335
339
  reg = self.annotate_with_def(self.top(self.arch.bits), reg_def)
336
340
  self.register_definitions.store(reg_offset, reg)
337
341
 
338
342
  # initialize stack parameters
339
343
  elif isinstance(arg, SimStackArg):
340
344
  ml_atom = MemoryLocation(SpOffset(self.arch.bits, arg.stack_offset), arg.size)
341
- ml_def = Definition(
342
- ml_atom, ExternalCodeLocation(call_string), tags={ParameterTag(function=func_addr)}
343
- )
345
+ ml_def = Definition(ml_atom, ex_loc, tags={ParameterTag(function=func_addr)})
344
346
  self.all_definitions.add(ml_def)
347
+ if self.analysis is not None:
348
+ self.analysis.model.add_def(ml_def, ex_loc)
345
349
  ml = self.annotate_with_def(self.top(self.arch.bits), ml_def)
346
350
  stack_address = self.get_stack_address(self.stack_address(arg.stack_offset))
347
351
  self.stack_definitions.store(stack_address, ml, endness=self.arch.memory_endness)
@@ -354,15 +358,19 @@ class ReachingDefinitionsState:
354
358
  raise TypeError("rtoc_value must be provided on PPC64.")
355
359
  offset, size = self.arch.registers["rtoc"]
356
360
  rtoc_atom = Register(offset, size)
357
- rtoc_def = Definition(rtoc_atom, ExternalCodeLocation(call_string), tags={InitialValueTag()})
361
+ rtoc_def = Definition(rtoc_atom, ex_loc, tags={InitialValueTag()})
358
362
  self.all_definitions.add(rtoc_def)
363
+ if self.analysis is not None:
364
+ self.analysis.model.add_def(rtoc_def, ex_loc)
359
365
  rtoc = self.annotate_with_def(claripy.BVV(rtoc_value, self.arch.bits), rtoc_def)
360
366
  self.register_definitions.store(offset, rtoc)
361
367
  elif self.arch.name.startswith("MIPS64"):
362
368
  offset, size = self.arch.registers["t9"]
363
369
  t9_atom = Register(offset, size)
364
- t9_def = Definition(t9_atom, ExternalCodeLocation(call_string), tags={InitialValueTag()})
370
+ t9_def = Definition(t9_atom, ex_loc, tags={InitialValueTag()})
365
371
  self.all_definitions.add(t9_def)
372
+ if self.analysis is not None:
373
+ self.analysis.model.add_def(t9_def, ex_loc)
366
374
  t9 = self.annotate_with_def(claripy.BVV(func_addr, self.arch.bits), t9_def)
367
375
  self.register_definitions.store(offset, t9)
368
376
  elif self.arch.name.startswith("MIPS"):
@@ -370,12 +378,17 @@ class ReachingDefinitionsState:
370
378
  l.warning("func_addr must not be None to initialize a function in mips")
371
379
  t9_offset = self.arch.registers["t9"][0]
372
380
  t9_atom = Register(t9_offset, self.arch.bytes)
373
- t9_def = Definition(t9_atom, ExternalCodeLocation(call_string), tags={InitialValueTag()})
381
+ t9_def = Definition(t9_atom, ex_loc, tags={InitialValueTag()})
374
382
  self.all_definitions.add(t9_def)
383
+ if self.analysis is not None:
384
+ self.analysis.model.add_def(t9_def, ex_loc)
375
385
  t9 = self.annotate_with_def(claripy.BVV(func_addr, self.arch.bits), t9_def)
376
386
  self.register_definitions.store(t9_offset, t9)
377
387
 
378
- def copy(self) -> "ReachingDefinitionsState":
388
+ if self.analysis is not None:
389
+ self.analysis.model.complete_loc()
390
+
391
+ def copy(self, discard_tmpdefs=False) -> "ReachingDefinitionsState":
379
392
  rd = ReachingDefinitionsState(
380
393
  self.codeloc,
381
394
  self.arch,
@@ -383,7 +396,7 @@ class ReachingDefinitionsState:
383
396
  track_tmps=self._track_tmps,
384
397
  track_consts=self._track_consts,
385
398
  analysis=self.analysis,
386
- live_definitions=self.live_definitions.copy(),
399
+ live_definitions=self.live_definitions.copy(discard_tmpdefs=discard_tmpdefs),
387
400
  canonical_size=self._canonical_size,
388
401
  heap_allocator=self.heap_allocator,
389
402
  environment=self._environment,
@@ -412,9 +425,13 @@ class ReachingDefinitionsState:
412
425
  Overwrite existing definitions w.r.t 'atom' with a dummy definition instance. A dummy definition will not be
413
426
  removed during simplification.
414
427
  """
428
+ existing_defs = set(self.live_definitions.get_definitions(atom))
415
429
 
416
430
  self.live_definitions.kill_definitions(atom)
417
431
 
432
+ for def_ in existing_defs:
433
+ self.analysis.model.kill_def(def_)
434
+
418
435
  def kill_and_add_definition(
419
436
  self,
420
437
  atom: Atom,
@@ -427,6 +444,7 @@ class ReachingDefinitionsState:
427
444
  override_codeloc: Optional[CodeLocation] = None,
428
445
  ) -> Tuple[Optional[MultiValues], Set[Definition]]:
429
446
  codeloc = override_codeloc or self.codeloc
447
+ existing_defs = set(self.live_definitions.get_definitions(atom))
430
448
  mv = self.live_definitions.kill_and_add_definition(
431
449
  atom, codeloc, data, dummy=dummy, tags=tags, endness=endness, annotated=annotated
432
450
  )
@@ -493,16 +511,19 @@ class ReachingDefinitionsState:
493
511
  else:
494
512
  defs = set()
495
513
 
514
+ for def_ in existing_defs:
515
+ self.analysis.model.kill_def(def_)
516
+ for def_ in defs:
517
+ self.analysis.model.add_def(def_, codeloc)
518
+
496
519
  return mv, defs
497
520
 
498
521
  def add_use(self, atom: Atom, expr: Optional[Any] = None) -> None:
499
522
  self.codeloc_uses.update(self.get_definitions(atom))
500
-
501
523
  self.live_definitions.add_use(atom, self.codeloc, expr=expr)
502
524
 
503
525
  def add_use_by_def(self, definition: Definition, expr: Optional[Any] = None) -> None:
504
526
  self.codeloc_uses.add(definition)
505
-
506
527
  self.live_definitions.add_use_by_def(definition, self.codeloc, expr=expr)
507
528
 
508
529
  def add_tmp_use(self, tmp: int, expr: Optional[Any] = None) -> None:
@@ -6,6 +6,7 @@ import ailment
6
6
  import pyvex
7
7
 
8
8
  from angr.analyses import ForwardAnalysis
9
+ from angr.analyses.reaching_definitions.external_codeloc import ExternalCodeLocation
9
10
  from ...block import Block
10
11
  from ...knowledge_plugins.cfg.cfg_node import CFGNode
11
12
  from ...codenode import CodeNode
@@ -20,14 +21,16 @@ from ..analysis import Analysis
20
21
  from .engine_ail import SimEngineRDAIL
21
22
  from .engine_vex import SimEngineRDVEX
22
23
  from .rd_state import ReachingDefinitionsState
23
- from .subject import Subject
24
+ from .subject import Subject, SubjectType
24
25
  from .function_handler import FunctionHandler, FunctionCallRelationships
25
26
  from .dep_graph import DepGraph
26
27
 
27
28
  if TYPE_CHECKING:
28
29
  from typing import Literal
29
30
 
30
- ObservationPoint = Tuple[Literal["insn", "node", "stmt"], Union[int, Tuple[int, int, int]], ObservationPointType]
31
+ ObservationPoint = Tuple[
32
+ Literal["insn", "node", "stmt", "exit"], Union[int, Tuple[int, int], Tuple[int, int, int]], ObservationPointType
33
+ ]
31
34
 
32
35
  l = logging.getLogger(name=__name__)
33
36
 
@@ -144,6 +147,10 @@ class ReachingDefinitionsAnalysis(
144
147
 
145
148
  self._node_iterations: DefaultDict[int, int] = defaultdict(int)
146
149
 
150
+ self.model: ReachingDefinitionsModel = ReachingDefinitionsModel(
151
+ func_addr=self.subject.content.addr if isinstance(self.subject.content, Function) else None
152
+ )
153
+
147
154
  self._engine_vex = SimEngineRDVEX(
148
155
  self.project,
149
156
  functions=self.kb.functions,
@@ -157,9 +164,6 @@ class ReachingDefinitionsAnalysis(
157
164
  )
158
165
 
159
166
  self._visited_blocks: Set[Any] = visited_blocks or set()
160
- self.model: ReachingDefinitionsModel = ReachingDefinitionsModel(
161
- func_addr=self.subject.content.addr if isinstance(self.subject.content, Function) else None
162
- )
163
167
  self.function_calls: Dict[CodeLocation, FunctionCallRelationships] = {}
164
168
 
165
169
  self._analyze()
@@ -226,11 +230,18 @@ class ReachingDefinitionsAnalysis(
226
230
 
227
231
  return self.observed_results[key]
228
232
 
229
- def node_observe(self, node_addr: int, state: ReachingDefinitionsState, op_type: ObservationPointType) -> None:
233
+ def node_observe(
234
+ self,
235
+ node_addr: int,
236
+ state: ReachingDefinitionsState,
237
+ op_type: ObservationPointType,
238
+ node_idx: Optional[int] = None,
239
+ ) -> None:
230
240
  """
231
241
  :param node_addr: Address of the node.
232
242
  :param state: The analysis state.
233
- :param op_type: Type of the bbservation point. Must be one of the following: OP_BEFORE, OP_AFTER.
243
+ :param op_type: Type of the observation point. Must be one of the following: OP_BEFORE, OP_AFTER.
244
+ :param node_idx: ID of the node. Used in AIL to differentiate blocks with the same address.
234
245
  """
235
246
 
236
247
  key = None
@@ -239,15 +250,21 @@ class ReachingDefinitionsAnalysis(
239
250
 
240
251
  if self._observe_all:
241
252
  observe = True
242
- key: ObservationPoint = ("node", node_addr, op_type)
253
+ key: ObservationPoint = (
254
+ ("node", node_addr, op_type) if node_idx is None else ("node", (node_addr, node_idx), op_type)
255
+ )
243
256
  elif self._observation_points is not None:
244
- key: ObservationPoint = ("node", node_addr, op_type)
257
+ key: ObservationPoint = (
258
+ ("node", node_addr, op_type) if node_idx is None else ("node", (node_addr, node_idx), op_type)
259
+ )
245
260
  if key in self._observation_points:
246
261
  observe = True
247
262
  elif self._observe_callback is not None:
248
- observe = self._observe_callback("node", addr=node_addr, state=state, op_type=op_type)
263
+ observe = self._observe_callback("node", addr=node_addr, state=state, op_type=op_type, node_idx=node_idx)
249
264
  if observe:
250
- key: ObservationPoint = ("node", node_addr, op_type)
265
+ key: ObservationPoint = (
266
+ ("node", node_addr, op_type) if node_idx is None else ("node", (node_addr, node_idx), op_type)
267
+ )
251
268
 
252
269
  if observe:
253
270
  self.observed_results[key] = state.live_definitions
@@ -349,6 +366,52 @@ class ReachingDefinitionsAnalysis(
349
366
  # it's an AIL block
350
367
  self.observed_results[key] = state.live_definitions.copy()
351
368
 
369
+ def exit_observe(
370
+ self,
371
+ node_addr: int,
372
+ exit_stmt_idx: int,
373
+ block: Union[Block, ailment.Block],
374
+ state: ReachingDefinitionsState,
375
+ node_idx: Optional[int] = None,
376
+ ):
377
+ observe = False
378
+ key = None
379
+
380
+ if self._observe_all:
381
+ observe = True
382
+ key = (
383
+ ("exit", (node_addr, exit_stmt_idx), ObservationPointType.OP_AFTER)
384
+ if node_idx is None
385
+ else ("exit", (node_addr, node_idx, exit_stmt_idx), ObservationPointType.OP_AFTER)
386
+ )
387
+ elif self._observation_points is not None:
388
+ key = (
389
+ ("exit", (node_addr, exit_stmt_idx), ObservationPointType.OP_AFTER)
390
+ if node_idx is None
391
+ else ("exit", (node_addr, node_idx, exit_stmt_idx), ObservationPointType.OP_AFTER)
392
+ )
393
+ if key in self._observation_points:
394
+ observe = True
395
+ elif self._observe_callback is not None:
396
+ observe = self._observe_callback(
397
+ "exit",
398
+ node_addr=node_addr,
399
+ exit_stmt_idx=exit_stmt_idx,
400
+ block=block,
401
+ state=state,
402
+ )
403
+ if observe:
404
+ key = (
405
+ ("exit", (node_addr, exit_stmt_idx), ObservationPointType.OP_AFTER)
406
+ if node_idx is None
407
+ else ("exit", (node_addr, node_idx, exit_stmt_idx), ObservationPointType.OP_AFTER)
408
+ )
409
+
410
+ if not observe:
411
+ return
412
+
413
+ self.observed_results[key] = state.live_definitions.copy()
414
+
352
415
  @property
353
416
  def subject(self):
354
417
  return self._subject
@@ -401,23 +464,36 @@ class ReachingDefinitionsAnalysis(
401
464
  block_key = node.addr
402
465
  elif isinstance(node, CFGNode):
403
466
  if node.is_simprocedure or node.is_syscall:
404
- return False, state.copy()
467
+ return False, state.copy(discard_tmpdefs=True)
405
468
  block = node.block
406
469
  engine = self._engine_vex
407
470
  block_key = node.addr
408
471
  else:
409
472
  l.warning("Unsupported node type %s.", node.__class__)
410
- return False, state.copy()
411
-
412
- self.node_observe(node.addr, state, OP_BEFORE)
473
+ return False, state.copy(discard_tmpdefs=True)
474
+
475
+ state = state.copy(discard_tmpdefs=True)
476
+ self.node_observe(node.addr, state.copy(), OP_BEFORE)
477
+
478
+ if self.subject.type == SubjectType.Function:
479
+ node_parents = [
480
+ CodeLocation(pred.addr, 0, block_idx=pred.idx if isinstance(pred, ailment.Block) else None)
481
+ for pred in self._graph_visitor.predecessors(node)
482
+ ]
483
+ if node.addr == self.subject.content.addr:
484
+ node_parents += [ExternalCodeLocation()]
485
+ self.model.at_new_block(
486
+ CodeLocation(block.addr, 0, block_idx=block.idx if isinstance(block, ailment.Block) else None),
487
+ node_parents,
488
+ )
413
489
 
414
- state = state.copy()
415
490
  state = engine.process(
416
491
  state,
417
492
  block=block,
418
493
  fail_fast=self._fail_fast,
419
494
  visited_blocks=self._visited_blocks,
420
495
  dep_graph=self._dep_graph,
496
+ model=self.model,
421
497
  )
422
498
 
423
499
  self._node_iterations[block_key] += 1
@@ -389,6 +389,26 @@ class SimEngineVRVEX(
389
389
  r = self.state.top(result_size)
390
390
  return RichR(r)
391
391
 
392
+ def _handle_Mod(self, expr):
393
+ arg0, arg1 = expr.args
394
+ r0 = self._expr(arg0)
395
+ r1 = self._expr(arg1)
396
+
397
+ result_size = expr.result_size(self.tyenv)
398
+ if r0.data.concrete and r1.data.concrete and r1.data.concrete_value != 0:
399
+ # constants
400
+ try:
401
+ if result_size != r1.data.size():
402
+ remainder = r0.data.SMod(claripy.SignExt(result_size - r1.data.size(), r1.data))
403
+ else:
404
+ remainder = r0.data.SMod(r1.data)
405
+ return RichR(remainder)
406
+ except ZeroDivisionError:
407
+ pass
408
+
409
+ r = self.state.top(result_size)
410
+ return RichR(r)
411
+
392
412
  def _handle_Shr(self, expr):
393
413
  arg0, arg1 = expr.args
394
414
  r0 = self._expr(arg0)
@@ -241,6 +241,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
241
241
  track_sp=True,
242
242
  func_args: Optional[List[SimVariable]] = None,
243
243
  store_live_variables=False,
244
+ unify_variables=True,
244
245
  ):
245
246
  if not isinstance(func, Function):
246
247
  func = self.kb.functions[func]
@@ -268,6 +269,7 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
268
269
  self._job_ctr = 0
269
270
  self._track_sp = track_sp and self.project.arch.sp_offset is not None
270
271
  self._func_args = func_args
272
+ self._unify_variables = unify_variables
271
273
 
272
274
  self._ail_engine = SimEngineVRAIL(self.project, self.kb, call_info=call_info)
273
275
  self._vex_engine = SimEngineVRVEX(self.project, self.kb, call_info=call_info)
@@ -460,6 +462,9 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
460
462
  state.downsize_region(state.stack_region),
461
463
  )
462
464
 
465
+ if self._unify_variables:
466
+ self.variable_manager[self.function.addr].unify_variables()
467
+
463
468
  # unify type variables for global variables
464
469
  for var, typevars in self.var_to_typevars.items():
465
470
  if len(typevars) > 1 and isinstance(var, SimMemoryVariable) and not isinstance(var, SimStackVariable):