angr 9.2.75__py3-none-manylinux2014_x86_64.whl → 9.2.77__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of angr might be problematic. Click here for more details.

Files changed (50)
  1. angr/__init__.py +1 -1
  2. angr/analyses/cfg/cfg_fast.py +37 -0
  3. angr/analyses/cfg/indirect_jump_resolvers/amd64_pe_iat.py +7 -1
  4. angr/analyses/cfg/indirect_jump_resolvers/x86_pe_iat.py +7 -1
  5. angr/analyses/decompiler/clinic.py +4 -1
  6. angr/analyses/decompiler/condition_processor.py +4 -0
  7. angr/analyses/decompiler/decompiler.py +4 -0
  8. angr/analyses/decompiler/optimization_passes/ite_region_converter.py +4 -3
  9. angr/analyses/decompiler/optimization_passes/multi_simplifier.py +1 -1
  10. angr/analyses/decompiler/structured_codegen/c.py +32 -21
  11. angr/analyses/propagator/engine_ail.py +1 -1
  12. angr/analyses/reaching_definitions/engine_ail.py +3 -6
  13. angr/analyses/reaching_definitions/engine_vex.py +32 -2
  14. angr/analyses/reaching_definitions/function_handler.py +1 -1
  15. angr/analyses/reaching_definitions/rd_initializer.py +6 -6
  16. angr/analyses/reaching_definitions/rd_state.py +9 -11
  17. angr/analyses/typehoon/typevars.py +19 -29
  18. angr/analyses/variable_recovery/irsb_scanner.py +16 -0
  19. angr/analyses/variable_recovery/variable_recovery_fast.py +33 -31
  20. angr/engines/light/engine.py +1 -1
  21. angr/keyed_region.py +19 -3
  22. angr/knowledge_plugins/cfg/cfg_model.py +25 -16
  23. angr/knowledge_plugins/cfg/memory_data.py +1 -1
  24. angr/knowledge_plugins/functions/function.py +8 -0
  25. angr/knowledge_plugins/key_definitions/live_definitions.py +53 -44
  26. angr/knowledge_plugins/key_definitions/liveness.py +102 -34
  27. angr/knowledge_plugins/key_definitions/rd_model.py +4 -4
  28. angr/knowledge_plugins/propagations/states.py +3 -1
  29. angr/knowledge_plugins/variables/variable_manager.py +51 -25
  30. angr/misc/bug_report.py +2 -2
  31. angr/sim_type.py +46 -0
  32. angr/storage/memory_mixins/__init__.py +3 -2
  33. angr/storage/memory_mixins/paged_memory/paged_memory_multivalue_mixin.py +63 -0
  34. angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +5 -0
  35. {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/METADATA +6 -6
  36. {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/RECORD +50 -49
  37. tests/analyses/cfg/test_cfgfast.py +21 -0
  38. tests/analyses/decompiler/test_decompiler.py +22 -1
  39. tests/analyses/test_flirt.py +3 -1
  40. tests/analyses/test_identifier.py +2 -0
  41. tests/engines/test_unicorn.py +4 -0
  42. tests/exploration_techniques/test_driller_core.py +4 -0
  43. tests/exploration_techniques/test_oppologist.py +2 -0
  44. tests/exploration_techniques/test_tracer.py +9 -0
  45. tests/procedures/libc/test_string.py +2 -1
  46. tests/sim/options/test_0div.py +2 -0
  47. tests/state_plugins/posix/test_files.py +2 -0
  48. {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/LICENSE +0 -0
  49. {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/WHEEL +0 -0
  50. {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/top_level.txt +0 -0
angr/analyses/variable_recovery/variable_recovery_fast.py CHANGED
@@ -1,5 +1,5 @@
1
1
  # pylint:disable=wrong-import-position,wrong-import-order
2
- from typing import Optional, List, Tuple, Union
2
+ from typing import Optional, List, Tuple, Union, DefaultDict, Set
3
3
  import logging
4
4
  from collections import defaultdict
5
5
 
@@ -17,7 +17,7 @@ from ...knowledge_plugins import Function
17
17
  from ...sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
18
18
  from ...engines.vex.claripy.irop import vexop_to_simop
19
19
  from angr.analyses import ForwardAnalysis, visitors
20
- from ..typehoon.typevars import Equivalence, TypeVariable
20
+ from ..typehoon.typevars import Equivalence, TypeVariable, TypeVariables
21
21
  from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
22
22
  from .engine_vex import SimEngineVRVEX
23
23
  from .engine_ail import SimEngineVRAIL
@@ -86,9 +86,9 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
86
86
  stack_region=self.stack_region.copy(),
87
87
  register_region=self.register_region.copy(),
88
88
  global_region=self.global_region.copy(),
89
- typevars=self.typevars.copy(),
90
- type_constraints=self.type_constraints.copy(),
91
- delayed_type_constraints=self.delayed_type_constraints.copy(),
89
+ typevars=self.typevars,
90
+ type_constraints=self.type_constraints,
91
+ delayed_type_constraints=self.delayed_type_constraints,
92
92
  stack_offset_typevars=dict(self.stack_offset_typevars),
93
93
  project=self.project,
94
94
  ret_val_size=self.ret_val_size,
@@ -125,26 +125,17 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
125
125
  merged_global_region.set_state(self)
126
126
  merge_occurred |= merged_global_region.merge([other.global_region for other in others], None)
127
127
 
128
- merged_typevars = self.typevars
129
- merged_typeconstraints = self.type_constraints.copy()
130
- delayed_typeconstraints = self.delayed_type_constraints.copy().clean()
131
- for other in others:
132
- merged_typevars = merged_typevars.merge(other.typevars)
133
- merged_typeconstraints |= other.type_constraints
134
- for v, cons in other.delayed_type_constraints.items():
135
- delayed_typeconstraints[v] |= cons
136
-
137
- merge_occurred |= self.typevars != merged_typevars
138
- merge_occurred |= self.type_constraints != merged_typeconstraints
139
- merge_occurred |= self.delayed_type_constraints != delayed_typeconstraints
128
+ typevars = self.typevars
129
+ type_constraints = self.type_constraints
130
+ delayed_typeconstraints = self.delayed_type_constraints
140
131
 
141
132
  # add subtype constraints for all replacements
142
133
  for v0, v1 in self.phi_variables.items():
143
134
  # v0 will be replaced by v1
144
- if not merged_typevars.has_type_variable_for(v1, None):
145
- merged_typevars.add_type_variable(v1, None, TypeVariable())
146
- if not merged_typevars.has_type_variable_for(v0, None):
147
- merged_typevars.add_type_variable(v0, None, TypeVariable())
135
+ if not typevars.has_type_variable_for(v1, None):
136
+ typevars.add_type_variable(v1, None, TypeVariable())
137
+ if not typevars.has_type_variable_for(v0, None):
138
+ typevars.add_type_variable(v0, None, TypeVariable())
148
139
  # Assuming v2 = phi(v0, v1), then we know that v0_typevar == v1_typevar == v2_typevar
149
140
  # However, it's possible that neither v0 nor v1 will ever be used in future blocks, which not only makes
150
141
  # this phi function useless, but also leads to the incorrect assumption that v1_typevar == v2_typevar.
@@ -152,9 +143,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
152
143
  # when v1 (the new variable that will end up in the state) is ever used in the future.
153
144
 
154
145
  # create an equivalence relationship
155
- equivalence = Equivalence(
156
- merged_typevars.get_type_variable(v1, None), merged_typevars.get_type_variable(v0, None)
157
- )
146
+ equivalence = Equivalence(typevars.get_type_variable(v1, None), typevars.get_type_variable(v0, None))
158
147
  delayed_typeconstraints[v1].add(equivalence)
159
148
 
160
149
  stack_offset_typevars = {}
@@ -173,7 +162,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
173
162
  else:
174
163
  typevar = TypeVariable()
175
164
  for orig_typevar in all_typevars:
176
- merged_typeconstraints.add(Equivalence(orig_typevar, typevar))
165
+ type_constraints.add(Equivalence(orig_typevar, typevar))
177
166
  stack_offset_typevars[offset] = typevar
178
167
 
179
168
  ret_val_size = self.ret_val_size
@@ -195,8 +184,8 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
195
184
  stack_region=merged_stack_region,
196
185
  register_region=merged_register_region,
197
186
  global_region=merged_global_region,
198
- typevars=merged_typevars,
199
- type_constraints=merged_typeconstraints,
187
+ typevars=typevars,
188
+ type_constraints=type_constraints,
200
189
  delayed_type_constraints=delayed_typeconstraints,
201
190
  stack_offset_typevars=stack_offset_typevars,
202
191
  project=self.project,
@@ -205,6 +194,9 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
205
194
 
206
195
  return state, merge_occurred
207
196
 
197
+ def downsize(self) -> None:
198
+ pass
199
+
208
200
  #
209
201
  # Util methods
210
202
  #
@@ -277,8 +269,10 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
277
269
  self._node_iterations = defaultdict(int)
278
270
 
279
271
  self._node_to_cc = {}
280
- self.var_to_typevars = defaultdict(set)
272
+ self.var_to_typevars: DefaultDict[SimVariable, Set[TypeVariable]] = defaultdict(set)
273
+ self.typevars = None
281
274
  self.type_constraints = None
275
+ self.delayed_type_constraints = None
282
276
  self.ret_val_size = None
283
277
 
284
278
  self._analyze()
@@ -293,7 +287,9 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
293
287
  #
294
288
 
295
289
  def _pre_analysis(self):
290
+ self.typevars = TypeVariables()
296
291
  self.type_constraints = set()
292
+ self.delayed_type_constraints = defaultdict(set)
297
293
 
298
294
  self.initialize_dominance_frontiers()
299
295
 
@@ -321,6 +317,9 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
321
317
  self.project.arch,
322
318
  self.function,
323
319
  project=self.project,
320
+ typevars=self.typevars,
321
+ type_constraints=self.type_constraints,
322
+ delayed_type_constraints=self.delayed_type_constraints,
324
323
  )
325
324
  initial_sp = state.stack_address(self.project.arch.bytes if self.project.arch.call_pushes_ret else 0)
326
325
  if self.project.arch.sp_offset is not None:
@@ -434,9 +433,6 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
434
433
  self._process_block(state, block)
435
434
 
436
435
  self._node_iterations[block_key] += 1
437
- self.type_constraints |= state.type_constraints
438
- for var, typevar in state.typevars._typevars.items():
439
- self.var_to_typevars[var].add(typevar)
440
436
 
441
437
  if state.ret_val_size is not None:
442
438
  if self.ret_val_size is None or self.ret_val_size < state.ret_val_size:
@@ -467,6 +463,10 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
467
463
  if self._unify_variables:
468
464
  self.variable_manager[self.function.addr].unify_variables()
469
465
 
466
+ # fill in var_to_typevars
467
+ for var, typevar_set in self.typevars._typevars.items():
468
+ self.var_to_typevars[var] = typevar_set
469
+
470
470
  # unify type variables for global variables
471
471
  for var, typevars in self.var_to_typevars.items():
472
472
  if len(typevars) > 1 and isinstance(var, SimMemoryVariable) and not isinstance(var, SimStackVariable):
@@ -476,6 +476,8 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
476
476
 
477
477
  self.variable_manager[self.function.addr].ret_val_size = self.ret_val_size
478
478
 
479
+ self.delayed_type_constraints = None
480
+
479
481
  #
480
482
  # Private methods
481
483
  #
angr/engines/light/engine.py CHANGED
@@ -547,7 +547,7 @@ class SimEngineLightVEXMixin(SimEngineLightMixin):
547
547
  to_size = expr_1.size()
548
548
  if signed:
549
549
  quotient = expr_0.SDiv(claripy.SignExt(from_size - to_size, expr_1))
550
- remainder = expr_1.SMod(claripy.SignExt(from_size - to_size, expr_1))
550
+ remainder = expr_0.SMod(claripy.SignExt(from_size - to_size, expr_1))
551
551
  quotient_size = to_size
552
552
  remainder_size = to_size
553
553
  return claripy.Concat(
angr/keyed_region.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  import weakref
3
- from typing import Union, TYPE_CHECKING
3
+ from typing import Union, Optional, Tuple, TYPE_CHECKING
4
4
 
5
5
  from sortedcontainers import SortedDict
6
6
 
@@ -133,7 +133,7 @@ class KeyedRegion:
133
133
  self._storage, om, self._phi_node_contains = s
134
134
  self._object_mapping = weakref.WeakValueDictionary(om)
135
135
 
136
- def _get_container(self, offset):
136
+ def _get_container(self, offset) -> Tuple[int, Optional[RegionObject]]:
137
137
  try:
138
138
  base_offset = next(self._storage.irange(maximum=offset, reverse=True))
139
139
  except StopIteration:
@@ -419,7 +419,23 @@ class KeyedRegion:
419
419
 
420
420
  # is there a region item that begins before the start and overlaps with this variable?
421
421
  floor_key, floor_item = self._get_container(start)
422
- if floor_item is not None and floor_key not in overlapping_items:
422
+ if floor_item is None:
423
+ # fast path: just insert it
424
+ self._storage[start] = RegionObject(start, object_size, {stored_object})
425
+ return
426
+
427
+ # fast path: if there is a perfect overlap, just update the item
428
+ if len(overlapping_items) == 1 and floor_item.start == start and floor_item.end == end:
429
+ if overwrite:
430
+ floor_item.set_object(stored_object)
431
+ elif merge_to_top is False and top is None:
432
+ floor_item.add_object(stored_object)
433
+ else:
434
+ self._add_object_with_check(floor_item, stored_object, merge_to_top=merge_to_top, top=top)
435
+ return
436
+
437
+ # slower path: there are multiple overlapping items
438
+ if floor_key not in overlapping_items:
423
439
  # insert it into the beginning
424
440
  overlapping_items.insert(0, floor_key)
425
441
 
angr/knowledge_plugins/cfg/cfg_model.py CHANGED
@@ -770,23 +770,16 @@ class CFGModel(Serializable):
770
770
  if elfheader_sort:
771
771
  return elfheader_sort, elfheader_size
772
772
 
773
+ pointer_size = self.project.arch.bytes
774
+
775
+ # who's using it?
773
776
  irsb_addr, stmt_idx = None, None
774
777
  if xrefs is not None and seg_list is not None:
775
778
  try:
776
779
  ref: "XRef" = next(iter(xrefs.get_xrefs_by_dst(data_addr)))
777
780
  irsb_addr = ref.block_addr
778
- stmt_idx = ref.stmt_idx
779
781
  except StopIteration:
780
782
  pass
781
-
782
- if seg_list.is_occupied(data_addr) and seg_list.occupied_by_sort(data_addr) == "code":
783
- # it's a code reference
784
- # TODO: Further check if it's the beginning of an instruction
785
- return MemoryDataSort.CodeReference, 0
786
-
787
- pointer_size = self.project.arch.bytes
788
-
789
- # who's using it?
790
783
  if irsb_addr is not None and isinstance(self.project.loader.main_object, cle.MetaELF):
791
784
  plt_entry = self.project.loader.main_object.reverse_plt.get(irsb_addr, None)
792
785
  if plt_entry is not None:
@@ -839,12 +832,13 @@ class CFGModel(Serializable):
839
832
  if running_failures > 3:
840
833
  break
841
834
 
842
- if content_holder is not None:
843
- string_data = data[: last_success * 2]
844
- if string_data.endswith(b"\x00\x00"):
845
- string_data = string_data[:-2]
846
- content_holder.append(string_data)
847
- return MemoryDataSort.UnicodeString, last_success
835
+ if last_success > 5:
836
+ if content_holder is not None:
837
+ string_data = data[: last_success * 2]
838
+ if string_data.endswith(b"\x00\x00"):
839
+ string_data = string_data[:-2]
840
+ content_holder.append(string_data)
841
+ return MemoryDataSort.UnicodeString, last_success * 2
848
842
 
849
843
  if data:
850
844
  try:
@@ -864,6 +858,21 @@ class CFGModel(Serializable):
864
858
  string_len += 1
865
859
  return MemoryDataSort.String, min(string_len, 1024)
866
860
 
861
+ # is it a code reference?
862
+ irsb_addr, stmt_idx = None, None
863
+ if xrefs is not None and seg_list is not None:
864
+ try:
865
+ ref: "XRef" = next(iter(xrefs.get_xrefs_by_dst(data_addr)))
866
+ irsb_addr = ref.block_addr
867
+ stmt_idx = ref.stmt_idx
868
+ except StopIteration:
869
+ pass
870
+
871
+ if seg_list.is_occupied(data_addr) and seg_list.occupied_by_sort(data_addr) == "code":
872
+ # it's a code reference
873
+ # TODO: Further check if it's the beginning of an instruction
874
+ return MemoryDataSort.CodeReference, 0
875
+
867
876
  if data_type_guessing_handlers:
868
877
  for handler in data_type_guessing_handlers:
869
878
  irsb = None if irsb_addr is None else self.get_any_node(irsb_addr).block.vex
angr/knowledge_plugins/cfg/memory_data.py CHANGED
@@ -122,7 +122,7 @@ class MemoryData(Serializable):
122
122
  self.content = loader.memory.load(
123
123
  self.addr, self.reference_size if self.reference_size is not None else self.size
124
124
  )
125
- while self.content.endswith(b"\x00\x00"):
125
+ if self.content.endswith(b"\x00\x00"):
126
126
  self.content = self.content[:-2]
127
127
  else:
128
128
  # FIXME: Other types are not supported yet
angr/knowledge_plugins/functions/function.py CHANGED
@@ -648,6 +648,14 @@ class Function(Serializable):
648
648
  """
649
649
  return self.binary.loader.find_symbol(self.addr)
650
650
 
651
+ @property
652
+ def pseudocode(self) -> str:
653
+ """
654
+ :return: the function's pseudocode
655
+ """
656
+ dec = self.project.analyses.Decompiler(self, cfg=self._function_manager._kb.cfgs.get_most_accurate())
657
+ return dec.codegen.text
658
+
651
659
  def add_jumpout_site(self, node):
652
660
  """
653
661
  Add a custom jumpout site.
angr/knowledge_plugins/key_definitions/live_definitions.py CHANGED
@@ -370,6 +370,14 @@ class LiveDefinitions:
370
370
  if isinstance(anno, DefinitionAnnotation):
371
371
  yield anno.definition
372
372
 
373
+ @staticmethod
374
+ def extract_defs_from_annotations(annos: Iterable["Annotation"]) -> Set[Definition]:
375
+ defs = set()
376
+ for anno in annos:
377
+ if isinstance(anno, DefinitionAnnotation):
378
+ defs.add(anno.definition)
379
+ return defs
380
+
373
381
  @staticmethod
374
382
  def extract_defs_from_mv(mv: MultiValues) -> Generator[Definition, None, None]:
375
383
  for vs in mv.values():
@@ -614,64 +622,60 @@ class LiveDefinitions:
614
622
 
615
623
  def get_definitions(
616
624
  self, thing: Union[Atom, Definition[Atom], Iterable[Atom], Iterable[Definition[Atom]], MultiValues]
617
- ) -> Iterable[Definition[Atom]]:
625
+ ) -> Set[Definition[Atom]]:
618
626
  if isinstance(thing, MultiValues):
627
+ defs = set()
619
628
  for vs in thing.values():
620
629
  for v in vs:
621
- for anno in v.annotations:
622
- if isinstance(anno, DefinitionAnnotation):
623
- yield anno.definition
624
- return
630
+ defs.update(LiveDefinitions.extract_defs_from_annotations(v.annotations))
631
+ return defs
625
632
  elif isinstance(thing, Atom):
626
633
  pass
627
634
  elif isinstance(thing, Definition):
628
635
  thing = thing.atom
629
636
  else:
637
+ defs = set()
630
638
  for atom2 in thing:
631
- yield from self.get_definitions(atom2)
632
- return
639
+ defs |= self.get_definitions(atom2)
640
+ return defs
633
641
 
634
642
  if isinstance(thing, Register):
635
- yield from self.get_register_definitions(thing.reg_offset, thing.size)
643
+ return self.get_register_definitions(thing.reg_offset, thing.size)
636
644
  elif isinstance(thing, MemoryLocation):
637
645
  if isinstance(thing.addr, SpOffset):
638
- yield from self.get_stack_definitions(thing.addr.offset, thing.size, thing.endness)
646
+ return self.get_stack_definitions(thing.addr.offset, thing.size)
639
647
  elif isinstance(thing.addr, HeapAddress):
640
- yield from self.get_heap_definitions(thing.addr.value, size=thing.size, endness=thing.endness)
648
+ return self.get_heap_definitions(thing.addr.value, size=thing.size)
641
649
  elif isinstance(thing.addr, int):
642
- yield from self.get_memory_definitions(thing.addr, thing.size, thing.endness)
650
+ return self.get_memory_definitions(thing.addr, thing.size)
643
651
  else:
644
- return
652
+ return set()
645
653
  elif isinstance(thing, Tmp):
646
- yield from self.get_tmp_definitions(thing.tmp_idx)
654
+ return self.get_tmp_definitions(thing.tmp_idx)
647
655
  else:
656
+ defs = set()
648
657
  for mvs in self.others.get(thing, {}).values():
649
658
  for mv in mvs:
650
- yield from self.get_definitions(mv)
659
+ defs |= self.get_definitions(mv)
660
+ return defs
651
661
 
652
- def get_tmp_definitions(self, tmp_idx: int) -> Iterable[Definition]:
662
+ def get_tmp_definitions(self, tmp_idx: int) -> Set[Definition]:
653
663
  if tmp_idx in self.tmps:
654
- yield from self.tmps[tmp_idx]
664
+ return self.tmps[tmp_idx]
655
665
  else:
656
- return
666
+ return set()
657
667
 
658
- def get_register_definitions(self, reg_offset: int, size: int, endness=None) -> Iterable[Definition]:
668
+ def get_register_definitions(self, reg_offset: int, size: int) -> Set[Definition]:
659
669
  try:
660
- values: MultiValues = self.registers.load(
661
- reg_offset,
662
- size=size,
663
- endness=endness,
664
- )
670
+ annotations = self.registers.load_annotations(reg_offset, size)
665
671
  except SimMemoryMissingError as ex:
666
- # load values and stop at the missing location
667
672
  if ex.missing_addr > reg_offset:
668
- values: MultiValues = self.registers.load(
669
- reg_offset, size=ex.missing_addr - reg_offset, endness=endness
670
- )
673
+ annotations = self.registers.load_annotations(reg_offset, ex.missing_addr - reg_offset)
671
674
  else:
672
675
  # nothing we can do
673
- return
674
- yield from LiveDefinitions.extract_defs_from_mv(values)
676
+ return set()
677
+
678
+ return LiveDefinitions.extract_defs_from_annotations(annotations)
675
679
 
676
680
  def get_stack_values(self, stack_offset: int, size: int, endness: str) -> Optional[MultiValues]:
677
681
  stack_addr = self.stack_offset_to_stack_addr(stack_offset)
@@ -680,31 +684,36 @@ class LiveDefinitions:
680
684
  except SimMemoryMissingError:
681
685
  return None
682
686
 
683
- def get_stack_definitions(self, stack_offset: int, size: int, endness) -> Iterable[Definition]:
684
- mv = self.get_stack_values(stack_offset, size, endness)
685
- if not mv:
686
- return
687
- yield from LiveDefinitions.extract_defs_from_mv(mv)
687
+ def get_stack_definitions(self, stack_offset: int, size: int) -> Set[Definition]:
688
+ try:
689
+ stack_addr = self.stack_offset_to_stack_addr(stack_offset)
690
+ annotations = self.stack.load_annotations(stack_addr, size)
691
+ except SimMemoryMissingError:
692
+ return set()
688
693
 
689
- def get_heap_definitions(self, heap_addr: int, size: int, endness) -> Iterable[Definition]:
694
+ return LiveDefinitions.extract_defs_from_annotations(annotations)
695
+
696
+ def get_heap_definitions(self, heap_addr: int, size: int) -> Set[Definition]:
690
697
  try:
691
- mv: MultiValues = self.heap.load(heap_addr, size=size, endness=endness)
698
+ annotations = self.heap.load_annotations(heap_addr, size)
692
699
  except SimMemoryMissingError:
693
- return
694
- yield from LiveDefinitions.extract_defs_from_mv(mv)
700
+ return set()
701
+
702
+ return LiveDefinitions.extract_defs_from_annotations(annotations)
695
703
 
696
- def get_memory_definitions(self, addr: int, size: int, endness) -> Iterable[Definition]:
704
+ def get_memory_definitions(self, addr: int, size: int) -> Set[Definition]:
697
705
  try:
698
- values = self.memory.load(addr, size=size, endness=endness)
706
+ annotations = self.memory.load_annotations(addr, size)
699
707
  except SimMemoryMissingError:
700
- return
701
- yield from LiveDefinitions.extract_defs_from_mv(values)
708
+ return set()
709
+
710
+ return LiveDefinitions.extract_defs_from_annotations(annotations)
702
711
 
703
712
  @deprecated("get_definitions")
704
713
  def get_definitions_from_atoms(self, atoms: Iterable[Atom]) -> Iterable[Definition]:
705
714
  result = set()
706
715
  for atom in atoms:
707
- result |= set(self.get_definitions(atom))
716
+ result |= self.get_definitions(atom)
708
717
  return result
709
718
 
710
719
  @deprecated("get_values")
@@ -905,7 +914,7 @@ class LiveDefinitions:
905
914
 
906
915
  def add_memory_use(self, atom: MemoryLocation, code_loc: CodeLocation, expr: Optional[Any] = None) -> None:
907
916
  # get all current definitions
908
- current_defs: Iterable[Definition] = self.get_definitions(atom)
917
+ current_defs: Set[Definition] = self.get_definitions(atom)
909
918
 
910
919
  for current_def in current_defs:
911
920
  self.add_memory_use_by_def(current_def, code_loc, expr=expr)
angr/knowledge_plugins/key_definitions/liveness.py CHANGED
@@ -1,7 +1,9 @@
1
- from typing import DefaultDict, Optional, List, Set, Tuple, TYPE_CHECKING
1
+ from typing import DefaultDict, Optional, List, Set, Tuple, Dict, TYPE_CHECKING
2
2
 
3
3
  from collections import defaultdict
4
+ from itertools import chain
4
5
 
6
+ from angr.utils.constants import DEFAULT_STATEMENT
5
7
  from angr.knowledge_plugins.key_definitions.atoms import Tmp
6
8
 
7
9
  from .constants import ObservationPointType, OP_BEFORE, OP_AFTER
@@ -15,6 +17,7 @@ LocationType = Tuple[int, Optional[int], Optional[int]] # block addr, block ID,
15
17
  LocationWithPosType = Tuple[
16
18
  int, Optional[int], Optional[int], ObservationPointType
17
19
  ] # block addr, block ID, stmt ID, before/after
20
+ BlockAddrType = Tuple[int, Optional[int]] # block addr, block ID
18
21
 
19
22
 
20
23
  class Liveness:
@@ -25,30 +28,37 @@ class Liveness:
25
28
  def __init__(self):
26
29
  self.curr_live_defs: Set["Definition"] = set()
27
30
  self.curr_loc: Optional[LocationType] = None
28
- self.def_to_liveness: DefaultDict["Definition", Set[LocationType]] = defaultdict(set)
29
- self.loc_to_defs: DefaultDict[LocationWithPosType, Set["Definition"]] = defaultdict(set)
30
- self._node_max_stmt_id: DefaultDict[Tuple[int, Optional[int]], int] = defaultdict(int)
31
-
32
- def add_def(self, d: "Definition", code_loc: "CodeLocation") -> None:
33
- loc = (code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx)
31
+ self.curr_block: Optional[BlockAddrType] = None
32
+ self.curr_stmt_idx: Optional[int] = None
33
+ self.blockstart_to_defs: DefaultDict[BlockAddrType, Set["Definition"]] = defaultdict(set)
34
+ self.blockend_to_defs: DefaultDict[BlockAddrType, Set["Definition"]] = defaultdict(set)
35
+ self.loc_to_killed_defs: DefaultDict[BlockAddrType, Dict[int, Set["Definition"]]] = defaultdict(dict)
36
+ self.loc_to_added_defs: DefaultDict[BlockAddrType, Dict[int, Set["Definition"]]] = defaultdict(dict)
37
+ self._node_max_stmt_id: DefaultDict[BlockAddrType, int] = defaultdict(int)
38
+
39
+ def add_def(self, d: "Definition") -> None:
34
40
  self.curr_live_defs.add(d)
35
- self.def_to_liveness[d].add(loc)
41
+ if self.curr_stmt_idx not in self.loc_to_added_defs[self.curr_block]:
42
+ self.loc_to_added_defs[self.curr_block][self.curr_stmt_idx] = set()
43
+ self.loc_to_added_defs[self.curr_block][self.curr_stmt_idx].add(d)
36
44
 
37
45
  def kill_def(self, d: "Definition") -> None:
38
46
  self.curr_live_defs.discard(d)
47
+ if self.curr_stmt_idx not in self.loc_to_killed_defs[self.curr_block]:
48
+ self.loc_to_killed_defs[self.curr_block][self.curr_stmt_idx] = set()
49
+ self.loc_to_killed_defs[self.curr_block][self.curr_stmt_idx].add(d)
39
50
 
40
- def complete_loc(self) -> None:
41
- if self.curr_loc is not None:
42
- for live_def in self.curr_live_defs:
43
- self.def_to_liveness[live_def].add(self.curr_loc)
44
- self.loc_to_defs[self.curr_loc + (OP_AFTER,)] |= self.curr_live_defs
51
+ def make_liveness_snapshot(self) -> None:
52
+ if self.curr_block is not None:
53
+ self.blockend_to_defs[self.curr_block] |= self.curr_live_defs
45
54
 
46
55
  def at_new_stmt(self, code_loc: "CodeLocation") -> None:
47
56
  """
48
57
  Only support moving from a statement to the next statement within one basic block.
49
58
  """
50
- self.complete_loc()
51
59
  self.curr_loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx
60
+ self.curr_block = code_loc.block_addr, code_loc.block_idx
61
+ self.curr_stmt_idx = code_loc.stmt_idx
52
62
  if (
53
63
  code_loc.stmt_idx is not None
54
64
  and code_loc.stmt_idx > self._node_max_stmt_id[(code_loc.block_addr, code_loc.block_idx)]
@@ -59,44 +69,102 @@ class Liveness:
59
69
  """
60
70
  Only support moving to a new block from one or more blocks.
61
71
  """
72
+ self.make_liveness_snapshot()
73
+
62
74
  loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx
63
- key = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, OP_BEFORE
75
+ key = code_loc.block_addr, code_loc.block_idx
64
76
  for pred_codeloc in pred_codelocs:
65
- if pred_codeloc.stmt_idx is None:
66
- # external code location
67
- pred_max_stmt_id = None
68
- else:
69
- pred_max_stmt_id = self._node_max_stmt_id[(pred_codeloc.block_addr, pred_codeloc.block_idx)]
70
- pred_key = pred_codeloc.block_addr, pred_codeloc.block_idx, pred_max_stmt_id, OP_AFTER
71
- all_pred_defs = self.loc_to_defs[pred_key]
77
+ all_pred_defs = self.blockend_to_defs[pred_codeloc.block_addr, pred_codeloc.block_idx]
72
78
 
73
79
  # remove tmp defs
74
80
  pred_defs = set()
75
81
  for pred_def in all_pred_defs:
76
82
  if not isinstance(pred_def.atom, Tmp):
77
83
  pred_defs.add(pred_def)
78
- for pred_def in pred_defs:
79
- self.def_to_liveness[pred_def].add(loc)
80
- self.loc_to_defs[key] |= pred_defs
84
+ self.blockstart_to_defs[key] |= pred_defs
81
85
 
82
- self.curr_live_defs = set(self.loc_to_defs[key])
86
+ self.curr_live_defs = self.blockstart_to_defs[key].copy()
83
87
  self.curr_loc = loc
88
+ self.curr_stmt_idx = 0
84
89
 
85
90
  def find_defs_at(self, code_loc: "CodeLocation", op: int = OP_BEFORE) -> Set["Definition"]:
86
- if op == OP_BEFORE:
87
- if code_loc.stmt_idx != 0:
88
- loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx - 1, OP_AFTER
91
+ return self.find_defs_at_raw(code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, op=op)
92
+
93
+ def find_defs_at_raw(
94
+ self, block_addr: int, block_idx: Optional[int], stmt_idx: Optional[int], op: int = OP_BEFORE
95
+ ) -> Set["Definition"]:
96
+ block: BlockAddrType = block_addr, block_idx
97
+ if block not in self.blockstart_to_defs:
98
+ defs = set()
99
+ else:
100
+ defs = self.blockstart_to_defs[block].copy()
101
+
102
+ if stmt_idx is None:
103
+ return defs
104
+
105
+ added_defs = self.loc_to_added_defs[block] if block in self.loc_to_added_defs else None
106
+ killed_defs = self.loc_to_killed_defs[block] if block in self.loc_to_added_defs else None
107
+
108
+ if stmt_idx == DEFAULT_STATEMENT:
109
+ end_stmt_idx = self._node_max_stmt_id[block] + 1
110
+ else:
111
+ if op == OP_BEFORE:
112
+ end_stmt_idx = stmt_idx
89
113
  else:
90
- loc = code_loc.block_addr, code_loc.block_idx, 0, OP_BEFORE
114
+ end_stmt_idx = stmt_idx + 1
115
+
116
+ if added_defs is not None and killed_defs is not None:
117
+ indices = chain(added_defs, killed_defs)
118
+ elif added_defs is None and killed_defs is not None:
119
+ indices = killed_defs
120
+ elif added_defs is not None and killed_defs is None:
121
+ indices = added_defs
91
122
  else:
92
- loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, OP_AFTER
93
- return set() if loc not in self.loc_to_defs else self.loc_to_defs[loc]
123
+ indices = []
124
+
125
+ tmp_indices = []
126
+ if killed_defs is not None and None in killed_defs:
127
+ # External codeloc
128
+ defs.difference_update(killed_defs[None])
129
+ for idx in indices:
130
+ if idx is not None:
131
+ tmp_indices.append(idx)
132
+ indices = tmp_indices
133
+
134
+ tmp_indices = []
135
+ if added_defs is not None and None in added_defs:
136
+ # External codeloc
137
+ defs.update(added_defs[None])
138
+ for idx in indices:
139
+ if idx is not None:
140
+ tmp_indices.append(idx)
141
+ indices = tmp_indices
142
+
143
+ for idx in sorted(indices):
144
+ if idx >= end_stmt_idx:
145
+ break
146
+ if killed_defs is not None and idx in killed_defs:
147
+ defs.difference_update(killed_defs[idx])
148
+ if added_defs is not None and idx in added_defs:
149
+ defs.update(added_defs[idx])
150
+
151
+ if stmt_idx == DEFAULT_STATEMENT and op == OP_AFTER:
152
+ if killed_defs is not None and DEFAULT_STATEMENT in killed_defs:
153
+ defs.difference_update(killed_defs[DEFAULT_STATEMENT])
154
+ if added_defs is not None and DEFAULT_STATEMENT in added_defs:
155
+ defs.update(added_defs[DEFAULT_STATEMENT])
156
+
157
+ return defs
94
158
 
95
159
  def copy(self) -> "Liveness":
96
160
  o = Liveness()
97
161
  o.curr_live_defs = self.curr_live_defs.copy()
98
162
  o.curr_loc = self.curr_loc
99
- o.def_to_liveness = self.def_to_liveness.copy()
100
- o.loc_to_defs = self.loc_to_defs.copy()
163
+ o.curr_block = self.curr_block
164
+ o.curr_stmt_idx = self.curr_stmt_idx
165
+ o.blockstart_to_defs = self.blockstart_to_defs.copy()
166
+ o.blockend_to_defs = self.blockend_to_defs.copy()
167
+ o.loc_to_added_defs = self.loc_to_added_defs.copy()
168
+ o.loc_to_killed_defs = self.loc_to_killed_defs.copy()
101
169
  o._node_max_stmt_id = self._node_max_stmt_id.copy()
102
170
  return o