angr 9.2.75__py3-none-manylinux2014_x86_64.whl → 9.2.77__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of angr might be problematic. Click here for more details.
- angr/__init__.py +1 -1
- angr/analyses/cfg/cfg_fast.py +37 -0
- angr/analyses/cfg/indirect_jump_resolvers/amd64_pe_iat.py +7 -1
- angr/analyses/cfg/indirect_jump_resolvers/x86_pe_iat.py +7 -1
- angr/analyses/decompiler/clinic.py +4 -1
- angr/analyses/decompiler/condition_processor.py +4 -0
- angr/analyses/decompiler/decompiler.py +4 -0
- angr/analyses/decompiler/optimization_passes/ite_region_converter.py +4 -3
- angr/analyses/decompiler/optimization_passes/multi_simplifier.py +1 -1
- angr/analyses/decompiler/structured_codegen/c.py +32 -21
- angr/analyses/propagator/engine_ail.py +1 -1
- angr/analyses/reaching_definitions/engine_ail.py +3 -6
- angr/analyses/reaching_definitions/engine_vex.py +32 -2
- angr/analyses/reaching_definitions/function_handler.py +1 -1
- angr/analyses/reaching_definitions/rd_initializer.py +6 -6
- angr/analyses/reaching_definitions/rd_state.py +9 -11
- angr/analyses/typehoon/typevars.py +19 -29
- angr/analyses/variable_recovery/irsb_scanner.py +16 -0
- angr/analyses/variable_recovery/variable_recovery_fast.py +33 -31
- angr/engines/light/engine.py +1 -1
- angr/keyed_region.py +19 -3
- angr/knowledge_plugins/cfg/cfg_model.py +25 -16
- angr/knowledge_plugins/cfg/memory_data.py +1 -1
- angr/knowledge_plugins/functions/function.py +8 -0
- angr/knowledge_plugins/key_definitions/live_definitions.py +53 -44
- angr/knowledge_plugins/key_definitions/liveness.py +102 -34
- angr/knowledge_plugins/key_definitions/rd_model.py +4 -4
- angr/knowledge_plugins/propagations/states.py +3 -1
- angr/knowledge_plugins/variables/variable_manager.py +51 -25
- angr/misc/bug_report.py +2 -2
- angr/sim_type.py +46 -0
- angr/storage/memory_mixins/__init__.py +3 -2
- angr/storage/memory_mixins/paged_memory/paged_memory_multivalue_mixin.py +63 -0
- angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +5 -0
- {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/METADATA +6 -6
- {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/RECORD +50 -49
- tests/analyses/cfg/test_cfgfast.py +21 -0
- tests/analyses/decompiler/test_decompiler.py +22 -1
- tests/analyses/test_flirt.py +3 -1
- tests/analyses/test_identifier.py +2 -0
- tests/engines/test_unicorn.py +4 -0
- tests/exploration_techniques/test_driller_core.py +4 -0
- tests/exploration_techniques/test_oppologist.py +2 -0
- tests/exploration_techniques/test_tracer.py +9 -0
- tests/procedures/libc/test_string.py +2 -1
- tests/sim/options/test_0div.py +2 -0
- tests/state_plugins/posix/test_files.py +2 -0
- {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/LICENSE +0 -0
- {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/WHEEL +0 -0
- {angr-9.2.75.dist-info → angr-9.2.77.dist-info}/top_level.txt +0 -0
|
angr/analyses/variable_recovery/variable_recovery_fast.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# pylint:disable=wrong-import-position,wrong-import-order
|
|
2
|
-
from typing import Optional, List, Tuple, Union
|
|
2
|
+
from typing import Optional, List, Tuple, Union, DefaultDict, Set
|
|
3
3
|
import logging
|
|
4
4
|
from collections import defaultdict
|
|
5
5
|
|
|
@@ -17,7 +17,7 @@ from ...knowledge_plugins import Function
|
|
|
17
17
|
from ...sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
|
|
18
18
|
from ...engines.vex.claripy.irop import vexop_to_simop
|
|
19
19
|
from angr.analyses import ForwardAnalysis, visitors
|
|
20
|
-
from ..typehoon.typevars import Equivalence, TypeVariable
|
|
20
|
+
from ..typehoon.typevars import Equivalence, TypeVariable, TypeVariables
|
|
21
21
|
from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
|
|
22
22
|
from .engine_vex import SimEngineVRVEX
|
|
23
23
|
from .engine_ail import SimEngineVRAIL
|
|
@@ -86,9 +86,9 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
|
|
|
86
86
|
stack_region=self.stack_region.copy(),
|
|
87
87
|
register_region=self.register_region.copy(),
|
|
88
88
|
global_region=self.global_region.copy(),
|
|
89
|
-
typevars=self.typevars
|
|
90
|
-
type_constraints=self.type_constraints
|
|
91
|
-
delayed_type_constraints=self.delayed_type_constraints
|
|
89
|
+
typevars=self.typevars,
|
|
90
|
+
type_constraints=self.type_constraints,
|
|
91
|
+
delayed_type_constraints=self.delayed_type_constraints,
|
|
92
92
|
stack_offset_typevars=dict(self.stack_offset_typevars),
|
|
93
93
|
project=self.project,
|
|
94
94
|
ret_val_size=self.ret_val_size,
|
|
@@ -125,26 +125,17 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
|
|
|
125
125
|
merged_global_region.set_state(self)
|
|
126
126
|
merge_occurred |= merged_global_region.merge([other.global_region for other in others], None)
|
|
127
127
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
delayed_typeconstraints = self.delayed_type_constraints
|
|
131
|
-
for other in others:
|
|
132
|
-
merged_typevars = merged_typevars.merge(other.typevars)
|
|
133
|
-
merged_typeconstraints |= other.type_constraints
|
|
134
|
-
for v, cons in other.delayed_type_constraints.items():
|
|
135
|
-
delayed_typeconstraints[v] |= cons
|
|
136
|
-
|
|
137
|
-
merge_occurred |= self.typevars != merged_typevars
|
|
138
|
-
merge_occurred |= self.type_constraints != merged_typeconstraints
|
|
139
|
-
merge_occurred |= self.delayed_type_constraints != delayed_typeconstraints
|
|
128
|
+
typevars = self.typevars
|
|
129
|
+
type_constraints = self.type_constraints
|
|
130
|
+
delayed_typeconstraints = self.delayed_type_constraints
|
|
140
131
|
|
|
141
132
|
# add subtype constraints for all replacements
|
|
142
133
|
for v0, v1 in self.phi_variables.items():
|
|
143
134
|
# v0 will be replaced by v1
|
|
144
|
-
if not
|
|
145
|
-
|
|
146
|
-
if not
|
|
147
|
-
|
|
135
|
+
if not typevars.has_type_variable_for(v1, None):
|
|
136
|
+
typevars.add_type_variable(v1, None, TypeVariable())
|
|
137
|
+
if not typevars.has_type_variable_for(v0, None):
|
|
138
|
+
typevars.add_type_variable(v0, None, TypeVariable())
|
|
148
139
|
# Assuming v2 = phi(v0, v1), then we know that v0_typevar == v1_typevar == v2_typevar
|
|
149
140
|
# However, it's possible that neither v0 nor v1 will ever be used in future blocks, which not only makes
|
|
150
141
|
# this phi function useless, but also leads to the incorrect assumption that v1_typevar == v2_typevar.
|
|
@@ -152,9 +143,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
|
|
|
152
143
|
# when v1 (the new variable that will end up in the state) is ever used in the future.
|
|
153
144
|
|
|
154
145
|
# create an equivalence relationship
|
|
155
|
-
equivalence = Equivalence(
|
|
156
|
-
merged_typevars.get_type_variable(v1, None), merged_typevars.get_type_variable(v0, None)
|
|
157
|
-
)
|
|
146
|
+
equivalence = Equivalence(typevars.get_type_variable(v1, None), typevars.get_type_variable(v0, None))
|
|
158
147
|
delayed_typeconstraints[v1].add(equivalence)
|
|
159
148
|
|
|
160
149
|
stack_offset_typevars = {}
|
|
@@ -173,7 +162,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
|
|
|
173
162
|
else:
|
|
174
163
|
typevar = TypeVariable()
|
|
175
164
|
for orig_typevar in all_typevars:
|
|
176
|
-
|
|
165
|
+
type_constraints.add(Equivalence(orig_typevar, typevar))
|
|
177
166
|
stack_offset_typevars[offset] = typevar
|
|
178
167
|
|
|
179
168
|
ret_val_size = self.ret_val_size
|
|
@@ -195,8 +184,8 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
|
|
|
195
184
|
stack_region=merged_stack_region,
|
|
196
185
|
register_region=merged_register_region,
|
|
197
186
|
global_region=merged_global_region,
|
|
198
|
-
typevars=
|
|
199
|
-
type_constraints=
|
|
187
|
+
typevars=typevars,
|
|
188
|
+
type_constraints=type_constraints,
|
|
200
189
|
delayed_type_constraints=delayed_typeconstraints,
|
|
201
190
|
stack_offset_typevars=stack_offset_typevars,
|
|
202
191
|
project=self.project,
|
|
@@ -205,6 +194,9 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
|
|
|
205
194
|
|
|
206
195
|
return state, merge_occurred
|
|
207
196
|
|
|
197
|
+
def downsize(self) -> None:
|
|
198
|
+
pass
|
|
199
|
+
|
|
208
200
|
#
|
|
209
201
|
# Util methods
|
|
210
202
|
#
|
|
@@ -277,8 +269,10 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
277
269
|
self._node_iterations = defaultdict(int)
|
|
278
270
|
|
|
279
271
|
self._node_to_cc = {}
|
|
280
|
-
self.var_to_typevars = defaultdict(set)
|
|
272
|
+
self.var_to_typevars: DefaultDict[SimVariable, Set[TypeVariable]] = defaultdict(set)
|
|
273
|
+
self.typevars = None
|
|
281
274
|
self.type_constraints = None
|
|
275
|
+
self.delayed_type_constraints = None
|
|
282
276
|
self.ret_val_size = None
|
|
283
277
|
|
|
284
278
|
self._analyze()
|
|
@@ -293,7 +287,9 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
293
287
|
#
|
|
294
288
|
|
|
295
289
|
def _pre_analysis(self):
|
|
290
|
+
self.typevars = TypeVariables()
|
|
296
291
|
self.type_constraints = set()
|
|
292
|
+
self.delayed_type_constraints = defaultdict(set)
|
|
297
293
|
|
|
298
294
|
self.initialize_dominance_frontiers()
|
|
299
295
|
|
|
@@ -321,6 +317,9 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
321
317
|
self.project.arch,
|
|
322
318
|
self.function,
|
|
323
319
|
project=self.project,
|
|
320
|
+
typevars=self.typevars,
|
|
321
|
+
type_constraints=self.type_constraints,
|
|
322
|
+
delayed_type_constraints=self.delayed_type_constraints,
|
|
324
323
|
)
|
|
325
324
|
initial_sp = state.stack_address(self.project.arch.bytes if self.project.arch.call_pushes_ret else 0)
|
|
326
325
|
if self.project.arch.sp_offset is not None:
|
|
@@ -434,9 +433,6 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
434
433
|
self._process_block(state, block)
|
|
435
434
|
|
|
436
435
|
self._node_iterations[block_key] += 1
|
|
437
|
-
self.type_constraints |= state.type_constraints
|
|
438
|
-
for var, typevar in state.typevars._typevars.items():
|
|
439
|
-
self.var_to_typevars[var].add(typevar)
|
|
440
436
|
|
|
441
437
|
if state.ret_val_size is not None:
|
|
442
438
|
if self.ret_val_size is None or self.ret_val_size < state.ret_val_size:
|
|
@@ -467,6 +463,10 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
467
463
|
if self._unify_variables:
|
|
468
464
|
self.variable_manager[self.function.addr].unify_variables()
|
|
469
465
|
|
|
466
|
+
# fill in var_to_typevars
|
|
467
|
+
for var, typevar_set in self.typevars._typevars.items():
|
|
468
|
+
self.var_to_typevars[var] = typevar_set
|
|
469
|
+
|
|
470
470
|
# unify type variables for global variables
|
|
471
471
|
for var, typevars in self.var_to_typevars.items():
|
|
472
472
|
if len(typevars) > 1 and isinstance(var, SimMemoryVariable) and not isinstance(var, SimStackVariable):
|
|
@@ -476,6 +476,8 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase): # pylint:dis
|
|
|
476
476
|
|
|
477
477
|
self.variable_manager[self.function.addr].ret_val_size = self.ret_val_size
|
|
478
478
|
|
|
479
|
+
self.delayed_type_constraints = None
|
|
480
|
+
|
|
479
481
|
#
|
|
480
482
|
# Private methods
|
|
481
483
|
#
|
angr/engines/light/engine.py
CHANGED
|
@@ -547,7 +547,7 @@ class SimEngineLightVEXMixin(SimEngineLightMixin):
|
|
|
547
547
|
to_size = expr_1.size()
|
|
548
548
|
if signed:
|
|
549
549
|
quotient = expr_0.SDiv(claripy.SignExt(from_size - to_size, expr_1))
|
|
550
|
-
remainder =
|
|
550
|
+
remainder = expr_0.SMod(claripy.SignExt(from_size - to_size, expr_1))
|
|
551
551
|
quotient_size = to_size
|
|
552
552
|
remainder_size = to_size
|
|
553
553
|
return claripy.Concat(
|
angr/keyed_region.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import weakref
|
|
3
|
-
from typing import Union, TYPE_CHECKING
|
|
3
|
+
from typing import Union, Optional, Tuple, TYPE_CHECKING
|
|
4
4
|
|
|
5
5
|
from sortedcontainers import SortedDict
|
|
6
6
|
|
|
@@ -133,7 +133,7 @@ class KeyedRegion:
|
|
|
133
133
|
self._storage, om, self._phi_node_contains = s
|
|
134
134
|
self._object_mapping = weakref.WeakValueDictionary(om)
|
|
135
135
|
|
|
136
|
-
def _get_container(self, offset):
|
|
136
|
+
def _get_container(self, offset) -> Tuple[int, Optional[RegionObject]]:
|
|
137
137
|
try:
|
|
138
138
|
base_offset = next(self._storage.irange(maximum=offset, reverse=True))
|
|
139
139
|
except StopIteration:
|
|
@@ -419,7 +419,23 @@ class KeyedRegion:
|
|
|
419
419
|
|
|
420
420
|
# is there a region item that begins before the start and overlaps with this variable?
|
|
421
421
|
floor_key, floor_item = self._get_container(start)
|
|
422
|
-
if floor_item is
|
|
422
|
+
if floor_item is None:
|
|
423
|
+
# fast path: just insert it
|
|
424
|
+
self._storage[start] = RegionObject(start, object_size, {stored_object})
|
|
425
|
+
return
|
|
426
|
+
|
|
427
|
+
# fast path: if there is a perfect overlap, just update the item
|
|
428
|
+
if len(overlapping_items) == 1 and floor_item.start == start and floor_item.end == end:
|
|
429
|
+
if overwrite:
|
|
430
|
+
floor_item.set_object(stored_object)
|
|
431
|
+
elif merge_to_top is False and top is None:
|
|
432
|
+
floor_item.add_object(stored_object)
|
|
433
|
+
else:
|
|
434
|
+
self._add_object_with_check(floor_item, stored_object, merge_to_top=merge_to_top, top=top)
|
|
435
|
+
return
|
|
436
|
+
|
|
437
|
+
# slower path: there are multiple overlapping items
|
|
438
|
+
if floor_key not in overlapping_items:
|
|
423
439
|
# insert it into the beginning
|
|
424
440
|
overlapping_items.insert(0, floor_key)
|
|
425
441
|
|
|
angr/knowledge_plugins/cfg/cfg_model.py
CHANGED
|
@@ -770,23 +770,16 @@ class CFGModel(Serializable):
|
|
|
770
770
|
if elfheader_sort:
|
|
771
771
|
return elfheader_sort, elfheader_size
|
|
772
772
|
|
|
773
|
+
pointer_size = self.project.arch.bytes
|
|
774
|
+
|
|
775
|
+
# who's using it?
|
|
773
776
|
irsb_addr, stmt_idx = None, None
|
|
774
777
|
if xrefs is not None and seg_list is not None:
|
|
775
778
|
try:
|
|
776
779
|
ref: "XRef" = next(iter(xrefs.get_xrefs_by_dst(data_addr)))
|
|
777
780
|
irsb_addr = ref.block_addr
|
|
778
|
-
stmt_idx = ref.stmt_idx
|
|
779
781
|
except StopIteration:
|
|
780
782
|
pass
|
|
781
|
-
|
|
782
|
-
if seg_list.is_occupied(data_addr) and seg_list.occupied_by_sort(data_addr) == "code":
|
|
783
|
-
# it's a code reference
|
|
784
|
-
# TODO: Further check if it's the beginning of an instruction
|
|
785
|
-
return MemoryDataSort.CodeReference, 0
|
|
786
|
-
|
|
787
|
-
pointer_size = self.project.arch.bytes
|
|
788
|
-
|
|
789
|
-
# who's using it?
|
|
790
783
|
if irsb_addr is not None and isinstance(self.project.loader.main_object, cle.MetaELF):
|
|
791
784
|
plt_entry = self.project.loader.main_object.reverse_plt.get(irsb_addr, None)
|
|
792
785
|
if plt_entry is not None:
|
|
@@ -839,12 +832,13 @@ class CFGModel(Serializable):
|
|
|
839
832
|
if running_failures > 3:
|
|
840
833
|
break
|
|
841
834
|
|
|
842
|
-
if
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
835
|
+
if last_success > 5:
|
|
836
|
+
if content_holder is not None:
|
|
837
|
+
string_data = data[: last_success * 2]
|
|
838
|
+
if string_data.endswith(b"\x00\x00"):
|
|
839
|
+
string_data = string_data[:-2]
|
|
840
|
+
content_holder.append(string_data)
|
|
841
|
+
return MemoryDataSort.UnicodeString, last_success * 2
|
|
848
842
|
|
|
849
843
|
if data:
|
|
850
844
|
try:
|
|
@@ -864,6 +858,21 @@ class CFGModel(Serializable):
|
|
|
864
858
|
string_len += 1
|
|
865
859
|
return MemoryDataSort.String, min(string_len, 1024)
|
|
866
860
|
|
|
861
|
+
# is it a code reference?
|
|
862
|
+
irsb_addr, stmt_idx = None, None
|
|
863
|
+
if xrefs is not None and seg_list is not None:
|
|
864
|
+
try:
|
|
865
|
+
ref: "XRef" = next(iter(xrefs.get_xrefs_by_dst(data_addr)))
|
|
866
|
+
irsb_addr = ref.block_addr
|
|
867
|
+
stmt_idx = ref.stmt_idx
|
|
868
|
+
except StopIteration:
|
|
869
|
+
pass
|
|
870
|
+
|
|
871
|
+
if seg_list.is_occupied(data_addr) and seg_list.occupied_by_sort(data_addr) == "code":
|
|
872
|
+
# it's a code reference
|
|
873
|
+
# TODO: Further check if it's the beginning of an instruction
|
|
874
|
+
return MemoryDataSort.CodeReference, 0
|
|
875
|
+
|
|
867
876
|
if data_type_guessing_handlers:
|
|
868
877
|
for handler in data_type_guessing_handlers:
|
|
869
878
|
irsb = None if irsb_addr is None else self.get_any_node(irsb_addr).block.vex
|
|
angr/knowledge_plugins/cfg/memory_data.py
CHANGED
|
@@ -122,7 +122,7 @@ class MemoryData(Serializable):
|
|
|
122
122
|
self.content = loader.memory.load(
|
|
123
123
|
self.addr, self.reference_size if self.reference_size is not None else self.size
|
|
124
124
|
)
|
|
125
|
-
|
|
125
|
+
if self.content.endswith(b"\x00\x00"):
|
|
126
126
|
self.content = self.content[:-2]
|
|
127
127
|
else:
|
|
128
128
|
# FIXME: Other types are not supported yet
|
|
angr/knowledge_plugins/functions/function.py
CHANGED
|
@@ -648,6 +648,14 @@ class Function(Serializable):
|
|
|
648
648
|
"""
|
|
649
649
|
return self.binary.loader.find_symbol(self.addr)
|
|
650
650
|
|
|
651
|
+
@property
|
|
652
|
+
def pseudocode(self) -> str:
|
|
653
|
+
"""
|
|
654
|
+
:return: the function's pseudocode
|
|
655
|
+
"""
|
|
656
|
+
dec = self.project.analyses.Decompiler(self, cfg=self._function_manager._kb.cfgs.get_most_accurate())
|
|
657
|
+
return dec.codegen.text
|
|
658
|
+
|
|
651
659
|
def add_jumpout_site(self, node):
|
|
652
660
|
"""
|
|
653
661
|
Add a custom jumpout site.
|
|
angr/knowledge_plugins/key_definitions/live_definitions.py
CHANGED
|
@@ -370,6 +370,14 @@ class LiveDefinitions:
|
|
|
370
370
|
if isinstance(anno, DefinitionAnnotation):
|
|
371
371
|
yield anno.definition
|
|
372
372
|
|
|
373
|
+
@staticmethod
|
|
374
|
+
def extract_defs_from_annotations(annos: Iterable["Annotation"]) -> Set[Definition]:
|
|
375
|
+
defs = set()
|
|
376
|
+
for anno in annos:
|
|
377
|
+
if isinstance(anno, DefinitionAnnotation):
|
|
378
|
+
defs.add(anno.definition)
|
|
379
|
+
return defs
|
|
380
|
+
|
|
373
381
|
@staticmethod
|
|
374
382
|
def extract_defs_from_mv(mv: MultiValues) -> Generator[Definition, None, None]:
|
|
375
383
|
for vs in mv.values():
|
|
@@ -614,64 +622,60 @@ class LiveDefinitions:
|
|
|
614
622
|
|
|
615
623
|
def get_definitions(
|
|
616
624
|
self, thing: Union[Atom, Definition[Atom], Iterable[Atom], Iterable[Definition[Atom]], MultiValues]
|
|
617
|
-
) ->
|
|
625
|
+
) -> Set[Definition[Atom]]:
|
|
618
626
|
if isinstance(thing, MultiValues):
|
|
627
|
+
defs = set()
|
|
619
628
|
for vs in thing.values():
|
|
620
629
|
for v in vs:
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
yield anno.definition
|
|
624
|
-
return
|
|
630
|
+
defs.update(LiveDefinitions.extract_defs_from_annotations(v.annotations))
|
|
631
|
+
return defs
|
|
625
632
|
elif isinstance(thing, Atom):
|
|
626
633
|
pass
|
|
627
634
|
elif isinstance(thing, Definition):
|
|
628
635
|
thing = thing.atom
|
|
629
636
|
else:
|
|
637
|
+
defs = set()
|
|
630
638
|
for atom2 in thing:
|
|
631
|
-
|
|
632
|
-
return
|
|
639
|
+
defs |= self.get_definitions(atom2)
|
|
640
|
+
return defs
|
|
633
641
|
|
|
634
642
|
if isinstance(thing, Register):
|
|
635
|
-
|
|
643
|
+
return self.get_register_definitions(thing.reg_offset, thing.size)
|
|
636
644
|
elif isinstance(thing, MemoryLocation):
|
|
637
645
|
if isinstance(thing.addr, SpOffset):
|
|
638
|
-
|
|
646
|
+
return self.get_stack_definitions(thing.addr.offset, thing.size)
|
|
639
647
|
elif isinstance(thing.addr, HeapAddress):
|
|
640
|
-
|
|
648
|
+
return self.get_heap_definitions(thing.addr.value, size=thing.size)
|
|
641
649
|
elif isinstance(thing.addr, int):
|
|
642
|
-
|
|
650
|
+
return self.get_memory_definitions(thing.addr, thing.size)
|
|
643
651
|
else:
|
|
644
|
-
return
|
|
652
|
+
return set()
|
|
645
653
|
elif isinstance(thing, Tmp):
|
|
646
|
-
|
|
654
|
+
return self.get_tmp_definitions(thing.tmp_idx)
|
|
647
655
|
else:
|
|
656
|
+
defs = set()
|
|
648
657
|
for mvs in self.others.get(thing, {}).values():
|
|
649
658
|
for mv in mvs:
|
|
650
|
-
|
|
659
|
+
defs |= self.get_definitions(mv)
|
|
660
|
+
return defs
|
|
651
661
|
|
|
652
|
-
def get_tmp_definitions(self, tmp_idx: int) ->
|
|
662
|
+
def get_tmp_definitions(self, tmp_idx: int) -> Set[Definition]:
|
|
653
663
|
if tmp_idx in self.tmps:
|
|
654
|
-
|
|
664
|
+
return self.tmps[tmp_idx]
|
|
655
665
|
else:
|
|
656
|
-
return
|
|
666
|
+
return set()
|
|
657
667
|
|
|
658
|
-
def get_register_definitions(self, reg_offset: int, size: int
|
|
668
|
+
def get_register_definitions(self, reg_offset: int, size: int) -> Set[Definition]:
|
|
659
669
|
try:
|
|
660
|
-
|
|
661
|
-
reg_offset,
|
|
662
|
-
size=size,
|
|
663
|
-
endness=endness,
|
|
664
|
-
)
|
|
670
|
+
annotations = self.registers.load_annotations(reg_offset, size)
|
|
665
671
|
except SimMemoryMissingError as ex:
|
|
666
|
-
# load values and stop at the missing location
|
|
667
672
|
if ex.missing_addr > reg_offset:
|
|
668
|
-
|
|
669
|
-
reg_offset, size=ex.missing_addr - reg_offset, endness=endness
|
|
670
|
-
)
|
|
673
|
+
annotations = self.registers.load_annotations(reg_offset, ex.missing_addr - reg_offset)
|
|
671
674
|
else:
|
|
672
675
|
# nothing we can do
|
|
673
|
-
return
|
|
674
|
-
|
|
676
|
+
return set()
|
|
677
|
+
|
|
678
|
+
return LiveDefinitions.extract_defs_from_annotations(annotations)
|
|
675
679
|
|
|
676
680
|
def get_stack_values(self, stack_offset: int, size: int, endness: str) -> Optional[MultiValues]:
|
|
677
681
|
stack_addr = self.stack_offset_to_stack_addr(stack_offset)
|
|
@@ -680,31 +684,36 @@ class LiveDefinitions:
|
|
|
680
684
|
except SimMemoryMissingError:
|
|
681
685
|
return None
|
|
682
686
|
|
|
683
|
-
def get_stack_definitions(self, stack_offset: int, size: int
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
687
|
+
def get_stack_definitions(self, stack_offset: int, size: int) -> Set[Definition]:
|
|
688
|
+
try:
|
|
689
|
+
stack_addr = self.stack_offset_to_stack_addr(stack_offset)
|
|
690
|
+
annotations = self.stack.load_annotations(stack_addr, size)
|
|
691
|
+
except SimMemoryMissingError:
|
|
692
|
+
return set()
|
|
688
693
|
|
|
689
|
-
|
|
694
|
+
return LiveDefinitions.extract_defs_from_annotations(annotations)
|
|
695
|
+
|
|
696
|
+
def get_heap_definitions(self, heap_addr: int, size: int) -> Set[Definition]:
|
|
690
697
|
try:
|
|
691
|
-
|
|
698
|
+
annotations = self.heap.load_annotations(heap_addr, size)
|
|
692
699
|
except SimMemoryMissingError:
|
|
693
|
-
return
|
|
694
|
-
|
|
700
|
+
return set()
|
|
701
|
+
|
|
702
|
+
return LiveDefinitions.extract_defs_from_annotations(annotations)
|
|
695
703
|
|
|
696
|
-
def get_memory_definitions(self, addr: int, size: int
|
|
704
|
+
def get_memory_definitions(self, addr: int, size: int) -> Set[Definition]:
|
|
697
705
|
try:
|
|
698
|
-
|
|
706
|
+
annotations = self.memory.load_annotations(addr, size)
|
|
699
707
|
except SimMemoryMissingError:
|
|
700
|
-
return
|
|
701
|
-
|
|
708
|
+
return set()
|
|
709
|
+
|
|
710
|
+
return LiveDefinitions.extract_defs_from_annotations(annotations)
|
|
702
711
|
|
|
703
712
|
@deprecated("get_definitions")
|
|
704
713
|
def get_definitions_from_atoms(self, atoms: Iterable[Atom]) -> Iterable[Definition]:
|
|
705
714
|
result = set()
|
|
706
715
|
for atom in atoms:
|
|
707
|
-
result |=
|
|
716
|
+
result |= self.get_definitions(atom)
|
|
708
717
|
return result
|
|
709
718
|
|
|
710
719
|
@deprecated("get_values")
|
|
@@ -905,7 +914,7 @@ class LiveDefinitions:
|
|
|
905
914
|
|
|
906
915
|
def add_memory_use(self, atom: MemoryLocation, code_loc: CodeLocation, expr: Optional[Any] = None) -> None:
|
|
907
916
|
# get all current definitions
|
|
908
|
-
current_defs:
|
|
917
|
+
current_defs: Set[Definition] = self.get_definitions(atom)
|
|
909
918
|
|
|
910
919
|
for current_def in current_defs:
|
|
911
920
|
self.add_memory_use_by_def(current_def, code_loc, expr=expr)
|
|
angr/knowledge_plugins/key_definitions/liveness.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
from typing import DefaultDict, Optional, List, Set, Tuple, TYPE_CHECKING
|
|
1
|
+
from typing import DefaultDict, Optional, List, Set, Tuple, Dict, TYPE_CHECKING
|
|
2
2
|
|
|
3
3
|
from collections import defaultdict
|
|
4
|
+
from itertools import chain
|
|
4
5
|
|
|
6
|
+
from angr.utils.constants import DEFAULT_STATEMENT
|
|
5
7
|
from angr.knowledge_plugins.key_definitions.atoms import Tmp
|
|
6
8
|
|
|
7
9
|
from .constants import ObservationPointType, OP_BEFORE, OP_AFTER
|
|
@@ -15,6 +17,7 @@ LocationType = Tuple[int, Optional[int], Optional[int]] # block addr, block ID,
|
|
|
15
17
|
LocationWithPosType = Tuple[
|
|
16
18
|
int, Optional[int], Optional[int], ObservationPointType
|
|
17
19
|
] # block addr, block ID, stmt ID, before/after
|
|
20
|
+
BlockAddrType = Tuple[int, Optional[int]] # block addr, block ID
|
|
18
21
|
|
|
19
22
|
|
|
20
23
|
class Liveness:
|
|
@@ -25,30 +28,37 @@ class Liveness:
|
|
|
25
28
|
def __init__(self):
|
|
26
29
|
self.curr_live_defs: Set["Definition"] = set()
|
|
27
30
|
self.curr_loc: Optional[LocationType] = None
|
|
28
|
-
self.
|
|
29
|
-
self.
|
|
30
|
-
self.
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
self.curr_block: Optional[BlockAddrType] = None
|
|
32
|
+
self.curr_stmt_idx: Optional[int] = None
|
|
33
|
+
self.blockstart_to_defs: DefaultDict[BlockAddrType, Set["Definition"]] = defaultdict(set)
|
|
34
|
+
self.blockend_to_defs: DefaultDict[BlockAddrType, Set["Definition"]] = defaultdict(set)
|
|
35
|
+
self.loc_to_killed_defs: DefaultDict[BlockAddrType, Dict[int, Set["Definition"]]] = defaultdict(dict)
|
|
36
|
+
self.loc_to_added_defs: DefaultDict[BlockAddrType, Dict[int, Set["Definition"]]] = defaultdict(dict)
|
|
37
|
+
self._node_max_stmt_id: DefaultDict[BlockAddrType, int] = defaultdict(int)
|
|
38
|
+
|
|
39
|
+
def add_def(self, d: "Definition") -> None:
|
|
34
40
|
self.curr_live_defs.add(d)
|
|
35
|
-
self.
|
|
41
|
+
if self.curr_stmt_idx not in self.loc_to_added_defs[self.curr_block]:
|
|
42
|
+
self.loc_to_added_defs[self.curr_block][self.curr_stmt_idx] = set()
|
|
43
|
+
self.loc_to_added_defs[self.curr_block][self.curr_stmt_idx].add(d)
|
|
36
44
|
|
|
37
45
|
def kill_def(self, d: "Definition") -> None:
|
|
38
46
|
self.curr_live_defs.discard(d)
|
|
47
|
+
if self.curr_stmt_idx not in self.loc_to_killed_defs[self.curr_block]:
|
|
48
|
+
self.loc_to_killed_defs[self.curr_block][self.curr_stmt_idx] = set()
|
|
49
|
+
self.loc_to_killed_defs[self.curr_block][self.curr_stmt_idx].add(d)
|
|
39
50
|
|
|
40
|
-
def
|
|
41
|
-
if self.
|
|
42
|
-
|
|
43
|
-
self.def_to_liveness[live_def].add(self.curr_loc)
|
|
44
|
-
self.loc_to_defs[self.curr_loc + (OP_AFTER,)] |= self.curr_live_defs
|
|
51
|
+
def make_liveness_snapshot(self) -> None:
|
|
52
|
+
if self.curr_block is not None:
|
|
53
|
+
self.blockend_to_defs[self.curr_block] |= self.curr_live_defs
|
|
45
54
|
|
|
46
55
|
def at_new_stmt(self, code_loc: "CodeLocation") -> None:
|
|
47
56
|
"""
|
|
48
57
|
Only support moving from a statement to the next statement within one basic block.
|
|
49
58
|
"""
|
|
50
|
-
self.complete_loc()
|
|
51
59
|
self.curr_loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx
|
|
60
|
+
self.curr_block = code_loc.block_addr, code_loc.block_idx
|
|
61
|
+
self.curr_stmt_idx = code_loc.stmt_idx
|
|
52
62
|
if (
|
|
53
63
|
code_loc.stmt_idx is not None
|
|
54
64
|
and code_loc.stmt_idx > self._node_max_stmt_id[(code_loc.block_addr, code_loc.block_idx)]
|
|
@@ -59,44 +69,102 @@ class Liveness:
|
|
|
59
69
|
"""
|
|
60
70
|
Only support moving to a new block from one or more blocks.
|
|
61
71
|
"""
|
|
72
|
+
self.make_liveness_snapshot()
|
|
73
|
+
|
|
62
74
|
loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx
|
|
63
|
-
key = code_loc.block_addr, code_loc.block_idx
|
|
75
|
+
key = code_loc.block_addr, code_loc.block_idx
|
|
64
76
|
for pred_codeloc in pred_codelocs:
|
|
65
|
-
|
|
66
|
-
# external code location
|
|
67
|
-
pred_max_stmt_id = None
|
|
68
|
-
else:
|
|
69
|
-
pred_max_stmt_id = self._node_max_stmt_id[(pred_codeloc.block_addr, pred_codeloc.block_idx)]
|
|
70
|
-
pred_key = pred_codeloc.block_addr, pred_codeloc.block_idx, pred_max_stmt_id, OP_AFTER
|
|
71
|
-
all_pred_defs = self.loc_to_defs[pred_key]
|
|
77
|
+
all_pred_defs = self.blockend_to_defs[pred_codeloc.block_addr, pred_codeloc.block_idx]
|
|
72
78
|
|
|
73
79
|
# remove tmp defs
|
|
74
80
|
pred_defs = set()
|
|
75
81
|
for pred_def in all_pred_defs:
|
|
76
82
|
if not isinstance(pred_def.atom, Tmp):
|
|
77
83
|
pred_defs.add(pred_def)
|
|
78
|
-
|
|
79
|
-
self.def_to_liveness[pred_def].add(loc)
|
|
80
|
-
self.loc_to_defs[key] |= pred_defs
|
|
84
|
+
self.blockstart_to_defs[key] |= pred_defs
|
|
81
85
|
|
|
82
|
-
self.curr_live_defs =
|
|
86
|
+
self.curr_live_defs = self.blockstart_to_defs[key].copy()
|
|
83
87
|
self.curr_loc = loc
|
|
88
|
+
self.curr_stmt_idx = 0
|
|
84
89
|
|
|
85
90
|
def find_defs_at(self, code_loc: "CodeLocation", op: int = OP_BEFORE) -> Set["Definition"]:
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
91
|
+
return self.find_defs_at_raw(code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, op=op)
|
|
92
|
+
|
|
93
|
+
def find_defs_at_raw(
|
|
94
|
+
self, block_addr: int, block_idx: Optional[int], stmt_idx: Optional[int], op: int = OP_BEFORE
|
|
95
|
+
) -> Set["Definition"]:
|
|
96
|
+
block: BlockAddrType = block_addr, block_idx
|
|
97
|
+
if block not in self.blockstart_to_defs:
|
|
98
|
+
defs = set()
|
|
99
|
+
else:
|
|
100
|
+
defs = self.blockstart_to_defs[block].copy()
|
|
101
|
+
|
|
102
|
+
if stmt_idx is None:
|
|
103
|
+
return defs
|
|
104
|
+
|
|
105
|
+
added_defs = self.loc_to_added_defs[block] if block in self.loc_to_added_defs else None
|
|
106
|
+
killed_defs = self.loc_to_killed_defs[block] if block in self.loc_to_added_defs else None
|
|
107
|
+
|
|
108
|
+
if stmt_idx == DEFAULT_STATEMENT:
|
|
109
|
+
end_stmt_idx = self._node_max_stmt_id[block] + 1
|
|
110
|
+
else:
|
|
111
|
+
if op == OP_BEFORE:
|
|
112
|
+
end_stmt_idx = stmt_idx
|
|
89
113
|
else:
|
|
90
|
-
|
|
114
|
+
end_stmt_idx = stmt_idx + 1
|
|
115
|
+
|
|
116
|
+
if added_defs is not None and killed_defs is not None:
|
|
117
|
+
indices = chain(added_defs, killed_defs)
|
|
118
|
+
elif added_defs is None and killed_defs is not None:
|
|
119
|
+
indices = killed_defs
|
|
120
|
+
elif added_defs is not None and killed_defs is None:
|
|
121
|
+
indices = added_defs
|
|
91
122
|
else:
|
|
92
|
-
|
|
93
|
-
|
|
123
|
+
indices = []
|
|
124
|
+
|
|
125
|
+
tmp_indices = []
|
|
126
|
+
if killed_defs is not None and None in killed_defs:
|
|
127
|
+
# External codeloc
|
|
128
|
+
defs.difference_update(killed_defs[None])
|
|
129
|
+
for idx in indices:
|
|
130
|
+
if idx is not None:
|
|
131
|
+
tmp_indices.append(idx)
|
|
132
|
+
indices = tmp_indices
|
|
133
|
+
|
|
134
|
+
tmp_indices = []
|
|
135
|
+
if added_defs is not None and None in added_defs:
|
|
136
|
+
# External codeloc
|
|
137
|
+
defs.update(added_defs[None])
|
|
138
|
+
for idx in indices:
|
|
139
|
+
if idx is not None:
|
|
140
|
+
tmp_indices.append(idx)
|
|
141
|
+
indices = tmp_indices
|
|
142
|
+
|
|
143
|
+
for idx in sorted(indices):
|
|
144
|
+
if idx >= end_stmt_idx:
|
|
145
|
+
break
|
|
146
|
+
if killed_defs is not None and idx in killed_defs:
|
|
147
|
+
defs.difference_update(killed_defs[idx])
|
|
148
|
+
if added_defs is not None and idx in added_defs:
|
|
149
|
+
defs.update(added_defs[idx])
|
|
150
|
+
|
|
151
|
+
if stmt_idx == DEFAULT_STATEMENT and op == OP_AFTER:
|
|
152
|
+
if killed_defs is not None and DEFAULT_STATEMENT in killed_defs:
|
|
153
|
+
defs.difference_update(killed_defs[DEFAULT_STATEMENT])
|
|
154
|
+
if added_defs is not None and DEFAULT_STATEMENT in added_defs:
|
|
155
|
+
defs.update(added_defs[DEFAULT_STATEMENT])
|
|
156
|
+
|
|
157
|
+
return defs
|
|
94
158
|
|
|
95
159
|
def copy(self) -> "Liveness":
|
|
96
160
|
o = Liveness()
|
|
97
161
|
o.curr_live_defs = self.curr_live_defs.copy()
|
|
98
162
|
o.curr_loc = self.curr_loc
|
|
99
|
-
o.
|
|
100
|
-
o.
|
|
163
|
+
o.curr_block = self.curr_block
|
|
164
|
+
o.curr_stmt_idx = self.curr_stmt_idx
|
|
165
|
+
o.blockstart_to_defs = self.blockstart_to_defs.copy()
|
|
166
|
+
o.blockend_to_defs = self.blockend_to_defs.copy()
|
|
167
|
+
o.loc_to_added_defs = self.loc_to_added_defs.copy()
|
|
168
|
+
o.loc_to_killed_defs = self.loc_to_killed_defs.copy()
|
|
101
169
|
o._node_max_stmt_id = self._node_max_stmt_id.copy()
|
|
102
170
|
return o
|