PyPI - angr - Versions diffs - 9.2.75__py3-none-manylinux2014_x86_64.whl → 9.2.77__py3-none-manylinux2014_x86_64.whl - Mend

angr 9.2.75__py3-none-manylinux2014_x86_64.whl → 9.2.77__py3-none-manylinux2014_x86_64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of angr might be problematic. Click here for more details.

Files changed (50)

angr/__init__.py +1 -1
angr/analyses/cfg/cfg_fast.py +37 -0
angr/analyses/cfg/indirect_jump_resolvers/amd64_pe_iat.py +7 -1
angr/analyses/cfg/indirect_jump_resolvers/x86_pe_iat.py +7 -1
angr/analyses/decompiler/clinic.py +4 -1
angr/analyses/decompiler/condition_processor.py +4 -0
angr/analyses/decompiler/decompiler.py +4 -0
angr/analyses/decompiler/optimization_passes/ite_region_converter.py +4 -3
angr/analyses/decompiler/optimization_passes/multi_simplifier.py +1 -1
angr/analyses/decompiler/structured_codegen/c.py +32 -21
angr/analyses/propagator/engine_ail.py +1 -1
angr/analyses/reaching_definitions/engine_ail.py +3 -6
angr/analyses/reaching_definitions/engine_vex.py +32 -2
angr/analyses/reaching_definitions/function_handler.py +1 -1
angr/analyses/reaching_definitions/rd_initializer.py +6 -6
angr/analyses/reaching_definitions/rd_state.py +9 -11
angr/analyses/typehoon/typevars.py +19 -29
angr/analyses/variable_recovery/irsb_scanner.py +16 -0
angr/analyses/variable_recovery/variable_recovery_fast.py +33 -31
angr/engines/light/engine.py +1 -1
angr/keyed_region.py +19 -3
angr/knowledge_plugins/cfg/cfg_model.py +25 -16
angr/knowledge_plugins/cfg/memory_data.py +1 -1
angr/knowledge_plugins/functions/function.py +8 -0
angr/knowledge_plugins/key_definitions/live_definitions.py +53 -44
angr/knowledge_plugins/key_definitions/liveness.py +102 -34
angr/knowledge_plugins/key_definitions/rd_model.py +4 -4
angr/knowledge_plugins/propagations/states.py +3 -1
angr/knowledge_plugins/variables/variable_manager.py +51 -25
angr/misc/bug_report.py +2 -2
angr/sim_type.py +46 -0
angr/storage/memory_mixins/__init__.py +3 -2
angr/storage/memory_mixins/paged_memory/paged_memory_multivalue_mixin.py +63 -0
angr/storage/memory_mixins/paged_memory/pages/mv_list_page.py +5 -0
{angr-9.2.75.dist-info → angr-9.2.77.dist-info}/METADATA +6 -6
{angr-9.2.75.dist-info → angr-9.2.77.dist-info}/RECORD +50 -49
tests/analyses/cfg/test_cfgfast.py +21 -0
tests/analyses/decompiler/test_decompiler.py +22 -1
tests/analyses/test_flirt.py +3 -1
tests/analyses/test_identifier.py +2 -0
tests/engines/test_unicorn.py +4 -0
tests/exploration_techniques/test_driller_core.py +4 -0
tests/exploration_techniques/test_oppologist.py +2 -0
tests/exploration_techniques/test_tracer.py +9 -0
tests/procedures/libc/test_string.py +2 -1
tests/sim/options/test_0div.py +2 -0
tests/state_plugins/posix/test_files.py +2 -0
{angr-9.2.75.dist-info → angr-9.2.77.dist-info}/LICENSE +0 -0
{angr-9.2.75.dist-info → angr-9.2.77.dist-info}/WHEEL +0 -0
{angr-9.2.75.dist-info → angr-9.2.77.dist-info}/top_level.txt +0 -0

angr/analyses/variable_recovery/variable_recovery_fast.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # pylint:disable=wrong-import-position,wrong-import-order
-from typing import Optional, List, Tuple, Union
+from typing import Optional, List, Tuple, Union, DefaultDict, Set
 import logging
 from collections import defaultdict
@@ -17,7 +17,7 @@ from ...knowledge_plugins import Function
 from ...sim_variable import SimStackVariable, SimRegisterVariable, SimVariable, SimMemoryVariable
 from ...engines.vex.claripy.irop import vexop_to_simop
 from angr.analyses import ForwardAnalysis, visitors
-from ..typehoon.typevars import Equivalence, TypeVariable
+from ..typehoon.typevars import Equivalence, TypeVariable, TypeVariables
 from .variable_recovery_base import VariableRecoveryBase, VariableRecoveryStateBase
 from .engine_vex import SimEngineVRVEX
 from .engine_ail import SimEngineVRAIL
@@ -86,9 +86,9 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             stack_region=self.stack_region.copy(),
             register_region=self.register_region.copy(),
             global_region=self.global_region.copy(),
-            typevars=self.typevars.copy(),
-            type_constraints=self.type_constraints.copy(),
-            delayed_type_constraints=self.delayed_type_constraints.copy(),
+            typevars=self.typevars,
+            type_constraints=self.type_constraints,
+            delayed_type_constraints=self.delayed_type_constraints,
             stack_offset_typevars=dict(self.stack_offset_typevars),
             project=self.project,
             ret_val_size=self.ret_val_size,
@@ -125,26 +125,17 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         merged_global_region.set_state(self)
         merge_occurred |= merged_global_region.merge([other.global_region for other in others], None)
-        merged_typevars = self.typevars
-        merged_typeconstraints = self.type_constraints.copy()
-        delayed_typeconstraints = self.delayed_type_constraints.copy().clean()
-        for other in others:
-            merged_typevars = merged_typevars.merge(other.typevars)
-            merged_typeconstraints |= other.type_constraints
-            for v, cons in other.delayed_type_constraints.items():
-                delayed_typeconstraints[v] |= cons
-        merge_occurred |= self.typevars != merged_typevars
-        merge_occurred |= self.type_constraints != merged_typeconstraints
-        merge_occurred |= self.delayed_type_constraints != delayed_typeconstraints
+        typevars = self.typevars
+        type_constraints = self.type_constraints
+        delayed_typeconstraints = self.delayed_type_constraints
         # add subtype constraints for all replacements
         for v0, v1 in self.phi_variables.items():
             # v0 will be replaced by v1
-            if not merged_typevars.has_type_variable_for(v1, None):
-                merged_typevars.add_type_variable(v1, None, TypeVariable())
-            if not merged_typevars.has_type_variable_for(v0, None):
-                merged_typevars.add_type_variable(v0, None, TypeVariable())
+            if not typevars.has_type_variable_for(v1, None):
+                typevars.add_type_variable(v1, None, TypeVariable())
+            if not typevars.has_type_variable_for(v0, None):
+                typevars.add_type_variable(v0, None, TypeVariable())
             # Assuming v2 = phi(v0, v1), then we know that v0_typevar == v1_typevar == v2_typevar
             # However, it's possible that neither v0 nor v1 will ever be used in future blocks, which not only makes
             # this phi function useless, but also leads to the incorrect assumption that v1_typevar == v2_typevar.
@@ -152,9 +143,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             # when v1 (the new variable that will end up in the state) is ever used in the future.
             # create an equivalence relationship
-            equivalence = Equivalence(
-                merged_typevars.get_type_variable(v1, None), merged_typevars.get_type_variable(v0, None)
-            )
+            equivalence = Equivalence(typevars.get_type_variable(v1, None), typevars.get_type_variable(v0, None))
             delayed_typeconstraints[v1].add(equivalence)
         stack_offset_typevars = {}
@@ -173,7 +162,7 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             else:
                 typevar = TypeVariable()
                 for orig_typevar in all_typevars:
-                    merged_typeconstraints.add(Equivalence(orig_typevar, typevar))
+                    type_constraints.add(Equivalence(orig_typevar, typevar))
             stack_offset_typevars[offset] = typevar
         ret_val_size = self.ret_val_size
@@ -195,8 +184,8 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
             stack_region=merged_stack_region,
             register_region=merged_register_region,
             global_region=merged_global_region,
-            typevars=merged_typevars,
-            type_constraints=merged_typeconstraints,
+            typevars=typevars,
+            type_constraints=type_constraints,
             delayed_type_constraints=delayed_typeconstraints,
             stack_offset_typevars=stack_offset_typevars,
             project=self.project,
@@ -205,6 +194,9 @@ class VariableRecoveryFastState(VariableRecoveryStateBase):
         return state, merge_occurred
+    def downsize(self) -> None:
+        pass
     #
     # Util methods
     #
@@ -277,8 +269,10 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         self._node_iterations = defaultdict(int)
         self._node_to_cc = {}
-        self.var_to_typevars = defaultdict(set)
+        self.var_to_typevars: DefaultDict[SimVariable, Set[TypeVariable]] = defaultdict(set)
+        self.typevars = None
         self.type_constraints = None
+        self.delayed_type_constraints = None
         self.ret_val_size = None
         self._analyze()
@@ -293,7 +287,9 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
     #
     def _pre_analysis(self):
+        self.typevars = TypeVariables()
         self.type_constraints = set()
+        self.delayed_type_constraints = defaultdict(set)
         self.initialize_dominance_frontiers()
@@ -321,6 +317,9 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
             self.project.arch,
             self.function,
             project=self.project,
+            typevars=self.typevars,
+            type_constraints=self.type_constraints,
+            delayed_type_constraints=self.delayed_type_constraints,
         )
         initial_sp = state.stack_address(self.project.arch.bytes if self.project.arch.call_pushes_ret else 0)
         if self.project.arch.sp_offset is not None:
@@ -434,9 +433,6 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         self._process_block(state, block)
         self._node_iterations[block_key] += 1
-        self.type_constraints |= state.type_constraints
-        for var, typevar in state.typevars._typevars.items():
-            self.var_to_typevars[var].add(typevar)
         if state.ret_val_size is not None:
             if self.ret_val_size is None or self.ret_val_size < state.ret_val_size:
@@ -467,6 +463,10 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         if self._unify_variables:
             self.variable_manager[self.function.addr].unify_variables()
+        # fill in var_to_typevars
+        for var, typevar_set in self.typevars._typevars.items():
+            self.var_to_typevars[var] = typevar_set
         # unify type variables for global variables
         for var, typevars in self.var_to_typevars.items():
             if len(typevars) > 1 and isinstance(var, SimMemoryVariable) and not isinstance(var, SimStackVariable):
@@ -476,6 +476,8 @@ class VariableRecoveryFast(ForwardAnalysis, VariableRecoveryBase):  # pylint:dis
         self.variable_manager[self.function.addr].ret_val_size = self.ret_val_size
+        self.delayed_type_constraints = None
     #
     # Private methods
     #

angr/engines/light/engine.py CHANGED Viewed

@@ -547,7 +547,7 @@ class SimEngineLightVEXMixin(SimEngineLightMixin):
         to_size = expr_1.size()
         if signed:
             quotient = expr_0.SDiv(claripy.SignExt(from_size - to_size, expr_1))
-            remainder = expr_1.SMod(claripy.SignExt(from_size - to_size, expr_1))
+            remainder = expr_0.SMod(claripy.SignExt(from_size - to_size, expr_1))
             quotient_size = to_size
             remainder_size = to_size
             return claripy.Concat(

angr/keyed_region.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
 import weakref
-from typing import Union, TYPE_CHECKING
+from typing import Union, Optional, Tuple, TYPE_CHECKING
 from sortedcontainers import SortedDict
@@ -133,7 +133,7 @@ class KeyedRegion:
         self._storage, om, self._phi_node_contains = s
         self._object_mapping = weakref.WeakValueDictionary(om)
-    def _get_container(self, offset):
+    def _get_container(self, offset) -> Tuple[int, Optional[RegionObject]]:
         try:
             base_offset = next(self._storage.irange(maximum=offset, reverse=True))
         except StopIteration:
@@ -419,7 +419,23 @@ class KeyedRegion:
         # is there a region item that begins before the start and overlaps with this variable?
         floor_key, floor_item = self._get_container(start)
-        if floor_item is not None and floor_key not in overlapping_items:
+        if floor_item is None:
+            # fast path: just insert it
+            self._storage[start] = RegionObject(start, object_size, {stored_object})
+            return
+        # fast path: if there is a perfect overlap, just update the item
+        if len(overlapping_items) == 1 and floor_item.start == start and floor_item.end == end:
+            if overwrite:
+                floor_item.set_object(stored_object)
+            elif merge_to_top is False and top is None:
+                floor_item.add_object(stored_object)
+            else:
+                self._add_object_with_check(floor_item, stored_object, merge_to_top=merge_to_top, top=top)
+            return
+        # slower path: there are multiple overlapping items
+        if floor_key not in overlapping_items:
             # insert it into the beginning
             overlapping_items.insert(0, floor_key)

angr/knowledge_plugins/cfg/cfg_model.py CHANGED Viewed

@@ -770,23 +770,16 @@ class CFGModel(Serializable):
         if elfheader_sort:
             return elfheader_sort, elfheader_size
+        pointer_size = self.project.arch.bytes
+        # who's using it?
         irsb_addr, stmt_idx = None, None
         if xrefs is not None and seg_list is not None:
             try:
                 ref: "XRef" = next(iter(xrefs.get_xrefs_by_dst(data_addr)))
                 irsb_addr = ref.block_addr
-                stmt_idx = ref.stmt_idx
             except StopIteration:
                 pass
-            if seg_list.is_occupied(data_addr) and seg_list.occupied_by_sort(data_addr) == "code":
-                # it's a code reference
-                # TODO: Further check if it's the beginning of an instruction
-                return MemoryDataSort.CodeReference, 0
-        pointer_size = self.project.arch.bytes
-        # who's using it?
         if irsb_addr is not None and isinstance(self.project.loader.main_object, cle.MetaELF):
             plt_entry = self.project.loader.main_object.reverse_plt.get(irsb_addr, None)
             if plt_entry is not None:
@@ -839,12 +832,13 @@ class CFGModel(Serializable):
                         if running_failures > 3:
                             break
-                if content_holder is not None:
-                    string_data = data[: last_success * 2]
-                    if string_data.endswith(b"\x00\x00"):
-                        string_data = string_data[:-2]
-                    content_holder.append(string_data)
-                return MemoryDataSort.UnicodeString, last_success
+                if last_success > 5:
+                    if content_holder is not None:
+                        string_data = data[: last_success * 2]
+                        if string_data.endswith(b"\x00\x00"):
+                            string_data = string_data[:-2]
+                        content_holder.append(string_data)
+                    return MemoryDataSort.UnicodeString, last_success * 2
         if data:
             try:
@@ -864,6 +858,21 @@ class CFGModel(Serializable):
                     string_len += 1
                 return MemoryDataSort.String, min(string_len, 1024)
+        # is it a code reference?
+        irsb_addr, stmt_idx = None, None
+        if xrefs is not None and seg_list is not None:
+            try:
+                ref: "XRef" = next(iter(xrefs.get_xrefs_by_dst(data_addr)))
+                irsb_addr = ref.block_addr
+                stmt_idx = ref.stmt_idx
+            except StopIteration:
+                pass
+            if seg_list.is_occupied(data_addr) and seg_list.occupied_by_sort(data_addr) == "code":
+                # it's a code reference
+                # TODO: Further check if it's the beginning of an instruction
+                return MemoryDataSort.CodeReference, 0
         if data_type_guessing_handlers:
             for handler in data_type_guessing_handlers:
                 irsb = None if irsb_addr is None else self.get_any_node(irsb_addr).block.vex

angr/knowledge_plugins/cfg/memory_data.py CHANGED Viewed

@@ -122,7 +122,7 @@ class MemoryData(Serializable):
             self.content = loader.memory.load(
                 self.addr, self.reference_size if self.reference_size is not None else self.size
             )
-            while self.content.endswith(b"\x00\x00"):
+            if self.content.endswith(b"\x00\x00"):
                 self.content = self.content[:-2]
         else:
             # FIXME: Other types are not supported yet

angr/knowledge_plugins/functions/function.py CHANGED Viewed

@@ -648,6 +648,14 @@ class Function(Serializable):
         """
         return self.binary.loader.find_symbol(self.addr)
+    @property
+    def pseudocode(self) -> str:
+        """
+        :return: the function's pseudocode
+        """
+        dec = self.project.analyses.Decompiler(self, cfg=self._function_manager._kb.cfgs.get_most_accurate())
+        return dec.codegen.text
     def add_jumpout_site(self, node):
         """
         Add a custom jumpout site.

angr/knowledge_plugins/key_definitions/live_definitions.py CHANGED Viewed

@@ -370,6 +370,14 @@ class LiveDefinitions:
             if isinstance(anno, DefinitionAnnotation):
                 yield anno.definition
+    @staticmethod
+    def extract_defs_from_annotations(annos: Iterable["Annotation"]) -> Set[Definition]:
+        defs = set()
+        for anno in annos:
+            if isinstance(anno, DefinitionAnnotation):
+                defs.add(anno.definition)
+        return defs
     @staticmethod
     def extract_defs_from_mv(mv: MultiValues) -> Generator[Definition, None, None]:
         for vs in mv.values():
@@ -614,64 +622,60 @@ class LiveDefinitions:
     def get_definitions(
         self, thing: Union[Atom, Definition[Atom], Iterable[Atom], Iterable[Definition[Atom]], MultiValues]
-    ) -> Iterable[Definition[Atom]]:
+    ) -> Set[Definition[Atom]]:
         if isinstance(thing, MultiValues):
+            defs = set()
             for vs in thing.values():
                 for v in vs:
-                    for anno in v.annotations:
-                        if isinstance(anno, DefinitionAnnotation):
-                            yield anno.definition
-            return
+                    defs.update(LiveDefinitions.extract_defs_from_annotations(v.annotations))
+            return defs
         elif isinstance(thing, Atom):
             pass
         elif isinstance(thing, Definition):
             thing = thing.atom
         else:
+            defs = set()
             for atom2 in thing:
-                yield from self.get_definitions(atom2)
-            return
+                defs |= self.get_definitions(atom2)
+            return defs
         if isinstance(thing, Register):
-            yield from self.get_register_definitions(thing.reg_offset, thing.size)
+            return self.get_register_definitions(thing.reg_offset, thing.size)
         elif isinstance(thing, MemoryLocation):
             if isinstance(thing.addr, SpOffset):
-                yield from self.get_stack_definitions(thing.addr.offset, thing.size, thing.endness)
+                return self.get_stack_definitions(thing.addr.offset, thing.size)
             elif isinstance(thing.addr, HeapAddress):
-                yield from self.get_heap_definitions(thing.addr.value, size=thing.size, endness=thing.endness)
+                return self.get_heap_definitions(thing.addr.value, size=thing.size)
             elif isinstance(thing.addr, int):
-                yield from self.get_memory_definitions(thing.addr, thing.size, thing.endness)
+                return self.get_memory_definitions(thing.addr, thing.size)
             else:
-                return
+                return set()
         elif isinstance(thing, Tmp):
-            yield from self.get_tmp_definitions(thing.tmp_idx)
+            return self.get_tmp_definitions(thing.tmp_idx)
         else:
+            defs = set()
             for mvs in self.others.get(thing, {}).values():
                 for mv in mvs:
-                    yield from self.get_definitions(mv)
+                    defs |= self.get_definitions(mv)
+            return defs
-    def get_tmp_definitions(self, tmp_idx: int) -> Iterable[Definition]:
+    def get_tmp_definitions(self, tmp_idx: int) -> Set[Definition]:
         if tmp_idx in self.tmps:
-            yield from self.tmps[tmp_idx]
+            return self.tmps[tmp_idx]
         else:
-            return
+            return set()
-    def get_register_definitions(self, reg_offset: int, size: int, endness=None) -> Iterable[Definition]:
+    def get_register_definitions(self, reg_offset: int, size: int) -> Set[Definition]:
         try:
-            values: MultiValues = self.registers.load(
-                reg_offset,
-                size=size,
-                endness=endness,
-            )
+            annotations = self.registers.load_annotations(reg_offset, size)
         except SimMemoryMissingError as ex:
-            # load values and stop at the missing location
             if ex.missing_addr > reg_offset:
-                values: MultiValues = self.registers.load(
-                    reg_offset, size=ex.missing_addr - reg_offset, endness=endness
-                )
+                annotations = self.registers.load_annotations(reg_offset, ex.missing_addr - reg_offset)
             else:
                 # nothing we can do
-                return
-        yield from LiveDefinitions.extract_defs_from_mv(values)
+                return set()
+        return LiveDefinitions.extract_defs_from_annotations(annotations)
     def get_stack_values(self, stack_offset: int, size: int, endness: str) -> Optional[MultiValues]:
         stack_addr = self.stack_offset_to_stack_addr(stack_offset)
@@ -680,31 +684,36 @@ class LiveDefinitions:
         except SimMemoryMissingError:
             return None
-    def get_stack_definitions(self, stack_offset: int, size: int, endness) -> Iterable[Definition]:
-        mv = self.get_stack_values(stack_offset, size, endness)
-        if not mv:
-            return
-        yield from LiveDefinitions.extract_defs_from_mv(mv)
+    def get_stack_definitions(self, stack_offset: int, size: int) -> Set[Definition]:
+        try:
+            stack_addr = self.stack_offset_to_stack_addr(stack_offset)
+            annotations = self.stack.load_annotations(stack_addr, size)
+        except SimMemoryMissingError:
+            return set()
-    def get_heap_definitions(self, heap_addr: int, size: int, endness) -> Iterable[Definition]:
+        return LiveDefinitions.extract_defs_from_annotations(annotations)
+    def get_heap_definitions(self, heap_addr: int, size: int) -> Set[Definition]:
         try:
-            mv: MultiValues = self.heap.load(heap_addr, size=size, endness=endness)
+            annotations = self.heap.load_annotations(heap_addr, size)
         except SimMemoryMissingError:
-            return
-        yield from LiveDefinitions.extract_defs_from_mv(mv)
+            return set()
+        return LiveDefinitions.extract_defs_from_annotations(annotations)
-    def get_memory_definitions(self, addr: int, size: int, endness) -> Iterable[Definition]:
+    def get_memory_definitions(self, addr: int, size: int) -> Set[Definition]:
         try:
-            values = self.memory.load(addr, size=size, endness=endness)
+            annotations = self.memory.load_annotations(addr, size)
         except SimMemoryMissingError:
-            return
-        yield from LiveDefinitions.extract_defs_from_mv(values)
+            return set()
+        return LiveDefinitions.extract_defs_from_annotations(annotations)
     @deprecated("get_definitions")
     def get_definitions_from_atoms(self, atoms: Iterable[Atom]) -> Iterable[Definition]:
         result = set()
         for atom in atoms:
-            result |= set(self.get_definitions(atom))
+            result |= self.get_definitions(atom)
         return result
     @deprecated("get_values")
@@ -905,7 +914,7 @@ class LiveDefinitions:
     def add_memory_use(self, atom: MemoryLocation, code_loc: CodeLocation, expr: Optional[Any] = None) -> None:
         # get all current definitions
-        current_defs: Iterable[Definition] = self.get_definitions(atom)
+        current_defs: Set[Definition] = self.get_definitions(atom)
         for current_def in current_defs:
             self.add_memory_use_by_def(current_def, code_loc, expr=expr)

angr/knowledge_plugins/key_definitions/liveness.py CHANGED Viewed

@@ -1,7 +1,9 @@
-from typing import DefaultDict, Optional, List, Set, Tuple, TYPE_CHECKING
+from typing import DefaultDict, Optional, List, Set, Tuple, Dict, TYPE_CHECKING
 from collections import defaultdict
+from itertools import chain
+from angr.utils.constants import DEFAULT_STATEMENT
 from angr.knowledge_plugins.key_definitions.atoms import Tmp
 from .constants import ObservationPointType, OP_BEFORE, OP_AFTER
@@ -15,6 +17,7 @@ LocationType = Tuple[int, Optional[int], Optional[int]]  # block addr, block ID,
 LocationWithPosType = Tuple[
     int, Optional[int], Optional[int], ObservationPointType
 ]  # block addr, block ID, stmt ID, before/after
+BlockAddrType = Tuple[int, Optional[int]]  # block addr, block ID
 class Liveness:
@@ -25,30 +28,37 @@ class Liveness:
     def __init__(self):
         self.curr_live_defs: Set["Definition"] = set()
         self.curr_loc: Optional[LocationType] = None
-        self.def_to_liveness: DefaultDict["Definition", Set[LocationType]] = defaultdict(set)
-        self.loc_to_defs: DefaultDict[LocationWithPosType, Set["Definition"]] = defaultdict(set)
-        self._node_max_stmt_id: DefaultDict[Tuple[int, Optional[int]], int] = defaultdict(int)
-    def add_def(self, d: "Definition", code_loc: "CodeLocation") -> None:
-        loc = (code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx)
+        self.curr_block: Optional[BlockAddrType] = None
+        self.curr_stmt_idx: Optional[int] = None
+        self.blockstart_to_defs: DefaultDict[BlockAddrType, Set["Definition"]] = defaultdict(set)
+        self.blockend_to_defs: DefaultDict[BlockAddrType, Set["Definition"]] = defaultdict(set)
+        self.loc_to_killed_defs: DefaultDict[BlockAddrType, Dict[int, Set["Definition"]]] = defaultdict(dict)
+        self.loc_to_added_defs: DefaultDict[BlockAddrType, Dict[int, Set["Definition"]]] = defaultdict(dict)
+        self._node_max_stmt_id: DefaultDict[BlockAddrType, int] = defaultdict(int)
+    def add_def(self, d: "Definition") -> None:
         self.curr_live_defs.add(d)
-        self.def_to_liveness[d].add(loc)
+        if self.curr_stmt_idx not in self.loc_to_added_defs[self.curr_block]:
+            self.loc_to_added_defs[self.curr_block][self.curr_stmt_idx] = set()
+        self.loc_to_added_defs[self.curr_block][self.curr_stmt_idx].add(d)
     def kill_def(self, d: "Definition") -> None:
         self.curr_live_defs.discard(d)
+        if self.curr_stmt_idx not in self.loc_to_killed_defs[self.curr_block]:
+            self.loc_to_killed_defs[self.curr_block][self.curr_stmt_idx] = set()
+        self.loc_to_killed_defs[self.curr_block][self.curr_stmt_idx].add(d)
-    def complete_loc(self) -> None:
-        if self.curr_loc is not None:
-            for live_def in self.curr_live_defs:
-                self.def_to_liveness[live_def].add(self.curr_loc)
-            self.loc_to_defs[self.curr_loc + (OP_AFTER,)] |= self.curr_live_defs
+    def make_liveness_snapshot(self) -> None:
+        if self.curr_block is not None:
+            self.blockend_to_defs[self.curr_block] |= self.curr_live_defs
     def at_new_stmt(self, code_loc: "CodeLocation") -> None:
         """
         Only support moving from a statement to the next statement within one basic block.
         """
-        self.complete_loc()
         self.curr_loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx
+        self.curr_block = code_loc.block_addr, code_loc.block_idx
+        self.curr_stmt_idx = code_loc.stmt_idx
         if (
             code_loc.stmt_idx is not None
             and code_loc.stmt_idx > self._node_max_stmt_id[(code_loc.block_addr, code_loc.block_idx)]
@@ -59,44 +69,102 @@ class Liveness:
         """
         Only support moving to a new block from one or more blocks.
         """
+        self.make_liveness_snapshot()
         loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx
-        key = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, OP_BEFORE
+        key = code_loc.block_addr, code_loc.block_idx
         for pred_codeloc in pred_codelocs:
-            if pred_codeloc.stmt_idx is None:
-                # external code location
-                pred_max_stmt_id = None
-            else:
-                pred_max_stmt_id = self._node_max_stmt_id[(pred_codeloc.block_addr, pred_codeloc.block_idx)]
-            pred_key = pred_codeloc.block_addr, pred_codeloc.block_idx, pred_max_stmt_id, OP_AFTER
-            all_pred_defs = self.loc_to_defs[pred_key]
+            all_pred_defs = self.blockend_to_defs[pred_codeloc.block_addr, pred_codeloc.block_idx]
             # remove tmp defs
             pred_defs = set()
             for pred_def in all_pred_defs:
                 if not isinstance(pred_def.atom, Tmp):
                     pred_defs.add(pred_def)
-            for pred_def in pred_defs:
-                self.def_to_liveness[pred_def].add(loc)
-            self.loc_to_defs[key] |= pred_defs
+            self.blockstart_to_defs[key] |= pred_defs
-        self.curr_live_defs = set(self.loc_to_defs[key])
+        self.curr_live_defs = self.blockstart_to_defs[key].copy()
         self.curr_loc = loc
+        self.curr_stmt_idx = 0
     def find_defs_at(self, code_loc: "CodeLocation", op: int = OP_BEFORE) -> Set["Definition"]:
-        if op == OP_BEFORE:
-            if code_loc.stmt_idx != 0:
-                loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx - 1, OP_AFTER
+        return self.find_defs_at_raw(code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, op=op)
+    def find_defs_at_raw(
+        self, block_addr: int, block_idx: Optional[int], stmt_idx: Optional[int], op: int = OP_BEFORE
+    ) -> Set["Definition"]:
+        block: BlockAddrType = block_addr, block_idx
+        if block not in self.blockstart_to_defs:
+            defs = set()
+        else:
+            defs = self.blockstart_to_defs[block].copy()
+        if stmt_idx is None:
+            return defs
+        added_defs = self.loc_to_added_defs[block] if block in self.loc_to_added_defs else None
+        killed_defs = self.loc_to_killed_defs[block] if block in self.loc_to_added_defs else None
+        if stmt_idx == DEFAULT_STATEMENT:
+            end_stmt_idx = self._node_max_stmt_id[block] + 1
+        else:
+            if op == OP_BEFORE:
+                end_stmt_idx = stmt_idx
             else:
-                loc = code_loc.block_addr, code_loc.block_idx, 0, OP_BEFORE
+                end_stmt_idx = stmt_idx + 1
+        if added_defs is not None and killed_defs is not None:
+            indices = chain(added_defs, killed_defs)
+        elif added_defs is None and killed_defs is not None:
+            indices = killed_defs
+        elif added_defs is not None and killed_defs is None:
+            indices = added_defs
         else:
-            loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, OP_AFTER
-        return set() if loc not in self.loc_to_defs else self.loc_to_defs[loc]
+            indices = []
+        tmp_indices = []
+        if killed_defs is not None and None in killed_defs:
+            # External codeloc
+            defs.difference_update(killed_defs[None])
+            for idx in indices:
+                if idx is not None:
+                    tmp_indices.append(idx)
+            indices = tmp_indices
+        tmp_indices = []
+        if added_defs is not None and None in added_defs:
+            # External codeloc
+            defs.update(added_defs[None])
+            for idx in indices:
+                if idx is not None:
+                    tmp_indices.append(idx)
+            indices = tmp_indices
+        for idx in sorted(indices):
+            if idx >= end_stmt_idx:
+                break
+            if killed_defs is not None and idx in killed_defs:
+                defs.difference_update(killed_defs[idx])
+            if added_defs is not None and idx in added_defs:
+                defs.update(added_defs[idx])
+        if stmt_idx == DEFAULT_STATEMENT and op == OP_AFTER:
+            if killed_defs is not None and DEFAULT_STATEMENT in killed_defs:
+                defs.difference_update(killed_defs[DEFAULT_STATEMENT])
+            if added_defs is not None and DEFAULT_STATEMENT in added_defs:
+                defs.update(added_defs[DEFAULT_STATEMENT])
+        return defs
     def copy(self) -> "Liveness":
         o = Liveness()
         o.curr_live_defs = self.curr_live_defs.copy()
         o.curr_loc = self.curr_loc
-        o.def_to_liveness = self.def_to_liveness.copy()
-        o.loc_to_defs = self.loc_to_defs.copy()
+        o.curr_block = self.curr_block
+        o.curr_stmt_idx = self.curr_stmt_idx
+        o.blockstart_to_defs = self.blockstart_to_defs.copy()
+        o.blockend_to_defs = self.blockend_to_defs.copy()
+        o.loc_to_added_defs = self.loc_to_added_defs.copy()
+        o.loc_to_killed_defs = self.loc_to_killed_defs.copy()
         o._node_max_stmt_id = self._node_max_stmt_id.copy()
         return o