PyPI - PyNerva - Versions diffs - 0.0.7__py3-none-any.whl - Mend

PyNerva 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

nervapy/__init__.py +50 -0
nervapy/abi.py +91 -0
nervapy/arm/__init__.py +124 -0
nervapy/arm/__main__.py +0 -0
nervapy/arm/abi.py +138 -0
nervapy/arm/formats.py +49 -0
nervapy/arm/function.py +2465 -0
nervapy/arm/generic.py +10796 -0
nervapy/arm/instructions.py +519 -0
nervapy/arm/isa.py +409 -0
nervapy/arm/literal_pool.py +331 -0
nervapy/arm/microarchitecture.py +211 -0
nervapy/arm/pseudo.py +652 -0
nervapy/arm/registers.py +1458 -0
nervapy/arm/vfpneon.py +4092 -0
nervapy/arm.py +13 -0
nervapy/c/__init__.py +1 -0
nervapy/c/types.py +436 -0
nervapy/codegen.py +99 -0
nervapy/common/__init__.py +4 -0
nervapy/common/function.py +5 -0
nervapy/common/regalloc.py +121 -0
nervapy/constant_data.py +282 -0
nervapy/encoder.py +246 -0
nervapy/formats/__init__.py +2 -0
nervapy/formats/elf/__init__.py +4 -0
nervapy/formats/elf/file.py +178 -0
nervapy/formats/elf/image.py +106 -0
nervapy/formats/elf/section.py +422 -0
nervapy/formats/elf/symbol.py +281 -0
nervapy/formats/macho/__init__.py +2 -0
nervapy/formats/macho/file.py +123 -0
nervapy/formats/macho/image.py +143 -0
nervapy/formats/macho/section.py +322 -0
nervapy/formats/macho/symbol.py +158 -0
nervapy/formats/mscoff/__init__.py +8 -0
nervapy/formats/mscoff/image.py +132 -0
nervapy/formats/mscoff/section.py +181 -0
nervapy/formats/mscoff/symbol.py +148 -0
nervapy/function.py +136 -0
nervapy/literal.py +731 -0
nervapy/loader.py +188 -0
nervapy/name.py +159 -0
nervapy/parse.py +52 -0
nervapy/stream.py +58 -0
nervapy/util.py +126 -0
nervapy/writer.py +518 -0
nervapy/x86_64/__init__.py +324 -0
nervapy/x86_64/__main__.py +407 -0
nervapy/x86_64/abi.py +517 -0
nervapy/x86_64/amd.py +6464 -0
nervapy/x86_64/avx.py +102029 -0
nervapy/x86_64/crypto.py +1533 -0
nervapy/x86_64/encoding.py +424 -0
nervapy/x86_64/fma.py +19138 -0
nervapy/x86_64/function.py +2707 -0
nervapy/x86_64/generic.py +23384 -0
nervapy/x86_64/instructions.py +500 -0
nervapy/x86_64/isa.py +476 -0
nervapy/x86_64/lower.py +126 -0
nervapy/x86_64/mask.py +2593 -0
nervapy/x86_64/meta.py +143 -0
nervapy/x86_64/mmxsse.py +17265 -0
nervapy/x86_64/nacl.py +327 -0
nervapy/x86_64/operand.py +1204 -0
nervapy/x86_64/options.py +21 -0
nervapy/x86_64/pseudo.py +686 -0
nervapy/x86_64/registers.py +1225 -0
nervapy/x86_64/types.py +17 -0
nervapy/x86_64/uarch.py +580 -0
pynerva-0.0.7.dist-info/METADATA +310 -0
pynerva-0.0.7.dist-info/RECORD +74 -0
pynerva-0.0.7.dist-info/WHEEL +4 -0
pynerva-0.0.7.dist-info/licenses/LICENSE.rst +15 -0

nervapy/x86_64/function.py ADDED Viewed

@@ -0,0 +1,2707 @@
+# This file is part of PeachPy package and is licensed under the Simplified BSD license.
+#    See license.rst for the full text of the license.
+from __future__ import print_function
+import bisect
+import collections
+import operator
+import os
+import six
+import nervapy
+import nervapy.name
+import nervapy.writer
+import nervapy.x86_64.avx
+import nervapy.x86_64.instructions
+import nervapy.x86_64.meta
+import nervapy.x86_64.options
+import nervapy.x86_64.registers
+class Function:
+    """Generalized x86-64 assembly function.
+    A function consists of C signature and a list of instructions.
+    On this level the function is supposed to be compatible with multiple ABIs. In particular, instructions may have
+    virtual registers, and instruction stream may contain pseudo-instructions, such as LOAD.ARGUMENT or RETURN.
+    """
+    def __init__(
+        self,
+        name,
+        arguments,
+        result_type=None,
+        package=None,
+        target=None,
+        debug_level=None,
+    ):
+        """
+        :param str name: name of the function without mangling (as in C language).
+        :param tuple arguments: a tuple of :class:`nervapy.Argument` objects.
+        :param Type result_type: the return type of the function. None if the function returns no value (void function).
+        :param str package: the name of the Go package containing this function.
+        :param Microarchitecture target: the target microarchitecture for this function.
+        :param int debug_level: the verbosity level for debug information collected for instructions. 0 means no
+            debug information, 1 and above enables information about the lines of Python code that originated an
+            instruction. Collecting debug information increases processing time by several times.
+        :ivar Label entry: a label that marks the entry point of the function. A user can place the entry point in any
+            place in the function by defining this label with LABEL pseudo-instruction. If this label is not defined
+            by the user, it will be placed automatically before the first instruction of the function.
+        :ivar int _indent_level: the level of indentation for this instruction in assembly listings. Indentation level
+            is changed by Loop statements.
+        :ivar list _instructions: the list of :class:`Instruction` objects that comprise the function code.
+        :ivar set _label_names: a set of string names of LABEL quasi-instructions in the function. The set is populated
+            as instructions are added and is intended to track duplicate labels.
+        :ivar dict _named_constants: a dictionary that maps names of literal constants to Constant objects.
+            As instructions are added the dictionary is used to track constants with same names, but different content.
+        """
+        self.name = name
+        self.arguments = arguments
+        self.result_type = result_type
+        if package is None:
+            self.package = nervapy.x86_64.options.package
+        self.package = package
+        if target is None:
+            target = nervapy.x86_64.options.target
+        if target is None:
+            target = nervapy.x86_64.uarch.default
+        if not isinstance(target, nervapy.x86_64.uarch.Microarchitecture):
+            raise TypeError("%s is not an valid CPU target" % str(target))
+        self.target = target
+        if debug_level is None:
+            self.debug_level = nervapy.x86_64.options.debug_level
+        else:
+            self.debug_level = int(debug_level)
+        from nervapy.name import Name
+        from nervapy.x86_64.pseudo import Label
+        self.entry = Label((Name("__entry__", None),))
+        self._indent_level = 1
+        self._instructions = list()
+        # This set is only used to ensure that all labels references in branches are defined
+        self._label_names = set()
+        # Map from id of Name objects to their copies.
+        # This ensures that Name objects without name can be compared for equality using id
+        self._names_memo = dict()
+        self._scope = nervapy.name.Namespace(None)
+        self._local_variables_count = 0
+        self._virtual_general_purpose_registers_count = 0
+        self._virtual_mmx_registers_count = 0
+        self._virtual_xmm_registers_count = 0
+        self._virtual_mask_registers_count = 0
+        from nervapy.x86_64 import m256, m256d, m256i
+        avx_types = [m256, m256d, m256i]
+        self.avx_environment = (
+            any([arg.c_type in avx_types for arg in self.arguments])
+            or self.result_type in avx_types
+        )
+        self._avx_prolog = None
+        from nervapy.common import RegisterAllocator
+        from nervapy.x86_64.registers import (GeneralPurposeRegister,
+                                              KRegister, MMXRegister,
+                                              XMMRegister)
+        self._register_allocators = {
+            GeneralPurposeRegister._kind: RegisterAllocator(),
+            MMXRegister._kind: RegisterAllocator(),
+            XMMRegister._kind: RegisterAllocator(),
+            KRegister._kind: RegisterAllocator(),
+        }
+    @property
+    def c_signature(self):
+        """C signature (including parameter names) for the function"""
+        signature = "void" if self.result_type is None else str(self.result_type)
+        signature = signature + " " + self.name
+        signature = signature + "(" + ", ".join(map(str, self.arguments)) + ")"
+        return signature
+    @property
+    def go_signature(self):
+        """Go signature (including parameter names) for the function.
+        None if the function argument or return type is incompatible with Go"""
+        def c_to_go_type(c_type):
+            assert isinstance(c_type, nervapy.Type)
+            if c_type.is_pointer:
+                if c_type.base is not None:
+                    return "*" + c_to_go_type(c_type.base)
+                else:
+                    return "uintptr"
+            elif c_type.is_bool:
+                return "boolean"
+            elif c_type.is_size_integer:
+                return "int" if c_type.is_signed_integer else "uint"
+            elif c_type.is_signed_integer:
+                return {1: "int8", 2: "int16", 4: "int32", 8: "int64"}[c_type.size]
+            elif c_type.is_unsigned_integer:
+                return {1: "uint8", 2: "uint16", 4: "uint32", 8: "uint64"}[c_type.size]
+            elif c_type.is_floating_point:
+                return {4: "float32", 8: "float64"}[c_type.size]
+            else:
+                return None
+        go_argument_types = list(
+            map(c_to_go_type, map(operator.attrgetter("c_type"), self.arguments))
+        )
+        # Some of the C types doesn't have a Go analog
+        if not (all(map(bool, go_argument_types))):
+            return None
+        go_arguments = map(
+            lambda name_gotype: " ".join(name_gotype),
+            zip(map(operator.attrgetter("name"), self.arguments), go_argument_types),
+        )
+        if self.result_type is None:
+            return "func %s(%s)" % (self.name, ", ".join(go_arguments))
+        else:
+            go_result_type = c_to_go_type(self.result_type)
+            if go_result_type is None:
+                return None
+            else:
+                return "func %s(%s) %s" % (
+                    self.name,
+                    ", ".join(go_arguments),
+                    go_result_type,
+                )
+    @property
+    def isa_extensions(self):
+        from nervapy.x86_64.isa import Extensions
+        extensions = set()
+        for instruction in self._instructions:
+            extensions.update(instruction.isa_extensions)
+        return Extensions(*extensions)
+    def __enter__(self):
+        self.attach()
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.detach()
+        if exc_type is None:
+            self._add_default_labels()
+            self._check_undefined_labels()
+            self._remove_unused_labels()
+            self._analize()
+            if nervapy.x86_64.options.rtl_dump_file:
+                nervapy.x86_64.options.rtl_dump_file.write(self.format_instructions())
+            self._check_live_registers()
+            self._preallocate_registers()
+            self._bind_registers()
+            self._scope.assign_names()
+            if nervapy.x86_64.options.abi is not None:
+                abi_function = self.finalize(nervapy.x86_64.options.abi)
+                for writer in nervapy.writer.active_writers:
+                    writer.add_function(abi_function)
+        else:
+            raise
+    def attach(self):
+        """Makes active the function and its associated instruction stream.
+        While the instruction stream is active, generated instructions are added to this function.
+        While the function is active, generated instructions are checked for compatibility with the function target.
+        """
+        import nervapy.common.function
+        import nervapy.stream
+        if nervapy.common.function.active_function is not None:
+            raise ValueError(
+                "Can not attach the function: alternative function %s is active"
+                % nervapy.common.function.active_function.name
+            )
+        if nervapy.stream.active_stream is not None:
+            raise ValueError(
+                "Can not attach the function instruction stream: alternative instruction stream is active"
+            )
+        nervapy.common.function.active_function = self
+        nervapy.stream.active_stream = self
+        return self
+    def detach(self):
+        """Make the function and its associated instruction stream no longer active.
+        The function and its instruction stream must be active before calling the method.
+        """
+        import nervapy.common.function
+        import nervapy.stream
+        if nervapy.common.function.active_function is None:
+            raise ValueError("Can not detach the function: no function is active")
+        if nervapy.common.function.active_function is not self:
+            raise ValueError(
+                "Can not detach the function: a different function is active"
+            )
+        nervapy.common.function.active_function = None
+        nervapy.stream.active_stream = None
+        return self
+    @staticmethod
+    def _check_arguments(args):
+        # Check types
+        if not isinstance(args, (list, tuple)):
+            raise TypeError(
+                "Invalid arguments types (%s): a tuple or list of function arguments expected"
+                % str(args)
+            )
+        for i, arg in enumerate(args):
+            if not isinstance(arg, Argument):
+                raise TypeError(
+                    "Invalid argument object for argument #%d (%s): nervapy.Argument expected"
+                    % (i, str(arg))
+                )
+        # mapping from argument name to argument number
+        names = dict()
+        # First check argument names for arguments with explicit names
+        for i, arg in enumerate(args):
+            if arg.name:
+                if arg.name in names:
+                    raise ValueError(
+                        "Argument #%d (%s) has the same name as argument #%d (%s)"
+                        % (i, str(arg), names[arg.name], args[names[arg.name]])
+                    )
+                names[arg.name] = i
+    def _find_argument(self, argument_target):
+        from nervapy import Argument
+        assert isinstance(
+            argument_target, (Argument, str)
+        ), "Either Argument object or argument name expected"
+        if isinstance(argument_target, Argument):
+            if argument_target in self.arguments:
+                return argument_target
+            else:
+                return None
+        else:
+            return next(
+                (
+                    argument
+                    for argument in self.arguments
+                    if argument.name == argument_target
+                ),
+                None,
+            )
+    def add_instruction(self, instruction):
+        # If instruction is None, do nothing
+        if instruction is None:
+            return
+        from nervapy.x86_64.instructions import Instruction
+        if not isinstance(instruction, Instruction):
+            raise TypeError("Instruction object expected")
+        from nervapy.x86_64.pseudo import LABEL
+        # Check that label with the same name is not added twice
+        if isinstance(instruction, LABEL):
+            self._scope.add_scoped_name(instruction.identifier)
+            self._label_names.add(instruction.identifier)
+        constant = instruction.constant
+        if constant is not None:
+            self._scope.add_scoped_name(constant.name)
+        # Check that the instruction is supported by the target ISA
+        for extension in instruction.isa_extensions:
+            if self.target is not None and extension not in self.target.extensions:
+                raise ValueError(
+                    "{0} is not supported on the target microarchitecture".format(
+                        extension
+                    )
+                )
+        instruction._indent_level = self._indent_level
+        self._instructions.append(instruction)
+    def add_instructions(self, instructions):
+        for instruction in instructions:
+            self.add_instruction(instruction)
+    def finalize(self, abi):
+        from nervapy.x86_64.abi import ABI
+        if not isinstance(abi, ABI):
+            raise TypeError("%s is not an ABI object" % str(abi))
+        return ABIFunction(self, abi)
+    def _add_default_labels(self):
+        """Adds default labels if they are not defined"""
+        from nervapy.x86_64.pseudo import LABEL
+        if self.entry.name not in self._scope.names:
+            self._instructions.insert(0, LABEL(self.entry))
+            self._scope.add_scoped_name(self.entry.name)
+            self._label_names.add(self.entry.name)
+    def _check_undefined_labels(self):
+        """Verifies that all labels referenced by branch instructions are defined"""
+        from nervapy.x86_64.instructions import BranchInstruction
+        referenced_label_names = set()
+        for instruction in self._instructions:
+            if isinstance(instruction, BranchInstruction) and instruction.label_name:
+                referenced_label_names.add(instruction.label_name)
+        if not referenced_label_names.issubset(self._label_names):
+            undefined_label_names = referenced_label_names.difference(self._label_names)
+            raise ValueError(
+                "Undefined labels found: "
+                + ", ".join(
+                    map(lambda name: ".".join(map(str, name)), undefined_label_names)
+                )
+            )
+    def _remove_unused_labels(self):
+        """Removes labels that are not referenced by any instruction"""
+        from nervapy.x86_64.instructions import BranchInstruction
+        from nervapy.x86_64.pseudo import LABEL
+        referenced_label_names = set()
+        for instruction in self._instructions:
+            if isinstance(instruction, BranchInstruction) and instruction.label_name:
+                referenced_label_names.add(instruction.label_name)
+        unreferenced_label_names = self._label_names.difference(referenced_label_names)
+        # Do not remove entry label if it is in the middle of the function
+        if self.entry.name in unreferenced_label_names:
+            if (
+                not isinstance(self._instructions[0], LABEL)
+                or self._instructions[0].identifier != self.entry.name
+            ):
+                unreferenced_label_names.remove(self.entry.name)
+        # Remove LABEL pseudo-instructions with unreferenced label names
+        self._instructions = [
+            instruction
+            for instruction in self._instructions
+            if not isinstance(instruction, LABEL)
+            or instruction.identifier not in unreferenced_label_names
+        ]
+        self._label_names.difference_update(unreferenced_label_names)
+    def _analize(self):
+        from nervapy.x86_64.generic import RET
+        from nervapy.x86_64.instructions import BranchInstruction
+        from nervapy.x86_64.pseudo import LABEL, RETURN
+        # Query input/output registers for each instruction
+        input_registers = []
+        output_registers = []
+        for instruction in self._instructions:
+            input_registers.append(instruction.input_registers_masks)
+            output_registers.append(instruction.output_registers_masks)
+        # Map from label name to its quasi-instruction number in the stream
+        labels = {
+            instruction.identifier: i
+            for (i, instruction) in enumerate(self._instructions)
+            if isinstance(instruction, LABEL)
+        }
+        entry_position = 0
+        if self.entry.name in self._label_names:
+            entry_position = labels[self.entry.name]
+        branch_instructions = [
+            (i, instruction)
+            for (i, instruction) in enumerate(self._instructions)
+            if isinstance(instruction, BranchInstruction) and instruction.label_name
+        ]
+        # Basic blocks start at function entry position or on branch target
+        basic_block_starts = {entry_position}
+        for i, branch_instruction in branch_instructions:
+            basic_block_starts.add(labels[branch_instruction.label_name])
+            if branch_instruction.is_conditional:
+                basic_block_starts.add(i + 1)
+        basic_block_starts = sorted(basic_block_starts)
+        # Basic block ends on a referenced label instruction or right after return/branch instructions
+        basic_block_ends = [
+            i + int(not isinstance(instruction, LABEL))
+            for (i, instruction) in enumerate(self._instructions)
+            if isinstance(instruction, (BranchInstruction, RETURN, RET, LABEL))
+        ]
+        # TODO: check that the last block with an unconditional branch/return instruction
+        basic_block_bounds = [
+            (start, basic_block_ends[bisect.bisect_right(basic_block_ends, start)])
+            for start in basic_block_starts
+        ]
+        class BasicBlock:
+            def __init__(
+                self,
+                start_position,
+                end_position,
+                input_registers_list,
+                output_registers_list,
+            ):
+                self.start_position = start_position
+                self.end_position = end_position
+                self.input_registers_list = input_registers_list
+                self.output_registers_list = output_registers_list
+                self.consumed_register_masks = collections.defaultdict(int)
+                self.produced_register_masks = collections.defaultdict(int)
+                self.live_register_masks = collections.defaultdict(int)
+                self.available_register_masks = collections.defaultdict(int)
+                self.is_reachable = False
+                self.liveness_analysis_passes = 0
+                self.availability_analysis_passes = 0
+                self.input_blocks = list()
+                self.output_blocks = list()
+                self.processed_input_blocks = set()
+                self.processed_output_blocks = set()
+                # Mark available and consumed registers:
+                # - If a register is consumed by an instruction but not produced by preceding instructions of the basic
+                #   block, the register is consumed by the basic block
+                # - If a register is produced by an instruction, it becomes available for the subsequent instructions
+                #   of the basic block and counts as produced by the basic block
+                for input_registers, output_registers in zip(
+                    input_registers_list, output_registers_list
+                ):
+                    for input_register_id, input_register_mask in six.iteritems(
+                        input_registers
+                    ):
+                        consumed_mask = (
+                            input_register_mask
+                            & ~self.produced_register_masks[input_register_id]
+                        )
+                        if consumed_mask != 0:
+                            self.consumed_register_masks[
+                                input_register_id
+                            ] |= consumed_mask
+                    for output_register_id, output_register_mask in six.iteritems(
+                        output_registers
+                    ):
+                        self.produced_register_masks[
+                            output_register_id
+                        ] |= output_register_mask
+            def reset_processed_blocks(self):
+                self.processed_input_blocks = set()
+                self.processed_output_blocks = set()
+            @property
+            def available_registers_list(self):
+                from nervapy.x86_64.registers import Register
+                available_registers_list = []
+                available_registers_masks = self.available_register_masks.copy()
+                for output_registers in self.output_registers_list:
+                    # Record available registers for current instruction
+                    available_registers_list.append(available_registers_masks.copy())
+                    # Update with output registers for current instruction
+                    for output_register_id, output_register_mask in six.iteritems(
+                        output_registers
+                    ):
+                        available_registers_masks[output_register_id] = (
+                            available_registers_masks.get(output_register_id, 0)
+                            | output_register_mask
+                        )
+                return available_registers_list
+            @property
+            def live_registers_list(self):
+                from nervapy.x86_64.registers import Register
+                live_registers_list = []
+                live_registers_masks = self.live_register_masks.copy()
+                for input_registers, output_registers in reversed(
+                    list(zip(self.input_registers_list, self.output_registers_list))
+                ):
+                    # Mark register written by the instruction as non-live
+                    for output_register_id, output_register_mask in six.iteritems(
+                        output_registers
+                    ):
+                        if output_register_id in live_registers_masks:
+                            new_live_register_mask = (
+                                live_registers_masks[output_register_id]
+                                & ~output_register_mask
+                            )
+                            if new_live_register_mask != 0:
+                                live_registers_masks[output_register_id] = (
+                                    new_live_register_mask
+                                )
+                            else:
+                                del live_registers_masks[output_register_id]
+                    # Mark registers read by the instruction as live
+                    for input_register_id, input_register_mask in six.iteritems(
+                        input_registers
+                    ):
+                        live_registers_masks[input_register_id] = (
+                            live_registers_masks.get(input_register_id, 0)
+                            | input_register_mask
+                        )
+                    # Record available registers for current instruction
+                    live_registers_list.append(live_registers_masks.copy())
+                live_registers_list.reverse()
+                return live_registers_list
+            def __str__(self):
+                return "[%d, %d)" % (self.start_position, self.end_position)
+            def __repr__(self):
+                return str(self)
+            def analyze_availability(self, extra_available_registers):
+                self.availability_analysis_passes += 1
+                if self.availability_analysis_passes == 1:
+                    # First pass: add registers produced by the block and propagate further
+                    self.available_register_masks.update(extra_available_registers)
+                    if self.output_blocks:
+                        # Add registers produced by this block
+                        for produced_reg_id, produced_reg_mask in six.iteritems(
+                            self.produced_register_masks
+                        ):
+                            extra_available_registers[produced_reg_id] = (
+                                extra_available_registers.get(produced_reg_id, 0)
+                                | produced_reg_mask
+                            )
+                else:
+                    # Subsequent passes: compute and propagate only the input registers that were not processed before
+                    for reg_id, extra_reg_mask in list(
+                        six.iteritems(extra_available_registers)
+                    ):
+                        old_reg_mask = self.available_register_masks[reg_id]
+                        update_reg_mask = extra_reg_mask & ~old_reg_mask
+                        if update_reg_mask != 0:
+                            self.available_register_masks[reg_id] |= extra_reg_mask
+                        if self.output_blocks:
+                            if update_reg_mask != 0:
+                                extra_available_registers[reg_id] = update_reg_mask
+                            else:
+                                del extra_available_registers[reg_id]
+                if self.output_blocks and (
+                    extra_available_registers or self.availability_analysis_passes == 1
+                ):
+                    for output_block in self.output_blocks[1:]:
+                        # The dict needs to be copied because output blocks can change it
+                        output_block.analyze_availability(
+                            extra_available_registers.copy()
+                        )
+                    # Optimization: do not create a copy of the dict
+                    self.output_blocks[0].analyze_availability(
+                        extra_available_registers
+                    )
+            def analyze_liveness(self, extra_live_registers):
+                # Update in liveness analysis consists of three steps:
+                # 1. Update live registers for this basic block.
+                # 2. Mark registers which are produced to by the basic block as non-live.
+                # 3. Mark registers which are consumed by the basic block as live (only on first pass).
+                self.liveness_analysis_passes += 1
+                # Steps 1 and 2
+                for reg_id, extra_reg_mask in list(six.iteritems(extra_live_registers)):
+                    old_reg_mask = self.live_register_masks[reg_id]
+                    update_reg_mask = extra_reg_mask & ~old_reg_mask
+                    if update_reg_mask != 0:
+                        self.live_register_masks[reg_id] |= extra_reg_mask
+                    if self.input_blocks:
+                        # On the first pass do not modify the extra live registers masks that are passed to input blocks
+                        # On subsequent passes only the novel live registers need to be passed further
+                        if self.liveness_analysis_passes == 1:
+                            update_reg_mask = extra_reg_mask
+                        update_reg_mask &= ~self.produced_register_masks.get(reg_id, 0)
+                        if update_reg_mask != 0:
+                            extra_live_registers[reg_id] = update_reg_mask
+                        else:
+                            del extra_live_registers[reg_id]
+                # Step 3
+                if self.input_blocks:
+                    if self.liveness_analysis_passes == 1:
+                        for consumed_reg_id, consumed_reg_mask in six.iteritems(
+                            self.consumed_register_masks
+                        ):
+                            extra_live_registers[consumed_reg_id] = (
+                                extra_live_registers.get(consumed_reg_id, 0)
+                                | consumed_reg_mask
+                            )
+                if self.input_blocks and (
+                    extra_live_registers or self.liveness_analysis_passes == 1
+                ):
+                    for input_block in self.input_blocks[1:]:
+                        # The dict needs to be copied because input blocks can change it
+                        input_block.analyze_liveness(extra_live_registers.copy())
+                    # Optimization: do not create a copy of the dict
+                    self.input_blocks[0].analyze_liveness(extra_live_registers)
+            def analyze_reachability(self):
+                if not self.is_reachable:
+                    self.is_reachable = True
+                    for output_block in self.output_blocks:
+                        output_block.analyze_reachability()
+            def forward_pass(self, processing_function, instructions, input_state):
+                output_state = processing_function(self, instructions, input_state)
+                for output_block in self.output_blocks:
+                    if output_block.start_position not in self.processed_output_blocks:
+                        self.processed_output_blocks.add(output_block.start_position)
+                        output_block.forward_pass(
+                            processing_function, instructions, output_state
+                        )
+            def backward_pass(self, processing_function, instructions, input_state):
+                output_state = processing_function(self, instructions, input_state)
+                for input_block in self.input_blocks:
+                    if input_block.start_position not in self.processed_input_blocks:
+                        self.processed_input_blocks.add(input_block.start_position)
+                        input_block.backward_pass(
+                            processing_function, instructions, output_state
+                        )
+            def propogate_sse_avx_state_forward(self, instructions, is_avx_environment):
+                from nervapy.x86_64.avx import VZEROALL, VZEROUPPER
+                from nervapy.x86_64.pseudo import LOAD, STORE
+                avx_state = True if is_avx_environment else None
+                def propogate_forward(block, instructions, avx_state):
+                    for instruction in instructions[
+                        block.start_position : block.end_position
+                    ]:
+                        if isinstance(instruction, (VZEROUPPER, VZEROALL)):
+                            avx_state = None
+                        elif instruction.avx_mode is None:
+                            # Instruction without a mode
+                            if isinstance(
+                                instruction,
+                                (LOAD.ARGUMENT, STORE.RESULT, RETURN, RET, LABEL),
+                            ):
+                                # Some pseudo-instructions need AVX/SSE mode for lowering
+                                instruction.avx_mode = avx_state
+                        elif instruction.avx_mode:
+                            # AVX-mode instruction
+                            avx_state = True
+                        else:
+                            # SSE-mode instruction
+                            if avx_state:
+                                raise TypeError(
+                                    "AVX-mode instruction {0} follows an SSE-mode instruction".format(
+                                        instruction
+                                    )
+                                )
+                            avx_state = False
+                    return avx_state
+                self.forward_pass(propogate_forward, instructions, avx_state)
+            def propogate_sse_state_backward(self, instructions, is_avx_environment):
+                from nervapy.x86_64.pseudo import LOAD, STORE
+                avx_state = True if is_avx_environment else None
+                def propogate_sse_backward(block, instructions, avx_state):
+                    for instruction in reversed(
+                        instructions[block.start_position : block.end_position]
+                    ):
+                        if instruction.avx_mode is not None:
+                            avx_state = instruction.avx_mode
+                        elif avx_state is not None and not avx_state:
+                            if isinstance(
+                                instruction,
+                                (LOAD.ARGUMENT, STORE.RESULT, RETURN, RET, LABEL),
+                            ):
+                                instruction.avx_mode = avx_state
+                    return avx_state
+                self.backward_pass(propogate_sse_backward, instructions, avx_state)
+            def propogate_avx_state_backward(self, instructions, is_avx_environment):
+                from nervapy.x86_64.pseudo import LOAD, STORE
+                avx_state = True if is_avx_environment else None
+                def propogate_avx_backward(block, instructions, avx_state):
+                    for instruction in reversed(
+                        instructions[block.start_position : block.end_position]
+                    ):
+                        if instruction.avx_mode is not None:
+                            avx_state = instruction.avx_mode
+                        elif avx_state:
+                            if isinstance(
+                                instruction,
+                                (LOAD.ARGUMENT, STORE.RESULT, RETURN, RET, LABEL),
+                            ):
+                                instruction.avx_mode = avx_state
+                self.backward_pass(propogate_avx_backward, instructions, avx_state)
+        basic_blocks = list(
+            map(
+                lambda start_end: BasicBlock(
+                    start_end[0],
+                    start_end[1],
+                    input_registers[start_end[0] : start_end[1]],
+                    output_registers[start_end[0] : start_end[1]],
+                ),
+                basic_block_bounds,
+            )
+        )
+        # Map from block start position to BasicBlock object
+        basic_blocks_map = {
+            basic_block_start: basic_block
+            for (basic_block_start, basic_block) in zip(
+                basic_block_starts, basic_blocks
+            )
+        }
+        # Set output basic blocks for each basic block object
+        for i, basic_block in enumerate(basic_blocks):
+            # Consider last instruction of the basic block
+            last_instruction = self._instructions[basic_block.end_position - 1]
+            if isinstance(last_instruction, (RET, RETURN)):
+                # Basic block that ends with a return instruction has no output blocks
+                pass
+            elif (
+                isinstance(last_instruction, BranchInstruction)
+                and last_instruction.label_name
+            ):
+                # Basic block that ends with a branch instruction can jump to the block at branch label
+                target_position = labels[last_instruction.label_name]
+                basic_block.output_blocks = [basic_blocks_map[target_position]]
+                if last_instruction.is_conditional:
+                    # Basic blocks that end with a conditional branch instruction can fall through to the next block
+                    basic_block.output_blocks.append(basic_blocks[i + 1])
+            else:
+                # Basic block ends before a label and continues to the next basic block
+                basic_block.output_blocks = [basic_blocks[i + 1]]
+        # Set input basic blocks for each basic block object
+        for basic_block in basic_blocks:
+            basic_block.input_blocks = list(
+                filter(lambda bb: basic_block in bb.output_blocks, basic_blocks)
+            )
+        # Analyze which blocks can be reached from the entry point
+        basic_blocks_map[entry_position].analyze_reachability()
+        exit_positions = [
+            block.start_position for block in basic_blocks if not block.output_blocks
+        ]
+        # Analyze register lifetime
+        basic_blocks_map[entry_position].analyze_availability(dict())
+        for exit_position in exit_positions:
+            basic_blocks_map[exit_position].analyze_liveness(dict())
+        # Analyze SSE/AVX mode
+        basic_blocks_map[entry_position].propogate_sse_avx_state_forward(
+            self._instructions, self.avx_environment
+        )
+        for exit_position in exit_positions:
+            basic_blocks_map[exit_position].propogate_sse_state_backward(
+                self._instructions, self.avx_environment
+            )
+        for basic_block in basic_blocks:
+            basic_block.reset_processed_blocks()
+        for exit_position in exit_positions:
+            basic_blocks_map[exit_position].propogate_avx_state_backward(
+                self._instructions, self.avx_environment
+            )
+        self._avx_prolog = self._instructions[entry_position].avx_mode
+        # Reconstruct live and available registers for the whole instruction sequence
+        for basic_block in basic_blocks:
+            for instruction, available_registers, live_registers in zip(
+                self._instructions[
+                    basic_block.start_position : basic_block.end_position
+                ],
+                basic_block.available_registers_list,
+                basic_block.live_registers_list,
+            ):
+                instruction._live_registers = live_registers
+                instruction._available_registers = available_registers
+            # Remove referenced to input/output blocks to avoid memory leaks due to cycles in ref graph
+            basic_block.input_blocks = None
+            basic_block.output_blocks = None
+        # Analyze conflicting registers
+        output_registers = set()
+        for instruction in self._instructions:
+            instruction_registers = instruction.input_registers
+            instruction_registers.update(output_registers)
+            for instruction_register in instruction_registers:
+                if instruction_register.is_virtual:
+                    conflict_internal_ids = [
+                        reg_id
+                        for (reg_id, reg_mask) in six.iteritems(
+                            instruction._live_registers
+                        )
+                        if reg_mask & instruction_register.mask != 0
+                    ]
+                    self._register_allocators[instruction_register.kind].add_conflicts(
+                        instruction_register.virtual_id, conflict_internal_ids
+                    )
+            physical_registers = [r for r in instruction_registers if not r.is_virtual]
+            if physical_registers:
+                from nervapy.x86_64.registers import Register
+                live_virtual_registers = Register._reconstruct_multiple(
+                    {
+                        reg_id: reg_mask
+                        for (reg_id, reg_mask) in six.iteritems(
+                            instruction._live_registers
+                        )
+                        if reg_id < 0
+                    }
+                )
+                for live_virtual_register in live_virtual_registers:
+                    conflict_internal_ids = [
+                        reg._internal_id
+                        for reg in physical_registers
+                        if reg.mask & live_virtual_register.mask != 0
+                    ]
+                    self._register_allocators[live_virtual_register.kind].add_conflicts(
+                        live_virtual_register.virtual_id, conflict_internal_ids
+                    )
+            output_registers = instruction.output_registers
+    def _check_live_registers(self):
+        """Checks that the number of live registers does not exceed the number of physical registers for each insruction"""
+        from nervapy.x86_64.registers import (GeneralPurposeRegister,
+                                              KRegister, MMXRegister,
+                                              XMMRegister)
+        max_live_registers = {
+            GeneralPurposeRegister._kind: 15,
+            MMXRegister._kind: 8,
+            XMMRegister._kind: 16,
+            KRegister._kind: 8,
+        }
+        for instruction in self._instructions:
+            live_registers = max_live_registers.copy()
+            for reg in instruction.live_registers:
+                live_registers[reg.kind] -= 1
+            if any(
+                surplus_count < 0 for surplus_count in six.itervalues(live_registers)
+            ):
+                if (
+                    instruction.source_file is not None
+                    and instruction.line_number is not None
+                ):
+                    raise nervapy.RegisterAllocationError(
+                        "The number of live virtual registers exceeds physical constraints %s at %s:%d"
+                        % (
+                            str(instruction),
+                            instruction.source_file,
+                            instruction.line_number,
+                        )
+                    )
+                else:
+                    raise nervapy.RegisterAllocationError(
+                        "The number of live virtual registers exceeds physical constraints %s"
+                        % str(instruction)
+                    )
+    def _preallocate_registers(self):
+        """Allocates registers that can be binded only to a single virtual register.
+        Several instructions accept only a fixed a register as their operand. If a virtual register is supplied as such
+        operand, it must be binded to the fixed register accepted by instruction encoding.
+        These instructions are:
+            - BLENDVPS xmm, xmm/m128, xmm0
+            - BLENDVPD xmm, xmm/m128, xmm0
+            - PBLENDVB xmm, xmm/m128, xmm0
+            - SHA256RNDS2 xmm, xmm/m128, xmm0
+            - SHR r/m, cl
+            - SAR r/m, cl
+            - SAL r/m, cl
+            - SHL r/m, cl
+            - ROR r/m, cl
+            - ROL r/m, cl
+            - SHRD r/m, r, cl
+            - SHLD r/m, r, cl
+        """
+        from nervapy import RegisterAllocationError
+        from nervapy.x86_64.registers import (GeneralPurposeRegister,
+                                              GeneralPurposeRegister8,
+                                              XMMRegister, cl, xmm0)
+        cl_binded_registers = set()
+        xmm0_binded_registers = set()
+        for instruction in self._instructions:
+            if instruction.name in {"BLENDVPD", "BLENDVPS", "PBLENDVB", "SHA256RNDS2"}:
+                assert (
+                    len(instruction.operands) == 3
+                ), "expected 3 operands, got %d (%s)" % (
+                    len(instruction.operands),
+                    ", ".join(map(str, instruction.operands)),
+                )
+                xmm0_operand = instruction.operands[2]
+                assert isinstance(
+                    xmm0_operand, XMMRegister
+                ), "expected xmm registers in the 3rd operand, got %s" % str(
+                    xmm0_operand
+                )
+                if xmm0_operand.is_virtual:
+                    # Check that xmm0 is not live at this instruction
+                    if (
+                        instruction._live_registers.get(xmm0._internal_id, 0)
+                        & XMMRegister._mask
+                        != 0
+                    ):
+                        raise RegisterAllocationError(
+                            (
+                                "Instruction %s requires operand 3 to be allocated to xmm0 register, "
+                                + "but xmm0 is a live register"
+                            )
+                            % str(instruction.name)
+                        )
+                    xmm0_binded_registers.add(xmm0_operand._internal_id)
+            elif instruction.name in {
+                "SAL",
+                "SAR",
+                "SHL",
+                "SHR",
+                "ROL",
+                "ROR",
+                "RCL",
+                "RCR",
+            }:
+                assert (
+                    len(instruction.operands) == 2
+                ), "expected 2 operands, got %d (%s)" % (
+                    len(instruction.operands),
+                    ", ".join(map(str, instruction.operands)),
+                )
+                count_operand = instruction.operands[1]
+                # The count operand can be cl or imm8
+                if (
+                    isinstance(count_operand, GeneralPurposeRegister8)
+                    and count_operand.is_virtual
+                ):
+                    # Check that cl is not live at this instruction
+                    if (
+                        instruction._live_registers.get(cl._internal_id, 0)
+                        & GeneralPurposeRegister8._mask
+                        != 0
+                    ):
+                        raise RegisterAllocationError(
+                            "Instruction %s requires operand 2 to be allocated to cl register, "
+                            + "but cl is a live register" % instruction.name
+                        )
+                    cl_binded_registers.add(count_operand._internal_id)
+            elif instruction.name in {"SHLD", "SHRD"}:
+                assert (
+                    len(instruction.operands) == 3
+                ), "expected 3 operands, got %d (%s)" % (
+                    len(instruction.operands),
+                    ", ".join(map(str, instruction.operands)),
+                )
+                count_operand = instruction.operands[2]
+                # The count operand can be cl or imm8
+                if (
+                    isinstance(count_operand, GeneralPurposeRegister8)
+                    and count_operand.is_virtual
+                ):
+                    # Check that cl is not live at this instruction
+                    if (
+                        instruction._live_registers.get(cl._internal_id, 0)
+                        & GeneralPurposeRegister8._mask
+                        != 0
+                    ):
+                        raise RegisterAllocationError(
+                            "Instruction %s requires operand 3 to be allocated to cl register, "
+                            + "but cl is a live register" % instruction.name
+                        )
+                    cl_binded_registers.add(count_operand._internal_id)
+        # Check that cl-binded registers are not mutually conflicting
+        for cl_register in cl_binded_registers:
+            other_cl_registers = filter(
+                operator.methodcaller("__ne__", cl_register), cl_binded_registers
+            )
+            conflicting_registers = self._conflicting_registers[1][cl_register]
+            if any(
+                [
+                    other_register in conflicting_registers
+                    for other_register in other_cl_registers
+                ]
+            ):
+                raise RegisterAllocationError(
+                    "Two conflicting virtual registers are required to bind to cl"
+                )
+        # Check that xmm0-binded registers are not mutually conflicting
+        for xmm0_register in xmm0_binded_registers:
+            other_xmm0_registers = filter(
+                operator.methodcaller("__ne__", xmm0_register), xmm0_binded_registers
+            )
+            conflicting_registers = self._conflicting_registers[3][xmm0_register]
+            if any(
+                [
+                    other_register in conflicting_registers
+                    for other_register in other_xmm0_registers
+                ]
+            ):
+                raise RegisterAllocationError(
+                    "Two conflicting virtual registers are required to bind to xmm0"
+                )
+        # Commit register allocations
+        for cl_register in cl_binded_registers:
+            self._register_allocations[GeneralPurposeRegister._kind][
+                cl_register
+            ] = cl.physical_id
+        for xmm0_register in xmm0_binded_registers:
+            self._register_allocations[XMMRegister._kind][
+                xmm0_register
+            ] = xmm0.physical_id
+    def _bind_registers(self):
+        """Iterates through the list of instructions and assigns physical IDs to allocated registers"""
+        for instruction in self._instructions:
+            for register in instruction.register_objects:
+                if register.is_virtual:
+                    register.physical_id = self._register_allocators[
+                        register.kind
+                    ].register_allocations.get(
+                        register._internal_id, register.physical_id
+                    )
+    def _allocate_local_variable(self):
+        """Returns a new unique ID for a local variable"""
+        self._local_variables_count += 1
+        return self._local_variables_count
+    def _allocate_mask_register_id(self):
+        """Returns a new unique ID for a virtual mask (k) register"""
+        self._virtual_mask_registers_count += 1
+        return self._virtual_mask_registers_count
+    def _allocate_xmm_register_id(self):
+        """Returns a new unique ID for a virtual SSE/AVX/AVX-512 (xmm/ymm/zmm) register"""
+        self._virtual_xmm_registers_count += 1
+        return self._virtual_xmm_registers_count
+    def _allocate_mmx_register_id(self):
+        """Returns a new unique ID for a virtual MMX (mm) register"""
+        self._virtual_mmx_registers_count += 1
+        return self._virtual_mmx_registers_count
+    def _allocate_general_purpose_register_id(self):
+        """Returns a new unique ID for a virtual general-purpose register"""
+        self._virtual_general_purpose_registers_count += 1
+        return self._virtual_general_purpose_registers_count
+    def __str__(self):
+        """Returns string representation of the function signature and instructions"""
+        return self.format()
+    def format_instructions(self, line_separator=os.linesep):
+        """Formats instruction listing including data on input, output, available and live registers"""
+        from nervapy.x86_64.pseudo import ALIGN, LABEL
+        code = []
+        tab = " " * 4
+        for instruction in self._instructions:
+            code.append(instruction.format("peachpy", indent=False))
+            if not isinstance(instruction, (LABEL, ALIGN)):
+                code.append(
+                    tab
+                    + "In regs:    "
+                    + ", ".join(sorted(map(str, instruction.input_registers)))
+                )
+                code.append(
+                    tab
+                    + "Out regs:   "
+                    + ", ".join(sorted(map(str, instruction.output_registers)))
+                )
+                code.append(
+                    tab
+                    + "Live regs:  "
+                    + ", ".join(sorted(map(str, instruction.live_registers)))
+                )
+                code.append(
+                    tab
+                    + "Avail regs: "
+                    + ", ".join(sorted(map(str, instruction.available_registers)))
+                )
+            code.append("")
+        if line_separator is None:
+            return code
+        else:
+            return str(line_separator).join(code)
+    def format(self, line_separator=os.linesep):
+        """Formats assembly listing of the function according to specified parameters"""
+        code = [self.c_signature]
+        for instruction in self._instructions:
+            code.append(instruction.format("peachpy", indent=True))
+        if line_separator is None:
+            return code
+        else:
+            return str(line_separator).join(code)
+class Argument(nervapy.Argument):
+    def __init__(self, argument, abi):
+        """Extends generic Argument object with x86-64 specific attributes required for stack frame construction
+        :ivar nervapy.x86_64.registers.Register register: the register in which the argument is passed to the function
+            or None if the argument is passed on stack.
+        :ivar int stack_offset: offset from the end of return address on the stack to the location of the argument on
+            stack or None if the argument is passed in a register and has no stack location. Note that in Microsoft X64
+            ABI the first four arguments are passed in registers but have stack space reserved for their storage.
+            For these arguments both register and stack_offset are non-null.
+        :ivar nervapy.x86_64.operand.MemoryAddress address: address of the argument on stack, relative to rsp or rbp.
+            The value of this attribute is None until after register allocation. In Golang ABIs this attribute is never
+            initialized because to load arguments from stack Golang uses its own pseudo-register FP, which is not
+            representable in PeachPy (LOAD.ARGUMENT pseudo-instructions use stack_offset instead when formatted as
+            Golang assembly).
+        """
+        assert isinstance(
+            argument, nervapy.Argument
+        ), "Architecture-specific argument must be constructed from generic Argument object"
+        from nervapy.x86_64.abi import ABI
+        assert isinstance(abi, ABI), "ABI object expected"
+        from copy import deepcopy
+        super(Argument, self).__init__(deepcopy(argument.c_type), argument.name)
+        if self.c_type.size is None:
+            self.c_type.size = self.c_type.get_size(abi)
+        self.abi = abi
+        self.register = None
+        self.address = None
+        self.stack_offset = None
+        self.save_on_stack = False
+    @property
+    def passed_on_stack(self):
+        return self.register is None
+class ABIFunction:
+    """ABI-specific x86-64 assembly function.
+    A function consists of C signature, ABI, and a list of instructions without virtual registers.
+    """
+    def __init__(self, function, abi):
+        from copy import deepcopy
+        from nervapy.x86_64.abi import (ABI, goasm_amd64_abi,
+                                        goasm_amd64p32_abi, gosyso_amd64_abi,
+                                        gosyso_amd64p32_abi, linux_x32_abi,
+                                        microsoft_x64_abi,
+                                        native_client_x86_64_abi,
+                                        system_v_x86_64_abi)
+        assert isinstance(function, Function), "Function object expected"
+        assert isinstance(abi, ABI), "ABI object expected"
+        self.name = function.name
+        self.arguments = [Argument(argument, abi) for argument in function.arguments]
+        self.result_type = function.result_type
+        self.result_offset = None
+        self.package = function.package
+        self.target = function.target
+        self.isa_extensions = function.isa_extensions
+        self.abi = abi
+        self.c_signature = function.c_signature
+        self.go_signature = function.go_signature
+        self.avx_environment = function.avx_environment
+        self._avx_prolog = function._avx_prolog
+        from nervapy.x86_64.registers import rsp
+        self._stack_base = rsp
+        self._stack_frame_size = 0
+        self._stack_frame_alignment = self.abi.stack_alignment
+        self._local_variables_size = 0
+        self._instructions = deepcopy(function._instructions)
+        self._register_allocators = deepcopy(function._register_allocators)
+        if abi == microsoft_x64_abi:
+            self._setup_windows_arguments()
+        elif abi in {system_v_x86_64_abi, linux_x32_abi, native_client_x86_64_abi}:
+            self._setup_unix_arguments()
+        elif abi in {
+            gosyso_amd64_abi,
+            gosyso_amd64p32_abi,
+            goasm_amd64_abi,
+            goasm_amd64p32_abi,
+        }:
+            self._setup_golang_arguments()
+        else:
+            raise ValueError("Unsupported ABI: %s" % str(abi))
+        self._update_argument_loads(function.arguments)
+        self._layout_local_variables()
+        self._allocate_registers()
+        self._bind_registers()
+        self._clobbered_registers = self._analyze_clobbered_registers()
+        self._update_stack_frame()
+        self._update_argument_addresses()
+        self._lower_argument_loads()
+        self._lower_pseudoinstructions()
+        self._filter_instruction_encodings()
+        self.mangled_name = self.mangle_name()
+    def _update_argument_loads(self, arguments):
+        from nervapy.x86_64.pseudo import LOAD
+        for instruction in self._instructions:
+            if isinstance(instruction, LOAD.ARGUMENT):
+                instruction.operands = (
+                    instruction.operands[0],
+                    self.arguments[arguments.index(instruction.operands[1])],
+                )
+                if instruction.operands[1].register in instruction.available_registers:
+                    instruction.operands[1].save_on_stack = True
+    def _setup_windows_arguments(self):
+        from nervapy.x86_64.abi import microsoft_x64_abi
+        assert (
+            self.abi == microsoft_x64_abi
+        ), "This function must only be used with Microsoft x64 ABI"
+        # The first 4 arguments are passed in registers, others are on stack.
+        # 8 bytes on stack is reserved for each parameter (regardless of their size).
+        # On-stack space is also reserved, but not initialized, for parameters passed in registers.
+        # Arguments are NOT extended to 8 bytes, and high bytes of registers/stack cells may contain garbage.
+        from nervapy.x86_64 import m64
+        from nervapy.x86_64.registers import (r8, r9, rcx, rdx, xmm0, xmm1,
+                                              xmm2, xmm3)
+        floating_point_argument_registers = (xmm0, xmm1, xmm2, xmm3)
+        integer_argument_registers = (rcx, rdx, r8, r9)
+        for index, argument in enumerate(self.arguments):
+            argument.passed_by_reference = argument.is_vector and argument != m64
+            if index < 4:
+                if argument.is_floating_point:
+                    argument.register = floating_point_argument_registers[index]
+                elif (
+                    argument.is_integer
+                    or argument.is_pointer
+                    or argument.is_codeunit
+                    or argument.is_mask
+                    or argument.c_type == m64
+                ):
+                    argument_register = integer_argument_registers[index]
+                    argument.register = {
+                        1: argument_register.as_low_byte,
+                        2: argument_register.as_word,
+                        4: argument_register.as_dword,
+                        8: argument_register,
+                    }[argument.size]
+                elif argument.is_vector:
+                    argument.register = integer_argument_registers[index]
+                else:
+                    assert False
+            # Stack offset does not include return address
+            argument.stack_offset = index * 8
+    def _setup_unix_arguments(self):
+        from nervapy.x86_64.abi import (linux_x32_abi,
+                                        native_client_x86_64_abi,
+                                        system_v_x86_64_abi)
+        assert self.abi in {
+            system_v_x86_64_abi,
+            linux_x32_abi,
+            native_client_x86_64_abi,
+        }, "This function must only be used with System V x86-64, Linux x32 or Native Client x86-64 SFI ABI"
+        from nervapy.x86_64.registers import (r8, r9, rcx, rdi, rdx, rsi, xmm0,
+                                              xmm1, xmm2, xmm3, xmm4, xmm5,
+                                              xmm6, xmm7)
+        # The first 6 integer/pointer arguments are passed in general-purpose registers.
+        # The first 8 floating-point arguments are passed in SSE registers.
+        # For all integer arguments in excess of 6 and floating-point arguments in excess the caller reserves
+        # 8 bytes on stack.
+        # Arguments smaller than 4 bytes are extended (with sign-extension, if needed) to 4 bytes.
+        # For 4-byte and smaller arguments passed on stack the high 4 bytes are not initialized.
+        # For 4-byte and smaller arguments passed in registers the high 4 bytes are zero-initialized.
+        # X32 and Native Client ABIs were not much tested, but they seem similar
+        available_floating_point_registers = [
+            xmm0,
+            xmm1,
+            xmm2,
+            xmm3,
+            xmm4,
+            xmm5,
+            xmm6,
+            xmm7,
+        ]
+        available_integer_registers = [rdi, rsi, rdx, rcx, r8, r9]
+        # Stack offset does not include return address
+        stack_offset = 0
+        for argument in self.arguments:
+            if (argument.is_floating_point or argument.is_vector) and len(
+                available_floating_point_registers
+            ) > 0:
+                argument.register = available_floating_point_registers.pop(0)
+                if argument.size in {4, 8, 16}:
+                    pass
+                elif argument.size == 32:
+                    argument.register = argument.register.as_ymm
+                elif argument.size == 64:
+                    argument.register = argument.register.as_zmm
+                else:
+                    assert False
+            elif (
+                argument.is_integer or argument.is_pointer or argument.is_codeunit
+            ) and len(available_integer_registers) > 0:
+                argument_register = available_integer_registers.pop(0)
+                argument.register = {
+                    1: argument_register.as_dword,
+                    2: argument_register.as_dword,
+                    4: argument_register.as_dword,
+                    8: argument_register,
+                }[argument.size]
+            elif argument.is_vector or argument.is_mask:
+                assert False
+            else:
+                argument.stack_offset = stack_offset
+                stack_offset += 8
+    def _setup_golang_arguments(self):
+        from nervapy.x86_64.abi import (goasm_amd64_abi, goasm_amd64p32_abi,
+                                        gosyso_amd64_abi, gosyso_amd64p32_abi)
+        assert self.abi in {
+            gosyso_amd64_abi,
+            gosyso_amd64p32_abi,
+            goasm_amd64_abi,
+            goasm_amd64p32_abi,
+        }, "This function must only be used with Golang AMD64 or AMD64p32 ABI"
+        from nervapy.util import roundup
+        # All arguments are passed on stack
+        # Stack offset does not include the return address
+        stack_offset = 0
+        for index, argument in enumerate(self.arguments):
+            # Arguments are aligned on stack
+            stack_offset = roundup(stack_offset, argument.size)
+            argument.stack_offset = stack_offset
+            stack_offset += argument.size
+        if self.result_type is not None:
+            self.result_offset = roundup(stack_offset, self.result_type.size)
+    def _layout_local_variables(self):
+        from nervapy.x86_64.registers import rsp
+        local_variables_set = set()
+        local_variables_list = list()
+        for instruction in self._instructions:
+            local_variable = instruction.local_variable
+            if local_variable is not None:
+                local_variable = local_variable.root
+                if local_variable not in local_variables_set:
+                    local_variables_set.add(local_variable)
+                    local_variables_list.append(local_variable)
+        if local_variables_list:
+            local_variables_list = list(
+                sorted(local_variables_list, key=lambda var: var.size)
+            )
+            self._stack_frame_alignment = max(
+                var.alignment for var in local_variables_list
+            )
+            local_variable_address = 0
+            from nervapy.util import roundup
+            for local_variable in local_variables_list:
+                local_variable_address = roundup(
+                    local_variable_address, local_variable.alignment
+                )
+                local_variable._address = local_variable_address
+                local_variable_address += local_variable.size
+            self._local_variables_size = local_variable_address
+            for instruction in self._instructions:
+                local_variable = instruction.local_variable
+                if local_variable is not None:
+                    assert local_variable.address is not None
+                    memory_address = instruction.memory_address
+                    assert memory_address is not None
+                    assert memory_address.base == rsp
+                    instruction.memory_address.displacement = local_variable.address
+    def _allocate_registers(self):
+        for register_kind, register_allocator in six.iteritems(
+            self._register_allocators
+        ):
+            register_allocator.set_allocation_options(self.abi, register_kind)
+        from nervapy.x86_64.pseudo import LOAD
+        from nervapy.x86_64.registers import (GeneralPurposeRegister,
+                                              KRegister, MMXRegister, Register,
+                                              XMMRegister)
+        for instruction in self._instructions:
+            if isinstance(instruction, LOAD.ARGUMENT):
+                dst_reg = instruction.operands[0]
+                src_arg = instruction.operands[1]
+                assert isinstance(dst_reg, Register)
+                assert isinstance(src_arg, Argument)
+                if dst_reg.is_virtual and src_arg.register is not None:
+                    self._register_allocators[dst_reg.kind].try_allocate_register(
+                        dst_reg.virtual_id, src_arg.register.physical_id
+                    )
+        for register_allocator in six.itervalues(self._register_allocators):
+            register_allocator.allocate_registers()
+    def _lower_argument_loads(self):
+        from nervapy.x86_64.abi import goasm_amd64_abi, goasm_amd64p32_abi
+        from nervapy.x86_64.lower import load_memory, load_register
+        from nervapy.x86_64.pseudo import LOAD
+        from nervapy.x86_64.registers import (GeneralPurposeRegister,
+                                              MMXRegister, XMMRegister,
+                                              YMMRegister)
+        if self.abi == goasm_amd64_abi or self.abi == goasm_amd64p32_abi:
+            # Like PeachPy, Go assembler uses pseudo-instructions for argument loads
+            return
+        lowered_instructions = []
+        for i, instruction in enumerate(self._instructions):
+            if isinstance(instruction, LOAD.ARGUMENT):
+                assert isinstance(
+                    instruction.operands[0],
+                    (GeneralPurposeRegister, MMXRegister, XMMRegister, YMMRegister),
+                ), "Lowering LOAD.ARGUMENT is supported only for general-purpose, mmx, xmm, and ymm target registers"
+                if instruction.operands[1].register is not None:
+                    # The argument is passed to function in a register
+                    ld_reg = load_register(
+                        instruction.operands[0],
+                        instruction.operands[1].register,
+                        instruction.operands[1].c_type,
+                        prototype=instruction,
+                    )
+                    if ld_reg is not None:
+                        lowered_instructions.append(ld_reg)
+                else:
+                    # The argument is passed to function on stack
+                    ld_mem = load_memory(
+                        instruction.operands[0],
+                        instruction.operands[1].address,
+                        instruction.operands[1].c_type,
+                        prototype=instruction,
+                    )
+                    lowered_instructions.append(ld_mem)
+            else:
+                lowered_instructions.append(instruction)
+        self._instructions = lowered_instructions
+    def _lower_pseudoinstructions(self):
+        from nervapy.stream import InstructionStream
+        from nervapy.util import is_int, is_sint32, is_uint32
+        from nervapy.x86_64.abi import (goasm_amd64_abi, goasm_amd64p32_abi,
+                                        gosyso_amd64_abi, gosyso_amd64p32_abi,
+                                        native_client_x86_64_abi)
+        from nervapy.x86_64.avx import VMOVAPS, VZEROUPPER
+        from nervapy.x86_64.generic import (ADD, AND, LEA, MOV, POP, PUSH, RET,
+                                            SUB, XOR)
+        from nervapy.x86_64.lower import load_register
+        from nervapy.x86_64.mmxsse import MOVAPS
+        from nervapy.x86_64.nacl import (NACLASP, NACLJMP, NACLRESTBP,
+                                         NACLRESTSP, NACLSSP)
+        from nervapy.x86_64.pseudo import RETURN, STORE
+        from nervapy.x86_64.registers import (GeneralPurposeRegister64,
+                                              XMMRegister, eax, rbp, rsp)
+        # The new list with lowered instructions
+        instructions = list()
+        # Generate prologue
+        cloberred_xmm_registers = list()
+        cloberred_general_purpose_registers = list()
+        with InstructionStream() as prolog_stream:
+            # 1. Save clobbered general-purpose registers with PUSH instruction
+            # 2. If there are clobbered XMM registers, allocate space for them on stack (subtract stack pointer)
+            # 3. Save clobbered XMM registers on stack with (V)MOVAPS instruction
+            for reg in self._clobbered_registers:
+                assert isinstance(
+                    reg, (GeneralPurposeRegister64, XMMRegister)
+                ), "Internal error: unexpected register %s in clobber list" % str(reg)
+                if isinstance(reg, GeneralPurposeRegister64):
+                    cloberred_general_purpose_registers.append(reg)
+                    PUSH(reg)
+                else:
+                    cloberred_xmm_registers.append(reg)
+            # If stack needs to be realigned
+            if self._stack_frame_alignment > self.abi.stack_alignment:
+                cloberred_general_purpose_registers.append(rbp)
+                PUSH(rbp)
+                MOV(rbp, rsp)
+            if cloberred_xmm_registers or self._local_variables_size != 0:
+                # Total size of the stack frame less what is already adjusted with PUSH instructions
+                stack_adjustment = (
+                    self._stack_frame_size
+                    - len(cloberred_general_purpose_registers)
+                    * GeneralPurposeRegister64.size
+                )
+                if self.abi != native_client_x86_64_abi:
+                    SUB(rsp, stack_adjustment + self._local_variables_size)
+                    if self._stack_frame_alignment > self.abi.stack_alignment:
+                        AND(rsp, -self._stack_frame_alignment)
+                else:
+                    if self._stack_frame_alignment > self.abi.stack_alignment:
+                        # Note: do not modify rcx/rdx/r8/r9 as they may contain function arguments
+                        LEA(
+                            eax, [rsp - (stack_adjustment + self._local_variables_size)]
+                        )
+                        AND(eax, -self._stack_frame_alignment)
+                        NACLRESTSP(eax)
+                    else:
+                        NACLSSP(stack_adjustment + self._local_variables_size)
+            for i, xmm_reg in enumerate(cloberred_xmm_registers):
+                movaps = VMOVAPS if self._avx_prolog else MOVAPS
+                movaps(
+                    [rsp + self._local_variables_size + i * XMMRegister.size], xmm_reg
+                )
+        # TODO: handle situations when entry point is in the middle of a function
+        instructions.extend(prolog_stream.instructions)
+        for instruction in self._instructions:
+            if isinstance(instruction, RETURN):
+                from nervapy.x86_64.registers import (GeneralPurposeRegister,
+                                                      MMXRegister, XMMRegister,
+                                                      YMMRegister, al, ax, eax,
+                                                      ecx, mm0, rax, rcx, xmm0,
+                                                      ymm0)
+                is_goasm_abi = self.abi in {goasm_amd64_abi, goasm_amd64p32_abi}
+                is_gosyso_abi = self.abi in {gosyso_amd64_abi, gosyso_amd64p32_abi}
+                with InstructionStream() as epilog_stream:
+                    # Save return value
+                    if instruction.operands:
+                        assert len(instruction.operands) == 1
+                        if is_int(instruction.operands[0]):
+                            assert (
+                                self.result_type.is_integer
+                                or self.result_type.is_pointer
+                            )
+                            # Return immediate constant
+                            if is_goasm_abi:
+                                # Return value must be saved on stack with STORE.RESULT pseudo-instruction
+                                if self.result_type.size <= 4 or is_sint32(
+                                    instruction.operands[0]
+                                ):
+                                    # STORE.RESULT will assemble to one of the forms:
+                                    # - MOV m8, imm8
+                                    # - MOV m16, imm16
+                                    # - MOV m32, imm32
+                                    # - MOV m64, imm32
+                                    STORE.RESULT(
+                                        instruction.operands[0],
+                                        prototype=instruction,
+                                        target_function=self,
+                                    )
+                                else:
+                                    # STORE.RESULT can't be used directly (MOV m64, imm64 doesn't exist), instead use
+                                    # MOV rax, imm64 + MOV m64, rax (STORE.RESULT)
+                                    MOV(
+                                        eax,
+                                        instruction.operands[0],
+                                        prototype=instruction,
+                                    )
+                                    STORE.RESULT(
+                                        eax, prototype=instruction, target_function=self
+                                    )
+                            else:
+                                # Return value is returned in:
+                                # - eax register if result type is not greater than 4 bytes
+                                # - rax register if result type is greater than 8 bytes
+                                if instruction.operands[0] == 0:
+                                    # - Zero eax register (high 32 bits of rax register clear automatically)
+                                    XOR(eax, eax, prototype=instruction)
+                                elif self.result_type.size <= 4 or is_uint32(
+                                    instruction.operands[0]
+                                ):
+                                    # - If the result type is not greater than 4 bytes, directly mov it to eax register
+                                    # - If the result type is greater than 4 bytes, but the result value is
+                                    #   representable as unsigned 32-bit literal, mov it to eax register and the high
+                                    #   32 bits of rax will be cleared automatically
+                                    MOV(
+                                        eax,
+                                        instruction.operands[0],
+                                        prototype=instruction,
+                                    )
+                                else:
+                                    # - Either negative 32-bit constant (would use MOV rax, imm32 form)
+                                    # - Or large 64-bit constant (would use MOV rax, imm64 form)
+                                    MOV(
+                                        rax,
+                                        instruction.operands[0],
+                                        prototype=instruction,
+                                    )
+                        elif isinstance(
+                            instruction.operands[0], GeneralPurposeRegister
+                        ):
+                            if (
+                                is_goasm_abi
+                                and instruction.operands[0].size
+                                == self.result_type.size
+                            ):
+                                STORE.RESULT(
+                                    instruction.operands[0],
+                                    prototype=instruction,
+                                    target_function=self,
+                                )
+                            else:
+                                result_reg = eax if self.result_type.size <= 4 else rax
+                                epilog_stream.add_instruction(
+                                    load_register(
+                                        result_reg,
+                                        instruction.operands[0],
+                                        self.result_type,
+                                        prototype=instruction,
+                                    )
+                                )
+                                if is_goasm_abi:
+                                    result_subreg = {1: al, 2: ax, 4: eax, 8: rax}[
+                                        self.result_type.size
+                                    ]
+                                    STORE.RESULT(
+                                        result_subreg,
+                                        prototype=instruction,
+                                        target_function=self,
+                                    )
+                        elif isinstance(instruction.operands[0], MMXRegister):
+                            epilog_stream.add_instruction(
+                                load_register(
+                                    mm0,
+                                    instruction.operands[0],
+                                    self.result_type,
+                                    prototype=instruction,
+                                )
+                            )
+                        elif isinstance(instruction.operands[0], XMMRegister):
+                            if self.result_type.is_floating_point and is_goasm_abi:
+                                assert self.result_type.size in {4, 8}
+                                STORE.RESULT(
+                                    instruction.operands[0],
+                                    prototype=instruction,
+                                    target_function=self,
+                                )
+                            else:
+                                epilog_stream.add_instruction(
+                                    load_register(
+                                        xmm0,
+                                        instruction.operands[0],
+                                        self.result_type,
+                                        prototype=instruction,
+                                    )
+                                )
+                        elif isinstance(instruction.operands[0], YMMRegister):
+                            epilog_stream.add_instruction(
+                                load_register(
+                                    ymm0,
+                                    instruction.operands[0],
+                                    self.result_type,
+                                    prototype=instruction,
+                                )
+                            )
+                        else:
+                            assert False
+                    if instruction.avx_mode and not self.avx_environment:
+                        VZEROUPPER(prototype=instruction)
+                    # Generate epilog
+                    # 1. Restore clobbered XMM registers on stack with (V)MOVAPS instruction
+                    # 2. If there are clobbered XMM registers, release their space on stack (increment stack pointer)
+                    # 3. Restore clobbered general-purpose registers with PUSH instruction
+                    for i, xmm_reg in enumerate(cloberred_xmm_registers):
+                        movaps = VMOVAPS if self.avx_environment else MOVAPS
+                        movaps(
+                            xmm_reg,
+                            [rsp + self._local_variables_size + i * XMMRegister.size],
+                        )
+                    if self._stack_frame_alignment > self.abi.stack_alignment:
+                        # Restore rsp value from rbp
+                        MOV(rsp, rbp)
+                    elif cloberred_xmm_registers or self._local_variables_size != 0:
+                        # Total size of the stack frame less what will be adjusted with POP instructions
+                        stack_adjustment = (
+                            self._stack_frame_size
+                            - len(cloberred_general_purpose_registers)
+                            * GeneralPurposeRegister64.size
+                        )
+                        if self.abi != native_client_x86_64_abi:
+                            ADD(rsp, stack_adjustment + self._local_variables_size)
+                        else:
+                            NACLASP(stack_adjustment + self._local_variables_size)
+                    # Important: registers must be POPed in reverse order
+                    for reg in reversed(cloberred_general_purpose_registers):
+                        if reg == rbp and self.abi == native_client_x86_64_abi:
+                            POP(rcx)
+                            NACLRESTBP(ecx)
+                        else:
+                            POP(reg)
+                    # Return from the function
+                    if self.abi == native_client_x86_64_abi:
+                        POP(rcx, prototype=instruction)
+                        NACLJMP(ecx)
+                    else:
+                        RET(prototype=instruction)
+                instructions.extend(epilog_stream.instructions)
+            elif isinstance(instruction, STORE.RESULT):
+                instruction.destination_offset = self.result_offset
+                instructions.append(instruction)
+            else:
+                if self.abi == native_client_x86_64_abi and instruction.name != "LEA":
+                    from nervapy.x86_64.operand import is_m
+                    memory_operands = list(
+                        filter(lambda op: is_m(op), instruction.operands)
+                    )
+                    if memory_operands:
+                        assert (
+                            len(memory_operands) == 1
+                        ), "x86-64 instructions can not have more than 1 explicit memory operand"
+                        memory_address = memory_operands[0].address
+                        from nervapy.x86_64.operand import MemoryAddress
+                        if isinstance(memory_address, MemoryAddress):
+                            if memory_address.index is not None:
+                                raise ValueError("NaCl does not allow index addressing")
+                            from nervapy.x86_64.registers import r15, rbp, rsp
+                            if (
+                                memory_address.base is not None
+                                and memory_address.base not in {rbp, rsp, r15}
+                            ):
+                                # Base register is not a restricted register: needs transformation
+                                memory_address.index = memory_address.base
+                                memory_address.scale = 1
+                                memory_address.base = r15
+                instructions.append(instruction)
+        self._instructions = instructions
+    def _filter_instruction_encodings(self):
+        for instruction in self._instructions:
+            instruction.encodings = instruction._filter_encodings()
+    def _update_argument_addresses(self):
+        for argument in self.arguments:
+            if argument.stack_offset is not None:
+                argument.address = self._argument_stack_base + argument.stack_offset
+    def _analyze_clobbered_registers(self):
+        from nervapy.x86_64.registers import (GeneralPurposeRegister,
+                                              XMMRegister, YMMRegister,
+                                              ZMMRegister)
+        output_subregisters = set()
+        for instruction in self._instructions:
+            output_subregisters.update(instruction.output_registers)
+        output_registers = set()
+        for subreg in output_subregisters:
+            if isinstance(subreg, GeneralPurposeRegister):
+                output_registers.add(subreg.as_qword)
+            elif isinstance(subreg, (XMMRegister, YMMRegister, ZMMRegister)):
+                output_registers.add(subreg.as_xmm)
+            # Other register types are volatile registers for all x86-64 ABIs
+        return list(
+            sorted(
+                filter(
+                    lambda reg: reg in self.abi.callee_save_registers, output_registers
+                )
+            )
+        )
+    def _update_stack_frame(self):
+        from nervapy.x86_64.registers import (GeneralPurposeRegister64,
+                                              XMMRegister, rbp, rsp)
+        clobbered_general_purpose_registers = 0
+        clobbered_xmm_registers = 0
+        for reg in self._clobbered_registers:
+            assert isinstance(
+                reg, (GeneralPurposeRegister64, XMMRegister)
+            ), "Internal error: unexpected register %s in clobber list" % str(reg)
+            if isinstance(reg, GeneralPurposeRegister64):
+                clobbered_general_purpose_registers += 1
+            else:
+                clobbered_xmm_registers += 1
+        # If the stack needs to be aligned, rbp register needs to be preserved too
+        if self._stack_frame_alignment > self.abi.stack_alignment:
+            clobbered_general_purpose_registers += 1
+        self._stack_frame_size = (
+            clobbered_general_purpose_registers * GeneralPurposeRegister64.size
+            + clobbered_xmm_registers * XMMRegister.size
+        )
+        # 1. On function entry stack is misaligned by 8
+        # 2. Each clobbered general-purpose register is pushed as 8 bytes
+        # 3. If the number of clobbered general-purpose registers is odd, the stack will be misaligned by 8 after they
+        #    are pushed on stack
+        # 4. If additionally there are clobbered XMM registers, we need to subtract 8 from stack to make it aligned
+        #    by 16 after the general-purpose registers are pushed
+        if (
+            clobbered_xmm_registers != 0 or self._local_variables_size != 0
+        ) and clobbered_general_purpose_registers % 2 == 0:
+            self._stack_frame_size += 8
+        # Set stack_argument_base
+        return_address_size = 8
+        if self._stack_frame_alignment > self.abi.stack_alignment:
+            # rsp is realigned, argument addressing uses rbp
+            saved_rbp_size = 8
+            self._argument_stack_base = rbp + return_address_size + saved_rbp_size
+        else:
+            # argument addressing uses rsp
+            self._argument_stack_base = (
+                rsp
+                + return_address_size
+                + self._stack_frame_size
+                + self._local_variables_size
+            )
+    def _bind_registers(self):
+        """Iterates through the list of instructions and assigns physical IDs to allocated registers"""
+        for instruction in self._instructions:
+            for register in instruction.register_objects:
+                if register.is_virtual:
+                    register.physical_id = self._register_allocators[
+                        register.kind
+                    ].register_allocations[register.virtual_id]
+    def format_code(
+        self,
+        assembly_format="peachpy",
+        line_separator=os.linesep,
+        indent=True,
+        line_number=1,
+    ):
+        """Returns code of assembly instructions comprising the function"""
+        code = []
+        if assembly_format == "gas":
+            # Pre-assign line number to labels
+            from nervapy.x86_64.pseudo import LABEL
+            for i, instruction in enumerate(self._instructions):
+                if isinstance(instruction, LABEL):
+                    instruction.operands[0].line_number = line_number + i
+        for i, instruction in enumerate(self._instructions):
+            from nervapy.x86_64.instructions import Instruction
+            # if isinstance(instruction, Instruction):
+            #     try:
+            #         hex_string = " ".join("%02X" % byte for byte in instruction.encode())
+            #         code.append("    " + "# " + hex_string)
+            #     except Exception as e:
+            #         import sys
+            #         code.append(e.message)
+            #         # raise
+            code.append(
+                instruction.format(
+                    assembly_format=assembly_format,
+                    indent=indent,
+                    line_number=line_number + i,
+                )
+            )
+        if line_separator is None:
+            return code
+        else:
+            return str(line_separator).join(code)
+    def format(
+        self, assembly_format="peachpy", line_separator=os.linesep, line_number=1
+    ):
+        """Formats assembly listing of the function according to specified parameters"""
+        if assembly_format == "go":
+            # Arguments for TEXT directive in Go assembler
+            package_string = self.package
+            if package_string is None:
+                package_string = ""
+            if six.PY2:
+                text_arguments = [
+                    package_string + "\xc2\xb7" + self.mangled_name + "(SB)"
+                ]
+            else:
+                text_arguments = [
+                    package_string + "\u00b7" + self.mangled_name + "(SB)"
+                ]
+            text_arguments.append("4")
+            stack_size = sum(map(operator.attrgetter("size"), self.arguments))
+            if self.result_offset is not None:
+                stack_size = self.result_offset + self.result_type.size
+            if stack_size == 0:
+                text_arguments.append("$0")
+            else:
+                text_arguments.append("$0-%d" % stack_size)
+            code = ["TEXT " + ",".join(text_arguments)]
+            if self.go_signature is not None:
+                code.insert(0, "// " + self.go_signature)
+        elif assembly_format == "gas":
+            from nervapy.util import ilog2
+            code_alignment = 16
+            code = [
+                "#ifdef __APPLE__",
+                ".section __TEXT,__text,regular,pure_instructions",
+                ".globl _{name}".format(name=self.mangled_name),
+                ".p2align {ilog2alignment}, 0x90".format(
+                    ilog2alignment=ilog2(code_alignment)
+                ),
+                "_{name}:".format(name=self.mangled_name),
+                "#else /* !__APPLE__ */",
+                ".text",
+                ".p2align {ilog2alignment},,{max_alignment_bytes}".format(
+                    ilog2alignment=ilog2(code_alignment),
+                    max_alignment_bytes=code_alignment - 1,
+                ),
+                ".globl " + self.mangled_name,
+                ".type {name}, @function".format(name=self.mangled_name),
+                "{name}:".format(name=self.mangled_name),
+                "#endif /* !__APPLE */",
+            ]
+        else:
+            code = []
+        code += self.format_code(
+            assembly_format,
+            line_separator=None,
+            indent=True,
+            line_number=line_number + len(code),
+        )
+        if assembly_format == "gas":
+            code += [
+                "#ifndef __APPLE__",
+                ".size {name}, .-{name}".format(name=self.mangled_name),
+                "#endif /* !__APPLE__ */",
+            ]
+        if assembly_format in ["go", "gas"]:
+            # Add trailing line or assembler will refuse to compile
+            code.append("")
+        if line_separator is None:
+            return code
+        else:
+            return str(line_separator).join(code)
+    def encode(self):
+        return EncodedFunction(self)
+    @property
+    def metadata(self):
+        metadata = collections.OrderedDict(
+            [
+                ("entry", "function"),
+                ("name", self.name),
+                ("symbol", self.mangled_name),
+                (
+                    "return",
+                    "void" if self.result_type is None else str(self.result_type),
+                ),
+                (
+                    "arguments",
+                    [
+                        collections.OrderedDict(
+                            [("name", argument.name), ("type", str(argument.c_type))]
+                        )
+                        for argument in self.arguments
+                    ],
+                ),
+                ("arch", "x86-64"),
+                ("abi", str(self.abi)),
+                ("uarch", self.target.name),
+                ("isa", [str(extension) for extension in self.isa_extensions.minify()]),
+            ]
+        )
+        return metadata
+    def mangle_name(self):
+        import string
+        import nervapy.x86_64.options
+        name = (
+            nervapy.x86_64.options.name_mangling.replace("${Name}", self.name)
+            .replace("${name}", self.name.lower())
+            .replace("${NAME}", self.name.upper())
+            .replace("${uArch}", self.target.id)
+            .replace("${uarch}", self.target.id.lower())
+            .replace("${UARCH}", self.target.id.upper())
+            .replace(
+                "${ISA}",
+                "_".join(
+                    [extension.safe_name for extension in self.isa_extensions.minify()]
+                ),
+            )
+            .replace(
+                "${isa}",
+                "_".join(
+                    [
+                        extension.safe_name.lower()
+                        for extension in self.isa_extensions.minify()
+                    ]
+                ),
+            )
+        )
+        return name
+class InstructionBundle:
+    def __init__(self, capacity, address):
+        if capacity not in {16, 32, 64}:
+            raise ValueError("Bundle capacity must be 16, 32, or 64")
+        self.capacity = capacity
+        self.address = address
+        self.size = 0
+        self._instructions = []
+        # Map from instruction position to tuple (label address, long encoding, short range)
+        self.branch_info_map = dict()
+    @property
+    def padding(self):
+        return self.capacity - self.size
+    def add(self, instructions):
+        from nervapy.x86_64.instructions import Instruction
+        assert isinstance(instructions, list)
+        assert all(
+            isinstance(instruction, Instruction) for instruction in instructions
+        ), "Instruction instance expected"
+        bytecode = bytearray().join(
+            [instruction.encode() for instruction in instructions]
+        )
+        if self.size + len(bytecode) <= self.capacity:
+            self.size += len(bytecode)
+            for instruction in instructions:
+                instruction.bytecode = instruction.encode()
+                self._instructions.append(instruction)
+        else:
+            raise BufferError()
+    def add_label_branch(self, instruction, label_address=None, long_encoding=False):
+        from nervapy.x86_64.instructions import BranchInstruction
+        assert isinstance(
+            instruction, BranchInstruction
+        ), "BranchInstruction instance expected"
+        long_encoding, bytecode = instruction._encode_label_branch(
+            self.address + self.size, label_address, long_encoding
+        )
+        if self.capacity - self.size > len(bytecode):
+            self.size += len(bytecode)
+            if not long_encoding and label_address is not None:
+                # offset = label_address - self.end_address
+                # -> self.end_address = label_address - offset
+                # -> branch_address = label_address - len(bytecode) - offset
+                # -> branch_position = label_address - self.start_address - len(bytecode) - offset
+                #
+                # -> branch_pos >= label_address - self.start_address - len(bytecode) - 127
+                # -> branch_pos <= label_address - self.start_address - len(bytecode) + 128
+                branch_pos = label_address - self.address - len(bytecode)
+                branch_pos_max = min(branch_pos + 128, self.capacity)
+            else:
+                branch_pos_max = self.capacity
+            instruction.bytecode = bytecode
+            self.branch_info_map[len(self._instructions)] = (
+                label_address,
+                long_encoding,
+                branch_pos_max,
+            )
+            self._instructions.append(instruction)
+        else:
+            raise BufferError()
+    def optimize(self):
+        from nervapy.x86_64.instructions import BranchInstruction
+        from nervapy.x86_64.pseudo import LABEL
+        if any(
+            isinstance(instruction, (BranchInstruction, LABEL))
+            for instruction in self._instructions
+        ):
+            return
+        def suitable_encodings(instruction):
+            return [
+                (encoding, length)
+                for (length, encoding) in six.iteritems(
+                    instruction.encode_length_options()
+                )
+                if 0 < length - len(instruction.bytecode) <= self.padding
+            ]
+        while self.size < self.capacity:
+            suitable_instructions = [
+                instr for instr in self._instructions if any(suitable_encodings(instr))
+            ]
+            if not suitable_instructions:
+                break
+            shortest_suitable_instruction = min(
+                suitable_instructions, key=lambda instr: len(instr.bytecode)
+            )
+            new_encoding, new_length = min(
+                suitable_encodings(shortest_suitable_instruction),
+                key=operator.itemgetter(1),
+            )
+            self.size += new_length - len(shortest_suitable_instruction.bytecode)
+            assert self.size <= self.capacity
+            shortest_suitable_instruction.bytecode = new_encoding
+    def finalize(self):
+        from nervapy.x86_64.generic import NOP
+        while self.capacity > self.size:
+            self.add([NOP()])
+        self.size = self.capacity
+    @property
+    def label_address_map(self):
+        from nervapy.x86_64.pseudo import LABEL
+        label_address_map = dict()
+        code_address = self.address
+        for instruction in self._instructions:
+            if isinstance(instruction, LABEL):
+                label_address_map[instruction.identifier] = code_address
+            else:
+                code_address += len(instruction.bytecode)
+        return label_address_map
+    def __len__(self):
+        return self.size
+class EncodedFunction:
+    """ABI-specific x86-64 assembly function.
+    A function consists of C signature, ABI, and a list of instructions without virtual registers.
+    """
+    def __init__(self, function):
+        from copy import copy, deepcopy
+        assert isinstance(function, ABIFunction), "ABIFunction object expected"
+        self.name = function.name
+        self.mangled_name = function.mangled_name
+        self.arguments = list(map(copy, function.arguments))
+        self.result_type = function.result_type
+        self.target = function.target
+        self.abi = function.abi
+        from nervapy.x86_64.abi import native_client_x86_64_abi
+        from nervapy.x86_64.meta import Section, SectionType
+        if self.abi == native_client_x86_64_abi:
+            # Align with HLT instruction
+            self.code_section = Section(SectionType.code, alignment_byte=0xF4)
+            self.code_section.alignment = 32
+        else:
+            # Align with INT 3 instruction
+            self.code_section = Section(SectionType.code, alignment_byte=0xCC)
+            self.code_section.alignment = 16
+        self.const_section = Section(SectionType.const_data)
+        self._instructions = deepcopy(function._instructions)
+        self._constant_symbol_map = dict()
+        self._layout_literal_constants()
+        self._encode()
+    def _layout_literal_constants(self):
+        from nervapy.encoder import Encoder
+        from nervapy.x86_64.meta import Symbol, SymbolType
+        encoder = Encoder(self.abi.endianness)
+        constants = list()
+        for instruction in self._instructions:
+            constant = instruction.constant
+            if constant is not None:
+                constants.append(constant)
+        max_constant_size = 0
+        max_constant_alignment = 0
+        if constants:
+            max_constant_size = max(constant.size for constant in constants)
+            max_constant_alignment = max(constant.alignment for constant in constants)
+        self.const_section.alignment = max_constant_alignment
+        # Unsorted list of Symbol objects for constants
+        constant_symbols = list()
+        # This set is used to ensure that each constant is added only once
+        constant_names_set = set()
+        if max_constant_size != 0:
+            # Map from constant value (as bytes) to address in the const data section
+            constants_address_map = dict()
+            for instruction in self._instructions:
+                constant = instruction.constant
+                if constant is not None:
+                    constant_value = bytes(constant.encode(encoder))
+                    if constant_value not in constants_address_map:
+                        # Add the new constant to the section
+                        assert (
+                            constant.size == max_constant_size
+                        ), "Handling of functions with constant literals of different size is not implemented"
+                        assert (
+                            constant.alignment == max_constant_alignment
+                        ), "Handling of functions with constant literals of different alignment is not implemented"
+                        constants_address_map[constant_value] = len(self.const_section)
+                        self.const_section.content += constant_value
+                    if constant.name not in constant_names_set:
+                        constant_names_set.add(constant.name)
+                        const_symbol = Symbol(
+                            constants_address_map[constant_value],
+                            SymbolType.literal_constant,
+                            name=constant.name,
+                            size=constant.size,
+                        )
+                        constant_symbols.append(const_symbol)
+                        self._constant_symbol_map[constant.name] = const_symbol
+        for constant_symbol in sorted(
+            constant_symbols, key=lambda sym: (sym.offset, -sym.size)
+        ):
+            self.const_section.add_symbol(constant_symbol)
+    def _encode(self):
+        from nervapy.x86_64.instructions import BranchInstruction
+        from nervapy.x86_64.pseudo import LABEL
+        label_address_map = dict()
+        long_branches = set()
+        # Special post-processing for Native Client SFI
+        from nervapy.x86_64.abi import native_client_x86_64_abi
+        if self.abi == native_client_x86_64_abi:
+            has_updated_branches = True
+            has_unresolved_labels = True
+            bundles = list()
+            while has_updated_branches or has_unresolved_labels:
+                code_address = 0
+                has_updated_branches = False
+                has_unresolved_labels = False
+                bundles = list()
+                current_bundle = InstructionBundle(32, code_address)
+                for i, instruction in enumerate(self._instructions):
+                    if isinstance(instruction, LABEL):
+                        label_address_map[instruction.identifier] = code_address
+                        current_bundle.add([instruction])
+                    elif (
+                        isinstance(instruction, BranchInstruction)
+                        and instruction.label_name
+                    ):
+                        label_address = label_address_map.get(instruction.label_name)
+                        if label_address is None:
+                            has_unresolved_labels = True
+                        was_long = i in long_branches
+                        is_long, instruction.bytecode = (
+                            instruction._encode_label_branch(
+                                code_address, label_address, long_encoding=was_long
+                            )
+                        )
+                        if is_long and not was_long:
+                            long_branches.add(i)
+                            has_updated_branches = True
+                        try:
+                            current_bundle.add_label_branch(
+                                instruction, label_address, is_long
+                            )
+                        except BufferError:
+                            bundles.append(current_bundle)
+                            current_bundle = InstructionBundle(
+                                32, current_bundle.address + current_bundle.capacity
+                            )
+                            current_bundle.add_label_branch(
+                                instruction, label_address, is_long
+                            )
+                    else:
+                        instruction_group = [instruction]
+                        memory_address = instruction.memory_address
+                        from nervapy.x86_64.operand import MemoryAddress
+                        if (
+                            isinstance(memory_address, MemoryAddress)
+                            and memory_address.index is not None
+                        ):
+                            from nervapy.stream import NullStream
+                            with NullStream():
+                                from nervapy.x86_64.generic import MOV
+                                instruction_group.insert(
+                                    0,
+                                    MOV(
+                                        memory_address.index.as_dword,
+                                        memory_address.index.as_dword,
+                                    ),
+                                )
+                        try:
+                            current_bundle.add(instruction_group)
+                        except BufferError:
+                            bundles.append(current_bundle)
+                            current_bundle = InstructionBundle(
+                                32, current_bundle.address + current_bundle.capacity
+                            )
+                            current_bundle.add(instruction_group)
+                    code_address = current_bundle.address + current_bundle.size
+                bundles.append(current_bundle)
+            self._instructions = list()
+            for bundle in bundles:
+                bundle.optimize()
+                for instruction in bundle._instructions:
+                    constant = instruction.constant
+                    if constant:
+                        relocation = instruction.relocation
+                        for index in range(relocation.offset, relocation.offset + 4):
+                            instruction.bytecode[index] = 0
+                        relocation.offset += len(self.code_section)
+                        relocation.program_counter += len(self.code_section)
+                        relocation.symbol = self._constant_symbol_map[
+                            instruction.constant.name
+                        ]
+                        self.code_section.add_relocation(relocation)
+                    if instruction.bytecode:
+                        self.code_section.content += instruction.bytecode
+                if bundle.size < bundle.capacity:
+                    if bundle is not bundles[-1]:
+                        self.code_section.content += self._encode_nops(
+                            bundle.capacity - bundle.size
+                        )
+                    else:
+                        self.code_section.content += self._encode_abort(
+                            bundle.capacity - bundle.size
+                        )
+        else:
+            has_updated_branches = True
+            has_unresolved_labels = True
+            while has_updated_branches or has_unresolved_labels:
+                code_address = 0
+                has_updated_branches = False
+                has_unresolved_labels = False
+                for i, instruction in enumerate(self._instructions):
+                    if isinstance(instruction, LABEL):
+                        label_address_map[instruction.identifier] = code_address
+                    elif (
+                        isinstance(instruction, BranchInstruction)
+                        and instruction.label_name
+                    ):
+                        label_address = label_address_map.get(instruction.label_name)
+                        if label_address is None:
+                            has_unresolved_labels = True
+                        was_long = i in long_branches
+                        is_long, instruction.bytecode = (
+                            instruction._encode_label_branch(
+                                code_address, label_address, long_encoding=was_long
+                            )
+                        )
+                        if is_long and not was_long:
+                            long_branches.add(i)
+                            has_updated_branches = True
+                    else:
+                        instruction.bytecode = instruction.encode()
+                    if instruction.bytecode:
+                        code_address += len(instruction.bytecode)
+            for instruction in self._instructions:
+                constant = instruction.constant
+                if constant:
+                    relocation = instruction.relocation
+                    for index in range(relocation.offset, relocation.offset + 4):
+                        instruction.bytecode[index] = 0
+                    relocation.offset += len(self.code_section)
+                    relocation.program_counter += len(self.code_section)
+                    relocation.symbol = self._constant_symbol_map[
+                        instruction.constant.name
+                    ]
+                    self.code_section.add_relocation(relocation)
+                if instruction.bytecode:
+                    self.code_section.content += instruction.bytecode
+    def _encode_nops(self, length):
+        assert 1 <= length <= 31
+        from nervapy.x86_64.encoding import nop
+        if length <= 15:
+            return nop(length)
+        elif length <= 30:
+            return nop(length // 2) + nop(length - length // 2)
+        else:
+            return nop(8) + nop(8) + nop(15)
+    def _encode_abort(self, length):
+        from nervapy.x86_64.abi import (goasm_amd64_abi, goasm_amd64p32_abi,
+                                        native_client_x86_64_abi)
+        if self.abi == native_client_x86_64_abi:
+            # Use HLT instructions
+            return bytearray([0xF4] * length)
+        elif self.abi in {goasm_amd64_abi, goasm_amd64p32_abi}:
+            # Use a single INT 3 instruction as alignment is not supported anyway
+            return bytearray([0xCD])
+        else:
+            # Use INT 3 instructions
+            return bytearray([0xCD] * length)
+    def format_code(
+        self, assembly_format="peachpy", line_separator=os.linesep, indent=True
+    ):
+        """Returns code of assembly instructions comprising the function"""
+        code = []
+        for instruction in self._instructions:
+            code.append(instruction.format_encoding(indent=indent))
+            code.append(
+                instruction.format(assembly_format=assembly_format, indent=indent)
+            )
+        if line_separator is None:
+            return code
+        else:
+            return str(line_separator).join(filter(lambda line: line is not None, code))
+    def format(self, assembly_format="peachpy", line_separator=os.linesep):
+        """Formats assembly listing of the function according to specified parameters"""
+        if assembly_format == "go":
+            # Arguments for TEXT directive in Go assembler
+            text_arguments = ["%s\xc2\xb7%s(SB)" % (self.package_name, self.name)]
+            text_arguments.append("4")
+            text_arguments.append("$0")
+            code = ["TEXT " + ",".join(text_arguments)]
+        else:
+            code = []
+        code.extend(self.format_code(assembly_format, line_separator=None, indent=True))
+        if assembly_format == "go":
+            # Add trailing line or assembler will refuse to compile
+            code.append("")
+        if line_separator is None:
+            return code
+        else:
+            return str(line_separator).join(filter(bool, code))
+    def load(self):
+        return ExecutableFuntion(self)
+class ExecutableFuntion:
+    def __init__(self, function):
+        assert isinstance(function, EncodedFunction), "EncodedFunction object expected"
+        import nervapy.x86_64.abi
+        process_abi = nervapy.x86_64.abi.detect()
+        if process_abi != function.abi:
+            raise ValueError(
+                "Function ABI (%s) does not match process ABI (%s)"
+                % (str(function.abi), str(process_abi))
+            )
+        self.code_segment = bytearray(function.code_section.content)
+        self.const_segment = bytearray(function.const_section.content)
+        import nervapy.loader
+        self.loader = nervapy.loader.Loader(
+            len(self.code_segment), len(self.const_segment)
+        )
+        # Apply relocations
+        from nervapy.util import is_sint32
+        from nervapy.x86_64.meta import RelocationType
+        for relocation in function.code_section.relocations:
+            assert relocation.type == RelocationType.rip_disp32
+            assert relocation.symbol in function.const_section.symbols
+            old_value = (
+                self.code_segment[relocation.offset]
+                | (self.code_segment[relocation.offset + 1] << 8)
+                | (self.code_segment[relocation.offset + 2] << 16)
+                | (self.code_segment[relocation.offset + 3] << 24)
+            )
+            new_value = (
+                old_value
+                + (self.loader.data_address + relocation.symbol.offset)
+                - (self.loader.code_address + relocation.program_counter)
+            )
+            assert is_sint32(new_value)
+            self.code_segment[relocation.offset] = new_value & 0xFF
+            self.code_segment[relocation.offset + 1] = (new_value >> 8) & 0xFF
+            self.code_segment[relocation.offset + 2] = (new_value >> 16) & 0xFF
+            self.code_segment[relocation.offset + 3] = (new_value >> 24) & 0xFF
+        assert not function.const_section.relocations
+        self.loader.copy_code(self.code_segment)
+        self.loader.copy_data(self.const_segment)
+        import ctypes
+        result_type = (
+            None
+            if function.result_type is None
+            else function.result_type.as_ctypes_type
+        )
+        argument_types = [arg.c_type.as_ctypes_type for arg in function.arguments]
+        self.function_type = ctypes.CFUNCTYPE(result_type, *argument_types)
+        self.function_pointer = self.function_type(self.loader.code_address)
+    def __call__(self, *args):
+        return self.function_pointer(*args)
+    def __del__(self):
+        del self.loader
+        self.loader = None
+        self.function_pointer = None
+class LocalVariable:
+    def __init__(self, size_option, alignment=None):
+        from nervapy.util import is_int
+        if alignment is not None and not is_int(alignment):
+            raise TypeError("alignment %s is not an integer" % str(alignment))
+        if alignment is not None and alignment <= 0:
+            raise ValueError("alignment %d is not a positive integer" % alignment)
+        self.alignment = alignment
+        if is_int(size_option):
+            if size_option <= 0:
+                raise ValueError("size %d is not a positive integer" % size_option)
+            self.size = size_option
+        elif isinstance(size_option, nervapy.x86_64.registers.Register):
+            self.size = size_option.size
+        else:
+            raise TypeError(
+                "Unsupported size specification %s: register or integer expected"
+                % size_option
+            )
+        if self.alignment is None:
+            self.alignment = self.size
+        self._address = None
+        self._offset = 0
+        self.parent = None
+    def __eq__(self, other):
+        return (
+            isinstance(other, LocalVariable)
+            and self.root is other.root
+            and self.size == other.size
+            and self.offset == other.offset
+        )
+    def __ne__(self, other):
+        return (
+            not isinstance(other, LocalVariable)
+            or self.root is not other.root
+            or self.size != other.size
+            or self.offset != other.offset
+        )
+    def __hash__(self):
+        return id(self.root) ^ hash(self.size) ^ hash(self.offset)
+    def __str__(self):
+        if self.address is not None:
+            return "[" + str(self.address) + "]"
+        else:
+            return "local-variable<%d[%d:%d]>" % (
+                id(self.root),
+                self.offset,
+                self.offset + self.size,
+            )
+    def __repr__(self):
+        return str(self)
+    @property
+    def is_subvariable(self):
+        return self.parent is not None
+    @property
+    def root(self):
+        root = self
+        while root.parent is not None:
+            root = root.parent
+        return root
+    @property
+    def offset(self):
+        node = self
+        offset = 0
+        while node.parent is not None:
+            offset += node._offset
+            node = node.parent
+        return offset
+    @property
+    def address(self):
+        if self.is_subvariable:
+            base_address = self.root.address
+            if base_address is not None:
+                return base_address + self.offset
+        else:
+            return self._address
+    @property
+    def lo(self):
+        assert self.size % 2 == 0
+        child = LocalVariable(self.size // 2, min(self.size // 2, self.alignment))
+        child.parent = self
+        child._offset = 0
+        return child
+    @property
+    def hi(self):
+        assert self.size % 2 == 0
+        child = LocalVariable(self.size // 2, min(self.size // 2, self.alignment))
+        child.parent = self
+        child._offset = self.size // 2
+        return child