bigdl-core-npu 2.6.0b20250114__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/__init__.py +0 -0
- bigdl-core-npu/include/common.h +96 -0
- bigdl-core-npu/include/npu_llm.h +74 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- bigdl-core-npu/npu_llm.lib +0 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/METADATA +44 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/RECORD +234 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/WHEEL +5 -0
- bigdl_core_npu-2.6.0b20250114.dist-info/top_level.txt +2 -0
- intel_npu_acceleration_library/__init__.py +24 -0
- intel_npu_acceleration_library/_version.py +6 -0
- intel_npu_acceleration_library/backend/__init__.py +37 -0
- intel_npu_acceleration_library/backend/base.py +250 -0
- intel_npu_acceleration_library/backend/bindings.py +383 -0
- intel_npu_acceleration_library/backend/compression.py +24 -0
- intel_npu_acceleration_library/backend/convolution.py +58 -0
- intel_npu_acceleration_library/backend/factory.py +1161 -0
- intel_npu_acceleration_library/backend/linear.py +60 -0
- intel_npu_acceleration_library/backend/matmul.py +59 -0
- intel_npu_acceleration_library/backend/mlp.py +58 -0
- intel_npu_acceleration_library/backend/ops.py +142 -0
- intel_npu_acceleration_library/backend/qlinear.py +75 -0
- intel_npu_acceleration_library/backend/qmatmul.py +66 -0
- intel_npu_acceleration_library/backend/runtime.py +215 -0
- intel_npu_acceleration_library/backend/sdpa.py +107 -0
- intel_npu_acceleration_library/backend/tensor.py +1120 -0
- intel_npu_acceleration_library/backend/utils.py +70 -0
- intel_npu_acceleration_library/compiler.py +194 -0
- intel_npu_acceleration_library/device.py +230 -0
- intel_npu_acceleration_library/dtypes.py +155 -0
- intel_npu_acceleration_library/external/openvino/__init__.py +72 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +21 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/__init__.py +34 -0
- intel_npu_acceleration_library/external/openvino/frontend/frontend.py +44 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +370 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +180 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/module_extension.py +39 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +118 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +131 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend_utils.py +85 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py +141 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py +189 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +290 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +126 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +568 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +258 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/__init__.py +16 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/graph_iterator.py +116 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/node_decoder.py +219 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +481 -0
- intel_npu_acceleration_library/external/openvino/helpers/__init__.py +6 -0
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +87 -0
- intel_npu_acceleration_library/external/openvino/preprocess/README.md +60 -0
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +28 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/preprocess_converter.py +47 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +5 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/torchvision_preprocessing.py +347 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +22 -0
- intel_npu_acceleration_library/external/openvino/properties/_properties.py +55 -0
- intel_npu_acceleration_library/external/openvino/properties/device/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/properties/hint/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_auto/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_cpu/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/__init__.py +12 -0
- intel_npu_acceleration_library/external/openvino/properties/intel_gpu/hint/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/log/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/properties/streams/__init__.py +11 -0
- intel_npu_acceleration_library/external/openvino/runtime/__init__.py +85 -0
- intel_npu_acceleration_library/external/openvino/runtime/exceptions.py +17 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +631 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/runtime/op/util/__init__.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/__init__.py +112 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +3068 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset10/ops.py +173 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/__init__.py +179 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset11/ops.py +107 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/__init__.py +180 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset12/ops.py +120 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/__init__.py +188 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +398 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/__init__.py +190 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset14/ops.py +171 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +17 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +276 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/__init__.py +118 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset2/ops.py +216 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/__init__.py +134 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset3/ops.py +638 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/__init__.py +145 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset4/ops.py +464 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/__init__.py +152 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset5/ops.py +372 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/__init__.py +154 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +215 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/__init__.py +158 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset7/ops.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/__init__.py +169 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +787 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/__init__.py +175 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset9/ops.py +341 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset_utils.py +22 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/__init__.py +19 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/graph_rewrite.py +33 -0
- intel_npu_acceleration_library/external/openvino/runtime/passes/manager.py +26 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +40 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/hint/__init__.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/broadcasting.py +44 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/__init__.py +8 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +447 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/wrappers.py +148 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +156 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/input_validation.py +133 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/node_factory.py +127 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/reduction.py +25 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/types.py +175 -0
- intel_npu_acceleration_library/external/openvino/tools/__init__.py +4 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/benchmark.py +186 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/main.py +695 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/parameters.py +199 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/__init__.py +3 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/constants.py +26 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +482 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/logging.py +8 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/statistics_report.py +296 -0
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/utils.py +836 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__init__.py +20 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/__main__.py +10 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/cli_parser.py +633 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert.py +102 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_data_type.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +550 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/environment_setup_utils.py +50 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/error.py +49 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/get_ov_update_message.py +16 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/help.py +45 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/logger.py +91 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +40 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/analysis.py +46 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/check_config.py +57 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/extractor.py +447 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/layout_utils.py +73 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/moc_emit_ir.py +32 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/offline_transformations.py +107 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/paddle_frontend_utils.py +83 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +298 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/preprocessing.py +220 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +214 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/shape_utils.py +109 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/type_utils.py +82 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/ovc.py +13 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_params.py +6 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_stub.py +28 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/telemetry_utils.py +118 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +196 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/version.py +80 -0
- intel_npu_acceleration_library/external/openvino/torch/__init__.py +5 -0
- intel_npu_acceleration_library/external/openvino/utils.py +115 -0
- intel_npu_acceleration_library/functional/__init__.py +8 -0
- intel_npu_acceleration_library/functional/scaled_dot_product_attention.py +47 -0
- intel_npu_acceleration_library/lib/Release/cache.json +113732 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/modelling.py +150 -0
- intel_npu_acceleration_library/nn/__init__.py +20 -0
- intel_npu_acceleration_library/nn/autograd.py +68 -0
- intel_npu_acceleration_library/nn/conv.py +257 -0
- intel_npu_acceleration_library/nn/functional.py +1207 -0
- intel_npu_acceleration_library/nn/linear.py +162 -0
- intel_npu_acceleration_library/nn/llm.py +417 -0
- intel_npu_acceleration_library/nn/module.py +393 -0
- intel_npu_acceleration_library/optimizations.py +157 -0
- intel_npu_acceleration_library/quantization.py +174 -0
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/compile.py
ADDED
@@ -0,0 +1,141 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# flake8: noqa
# mypy: ignore-errors

import os
import torch
import torch.overrides

from hashlib import sha256
from torch.fx import GraphModule

from openvino.frontend import FrontEndManager
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
from openvino.runtime import Core, Type, PartialShape, serialize
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_config, _is_cache_dir_in_config

from typing import Callable, Optional

import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

def cached_model_name(model_hash_str, device, args, cache_root, reversed = False):
    if model_hash_str is None:
        return None

    model_cache_dir = cache_root + "/model/"

    try:
        os.makedirs(model_cache_dir, exist_ok=True)
        file_name = model_cache_dir + model_hash_str + "_" + device
    except OSError as error:
        logger.warning(f"Cache directory {cache_root} cannot be created. Model caching is disabled. Error: {error}")
        return None

    inputs_str = ""
    for idx, input_data in enumerate(args):
        if reversed:
            inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str
        else:
            inputs_str += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
    inputs_str = sha256(inputs_str.encode('utf-8')).hexdigest()
    file_name += inputs_str

    return file_name

def openvino_compile_cached_model(cached_model_path, options, *example_inputs):
    core = Core()
    om = core.read_model(cached_model_path + ".xml")

    dtype_mapping = {
        torch.float32: Type.f32,
        torch.float64: Type.f64,
        torch.float16: Type.f16,
        torch.int64: Type.i64,
        torch.int32: Type.i32,
        torch.uint8: Type.u8,
        torch.int8: Type.i8,
        torch.bool: Type.boolean
    }

    for idx, input_data in enumerate(example_inputs):
        om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
        om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
    om.validate_nodes_and_infer_types()

    config = {}

    if _is_cache_dir_in_config(options):
        config = _get_config(options)
    else:
        config["CACHE_DIR"] = _get_cache_dir(options)

    compiled_model = core.compile_model(om, _get_device(options), config)

    return compiled_model

def openvino_compile(gm: GraphModule, *args, model_hash_str: str = None, options=None):
    core = Core()

    device = _get_device(options)
    cache_root = _get_cache_dir(options)
    file_name = cached_model_name(model_hash_str, device, args, cache_root)

    if file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin"):
        om = core.read_model(file_name + ".xml")
    else:
        fe_manager = FrontEndManager()
        fe = fe_manager.load_by_framework("pytorch")

        input_shapes = []
        input_types = []
        for idx, input_data in enumerate(args):
            if isinstance(input_data, int):
                input_types.append(torch.int64)
                input_shapes.append(torch.Size([1]))
            else:
                input_types.append(input_data.type())
                input_shapes.append(input_data.size())

        decoder = TorchFXPythonDecoder(gm)

        im = fe.load(decoder)

        om = fe.convert(im)

        if file_name is not None:
            serialize(om, file_name + ".xml", file_name + ".bin")

    dtype_mapping = {
        torch.float32: Type.f32,
        torch.float64: Type.f64,
        torch.float16: Type.f16,
        torch.int64: Type.i64,
        torch.int32: Type.i32,
        torch.uint8: Type.u8,
        torch.int8: Type.i8,
        torch.bool: Type.boolean
    }

    for idx, input_data in enumerate(args):
        if isinstance(input_data, int):
            om.inputs[idx].get_node().set_element_type(dtype_mapping[torch.int64])
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([1]))))
        else:
            om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(decoder.input_shapes[idx])))

    om.validate_nodes_and_infer_types()

    config = _get_config(options)

    if model_hash_str is not None:
        if not _is_cache_dir_in_config(options):
            config["CACHE_DIR"] = cache_root

    compiled = core.compile_model(om, device, config)
    return compiled
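For orientation, the on-disk cache naming built by cached_model_name above can be exercised on its own. This is a hypothetical sketch, assuming a standard install where the module is importable as openvino.frontend.pytorch.torchdynamo.compile (the path its own imports use); the hash string and cache directory are made up for illustration.

import torch
from openvino.frontend.pytorch.torchdynamo.compile import cached_model_name

# Made-up hash string and cache root; the helper creates "<cache_root>/model/" and
# returns "<cache_root>/model/<hash>_<device>" followed by a sha256 of the input signature.
example_input = torch.randn(1, 3, 224, 224)
name = cached_model_name("abc123", "CPU", (example_input,), "./ov_model_cache")
print(name)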
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/decompositions.py
ADDED
@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# flake8: noqa
# mypy: ignore-errors

import torch
from torch._decomp.decompositions import aten, pw_cast_for_opmath
from torch._decomp import register_decomposition, get_decompositions


@register_decomposition(aten.convolution_backward)
@pw_cast_for_opmath
def convolution_backward(
    grad_output,
    inp,
    weight,
    bias,
    stride,
    padding,
    dilation,
    transposed,
    output_padding,
    groups,
    output_mask,
):
    if stride == [2, 2]:
        output_padding = [1, 1]

    # Compute the gradient of the input tensor
    grad_input = torch.nn.functional.conv_transpose2d(
        grad_output, weight, stride=stride, padding=padding, dilation=dilation, groups=groups, output_padding=output_padding
    )

    # Compute the gradient of the weight tensor
    grad_weight = torch.nn.functional.conv_transpose2d(
        inp, weight.transpose(0, 1), stride=stride, padding=padding, dilation=dilation, groups=groups, output_padding=output_padding
    )

    # Compute the gradient of the bias tensor
    if bias is not None:
        grad_bias = grad_output.sum([0, 2, 3], keepdim=True)
    else:
        grad_bias = None

    return grad_input, grad_weight, grad_bias

if len(get_decompositions([aten._scaled_dot_product_flash_attention.default])) == 0:
    @register_decomposition(aten._scaled_dot_product_flash_attention.default)
    def scaled_dot_product_flash_attention(
        query,
        key,
        value,
        dropout_p=0.0,
        is_causal=False,
        *,
        return_debug_mask=False,
        scale=None,
    ):
        batch_size, num_head, q_size, head_size = (
            query.shape[0],
            query.shape[1],
            query.shape[2],
            query.shape[3],
        )

        logsumexp = torch.empty([batch_size, q_size, num_head, head_size], dtype=torch.float)
        cum_seq_q, cum_seq_k = torch.empty([], dtype=torch.long), torch.empty(
            [], dtype=torch.long
        )
        max_q, max_k = 0, 0
        philox_seed, philox_offset = torch.empty([], dtype=torch.long), torch.empty(
            [], dtype=torch.long
        )
        debug_attn_mask = torch.empty(
            [],
            dtype=query.dtype,
            device=query.device,
            requires_grad=query.requires_grad,
        )
        output, _ = aten._scaled_dot_product_attention_math.default(
            query, key, value, None, dropout_p, is_causal, None, scale=scale
        )

        scores = torch.matmul(query, key.transpose(-2, -1)) / (key.size(-1) ** 0.5)
        logsumexp = torch.logsumexp(scores, dim=-1)

        output = output.transpose(1, 2).contiguous(memory_format=torch.contiguous_format)
        return (
            output.transpose(1, 2),
            logsumexp,
            cum_seq_q,
            cum_seq_k,
            max_q,
            max_k,
            philox_seed,
            philox_offset,
            debug_attn_mask,
        )


def get_aot_decomposition_list():
    return ([torch.ops.aten._scaled_dot_product_flash_attention.default,
             torch.ops.aten._softmax.default,
             torch.ops.aten._softmax_backward_data.default,
             torch.ops.aten.convolution_backward.default,
             torch.ops.aten.gelu_backward.default,
             torch.ops.aten.native_group_norm.default,
             torch.ops.aten.native_group_norm_backward.default,
             torch.ops.aten.native_layer_norm.default,
             torch.ops.aten.native_layer_norm_backward.default,
             torch.ops.aten.slice_backward.default])

def get_inf_decomposition_list():
    return ([torch.ops.aten.nll_loss_forward.default])
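As a side note (not part of the package), op lists like the two above are conventionally turned into a decomposition table with torch._decomp.get_decompositions, the same utility the file already imports. A minimal sketch, assuming the standard openvino import path:

from torch._decomp import get_decompositions
from openvino.frontend.pytorch.torchdynamo.decompositions import (
    get_aot_decomposition_list,
    get_inf_decomposition_list,
)

# Map each listed aten op overload to its registered decomposition function.
decomp_table = get_decompositions(get_aot_decomposition_list() + get_inf_decomposition_list())
print(f"{len(decomp_table)} decompositions resolved")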
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/execute.py
ADDED
@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# mypy: ignore-errors

from copy import deepcopy
from dataclasses import dataclass
from functools import lru_cache
from types import MappingProxyType
from warnings import warn

import torch
import torch.overrides

from torch.fx import GraphModule
from torch.utils._pytree import tree_flatten, tree_map, tree_unflatten

from openvino.frontend import FrontEndManager
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
from openvino.frontend.pytorch.torchdynamo.compile import openvino_compile
from openvino.runtime import Core, Type, PartialShape
from openvino.frontend.pytorch.torchdynamo.backend_utils import _get_cache_dir, _get_device, _get_aot_autograd

from typing import Callable, Optional, Any

from torch.fx.experimental.proxy_tensor import make_fx, wrapper_and_args_for_make_fx

import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)


DEFAULT_OPENVINO_PYTHON_CONFIG = MappingProxyType(
    {
        "use_python_fusion_cache": True,
        "allow_single_op_fusion": True,
    },
)

compiled_cache = {}
req_cache = {}
max_openvino_partitions = 0
partitioned_modules = {}


def execute(
    gm: GraphModule,
    *args,
    executor: str = "openvino",
    executor_parameters: Optional[dict] = None,
    options: Optional[Any] = None,
):
    if executor == "openvino":
        return openvino_execute_partitioned(gm, *args, executor_parameters=executor_parameters, options=options)
    elif executor == "strictly_openvino":
        return openvino_execute(gm, *args, executor_parameters=executor_parameters)

    msg = "Received unexpected value for 'executor': {0}. Allowed values are: openvino, strictly_openvino.".format(executor)
    raise ValueError(msg)


import numpy as np


def execute_cached(compiled_model, *args):
    ov_inputs = [a.detach().cpu().numpy() for a in args]
    ov_inputs.reverse()
    res = compiled_model(ov_inputs)
    result = [torch.from_numpy(res[out]) for out in compiled_model.outputs]
    return result


def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition_id, options):

    executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG

    use_cache = executor_parameters.get(
        "use_python_fusion_cache",
        DEFAULT_OPENVINO_PYTHON_CONFIG["use_python_fusion_cache"],
    )
    global compiled_cache

    model_hash_str = executor_parameters.get("model_hash_str", None)
    if model_hash_str is not None:
        fully_supported = False
        if len(model_hash_str) > 3 and model_hash_str[-3:] == "_fs":
            fully_supported = True
        if not fully_supported:
            model_hash_str = model_hash_str + "_p" + str(partition_id)

    if use_cache and (partition_id in compiled_cache):
        compiled = compiled_cache[partition_id]
        req = req_cache[partition_id]
    else:
        compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str, options=options)
        compiled_cache[partition_id] = compiled
        req = compiled.create_infer_request()
        req_cache[partition_id] = req

    flat_args, _ = tree_flatten(args)
    ov_inputs = []
    for arg in flat_args:
        ov_inputs.append((arg if isinstance(arg, int) else arg.detach().cpu().numpy()))

    res = req.infer(ov_inputs, share_inputs=True, share_outputs=True)

    results1 = [torch.from_numpy(res[out]) for out in compiled.outputs]
    if len(results1) == 1:
        return results1[0]
    return results1


class OpenVINOGraphModule(torch.nn.Module):
    def __init__(self, gm, partition_id, use_python_fusion_cache, model_hash_str: str = None, options=None):
        super().__init__()
        self.gm = gm
        self.partition_id = partition_id
        self.executor_parameters = {"use_python_fusion_cache": use_python_fusion_cache,
                                    "model_hash_str": model_hash_str}
        self.perm_fallback = False
        self.options = options

    def __call__(self, *args):
        if self.perm_fallback:
            return self.gm(*args)

        try:
            result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, partition_id=self.partition_id, options=self.options)
        except Exception:
            logger.debug("OpenVINO execution failed. Falling back to native PyTorch execution.")
            self.perm_fallback = True
            return self.gm(*args)

        return result


def partition_graph(gm: GraphModule, use_python_fusion_cache: bool, model_hash_str: str = None, options=None):
    global max_openvino_partitions
    partition_id = max_openvino_partitions
    for node in gm.graph.nodes:
        # TODO: use a better way to identify fused submodule
        if node.op == "call_module" and "fused_" in node.name:
            openvino_submodule = getattr(gm, node.name)
            gm.delete_submodule(node.target)
            gm.add_submodule(
                node.target,
                OpenVINOGraphModule(openvino_submodule, partition_id, use_python_fusion_cache,
                                    model_hash_str=model_hash_str, options=options),
            )
            partition_id = partition_id + 1

    max_openvino_partitions = partition_id

    return gm


def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=None, options=None):
    executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG

    global partitioned_modules

    use_python_fusion_cache = executor_parameters.get(
        "use_python_fusion_cache",
        DEFAULT_OPENVINO_PYTHON_CONFIG["use_python_fusion_cache"],
    )
    model_hash_str = executor_parameters.get("model_hash_str", None)

    signature = str(id(gm))
    if (not _get_aot_autograd(options)):
        for idx, input_data in enumerate(args):
            if isinstance(input_data, torch.Tensor):
                signature = signature + "_" + str(idx) + ":" + str(input_data.type())[6:] + ":" + str(input_data.size())[11:-1].replace(" ", "")
            else:
                signature = signature + "_" + str(idx) + ":" + type(input_data).__name__ + ":val(" + str(input_data) + ")"

    if signature not in partitioned_modules:
        partitioned_modules[signature] = partition_graph(gm, use_python_fusion_cache=use_python_fusion_cache,
                                                         model_hash_str=model_hash_str, options=options)
    return partitioned_modules[signature](*args)


def clear_caches():
    global partitioned_modules
    global compiled_cache

    compiled_cache.clear()
    partitioned_modules.clear()
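For context, these compile/execute helpers normally run behind OpenVINO's documented torch.compile backend named "openvino" rather than being called directly. A minimal, hedged usage sketch; the options key "device" is an assumption based on the _get_device helper referenced above and may vary by release.

import torch

model = torch.nn.Sequential(torch.nn.Linear(16, 4), torch.nn.ReLU()).eval()

# backend="openvino" is expected to dispatch through the backend/execute/compile
# modules shown in this package; the options dict is an assumption for illustration.
compiled = torch.compile(model, backend="openvino", options={"device": "CPU"})
with torch.no_grad():
    out = compiled(torch.randn(2, 16))
print(out.shape)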