bigdl-core-npu 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl-core-npu/__init__.py +0 -0
- bigdl-core-npu/common.lib +0 -0
- bigdl-core-npu/ggml.dll +0 -0
- bigdl-core-npu/ggml.lib +0 -0
- bigdl-core-npu/include/llamacpp/arg.h +77 -0
- bigdl-core-npu/include/llamacpp/common.h +563 -0
- bigdl-core-npu/include/llamacpp/ggml-alloc.h +76 -0
- bigdl-core-npu/include/llamacpp/ggml-backend.h +241 -0
- bigdl-core-npu/include/llamacpp/ggml.h +2679 -0
- bigdl-core-npu/include/llamacpp/llama.h +1234 -0
- bigdl-core-npu/include/llamacpp/log.h +92 -0
- bigdl-core-npu/include/npu/npu_common.h +119 -0
- bigdl-core-npu/include/npu/npu_llm.h +77 -0
- bigdl-core-npu/llama-cli-npu.exe +0 -0
- bigdl-core-npu/llama.dll +0 -0
- bigdl-core-npu/llama.lib +0 -0
- bigdl-core-npu/llm-cli.exe +0 -0
- bigdl-core-npu/npu_llm.dll +0 -0
- bigdl-core-npu/npu_llm.lib +0 -0
- bigdl-core-npu/zlib1.dll +0 -0
- bigdl_core_npu-2.6.0.data/scripts/init-llama-cpp.bat +29 -0
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/METADATA +12 -3
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/RECORD +146 -96
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/WHEEL +1 -1
- {bigdl_core_npu-2.5.0.dist-info → bigdl_core_npu-2.6.0.dist-info}/top_level.txt +1 -0
- intel_npu_acceleration_library/_version.py +1 -1
- intel_npu_acceleration_library/backend/base.py +39 -4
- intel_npu_acceleration_library/backend/bindings.py +109 -5
- intel_npu_acceleration_library/backend/factory.py +264 -47
- intel_npu_acceleration_library/backend/ops.py +2 -1
- intel_npu_acceleration_library/backend/qlinear.py +8 -4
- intel_npu_acceleration_library/backend/runtime.py +7 -2
- intel_npu_acceleration_library/backend/tensor.py +73 -3
- intel_npu_acceleration_library/bigdl-core-npu/cache.json +113732 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_c.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/bigdl-core-npu/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/device.py +2 -2
- intel_npu_acceleration_library/dtypes.py +34 -1
- intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_offline_transformations/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/__init__.py +15 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +293 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
- intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +182 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +37 -19
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +47 -6
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py +28 -8
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py +1 -0
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +95 -63
- intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
- intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
- intel_npu_acceleration_library/external/openvino/helpers/packing.py +4 -4
- intel_npu_acceleration_library/external/openvino/preprocess/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/preprocess/torchvision/requirements.txt +1 -0
- intel_npu_acceleration_library/external/openvino/properties/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
- intel_npu_acceleration_library/external/openvino/runtime/op/__init__.py +1 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset1/ops.py +2 -1
- intel_npu_acceleration_library/external/openvino/runtime/opset13/ops.py +5 -6
- intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +7 -0
- intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +193 -2
- intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +69 -43
- intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +4 -0
- intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +2 -0
- intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
- intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +88 -2
- intel_npu_acceleration_library/external/openvino/tools/benchmark/utils/inputs_filling.py +9 -9
- intel_npu_acceleration_library/external/openvino/tools/ovc/convert_impl.py +16 -2
- intel_npu_acceleration_library/external/openvino/tools/ovc/main.py +5 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/jax_frontend_utils.py +19 -0
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pipeline.py +68 -16
- intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +69 -60
- intel_npu_acceleration_library/external/openvino/tools/ovc/utils.py +90 -3
- intel_npu_acceleration_library/external/openvino/utils.py +17 -0
- intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
- intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
- intel_npu_acceleration_library/nn/module.py +17 -17
intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py ADDED
@@ -0,0 +1,182 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# flake8: noqa
+# mypy: ignore-errors
+
+import jax
+import jax.numpy as jnp
+import numpy as np
+from openvino.frontend.jax.passes import filter_element, filter_ivalue, filter_param
+from openvino.runtime import op, Type as OVType, Shape, OVAny
+
+numpy_to_ov_type_map = {
+    np.float32: OVType.f32,
+    bool: OVType.boolean,
+    jax.dtypes.bfloat16: OVType.bf16,  # TODO: check this
+    np.float16: OVType.f16,
+    np.float32: OVType.f32,
+    np.float64: OVType.f64,
+    np.uint8: OVType.u8,
+    np.int8: OVType.i8,
+    np.uint16: OVType.u16,
+    np.int16: OVType.i16,
+    np.uint32: OVType.u32,
+    np.int32: OVType.i32,
+    np.uint64: OVType.u64,
+    np.int64: OVType.i64,
+}
+
+jax_to_ov_type_map = {
+    jnp.float32: OVType.f32,
+    jnp.bfloat16: OVType.bf16,  # TODO: check this
+    jnp.float16: OVType.f16,
+    jnp.float64: OVType.f64,
+    jnp.uint8: OVType.u8,
+    jnp.int8: OVType.i8,
+    jnp.uint16: OVType.u16,
+    jnp.int16: OVType.i16,
+    jnp.uint32: OVType.u32,
+    jnp.int32: OVType.i32,
+    jnp.uint64: OVType.u64,
+    jnp.int64: OVType.i64,
+}
+
+try:
+    jax_to_ov_type_map[jnp.bool] = OVType.boolean
+except:
+    pass
+
+basic_to_ov_type_map = {
+    int: OVType.i64,
+    float: OVType.f32,
+    bool: OVType.boolean,
+}
+
+ov_type_to_int_map = {
+    OVType.u8: 0,
+    OVType.i8: 1,
+    OVType.i16: 2,
+    OVType.i32: 3,
+    OVType.i64: 4,
+    OVType.f16: 5,
+    OVType.f32: 6,
+    OVType.f64: 7,
+    OVType.u16: 8,
+    OVType.u32: 9,
+    OVType.u64: 10,
+    OVType.boolean: 11,
+    OVType.bf16: 15,
+}
+
+
+def get_type_from_py_type(value):
+    if isinstance(value, float):
+        return OVType.f32
+    if isinstance(value, bool):
+        return OVType.boolean
+    if isinstance(value, int):
+        return OVType.i64
+    return OVType.dynamic
+
+
+def get_type_from_np_type(value):
+    for np_dtype, ov_type in numpy_to_ov_type_map.items():
+        if isinstance(value, np_dtype):
+            return ov_type
+    return None
+
+
+def _get_ov_type_from_value(value):
+    ov_type = get_type_from_np_type(value)
+    if ov_type is None:
+        ov_type = get_type_from_py_type(value)
+    return ov_type
+
+
+def get_ov_type_for_value(value):
+    if isinstance(value, (jax.core.Var, jax.core.Literal)):
+        if value.aval.dtype in jax_to_ov_type_map:
+            return OVAny(jax_to_ov_type_map[value.aval.dtype])
+        for k, v in numpy_to_ov_type_map.items():
+            if value.aval.dtype == k:
+                return OVAny(v)
+        for k, v in basic_to_ov_type_map.items():
+            if isinstance(value.aval.dtype, k):
+                return OVAny(v)
+    elif isinstance(value, (int, float, bool)):
+        return OVAny(jax_to_ov_type_map[type(value)])
+    else:
+        raise NotImplementedError(f"dtype for {value} of type {type(value)} has not been supported yet.")
+
+
+def get_ov_type_from_jax_type(dtype):
+    if dtype in jax_to_ov_type_map:
+        return OVAny(jax_to_ov_type_map[dtype])
+    for k, v in numpy_to_ov_type_map.items():
+        if dtype == k:
+            return OVAny(v)
+    for k, v in basic_to_ov_type_map.items():
+        if isinstance(dtype, k):
+            return OVAny(v)
+    return None
+
+
+def jax_array_to_ov_const(arr: np.ndarray, shared_memory=True):
+    # TODO: deal with bfloat16 dtype here.
+    if isinstance(arr, np.ndarray):
+        return op.Constant(arr, shared_memory=shared_memory)
+    elif isinstance(arr, jax.Array):
+        return op.Constant(np.array(jax.device_get(arr)), shared_memory=shared_memory)
+    else:
+        raise ValueError(f"Constant is expected to be a numpy array or jax array but got {type(arr)}")
+
+
+def ivalue_to_constant(ivalue, shared_memory=True):
+    '''
+    Convert a python object to an openvino constant.
+    '''
+    # print('ivalue = ', ivalue)
+    ivalue = filter_ivalue(ivalue)
+    ov_type = _get_ov_type_from_value(ivalue)
+    if ov_type.is_static():
+        return op.Constant(ov_type, Shape([]), [ivalue]).outputs()
+    if isinstance(ivalue, (list, tuple)):
+        assert len(ivalue) > 0, "Can't deduce type for empty list"
+        if isinstance(ivalue[0], (list, tuple)):
+            second_len = len(ivalue[0])
+            flattened_ivalue = []
+            for value in ivalue:
+                assert isinstance(value, (list, tuple)), "Can't deduce type for a list with both list and basic types."
+                assert len(value) == second_len or len(value) == 0, "Can't deduce type for nested list with different lengths."
+                flattened_ivalue.extend([filter_element(item) for item in value])
+            flattened_ivalue = [item for sublist in ivalue for item in sublist]
+            ov_type = _get_ov_type_from_value(flattened_ivalue[0])
+            assert ov_type.is_static(), f"Can't deduce type {flattened_ivalue[0].__class__} for list"
+            return op.Constant(ov_type, Shape([len(ivalue), second_len]), flattened_ivalue).outputs()
+        ivalue = [filter_element(item) for item in ivalue]
+        ov_type = _get_ov_type_from_value(ivalue[0])
+        try:
+            assert ov_type.is_static(), f"Can't deduce type {ivalue[0].__class__} for list"
+        except:
+            # TODO 150596: remove this workaround
+            ivalue = [0]
+            ov_type = OVType.f32
+        return op.Constant(ov_type, Shape([len(ivalue)]), ivalue).outputs()
+
+    if isinstance(ivalue, (jax.Array, np.ndarray)):
+        return jax_array_to_ov_const(ivalue, shared_memory=shared_memory).outputs()
+
+    ov_dtype_value = get_ov_type_from_jax_type(ivalue)
+    if ov_dtype_value is not None:
+        return op.Constant(OVType.i64, Shape([]), [ov_type_to_int_map[ov_dtype_value]]).outputs()
+
+    return None
+
+
+def param_to_constants(primitive: str, param_name: str, jaxpr, shared_memory=True):
+    processed_params = filter_param(primitive, param_name, jaxpr)
+
+    for k, v in processed_params.items():
+        processed_params[k] = ivalue_to_constant(v, shared_memory=shared_memory)
+    return processed_params
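A quick way to read the new jax/utils.py helpers above: Python scalars are mapped to OVType values and then wrapped into scalar op.Constant nodes. Below is a minimal hedged sketch of that chain as a standalone toy function, not the frontend's own code path; it assumes an openvino install that provides openvino.runtime.op.

    # Hedged sketch (not part of the package): map plain Python scalars to
    # OpenVINO constants the way get_type_from_py_type()/ivalue_to_constant() do.
    from openvino.runtime import op, Shape, Type as OVType

    def scalar_to_constant(value):
        # bool is checked before int because isinstance(True, int) is True
        if isinstance(value, float):
            ov_type = OVType.f32
        elif isinstance(value, bool):
            ov_type = OVType.boolean
        elif isinstance(value, int):
            ov_type = OVType.i64
        else:
            raise TypeError(f"unsupported scalar {value!r}")
        return op.Constant(ov_type, Shape([]), [value])

    print(scalar_to_constant(3).get_element_type())    # i64
    print(scalar_to_constant(0.5).get_element_type())  # f32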
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd CHANGED (binary file)
intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py CHANGED
@@ -16,6 +16,11 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)


+class InlinedInput:
+    def __init__(self, data) -> None:
+        self.data = data
+
+
 class TorchFXPythonDecoder (Decoder):

     def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]):
@@ -30,6 +35,7 @@ class TorchFXPythonDecoder (Decoder):
         self.input_shapes = input_shapes

         self._input_signature = []
+        self._example_input = None

         if issubclass(type(pt_module), torch.fx.graph_module.GraphModule):

@@ -58,7 +64,7 @@ class TorchFXPythonDecoder (Decoder):
                             for arg in uargs if arg[1] is not None]
             for idx, shape in enumerate(found_shapes):
                 if shape is not None:
-                    new_shape=[]
+                    new_shape = []
                     for dim in range(0, len(shape)):
                         if (type(shape[dim]).__name__ == "SymInt"):
                             new_shape.append(-1)
@@ -80,7 +86,7 @@ class TorchFXPythonDecoder (Decoder):

            # None in inputs mean the input is inlined or None (also considered inlined)
            self._inputs = [self._nodes.index(
-                arg) if arg in self._nodes else (arg
+                arg) if arg in self._nodes else InlinedInput(arg) for arg in pt_module.args]

            # FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments.
            new_inputs = []
@@ -91,22 +97,22 @@ class TorchFXPythonDecoder (Decoder):
                        if arg in self._nodes:
                            new_inputs.append(self._nodes.index(arg))
                        else:
-                            new_inputs.append((arg
+                            new_inputs.append(InlinedInput(arg))
                        self.input_types.append(OVAny(DecoderType.List(
                            TorchFXPythonDecoder.get_type_for_value(arg))))
                else:
                    v = self._inputs[i]
                    new_inputs.append(v)
                    self.input_types.append(
-                        TorchFXPythonDecoder.get_type_for_value(v
+                        TorchFXPythonDecoder.get_type_for_value(v.data if isinstance(v, InlinedInput) else self._nodes[v]))
        self._inputs = new_inputs

    def inputs(self):
        # Consider 0 a special case which may mean the input is inlined, but not guaranteed
-        return [x if not isinstance(x,
+        return [x if not isinstance(x, InlinedInput) else 0 for x in self._inputs]

    def is_input_inlined(self, index):
-        return isinstance(self._inputs[index],
+        return isinstance(self._inputs[index], InlinedInput)

    @staticmethod
    def unpack_containers(arg):
@@ -141,19 +147,24 @@ class TorchFXPythonDecoder (Decoder):
            return make_constant(OVType.i64, Shape([]), [arg])
        elif isinstance(arg, float):
            return make_constant(OVType.f32, Shape([]), [arg])
+        elif isinstance(arg, str):
+            u8_tensor = torch.frombuffer(str.encode(arg), dtype=torch.uint8)
+            return torch_tensor_to_ov_const(u8_tensor, shared_memory=True)
        return None

    def inlined_input(self, index):
        assert index < len(self._inputs), "Requested input doesn't exist"
        assert isinstance(
-            self._inputs[index],
-
+            self._inputs[index], InlinedInput), "Requested input which is not inlined"
+        arg = self._inputs[index].data
+        assert arg is not None, f"Requested None inlined input for op {self.get_op_type()}"
        constant = None
-        arg = self._inputs[index][0]
        constant = self.arg_to_constant(arg)

-
-
+        if constant is not None:
+            return constant.outputs()
+        else:
+            return []

    def input(self, index):  # TODO: remove
        return self.inputs()[index]  # TODO: find specialized method
@@ -256,9 +267,7 @@ class TorchFXPythonDecoder (Decoder):
        raise RuntimeError("This input is not a Node")

    def get_subgraph_size(self):
-
-        return 0
-        return len(self.get_subgraphs()) if hasattr(self.pt_module, 'blocks') else 1
+        return len(self.get_subgraphs())

    def decoder_type_name(self) -> str:
        return "fx"
@@ -276,9 +285,7 @@ class TorchFXPythonDecoder (Decoder):
            node_visitor(decoder)

    def get_subgraphs(self):
-
-        return []
-        return list(self.pt_module.blocks())
+        return []

    def get_subgraph_decoder(self, index):
        decoder = TorchFXPythonDecoder(self.get_subgraphs()[index],
@@ -308,7 +315,7 @@ class TorchFXPythonDecoder (Decoder):
        return self._raw_outputs()[index]

    def _raw_inputs(self):
-        return [self._nodes[x] if not isinstance(x,
+        return [self._nodes[x] if not isinstance(x, InlinedInput) and x < len(self._nodes) else x.data for x in self._inputs]

    def _raw_input(self, index):
        return self._raw_inputs()[index]
@@ -316,6 +323,13 @@ class TorchFXPythonDecoder (Decoder):
    def num_of_outputs(self):
        return len(self.outputs())

+    def output_list_size(self):
+        max_out_id = -1
+        for user in self.pt_module.users:
+            if "<built-in function getitem>" == str(user.target) and max_out_id < user.args[1]:
+                max_out_id = user.args[1]
+        return max_out_id + 1
+
    def output(self, index):
        return self.outputs()[index]

@@ -339,7 +353,7 @@ class TorchFXPythonDecoder (Decoder):
            return None

    def input_is_none(self, index):
-        if index >= len(self._inputs) or (isinstance(self._inputs[index],
+        if index >= len(self._inputs) or (isinstance(self._inputs[index], InlinedInput) and self._inputs[index].data is None):
            return True
        else:
            r_input = self._raw_input(index)
@@ -350,3 +364,7 @@ class TorchFXPythonDecoder (Decoder):

    def may_produce_alias(self, in_index: int, out_index: int) -> bool:
        return False
+
+    def get_rt_info(self):
+        rt_info = {}
+        return rt_info
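The InlinedInput change above replaces the old tuple-wrapping convention for non-graph-node arguments. A small hedged sketch of the idea in isolation (toy names, not the decoder itself):

    # Hedged sketch: wrap arguments that are not graph nodes so that "is this
    # input inlined?" becomes a plain isinstance() check.
    class InlinedInput:
        def __init__(self, data) -> None:
            self.data = data

    def index_args(args, nodes):
        return [nodes.index(a) if a in nodes else InlinedInput(a) for a in args]

    nodes = ["node_a", "node_b"]
    inputs = index_args(["node_b", 42, None], nodes)
    # inputs() analogue: inlined entries are reported as the special index 0
    print([x if not isinstance(x, InlinedInput) else 0 for x in inputs])  # [1, 0, 0]
    print([isinstance(x, InlinedInput) for x in inputs])                  # [False, True, True]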
intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py CHANGED
@@ -32,7 +32,7 @@ def patched_forward(self, *args, **kwargs):
     x = args[0]
     dtype = x.dtype
     outshape = x.shape[:-1] + (self.width,)
-    x = x.view(-1, x.shape[-1])
+    x = x.contiguous().view(-1, x.shape[-1])
     groups = self.qzeros.shape[0]
     height = self.qweight.shape[0]

@@ -43,8 +43,6 @@ def patched_forward(self, *args, **kwargs):
     unpacked_zp = decompression_pattern(
         self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1)

-    unpacked_zp = unpacked_zp.to(dtype) + 1
-
     unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales
     unpacked_weights = unpacked_weights.view(-1, self.width)

@@ -59,11 +57,50 @@ def patched_forward(self, *args, **kwargs):
     return out


+def patched_forward_sym(self, *args, **kwargs):
+    if hasattr(self, '_hf_hook'):
+        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
+
+    x = args[0]
+    dtype = x.dtype
+    outshape = x.shape[:-1] + (self.width,)
+    x = x.contiguous().view(-1, x.shape[-1])
+    height = self.qweight.shape[0]
+
+    unpacked_weights = decompression_pattern(
+        self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
+    unpacked_weights = torch.transpose(
+        unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
+
+    # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
+    unpacked_weights = unpacked_weights.to(dtype) * self.scales
+    unpacked_weights = unpacked_weights.view(-1, self.width)
+
+    out = x @ unpacked_weights
+
+    out = out.view(outshape)
+    if self.bias is not None:
+        out.add_(self.bias)
+
+    if hasattr(self, '_hf_hook'):
+        out = self._hf_hook.post_forward(self, out)
+    return out
+
+
 # All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
 supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']


 def patch_model(model):
+    is_symmetrical = False
+    config = None
+    if hasattr(model, "config"):
+        config = model.config
+    elif hasattr(model, "model") and hasattr(model.model, "config"):
+        # original model was wrapped
+        config = model.model.config
+    if config is not None and hasattr(config, 'quantization_config') and hasattr(config.quantization_config, 'sym'):
+        is_symmetrical = config.quantization_config.sym
     for name, m in model.named_modules():
         if hasattr(m, '_openvino_patch_orig_forward'):
             # already patched, skipping
@@ -87,7 +124,10 @@ def patch_model(model):
         assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups

         m._openvino_patch_orig_forward = m.forward
-
+        if is_symmetrical:
+            m.forward = partial(patched_forward_sym, m)
+        else:
+            m.forward = partial(patched_forward, m)

         # Keep original field properties to be used when model is returned back to its original state
         m._openvino_patch_orig_qweights_type = m.qweight.dtype
@@ -97,11 +137,12 @@ def patch_model(model):
         m.qweight = m.qweight.view(dtype=torch.uint8)
         m.qzeros = m.qzeros.view(dtype=torch.uint8)

-        # TODO: Redundant tensor copy? Try to remove m.
+        # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
         m.add_module(
             '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))
+        # Adding 17 to move zp+1 step from after unpacking to before to have correct decompression pattern. Can it overflow?
         m.add_module('_openvino_u4_compression_submodule_qzeros',
-                     KeepWeight(m.qzeros))
+                     KeepWeight(m.qzeros + torch.tensor(17, dtype=torch.uint8)))

         m.scales = m.scales.view(-1, 1, m.width)

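The `+ torch.tensor(17, ...)` in the last hunk folds the former "zero point + 1" step into the stored uint8 qzeros: each byte packs two unsigned 4-bit zero points, and adding 0x11 == 17 bumps both nibbles by one (the code comment itself flags the possible nibble overflow). A small illustrative check, not package code:

    import numpy as np

    def unpack_u4(byte):
        # returns (low nibble, high nibble)
        return byte & 0x0F, (byte >> 4) & 0x0F

    packed = np.uint8((5 << 4) | 3)        # packs zero points 3 (low) and 5 (high)
    lo, hi = unpack_u4(int(packed) + 17)   # add 0x11 before unpacking
    assert (lo, hi) == (4, 6)              # both nibbles incremented by 1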
intel_npu_acceleration_library/external/openvino/frontend/pytorch/patch_model.py CHANGED
@@ -30,6 +30,7 @@ def patch_model(model, module_extensions, orig_forward_name):

         if extension:
             # The Trampoline class is instantiated for every module replacement, so we can use class members individually for each module.
+
             class Trampoline(torch.autograd.Function):
                 target_extension = extension
                 original_module = m
@@ -83,16 +84,35 @@ def unpatch_model(model, orig_forward_name):


 def __make_16bit_traceable(model: torch.nn.Module):
-
-
-
-
-
-
-
+    """
+    Prepare a 16-bit PyTorch model for tracing with OpenVINO.
+    - Replace known list of modules with ModuleExtension.
+    - Convert other modules with weights to FP32.
+    """
+    extensions = {
+        torch.nn.Linear: ModuleExtension(
+            torch.nn.Linear, "ov_ext::linear",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape[:-1]) + [module.out_features], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias)),
+        torch.nn.Embedding: ModuleExtension(
+            torch.nn.Embedding, "ov_ext::embedding",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape) + [module.embedding_dim], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(module.weight, args[0], module.padding_idx, module.scale_grad_by_freq, module.sparse)),
     }
+    try:
+        from transformers.pytorch_utils import Conv1D
+        extensions[Conv1D] = ModuleExtension(
+            Conv1D, "ov_ext::conv1d",
+            evaluate=lambda module, *args, **kwargs: torch.full(
+                list(args[0].shape[:-1]) + [module.nf], 0.5, dtype=torch.float32),
+            convert=lambda module, target_op, *args, **kwargs: target_op(args[0], module.weight, module.bias))
+    except:
+        pass
     patch_model(model, extensions,
                 "_openvino_module_extension_patch_orig_forward")
     for _, module in model.named_modules():
-        if module.__class__ not in extensions and
+        if module.__class__ not in extensions and (any([p.dtype in [torch.float16, torch.bfloat16] for p in module.parameters(False)])
+                                                   or any([b.dtype in [torch.float16, torch.bfloat16] for b in module.buffers(False)])):
             module.float()
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py CHANGED
@@ -13,6 +13,7 @@ import torch
 from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
 from torch._dynamo.backends.registry import register_backend
 from torch._inductor.compile_fx import compile_fx
+from torch._inductor.freezing import replace_params_with_constants
 from torch.fx.experimental.proxy_tensor import make_fx
 from torch._decomp import decomposition_table, get_decompositions

@@ -54,10 +55,9 @@ def openvino(subgraph, example_inputs, options=None):
     if (_get_aot_autograd(options)):
         global openvino_options
         openvino_options = options
-        decompositions = _get_decompositions(options) + get_inf_decomposition_list()
-
-
-                            bw_compiler=fx_openvino,
+        decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list()
+        return aot_autograd(fw_compiler=fx_openvino,
+                            bw_compiler=fx_openvino,
                             decompositions=get_decompositions(decompositions))(subgraph, example_inputs)
     return fx_openvino(subgraph, example_inputs, options)

@@ -86,7 +86,14 @@ def fx_openvino(subgraph, example_inputs, options=None):
        if inputs_reversed:
            example_inputs.reverse()

+        preserved_arg_indices = []
        if (_get_aot_autograd(options)):
+            if tracing_context := torch._guards.TracingContext.try_get():
+                fw_metadata = tracing_context.fw_metadata
+                params_flat = tracing_context.params_flat
+                assert fw_metadata is not None and params_flat is not None
+                preserved_arg_indices = replace_params_with_constants(subgraph, params_flat, fw_metadata)
+                example_inputs = [example_inputs[ind] for ind in preserved_arg_indices]
            model = subgraph
        else:
            from torch._subclasses.fake_tensor import FakeTensorMode
@@ -96,7 +103,6 @@ def fx_openvino(subgraph, example_inputs, options=None):

        with torch.no_grad():
            model.eval()
-
        partitioner = Partitioner(options)
        compiled_model = partitioner.make_partitions(model, options)

@@ -107,9 +113,15 @@ def fx_openvino(subgraph, example_inputs, options=None):
                executor_parameters["model_hash_str"] += "_fs"

        def _call(*args):
+            if(_get_aot_autograd(options)):
+                args_list = args[0]
+                args_new = [args_list[i] for i in preserved_arg_indices]
+                args = args_new
            res = execute(compiled_model, *args, executor="openvino",
                          executor_parameters=executor_parameters, options=options)
            return res
+        if(_get_aot_autograd(options)):
+            _call._boxed_call = True  # type: ignore[attr-defined]
        return _call
    except Exception as e:
        logger.debug(f"Failed in OpenVINO execution: {e}")
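These hunks are exercised when OpenVINO is used as a torch.compile backend; the aot_autograd branch now freezes parameters into constants and marks the compiled callable as boxed. A hedged invocation sketch follows; the "aot_autograd" and "device" option keys are inferred from _get_aot_autograd() and this module, and should be treated as assumptions:

    import torch

    model = torch.nn.Linear(8, 8)
    compiled = torch.compile(model, backend="openvino",
                             options={"device": "CPU", "aot_autograd": True})
    print(compiled(torch.randn(1, 8)).shape)   # torch.Size([1, 8])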
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/op_support.py CHANGED
@@ -241,6 +241,7 @@ class OperatorSupport(OperatorSupport):
             "torch.ops.aten.transpose.int": None,
             "torch.ops.aten.tril.default": None,
             "torch.ops.aten.tril_.default": None,
+            "torch.ops.aten.triu.default": None,
             "torch.ops.aten.unbind.int": None,
             "torch.ops.aten.unfold.default": None,
             "torch.ops.aten.unsqueeze.default": None,