bigdl-core-npu 2.6.0b20241120__cp310-cp310-win_amd64.whl → 2.6.0b20241121__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. bigdl-core-npu/npu_llm.dll +0 -0
  2. {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241121.dist-info}/METADATA +1 -1
  3. {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241121.dist-info}/RECORD +75 -79
  4. intel_npu_acceleration_library/_version.py +1 -1
  5. intel_npu_acceleration_library/external/openvino/__init__.py +1 -0
  6. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp310-win_amd64.pyd +0 -0
  7. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp311-win_amd64.pyd +0 -0
  8. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp312-win_amd64.pyd +0 -0
  9. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp38-win_amd64.pyd +0 -0
  10. intel_npu_acceleration_library/external/openvino/_pyopenvino.cp39-win_amd64.pyd +0 -0
  11. intel_npu_acceleration_library/external/openvino/experimental/__init__.py +14 -0
  12. intel_npu_acceleration_library/external/openvino/frontend/jax/jaxpr_decoder.py +15 -5
  13. intel_npu_acceleration_library/external/openvino/frontend/jax/passes.py +65 -0
  14. intel_npu_acceleration_library/external/openvino/frontend/jax/utils.py +66 -13
  15. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp310-win_amd64.pyd +0 -0
  16. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp311-win_amd64.pyd +0 -0
  17. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp312-win_amd64.pyd +0 -0
  18. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp38-win_amd64.pyd +0 -0
  19. intel_npu_acceleration_library/external/openvino/frontend/onnx/py_onnx_frontend.cp39-win_amd64.pyd +0 -0
  20. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp310-win_amd64.pyd +0 -0
  21. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp311-win_amd64.pyd +0 -0
  22. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp312-win_amd64.pyd +0 -0
  23. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp38-win_amd64.pyd +0 -0
  24. intel_npu_acceleration_library/external/openvino/frontend/paddle/py_paddle_frontend.cp39-win_amd64.pyd +0 -0
  25. intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py +29 -19
  26. intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py +46 -5
  27. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp310-win_amd64.pyd +0 -0
  28. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp311-win_amd64.pyd +0 -0
  29. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp312-win_amd64.pyd +0 -0
  30. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp38-win_amd64.pyd +0 -0
  31. intel_npu_acceleration_library/external/openvino/frontend/pytorch/py_pytorch_frontend.cp39-win_amd64.pyd +0 -0
  32. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py +17 -5
  33. intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py +55 -47
  34. intel_npu_acceleration_library/external/openvino/frontend/pytorch/ts_decoder.py +92 -63
  35. intel_npu_acceleration_library/external/openvino/frontend/pytorch/utils.py +12 -10
  36. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp310-win_amd64.pyd +0 -0
  37. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp311-win_amd64.pyd +0 -0
  38. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp312-win_amd64.pyd +0 -0
  39. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp38-win_amd64.pyd +0 -0
  40. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/py_tensorflow_frontend.cp39-win_amd64.pyd +0 -0
  41. intel_npu_acceleration_library/external/openvino/frontend/tensorflow/utils.py +31 -10
  42. intel_npu_acceleration_library/external/openvino/runtime/ie_api.py +1 -1
  43. intel_npu_acceleration_library/external/openvino/runtime/opset15/__init__.py +5 -0
  44. intel_npu_acceleration_library/external/openvino/runtime/opset15/ops.py +131 -1
  45. intel_npu_acceleration_library/external/openvino/runtime/opset6/ops.py +13 -4
  46. intel_npu_acceleration_library/external/openvino/runtime/opset8/ops.py +1 -1
  47. intel_npu_acceleration_library/external/openvino/runtime/properties/__init__.py +1 -0
  48. intel_npu_acceleration_library/external/openvino/runtime/utils/data_helpers/data_dispatcher.py +21 -3
  49. intel_npu_acceleration_library/external/openvino/runtime/utils/decorators.py +29 -9
  50. intel_npu_acceleration_library/external/openvino/tools/ovc/moc_frontend/pytorch_frontend_utils.py +0 -1
  51. intel_npu_acceleration_library/lib/Release/intel_npu_acceleration_library.dll +0 -0
  52. intel_npu_acceleration_library/lib/Release/openvino.dll +0 -0
  53. intel_npu_acceleration_library/lib/Release/openvino_auto_batch_plugin.dll +0 -0
  54. intel_npu_acceleration_library/lib/Release/openvino_auto_plugin.dll +0 -0
  55. intel_npu_acceleration_library/lib/Release/openvino_c.dll +0 -0
  56. intel_npu_acceleration_library/lib/Release/openvino_hetero_plugin.dll +0 -0
  57. intel_npu_acceleration_library/lib/Release/openvino_intel_cpu_plugin.dll +0 -0
  58. intel_npu_acceleration_library/lib/Release/openvino_intel_gpu_plugin.dll +0 -0
  59. intel_npu_acceleration_library/lib/Release/openvino_intel_npu_plugin.dll +0 -0
  60. intel_npu_acceleration_library/lib/Release/openvino_ir_frontend.dll +0 -0
  61. intel_npu_acceleration_library/lib/Release/openvino_onnx_frontend.dll +0 -0
  62. intel_npu_acceleration_library/lib/Release/openvino_paddle_frontend.dll +0 -0
  63. intel_npu_acceleration_library/lib/Release/openvino_pytorch_frontend.dll +0 -0
  64. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_frontend.dll +0 -0
  65. intel_npu_acceleration_library/lib/Release/openvino_tensorflow_lite_frontend.dll +0 -0
  66. intel_npu_acceleration_library/lib/Release/tbb12.dll +0 -0
  67. intel_npu_acceleration_library/lib/Release/tbb12_debug.dll +0 -0
  68. intel_npu_acceleration_library/lib/Release/tbbbind_2_5.dll +0 -0
  69. intel_npu_acceleration_library/lib/Release/tbbbind_2_5_debug.dll +0 -0
  70. intel_npu_acceleration_library/lib/Release/tbbmalloc.dll +0 -0
  71. intel_npu_acceleration_library/lib/Release/tbbmalloc_debug.dll +0 -0
  72. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy.dll +0 -0
  73. intel_npu_acceleration_library/lib/Release/tbbmalloc_proxy_debug.dll +0 -0
  74. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp310-win_amd64.pyd +0 -0
  75. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp311-win_amd64.pyd +0 -0
  76. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp312-win_amd64.pyd +0 -0
  77. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp38-win_amd64.pyd +0 -0
  78. intel_npu_acceleration_library/external/openvino/frontend/jax/py_jax_frontend.cp39-win_amd64.pyd +0 -0
  79. intel_npu_acceleration_library/lib/Release/openvino_jax_frontend.dll +0 -0
  80. {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241121.dist-info}/WHEEL +0 -0
  81. {bigdl_core_npu-2.6.0b20241120.dist-info → bigdl_core_npu-2.6.0b20241121.dist-info}/top_level.txt +0 -0

intel_npu_acceleration_library/external/openvino/frontend/pytorch/fx_decoder.py

@@ -16,6 +16,11 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)
 
 
+class InlinedInput:
+    def __init__(self, data) -> None:
+        self.data = data
+
+
 class TorchFXPythonDecoder (Decoder):
 
     def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]):
@@ -59,7 +64,7 @@ class TorchFXPythonDecoder (Decoder):
                             for arg in uargs if arg[1] is not None]
             for idx, shape in enumerate(found_shapes):
                 if shape is not None:
-                    new_shape=[]
+                    new_shape = []
                     for dim in range(0, len(shape)):
                         if (type(shape[dim]).__name__ == "SymInt"):
                             new_shape.append(-1)
@@ -81,7 +86,7 @@ class TorchFXPythonDecoder (Decoder):
 
             # None in inputs mean the input is inlined or None (also considered inlined)
             self._inputs = [self._nodes.index(
-                arg) if arg in self._nodes else (arg,) for arg in pt_module.args]
+                arg) if arg in self._nodes else InlinedInput(arg) for arg in pt_module.args]
 
             # FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments.
             new_inputs = []
@@ -92,22 +97,22 @@ class TorchFXPythonDecoder (Decoder):
                         if arg in self._nodes:
                             new_inputs.append(self._nodes.index(arg))
                         else:
-                            new_inputs.append((arg,))
+                            new_inputs.append(InlinedInput(arg))
                         self.input_types.append(OVAny(DecoderType.List(
                             TorchFXPythonDecoder.get_type_for_value(arg))))
                 else:
                     v = self._inputs[i]
                     new_inputs.append(v)
                     self.input_types.append(
-                        TorchFXPythonDecoder.get_type_for_value(v[0] if isinstance(v, tuple) else self._nodes[v]))
+                        TorchFXPythonDecoder.get_type_for_value(v.data if isinstance(v, InlinedInput) else self._nodes[v]))
             self._inputs = new_inputs
 
     def inputs(self):
         # Consider 0 a special case which may mean the input is inlined, but not guaranteed
-        return [x if not isinstance(x, tuple) else 0 for x in self._inputs]
+        return [x if not isinstance(x, InlinedInput) else 0 for x in self._inputs]
 
     def is_input_inlined(self, index):
-        return isinstance(self._inputs[index], tuple)
+        return isinstance(self._inputs[index], InlinedInput)
 
     @staticmethod
     def unpack_containers(arg):
@@ -142,19 +147,24 @@ class TorchFXPythonDecoder (Decoder):
             return make_constant(OVType.i64, Shape([]), [arg])
         elif isinstance(arg, float):
             return make_constant(OVType.f32, Shape([]), [arg])
+        elif isinstance(arg, str):
+            u8_tensor = torch.frombuffer(str.encode(arg), dtype=torch.uint8)
+            return torch_tensor_to_ov_const(u8_tensor, shared_memory=True)
         return None
 
     def inlined_input(self, index):
         assert index < len(self._inputs), "Requested input doesn't exist"
         assert isinstance(
-            self._inputs[index], tuple), "Requested input which is not inlined"
-        assert self._inputs[index][0] is not None, "Requested None inlined input"
+            self._inputs[index], InlinedInput), "Requested input which is not inlined"
+        arg = self._inputs[index].data
+        assert arg is not None, f"Requested None inlined input for op {self.get_op_type()}"
         constant = None
-        arg = self._inputs[index][0]
         constant = self.arg_to_constant(arg)
 
-        assert constant is not None, f"Constant wasn't created for inlined input {index}"
-        return constant.outputs()
+        if constant is not None:
+            return constant.outputs()
+        else:
+            return []
 
     def input(self, index):  # TODO: remove
         return self.inputs()[index]  # TODO: find specialized method
@@ -257,9 +267,7 @@ class TorchFXPythonDecoder (Decoder):
         raise RuntimeError("This input is not a Node")
 
     def get_subgraph_size(self):
-        if issubclass(type(self.pt_module), torch.fx.Node):
-            return 0
-        return len(self.get_subgraphs()) if hasattr(self.pt_module, 'blocks') else 1
+        return len(self.get_subgraphs())
 
     def decoder_type_name(self) -> str:
         return "fx"
@@ -277,9 +285,7 @@ class TorchFXPythonDecoder (Decoder):
             node_visitor(decoder)
 
     def get_subgraphs(self):
-        if issubclass(type(self.pt_module), torch.fx.Node):
-            return []
-        return list(self.pt_module.blocks())
+        return []
 
     def get_subgraph_decoder(self, index):
         decoder = TorchFXPythonDecoder(self.get_subgraphs()[index],
@@ -309,7 +315,7 @@ class TorchFXPythonDecoder (Decoder):
         return self._raw_outputs()[index]
 
     def _raw_inputs(self):
-        return [self._nodes[x] if not isinstance(x, tuple) and x < len(self._nodes) else x[0] for x in self._inputs]
+        return [self._nodes[x] if not isinstance(x, InlinedInput) and x < len(self._nodes) else x.data for x in self._inputs]
 
     def _raw_input(self, index):
         return self._raw_inputs()[index]
@@ -347,7 +353,7 @@ class TorchFXPythonDecoder (Decoder):
         return None
 
     def input_is_none(self, index):
-        if index >= len(self._inputs) or (isinstance(self._inputs[index], tuple) and self._inputs[index][0] is None):
+        if index >= len(self._inputs) or (isinstance(self._inputs[index], InlinedInput) and self._inputs[index].data is None):
            return True
         else:
            r_input = self._raw_input(index)
@@ -358,3 +364,7 @@
 
     def may_produce_alias(self, in_index: int, out_index: int) -> bool:
         return False
+
+    def get_rt_info(self):
+        rt_info = {}
+        return rt_info
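
Note on the fx_decoder.py change above: inlined (non-graph) arguments were previously marked by wrapping them in a 1-tuple, which is ambiguous when the inlined value is itself a tuple; the new InlinedInput wrapper makes the marker explicit. A standalone sketch of the distinction (illustrative only, not code from the wheel):

    class InlinedInput:
        def __init__(self, data) -> None:
            self.data = data

    # Graph inputs stay as node indices; anything else is wrapped explicitly,
    # so a tuple-valued constant can no longer be mistaken for the marker.
    inputs = [0, 2, InlinedInput((1, 2, 3)), InlinedInput(None)]
    print([x if not isinstance(x, InlinedInput) else 0 for x in inputs])  # [0, 2, 0, 0]
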
intel_npu_acceleration_library/external/openvino/frontend/pytorch/gptq.py

@@ -43,8 +43,6 @@ def patched_forward(self, *args, **kwargs):
     unpacked_zp = decompression_pattern(
         self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1)
 
-    unpacked_zp = unpacked_zp.to(dtype) + 1
-
     unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales
     unpacked_weights = unpacked_weights.view(-1, self.width)
 
@@ -59,11 +57,50 @@ def patched_forward(self, *args, **kwargs):
     return out
 
 
+def patched_forward_sym(self, *args, **kwargs):
+    if hasattr(self, '_hf_hook'):
+        args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs)
+
+    x = args[0]
+    dtype = x.dtype
+    outshape = x.shape[:-1] + (self.width,)
+    x = x.contiguous().view(-1, x.shape[-1])
+    height = self.qweight.shape[0]
+
+    unpacked_weights = decompression_pattern(
+        self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8)
+    unpacked_weights = torch.transpose(
+        unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
+
+    # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
+    unpacked_weights = unpacked_weights.to(dtype) * self.scales
+    unpacked_weights = unpacked_weights.view(-1, self.width)
+
+    out = x @ unpacked_weights
+
+    out = out.view(outshape)
+    if self.bias is not None:
+        out.add_(self.bias)
+
+    if hasattr(self, '_hf_hook'):
+        out = self._hf_hook.post_forward(self, out)
+    return out
+
+
 # All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
 supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']
 
 
 def patch_model(model):
+    is_symmetrical = False
+    config = None
+    if hasattr(model, "config"):
+        config = model.config
+    elif hasattr(model, "model") and hasattr(model.model, "config"):
+        # original model was wrapped
+        config = model.model.config
+    if config is not None and hasattr(config, 'quantization_config') and hasattr(config.quantization_config, 'sym'):
+        is_symmetrical = config.quantization_config.sym
     for name, m in model.named_modules():
         if hasattr(m, '_openvino_patch_orig_forward'):
             # already patched, skipping
@@ -87,7 +124,10 @@ def patch_model(model):
         assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups
 
         m._openvino_patch_orig_forward = m.forward
-        m.forward = partial(patched_forward, m)
+        if is_symmetrical:
+            m.forward = partial(patched_forward_sym, m)
+        else:
+            m.forward = partial(patched_forward, m)
 
         # Keep original field properties to be used when model is returned back to its original state
         m._openvino_patch_orig_qweights_type = m.qweight.dtype
@@ -97,11 +137,12 @@ def patch_model(model):
         m.qweight = m.qweight.view(dtype=torch.uint8)
         m.qzeros = m.qzeros.view(dtype=torch.uint8)
 
-        # TODO: Redundant tensor copy? Try to remove m.qweigh and m.qzeros after keeping modified values as submodules
+        # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules
         m.add_module(
            '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight))
+        # Adding 17 to move zp+1 step from after unpacking to before to have correct decompression pattern. Can it overflow?
         m.add_module('_openvino_u4_compression_submodule_qzeros',
-                     KeepWeight(m.qzeros))
+                     KeepWeight(m.qzeros + torch.tensor(17, dtype=torch.uint8)))
 
         m.scales = m.scales.view(-1, 1, m.width)
 
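
Note on the gptq.py change above: each uint8 element of the reinterpreted qzeros packs two u4 zero-points, and since 17 = 0x11, adding 17 increments both nibbles by one. That is what lets the diff drop the separate `unpacked_zp.to(dtype) + 1` step in patched_forward. A small arithmetic sketch (plain Python, not code from the wheel), including the carry case the in-diff comment asks about:

    def nibbles(byte):
        return byte >> 4, byte & 0xF

    packed = 0x34                           # packs zero-points 3 (high) and 4 (low)
    print(nibbles((packed + 0x11) & 0xFF))  # (4, 5): both zero-points incremented by one

    edge = 0x3F                             # low nibble is already 15
    print(nibbles((edge + 0x11) & 0xFF))    # (5, 0): the +1 carries into the high nibble
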
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/backend.py

@@ -13,6 +13,7 @@ import torch
 from torch._dynamo.backends.common import fake_tensor_unsupported, aot_autograd
 from torch._dynamo.backends.registry import register_backend
 from torch._inductor.compile_fx import compile_fx
+from torch._inductor.freezing import replace_params_with_constants
 from torch.fx.experimental.proxy_tensor import make_fx
 from torch._decomp import decomposition_table, get_decompositions
 
@@ -54,10 +55,9 @@ def openvino(subgraph, example_inputs, options=None):
     if (_get_aot_autograd(options)):
         global openvino_options
         openvino_options = options
-        decompositions = _get_decompositions(options) + get_inf_decomposition_list()
-        decompositions = decompositions + get_aot_decomposition_list()
-        return aot_autograd(fw_compiler=fx_openvino,
-                            bw_compiler=fx_openvino,
+        decompositions = _get_decompositions(options) + get_inf_decomposition_list() + get_aot_decomposition_list()
+        return aot_autograd(fw_compiler=fx_openvino,
+                            bw_compiler=fx_openvino,
                             decompositions=get_decompositions(decompositions))(subgraph, example_inputs)
     return fx_openvino(subgraph, example_inputs, options)
 
@@ -86,7 +86,14 @@ def fx_openvino(subgraph, example_inputs, options=None):
         if inputs_reversed:
             example_inputs.reverse()
 
+        preserved_arg_indices = []
         if (_get_aot_autograd(options)):
+            if tracing_context := torch._guards.TracingContext.try_get():
+                fw_metadata = tracing_context.fw_metadata
+                params_flat = tracing_context.params_flat
+                assert fw_metadata is not None and params_flat is not None
+                preserved_arg_indices = replace_params_with_constants(subgraph, params_flat, fw_metadata)
+                example_inputs = [example_inputs[ind] for ind in preserved_arg_indices]
             model = subgraph
         else:
             from torch._subclasses.fake_tensor import FakeTensorMode
@@ -96,7 +103,6 @@ def fx_openvino(subgraph, example_inputs, options=None):
 
         with torch.no_grad():
             model.eval()
-
         partitioner = Partitioner(options)
         compiled_model = partitioner.make_partitions(model, options)
 
@@ -107,9 +113,15 @@ def fx_openvino(subgraph, example_inputs, options=None):
                 executor_parameters["model_hash_str"] += "_fs"
 
         def _call(*args):
+            if(_get_aot_autograd(options)):
+                args_list = args[0]
+                args_new = [args_list[i] for i in preserved_arg_indices]
+                args = args_new
             res = execute(compiled_model, *args, executor="openvino",
                           executor_parameters=executor_parameters, options=options)
             return res
+        if(_get_aot_autograd(options)):
+            _call._boxed_call = True  # type: ignore[attr-defined]
         return _call
     except Exception as e:
         logger.debug(f"Failed in OpenVINO execution: {e}")
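
Note on the backend.py change above: with aot_autograd enabled, parameters are folded into the graph via replace_params_with_constants, so only the runtime arguments at preserved_arg_indices are kept, and the returned callable is marked `_boxed_call = True`, PyTorch's boxed calling convention in which the compiled function receives its arguments as a single list. A minimal sketch of that convention (hypothetical indices and values, not code from the wheel):

    import torch

    preserved_arg_indices = [0, 2]      # assume the argument at index 1 was frozen out

    def _call(*args):
        args_list = args[0]             # boxed: one list of tensors comes in
        kept = [args_list[i] for i in preserved_arg_indices]
        return [kept[0] + kept[1]]

    _call._boxed_call = True            # callers invoke _call([t0, t1, t2])
    print(_call([torch.ones(2), torch.zeros(2), torch.full((2,), 3.0)]))  # [tensor([4., 4.])]
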
intel_npu_acceleration_library/external/openvino/frontend/pytorch/torchdynamo/partition.py

@@ -25,6 +25,13 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)
 
 
+class PatternNode:
+    op_types = {}
+
+    def __init__(self):
+        self.op_types = {}
+
+
 class Partitioner:
     def __init__(self, options):
         self.supported_ops = OperatorSupport(options)
@@ -56,55 +63,56 @@ class Partitioner:
             return True
         return False
 
-    def capture_gptq_patterns(self, graph_module: GraphModule) -> bool:
+    def check_pattern(self, node: torch.fx.Node, pattern: PatternNode, enabled_ops: list) -> bool:
+        if node.op == "call_function":
+            if ("call_function" + ":" + str(node.target)) in pattern.op_types:
+                pt_input_nodes = node.all_input_nodes
+                pattern_input_ops = pattern.op_types["call_function" + ":" + str(node.target)]
+                if pattern_input_ops is None:
+                    enabled_ops.append(node)
+                    return True
+                if len(pt_input_nodes) != len(pattern_input_ops):
+                    return False
+                for i in range(len(pt_input_nodes)):
+                    if not self.check_pattern(pt_input_nodes[i], pattern_input_ops[i], enabled_ops):
+                        return False
+                enabled_ops.append(node)
+                return True
+        elif node.op == "get_attr":
+            if "get_attr" in pattern.op_types:
+                return True
+            else:
+                return False
+        return False
+
+    def capture_gptq_patterns(self, graph_module: GraphModule):
+        const_0_node = PatternNode
+        const_0_node.op_types["get_attr"] = None
+        unsqueeze_0_node = PatternNode
+        unsqueeze_0_node.op_types["call_function:aten.unsqueeze.default"] = [const_0_node,]
+        expand_node = PatternNode
+        expand_node.op_types["call_function:aten.expand.default"] = [unsqueeze_0_node,]
+        const_1_node = PatternNode
+        const_1_node.op_types["get_attr"] = None
+        unsqueeze_1_node = PatternNode
+        unsqueeze_1_node.op_types["call_function:aten.unsqueeze.default"] = [const_1_node,]
+        bitwise_right_shift_node = PatternNode
+        bitwise_right_shift_node.op_types["call_function:aten.bitwise_right_shift.Tensor"] = [expand_node, unsqueeze_1_node]
+        to_copy_node = PatternNode
+        to_copy_node.op_types["call_function:aten._to_copy.default"] = [bitwise_right_shift_node,]
+        add_or_to_copy_node = PatternNode
+        add_or_to_copy_node.op_types["call_function:aten._to_copy.default"] = [bitwise_right_shift_node,]
+        add_or_to_copy_node.op_types["call_function:aten.add.Tensor"] = [to_copy_node,]
+        bitwise_and_node = PatternNode
+        bitwise_and_node.op_types["call_function:aten.bitwise_and.Scalar"] = [add_or_to_copy_node,]
+
         for node in graph_module.graph.nodes:
             if str(node.op) == "call_function" and str(node.target) == "aten.bitwise_and.Scalar":
-                bitwise_and_in_nodes = node.all_input_nodes
-                if len(bitwise_and_in_nodes) != 1:
-                    continue
-                to_copy_node = bitwise_and_in_nodes[0]
-                if str(to_copy_node.op) != "call_function" or str(to_copy_node.target) != "aten._to_copy.default":
-                    continue
-                to_copy_in_nodes = to_copy_node.all_input_nodes
-                if len(to_copy_in_nodes) != 1:
-                    continue
-                bitwise_right_shift_node = to_copy_in_nodes[0]
-                if str(bitwise_right_shift_node.op) != "call_function" or str(bitwise_right_shift_node.target) != "aten.bitwise_right_shift.Tensor":
-                    continue
-                bitwise_right_shift_in_nodes = bitwise_right_shift_node.all_input_nodes
-                if len(bitwise_right_shift_in_nodes) != 2:
-                    continue
-                expand_node = bitwise_right_shift_in_nodes[0]
-                if str(expand_node.op) != "call_function" or str(expand_node.target) != "aten.expand.default":
-                    continue
-                expand_in_nodes = expand_node.all_input_nodes
-                if len(expand_in_nodes) != 1:
-                    continue
-                unsqueeze_0_node = expand_in_nodes[0]
-                if str(unsqueeze_0_node.op) != "call_function" or str(unsqueeze_0_node.target) != "aten.unsqueeze.default":
-                    continue
-                unsqueeze_0_in_nodes = unsqueeze_0_node.all_input_nodes
-                if len(unsqueeze_0_in_nodes) != 1:
-                    continue
-                const_0_node = unsqueeze_0_in_nodes[0]
-                if str(const_0_node.op) != "get_attr":
-                    continue
-                unsqueeze_1_node = bitwise_right_shift_in_nodes[1]
-                if str(unsqueeze_1_node.op) != "call_function" or str(unsqueeze_1_node.target) != "aten.unsqueeze.default":
-                    continue
-                unsqueeze_1_in_nodes = unsqueeze_1_node.all_input_nodes
-                if len(unsqueeze_1_in_nodes) != 1:
-                    continue
-                const_1_node = unsqueeze_1_in_nodes[0]
-                if str(const_1_node.op) != "get_attr":
-                    continue
-
-                self.supported_ops.enable_by_name(node)
-                self.supported_ops.enable_by_name(to_copy_node)
-                self.supported_ops.enable_by_name(bitwise_right_shift_node)
-                self.supported_ops.enable_by_name(expand_node)
-                self.supported_ops.enable_by_name(unsqueeze_0_node)
-                self.supported_ops.enable_by_name(unsqueeze_1_node)
+                enabled_ops = []
+                pattern_match = self.check_pattern(node, bitwise_and_node, enabled_ops)
+                if pattern_match:
+                    for pattern_op in enabled_ops:
+                        self.supported_ops.enable_by_name(pattern_op)
 
     def make_partitions(self, graph_module: GraphModule, options) -> GraphModule:
         allow_single_node_partition = _is_testing(options)
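
Note on the partition.py change above: the hand-rolled chain of if/continue checks is replaced by a declarative description of the GPTQ decompression subgraph (bitwise_and → _to_copy/add → bitwise_right_shift → expand/unsqueeze → get_attr) that check_pattern walks recursively from the root node. A toy illustration of the same idea over plain dictionaries (not code from the wheel; op names follow the pattern in the diff):

    def matches(node, pattern):
        if node["op"] not in pattern:
            return False
        child_patterns = pattern[node["op"]]
        if child_patterns is None:               # leaf: any inputs are accepted
            return True
        inputs = node["inputs"]
        return len(inputs) == len(child_patterns) and all(
            matches(n, p) for n, p in zip(inputs, child_patterns))

    const = {"get_attr": None}
    unsqueeze = {"aten.unsqueeze.default": [const]}
    shift = {"aten.bitwise_right_shift.Tensor": [{"aten.expand.default": [unsqueeze]}, unsqueeze]}
    root = {"aten.bitwise_and.Scalar": [{"aten._to_copy.default": [shift]}]}

    leaf = {"op": "get_attr", "inputs": []}
    unsq = {"op": "aten.unsqueeze.default", "inputs": [leaf]}
    node = {"op": "aten.bitwise_and.Scalar", "inputs": [
        {"op": "aten._to_copy.default", "inputs": [
            {"op": "aten.bitwise_right_shift.Tensor", "inputs": [
                {"op": "aten.expand.default", "inputs": [unsq]}, unsq]}]}]}
    print(matches(node, root))  # True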