PyPI - mct-nightly - Versions diffs - 2.2.0.20240930.532__py3-none-any.whl → 2.2.0.20241002.500__py3-none-any.whl - Mend

mct-nightly 2.2.0.20240930.532__py3-none-any.whl → 2.2.0.20241002.500__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

{mct_nightly-2.2.0.20240930.532.dist-info → mct_nightly-2.2.0.20241002.500.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 2.2.0.20240930.532
+Version: 2.2.0.20241002.500
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN

{mct_nightly-2.2.0.20240930.532.dist-info → mct_nightly-2.2.0.20241002.500.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-model_compression_toolkit/__init__.py,sha256=t1wNg0lS5JpWEPKyQf-PoKxWqEgc_58HfooVwjVCFsQ,1573
+model_compression_toolkit/__init__.py,sha256=0OYwjkiM5Okt4kzkKaRTqc3Iq-TCsVN1uOUzYDgffog,1573
 model_compression_toolkit/constants.py,sha256=i4wYheBkIdQmsQA-axIpcT3YiSO1USNc-jaNiNE8w6E,3920
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -15,7 +15,7 @@ model_compression_toolkit/core/common/framework_implementation.py,sha256=kSg2f7w
 model_compression_toolkit/core/common/framework_info.py,sha256=1ZMMGS9ip-kSflqkartyNRt9aQ5ub1WepuTRcTy-YSQ,6337
 model_compression_toolkit/core/common/memory_computation.py,sha256=ixoSpV5ZYZGyzhre3kQcvR2sNA8KBsPZ3lgbkDnw9Cs,1205
 model_compression_toolkit/core/common/model_builder_mode.py,sha256=jll9-59OPaE3ug7Y9-lLyV99_FoNHxkGZMgcm0Vkpss,1324
-model_compression_toolkit/core/common/model_collector.py,sha256=ofcepKtxc3j2Ouz6BpAKXTzPgjABnpRP47ndmJCXAkk,8352
+model_compression_toolkit/core/common/model_collector.py,sha256=T0J3hLmqJI8eQEXlBfqbnPNJ4XpPUp0zfRSjL0CQYu8,8381
 model_compression_toolkit/core/common/model_validation.py,sha256=LaG8wd6aZl0OJgieE3SeiVDEPxtk8IHq9-3wSnmWhY4,1214
 model_compression_toolkit/core/common/node_prior_info.py,sha256=WXX_PrGVG9M9I_REG5ZzFBohwmV4yf356sZnrja_FLo,2832
 model_compression_toolkit/core/common/similarity_analyzer.py,sha256=FikcIqgQQpfiXr9VJvgl-wk8OyH7-LvC8ku7TkhJfJM,9200
@@ -219,7 +219,7 @@ model_compression_toolkit/core/pytorch/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKW
 model_compression_toolkit/core/pytorch/constants.py,sha256=YwD_joIF0vK8UG2vW1NVvg36pCNWA0vHOXjAgy_XWn0,2794
 model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=-Vls1P_8Ckm_18nnOsmQkZ71SmzHwtQLbQ383Z4Rb-U,4365
 model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=S25cuw10AW3SEN_fRAGRcG_I3wdvvQx1ehSJzPnn-UI,4404
-model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=xmcJyU-rkIDX1a_X9LILzf2Ko2z_4I4xnlHkezKH-2w,27669
+model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=2RGf4ii9zxJwGLA3mp-qzDp4khFaYNUNN95bNuNNZ0c,27868
 model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=2LDQ7qupglHQ7o1Am7LWdfYVacfQnl-aW2N6l9det1w,3264
 model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py,sha256=xpKj99OZKT9NT0vKIl_cOe8d89d2gef1gKoNT6PFElE,4989
 model_compression_toolkit/core/pytorch/utils.py,sha256=GE7T8q93I5C4As0iOias_dk9HpOvXM1N6---dJlyD60,3863
@@ -249,6 +249,7 @@ model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py,sha256=hAZXzrEinHa-dJHLj39Hy_9Q-13QyO95rtYVSLrhvT8,4915
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py,sha256=DcJEIkGvBdIMOelNIwaJUZ5UsAHiGnDJPR20I464vWo,2929
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py,sha256=XFtU9yuBmoZlX0f0mS6otMPWMk-RcWs94XdvvTNhW8Y,3303
+model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py,sha256=ziL7jwTnjzTf7BHPRPYgWBSCUrSXSyjZnvQqsJhD1nM,12466
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py,sha256=3WCLvPyx7tVkM0rwYhYq-gntCzW9R_DcImR1ucKlPac,10772
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/softmax_shift.py,sha256=05lV4pIL3hJkZl4JQPV4wk_EFD0eYLG5b8cdzvZk4P8,1588
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/transform_function_call_method.py,sha256=EC9Dvp-_UlpDWnipnf8ds65wh_Y-T8pXAFIwRScWpiY,2044
@@ -550,8 +551,8 @@ tests_pytest/pytorch/gptq/test_annealing_cfg.py,sha256=hGC7L6mp3N1ygcJ3OctgS_Fz2
 tests_pytest/pytorch/gptq/test_gradual_act_quantization.py,sha256=tI01aFIUaiCILL5Qn--p1E_rLBUelxLdSY3k52lwcx0,4594
 tests_pytest/pytorch/trainable_infrastructure/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
 tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py,sha256=eNOpSp0GoLxtEdiRypBp8jaujXfdNxBwKh5Rd-P7WLs,1786
-mct_nightly-2.2.0.20240930.532.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
-mct_nightly-2.2.0.20240930.532.dist-info/METADATA,sha256=p4oG8xi2574mBzHWDgHCKzuBAz49q9DLDya346NWcYc,20830
-mct_nightly-2.2.0.20240930.532.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-mct_nightly-2.2.0.20240930.532.dist-info/top_level.txt,sha256=csdfSXhtRnpWYRzjZ-dRLIhOmM2TEdVXUxG05A5fgb8,39
-mct_nightly-2.2.0.20240930.532.dist-info/RECORD,,
+mct_nightly-2.2.0.20241002.500.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.2.0.20241002.500.dist-info/METADATA,sha256=XUo1iMNL1fh6tGsBz-kglXfHvxhfWdOBebgokDVQJ4A,20830
+mct_nightly-2.2.0.20241002.500.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+mct_nightly-2.2.0.20241002.500.dist-info/top_level.txt,sha256=csdfSXhtRnpWYRzjZ-dRLIhOmM2TEdVXUxG05A5fgb8,39
+mct_nightly-2.2.0.20241002.500.dist-info/RECORD,,

model_compression_toolkit/__init__.py CHANGED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.2.0.20240930.000532"
+__version__ = "2.2.0.20241002.000500"

model_compression_toolkit/core/common/model_collector.py CHANGED Viewed

@@ -158,7 +158,7 @@ class ModelCollector:
         for td, sc in zip(tensor_data, self.stats_containers_list):
             if isinstance(sc, (list, tuple)):
                 if not isinstance(td, (list, tuple)):
-                    Logger.critical('\'tensor_data\' must be a list or a tuple if \'stats_containers_list\' contains lists or tuples.') # pragma: no cover
+                    Logger.critical(f"\'tensor_data\' is of type {type(td)} but must be of the same type as \'stats_containers_list\', which is of type {type(sc)}") # pragma: no cover
                 if len(sc) != len(td):
                     Logger.critical('\'tensor_data\' and \'stats_containers_list\' must have matching lengths') # pragma: no cover
                 for tdi, sci in zip(td, sc):

model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py ADDED Viewed

@@ -0,0 +1,231 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import torch.nn as nn
+import torch
+import math
+from copy import copy
+import numpy as np
+from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
+from model_compression_toolkit.core.common import BaseSubstitution
+from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.core.common.graph.base_graph import Graph, BaseNode, OutTensor
+from model_compression_toolkit.core.pytorch.constants import DIM
+from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device
+class ScaledDotProductDecomposition(BaseSubstitution):
+    """
+    Decompose torch.nn.scale_dot_product into its base operators:
+        Transpose (over k)
+        MatMul(over q and transposed k)
+        Mul (for scaling)
+        Add (for masking. optional operation, used in cases that attn_mask ig given)
+        Dropout
+        Softmax
+        Matmul.
+    """
+    def __init__(self):
+        """
+        Matches scaled_dot_product_attention node.
+        """
+        super().__init__(matcher_instance=NodeOperationMatcher(nn.functional.scaled_dot_product_attention))
+    def _get_input_by_name(self, attention_node: FunctionalNode, input_name: str,
+                           input_index: int, default_value: any) -> any:
+        """
+        Search for attention_node input value in op_call_kwargs (using input_name) and op_call_args (using input_index).
+        In case the input is not given, returns its default_value.
+        """
+        if input_name in attention_node.op_call_kwargs:
+            return attention_node.op_call_kwargs[input_name]
+        elif len(attention_node.op_call_args) > input_index:  # input order: [attn_mask, dropout_p, is_causal]
+            return attention_node.op_call_args[input_index]
+        return default_value
+    def _get_attention_input_nodes(self, graph: Graph, attention_node: FunctionalNode) -> dict:
+        q, k, v = 0, 1, 2
+        prev_nodes = graph.get_prev_nodes(attention_node, sink_index_sorted=True)
+        q_node, k_node, v_node = prev_nodes[q], prev_nodes[k], prev_nodes[v]
+        return {"q": q_node, "k": k_node, "v": v_node}
+    def _get_transpose_k_node(self, attention_node_name: str, key_node: BaseNode) -> BaseNode:
+        input_shape, output_shape = copy(key_node.output_shape[0]), copy(key_node.output_shape[0])
+        output_shape[-2], output_shape[-1] = input_shape[-1], input_shape[-2]
+        transpose_node = FunctionalNode(name=f"{attention_node_name}_{key_node.name}_transpose",
+                                        framework_attr={},
+                                        input_shape=input_shape,
+                                        output_shape=output_shape,
+                                        weights={},
+                                        layer_class=torch.transpose,
+                                        op_call_args=[-1, -2],  # axes to transpose
+                                        op_call_kwargs={},
+                                        functional_op=torch.transpose)
+        return transpose_node
+    def _get_scale_node(self, attention_node: FunctionalNode, q_node: BaseNode, matmul_node: BaseNode) -> FunctionalNode:
+        """
+        :return: multiplication node that represents multiplication by the scale factor
+        """
+        scale_name = f'{attention_node.name}_scale'
+        q_embd_axis = -1
+        input_scale = self._get_input_by_name(attention_node, "scale", 3, None)
+        scale_factor = input_scale if input_scale else (1 / math.sqrt(q_node.output_shape[0][q_embd_axis]))
+        scale_node = FunctionalNode(name=scale_name,
+                                    framework_attr={},
+                                    input_shape=(matmul_node.output_shape),
+                                    output_shape=matmul_node.output_shape,
+                                    weights={},
+                                    layer_class=torch.mul,
+                                    op_call_args=[scale_factor],
+                                    op_call_kwargs={},
+                                    functional_op=torch.mul)
+        return scale_node
+    def _get_matmul_node(self, attention_node_name: str, q_node: BaseNode, transposed_k_node: BaseNode) -> BaseNode:
+        matmul1_output_shape = copy(q_node.output_shape[0])
+        matmul1_output_shape[-2] = q_node.output_shape[0][-2]
+        matmul1_output_shape[-1] = transposed_k_node.output_shape[-1]
+        matmul_name = f'{attention_node_name}_matmul1'
+        return FunctionalNode(name=matmul_name,
+                              framework_attr={},
+                              input_shape=(tuple(q_node.output_shape[0]), tuple(transposed_k_node.output_shape)),
+                              output_shape=tuple(matmul1_output_shape),
+                              weights={},
+                              layer_class=torch.matmul,
+                              op_call_args=[],
+                              op_call_kwargs={},
+                              functional_op=torch.matmul)
+    def _get_mask_node(self, attention_node: FunctionalNode, scale_node: FunctionalNode) -> FunctionalNode:
+        """
+        :return: Add operator node with the mask tensor as input. In case there is no mask tensor, returns None.
+        """
+        attention_mask_tensor = self._get_attention_mask_tensor(attention_node)
+        if attention_mask_tensor is None:
+            return None
+        mask_node_name = f'{attention_node.name}_mask'
+        return FunctionalNode(name=mask_node_name,
+                              framework_attr={},
+                              input_shape=(scale_node.output_shape),
+                              output_shape=scale_node.output_shape,
+                              weights={},
+                              layer_class=torch.add,
+                              op_call_args=[],
+                              op_call_kwargs={'other': attention_mask_tensor},
+                              functional_op=torch.add)
+    def _get_softmax_node(self, attention_node_name: str, in_out_shape: tuple) -> BaseNode:
+        softmax_name = f'{attention_node_name}_softmax'
+        return BaseNode(name=softmax_name,
+                        framework_attr={DIM: -1},
+                        input_shape=in_out_shape,
+                        output_shape=in_out_shape,
+                        weights={},
+                        layer_class=nn.Softmax)
+    def _get_matmul2_node(self, attention_node_name: str, softmax_node: BaseNode, v_node: BaseNode) -> FunctionalNode:
+        matmul2_output_shape = list(copy(softmax_node.output_shape))
+        matmul2_output_shape[-2] = softmax_node.output_shape[-2]
+        matmul2_output_shape[-1] = v_node.output_shape[0][-1]
+        matmul2_name = f'{attention_node_name}_matmul2'
+        return FunctionalNode(name=matmul2_name,
+                              framework_attr={},
+                              input_shape=(tuple(softmax_node.output_shape), tuple(v_node.output_shape[0])),
+                              output_shape=tuple(matmul2_output_shape),
+                              weights={},
+                              layer_class=torch.matmul,
+                              op_call_args=[],
+                              op_call_kwargs={},
+                              functional_op=torch.matmul)
+    def _get_attention_mask_tensor(self, attention_node: FunctionalNode) -> torch.Tensor:
+        """
+        :return: mask tensor given as part of attention node input.
+        Since MCT doesn't support infinite values, we don't support is_causal (torch.nn.scale_dot_product_attention
+        argument) and boolean mask tensor, as they both require -inf values.
+        """
+        device = get_working_device()
+        is_causal = self._get_input_by_name(attention_node, "is_causal", 2, False)
+        if is_causal:
+            raise NotImplementedError("scaled_dot_product_attention is_causal feature is not implemented.")
+        input_weights = list(attention_node.weights.values())
+        attn_mask = input_weights[0] if len(input_weights) > 0 else None
+        if attn_mask is not None and (attn_mask.dtype == "bool"):
+            raise NotImplementedError(
+                "scaled_dot_product_attention attn_mask is of type boolean, which is not supported.")
+        if attn_mask is not None and (not np.isfinite(attn_mask).all()):
+            raise NotImplementedError(
+                "scaled_dot_product_attention attn_mask contains infinite value, which is not supported.")
+        return torch.from_numpy(attn_mask).to(device) if attn_mask is not None else None
+    def _get_dropout_node(self, attention_node: FunctionalNode, in_out_shape: tuple) -> BaseNode:
+        dropout_p = attention_node.op_call_kwargs.get('dropout_p', 0)
+        dropout_name = f'{attention_node.name}_dropout'
+        return BaseNode(name=dropout_name,
+                        framework_attr={"p": dropout_p},
+                        input_shape=in_out_shape,
+                        output_shape=in_out_shape,
+                        weights={},
+                        layer_class=nn.Dropout)
+    def substitute(self, graph: Graph, attention_node: FunctionalNode) -> Graph:
+        """
+        Removes a scaled_dot_product_attention node from the graph, and replaces it with a compatible graph that
+        consists of:
+            Transpose (over k)
+            MatMul(over q and transposed k)
+            Mul (for scaling)
+            Add (for masking. optional operation, used in cases that attn_mask ig given)
+            Dropout
+            Softmax
+            Matmul.
+        :param graph: A Graph to apply substitution on
+        :param attention_node: the node to replace
+        :return: A graph after the substitution
+        """
+        print("In scale_dot_product_attention substitution@@@@@@@@")
+        input_nodes = self._get_attention_input_nodes(graph, attention_node)
+        q_node, k_node, v_node = input_nodes["q"], input_nodes["k"], input_nodes["v"]
+        transpose_k_node = self._get_transpose_k_node(attention_node.name, k_node)
+        matmul_node = self._get_matmul_node(attention_node.name, q_node, transpose_k_node)
+        scale_node = self._get_scale_node(attention_node, q_node, matmul_node)
+        mask_node = self._get_mask_node(attention_node, scale_node)
+        softmax_node = self._get_softmax_node(attention_node.name, matmul_node.output_shape)
+        dropout_node = self._get_dropout_node(attention_node, softmax_node.output_shape)
+        matmul2_node = self._get_matmul2_node(attention_node.name, softmax_node, v_node)
+        graph.add_node_with_in_edges(transpose_k_node, [k_node])
+        graph.add_node_with_in_edges(matmul_node, [q_node, transpose_k_node])
+        graph.add_node_with_in_edges(scale_node, [matmul_node])
+        if mask_node:
+            graph.add_node_with_in_edges(mask_node, [scale_node])
+        graph.add_node_with_in_edges(softmax_node, [mask_node if mask_node else scale_node])
+        graph.add_node_with_in_edges(dropout_node, [softmax_node])
+        graph.add_node_with_in_edges(matmul2_node, [dropout_node if dropout_node else softmax_node, v_node])
+        graph_outputs = graph.get_outputs()
+        for i, g_out in enumerate(graph_outputs):
+            if g_out.node == attention_node:
+                graph_outputs[i] = OutTensor(node=matmul2_node, node_out_index=g_out.node_out_index)
+        graph.reconnect_out_edges(current_node=attention_node, new_node=matmul2_node)
+        graph.remove_edge(q_node, attention_node)
+        graph.remove_edge(k_node, attention_node)
+        graph.remove_edge(v_node, attention_node)
+        graph.remove_node(attention_node, new_graph_outputs=graph_outputs)
+        return graph

model_compression_toolkit/core/pytorch/pytorch_implementation.py CHANGED Viewed

@@ -53,6 +53,8 @@ from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.li
     pytorch_linear_collapsing
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.multi_head_attention_decomposition \
     import MultiHeadAttentionDecomposition
+from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.scaled_dot_product_attention import \
+    ScaledDotProductDecomposition
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.transform_function_call_method import \
     TransformFunctionCallMethod
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.const_holder_conv import \
@@ -237,6 +239,7 @@ class PytorchImplementation(FrameworkImplementation):
         """
         return [ReshapeWithStaticShapes(),
                 MultiHeadAttentionDecomposition(),
+                ScaledDotProductDecomposition(),
                 TransformFunctionCallMethod(),
                 FunctionalConvSubstitution(fw_info),
                 FunctionalBatchNorm(),

{mct_nightly-2.2.0.20240930.532.dist-info → mct_nightly-2.2.0.20241002.500.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{mct_nightly-2.2.0.20240930.532.dist-info → mct_nightly-2.2.0.20241002.500.dist-info}/WHEEL RENAMED Viewed

File without changes

{mct_nightly-2.2.0.20240930.532.dist-info → mct_nightly-2.2.0.20241002.500.dist-info}/top_level.txt RENAMED Viewed

File without changes

mct-nightly 2.2.0.20240930.532__py3-none-any.whl → 2.2.0.20241002.500__py3-none-any.whl

mct-nightly 2.2.0.20240930.532__py3-none-any.whl → 2.2.0.20241002.500__py3-none-any.whl