mct-nightly 1.7.1.31122022.post351__py3-none-any.whl → 1.8.0.1042023.post423__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/METADATA +16 -16
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/RECORD +193 -150
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/WHEEL +1 -1
- model_compression_toolkit/__init__.py +13 -14
- model_compression_toolkit/core/common/back2framework/base_model_builder.py +1 -1
- model_compression_toolkit/core/common/collectors/base_collector.py +7 -4
- model_compression_toolkit/core/common/collectors/statistics_collector.py +2 -2
- model_compression_toolkit/core/common/constants.py +9 -4
- model_compression_toolkit/core/common/framework_implementation.py +32 -30
- model_compression_toolkit/core/common/graph/base_graph.py +8 -6
- model_compression_toolkit/core/common/logger.py +10 -2
- model_compression_toolkit/core/common/matchers/base_matcher.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py +2 -1
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +2 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +2 -2
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +6 -1
- model_compression_toolkit/core/common/model_validation.py +2 -1
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +3 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +7 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +4 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +14 -17
- model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +9 -2
- model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +5 -4
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +3 -3
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +7 -0
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +13 -8
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +17 -12
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
- model_compression_toolkit/core/common/target_platform/current_tp_model.py +3 -1
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/attribute_filter.py +17 -4
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/operations_to_layers.py +2 -4
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/target_platform_capabilities.py +3 -5
- model_compression_toolkit/core/keras/back2framework/instance_builder.py +12 -21
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +40 -14
- model_compression_toolkit/core/keras/back2framework/model_gradients.py +51 -27
- model_compression_toolkit/core/keras/constants.py +1 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +2 -1
- model_compression_toolkit/core/keras/kpi_data_facade.py +2 -2
- model_compression_toolkit/core/keras/quantization_facade.py +3 -3
- model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +15 -9
- model_compression_toolkit/core/keras/quantizer/input_layer_quantize_transform.py +2 -1
- model_compression_toolkit/core/keras/quantizer/lut_fake_quant.py +1 -1
- model_compression_toolkit/core/keras/reader/common.py +3 -2
- model_compression_toolkit/core/pytorch/back2framework/instance_builder.py +14 -1
- model_compression_toolkit/core/pytorch/back2framework/model_gradients.py +88 -46
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +27 -12
- model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/wrapper_quantize_config.py +2 -3
- model_compression_toolkit/core/pytorch/constants.py +5 -0
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +9 -14
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +16 -2
- model_compression_toolkit/core/pytorch/kpi_data_facade.py +2 -2
- model_compression_toolkit/core/pytorch/quantization_facade.py +2 -2
- model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py +7 -5
- model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py +1 -1
- model_compression_toolkit/core/tpc_models/get_target_platform_capabilities.py +6 -2
- model_compression_toolkit/{exporter/model_wrapper/keras/quantize_configs → core/tpc_models/imx500_tpc}/__init__.py +1 -1
- model_compression_toolkit/core/tpc_models/imx500_tpc/latest/__init__.py +24 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/target_platform_capabilities.py +45 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/__init__.py +16 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tp_model.py +156 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tpc_keras.py +101 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tpc_pytorch.py +95 -0
- model_compression_toolkit/exporter/__init__.py +5 -0
- model_compression_toolkit/exporter/model_exporter/__init__.py +0 -12
- model_compression_toolkit/exporter/model_exporter/fw_agonstic/exporter.py +1 -1
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +12 -39
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +39 -27
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +10 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py +6 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +48 -35
- model_compression_toolkit/exporter/model_exporter/tflite/fakely_quant_tflite_exporter.py +3 -2
- model_compression_toolkit/exporter/model_exporter/tflite/int8_tflite_exporter.py +180 -0
- model_compression_toolkit/exporter/model_exporter/tflite/tflite_export_facade.py +44 -26
- model_compression_toolkit/exporter/model_wrapper/__init__.py +4 -4
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +34 -137
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +143 -0
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizers.py +46 -0
- model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +56 -22
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +29 -112
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +83 -79
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizers.py +47 -0
- model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +44 -0
- model_compression_toolkit/gptq/__init__.py +6 -0
- model_compression_toolkit/gptq/common/gptq_config.py +57 -127
- model_compression_toolkit/gptq/common/gptq_constants.py +20 -6
- model_compression_toolkit/gptq/common/gptq_graph.py +22 -0
- model_compression_toolkit/gptq/common/gptq_training.py +32 -26
- model_compression_toolkit/gptq/keras/gptq_loss.py +1 -1
- model_compression_toolkit/gptq/keras/gptq_training.py +73 -39
- model_compression_toolkit/gptq/keras/graph_info.py +24 -43
- model_compression_toolkit/gptq/keras/quantization_facade.py +10 -18
- model_compression_toolkit/gptq/keras/quantizer/__init__.py +2 -1
- model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +112 -0
- model_compression_toolkit/gptq/keras/quantizer/quant_utils.py +13 -14
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +78 -0
- model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py +45 -0
- model_compression_toolkit/gptq/keras/{optimizers → quantizer/soft_rounding}/__init__.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +112 -0
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +256 -0
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +68 -168
- model_compression_toolkit/gptq/pytorch/gptq_training.py +78 -39
- model_compression_toolkit/gptq/pytorch/graph_info.py +81 -0
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +12 -18
- model_compression_toolkit/gptq/pytorch/quantizer/__init__.py +5 -1
- model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +92 -0
- model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py +10 -119
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +75 -0
- model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py +45 -0
- model_compression_toolkit/{exporter/model_wrapper/keras/quantizers → gptq/pytorch/quantizer/soft_rounding}/__init__.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +115 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +244 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +196 -0
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +182 -0
- model_compression_toolkit/ptq/keras/quantization_facade.py +3 -3
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +7 -6
- model_compression_toolkit/qat/common/qat_config.py +68 -0
- model_compression_toolkit/qat/keras/quantization_facade.py +55 -48
- model_compression_toolkit/qat/keras/quantizer/__init__.py +3 -0
- model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +49 -0
- model_compression_toolkit/qat/keras/quantizer/quant_utils.py +48 -0
- model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +77 -0
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +283 -0
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +158 -46
- model_compression_toolkit/qat/pytorch/quantization_facade.py +190 -11
- model_compression_toolkit/qat/pytorch/quantizer/__init__.py +17 -0
- model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +49 -0
- model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +74 -0
- model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py +136 -0
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +204 -0
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +190 -0
- model_compression_toolkit/quantizers_infrastructure/__init__.py +23 -0
- model_compression_toolkit/{gptq/keras/quantizer/configs → quantizers_infrastructure/inferable_infrastructure}/__init__.py +1 -1
- model_compression_toolkit/{gptq/keras/quantizer/gumbel_rounding → quantizers_infrastructure/inferable_infrastructure/common}/__init__.py +1 -1
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/base_inferable_quantizer.py +87 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/constants.py +41 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_all_subclasses.py +31 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_quantizers.py +53 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/quant_utils.py +49 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/__init__.py +14 -0
- model_compression_toolkit/{qunatizers_infrastructure → quantizers_infrastructure/inferable_infrastructure}/keras/load_model.py +26 -8
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantize_wrapper.py +345 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizer_utils.py +85 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/__init__.py +27 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_lut_pot_inferable_quantizer.py +148 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_pot_inferable_quantizer.py +65 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_symmetric_inferable_quantizer.py +86 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_uniform_inferable_quantizer.py +111 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/base_keras_inferable_quantizer.py +56 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/constants.py +25 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +79 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +179 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_pot_inferable_quantizer.py +67 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_symmetric_inferable_quantizer.py +87 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_uniform_inferable_quantizer.py +163 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/validation_functions.py +66 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantize_wrapper.py +269 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizer_utils.py +152 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/__init__.py +35 -0
- model_compression_toolkit/{exporter/model_wrapper/pytorch/quantizers → quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers}/__init__.py +1 -1
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_lut_pot_inferable_quantizer.py +97 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_pot_inferable_quantizer.py +62 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_symmetric_inferable_quantizer.py +83 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_uniform_inferable_quantizer.py +100 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_lut_symmetric_inferable_quantizer.py +95 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_pytorch_inferable_quantizer.py +48 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_symmetric_inferable_quantizer.py +70 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_uniform_inferable_quantizer.py +57 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/constants.py +26 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +77 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +106 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_pot_inferable_quantizer.py +66 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_symmetric_inferable_quantizer.py +104 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_uniform_inferable_quantizer.py +109 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/base_trainable_quantizer.py +200 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/get_quantizer_config.py +116 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/get_quantizers.py +65 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/quant_utils.py +36 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/trainable_quantizer_config.py +97 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/base_keras_quantizer.py +90 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/config_serialization.py +80 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/quantizer_utils.py +48 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +66 -0
- model_compression_toolkit/exporter/model_wrapper/keras/builder/quantize_config_to_node.py +0 -66
- model_compression_toolkit/exporter/model_wrapper/keras/builder/quantizer_to_node.py +0 -134
- model_compression_toolkit/exporter/model_wrapper/keras/extended_quantize_wrapper.py +0 -81
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/activation_quantize_config.py +0 -81
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/weights_activation_quantize_config.py +0 -128
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/weights_quantize_config.py +0 -107
- model_compression_toolkit/exporter/model_wrapper/keras/quantizers/fq_quantizer.py +0 -99
- model_compression_toolkit/exporter/model_wrapper/keras/quantizers/weights_uniform_quantizer.py +0 -105
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantize_config.py +0 -61
- model_compression_toolkit/exporter/model_wrapper/pytorch/quantizers/fq_quantizer.py +0 -59
- model_compression_toolkit/exporter/model_wrapper/pytorch/quantizers/uniform_weights_quantizer.py +0 -67
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/activation_quantize_config.py +0 -52
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/no_quantization_quantize_config.py +0 -46
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/weights_activation_quantize_config.py +0 -54
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/weights_quantize_config.py +0 -52
- model_compression_toolkit/gptq/keras/gptq_model_builder.py +0 -104
- model_compression_toolkit/gptq/keras/optimizers/sam_optimizer.py +0 -119
- model_compression_toolkit/gptq/keras/quantizer/config_factory.py +0 -62
- model_compression_toolkit/gptq/keras/quantizer/configs/base_quantizer_gptq_config.py +0 -65
- model_compression_toolkit/gptq/keras/quantizer/configs/weight_quantizer_gptq_config.py +0 -269
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/base_gumbel_rounding.py +0 -263
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/gumbel_softmax.py +0 -75
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/symmetric_gumbel.py +0 -266
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/uniform_gumbel.py +0 -247
- model_compression_toolkit/gptq/keras/quantizer/kernel_functions.py +0 -50
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/uniform_ste.py +0 -49
- model_compression_toolkit/gptq/pytorch/gptq_graph_info.py +0 -94
- model_compression_toolkit/gptq/pytorch/gptq_model_builder.py +0 -113
- model_compression_toolkit/gptq/pytorch/quantizer/gptq_quantizer.py +0 -71
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/__init__.py +0 -14
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/base_gumbel_weights_quantizer.py +0 -157
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/sym_gumbel_weights_quantizer.py +0 -150
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/uniform_gumbel_weights_quantizer.py +0 -143
- model_compression_toolkit/gptq/pytorch/quantizer/quantizer_wrapper.py +0 -103
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/ste_weights_quantizer.py +0 -103
- model_compression_toolkit/qat/keras/qat_model_builder.py +0 -105
- model_compression_toolkit/qat/keras/quantizer/quantization_dispatcher_builder.py +0 -56
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetirc_ste.py +0 -145
- model_compression_toolkit/qunatizers_infrastructure/__init__.py +0 -8
- model_compression_toolkit/qunatizers_infrastructure/common/__init__.py +0 -14
- model_compression_toolkit/qunatizers_infrastructure/common/base_quantizer.py +0 -123
- model_compression_toolkit/qunatizers_infrastructure/common/node_quantization_dispatcher.py +0 -65
- model_compression_toolkit/qunatizers_infrastructure/keras/__init__.py +0 -14
- model_compression_toolkit/qunatizers_infrastructure/keras/base_keras_quantizer.py +0 -75
- model_compression_toolkit/qunatizers_infrastructure/keras/config_serialization.py +0 -83
- model_compression_toolkit/qunatizers_infrastructure/keras/keras_node_quantization_dispatcher.py +0 -74
- model_compression_toolkit/qunatizers_infrastructure/keras/quantize_wrapper.py +0 -194
- model_compression_toolkit/qunatizers_infrastructure/pytorch/__init__.py +0 -0
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/{exporter/model_wrapper/pytorch/wrappers_quantize_configs → qat/pytorch/quantizer/ste_rounding}/__init__.py +0 -0
model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from abc import abstractmethod
-from typing import Tuple, Any, Dict, List, Union
+from typing import Tuple, Any, Dict, List, Union, Callable

 import torch
 from networkx import topological_sort
@@ -25,7 +25,7 @@ from model_compression_toolkit.core.common.back2framework.base_model_builder imp
 from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.common.user_info import UserInformation
-from model_compression_toolkit.core.pytorch.back2framework.instance_builder import node_builder
+from model_compression_toolkit.core.pytorch.back2framework.instance_builder import node_builder, identity_wrapper
 from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.core.pytorch.reader.node_holders import DummyPlaceHolder, BufferHolder
 from model_compression_toolkit.core.pytorch.utils import get_working_device
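The new `wrapper` hook imported here comes from `instance_builder.py` (changed +14 -1 in the file list above), whose body is not shown in this diff. A minimal sketch of callables compatible with the call sites in the hunks below (`self.wrapper(n, node_builder(n))` for modules, `self.wrapper(n, n.type)` for functional nodes); everything here is illustrative, not the package's actual implementation:

    import torch

    def identity_wrapper(node, module):
        # Sketch: the default wrapper leaves the built layer untouched.
        return module

    def my_fake_quant(node, out):
        # Placeholder for an activation quantizer; a real wrapper would apply
        # the node's quantization function here. Assumed, not part of this diff.
        return out

    def example_quantize_wrapper(node, module):
        # Sketch of a non-identity wrapper: wrap the layer so its outputs are
        # quantized by the wrapper itself, letting _run_operation skip its own
        # activation fake-quant (is_wrapped=True in the hunks below).
        class Wrapped(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.inner = module

            def forward(self, *args, **kwargs):
                out = self.inner(*args, **kwargs)
                return my_fake_quant(node, out)

        return Wrapped()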
@@ -65,7 +65,8 @@ def _build_input_tensors_list(node: BaseNode,
 def _run_operation(n: BaseNode,
                    input_tensors: List,
                    op_func: Any,
-                   quantize_node_activation_fn
+                   quantize_node_activation_fn,
+                   is_wrapped: bool) -> Tuple[Union[List, torch.Tensor], Union[List, torch.Tensor]]:
     """
     Applying the layer (op_func) to the input tensors (input_tensors).
     If quantized is set to True, and the layer's corresponding node (n) has quantization
@@ -76,6 +77,7 @@ def _run_operation(n: BaseNode,
         input_tensors: List of Pytorch tensors that are the layer's inputs.
         op_func: Module/functional to apply to the input tensors.
         quantize_node_activation_fn: quantization function
+        is_wrapped: Flag to indicate if the layer is already quantization-wrapped, so no activation quantization is needed.
     Returns:
         A tuple of Pytorch tensors. The Module/functional output tensors after applying the
         Module/functional to the input tensors.
@@ -90,7 +92,7 @@ def _run_operation(n: BaseNode,

     # Add a fake quant node if the node has an activation threshold.
     out_tensors_of_n = out_tensors_of_n_float
-    if n.is_activation_quantization_enabled():
+    if n.is_activation_quantization_enabled() and not is_wrapped:
         if isinstance(out_tensors_of_n_float, list):
             out_tensors_of_n_float = torch.cat(out_tensors_of_n_float, dim=0)
         out_tensors_of_n = quantize_node_activation_fn(n, out_tensors_of_n_float)
@@ -142,7 +144,8 @@ class PytorchModel(torch.nn.Module):
                  graph: Graph,
                  append2output: List[Any] = None,
                  fw_info: FrameworkInfo = DEFAULT_PYTORCH_INFO,
-                 return_float_outputs: bool = False
+                 return_float_outputs: bool = False,
+                 wrapper: Callable = identity_wrapper):
         """
         Construct a Pytorch model.

@@ -151,6 +154,7 @@ class PytorchModel(torch.nn.Module):
             append2output: List of nodes or OutTensor objects.
             fw_info: Framework information (e.g., mapping from layers to their attributes to quantize).
             return_float_outputs: Whether the model returns float tensors or not.
+            wrapper: A function that wraps Pytorch layers.
         """
         super(PytorchModel, self).__init__()
         self.graph = graph
@@ -159,6 +163,7 @@ class PytorchModel(torch.nn.Module):
         self.append2output = append2output
         self.return_float_outputs = return_float_outputs
         self.fw_info = fw_info
+        self.wrapper = wrapper
         self._add_modules()

     @abstractmethod
@@ -176,17 +181,21 @@ class PytorchModel(torch.nn.Module):
             Output of the node.

         """
-        raise NotImplemented(f'{self.__class__.__name__}
+        raise NotImplemented(f'{self.__class__.__name__} '
+                             f'have to implement a method for quantization activation nodes.')  # pragma: no cover

     def _add_modules(self):
         for n in self.node_sort:
-            if
+            if isinstance(n, FunctionalNode):
+                # for functional layers
+                setattr(self, n.name, self.wrapper(n, n.type))
+            else:
                 if n.type == BufferHolder:
                     self.add_module(n.name, node_builder(n))
                     self.get_submodule(n.name). \
                         register_buffer(n.name, torch.Tensor(n.get_weights_by_keys(BUFFER)).to(get_working_device()))
                 else:
-                    self.add_module(n.name, node_builder(n))
+                    self.add_module(n.name, self.wrapper(n, node_builder(n)))

     def forward(self,
                 *args: Any) -> Any:
@@ -211,7 +220,8 @@ class PytorchModel(torch.nn.Module):
                 out_tensors_of_n, out_tensors_of_n_float = _run_operation(n,
                                                                           input_tensors,
                                                                           op_func=op_func,
-                                                                          quantize_node_activation_fn=self._quantize_node_activations
+                                                                          quantize_node_activation_fn=self._quantize_node_activations,
+                                                                          is_wrapped=self.wrapper is not identity_wrapper)

                 if isinstance(out_tensors_of_n, list):
                     node_to_output_tensors_dict.update({n: out_tensors_of_n})
@@ -244,7 +254,7 @@ class PytorchModel(torch.nn.Module):
         Returns: Module/functional to apply to the input tensors.

         """
-        return
+        return getattr(self, node.name)


 class PyTorchModelBuilder(BaseModelBuilder):
@@ -256,7 +266,8 @@ class PyTorchModelBuilder(BaseModelBuilder):
                  graph: common.Graph,
                  append2output=None,
                  fw_info: FrameworkInfo = DEFAULT_PYTORCH_INFO,
-                 return_float_outputs: bool = False
+                 return_float_outputs: bool = False,
+                 wrapper: Callable = identity_wrapper):
         """

         Args:
@@ -264,6 +275,7 @@ class PyTorchModelBuilder(BaseModelBuilder):
             append2output: Nodes to append to model's output.
             fw_info: Information about the specific framework of the model that is built.
             return_float_outputs: Whether the model returns float tensors or not.
+            wrapper: A function that wraps Pytorch layers.
         """

         super().__init__(graph,
@@ -271,6 +283,8 @@ class PyTorchModelBuilder(BaseModelBuilder):
                          fw_info,
                          return_float_outputs)

+        self.wrapper = wrapper
+
     def build_model(self) -> Tuple[PytorchModel, UserInformation]:
         """
         Build a PyTorch model and return it.
@@ -279,4 +293,5 @@ class PyTorchModelBuilder(BaseModelBuilder):
         """
         return PytorchModel(self.graph,
                             self.append2output,
-                            return_float_outputs=self.return_float_outputs
+                            return_float_outputs=self.return_float_outputs,
+                            wrapper=self.wrapper), self.graph.user_info
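Taken together, these hunks let a caller inject layer wrapping at build time, which the new exporter and GPTQ paths in this release appear to rely on. A hedged usage sketch (`graph` is assumed to be an MCT Graph; `example_quantize_wrapper` is the illustrative wrapper sketched earlier):

    # Build a model whose layers are wrapped by a custom callable.
    builder = PyTorchModelBuilder(graph,
                                  return_float_outputs=True,
                                  wrapper=example_quantize_wrapper)
    model, user_info = builder.build_model()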
model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/wrapper_quantize_config.py

@@ -38,8 +38,7 @@ class WrapperQuantizeConfig:
         Returns: A List of quantizers for weights quantization.

         """
-        raise NotImplemented
-
+        raise NotImplemented  # pragma: no cover

     def get_activation_quantizers(self) -> list:
         """
@@ -47,7 +46,7 @@ class WrapperQuantizeConfig:
         Returns: A List of quantizers for activation quantization.

         """
-        raise NotImplemented
+        raise NotImplemented  # pragma: no cover


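An aside on the lines above (the `raise NotImplemented` pattern predates this diff): `NotImplemented` is the sentinel meant to be returned from binary special methods, not an exception class, so raising it fails at runtime with `TypeError: exceptions must derive from BaseException`. The idiomatic placeholder is `NotImplementedError`:

    class WrapperQuantizeConfigSketch:
        def get_weights_quantizers(self) -> list:
            # raise NotImplemented     # TypeError at runtime: not an exception
            raise NotImplementedError  # idiomatic abstract-method placeholder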
model_compression_toolkit/core/pytorch/constants.py

@@ -71,6 +71,7 @@ RELU_POT_BOUND = 8.0

 # Supported TP models names for Pytorch:
 DEFAULT_TP_MODEL = 'default'
+IMX500_TP_MODEL = 'imx500'
 TFLITE_TP_MODEL = 'tflite'
 QNNPACK_TP_MODEL = 'qnnpack'

@@ -91,3 +92,7 @@ IN_PROJ_WEIGHT = 'in_proj_weight'
 IN_PROJ_BIAS = 'in_proj_bias'
 BIAS_K = 'bias_k'
 BIAS_V = 'bias_v'
+
+# Batch size value for 'reshape' and 'view' operators,
+# the value is -1 so the batch size is inferred from the length of the array and remaining dimensions.
+BATCH_DIM_VALUE = -1
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py
(some removed lines in the second hunk were not captured by the registry's diff rendering and are shown as bare `-` markers)

@@ -20,6 +20,7 @@ import torch.nn as nn
 import operator
 from typing import List

+from model_compression_toolkit.core.common.logger import Logger
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.graph.base_graph import Graph, BaseNode, OutTensor
 from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
@@ -46,32 +47,26 @@ class MHAParams:
         # Only batch first network is supported
         if BATCH_FIRST in mha_node.framework_attr.keys():
             if mha_node.framework_attr[BATCH_FIRST] is not True:
-
+                Logger.error('Only batch first network is supported')  # pragma: no cover
         else:
-
+            Logger.error('Only batch first network is supported')  # pragma: no cover

         # Add Zero Attn feature is Not Implemented
         if ADD_ZERO_ATTN in mha_node.framework_attr.keys():
             if mha_node.framework_attr[ADD_ZERO_ATTN] is not False:
-
+                Logger.error('Add Zero Attn feature is Not Implemented')  # pragma: no cover

         # Check if Add Bias KV feature is Active
         if BIAS_K and BIAS_V in mha_node.weights.keys():
-            if mha_node.weights[BIAS_K] and mha_node.weights[BIAS_V] is not None:
-
+            if mha_node.weights[BIAS_K] is not None and mha_node.weights[BIAS_V] is not None:
+                Logger.error('Add BIAS_KV feature is Not Implemented')  # pragma: no cover

         self.embed_dim = mha_node.framework_attr[EMBED_DIM]
         self.num_heads = mha_node.framework_attr[NUM_HEADS]

-
-            self.kdim = mha_node.framework_attr[KEY_DIM]
-        else:
-            self.kdim = False
+        self.kdim = mha_node.framework_attr[KEY_DIM]

-
-            self.vdim = mha_node.framework_attr[VALUE_DIM]
-        else:
-            self.vdim = False
+        self.vdim = mha_node.framework_attr[VALUE_DIM]

         self.qdim = int(self.embed_dim / self.num_heads)
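The BIAS_K/BIAS_V change above fixes a genuine precedence bug: `x and y is not None` parses as `x and (y is not None)`, so the left operand is truth-tested instead of compared to None, which either short-circuits silently or, for array weights, raises the ambiguous-truth-value error. (The similar `if BIAS_K and BIAS_V in mha_node.weights.keys():` line keeps that shape.) A quick illustration:

    import numpy as np

    bias_k = np.zeros(4)  # a weight tensor
    bias_v = np.zeros(4)

    # Old form: bool(bias_k) is evaluated first and raises
    # "The truth value of an array with more than one element is ambiguous".
    # if bias_k and bias_v is not None: ...

    # Fixed form: each operand is compared to None explicitly.
    if bias_k is not None and bias_v is not None:
        print('both biases are present')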
@@ -707,7 +702,7 @@ class MultiHeadAttentionDecomposition(common.BaseSubstitution):
         """

         if mha_node.reuse:
-            raise Exception("MCT doesn't support reuse of MultiHeadAttention layer")
+            raise Exception("MCT doesn't support reuse of MultiHeadAttention layer")  # pragma: no cover
         params = MHAParams(mha_node)

         # project
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py

@@ -14,10 +14,13 @@
 # ==============================================================================
 from torch import reshape
 import torch
+
+from model_compression_toolkit.core.common import Logger
 from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
+from model_compression_toolkit.core.pytorch.constants import BATCH_DIM_VALUE


 class ReshapeWithStaticShapes(common.BaseSubstitution):
@@ -47,14 +50,25 @@ class ReshapeWithStaticShapes(common.BaseSubstitution):
         Returns:
             Graph after applying the substitution.
         """
+        # we want the batch size value to be inferred from the length of the array and the remaining dimensions
+        if len(node.output_shape) == 1:
+            node.output_shape[0][0] = BATCH_DIM_VALUE
+        else:
+            Logger.error('Reshape or view nodes should have a single output shape')  # pragma: no cover
+
         # configure the new static output shape attribute
         node.op_call_args = node.output_shape

         # modify the node input info
         node.input_shape = [node.input_shape[0]]
+
+        # the first input is the tensor to be reshaped; we want its batch size value to be inferred
+        # from the length of the array and the remaining dimensions
+        node.input_shape[0][0] = BATCH_DIM_VALUE
+
         nodes_to_check = []
         for in_edge in graph.incoming_edges(node):
-            if in_edge.sink_index > 0:
+            if in_edge.sink_index > 0:  # the first input is the tensor to be reshaped
                 nodes_to_check.append(in_edge.source_node)
                 graph.remove_edge(in_edge.source_node, node)
         for n in nodes_to_check:
@@ -80,4 +94,4 @@ def clean_graph_from_nodes_without_out_edges(graph: Graph,
             graph.remove_edge(in_edge.source_node, node)
         graph.remove_node(node)
     for n in nodes_to_check:
-        clean_graph_from_nodes_without_out_edges(graph, n)
+        clean_graph_from_nodes_without_out_edges(graph, n)
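The substitution turns dynamic `reshape`/`view` shape arguments into static ones, keeping only the batch dimension flexible via `BATCH_DIM_VALUE = -1`. PyTorch infers a -1 dimension from the element count, so the rewritten node still accepts any batch size:

    import torch

    x = torch.randn(8, 3, 4)          # batch of 8
    y = torch.reshape(x, (-1, 12))    # -1 is inferred: shape (8, 12)

    x2 = torch.randn(2, 3, 4)         # different batch, same static args
    y2 = torch.reshape(x2, (-1, 12))  # shape (2, 12)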
model_compression_toolkit/core/pytorch/kpi_data_facade.py

@@ -154,9 +154,9 @@ else:
     # we raise an exception when trying to use this function.
     def pytorch_kpi_data(*args, **kwargs):
         Logger.critical('Installing torch is mandatory when using pytorch_kpi_data. '
-                        'Could not find Tensorflow package.')
+                        'Could not find Tensorflow package.')  # pragma: no cover


     def pytorch_kpi_data_experimental(*args, **kwargs):
         Logger.critical('Installing torch is mandatory when using pytorch_kpi_data. '
-                        'Could not find Tensorflow package.')
+                        'Could not find Tensorflow package.')  # pragma: no cover

model_compression_toolkit/core/pytorch/quantization_facade.py

@@ -269,9 +269,9 @@ else:
     def pytorch_post_training_quantization(*args, **kwargs):
         Logger.critical('Installing Pytorch is mandatory '
                         'when using pytorch_post_training_quantization. '
-                        'Could not find the torch package.')
+                        'Could not find the torch package.')  # pragma: no cover

     def pytorch_post_training_quantization_mixed_precision(*args, **kwargs):
         Logger.critical('Installing tensorflow and tensorflow_model_optimization is mandatory '
                         'when using pytorch_post_training_quantization_mixed_precision. '
-                        'Could not find Tensorflow package.')
+                        'Could not find Tensorflow package.')  # pragma: no cover
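The `# pragma: no cover` markers added throughout this release are coverage.py exclusion pragmas: marked lines (or whole branches, when placed on a branch header) are omitted from coverage reports, the usual treatment for guards that only run when a dependency is missing (incidentally, the 'Could not find Tensorflow package.' wording in the torch stubs looks like a copy-paste slip, since the missing package is torch). For example:

    def pytorch_kpi_data(*args, **kwargs):
        # Reachable only when torch is absent, so it is excluded from
        # coverage measurement instead of being artificially exercised.
        raise ImportError('torch is required')  # pragma: no cover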
model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py

@@ -17,6 +17,7 @@ import torch

 from model_compression_toolkit.core.common.constants import THRESHOLD, SIGNED, RANGE_MIN, RANGE_MAX
 from model_compression_toolkit.core.common.quantization.quantizers.uniform_quantizers import threshold_is_power_of_two
+from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero


 def get_symmetric_quantization_range_and_scale(activation_is_signed: bool,
@@ -62,9 +63,9 @@ def power_of_two_quantization(activation_n_bits: int,
     activation_is_signed = quantization_params.get(SIGNED)

     if activation_threshold is None or activation_is_signed is None:
-        return None
+        return None  # pragma: no cover
     if not threshold_is_power_of_two(activation_threshold, per_channel=False):
-        return None
+        return None  # pragma: no cover

     min_value, max_value, scale = get_symmetric_quantization_range_and_scale(activation_is_signed,
                                                                              activation_n_bits,
@@ -90,7 +91,7 @@ def symmetric_quantization(activation_n_bits: int,
     activation_is_signed = quantization_params.get(SIGNED)

     if activation_threshold is None or activation_is_signed is None:
-        return None
+        return None  # pragma: no cover

     min_value, max_value, scale = get_symmetric_quantization_range_and_scale(activation_is_signed,
                                                                              activation_n_bits,
@@ -115,16 +116,17 @@ def uniform_quantization(activation_n_bits: int,
     a, b = quantization_params.get(RANGE_MIN), quantization_params.get(RANGE_MAX)

     if a is None or b is None:
-        return None
+        return None  # pragma: no cover

     # fixing quantization range to include 0
     a = 0 if a > 0 else a
     b = 0 if b < 0 else b
+    a, b = fix_range_to_include_zero(a, b, activation_n_bits)

     min_value = 0
     max_value = 2 ** activation_n_bits - 1
     scale = (b - a) / ((2 ** activation_n_bits) - 1)
-    zero_point = -
+    zero_point = -round(a / scale)  # zp has to be positive, and a <= 0, so we multiply by -1

     return lambda x: q(x, min_value, max_value, scale, zero_point)
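To make the new `zero_point` line concrete, a worked example with assumed values (the helper `fix_range_to_include_zero`, imported above, adjusts (a, b) so that zero lands exactly on the quantization grid, as its name suggests):

    a, b, n_bits = -0.5, 1.5, 8          # assumed activation range

    scale = (b - a) / (2 ** n_bits - 1)  # 2.0 / 255 ~= 0.00784
    zero_point = -round(a / scale)       # -round(-63.75) = 64
    assert zero_point >= 0               # holds because a <= 0 after the clamping above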
model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py

@@ -57,7 +57,7 @@ class PytorchLUTFakeQuant(torch.nn.Module):
             Quantized torch Tensor.
         """
         if self.activation_is_signed is None or self.cluster_centers is None or self.threshold is None:
-            return None
+            return None  # pragma: no cover

         _quant_output = self.lut_kmeans_quantizer(x)
         return _quant_output
model_compression_toolkit/core/tpc_models/get_target_platform_capabilities.py

@@ -17,14 +17,18 @@ from model_compression_toolkit.core.common.target_platform import TargetPlatform

 from model_compression_toolkit.core.tpc_models.default_tpc.target_platform_capabilities import \
     tpc_dict as default_tpc_dict
+from model_compression_toolkit.core.tpc_models.imx500_tpc.target_platform_capabilities import \
+    tpc_dict as imx500_tpc_dict
 from model_compression_toolkit.core.tpc_models.tflite_tpc.target_platform_capabilities import \
     tpc_dict as tflite_tpc_dict
 from model_compression_toolkit.core.tpc_models.qnnpack_tpc.target_platform_capabilities import \
     tpc_dict as qnnpack_tpc_dict
-from model_compression_toolkit.core.keras.constants import DEFAULT_TP_MODEL, TFLITE_TP_MODEL,
+from model_compression_toolkit.core.keras.constants import DEFAULT_TP_MODEL, IMX500_TP_MODEL, TFLITE_TP_MODEL, \
+    QNNPACK_TP_MODEL
 from model_compression_toolkit.core.common.constants import LATEST

 tpc_dict = {DEFAULT_TP_MODEL: default_tpc_dict,
+            IMX500_TP_MODEL: imx500_tpc_dict,
             TFLITE_TP_MODEL: tflite_tpc_dict,
             QNNPACK_TP_MODEL: qnnpack_tpc_dict}

@@ -35,7 +39,7 @@ def get_target_platform_capabilities(fw_name: str,
     """
     Get a TargetPlatformCapabilities by the target platform model name and the framework name.
     For now, it supports frameworks 'tensorflow' and 'pytorch'. For both of them
-    the target platform model can be 'default','tflite', or 'qnnpack'.
+    the target platform model can be 'default', 'imx500', 'tflite', or 'qnnpack'.

     Args:
         fw_name: Framework name of the TargetPlatformCapabilities.
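With this registration in place, the new TP model is reachable through the public facade, assuming the same entry point as previous releases:

    import model_compression_toolkit as mct

    # Fetch the IMX500 capabilities description (latest version by default).
    tpc = mct.get_target_platform_capabilities('tensorflow', 'imx500')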
model_compression_toolkit/core/tpc_models/imx500_tpc/__init__.py (renamed from model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/__init__.py)

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
model_compression_toolkit/core/tpc_models/imx500_tpc/latest/__init__.py (new file)

@@ -0,0 +1,24 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from model_compression_toolkit.core.common.constants import FOUND_TF, FOUND_TORCH
+from model_compression_toolkit.core.tpc_models.imx500_tpc.v1.tp_model import get_tp_model, generate_tp_model, \
+    get_op_quantization_configs
+if FOUND_TF:
+    from model_compression_toolkit.core.tpc_models.imx500_tpc.v1.tpc_keras import get_keras_tpc as get_keras_tpc_latest
+    from model_compression_toolkit.core.tpc_models.imx500_tpc.v1.tpc_keras import generate_keras_tpc
+if FOUND_TORCH:
+    from model_compression_toolkit.core.tpc_models.imx500_tpc.v1.tpc_pytorch import get_pytorch_tpc as \
+        get_pytorch_tpc_latest
+    from model_compression_toolkit.core.tpc_models.imx500_tpc.v1.tpc_pytorch import generate_pytorch_tpc
model_compression_toolkit/core/tpc_models/imx500_tpc/target_platform_capabilities.py (new file)

@@ -0,0 +1,45 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from model_compression_toolkit.core.common.constants import FOUND_TF, FOUND_TORCH, TENSORFLOW, PYTORCH, LATEST
+
+###############################
+# Build Tensorflow TPC models
+###############################
+keras_tpc_models_dict = None
+if FOUND_TF:
+    from model_compression_toolkit.core.tpc_models.imx500_tpc.latest import get_keras_tpc_latest
+    from model_compression_toolkit.core.tpc_models.imx500_tpc.v1.tpc_keras import get_keras_tpc as get_keras_tpc_v1
+
+    # Keras: TPC versioning
+    keras_tpc_models_dict = {'v1': get_keras_tpc_v1(),
+                             LATEST: get_keras_tpc_latest()}
+
+###############################
+# Build Pytorch TPC models
+###############################
+pytorch_tpc_models_dict = None
+if FOUND_TORCH:
+    from model_compression_toolkit.core.tpc_models.imx500_tpc.latest import get_pytorch_tpc_latest
+    from model_compression_toolkit.core.tpc_models.imx500_tpc.v1.tpc_pytorch import \
+        get_pytorch_tpc as get_pytorch_tpc_v1
+
+    # Pytorch: TPC versioning
+    pytorch_tpc_models_dict = {'v1': get_pytorch_tpc_v1(),
+                               LATEST: get_pytorch_tpc_latest()}
+
+tpc_dict = {TENSORFLOW: keras_tpc_models_dict,
+            PYTORCH: pytorch_tpc_models_dict}
+
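A sketch of how this nested dict is meant to be indexed (framework key, then TPC version), mirroring the lookup that `get_target_platform_capabilities` performs:

    from model_compression_toolkit.core.common.constants import TENSORFLOW, LATEST
    from model_compression_toolkit.core.tpc_models.imx500_tpc.target_platform_capabilities import tpc_dict

    keras_versions = tpc_dict[TENSORFLOW]  # None when TensorFlow is not installed
    imx500_tpc = keras_versions[LATEST]    # or keras_versions['v1']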
model_compression_toolkit/core/tpc_models/imx500_tpc/v1/__init__.py (new file)

@@ -0,0 +1,16 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+__version__ = 'v1'
model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tp_model.py (new file)

@@ -0,0 +1,156 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import List, Tuple
+
+import model_compression_toolkit as mct
+from model_compression_toolkit.core.common.target_platform import OpQuantizationConfig, TargetPlatformModel
+
+tp = mct.target_platform
+
+
+def get_tp_model() -> TargetPlatformModel:
+    """
+    A method that generates a default target platform model, with base 8-bit quantization configuration and 8, 4, 2
+    bits configuration list for mixed-precision quantization.
+    NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
+    (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
+    'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
+
+    Returns: A TargetPlatformModel object.
+
+    """
+    base_config, mixed_precision_cfg_list = get_op_quantization_configs()
+    return generate_tp_model(default_config=base_config,
+                             base_config=base_config,
+                             mixed_precision_cfg_list=mixed_precision_cfg_list,
+                             name='imx500_tp_model')
+
+
+def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
+    """
+    Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
+    In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
+    default configuration for mixed-precision quantization.
+
+    Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
+
+    """
+    # Create a quantization config.
+    # A quantization configuration defines how an operator
+    # should be quantized on the modeled hardware:
+    eight_bits = tp.OpQuantizationConfig(
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        activation_n_bits=8,
+        weights_n_bits=8,
+        weights_per_channel_threshold=True,
+        enable_weights_quantization=True,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        weights_multiplier_nbits=None)
+
+    # To quantize a model using mixed-precision, create
+    # a list with more than one OpQuantizationConfig.
+    # In this example, we quantize some operations' weights
+    # using 2, 4 or 8 bits, and when using 2 or 4 bits, it's possible
+    # to quantize the operations' activations using LUT.
+    four_bits = eight_bits.clone_and_edit(weights_n_bits=4)
+    two_bits = eight_bits.clone_and_edit(weights_n_bits=2)
+    mixed_precision_cfg_list = [eight_bits, four_bits, two_bits]
+
+    return eight_bits, mixed_precision_cfg_list
+
+
+def generate_tp_model(default_config: OpQuantizationConfig,
+                      base_config: OpQuantizationConfig,
+                      mixed_precision_cfg_list: List[OpQuantizationConfig],
+                      name: str) -> TargetPlatformModel:
+    """
+    Generates TargetPlatformModel with default defined Operators Sets, based on the given base configuration and
+    mixed-precision configurations options list.
+
+    Args:
+        default_config: A default OpQuantizationConfig to set as the TP model default configuration.
+        base_config: An OpQuantizationConfig to set as the TargetPlatformModel base configuration for mixed-precision purposes only.
+        mixed_precision_cfg_list: A list of OpQuantizationConfig to be used as the TP model mixed-precision
+            quantization configuration options.
+        name: The name of the TargetPlatformModel.
+
+    Returns: A TargetPlatformModel object.
+
+    """
+    # Create a QuantizationConfigOptions, which defines a set
+    # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
+    # If the QuantizationConfigOptions contains only one configuration,
+    # this configuration will be used for the operation quantization:
+    default_configuration_options = tp.QuantizationConfigOptions([default_config])
+
+    # Create a TargetPlatformModel and set its default quantization config.
+    # This default configuration will be used for all operations
+    # unless specified otherwise (see OperatorsSet, for example):
+    generated_tpc = tp.TargetPlatformModel(default_configuration_options, name=name)
+
+    # To start defining the model's components (such as operator sets, and fusing patterns),
+    # use 'with' the TargetPlatformModel instance, and create them as below:
+    with generated_tpc:
+        # Create an OperatorsSet to represent a set of operations.
+        # Each OperatorsSet has a unique label.
+        # If a quantization configuration options is passed, these options will
+        # be used for operations that will be attached to this set's label.
+        # Otherwise, it will be a configure-less set (used in fusing):
+
+        # May suit for operations like: Dropout, Reshape, etc.
+        tp.OperatorsSet("NoQuantization",
+                        tp.get_default_quantization_config_options().clone_and_edit(
+                            enable_weights_quantization=False,
+                            enable_activation_quantization=False))
+
+        # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
+        mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                             base_config=base_config)
+
+        # Define operator sets that use mixed_precision_configuration_options:
+        conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options)
+        fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+
+        # Define operations sets without quantization configuration
+        # options (useful for creating fusing patterns, for example):
+        any_relu = tp.OperatorsSet("AnyReLU")
+        add = tp.OperatorsSet("Add")
+        sub = tp.OperatorsSet("Sub")
+        mul = tp.OperatorsSet("Mul")
+        div = tp.OperatorsSet("Div")
+        prelu = tp.OperatorsSet("PReLU")
+        swish = tp.OperatorsSet("Swish")
+        sigmoid = tp.OperatorsSet("Sigmoid")
+        tanh = tp.OperatorsSet("Tanh")
+
+        # Combine multiple operators into a single operator to avoid quantization between
+        # them. To do this we define fusing patterns using the OperatorsSets that were created.
+        # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
+        activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
+        activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid)
+        any_binary = tp.OperatorSetConcat(add, sub, mul, div)
+
+        # ------------------- #
+        # Fusions
+        # ------------------- #
+        tp.Fusing([conv, activations_after_conv_to_fuse])
+        tp.Fusing([fc, activations_after_fc_to_fuse])
+        tp.Fusing([any_binary, any_relu])
+
+    return generated_tpc
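Following the docstring's suggestion, a hedged sketch of building a variant TP model by editing the returned configs and calling `generate_tp_model` (the 16-bit activation width is an arbitrary choice for illustration, and `clone_and_edit` is assumed to accept any OpQuantizationConfig attribute, as the 4- and 2-bit clones above do for weights_n_bits):

    from model_compression_toolkit.core.tpc_models.imx500_tpc.v1.tp_model import (
        generate_tp_model, get_op_quantization_configs)

    base_config, _ = get_op_quantization_configs()

    # Experiment: widen activations to 16 bits, keep the same operator sets.
    wide = base_config.clone_and_edit(activation_n_bits=16)
    custom_tpm = generate_tp_model(default_config=wide,
                                   base_config=wide,
                                   mixed_precision_cfg_list=[wide,
                                                             wide.clone_and_edit(weights_n_bits=4),
                                                             wide.clone_and_edit(weights_n_bits=2)],
                                   name='imx500_tp_model_16a')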