mct-nightly 1.8.0.20052023.post401__py3-none-any.whl → 1.8.0.20230610.post356__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. {mct_nightly-1.8.0.20052023.post401.dist-info → mct_nightly-1.8.0.20230610.post356.dist-info}/METADATA +10 -7
  2. {mct_nightly-1.8.0.20052023.post401.dist-info → mct_nightly-1.8.0.20230610.post356.dist-info}/RECORD +68 -115
  3. model_compression_toolkit/__init__.py +23 -3
  4. model_compression_toolkit/core/common/framework_info.py +1 -1
  5. model_compression_toolkit/core/keras/back2framework/instance_builder.py +16 -9
  6. model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +8 -34
  7. model_compression_toolkit/core/pytorch/back2framework/instance_builder.py +5 -1
  8. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +103 -28
  9. model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +39 -44
  10. model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py +1 -1
  11. model_compression_toolkit/exporter/model_exporter/keras/int8_tflite_exporter.py +20 -18
  12. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +3 -3
  13. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py +1 -1
  14. model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +36 -9
  15. model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +4 -4
  16. model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +24 -32
  17. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +31 -8
  18. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +5 -5
  19. model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +34 -8
  20. model_compression_toolkit/gptq/keras/gptq_training.py +15 -16
  21. model_compression_toolkit/gptq/keras/graph_info.py +2 -2
  22. model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +4 -5
  23. model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +5 -7
  24. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +1 -1
  25. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +6 -6
  26. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +7 -7
  27. model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +6 -6
  28. model_compression_toolkit/gptq/pytorch/gptq_training.py +30 -10
  29. model_compression_toolkit/gptq/pytorch/graph_info.py +5 -2
  30. model_compression_toolkit/gptq/pytorch/quantization_facade.py +4 -2
  31. model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +4 -4
  32. model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +5 -7
  33. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +1 -1
  34. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +7 -7
  35. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +7 -8
  36. model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +7 -8
  37. model_compression_toolkit/qat/common/__init__.py +2 -1
  38. model_compression_toolkit/qat/common/qat_config.py +2 -2
  39. model_compression_toolkit/qat/keras/quantization_facade.py +18 -8
  40. model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +1 -1
  41. model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +11 -11
  42. model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +11 -12
  43. model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +12 -13
  44. model_compression_toolkit/qat/pytorch/quantization_facade.py +27 -16
  45. model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +2 -2
  46. model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +31 -4
  47. model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +10 -9
  48. model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +11 -10
  49. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +2 -1
  50. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +1 -25
  51. model_compression_toolkit/{quantizers_infrastructure/inferable_infrastructure/keras/quantizers/constants.py → trainable_infrastructure/__init__.py} +3 -10
  52. model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/common/base_trainable_quantizer.py +3 -3
  53. model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/common/get_quantizer_config.py +1 -1
  54. model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/common/get_quantizers.py +3 -3
  55. model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/keras/base_keras_quantizer.py +4 -4
  56. model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/keras/config_serialization.py +2 -2
  57. model_compression_toolkit/{quantizers_infrastructure/inferable_infrastructure → trainable_infrastructure}/keras/load_model.py +16 -23
  58. model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/pytorch/base_pytorch_quantizer.py +3 -3
  59. model_compression_toolkit/quantizers_infrastructure/__init__.py +0 -23
  60. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/base_inferable_quantizer.py +0 -87
  61. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/constants.py +0 -46
  62. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_all_subclasses.py +0 -31
  63. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_quantizers.py +0 -53
  64. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/quant_utils.py +0 -49
  65. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/activation_quantization_holder.py +0 -147
  66. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantize_wrapper.py +0 -345
  67. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizer_utils.py +0 -85
  68. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/__init__.py +0 -27
  69. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/__init__.py +0 -14
  70. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_lut_pot_inferable_quantizer.py +0 -148
  71. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_pot_inferable_quantizer.py +0 -65
  72. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_symmetric_inferable_quantizer.py +0 -86
  73. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_uniform_inferable_quantizer.py +0 -111
  74. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/base_keras_inferable_quantizer.py +0 -56
  75. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/__init__.py +0 -14
  76. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +0 -79
  77. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +0 -179
  78. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_pot_inferable_quantizer.py +0 -67
  79. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_symmetric_inferable_quantizer.py +0 -87
  80. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_uniform_inferable_quantizer.py +0 -163
  81. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/validation_functions.py +0 -66
  82. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/__init__.py +0 -14
  83. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantize_wrapper.py +0 -269
  84. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizer_utils.py +0 -152
  85. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/__init__.py +0 -35
  86. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/__init__.py +0 -14
  87. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_lut_pot_inferable_quantizer.py +0 -96
  88. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_pot_inferable_quantizer.py +0 -62
  89. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_symmetric_inferable_quantizer.py +0 -83
  90. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_uniform_inferable_quantizer.py +0 -100
  91. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_lut_symmetric_inferable_quantizer.py +0 -95
  92. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_pytorch_inferable_quantizer.py +0 -48
  93. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_symmetric_inferable_quantizer.py +0 -70
  94. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_uniform_inferable_quantizer.py +0 -57
  95. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/constants.py +0 -26
  96. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/__init__.py +0 -14
  97. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +0 -77
  98. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +0 -106
  99. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_pot_inferable_quantizer.py +0 -66
  100. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_symmetric_inferable_quantizer.py +0 -104
  101. model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_uniform_inferable_quantizer.py +0 -109
  102. model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/__init__.py +0 -14
  103. model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/__init__.py +0 -14
  104. model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/__init__.py +0 -14
  105. model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/__init__.py +0 -14
  106. {mct_nightly-1.8.0.20052023.post401.dist-info → mct_nightly-1.8.0.20230610.post356.dist-info}/LICENSE.md +0 -0
  107. {mct_nightly-1.8.0.20052023.post401.dist-info → mct_nightly-1.8.0.20230610.post356.dist-info}/WHEEL +0 -0
  108. {mct_nightly-1.8.0.20052023.post401.dist-info → mct_nightly-1.8.0.20230610.post356.dist-info}/top_level.txt +0 -0
  109. /model_compression_toolkit/{quantizers_infrastructure/inferable_infrastructure → trainable_infrastructure/common}/__init__.py +0 -0
  110. /model_compression_toolkit/{quantizers_infrastructure → trainable_infrastructure/common}/constants.py +0 -0
  111. /model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/common/quant_utils.py +0 -0
  112. /model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/common/trainable_quantizer_config.py +0 -0
  113. /model_compression_toolkit/{quantizers_infrastructure/inferable_infrastructure/common → trainable_infrastructure/keras}/__init__.py +0 -0
  114. /model_compression_toolkit/{quantizers_infrastructure/trainable_infrastructure → trainable_infrastructure}/keras/quantizer_utils.py +0 -0
  115. /model_compression_toolkit/{quantizers_infrastructure/inferable_infrastructure/keras → trainable_infrastructure/pytorch}/__init__.py +0 -0
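Taken together, these moves and deletions show the release's main refactor: the inferable quantization infrastructure leaves model_compression_toolkit.quantizers_infrastructure for the external mct_quantizers package, and the trainable pieces are re-homed under model_compression_toolkit.trainable_infrastructure. A minimal sketch of the import migration implied by the diffs below (assuming mct_quantizers is installed alongside the nightly wheel):

    # Old import paths (1.8.0.20052023.post401), removed in this release:
    # from model_compression_toolkit.quantizers_infrastructure import KerasQuantizationWrapper
    # from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.load_model import keras_load_quantized_model

    # New import paths (1.8.0.20230610.post356), as used throughout the diffs below:
    from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantizationHolder
    from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model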
model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py

@@ -13,6 +13,7 @@
  # limitations under the License.
  # ==============================================================================
  from abc import abstractmethod
+ from functools import partial
  from typing import Tuple, Any, Dict, List, Union, Callable

  import torch
@@ -30,6 +31,7 @@ from model_compression_toolkit.core.pytorch.default_framework_info import DEFAUL
  from model_compression_toolkit.core.pytorch.reader.node_holders import DummyPlaceHolder, BufferHolder
  from model_compression_toolkit.core.pytorch.utils import get_working_device
  from model_compression_toolkit.core.pytorch.constants import BUFFER
+ from mct_quantizers.common.constants import ACTIVATION_HOLDER_QUANTIZER


  def _build_input_tensors_list(node: BaseNode,
@@ -66,7 +68,7 @@ def _run_operation(n: BaseNode,
  input_tensors: List,
  op_func: Any,
  quantize_node_activation_fn,
- is_wrapped: bool) -> Tuple[Union[List,torch.Tensor], Union[List,torch.Tensor]]:
+ use_activation_quantization: bool) -> Tuple[Union[List, torch.Tensor], Union[List, torch.Tensor]]:
  """
  Applying the layer (op_func) to the input tensors (input_tensors).
  If quantized is set to True, and the layer's corresponding node (n) has quantization
@@ -77,7 +79,7 @@ def _run_operation(n: BaseNode,
  input_tensors: List of Pytorch tensors that are the layer's inputs.
  op_func: Module/functional to apply to the input tensors.
  quantize_node_activation_fn: quantization function
- is_wrapped : Flag to indicate if layer is already quantization wrapped so no activation is needed
+ use_activation_quantization: Flag to indicate if we have an activation function.
  Returns:
  A tuple of Pytorch tensors. The Module/functional output tensors after applying the
  Module/functional to the input tensors.
@@ -92,10 +94,10 @@ def _run_operation(n: BaseNode,

  # Add a fake quant node if the node has an activation threshold.
  out_tensors_of_n = out_tensors_of_n_float
- if n.is_activation_quantization_enabled() and not is_wrapped:
+ if use_activation_quantization:
  if isinstance(out_tensors_of_n_float, list):
  out_tensors_of_n_float = torch.cat(out_tensors_of_n_float, dim=0)
- out_tensors_of_n = quantize_node_activation_fn(n, out_tensors_of_n_float)
+ out_tensors_of_n = quantize_node_activation_fn(out_tensors_of_n_float)

  return out_tensors_of_n, out_tensors_of_n_float

@@ -145,7 +147,8 @@ class PytorchModel(torch.nn.Module):
  append2output: List[Any] = None,
  fw_info: FrameworkInfo = DEFAULT_PYTORCH_INFO,
  return_float_outputs: bool = False,
- wrapper: Callable = identity_wrapper):
+ wrapper: Callable = None,
+ get_activation_quantizer_holder_fn: Callable = None):
  """
  Construct a Pytorch model.

@@ -155,17 +158,31 @@ class PytorchModel(torch.nn.Module):
  fw_info: Framework information (e.g., mapping from layers to their attributes to quantize).
  return_float_outputs: Whether the model returns float tensors or not.
  wrapper: A function wrapper Pytorch Layers.
+ get_activation_quantizer_holder_fn: Function to retrieve a quantization holder for a node.
+
  """
  super(PytorchModel, self).__init__()
  self.graph = graph
  self.node_sort = list(topological_sort(graph))
- self.nodes_dict = {}
+ self.node_to_activation_quantization_holder = {}
  self.append2output = append2output
  self.return_float_outputs = return_float_outputs
  self.fw_info = fw_info
  self.wrapper = wrapper
+ self.get_activation_quantizer_holder = get_activation_quantizer_holder_fn
  self._add_modules()

+ # todo: Move to parent class BaseModelBuilder
+ @property
+ def use_activation_holder_during_model_building(self) -> bool:
+ """
+ Returns: Whether or not the model builder uses a PytorchActivationQuantizationHolder during
+ model building (by adding it as a module when converting the graph to a Pytorch model).
+ If so - the model builder expects the activation quantizers not to be wrapped
+ in a PytorchQuantizeWrapper.
+ """
+ return self.get_activation_quantizer_holder is not None
+
  @abstractmethod
  def _quantize_node_activations(self,
  node: BaseNode,
@@ -184,18 +201,50 @@ class PytorchModel(torch.nn.Module):
  raise NotImplemented(f'{self.__class__.__name__} '
  f'have to implement a method for quantization activation nodes.') # pragma: no cover

+ def wrap(self, node):
+ """
+ Wraps a node operation with a wrapper, if one is available.
+
+ Args:
+ node: node to wrap its operation.
+
+ Returns: the node's operation. If a wrapper is available, the operation is wrapped.
+ """
+ if isinstance(node, FunctionalNode):
+ if self.wrapper is None:
+ node_op = node.type
+ else:
+ node_op = self.wrapper(node, node.type)
+ else:
+ if self.wrapper is None or node.type == BufferHolder:
+ node_op = node_builder(node)
+ else:
+ node_op = self.wrapper(node, node_builder(node))
+ return node_op
+
  def _add_modules(self):
- for n in self.node_sort:
- if isinstance(n, FunctionalNode):
+ """
+ Build and add the modules and functional nodes from node_sort list as attributes to PytorchModel
+ """
+ for node in self.node_sort:
+ node_op = self.wrap(node)
+ if isinstance(node, FunctionalNode):
  # for functional layers
- setattr(self, n.name, self.wrapper(n, n.type))
+ setattr(self, node.name, node_op)
  else:
- if n.type == BufferHolder:
- self.add_module(n.name, node_builder(n))
- self.get_submodule(n.name). \
- register_buffer(n.name, torch.Tensor(n.get_weights_by_keys(BUFFER)).to(get_working_device()))
- else:
- self.add_module(n.name, self.wrapper(n, node_builder(n)))
+ self.add_module(node.name, node_op)
+ if node.type == BufferHolder:
+ self.get_submodule(node.name). \
+ register_buffer(node.name,
+ torch.Tensor(node.get_weights_by_keys(BUFFER)).to(get_working_device()))
+
+ # Add activation quantization modules if an activation holder is configured for this node
+ if node.is_activation_quantization_enabled() and self.get_activation_quantizer_holder is not None:
+ activation_quantizer_holder = self.get_activation_quantizer_holder(node)
+ if activation_quantizer_holder is not None:
+ self.add_module(node.name + '_' + ACTIVATION_HOLDER_QUANTIZER, activation_quantizer_holder)
+ self.node_to_activation_quantization_holder.update(
+ {node.name: node.name + '_' + ACTIVATION_HOLDER_QUANTIZER})

  def forward(self,
  *args: Any) -> Any:
@@ -208,28 +257,28 @@ class PytorchModel(torch.nn.Module):
  node_to_output_tensors_dict = dict()
  node_to_output_tensors_dict_float = dict()
  configurable_nodes = self.graph.get_configurable_sorted_nodes_names()
- for n in self.node_sort:
- input_tensors = _build_input_tensors_list(n,
+ for node in self.node_sort:
+ input_tensors = _build_input_tensors_list(node,
  self.graph,
  args,
  node_to_output_tensors_dict)

- op_func = self._get_op_func(n, configurable_nodes)
+ op_func = self._get_op_func(node, configurable_nodes)
+ use_activation_quantization, activation_quantization_fn = self._get_activation_quantization_fn(node)

  # Run node operation and fetch outputs
- out_tensors_of_n, out_tensors_of_n_float = _run_operation(n,
+ out_tensors_of_n, out_tensors_of_n_float = _run_operation(node,
  input_tensors,
  op_func=op_func,
- quantize_node_activation_fn=self._quantize_node_activations,
- is_wrapped=self.wrapper is not identity_wrapper)
+ quantize_node_activation_fn=activation_quantization_fn,
+ use_activation_quantization=use_activation_quantization)

  if isinstance(out_tensors_of_n, list):
- node_to_output_tensors_dict.update({n: out_tensors_of_n})
- node_to_output_tensors_dict_float.update({n: out_tensors_of_n_float})
+ node_to_output_tensors_dict.update({node: out_tensors_of_n})
+ node_to_output_tensors_dict_float.update({node: out_tensors_of_n_float})
  else:
- node_to_output_tensors_dict.update({n: [out_tensors_of_n]})
- node_to_output_tensors_dict_float.update({n: [out_tensors_of_n_float]})
-
+ node_to_output_tensors_dict.update({node: [out_tensors_of_n]})
+ node_to_output_tensors_dict_float.update({node: [out_tensors_of_n_float]})

  if self.append2output:
  outputs = _generate_outputs(self.append2output,
@@ -256,6 +305,28 @@ class PytorchModel(torch.nn.Module):
  """
  return getattr(self, node.name)

+ def _get_activation_quantization_fn(self, node) -> Tuple[bool, bool, Callable]:
+ """
+ Get activation quantization parameters for this node.
+
+ Args:
+ node: Node from which to extract the activation quantization parameters.
+
+ Returns: Flag to indicate if we quantize activations, flag to indicate if we quantize activations
+ using a quantization holder and a quantization function to use for the node's activations.
+ """
+ activation_quantization_holder = self.node_to_activation_quantization_holder.get(node.name)
+ use_activation_quantization = node.is_activation_quantization_enabled()
+ if use_activation_quantization:
+ if activation_quantization_holder is None:
+ activation_quantization_fn = partial(self._quantize_node_activations, node)
+ use_activation_quantization = self.wrapper is None
+ else:
+ activation_quantization_fn = getattr(self, activation_quantization_holder)
+ else:
+ activation_quantization_fn = None
+ return use_activation_quantization, activation_quantization_fn
+

  class PyTorchModelBuilder(BaseModelBuilder):
  """
@@ -267,7 +338,8 @@ class PyTorchModelBuilder(BaseModelBuilder):
  append2output=None,
  fw_info: FrameworkInfo = DEFAULT_PYTORCH_INFO,
  return_float_outputs: bool = False,
- wrapper: Callable = identity_wrapper):
+ wrapper: Callable = None,
+ get_activation_quantizer_holder_fn: Callable = None):
  """

  Args:
@@ -276,6 +348,7 @@ class PyTorchModelBuilder(BaseModelBuilder):
  fw_info: Information about the specific framework of the model that is built.
  return_float_outputs: Whether the model returns float tensors or not.
  wrapper: A function wrapper Pytorch Layers.
+ get_activation_quantizer_holder_fn: Function to retrieve a quantization holder for a node.
  """

  super().__init__(graph,
@@ -284,6 +357,7 @@ class PyTorchModelBuilder(BaseModelBuilder):
  return_float_outputs)

  self.wrapper = wrapper
+ self.get_activation_quantizer_holder_fn = get_activation_quantizer_holder_fn

  def build_model(self) -> Tuple[PytorchModel, UserInformation]:
  """
@@ -294,4 +368,5 @@ class PyTorchModelBuilder(BaseModelBuilder):
  return PytorchModel(self.graph,
  self.append2output,
  return_float_outputs=self.return_float_outputs,
- wrapper=self.wrapper), self.graph.user_info
+ wrapper=self.wrapper,
+ get_activation_quantizer_holder_fn=self.get_activation_quantizer_holder_fn), self.graph.user_info
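The net effect of these hunks: PytorchModel no longer routes quantization decisions through an identity_wrapper default. Weight wrapping is delegated to an optional wrapper, and activation quantization is attached as a separate holder module per node (registered under the node name plus ACTIVATION_HOLDER_QUANTIZER), which forward() resolves via _get_activation_quantization_fn. A standalone sketch of that dispatch rule, with a hypothetical holders dict standing in for the registered sub-modules:

    from functools import partial
    from typing import Callable, Optional, Tuple

    def resolve_activation_quantization(node_name: str,
                                        activation_enabled: bool,
                                        holders: dict,
                                        wrapper: Optional[Callable],
                                        fallback_fn: Callable) -> Tuple[bool, Optional[Callable]]:
        # Mirrors _get_activation_quantization_fn above: prefer a registered holder
        # module; otherwise fall back to _quantize_node_activations, but only when
        # no wrapper already owns the quantization.
        if not activation_enabled:
            return False, None
        holder = holders.get(node_name)
        if holder is None:
            return wrapper is None, partial(fallback_fn, node_name)
        return True, holder  # the holder module itself is callable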
model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py

@@ -14,6 +14,7 @@
  # ==============================================================================
  from typing import Dict, Callable

+ import keras
  import keras.models
  import keras.models
  import tensorflow as tf
@@ -22,9 +23,9 @@ from keras.engine.base_layer import Layer
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.exporter.model_exporter.keras.base_keras_exporter import \
  BaseKerasExporter
- from model_compression_toolkit.quantizers_infrastructure import KerasQuantizationWrapper
-
+ from mct_quantizers import KerasQuantizationWrapper

+ layers = keras.layers

  class FakelyQuantKerasExporter(BaseKerasExporter):
  """
@@ -69,51 +70,45 @@ class FakelyQuantKerasExporter(BaseKerasExporter):
  Layer after unwrapping.

  """
- assert self.is_layer_exportable_fn(layer), f'Layer {layer.name} is not exportable.'
+
+ # Assert each layer is exportable
+ self.is_layer_exportable_fn(layer)

  # If weights are quantized, use the quantized weight for the new built layer.
- if layer.is_weights_quantization:
- new_layer = layer.layer.__class__.from_config(layer.layer.get_config())
- with tf.name_scope(new_layer.name):
- new_layer.build(layer.input_shape)
-
- # Build a list of the layer's new weights.
- weights_list = []
- # Go over weights, check if they should be quantized, and quantize if this is the case:
- for w in new_layer.weights:
- val = None
- for qw in layer.weights:
- if w.name in qw.name:
- # Use quantized weight if layer attribute should be quantized.
- # For example: check if 'kernel_0' is an attribute
- # that should be quantized. First, extract 'kernel' from variable name, check if the
- # quantize config contains this as an attribute for quantization. If so -
- # Take the quantized weight from the quantize_config and set it to the new layer.
- attribute_name = w.name.split('/')[-1].split(':')[0]
- if attribute_name in layer.weights_quantizers.keys():
- quantizer = layer.weights_quantizers.get(attribute_name)
- val = quantizer(qw)
- else:
- val = qw
- if val is None:
- Logger.error(f'Could not match weight name: {w.name}')
- weights_list.append(val)
-
- new_layer.set_weights(weights_list)
- new_layer.trainable = False
-
- # If activations are also quantized, wrap the layer back using ActivationQuantizeConfig
- # from original wrapper (weights wrapping is no longer needed).
- if layer.is_activation_quantization:
- new_layer = KerasQuantizationWrapper(layer=new_layer,
- activation_quantizers=layer.activation_quantizers)
-
- return new_layer
-
- # If this is a layer with activation quantization only, just return it
- # as activation quantization in the fake-quant case uses the wrapper for quantization.
- return layer
+ if isinstance(layer, KerasQuantizationWrapper):
+ if layer.is_weights_quantization:
+ new_layer = layer.layer.__class__.from_config(layer.layer.get_config())
+
+ # Build a list of the layer's new weights.
+ weights_list = []
+
+ # Create a list of weights for the new created layer
+ if isinstance(layer.layer, layers.DepthwiseConv2D):
+ weights_list.append(layer.get_quantized_weights()['depthwise_kernel'])
+ elif isinstance(layer.layer, (layers.Conv2D, layers.Dense, layers.Conv2DTranspose)):
+ weights_list.append(layer.get_quantized_weights()['kernel'])
+ else:
+ Logger.error(f'KerasQuantizationWrapper should wrap only DepthwiseConv2D, Conv2D, Dense'
+ f' and Conv2DTranspose layers but wrapped layer is {layer.layer}')
+
+ if layer.layer.bias is not None:
+ weights_list.append(layer.layer.bias)
+
+ # In order to add the weights of the layer, we need to build it. To build it
+ # we need to pass its input shape. Not every layer has input_shape since some
+ # layers may have multiple inputs with different input shapes (reused layers for
+ # example). For this reason, we take input shape at index 0 (any input shape
+ # should work since the weights are dependent only at some dimensions which have to
+ # be the same for all inputs).
+ with tf.name_scope(new_layer.name):
+ new_layer.build(layer.get_input_shape_at(0))
+
+ new_layer.set_weights(weights_list)
+ new_layer.trainable = False
+
+ return new_layer

+ return layer

  # clone each layer in the model and apply _unwrap_quantize_wrapper to layers wrapped with a QuantizeWrapper.
  self.exported_model = tf.keras.models.clone_model(self.model,
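The rewritten _unwrap_quantize_wrapper replaces the old variable-name matching with KerasQuantizationWrapper.get_quantized_weights(), and restricts unwrapping to the kernel-holding layer types it knows. A condensed sketch of the unwrap flow for a wrapped Conv2D (wrapped is a hypothetical KerasQuantizationWrapper instance; the calls mirror the hunk above):

    import tensorflow as tf
    from tensorflow.keras import layers

    def unwrap_conv(wrapped) -> layers.Layer:
        # Rebuild a plain layer of the same class, carrying the already-quantized kernel.
        new_layer = wrapped.layer.__class__.from_config(wrapped.layer.get_config())
        weights = [wrapped.get_quantized_weights()['kernel']]  # quantized kernel from the wrapper
        if wrapped.layer.bias is not None:
            weights.append(wrapped.layer.bias)  # bias stays unquantized
        with tf.name_scope(new_layer.name):
            new_layer.build(wrapped.get_input_shape_at(0))  # index 0 covers reused layers
        new_layer.set_weights(weights)
        new_layer.trainable = False
        return new_layer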
model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py

@@ -19,9 +19,9 @@ from typing import Callable
  import keras.models
  import tensorflow as tf

- from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.load_model import keras_load_quantized_model
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.exporter.model_exporter.keras.fakely_quant_keras_exporter import FakelyQuantKerasExporter
+ from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model


  class FakelyQuantTFLiteExporter(FakelyQuantKerasExporter):
model_compression_toolkit/exporter/model_exporter/keras/int8_tflite_exporter.py

@@ -22,11 +22,9 @@ from keras import Sequential
  from keras.layers import Dense, Conv2D, Reshape
  from keras.models import clone_model

- from model_compression_toolkit import quantizers_infrastructure as qi
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.exporter.model_exporter.keras.fakely_quant_keras_exporter import FakelyQuantKerasExporter
- from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers import \
- constants as keras_inferable_constants
+ from mct_quantizers import constants as keras_inferable_constants, KerasQuantizationWrapper

  BIAS_INITIALIZER = 'bias_initializer'
  BIAS_REGULARIZER = 'bias_regularizer'
@@ -50,6 +48,7 @@ KERNEL = 'kernel'
  CONV_KERNEL_CHANNEL_AXIS = 3
  CONV_INPUT_CHANNELS_DIM = 4

+
  class INT8TFLiteExporter(FakelyQuantKerasExporter):
  """
  Exporter for INT8 TFLite models.
@@ -75,7 +74,7 @@ class INT8TFLiteExporter(FakelyQuantKerasExporter):

  self.exported_model = None

- def _get_pointwise_layer_to_replace_dense(self, wrapped_layer: qi.KerasQuantizationWrapper) -> keras.layers.Layer:
+ def _get_pointwise_layer_to_replace_dense(self, wrapped_layer: KerasQuantizationWrapper) -> keras.layers.Layer:
  # First we create a pointwise configuration based on the Dense layer's configuration
  dense_cfg = wrapped_layer.layer.get_config()

@@ -94,7 +93,7 @@ class INT8TFLiteExporter(FakelyQuantKerasExporter):

  # Create the point-wise layer
  pw_layer = Conv2D(**pw_cfg)
- pw_layer.build(wrapped_layer.layer.input_shape)
+ pw_layer.build(wrapped_layer.input_shape)

  # Create and set the point-wise weights to assign
  dense_kernel = wrapped_layer.layer.kernel
@@ -110,7 +109,7 @@ class INT8TFLiteExporter(FakelyQuantKerasExporter):
  pw_layer.set_weights(pw_weights)

  # Now that we have the point-wise to replace the dense layer,
- # we need to wrap it using qi.KerasQuantizationWrapper with a new
+ # we need to wrap it using KerasQuantizationWrapper with a new
  # relevant quantizers.
  # Create new kernel quantizer
  pw_kernel_quantizer_cfg = wrapped_layer.weights_quantizers[KERNEL].get_config()
@@ -121,8 +120,10 @@ class INT8TFLiteExporter(FakelyQuantKerasExporter):
  # Unquantized weight to conv layer has 4 dimensions (unlike dense which varies)
  pw_kernel_quantizer_cfg[keras_inferable_constants.INPUT_RANK] = CONV_INPUT_CHANNELS_DIM

- assert isinstance(pw_kernel_quantizer_cfg[keras_inferable_constants.THRESHOLD], np.ndarray), f'Expected to find threshold which is a numpy array, but found: {type(pw_kernel_quantizer_cfg[keras_inferable_constants.THRESHOLD])}'
- pw_kernel_quantizer_cfg[keras_inferable_constants.THRESHOLD] = list(pw_kernel_quantizer_cfg[keras_inferable_constants.THRESHOLD])
+ assert isinstance(pw_kernel_quantizer_cfg[keras_inferable_constants.THRESHOLD],
+ np.ndarray), f'Expected to find threshold which is a numpy array, but found: {type(pw_kernel_quantizer_cfg[keras_inferable_constants.THRESHOLD])}'
+ pw_kernel_quantizer_cfg[keras_inferable_constants.THRESHOLD] = list(
+ pw_kernel_quantizer_cfg[keras_inferable_constants.THRESHOLD])

  # Now that we have the point-wise quantizer we can instantiate it
  quantizer_class = type(wrapped_layer.weights_quantizers[KERNEL])
@@ -131,21 +132,21 @@ class INT8TFLiteExporter(FakelyQuantKerasExporter):
  pw_weights_quantizers[KERNEL] = pw_quantizer

  # Wrap pw with the new quantizers (the activation is not affected thus we take the Dense quantizers)
- wrapped_pw = qi.KerasQuantizationWrapper(pw_layer,
- pw_weights_quantizers,
- wrapped_layer.activation_quantizers)
+ wrapped_pw = KerasQuantizationWrapper(pw_layer,
+ pw_weights_quantizers,
+ wrapped_layer.activation_quantizers)

  # Compute the shape that the input to the new layer should be reshaped into
  # Example: Dense kernel with the following shape (3, 20) expects to have input with the
  # next dimensions (BATCH_SIZE, x0, x1, ..., xn, 20).
  # Conv layer expects 4-rank input. Thus, the input is reshaped to (BATCH_SIZE, 1, x0*x1*...*xn, 20)
- dim = wrapped_layer.layer.input_shape[1:-1]
+ dim = wrapped_layer.input_shape[1:-1]
  target_shape = (1, int(np.prod(dim))) + (dense_kernel.get_shape()[0],)

  return Sequential([
  Reshape(target_shape=target_shape),
  wrapped_pw,
- Reshape(wrapped_layer.layer.output_shape[1:])
+ Reshape(wrapped_layer.output_shape[1:])
  ])

  def export(self) -> None:
@@ -153,17 +154,18 @@ class INT8TFLiteExporter(FakelyQuantKerasExporter):
  Export a fully quantized model to its int8 tflite model.
  """

- def _substitute_model(wrapped_layer: qi.KerasQuantizationWrapper) -> keras.layers.Layer:
+ def _substitute_model(layer_to_substitue: keras.layers.Layer) -> keras.layers.Layer:
  assert self.is_layer_exportable_fn(
- wrapped_layer), f'Layer {wrapped_layer.get_config()} did not pass validation'
+ layer_to_substitue), f'Layer {layer_to_substitue.get_config()} did not pass validation'

  # In order to support dense quantization using per-channel quantization (which is
  # unsupported in TFLITE int models) we substitute each dense layer to its equivalent
  # point-wise convolution.
- if isinstance(wrapped_layer.layer, Dense):
- return self._get_pointwise_layer_to_replace_dense(wrapped_layer)
+ if isinstance(layer_to_substitue, KerasQuantizationWrapper):
+ if isinstance(layer_to_substitue.layer, Dense):
+ return self._get_pointwise_layer_to_replace_dense(layer_to_substitue)

- return wrapped_layer
+ return layer_to_substitue

  # Transform the model to a new model that can be converted to int8 models.
  # For example: replace dense layers with point-wise layers (to support per-channel quantization)
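The Dense-to-pointwise substitution rests on a shape identity: a Dense kernel of shape (C_in, C_out) applied to input (B, x0, ..., xn, C_in) computes the same values as a 1x1 Conv2D applied to that input reshaped to (B, 1, x0*...*xn, C_in), which lets the exporter keep per-channel kernel quantization in int8 TFLite. A small worked example of the target_shape computation from the hunk above (hypothetical dimensions):

    import numpy as np

    # Hypothetical Dense input of shape (batch, 5, 7, 20) with a (20, 32) kernel:
    input_shape = (None, 5, 7, 20)
    dense_kernel_shape = (20, 32)

    dim = input_shape[1:-1]  # (5, 7): all dims between batch and channels
    target_shape = (1, int(np.prod(dim))) + (dense_kernel_shape[0],)
    print(target_shape)  # (1, 35, 20): the Conv2D then sees (batch, 1, 35, 20)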
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py

@@ -21,8 +21,8 @@ from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
  from model_compression_toolkit.exporter.model_exporter.pytorch.base_pytorch_exporter import BasePyTorchExporter
  from packaging import version

- from model_compression_toolkit.quantizers_infrastructure import PytorchQuantizationWrapper
- from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.common.constants import LAYER
+ from mct_quantizers import PytorchQuantizationWrapper
+ from mct_quantizers.common.constants import LAYER

  # ONNX opset version 16 is supported from PyTorch 1.12
  if version.parse(torch.__version__) < version.parse("1.12"):
@@ -68,7 +68,7 @@ class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
  Fake-quant PyTorch model.
  """
  for layer in self.model.children():
- assert self.is_layer_exportable_fn(layer), f'Layer {layer.name} is not exportable.'
+ self.is_layer_exportable_fn(layer)

  model_input = to_torch_tensor(next(self.repr_dataset())[0])

model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py

@@ -57,7 +57,7 @@ class FakelyQuantTorchScriptPyTorchExporter(BasePyTorchExporter):
  Fake-quant PyTorch model.
  """
  for layer in self.model.children():
- assert self.is_layer_exportable_fn(layer), f'Layer {layer} is not exportable.'
+ self.is_layer_exportable_fn(layer)

  torch_traced = torch.jit.trace(self.model,
  to_torch_tensor(next(self.repr_dataset())),
model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py

@@ -12,36 +12,62 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import Tuple

-
- from model_compression_toolkit import quantizers_infrastructure as qi
+ from typing import Tuple, Callable
  from model_compression_toolkit.core import common
  from model_compression_toolkit.core.common import Graph
  from model_compression_toolkit.constants import FOUND_TF
  from model_compression_toolkit.core.common.user_info import UserInformation
  from model_compression_toolkit.logger import Logger
+ from mct_quantizers import KerasActivationQuantizationHolder

  if FOUND_TF:
  import tensorflow as tf
  from tensorflow.keras.layers import Layer
  from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
  from model_compression_toolkit.exporter.model_wrapper.keras.builder.node_to_quantizers import get_quantization_quantizers
+ from mct_quantizers import KerasQuantizationWrapper

  def _get_wrapper(node: common.BaseNode,
- layer: Layer) -> qi.KerasQuantizationWrapper:
+ layer: Layer) -> Layer:
  """
  A function which takes a computational graph node and a keras layer and perform the quantization wrapping
  Args:
- n: A node of mct graph.
+ node: A node of mct graph.
  layer: A keras layer
- include_activation_quantizers: Whether to use the wrapper for the activation quantizer or not

  Returns: Wrapped layer with weights quantizers and activation quantizers

  """
- weights_quantizers, activation_quantizers = get_quantization_quantizers(node)
- return qi.KerasQuantizationWrapper(layer, weights_quantizers, activation_quantizers)
+ weights_quantizers, _ = get_quantization_quantizers(node)
+ if len(weights_quantizers) > 0:
+ return KerasQuantizationWrapper(layer,
+ weights_quantizers)
+ return layer
+
+
+ def get_activation_quantizer_holder(node: common.BaseNode) -> Callable:
+ """
+ Retrieve a ActivationQuantizationHolder layer to use for activation quantization for a node.
+
+ Args:
+ node: Node to get ActivationQuantizationHolder to attach in its output.
+
+ Returns:
+ A ActivationQuantizationHolder layer for the node activation quantization.
+ """
+ _, activation_quantizers = get_quantization_quantizers(node)
+
+ # Holder by definition uses a single quantizer for the activation quantization
+ # thus we make sure this is the only possible case (unless it's a node with no activation
+ # quantization, which in this case has an empty list).
+ if len(activation_quantizers) == 1:
+ return KerasActivationQuantizationHolder(activation_quantizers[0])
+
+ Logger.error(
+ f'ActivationQuantizationHolder supports a single quantizer but {len(activation_quantizers)} quantizers '
+ f'were found for node {node}')
+


  def get_exportable_keras_model(graph: Graph) -> Tuple[tf.keras.models.Model, UserInformation]:
@@ -57,7 +83,8 @@ if FOUND_TF:
  Exportable Keras model and user information.
  """
  exportable_model, user_info = KerasModelBuilder(graph=graph,
- wrapper=_get_wrapper).build_model()
+ wrapper=_get_wrapper,
+ get_activation_quantizer_holder_fn=get_activation_quantizer_holder).build_model()
  exportable_model.trainable = False
  return exportable_model, user_info
  else:
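With this split, weight quantizers stay inside KerasQuantizationWrapper while each quantized activation gets its own KerasActivationQuantizationHolder layer, and the builder receives the two concerns through separate hooks. A minimal standalone sketch of the same selection logic (the quantizer lists stand in for what get_quantization_quantizers(node) returns):

    from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantizationHolder

    def wrap_for_export(layer, weights_quantizers: dict, activation_quantizers: list):
        # Mirrors _get_wrapper / get_activation_quantizer_holder above: wrap weights
        # only when there is something to quantize, and build a separate holder for
        # the (single) activation quantizer, if any.
        wrapped = KerasQuantizationWrapper(layer, weights_quantizers) if weights_quantizers else layer
        holder = None
        if len(activation_quantizers) == 1:
            holder = KerasActivationQuantizationHolder(activation_quantizers[0])
        elif len(activation_quantizers) > 1:
            raise ValueError('ActivationQuantizationHolder supports a single quantizer')
        return wrapped, holder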
model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py

@@ -19,10 +19,10 @@ from model_compression_toolkit.constants import THRESHOLD, RANGE_MIN, RANGE_MAX,

  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
- from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.common.base_inferable_quantizer import QuantizationTarget
- from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.common.get_quantizers import get_inferable_quantizer_class
- from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.base_keras_inferable_quantizer import BaseKerasInferableQuantizer
- from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers import constants as qi_keras_consts
+ from mct_quantizers import QuantizationTarget
+ from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
+ from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer
+ from mct_quantizers import constants as qi_keras_consts

  def get_inferable_quantizer_kwargs(node: BaseNode,
  quantization_target: QuantizationTarget) -> Dict[str, Any]:
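Beyond the import swap, these exporter modules share one clone-and-retarget pattern for inferable quantizers: read the Keras-style config, adjust it, and re-instantiate the same class (seen in the int8 exporter hunk above). A hedged sketch, where existing_quantizer is assumed to be a BaseKerasInferableQuantizer whose config carries the INPUT_RANK and THRESHOLD keys:

    from mct_quantizers import constants as qi_consts

    def clone_quantizer_for_conv(existing_quantizer):
        # Clone a weights quantizer, retargeted from a Dense kernel to a rank-4 conv kernel.
        cfg = existing_quantizer.get_config()  # Keras-style config dict
        cfg[qi_consts.INPUT_RANK] = 4  # conv kernels are rank-4
        cfg[qi_consts.THRESHOLD] = list(cfg[qi_consts.THRESHOLD])  # constructor expects a list, not an ndarray
        return type(existing_quantizer)(**cfg)  # re-instantiate the same quantizer class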