PyPI - mct-nightly - Versions diffs - 0.0.0__py3-none-any.whl → 1.1.0.01122021-003325__py3-none-any.whl - Mend

mct-nightly 0.0.0py3-none-any.whl → 1.1.0.01122021-003325py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

model_compression_toolkit/keras/back2framework/model_builder.py CHANGED Viewed

@@ -14,6 +14,8 @@
 # ==============================================================================
+from enum import Enum
 import tensorflow as tf
 # As from Tensorflow 2.6, keras is a separate package and some classes should be imported differently.
@@ -24,7 +26,6 @@ else:
     from keras import Input
     from keras.layers.core import TFOpLambda
-from model_compression_toolkit.common.model_builder_mode import ModelBuilderMode
 from tensorflow.python.keras.engine.base_layer import TensorFlowOpLayer
 from tensorflow.python.keras.layers import Layer
 from tensorflow_model_optimization.python.core.quantization.keras.quantize_wrapper import QuantizeWrapper
@@ -40,6 +41,7 @@ from model_compression_toolkit.keras.quantizer.gradient_ptq.config_factory impor
 from model_compression_toolkit.common import Node, Graph
 from model_compression_toolkit.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.keras.back2framework.instance_builder import OperationHandler
+from model_compression_toolkit.keras.graph_substitutions.substituter import pre_build_substitute
 from model_compression_toolkit.keras.reader.connectivity_handler import OutTensor
 # In tf2.3 fake quant node is implemented as TensorFlowOpLayer, while in tf2.4 as TFOpLambda.
@@ -48,6 +50,22 @@ FQ_NODE_OP_V2_4 = 'quantization.fake_quant_with_min_max_vars'
 BATCH_INPUT_SHAPE = 'batch_input_shape'
+class ModelBuilderMode(Enum):
+    """
+    Mode for building the model back from a graph:
+    FLOAT - Build model for statistics collection. Model's outputs list contains all output tensors of all nodes
+    in the graph.
+    QUANTIZED - Build a quantized model using the nodes' quantization attributes for adding
+    quantization nodes to the model.
+    GPTQ - Build a quantized model using the nodes' quantization attributes for wrapping
+    layers with QuantizeWrapper and output comparing points.
+    MIXEDPRECISION - Not described in the original documentation; presumably builds a model
+    used for mixed-precision configuration search (TODO confirm against model_builder).
+    """
+    FLOAT = 0
+    QUANTIZED = 1
+    GPTQ = 2
+    MIXEDPRECISION = 3
 def get_node_name_from_layer(layer: Layer) -> str:
     """
     Get a node's name from the layer it was built from. For TensorFlowOpLayer
@@ -182,6 +200,11 @@ def model_builder(graph: common.Graph,
     Returns:
         A tuple of the model, and an UserInformation object.
     """
+    # For quantized models, first apply some substitutions.
+    if mode != ModelBuilderMode.FLOAT:
+        graph = pre_build_substitute(graph)
     node_to_output_tensors_dict = dict()
     model_output_tensors = []

model_compression_toolkit/{common → keras/back2framework}/model_collector.py RENAMED Viewed

@@ -14,14 +14,13 @@
 # ==============================================================================
-import numpy as np
 from typing import List
-from model_compression_toolkit import FrameworkInfo
-from model_compression_toolkit.common.framework_implementation import FrameworkImplementation
+import numpy as np
 from model_compression_toolkit.common.graph.base_graph import Graph
 from model_compression_toolkit.common.logger import Logger
-from model_compression_toolkit.common.model_builder_mode import ModelBuilderMode
+from model_compression_toolkit.keras.back2framework.model_builder import model_builder, ModelBuilderMode
 class ModelCollector(object):
@@ -32,21 +31,16 @@ class ModelCollector(object):
     for thresholds calculations.
     """
-    def __init__(self, graph: Graph, fw_impl: FrameworkImplementation, fw_info: FrameworkInfo):
+    def __init__(self, graph: Graph):
         """
         Build a Keras model from the passed graph, and set the model's
         outputs to be all layers' outputs.
         Args:
             graph: Graph to build a model from it.
-            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         """
         self.graph = graph
-        self.fw_impl = fw_impl
-        self.fw_info = fw_info
         node2fetch = []  # List of graph nodes, the model should output their outputs.
         stats_containers_list = []  # List of output statistics containers of nodes ordered
         # the same as node2fetch so statistics of outputs can be gathered for the correct statistics container.
@@ -72,10 +66,9 @@ class ModelCollector(object):
         # Build a float model and output all layers' outputs
         # (that should be collected) as the model's outputs
-        self.model, _ = self.fw_impl.model_builder(self.graph,
-                                                      mode=ModelBuilderMode.FLOAT,
-                                                      append2output=node2fetch,
-                                                   fw_info=self.fw_info)
+        self.model, _ = model_builder(self.graph,
+                                      mode=ModelBuilderMode.FLOAT,
+                                      append2output=node2fetch)
     def infer(self, inputs_list: List[np.ndarray]):
         """
@@ -84,9 +77,7 @@ class ModelCollector(object):
         Args:
             inputs_list: Inputs for the model inferring.
         """
         # TODO: Thinking about delegating collections to framework
         # TODO: migrate datasets to framework datasets
         tensor_data = self.model(list(inputs_list))
@@ -98,6 +89,6 @@ class ModelCollector(object):
                 if len(sc) != len(td):
                     Logger.exception('"tensor_data" and the model tensor_list must be of the same length')
                 for tdi, sci in zip(td, sc):
-                    sci.update_statistics(self.fw_impl.to_numpy(tdi))
+                    sci.update_statistics(tdi.numpy())
             else:
-                sc.update_statistics(self.fw_impl.to_numpy(td))
+                sc.update_statistics(td.numpy())

model_compression_toolkit/keras/default_framework_info.py CHANGED Viewed

@@ -27,7 +27,6 @@ from model_compression_toolkit.common.quantization.quantizers.power_of_two_quant
 from model_compression_toolkit.keras.constants import SOFTMAX, LINEAR, RELU, SWISH, SIGMOID, IDENTITY, TANH, SELU, \
     KERNEL, DEPTHWISE_KERNEL
 from model_compression_toolkit.keras.quantizer.fake_quant_builder import constraint_quantization
 """
 Division of Keras layers by how they should be quantized.
 KERNEL_OPS: Layers that their coefficients should be quantized.

model_compression_toolkit/keras/gradient_ptq/training_wrapper.py CHANGED Viewed

@@ -19,21 +19,71 @@ import tensorflow as tf
 from tqdm import tqdm
 from model_compression_toolkit import common
-from model_compression_toolkit.common.gptq.gptq_config import GradientPTQConfig
 from model_compression_toolkit.common import Graph
-from model_compression_toolkit.keras.back2framework.model_builder import model_builder
-from model_compression_toolkit.common.model_builder_mode import ModelBuilderMode
+from model_compression_toolkit.keras.back2framework.model_builder import model_builder, ModelBuilderMode
 from model_compression_toolkit.keras.gradient_ptq.graph_info import get_compare_points, \
     get_trainable_parameters
 from model_compression_toolkit.common.framework_info import FrameworkInfo
 from model_compression_toolkit.keras.gradient_ptq.graph_update import update_graph_after_gptq
+from model_compression_toolkit.keras.gradient_ptq.gptq_loss import \
+    multiple_tensors_mse_loss
+from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2
 import numpy as np
+class GradientPTQConfig:
+    """
+    Configuration to use for quantization with GradientPTQ (experimental).
+    A plain container: every constructor argument is stored as an attribute of the same name.
+    """
+    # NOTE(review): the default `optimizer` below is constructed once, when this signature is
+    # evaluated, so every config that relies on the default shares the same Adam instance.
+    def __init__(self,
+                 n_iter: int,
+                 optimizer: OptimizerV2 = tf.keras.optimizers.Adam(learning_rate=0.0001),
+                 loss: Callable = multiple_tensors_mse_loss,
+                 log_function: Callable = None,
+                 train_bias: bool = True,
+                 representative_data_gen: Callable = None):
+        """
+        Initialize a GradientPTQConfig.
+        Args:
+            n_iter (int): Number of iterations to train.
+            optimizer (OptimizerV2): Optimizer to use. Defaults to Adam with a learning rate of 0.0001.
+            loss (Callable): The loss to use. Should accept 2 lists of tf.Tensor: the 1st list holds the quantized tensors, the 2nd the float tensors. Defaults to multiple_tensors_mse_loss.
+            log_function (Callable): Function to log information about the GPTQ process.
+            train_bias (bool): Whether to update the bias during the training or not.
+            representative_data_gen (Callable): Dataset generator.
+        Examples:
+            Create a GradientPTQConfig that runs for 5 iterations and uses a random dataset generator:
+            >>> import numpy as np
+            >>> def repr_datagen(): return [np.random.random((1,224,224,3))]
+            >>> gptq_conf = GradientPTQConfig(n_iter=5, representative_data_gen=repr_datagen)
+            An optimizer can be passed:
+            >>> gptq_conf = GradientPTQConfig(n_iter=5, representative_data_gen=repr_datagen, optimizer=tf.keras.optimizers.Nadam(learning_rate=0.2))
+            To disable the biases training, one may set train_bias to False (enabled by default):
+            >>> gptq_conf = GradientPTQConfig(n_iter=5, representative_data_gen=repr_datagen, train_bias=False)
+            The configuration can then be passed to :func:`~model_compression_toolkit.keras_post_training_quantization`.
+        """
+        self.n_iter = n_iter
+        self.optimizer = optimizer
+        self.loss = loss
+        self.log_function = log_function
+        self.train_bias = train_bias
+        self.representative_data_gen = representative_data_gen
 def gptq_training_wrapper(tg: Graph,
                           representative_data_gen: Callable,
                           gptq_config: GradientPTQConfig,
-                          fw_info: FrameworkInfo) -> Graph:
+                          fw_info: FrameworkInfo):
     """
     Build two models from a graph: A teacher network (float model) and a student network (quantized model).
     Use the dataset generator to pass images through the teacher and student networks to get intermediate
@@ -57,17 +107,14 @@ def gptq_training_wrapper(tg: Graph,
     #########################################
     # Build two models and compare points
     #########################################
-    # TODO: maybe need to add pre_build substitutions here. Ask Elad
     compare_points, _ = get_compare_points(tg)  # get compare points
     n = len(compare_points)
     float_model, float_user_info = model_builder(tg,
                                                  mode=ModelBuilderMode.FLOAT,
-                                                 append2output=compare_points,
-                                                 fw_info=fw_info)
+                                                 append2output=compare_points)
     fxp_model, gptq_user_info = model_builder(tg,
-                                              mode=ModelBuilderMode.GPTQ,
-                                              append2output=compare_points,
-                                              fw_info=fw_info)
+                                            mode=ModelBuilderMode.GPTQ,
+                                            append2output=compare_points)
     trainable_weights = get_trainable_parameters(fxp_model,
                                                  fw_info,

model_compression_toolkit/keras/graph_substitutions/substituter.py ADDED Viewed

@@ -0,0 +1,171 @@
+# Copyright 2021 Sony Semiconductors Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import copy
+from typing import List
+from model_compression_toolkit import common
+from model_compression_toolkit.common.framework_info import FrameworkInfo
+from model_compression_toolkit.common.graph.base_graph import Graph
+from model_compression_toolkit.common.quantization.quantization_config import QuantizationConfig
+from model_compression_toolkit.keras.graph_substitutions.substitutions.activation_decomposition import \
+    ActivationDecomposition
+from model_compression_toolkit.keras.graph_substitutions.substitutions.relu_bound_correction import \
+    ReLUBoundCorrection
+from model_compression_toolkit.keras.graph_substitutions.substitutions.batchnorm_folding import \
+    BatchNormalizationFolding
+from model_compression_toolkit.keras.graph_substitutions.substitutions.input_scaling import InputScaling, InputScalingWithPad
+from model_compression_toolkit.keras.graph_substitutions.substitutions.mark_activation import MarkActivation
+from model_compression_toolkit.keras.graph_substitutions.substitutions.remove_relu_upper_bound import \
+    RemoveReLUUpperBound
+from model_compression_toolkit.keras.graph_substitutions.substitutions.scale_equalization import \
+    ScaleEqualization, ScaleEqualizationWithPad, \
+    ScaleEqualizationMidActivation, ScaleEqualizationMidActivationWithPad
+from model_compression_toolkit.keras.graph_substitutions.substitutions.separableconv_decomposition import \
+    SeparableConvDecomposition
+from model_compression_toolkit.keras.graph_substitutions.substitutions.shift_negative_activation import \
+    apply_shift_negative_correction
+def substitute(graph_to_substitute: common.Graph,
+               substitutions_list: List[common.BaseSubstitution]) -> common.Graph:
+    """
+    Apply a list of substitutions on a copy of a graph.
+    The passed graph is deep-copied first, then the substitutions are applied in list order.
+    Args:
+        graph_to_substitute: Graph to transform (it is deep-copied before any substitution is applied).
+        substitutions_list: List of substitutions to apply on the graph.
+    Returns:
+        Transformed graph after applying all substitutions in substitutions_list.
+    """
+    # Work on a deep copy rather than on the graph object that was passed in.
+    graph = copy.deepcopy(graph_to_substitute)
+    for substitution in substitutions_list:
+        # Matches are collected once per substitution, before any of them is substituted.
+        matched_nodes = graph.filter(substitution.matcher_instance)
+        for idn in matched_nodes:
+            # Each substitution returns the graph to continue working on.
+            graph = substitution.substitute(graph, idn)
+    return graph
+def graph_marking_substitute(graph: Graph) -> Graph:
+    """
+    Build the list of marking substitutions the graph should be transformed according to (before statistics
+    are being collected), apply these substitutions on the graph and return the transformed graph.
+    Args:
+        graph: Graph to apply substitutions on.
+    Returns:
+        Transformed graph after marking substitutions were applied.
+    """
+    marking_substitutions_list = [MarkActivation()]  # mark activation layers that their inputs should not be quantized
+    return substitute(graph,
+                      marking_substitutions_list)
+def pre_statistics_collection_substitute(graph: Graph) -> Graph:
+    """
+    Build the list of substitutions the graph should be transformed according to (before statistics
+    are being collected), apply these substitutions on the graph and return the transformed graph.
+    The substitutions are applied in the order they appear in the list below.
+    Args:
+        graph: Graph to apply substitutions on.
+    Returns:
+        Transformed graph after substitutions.
+    """
+    substitutions_list = [SeparableConvDecomposition(),  # decompose separable node into depthwise and pointwise nodes
+                          ActivationDecomposition(),  # extract activation from linear op to an additional layer
+                          BatchNormalizationFolding(),  # fold batch normalization layer to the preceding linear layer
+                          MarkActivation()]  # mark activation layers that their inputs should not be quantized
+    return substitute(graph,
+                      substitutions_list)
+def post_statistics_collection_substitute(graph: Graph,
+                                          quant_config: QuantizationConfig,
+                                          fw_info: FrameworkInfo) -> Graph:
+    """
+    Build the list of substitutions the graph should be transformed according to (after statistics
+    were collected), apply these substitutions on the graph and return the transformed graph.
+    Which substitutions are used is decided by flags of the quantization configuration.
+    Args:
+        graph: Graph to apply substitutions on.
+        quant_config: Quantization configuration to build the substitutions list according to.
+        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
+        groups of layers by how they should be quantized, etc.)
+    Returns:
+        Transformed graph after substitutions.
+    """
+    substitutions_list = []
+    ######################################
+    # Scale Inputs
+    ######################################
+    if quant_config.input_scaling:
+        substitutions_list.append(InputScaling(quant_config,
+                                               fw_info))
+        substitutions_list.append(InputScalingWithPad(quant_config,
+                                                      fw_info))
+    ######################################
+    # ReLU Bound Correction and Channel Equalization
+    ######################################
+    if quant_config.relu_unbound_correction:
+        substitutions_list.append(ReLUBoundCorrection(quant_config,
+                                                      fw_info))
+    if quant_config.activation_channel_equalization:
+        substitutions_list.append(ScaleEqualization(quant_config,
+                                                    fw_info))
+        substitutions_list.append(ScaleEqualizationWithPad(quant_config,
+                                                           fw_info))
+        substitutions_list.append(ScaleEqualizationMidActivation(quant_config,
+                                                                 fw_info))
+        substitutions_list.append(ScaleEqualizationMidActivationWithPad(quant_config,
+                                                                        fw_info))
+    ######################################
+    # Shift Negative Activations
+    ######################################
+    # Unlike the substitutions above, this correction is applied directly here (it is not
+    # appended to the list), so it runs before the listed substitutions are applied.
+    if quant_config.shift_negative_activation_correction:
+        graph = apply_shift_negative_correction(graph, quant_config, fw_info)
+    return substitute(graph,
+                      substitutions_list)
+def pre_build_substitute(graph: Graph,
+                         remove_relu_bound: bool = True) -> Graph:
+    """
+    Build the list of substitutions the graph should be transformed according to (before building
+    the model back from its graph), apply these substitutions on the graph and return the transformed graph.
+    Args:
+        graph: Graph to apply substitutions on.
+        remove_relu_bound: Whether or not to remove the upper bound of bounded ReLUs in case the quantization
+        threshold bounds the maximal value anyway.
+    Returns:
+        Transformed graph after substitutions.
+    """
+    substitutions_list = []
+    if remove_relu_bound:
+        substitutions_list.append(RemoveReLUUpperBound())
+    # With an empty list, substitute() still returns a deep copy of the graph.
+    return substitute(graph,
+                      substitutions_list)

model_compression_toolkit/keras/graph_substitutions/substitutions/input_scaling.py CHANGED Viewed

@@ -47,6 +47,8 @@ class BaseInputScaling(common.BaseSubstitution):
     """
     def __init__(self,
+                 quantization_config: QuantizationConfig,
+                 fw_info: FrameworkInfo,
                  matcher_instance):
         """
         Matches: InputLayer -> (optional nodes) -> (Dense,Conv2D,DepthwiseConv2D,Conv2DTranspose)
@@ -55,9 +57,16 @@ class BaseInputScaling(common.BaseSubstitution):
         Create a substitution using different params which may affect the way this substitution is made.
         The substitution is looking for edges in the graph which are input layers connected to linear layers.
         Args:
+            quantization_config: QuantizationConfig containing parameters of how the model should be quantized.
+            fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
+            groups of layers by how they should be quantized, etc.)
             matcher_instance: matcher instance of type WalkMatcher
         """
+        self.fw_info = fw_info
+        self.qc = quantization_config
         super().__init__(matcher_instance=matcher_instance)
     def substitute(self,
@@ -80,7 +89,6 @@ class BaseInputScaling(common.BaseSubstitution):
         linear_layer = nodes_list[-1]
         threshold = input_layer.activation_quantization_cfg.activation_quantization_params.get(THRESHOLD)
         if threshold is None:
             return graph
@@ -94,7 +102,7 @@ class BaseInputScaling(common.BaseSubstitution):
             w1_fixed = linear_layer.get_weights_by_keys(KERNEL) * scale_factor
             linear_layer.set_weights_by_keys(KERNEL, w1_fixed)
-            graph.scale_stats_collector(input_layer, 1 / scale_factor)
+            graph.scale_stats_collector(input_layer, 1/scale_factor)
             # After scaling weights may have different thresholds so it needs to be recalculated
             for nqc in linear_layer.candidates_weights_quantization_cfg:
@@ -108,12 +116,18 @@ class InputScaling(BaseInputScaling):
     Substitution extends BaseInputScaling to the case of Input-->Linear
     """
-    def __init__(self):
+    def __init__(self,
+                 quant_config: QuantizationConfig,
+                 fw_info: FrameworkInfo):
         """
         Initialize a ScaleEqualization object.
+        Args:
+            quant_config: QuantizationConfig containing parameters of how the model should be quantized.
+            fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
+            groups of layers by how they should be quantized, etc.)
         """
-        super().__init__(matcher_instance=INPUT_MATCHER)
+        super().__init__(quantization_config=quant_config, fw_info=fw_info, matcher_instance=INPUT_MATCHER)
 class InputScalingWithPad(BaseInputScaling):
@@ -121,9 +135,15 @@ class InputScalingWithPad(BaseInputScaling):
     Substitution extends BaseInputScaling to the case of Input-->ZeroPadding-->Linear
     """
-    def __init__(self):
+    def __init__(self,
+                 quant_config: QuantizationConfig,
+                 fw_info: FrameworkInfo):
         """
         Initialize a ScaleEqualization object.
+        Args:
+            quant_config: QuantizationConfig containing parameters of how the model should be quantized.
+            fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
+            groups of layers by how they should be quantized, etc.)
         """
-        super().__init__(matcher_instance=INPUT_MATCHER_WITH_PAD)
+        super().__init__(quantization_config=quant_config, fw_info=fw_info, matcher_instance=INPUT_MATCHER_WITH_PAD)

model_compression_toolkit/keras/graph_substitutions/substitutions/relu_bound_correction.py CHANGED Viewed

@@ -30,20 +30,27 @@ from model_compression_toolkit.keras.constants import KERNEL, BIAS, ACTIVATION,
 from model_compression_toolkit.keras.constants import RELU
 class ReLUBoundCorrection(common.BaseSubstitution):
     """
     Substitution to scale the weights of two linear nodes, and the bound of non-linear between them
     (if bounded) in order to use the entire constrained range when activations are quantized.
     """
-    def __init__(self):
+    def __init__(self,
+                 quant_config: QuantizationConfig,
+                 fw_info: FrameworkInfo):
         """
         Initialize a ReLUBoundCorrection object.
+        Args:
+            quant_config: QuantizationConfig containing parameters of how the model should be quantized.
+            fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
+            groups of layers by how they should be quantized, etc.)
         """
+        self.fw_info = fw_info
+        self.quant_config = quant_config
         homogeneous_activation_nodes = NodeOperationMatcher(ReLU) | \
                                        NodeOperationMatcher(Activation) & \
                                        NodeFrameworkAttrMatcher(ACTIVATION, RELU)
@@ -115,4 +122,4 @@ class ReLUBoundCorrection(common.BaseSubstitution):
             for nqc in second_op2d_node.candidates_weights_quantization_cfg:
                 nqc.calculate_and_set_weights_params(w2_fixed)
-        return graph
+        return graph

model_compression_toolkit/keras/mixed_precision/sensitivity_evaluation.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
-from tensorflow.python.layers.base import Layer
+from keras.engine.base_layer_v1 import Layer
 from tensorflow import Tensor
 from tensorflow.keras.models import Model
 from tensorflow_model_optimization.python.core.quantization.keras.quantize_wrapper import QuantizeWrapper
@@ -24,8 +24,7 @@ from model_compression_toolkit.common import Node
 from model_compression_toolkit.common.graph.base_graph import Graph
 from model_compression_toolkit.common.mixed_precision.mixed_precision_quantization_config import \
     MixedPrecisionQuantizationConfig
-from model_compression_toolkit.keras.back2framework.model_builder import model_builder
-from model_compression_toolkit.common.model_builder_mode import ModelBuilderMode
+from model_compression_toolkit.keras.back2framework.model_builder import ModelBuilderMode, model_builder
 from model_compression_toolkit.keras.quantizer.mixed_precision.selective_weights_quantize_config import \
     SelectiveWeightsQuantizeConfig
 import numpy as np
@@ -35,7 +34,7 @@ def get_sensitivity_evaluation(graph: Graph,
                                quant_config: MixedPrecisionQuantizationConfig,
                                metrics_weights: np.ndarray,
                                representative_data_gen: Callable,
-                               fw_info: FrameworkInfo) -> Callable:
+                               fw_info: FrameworkInfo):
     """
     Create a function to compute the sensitivity metric of an MP model (the sensitivity
     is computed based on the similarity of the interest points' outputs between the MP model

mct-nightly 0.0.0__py3-none-any.whl → 1.1.0.01122021-003325__py3-none-any.whl

mct-nightly 0.0.0py3-none-any.whl → 1.1.0.01122021-003325py3-none-any.whl