PyPI - mct-nightly - Versions diffs - 2.0.0.20240417.406__tar.gz → 2.0.0.20240419.358__tar.gz - Mend

mct-nightly 2.0.0.20240417.406.tar.gz → 2.0.0.20240419.358.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (495) hide show

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 2.0.0.20240417.406
+Version: 2.0.0.20240419.358
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/mct_nightly.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 2.0.0.20240417.406
+Version: 2.0.0.20240419.358
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/mct_nightly.egg-info/SOURCES.txt RENAMED Viewed

@@ -144,6 +144,7 @@ model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py
 model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py
 model_compression_toolkit/core/common/substitutions/linear_collapsing.py
 model_compression_toolkit/core/common/substitutions/linear_collapsing_substitution.py
+model_compression_toolkit/core/common/substitutions/remove_identity.py
 model_compression_toolkit/core/common/substitutions/residual_collapsing.py
 model_compression_toolkit/core/common/substitutions/scale_equalization.py
 model_compression_toolkit/core/common/substitutions/shift_negative_activation.py
@@ -183,6 +184,7 @@ model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_co
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/matmul_substitution.py
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/relu_bound_to_power_of_2.py
+model_compression_toolkit/core/keras/graph_substitutions/substitutions/remove_identity.py
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/separableconv_decomposition.py
@@ -247,6 +249,7 @@ model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/permute_call_method.py
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py
+model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/__init__.py RENAMED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.0.0.20240417.000406"
+__version__ = "2.0.0.20240419.000358"

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/constants.py RENAMED Viewed

@@ -97,6 +97,8 @@ UPPER_FACTOR = 1.2
 DEC_RANGE_BOTTOM = 0.97
 DEC_RANGE_UPPER = 1.03
+NUM_QPARAM_HESSIAN_SAMPLES = 16
 # Resource utilization computation parameters
 BITS_TO_BYTES = 8.0

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/graph/base_graph.py RENAMED Viewed

@@ -98,8 +98,8 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         tpc_layers = tpc.op_sets_to_layers.get_layers()
         tpc_filtered_layers = [layer for layer in tpc_layers if isinstance(layer, LayerFilterParams)]
         for n in self.nodes:
-            is_node_in_tpc = n.type in tpc_layers or any([n.is_match_filter_params(filtered_layer)
-                                                          for filtered_layer in tpc_filtered_layers])
+            is_node_in_tpc = any([n.is_match_type(_type) for _type in tpc_layers]) or \
+                             any([n.is_match_filter_params(filtered_layer) for filtered_layer in tpc_filtered_layers])
             if n.is_custom:
                 if not is_node_in_tpc:
                     Logger.critical(f'MCT does not support optimizing Keras custom layers. Found a layer of type {n.type}. '

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/graph/base_node.py RENAMED Viewed

@@ -14,7 +14,7 @@
 # ==============================================================================
 import copy
-from typing import Dict, Any, Tuple, List, Type
+from typing import Dict, Any, Tuple, List, Type, Union
 import numpy as np
@@ -151,7 +151,21 @@ class BaseNode:
         """
         return self.reuse or self.reuse_group is not None
-    def get_weights_by_keys(self, name: str) -> np.ndarray:
+    def _get_weight_name(self, name: Union[str, int]) -> List[Union[str, int]]:
+        """
+        Get weight names that match argument name (either string weights or integer for
+        positional weights).
+        Args:
+            name: weight name
+        Returns:
+            A list of weight names that match input "name"
+        """
+        return [k for k in self.weights.keys()
+                if (isinstance(k, int) and name == k) or (isinstance(k, str) and name in k)]
+    def get_weights_by_keys(self, name: Union[str, int]) -> np.ndarray:
         """
         Get a node's weight by its name.
         Args:
@@ -163,7 +177,7 @@ class BaseNode:
         if name is None:
             return None
-        res = [k for k in self.weights.keys() if name in k]
+        res = self._get_weight_name(name)
         if len(res) == 1:  # Make sure there are no duplicates
             return self.weights[res[0]]
         else:
@@ -179,7 +193,7 @@ class BaseNode:
         """
-        res = [k for k in self.weights.keys() if name in k]
+        res = self._get_weight_name(name)
         if len(res) == 1:
             self.weights[res[0]] = tensor
         else:  # Add if not exist
@@ -552,14 +566,17 @@ class BaseNode:
         for fl, qco in tpc.filterlayer2qco.items():
             if self.is_match_filter_params(fl):
                 return qco
-        if self.type in tpc.layer2qco:
-            return tpc.layer2qco.get(self.type)
+        # Extract qco with is_match_type to overcome mismatch of function types in TF 2.15
+        matching_qcos = [_qco for _type, _qco in tpc.layer2qco.items() if self.is_match_type(_type)]
+        if matching_qcos:
+            if len(matching_qcos) > 1:
+                Logger.error('Found duplicate qco types!')
+            return matching_qcos[0]
         return tpc.tp_model.default_qco
     def is_match_type(self, _type: Type) -> bool:
         """
-        Check if input type matches the node type, either in instance type or in type name. Checking the
-        name string is required because of function types changes that occurred in TF 2.15.
+        Check if input type matches the node type, either in instance type or in type name.
         Args:
             _type: other node type
@@ -567,7 +584,7 @@ class BaseNode:
             Whether _type matches the self node type
         """
-        return _type == self.type or _type.__name__ == self.type.__name__
+        return _type == self.type
     def is_match_filter_params(self, layer_filter_params: LayerFilterParams) -> bool:
         """

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/graph/functional_node.py RENAMED Viewed

@@ -1,5 +1,6 @@
-from typing import Dict, Any, Tuple, List
+from typing import Dict, Any, Tuple, Type
+from model_compression_toolkit.constants import FOUND_TF
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 import numpy as np
@@ -71,3 +72,19 @@ class FunctionalNode(BaseNode):
         :return: the node's functional_op
         """
         return self.functional_op
+    def is_match_type(self, _type: Type) -> bool:
+        """
+        Check if input type matches the node type, either in instance type or in type name. Checking the
+        name string is required because of function types changes that occurred in TF 2.15, because it
+        changes the "function" attribute object (e.g. a different tf.add function that will fail the
+        equal operation).
+        Args:
+            _type: other node type
+        Returns:
+            Whether _type matches the self node type
+        """
+        names_match = _type.__name__ == self.type.__name__ if FOUND_TF else False
+        return super().is_match_type(_type) or names_match

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/hessian/hessian_info_service.py RENAMED Viewed

@@ -17,7 +17,6 @@ from functools import partial
 from typing import Callable, List
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
-from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.hessian.trace_hessian_request import TraceHessianRequest
 from model_compression_toolkit.logger import Logger
@@ -38,7 +37,7 @@ class HessianInfoService:
     """
     def __init__(self,
-                 graph: Graph,
+                 graph,
                  representative_dataset: Callable,
                  fw_impl,
                  num_iterations_for_approximation: int = HESSIAN_NUM_ITERATIONS
@@ -151,7 +150,7 @@ class HessianInfoService:
         if required_size==0:
             return []
-        Logger.info(f"Ensuring {required_size} Hessian-trace approximation for node {trace_hessian_request.target_node}.")
+        Logger.info(f"\nEnsuring {required_size} Hessian-trace approximation for node {trace_hessian_request.target_node}.")
         # Replace request of a reused target node with a request of the 'reuse group'.
         if trace_hessian_request.target_node.reuse_group:

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/hessian/trace_hessian_request.py RENAMED Viewed

@@ -16,8 +16,6 @@ from typing import List
 from enum import Enum
-from model_compression_toolkit.core.common import BaseNode
 class HessianMode(Enum):
     """
@@ -54,7 +52,7 @@ class TraceHessianRequest:
     def __init__(self,
                  mode: HessianMode,
                  granularity: HessianInfoGranularity,
-                 target_node: BaseNode,
+                 target_node,
                  ):
         """
         Attributes:

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/network_editors/node_filters.py RENAMED Viewed

@@ -15,6 +15,7 @@
 from typing import Any
 from model_compression_toolkit.core.common.matchers.node_matcher import BaseNodeMatcher
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
 class NodeTypeFilter(BaseNodeMatcher):
@@ -30,7 +31,7 @@ class NodeTypeFilter(BaseNodeMatcher):
         """
         self.node_type = node_type
-    def apply(self, input_object: Any) -> bool:
+    def apply(self, input_object: BaseNode) -> bool:
         """
         Check if input_object is of the type that NodeTypeFilter contains.
@@ -38,9 +39,9 @@ class NodeTypeFilter(BaseNodeMatcher):
             input_object: Node object to check for its type.
         Returns:
-            True if the node if of the type that was passed during the initialization of NodeTypeFilter.
+            True if the node is of the type that was passed during the initialization of NodeTypeFilter.
         """
-        if input_object.type == self.node_type:
+        if input_object.is_match_type(self.node_type):
             return True

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/quantization/node_quantization_config.py RENAMED Viewed

@@ -265,8 +265,6 @@ class WeightsAttrQuantizationConfig:
         self.enable_weights_quantization = weights_attr_cfg.enable_weights_quantization
         self.l_p_value = qc.l_p_value
     @property
     def weights_error_method(self) -> QuantizationErrorMethod:
         """
@@ -412,9 +410,6 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         for attr in node_attrs_list:
             if isinstance(attr, int):
                 # this is a positional attribute, so it needs to be handled separately.
-                # we assume that a positional attribute is quantized with the default configuration provided in the TPC.
-                if op_cfg.default_weight_attr_config.enable_weights_quantization:
-                    Logger.critical(f"Quantizing constant weights is not supported.")
                 self.pos_attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(qc=qc,
                                                                                          weights_attr_cfg=op_cfg.default_weight_attr_config,
                                                                                          weights_channels_axis=weights_channels_axis)

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/quantization/quantization_config.py RENAMED Viewed

@@ -26,14 +26,16 @@ class QuantizationErrorMethod(Enum):
     NOCLIPPING - Use min/max values as thresholds.
-    MSE - Use min square error for minimizing quantization noise.
+    MSE - Use mean square error for minimizing quantization noise.
-    MAE - Use min absolute error for minimizing quantization noise.
+    MAE - Use mean absolute error for minimizing quantization noise.
     KL - Use KL-divergence to make signals distributions to be similar as possible.
     Lp - Use Lp-norm to minimizing quantization noise.
+    HMSE - Use Hessian-based mean squared error for minimizing quantization noise. This method is using Hessian scores to factorize more valuable parameters when computing the error induced by quantization.
     """
     NOCLIPPING = 0
@@ -41,6 +43,7 @@ class QuantizationErrorMethod(Enum):
     MAE = 2
     KL = 4
     LP = 5
+    HMSE = 6
 class QuantizationConfig:

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py RENAMED Viewed

@@ -13,13 +13,16 @@
 # limitations under the License.
 # ==============================================================================
 from copy import deepcopy
-from typing import Tuple, Callable
+from typing import Tuple, Callable, List
 import numpy as np
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
+from model_compression_toolkit.core.common.hessian import TraceHessianRequest, HessianMode, HessianInfoGranularity, \
+    HessianInfoService
 from model_compression_toolkit.core.common.similarity_analyzer import compute_mse, compute_mae, compute_lp_norm
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
-from model_compression_toolkit.constants import FLOAT_32
-from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import uniform_quantize_tensor
+from model_compression_toolkit.constants import FLOAT_32, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import uniform_quantize_tensor, \
+    reshape_tensor_for_per_channel_search
 def _mse_error_histogram(q_bins: np.ndarray,
@@ -371,13 +374,63 @@ def _get_sliced_histogram(bins: np.ndarray,
     return bins_subset, counts_subset
+def _compute_hessian_for_hmse(node,
+                              hessian_info_service: HessianInfoService,
+                              num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> List[np.ndarray]:
+    """
+    Compute and retrieve Hessian-based scores for using during HMSE error computation.
+    Args:
+        node: The node to compute Hessian-based scores for.
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores.
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on.
+    Returns: A list with computed Hessian-based scores tensors for the given node.
+    """
+    _request = TraceHessianRequest(mode=HessianMode.WEIGHTS,
+                                   granularity=HessianInfoGranularity.PER_ELEMENT,
+                                   target_node=node)
+    _scores_for_node = hessian_info_service.fetch_hessian(_request,
+                                                          required_size=num_hessian_samples)
+    return _scores_for_node
+def _hmse_error_function_wrapper(float_tensor: np.ndarray,
+                                 fxp_tensor: np.ndarray,
+                                 axis: int,
+                                 norm: bool,
+                                 hessian_scores: np.ndarray):
+    """
+    This function wraps the HMSE error method to enable using it during parameters selection.
+    Args:
+        float_tensor: Float tensor.
+        fxp_tensor: Quantized tensor.
+        axis: Axis along which the operation has been performed. If not None, then per-channel computation is expected.
+        norm: Indicates whether to normalize the result of the error function.
+        hessian_scores: A tensor with Hessian-based scores to use for Hessian-based MSE (HMSE) error computation.
+    Returns: The HMSE error between the float and fixed-point tensors.
+    """
+    if axis is not None:
+        hessian_scores = reshape_tensor_for_per_channel_search(hessian_scores, 0)
+    return compute_mse(float_tensor, fxp_tensor, axis, norm, weights=hessian_scores)
 def get_threshold_selection_tensor_error_function(quantization_method: QuantizationMethod,
                                                   quant_error_method: qc.QuantizationErrorMethod,
                                                   p: int,
                                                   axis: int = None,
                                                   norm: bool = False,
                                                   n_bits: int = 8,
-                                                  signed: bool = True) -> Callable:
+                                                  signed: bool = True,
+                                                  node=None,
+                                                  hessian_info_service: HessianInfoService = None,
+                                                  num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Callable:
     """
     Returns the error function compatible to the provided threshold method,
     to be used in the threshold optimization search for tensor quantization.
@@ -389,6 +442,9 @@ def get_threshold_selection_tensor_error_function(quantization_method: Quantizat
         norm: Indicates whether to normalize the result of the error function.
         n_bits: Number of bits used to quantize the tensor.
         signed: Indicates whether the input is signed.
+        node: The node for which the quantization error is computed (used only with HMSE error method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     Returns: a Callable method that calculates the error between a tensor and a quantized tensor.
     """
@@ -418,6 +474,13 @@ def get_threshold_selection_tensor_error_function(quantization_method: Quantizat
                                                                           n_bits=n_bits,
                                                                           per_channel=True)
+    if quant_error_method == qc.QuantizationErrorMethod.HMSE:
+        node_hessian_scores = _compute_hessian_for_hmse(node, hessian_info_service, num_hessian_samples)
+        node_hessian_scores = np.sqrt(np.mean(node_hessian_scores, axis=0))
+        return lambda x, y, threshold: _hmse_error_function_wrapper(x, y, norm=norm, axis=axis,
+                                                                    hessian_scores=node_hessian_scores)
     quant_method_error_function_mapping = {
         qc.QuantizationErrorMethod.MSE: lambda x, y, threshold: compute_mse(x, y, norm=norm, axis=axis),
         qc.QuantizationErrorMethod.MAE: lambda x, y, threshold: compute_mae(x, y, norm=norm, axis=axis),

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py RENAMED Viewed

@@ -13,12 +13,14 @@
 # limitations under the License.
 # ==============================================================================
+from typing import Dict
 import numpy as np
 from sklearn.cluster import KMeans
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
 from model_compression_toolkit.constants import LUT_VALUES, MIN_THRESHOLD, SCALE_PER_CHANNEL, \
-    LUT_VALUES_BITWIDTH, THRESHOLD
+    LUT_VALUES_BITWIDTH, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import \
     max_power_of_two, int_quantization_with_threshold
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.symmetric_selection import \
@@ -37,7 +39,10 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
                       n_iter: int = 10,
                       min_threshold: float = MIN_THRESHOLD,
                       quant_error_method: qc.QuantizationErrorMethod = None,
-                      is_symmetric=False) -> dict:
+                      is_symmetric: bool = False,
+                      node=None,
+                      hessian_info_service: HessianInfoService = None,
+                      num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Dict:
     """
     The quantizer first finds the closest max value per channel of tensor_data.
     Now, we divide tensor_data with the threshold vector per channel. In addition, we scale the result to the range
@@ -53,7 +58,10 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
         n_iter: Number of iterations to search_methods for the optimal threshold.
         min_threshold: Minimal threshold to chose when the computed one is smaller.
         quant_error_method: an error function to optimize the parameters' selection accordingly (not used for this method).
-        is_symmetric (bool): Whether to apply symmetric weight quantization (default is False, meaning power of 2 quantization)
+        is_symmetric (bool): Whether to apply symmetric weight quantization (default is False, meaning power of 2 quantization).
+        node: The node for which the quantization error is computed (not used for this method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (not used for this method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (not used for this method).
     Returns:
         A dictionary containing the cluster assignments according to the k-means algorithm,
@@ -94,7 +102,7 @@ def lut_kmeans_histogram(bins: np.ndarray,
                          constrained: bool = True,
                          n_iter: int = 20,
                          min_threshold: float = MIN_THRESHOLD,
-                         quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
+                         quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> Dict:
     """
     Finds quantization cluster points for non-uniform activation quantization.
     The quantizer first finds the closest power-of-two number to the max value of the given histogram,

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py RENAMED Viewed

@@ -15,7 +15,8 @@
 import numpy as np
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
-from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD
+from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_search import \
     qparams_selection_tensor_search, qparams_selection_histogram_search
 from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import max_power_of_two, get_tensor_max
@@ -31,7 +32,11 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
                                   channel_axis: int = 1,
                                   n_iter: int = 10,
                                   min_threshold: float = MIN_THRESHOLD,
-                                  quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
+                                  quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
+                                  node=None,
+                                  hessian_info_service: HessianInfoService = None,
+                                  num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES,
+                                  ) -> dict:
     """
     Compute the power of two threshold based on the provided QuantizationErrorMethod to quantize the tensor.
     Different search is applied, depends on the value of the selected QuantizationErrorMethod.
@@ -45,6 +50,9 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
         n_iter: Number of iterations to search for the optimal threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
         quant_error_method: an error function to optimize the parameters' selection accordingly.
+        node: The node for which the quantization error is computed (used only with HMSE error method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     Returns:
         Power of two threshold to quantize the tensor in a power of 2 manner.
@@ -57,8 +65,10 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
         signed = True  # weights are always signed
         axis = -1 if per_channel else None
         error_function = get_threshold_selection_tensor_error_function(QuantizationMethod.POWER_OF_TWO,
-                                                                       quant_error_method, p, axis=axis, norm=False, n_bits=n_bits,
-                                                                       signed=signed)
+                                                                       quant_error_method, p, axis=axis, norm=False,
+                                                                       n_bits=n_bits, signed=signed, node=node,
+                                                                       hessian_info_service=hessian_info_service,
+                                                                       num_hessian_samples=num_hessian_samples)
         threshold = qparams_selection_tensor_search(error_function,
                                                     tensor_data,
                                                     n_bits,

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py RENAMED Viewed

@@ -12,10 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import copy
 from tqdm import tqdm
 from typing import List
+from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core import QuantizationErrorMethod
 from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_activations_computation \
     import get_activations_qparams
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_weights_computation import \
@@ -25,7 +30,9 @@ from model_compression_toolkit.logger import Logger
 def calculate_quantization_params(graph: Graph,
                                   nodes: List[BaseNode] = [],
-                                  specific_nodes: bool = False):
+                                  specific_nodes: bool = False,
+                                  hessian_info_service: HessianInfoService = None,
+                                  num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES):
     """
     For a graph, go over its nodes, compute quantization params (for both weights and activations according
     to the given framework info), and create and attach a NodeQuantizationConfig to each node (containing the
@@ -39,6 +46,8 @@ def calculate_quantization_params(graph: Graph,
         graph: Graph to compute its nodes' thresholds.
         nodes: List of nodes to compute their thresholds instead of computing it for all nodes in the graph.
         specific_nodes: Flag to compute thresholds for only specific nodes.
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     """
@@ -60,10 +69,28 @@ def calculate_quantization_params(graph: Graph,
                         output_channels_axis = channels_axis[0]
                     else:
                         output_channels_axis = None
+                    mod_attr_cfg = attr_cfg
+                    if attr_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
+                        kernel_attr_name = graph.fw_info.get_kernel_op_attributes(n.type)
+                        if len(kernel_attr_name) > 0:
+                            kernel_attr_name = kernel_attr_name[0]
+                        if kernel_attr_name is None or kernel_attr_name not in attr:
+                            Logger.warning(f"The HMSE error method for parameters selection is only supported for "
+                                           f"kernel weights attributes. Running parameters selection for attribute "
+                                           f"'{attr}' in node '{n.name}' with the default MSE error method instead.")
+                            mod_attr_cfg = copy.deepcopy(attr_cfg)
+                            mod_attr_cfg.weights_error_method = QuantizationErrorMethod.MSE
                     weights_params = get_weights_qparams(n.get_weights_by_keys(attr),
                                                          candidate_qc.weights_quantization_cfg,
-                                                         attr_cfg,
-                                                         output_channels_axis)
+                                                         mod_attr_cfg,
+                                                         output_channels_axis,
+                                                         node=n,
+                                                         hessian_info_service=hessian_info_service,
+                                                         num_hessian_samples=num_hessian_samples)
                     attr_cfg.set_weights_quantization_param(weights_params)
             if n.is_activation_quantization_enabled():

{mct-nightly-2.0.0.20240417.406 → mct-nightly-2.0.0.20240419.358}/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py RENAMED Viewed

@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Dict, Any, Tuple
+from typing import Dict, Any
 import numpy as np
-from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeWeightsQuantizationConfig, \
@@ -27,31 +28,40 @@ from model_compression_toolkit.core.common.quantization.node_quantization_config
 dummy_channel_mapping = DefaultDict(default_value=(None, None))
-def get_weights_qparams(kernel: np.ndarray,
+def get_weights_qparams(weights_attr_values: np.ndarray,
                         weights_quant_config: NodeWeightsQuantizationConfig,
                         attr_quant_config: WeightsAttrQuantizationConfig,
-                        output_channels_axis: int) -> Dict[Any, Any]:
+                        output_channels_axis: int,
+                        node=None,
+                        hessian_info_service: HessianInfoService = None,
+                        num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Dict[Any, Any]:
     """
     Compute thresholds to quantize a kernel according to a NodeWeightsQuantizationConfig
     instance.
     Args:
-        kernel: Kernel to compute the quantization thresholds to.
+        weights_attr_values: Weights attribute parameter to compute the quantization thresholds for.
         weights_quant_config: Weights quantization configuration to define how the thresholds are computed.
         attr_quant_config: A specific weights attribute quantization configuration to get its params.
         output_channels_axis: Index of the kernel output channels dimension.
+        node: The node for which the quantization error is computed (used only with HMSE error method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     Returns:
         A dictionary with the quantization threshold of the kernel.
     """
     if attr_quant_config.weights_quantization_params_fn is not None:
-        weights_params = attr_quant_config.weights_quantization_params_fn(kernel,
+        weights_params = attr_quant_config.weights_quantization_params_fn(weights_attr_values,
                                                                           p=attr_quant_config.l_p_value,
                                                                           n_bits=attr_quant_config.weights_n_bits,
                                                                           per_channel=attr_quant_config.weights_per_channel_threshold and output_channels_axis is not None,
                                                                           channel_axis=output_channels_axis,
                                                                           min_threshold=weights_quant_config.min_threshold,
-                                                                          quant_error_method=attr_quant_config.weights_error_method)
+                                                                          quant_error_method=attr_quant_config.weights_error_method,
+                                                                          node=node,
+                                                                          hessian_info_service=hessian_info_service,
+                                                                          num_hessian_samples=num_hessian_samples)
     else:
         weights_params = {}

mct-nightly 2.0.0.20240417.406__tar.gz → 2.0.0.20240419.358__tar.gz

mct-nightly 2.0.0.20240417.406tar.gz → 2.0.0.20240419.358tar.gz