PyPI - mct-nightly - Versions diffs - 2.2.0.20241022.507__py3-none-any.whl → 2.2.0.20241024.501__py3-none-any.whl - Mend

mct-nightly 2.2.0.20241022.507__py3-none-any.whl → 2.2.0.20241024.501__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

model_compression_toolkit/core/pytorch/hessian/hessian_scores_calculator_pytorch.py CHANGED Viewed

@@ -15,12 +15,10 @@
 from typing import Union, List
-from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
-from model_compression_toolkit.core.common import Graph
-from model_compression_toolkit.core.common.hessian import HessianScoresRequest
+import torch
 from model_compression_toolkit.core.common.hessian.hessian_scores_calculator import HessianScoresCalculator
 from model_compression_toolkit.logger import Logger
-import torch
 class HessianScoresCalculatorPytorch(HessianScoresCalculator):
@@ -28,28 +26,20 @@ class HessianScoresCalculatorPytorch(HessianScoresCalculator):
     Pytorch-specific implementation of the Hessian approximation scores Calculator.
     This class serves as a base for other Pytorch-specific Hessian approximation scores calculators.
     """
-    def __init__(self,
-                 graph: Graph,
-                 input_images: List[torch.Tensor],
-                 fw_impl,
-                 hessian_scores_request: HessianScoresRequest,
-                 num_iterations_for_approximation: int = HESSIAN_NUM_ITERATIONS):
+    def _generate_random_vectors_batch(self, shape: tuple, device: torch.device) -> torch.Tensor:
         """
+        Generate a batch of random vectors for Hutchinson estimation using Rademacher distribution.
         Args:
-            graph: Computational graph for the float model.
-            input_images: List of input images for the computation.
-            fw_impl: Framework-specific implementation for Hessian scores computation.
-            hessian_scores_request: Configuration request for which to compute the Hessian approximation scores.
-            num_iterations_for_approximation: Number of iterations to use when approximating the Hessian based scores.
+            shape: target shape.
+            device: target device.
+        Returns:
+            Random tensor.
         """
-        super(HessianScoresCalculatorPytorch, self).__init__(graph=graph,
-                                                             input_images=input_images,
-                                                             fw_impl=fw_impl,
-                                                             hessian_scores_request=hessian_scores_request,
-                                                             num_iterations_for_approximation=num_iterations_for_approximation)
+        v = torch.randint(high=2, size=shape, device=device)
+        v[v == 0] = -1
+        return v
     def concat_tensors(self, tensors_to_concate: Union[torch.Tensor, List[torch.Tensor]]) -> torch.Tensor:
         """

model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py CHANGED Viewed

@@ -12,19 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from tqdm import tqdm
 from typing import List
+import numpy as np
 import torch
 from torch import autograd
-import numpy as np
+from tqdm import tqdm
+from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE
 from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianScoresGranularity
+from model_compression_toolkit.core.pytorch.back2framework.float_model_builder import FloatPyTorchModelBuilder
+from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.core.pytorch.hessian.hessian_scores_calculator_pytorch import \
     HessianScoresCalculatorPytorch
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.pytorch.back2framework.float_model_builder import FloatPyTorchModelBuilder
-from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
-from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS, MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, HESSIAN_EPS
 class WeightsHessianScoresCalculatorPytorch(HessianScoresCalculatorPytorch):
@@ -84,8 +86,8 @@ class WeightsHessianScoresCalculatorPytorch(HessianScoresCalculatorPytorch):
         prev_mean_results = None
         for j in tqdm(range(self.num_iterations_for_approximation)):
-            # Getting a random vector with normal distribution and the same shape as the model output
-            v = torch.randn_like(output_tensor, device=device)
+            # Getting a random vector with the same shape as the model output
+            v = self._generate_random_vectors_batch(output_tensor.shape, device=device)
             f_v = torch.mean(torch.sum(v * output_tensor, dim=-1))
             for i, ipt_node in enumerate(self.hessian_request.target_nodes):  # Per Interest point weights tensor

model_compression_toolkit/core/pytorch/pytorch_implementation.py CHANGED Viewed

@@ -15,7 +15,7 @@
 import operator
 from copy import deepcopy
 from functools import partial
-from typing import List, Any, Tuple, Callable, Type, Dict
+from typing import List, Any, Tuple, Callable, Type, Dict, Generator
 import numpy as np
 import torch
@@ -38,6 +38,7 @@ from model_compression_toolkit.core.common.model_builder_mode import ModelBuilde
 from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
 from model_compression_toolkit.core.common.similarity_analyzer import compute_mse, compute_kl_divergence, compute_cs
 from model_compression_toolkit.core.pytorch.back2framework import get_pytorch_model_builder
+from model_compression_toolkit.core.pytorch.data_util import data_gen_to_dataloader
 from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.batchnorm_folding import \
     pytorch_batchnorm_folding, pytorch_batchnorm_forward_folding
@@ -563,4 +564,9 @@ class PytorchImplementation(FrameworkImplementation):
         return get_inferable_quantizers(node,
                                         get_weights_quantizer_for_node,
                                         get_activations_quantizer_for_node,
-                                        node.get_node_weights_attributes())
+                                        node.get_node_weights_attributes())
+    @staticmethod
+    def convert_data_gen_to_dataloader(data_gen_fn: Callable[[], Generator], batch_size: int):
+        """ Converts data generator into framework dataloader with arbitrary batch size. """
+        return data_gen_to_dataloader(data_gen_fn, batch_size=batch_size)

model_compression_toolkit/core/pytorch/utils.py CHANGED Viewed

@@ -15,7 +15,7 @@
 import torch
 from torch import Tensor
 import numpy as np
-from typing import Union
+from typing import Union, Sequence, Optional, List, Tuple
 from model_compression_toolkit.core.pytorch.constants import MAX_FLOAT16, MIN_FLOAT16
 from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device
@@ -41,30 +41,33 @@ def set_model(model: torch.nn.Module, train_mode: bool = False):
     model.to(device)
-def to_torch_tensor(tensor,
-                    numpy_type=np.float32):
+def to_torch_tensor(data,
+                    dtype: Optional = torch.float32) -> Union[Tensor, List[Tensor], Tuple[Tensor]]:
+    # TODO it would make more sense to keep the original type by default but it will break lots of existing calls
+    # that count on implicit convertion
     """
-    Convert a Numpy array to a Torch tensor.
+    Convert data to Torch tensors and move to the working device.
+    Data can be numpy or torch tensor, a scalar, or a list or a tuple of such data. In the latter case only the inner
+    data is converted.
     Args:
-        tensor: Numpy array.
-        numpy_type: The desired data type for the tensor. Default is np.float32.
+        data: Input data
+        dtype: The desired data type for the tensor. Pass None to keep the type of the input data.
     Returns:
-        Torch tensor converted from the input Numpy array.
+        Torch tensor
     """
     working_device = get_working_device()
-    if isinstance(tensor, torch.Tensor):
-        return tensor.to(working_device)
-    elif isinstance(tensor, list):
-        return [to_torch_tensor(t) for t in tensor]
-    elif isinstance(tensor, tuple):
-        return (to_torch_tensor(t) for t in tensor)
-    elif isinstance(tensor, np.ndarray):
-        return torch.from_numpy(tensor.astype(numpy_type)).to(working_device)
-    elif isinstance(tensor, (int, float)):
-        return torch.from_numpy(np.array(tensor).astype(numpy_type)).to(working_device)
-    else:
-        Logger.critical(f'Unsupported type for conversion to Torch.tensor: {type(tensor)}.')
+    if isinstance(data, list):
+        return [to_torch_tensor(t, dtype) for t in data]
+    if isinstance(data, tuple):
+        return tuple(to_torch_tensor(t, dtype) for t in data)
+    kwargs = {} if dtype is None else {'dtype': dtype}
+    return torch.as_tensor(data, device=working_device, **kwargs)
 def torch_tensor_to_numpy(tensor: Union[torch.Tensor, list, tuple]) -> Union[np.ndarray, list, tuple]:

model_compression_toolkit/core/quantization_prep_runner.py CHANGED Viewed

@@ -90,7 +90,8 @@ def quantization_preparation_runner(graph: Graph,
     # Calculate quantization params
     ######################################
-    calculate_quantization_params(graph, hessian_info_service=hessian_info_service)
+    calculate_quantization_params(graph, fw_impl=fw_impl, repr_data_gen_fn=representative_data_gen,
+                                  hessian_info_service=hessian_info_service)
     if tb_w is not None:
         tb_w.add_graph(graph, 'thresholds_selection')

model_compression_toolkit/core/runner.py CHANGED Viewed

@@ -122,8 +122,7 @@ def core_runner(in_model: Any,
                                      mixed_precision_enable=core_config.is_mixed_precision_enabled,
                                      running_gptq=running_gptq)
-    hessian_info_service = HessianInfoService(graph=graph, representative_dataset_gen=representative_data_gen,
-                                              fw_impl=fw_impl)
+    hessian_info_service = HessianInfoService(graph=graph, fw_impl=fw_impl)
     tg = quantization_preparation_runner(graph=graph,
                                          representative_data_gen=representative_data_gen,

model_compression_toolkit/gptq/common/gptq_config.py CHANGED Viewed

@@ -17,7 +17,6 @@ from enum import Enum
 from typing import Callable, Any, Dict, Optional
 from model_compression_toolkit.constants import GPTQ_HESSIAN_NUM_SAMPLES, ACT_HESSIAN_DEFAULT_BATCH_SIZE
-from model_compression_toolkit.core.common.hessian import HessianScoresGranularity, HessianEstimationDistribution
 from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
@@ -54,7 +53,6 @@ class GPTQHessianScoresConfig:
     scale_log_norm: bool = False
     hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE
     per_sample: bool = False
-    estimator_distribution: HessianEstimationDistribution = HessianEstimationDistribution.GAUSSIAN
 @dataclass

model_compression_toolkit/gptq/common/gptq_training.py CHANGED Viewed

@@ -13,23 +13,21 @@
 # limitations under the License.
 # ==============================================================================
 import copy
-import hashlib
 from abc import ABC, abstractmethod
+from typing import Callable, List, Any, Iterable, Optional, Generator
 import numpy as np
-from typing import Callable, List, Any, Dict
-from model_compression_toolkit.constants import ACT_HESSIAN_DEFAULT_BATCH_SIZE
-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
-from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
+    HessianScoresGranularity, hessian_info_utils as hessian_utils
+from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
 from model_compression_toolkit.gptq.common.gptq_constants import QUANT_PARAM_LEARNING_STR
 from model_compression_toolkit.gptq.common.gptq_framework_implementation import GPTQFrameworkImplemantation
 from model_compression_toolkit.gptq.common.gptq_graph import get_compare_points
-from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
-    HessianScoresGranularity
-from model_compression_toolkit.core.common.hessian import hessian_info_utils as hessian_utils
 class GPTQTrainer(ABC):
@@ -43,6 +41,7 @@ class GPTQTrainer(ABC):
                  gptq_config: GradientPTQConfig,
                  fw_impl: GPTQFrameworkImplemantation,
                  fw_info: FrameworkInfo,
+                 representative_data_gen_fn: Callable[[], Generator],
                  hessian_info_service: HessianInfoService = None):
         """
         Build two models from a graph: A teacher network (float model) and a student network (quantized model).
@@ -56,6 +55,7 @@ class GPTQTrainer(ABC):
             gptq_config: GradientPTQConfig with parameters about the tuning process.
             fw_impl: Framework implementation
             fw_info: Framework information
+            representative_data_gen_fn: factory for representative data generator.
             hessian_info_service: HessianInfoService for fetching and computing Hessian-approximation information.
         """
         self.graph_float = copy.deepcopy(graph_float)
@@ -63,7 +63,7 @@ class GPTQTrainer(ABC):
         self.gptq_config = gptq_config
         self.fw_impl = fw_impl
         self.fw_info = fw_info
+        self.representative_data_gen_fn = representative_data_gen_fn
         # ----------------------------------------------
         # Build two models and create compare nodes
         # ----------------------------------------------
@@ -131,124 +131,69 @@ class GPTQTrainer(ABC):
         return optimizer_with_param
-    def compute_hessian_based_weights(self) -> np.ndarray:
+    def compute_hessian_based_weights(self, data_loader: Iterable) -> np.ndarray:
         """
         Computes scores based on the hessian approximation per layer w.r.t activations of the interest points.
         Returns:
             np.ndarray: Scores based on the hessian matrix approximation.
         """
-        if not self.gptq_config.use_hessian_based_weights:
-            # Return a default weight distribution based on the number of compare points
-            num_nodes = len(self.compare_points)
-            return np.asarray([1 / num_nodes for _ in range(num_nodes)])
-        # Fetch hessian approximations for each target node
-        # TODO this smells like a potential bug. In hessian calculation target nodes are topo sorted and results are returned
-        # TODO also target nodes are replaced for reuse. Does this work correctly?
-        approximations = self._fetch_hessian_approximations(HessianScoresGranularity.PER_TENSOR)
-        compare_point_to_hessian_approx_scores = {node: score for node, score in zip(self.compare_points, approximations)}
-        # Process the fetched hessian approximations to gather them per images
-        hessian_approx_score_by_image = (
-            self._process_hessian_approximations(compare_point_to_hessian_approx_scores))
-        # Check if log normalization is enabled in the configuration
-        if self.gptq_config.hessian_weights_config.log_norm:
-            # Calculate the mean of the approximations across images
-            mean_approx_scores = np.mean(hessian_approx_score_by_image, axis=0)
-            # Reduce unnecessary dims, should remain with one dimension for the number of nodes
-            mean_approx_scores = np.squeeze(mean_approx_scores)
-            # Handle zero values to avoid log(0)
-            mean_approx_scores = np.where(mean_approx_scores != 0, mean_approx_scores,
-                                          np.partition(mean_approx_scores, 1)[1])
-            # Calculate log weights
-            log_weights = np.log10(mean_approx_scores)
-            # Check if scaling of log normalization is enabled in the configuration
-            if self.gptq_config.hessian_weights_config.scale_log_norm:
-                # Scale the log weights to the range [0, 1]
-                return (log_weights - np.min(log_weights)) / (np.max(log_weights) - np.min(log_weights))
-            # Offset the log weights so the minimum value is 0
-            return log_weights - np.min(log_weights)
-        else:
-            # If log normalization is not enabled, return the mean of the approximations across images
-            return np.mean(hessian_approx_score_by_image, axis=0)
-    def _compute_sample_layer_attention_scores(self, inputs_batch) -> Dict[str, Dict[BaseNode, np.ndarray]]:
-        """
-        Compute sample layer attention scores per image hash per layer.
+        request = self._build_hessian_request(
+            HessianScoresGranularity.PER_TENSOR,
+            data_loader=data_loader,
+            n_samples=self.gptq_config.hessian_weights_config.hessians_num_samples
+        )
+        layers_hessians = self.hessian_service.fetch_hessian(request)
-        Args:
-            inputs_batch: a list containing a batch of inputs.
+        hessian_approx_score_by_image = np.stack([layers_hessians[node.name] for node in self.compare_points], axis=1)
+        assert hessian_approx_score_by_image.shape[0] == self.gptq_config.hessian_weights_config.hessians_num_samples
-        Returns:
-            A dictionary with a structure {img_hash: {layer: score}}.
+        if self.gptq_config.hessian_weights_config.norm_scores:
+            hessian_approx_score_by_image = hessian_utils.normalize_scores(hessian_approx_score_by_image)
-        """
-        request = self._build_hessian_request(HessianScoresGranularity.PER_OUTPUT_CHANNEL)
-        hessian_batch_size = self.gptq_config.hessian_weights_config.hessian_batch_size
-        hessian_score_per_image_per_layer = {}
-        # If hessian batch is smaller than inputs batch, split it to hessian batches. If hessian batch is larger,
-        # it's currently ignored (TODO)
-        for i in range(0, inputs_batch[0].shape[0], hessian_batch_size):
-            inputs = [t[i: i+hessian_batch_size] for t in inputs_batch]
-            hessian_score_per_image_per_layer.update(
-                self.hessian_service.compute_trackable_per_sample_hessian(request, inputs)
-            )
-        for img_hash, v in hessian_score_per_image_per_layer.items():
-            hessian_score_per_image_per_layer[img_hash] = {k: t.max(axis=0) for k, t in v.items()}
-        return hessian_score_per_image_per_layer
-    def _fetch_hessian_approximations(self, granularity: HessianScoresGranularity) -> Dict[BaseNode, List[List[float]]]:
-        """
-        Fetches hessian approximations for each target node.
+        # Calculate the mean of the approximations across images
+        mean_approx_scores = np.mean(hessian_approx_score_by_image, axis=0)
+        # assert len(mean_approx_scores.shape) == len(self.compare_points)
-        Returns:
-            Mapping of target nodes to their hessian approximations.
-        """
-        hessian_scores_request = self._build_hessian_request(granularity)
+        if not self.gptq_config.hessian_weights_config.log_norm:
+            return mean_approx_scores
-        node_approximations = self.hessian_service.fetch_hessian(
-            hessian_scores_request=hessian_scores_request,
-            required_size=self.gptq_config.hessian_weights_config.hessians_num_samples,
-            batch_size=self.gptq_config.hessian_weights_config.hessian_batch_size
-        )
-        return node_approximations
+        # Reduce unnecessary dims, should remain with one dimension for the number of nodes
+        mean_approx_scores = np.squeeze(mean_approx_scores)
+        # Handle zero values to avoid log(0)
+        mean_approx_scores = np.where(mean_approx_scores != 0, mean_approx_scores,
+                                      np.partition(mean_approx_scores, 1)[1])
-    def _build_hessian_request(self, granularity):
-        return HessianScoresRequest(
-            mode=HessianMode.ACTIVATION,
-            granularity=granularity,
-            target_nodes=self.compare_points,
-            distribution=self.gptq_config.hessian_weights_config.estimator_distribution
-        )
+        # Calculate log weights
+        log_weights = np.log10(mean_approx_scores)
-    def _process_hessian_approximations(self, approximations: Dict[BaseNode, List[List[float]]]) -> List:
+        if self.gptq_config.hessian_weights_config.scale_log_norm:
+            # Scale the log weights to the range [0, 1]
+            return (log_weights - np.min(log_weights)) / (np.max(log_weights) - np.min(log_weights))
+        # Offset the log weights so the minimum value is 0
+        return log_weights - np.min(log_weights)
+    def _build_hessian_request(self, granularity: HessianScoresGranularity, data_loader: Iterable,
+                               n_samples: Optional[int]) -> HessianScoresRequest:
         """
-        Processes the fetched hessian approximations by image.
-        Receives a dictionary of Node to a list of the length of the number of images that were fetched.
-        Returns list of lists where each inner list is the approximations per image to all interest points.
+        Build hessian request for hessian service.
         Args:
-            approximations: Hessian scores approximations mapping to process.
-            Dictionary of Node to a list of the length of the number of images that were fetched.
+            granularity: requested granularity.
+            data_loader: data loader yielding samples to compute hessians on.
+            n_samples: request number of samples.
         Returns:
-            Processed approximations as a list of lists where each inner list is the approximations
-             per image to all interest points.
+            Hessian request.
         """
-        hessian_approx_score_by_image = [[approximations[target_node][image_idx] for target_node in self.compare_points]
-                                         for image_idx in
-                                         range(self.gptq_config.hessian_weights_config.hessians_num_samples)]
-        if self.gptq_config.hessian_weights_config.norm_scores:
-            hessian_approx_score_by_image = hessian_utils.normalize_scores(hessian_approx_score_by_image)
-        return hessian_approx_score_by_image
+        return HessianScoresRequest(
+            mode=HessianMode.ACTIVATION,
+            granularity=granularity,
+            target_nodes=self.compare_points,
+            data_loader=data_loader,
+            n_samples=n_samples
+        )
     @abstractmethod
     def build_gptq_model(self):
@@ -261,11 +206,9 @@ class GPTQTrainer(ABC):
                              f'framework\'s GPTQ model builder method.')  # pragma: no cover
     @abstractmethod
-    def train(self, representative_data_gen: Callable):
+    def train(self):
         """
-        Train the quantized model using GPTQ training process
-        Args:
-            representative_data_gen: Dataset to use for inputs of the models.
+        Train the quantized model using GPTQ training process.
         """
         raise NotImplemented(f'{self.__class__.__name__} have to implement the '
                              f'framework\'s train method.')  # pragma: no cover
@@ -281,6 +224,7 @@ class GPTQTrainer(ABC):
         raise NotImplemented(f'{self.__class__.__name__} have to implement the '
                              f'framework\'s update_graph method.')  # pragma: no cover
 def gptq_training(graph_float: Graph,
                   graph_quant: Graph,
                   gptq_config: GradientPTQConfig,
@@ -315,7 +259,7 @@ def gptq_training(graph_float: Graph,
                                     hessian_info_service=hessian_info_service)
     # Training process
-    gptq_trainer.train(representative_data_gen)
+    gptq_trainer.train()
     # Update graph
     graph_quant = gptq_trainer.update_graph()

model_compression_toolkit/gptq/keras/gptq_training.py CHANGED Viewed

@@ -24,6 +24,7 @@ from model_compression_toolkit.core.common.hessian import HessianInfoService
 # As from Tensorflow 2.6, keras is a separate package and some classes should be imported differently.
 from model_compression_toolkit.core.common.user_info import UserInformation
 from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
+from model_compression_toolkit.core.keras.data_util import data_gen_to_dataloader
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.gptq.keras.quantizer.quantization_builder import quantization_builder
 from model_compression_toolkit.logger import Logger
@@ -82,6 +83,7 @@ class KerasGPTQTrainer(GPTQTrainer):
                          gptq_config,
                          fw_impl,
                          fw_info,
+                         representative_data_gen_fn=representative_data_gen,
                          hessian_info_service=hessian_info_service)
         self.loss_list = []
@@ -115,10 +117,20 @@ class KerasGPTQTrainer(GPTQTrainer):
         else:
             self.input_scale = self.gptq_user_info.input_scale
-        self.weights_for_average_loss = self.compute_hessian_based_weights()
+        self.weights_for_average_loss = self._get_compare_points_loss_weights()
         self.reg_func = get_regularization(self.gptq_config, representative_data_gen)
+    def _get_compare_points_loss_weights(self):
+        """ Get compare points weights for the distillation loss. """
+        if self.gptq_config.use_hessian_based_weights:
+            hess_dataloader = data_gen_to_dataloader(self.representative_data_gen_fn,
+                                                     batch_size=self.gptq_config.hessian_weights_config.hessian_batch_size)
+            return self.compute_hessian_based_weights(hess_dataloader)
+        num_nodes = len(self.compare_points)
+        return np.ones((num_nodes,)) / num_nodes
     def _is_gptq_weights_trainable(self,
                                    node: common.BaseNode) -> bool:
         """
@@ -182,7 +194,6 @@ class KerasGPTQTrainer(GPTQTrainer):
                         f"but {len(activation_quantizers)} quantizers were found for node '{n}'. "
                         f"Ensure only one quantizer is configured for each node's activation.")
     def build_gptq_model(self) -> Tuple[Model, UserInformation]:
         """
         Build the GPTQ model with QuantizationWrappers
@@ -243,11 +254,9 @@ class KerasGPTQTrainer(GPTQTrainer):
             i += len(p)
         return loss_value, res
-    def train(self, representative_data_gen: Callable):
+    def train(self):
         """
         Train the quantized model using GPTQ training process in Keras framework
-        Args:
-            representative_data_gen: Dataset to use for inputs of the models.
         """
         compute_gradients = self.compute_gradients
@@ -255,7 +264,7 @@ class KerasGPTQTrainer(GPTQTrainer):
         # Training loop
         # ----------------------------------------------
         if self.has_params_to_train:
-            self.micro_training_loop(representative_data_gen,
+            self.micro_training_loop(self.representative_data_gen_fn,
                                      compute_gradients,
                                      self.optimizer_with_param,
                                      self.gptq_config.n_epochs,

model_compression_toolkit/gptq/pytorch/gptq_loss.py CHANGED Viewed

@@ -79,7 +79,7 @@ def sample_layer_attention_loss(y_list: List[torch.Tensor],
         y_list: First list of tensors.
         x_list: Second list of tensors.
         fxp_w_list, flp_w_list, act_bn_mean, act_bn_std: unused (needed to comply with the interface).
-        loss_weights: layer-sample weights tensor of shape (layers, batch)
+        loss_weights: layer-sample weights tensor of shape (batch X layers)
     Returns:
         Sample Layer Attention loss (a scalar).
@@ -87,10 +87,11 @@ def sample_layer_attention_loss(y_list: List[torch.Tensor],
     loss = 0
     layers_mean_w = []
-    for i, (y, x, w) in enumerate(zip(y_list, x_list, loss_weights)):
+    for i, (y, x) in enumerate(zip(y_list, x_list)):
         norm = (y - x).pow(2).sum(1)
         if len(norm.shape) > 1:
             norm = norm.flatten(1).mean(1)
+        w = loss_weights[:, i]
         loss += torch.mean(w * norm)
         layers_mean_w.append(w.mean())

mct-nightly 2.2.0.20241022.507__py3-none-any.whl → 2.2.0.20241024.501__py3-none-any.whl

mct-nightly 2.2.0.20241022.507__py3-none-any.whl → 2.2.0.20241024.501__py3-none-any.whl