PyPI - mct-nightly - Versions diffs - 2.2.0.20240916.525__tar.gz → 2.2.0.20240918.448__tar.gz - Mend

mct-nightly 2.2.0.20240916.525.tar.gz → 2.2.0.20240918.448.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (564) hide show

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 2.2.0.20240916.525
+Version: 2.2.0.20240918.448
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/mct_nightly.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 2.2.0.20240916.525
+Version: 2.2.0.20240918.448
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/mct_nightly.egg-info/SOURCES.txt RENAMED Viewed

@@ -383,6 +383,7 @@ model_compression_toolkit/gptq/pytorch/graph_info.py
 model_compression_toolkit/gptq/pytorch/quantization_facade.py
 model_compression_toolkit/gptq/pytorch/quantizer/__init__.py
 model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py
+model_compression_toolkit/gptq/pytorch/quantizer/gradual_activation_quantization.py
 model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py
 model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py
 model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py
@@ -513,8 +514,10 @@ model_compression_toolkit/trainable_infrastructure/keras/load_model.py
 model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py
 model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py
 model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py
+model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py
 model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py
 model_compression_toolkit/trainable_infrastructure/pytorch/quantizer_utils.py
+model_compression_toolkit/trainable_infrastructure/pytorch/util.py
 model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/__init__.py
 model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/base_activation_quantizer.py
 model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/__init__.py
@@ -548,4 +551,11 @@ model_compression_toolkit/xquant/pytorch/facade_xquant_report.py
 model_compression_toolkit/xquant/pytorch/model_analyzer.py
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py
 model_compression_toolkit/xquant/pytorch/similarity_functions.py
-model_compression_toolkit/xquant/pytorch/tensorboard_utils.py
+model_compression_toolkit/xquant/pytorch/tensorboard_utils.py
+tests_pytest/__init__.py
+tests_pytest/pytorch/__init__.py
+tests_pytest/pytorch/gptq/__init__.py
+tests_pytest/pytorch/gptq/test_annealing_cfg.py
+tests_pytest/pytorch/gptq/test_gradual_act_quantization.py
+tests_pytest/pytorch/trainable_infrastructure/__init__.py
+tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/mct_nightly.egg-info/top_level.txt RENAMED Viewed

@@ -1 +1,2 @@
 model_compression_toolkit
+tests_pytest

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/__init__.py RENAMED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.2.0.20240916.000525"
+__version__ = "2.2.0.20240918.000448"

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/core/common/graph/base_node.py RENAMED Viewed

@@ -40,6 +40,7 @@ class BaseNode:
                  layer_class: type,
                  reuse: bool = False,
                  reuse_group: str = None,
+                 inputs_as_list: bool = False,
                  quantization_attr: Dict[str, Any] = None,
                  has_activation: bool = True,
                  is_custom: bool = False
@@ -58,6 +59,7 @@ class BaseNode:
             layer_class: Class path of the layer this node represents.
             reuse: Whether this node was duplicated and represents a reused layer.
             reuse_group: Name of group of nodes from the same reused layer.
+            inputs_as_list: Whether to pass the node its input tensors as a list or not when calling the layer.
             quantization_attr: Attributes the node holds regarding how it should be quantized.
             has_activation: Whether the node has activations that we might want to quantize.
             is_custom: Whether the node is custom layer or not.
@@ -71,6 +73,7 @@ class BaseNode:
         self.layer_class = layer_class
         self.reuse = reuse
         self.reuse_group = reuse_group
+        self.inputs_as_list = inputs_as_list
         self.final_weights_quantization_cfg = None
         self.final_activation_quantization_cfg = None
         self.candidates_quantization_cfg = None

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/core/common/graph/functional_node.py RENAMED Viewed

@@ -55,13 +55,13 @@ class FunctionalNode(BaseNode):
                          layer_class,
                          reuse,
                          reuse_group,
+                         inputs_as_list,
                          quantization_attr,
                          has_activation=has_activation)
         self.op_call_kwargs = op_call_kwargs
         self.op_call_args = list(op_call_args)
         self.functional_op = functional_op
-        self.inputs_as_list = inputs_as_list
         self.tensor_input_allocs = [] if tensor_input_allocs is None else tensor_input_allocs
     @property

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/core/keras/back2framework/keras_model_builder.py RENAMED Viewed

@@ -308,7 +308,7 @@ class KerasModelBuilder(BaseModelBuilder):
             else:
                 # If operator expects a single input tensor, it cannot be a list as it should
                 # have a dtype field.
-                if len(input_tensors) == 1:
+                if len(input_tensors) == 1 and not n.inputs_as_list:
                     input_tensors = input_tensors[0]
                 out_tensors_of_n_float = op_func(input_tensors)

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/core/keras/reader/node_builder.py RENAMED Viewed

@@ -30,10 +30,12 @@ if version.parse(tf.__version__) >= version.parse("2.13"):
     from keras.src.layers.core import TFOpLambda, SlicingOpLambda
     from keras.src.engine.keras_tensor import KerasTensor
     from keras.src.engine.node import Node as KerasNode
+    from keras.src.layers.merging.base_merge import _Merge
 else:
     from keras.layers.core import TFOpLambda, SlicingOpLambda
     from keras.engine.keras_tensor import KerasTensor
     from keras.engine.node import Node as KerasNode
+    from keras.layers.merging.base_merge import _Merge
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
@@ -287,6 +289,7 @@ def build_node(node: KerasNode,
             for i, arg in enumerate(op_call_args[0]):
                 if is_const(arg):
                     weights.update({i: to_numpy(arg, is_single_tensor=True)})
+        inputs_as_list = __is_node_inputs_a_list(op_call_args, keras_layer)
         node = BaseNode(node_name,
                         layer_config,
@@ -296,6 +299,7 @@ def build_node(node: KerasNode,
                         layer_class,
                         is_reused,
                         reuse_group,
+                        inputs_as_list,
                         is_custom=is_keras_custom_layer(layer_class))
     node_name_to_node[node_name] = node
@@ -316,6 +320,24 @@ def __is_functional_inputs_a_list(op_call_args: Any, keras_layer: Any) -> bool:
     """
     return (keras_layer.symbol in
-            [TFOpLambda(tf.concat).symbol, TFOpLambda(tf.stack).symbol,TFOpLambda(tf.add_n).symbol] and
+            [TFOpLambda(tf.concat).symbol, TFOpLambda(tf.stack).symbol, TFOpLambda(tf.add_n).symbol] and
             len(op_call_args) > 0 and
             isinstance(op_call_args[0], list))
+def __is_node_inputs_a_list(op_call_args: Any, keras_layer: Any) -> bool:
+    """
+    Check whether the input tensors should be passed as a list or not. This is relevant
+    only for layers that inherit from _Merge such as Concatenate and Add.
+    Args:
+        op_call_args: Arguments list to check.
+        keras_layer: Keras layer.
+    Returns:
+        Whether the input tensors should be passed as a list or not.
+    """
+    return (isinstance(keras_layer, _Merge) and
+            len(op_call_args) > 0 and
+            isinstance(op_call_args[0], (list, tuple)))

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py RENAMED Viewed

@@ -139,7 +139,11 @@ def _run_operation(n: BaseNode,
         _tensor_input_allocs = None
     if isinstance(n, FunctionalNode) and n.inputs_as_list:
-        out_tensors_of_n_float = op_func(input_tensors, *op_call_args, **functional_kwargs)
+        if isinstance(op_func, PytorchQuantizationWrapper):
+            # in wrapped nodes, the op args & kwargs are already in the PytorchQuantizationWrapper.
+            out_tensors_of_n_float = op_func(*input_tensors)
+        else:
+            out_tensors_of_n_float = op_func(input_tensors, *op_call_args, **functional_kwargs)
     else:
         merged_inputs, functional_kwargs = _merge_inputs(n, input_tensors, op_call_args, functional_kwargs.copy(),
                                                          tensor_input_allocs=_tensor_input_allocs)

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/core/pytorch/reader/graph_builders.py RENAMED Viewed

@@ -232,10 +232,19 @@ def nodes_builder(model: GraphModule,
         # Add constants to weights dictionary.
         if node.op != PLACEHOLDER:
-            for i, input_node in enumerate(node.all_input_nodes):
-                if input_node in consts_dict:
-                    used_consts.add(input_node)
-                    weights.update({i: consts_dict[input_node]})
+            if len(node.args) and isinstance(node.args[0], (list, tuple)):
+                # handle weights in nodes with list input. Especially when there's a duplicate of a tensor
+                # in the input list (e.g. torch.concat([const1, x, const2, x, const3], 1)).
+                for input_node in node.all_input_nodes:
+                    for i, input_arg in enumerate(node.args[0]):
+                        if input_node is input_arg and input_node in consts_dict:
+                            used_consts.add(input_node)
+                            weights.update({i: consts_dict[input_node]})
+            else:
+                for i, input_node in enumerate(node.all_input_nodes):
+                    if input_node in consts_dict:
+                        used_consts.add(input_node)
+                        weights.update({i: consts_dict[input_node]})
         # Extract input and output shapes of the node.
         input_shape, output_shape = _extract_input_and_output_shapes(node)

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py RENAMED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
-from typing import Tuple, Callable
+from typing import Tuple, Callable, Union
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.verify_packages import FOUND_TF
@@ -25,10 +25,12 @@ if FOUND_TF:
     import tensorflow as tf
     from tensorflow.keras.layers import Layer
     from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
+    from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
     from mct_quantizers import KerasQuantizationWrapper
     from mct_quantizers import KerasActivationQuantizationHolder
+    from mct_quantizers.common.constants import OP_CALL_ARGS, OP_CALL_KWARGS
-    def _get_wrapper(node: common.BaseNode,
+    def _get_wrapper(node: Union[common.BaseNode, FunctionalNode],
                      layer: Layer,
                      fw_impl=None) -> Layer:
         """
@@ -45,9 +47,16 @@ if FOUND_TF:
             # for positional weights we need to extract the weight's value.
             weights_values = {attr: node.get_weights_by_keys(attr)
                               for attr in weights_quantizers if isinstance(attr, int)}
+            # When wrapping functional nodes, need to set call args\kwargs in wrapper, because they
+            # are used during wrapper call method.
+            func_node_kwargs = {OP_CALL_ARGS: node.op_call_args,
+                                OP_CALL_KWARGS: node.op_call_kwargs
+                                } if isinstance(node, FunctionalNode) else {}
             return KerasQuantizationWrapper(layer,
                                             weights_quantizers,
-                                            weights_values)
+                                            weights_values,
+                                            is_inputs_as_list=node.inputs_as_list,
+                                            **func_node_kwargs)
         return layer

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py RENAMED Viewed

@@ -24,7 +24,9 @@ import model_compression_toolkit.core as C
 if FOUND_TORCH:
     import torch
     from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
+    from mct_quantizers.common.constants import OP_CALL_ARGS, OP_CALL_KWARGS
     from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
+    from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
     def fully_quantized_wrapper(node: common.BaseNode,
@@ -46,7 +48,14 @@ if FOUND_TORCH:
             # for positional weights we need to extract the weight's value.
             weights_values = {attr: fw_impl.to_tensor(node.get_weights_by_keys(attr))
                               for attr in weight_quantizers if isinstance(attr, int)}
-            return PytorchQuantizationWrapper(module, weight_quantizers, weights_values)
+            # When wrapping functional nodes, need to set call args\kwargs in wrapper, because they
+            # are used during wrapper call method.
+            func_node_kwargs = {OP_CALL_ARGS: node.op_call_args,
+                                OP_CALL_KWARGS: node.op_call_kwargs
+                                } if isinstance(node, FunctionalNode) else {}
+            return PytorchQuantizationWrapper(module, weight_quantizers, weights_values,
+                                              is_inputs_as_list=node.inputs_as_list,
+                                              **func_node_kwargs)
         return module

mct-nightly-2.2.0.20240918.448/model_compression_toolkit/gptq/__init__.py ADDED Viewed

@@ -0,0 +1,32 @@
+# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from model_compression_toolkit.gptq.common.gptq_config import (
+    GradientPTQConfig,
+    RoundingType,
+    GPTQHessianScoresConfig,
+    GradualActivationQuantizationConfig,
+    QFractionLinearAnnealingConfig
+)
+from model_compression_toolkit.verify_packages import FOUND_TF, FOUND_TORCH
+if FOUND_TF:
+    from model_compression_toolkit.gptq.keras.quantization_facade import keras_gradient_post_training_quantization
+    from model_compression_toolkit.gptq.keras.quantization_facade import get_keras_gptq_config
+if FOUND_TORCH:
+    from model_compression_toolkit.gptq.pytorch.quantization_facade import pytorch_gradient_post_training_quantization
+    from model_compression_toolkit.gptq.pytorch.quantization_facade import get_pytorch_gptq_config

mct-nightly-2.2.0.20240918.448/model_compression_toolkit/gptq/common/gptq_config.py ADDED Viewed

@@ -0,0 +1,135 @@
+# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Callable, Any, Dict, Optional
+from model_compression_toolkit.constants import GPTQ_HESSIAN_NUM_SAMPLES, ACT_HESSIAN_DEFAULT_BATCH_SIZE
+from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
+class RoundingType(Enum):
+    """
+    An enum for choosing the GPTQ rounding methods:
+    STE - STRAIGHT-THROUGH ESTIMATOR
+    SoftQuantizer - SoftQuantizer
+    """
+    STE = 0
+    SoftQuantizer = 1
+@dataclass
+class GPTQHessianScoresConfig:
+    """
+    Configuration to use for computing the Hessian-based scores for GPTQ loss metric.
+    Args:
+        hessians_num_samples (int): Number of samples to use for computing the Hessian-based scores.
+        norm_scores (bool): Whether to normalize the returned scores of the weighted loss function (to get values between 0 and 1).
+        log_norm (bool): Whether to use log normalization for the GPTQ Hessian-based scores.
+        scale_log_norm (bool): Whether to scale the final vector of the Hessian-based scores.
+        hessian_batch_size (int): The Hessian computation batch size. used only if using GPTQ with Hessian-based objective.
+    """
+    hessians_num_samples: int = GPTQ_HESSIAN_NUM_SAMPLES
+    norm_scores: bool = True
+    log_norm: bool = True
+    scale_log_norm: bool = False
+    hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE
+@dataclass
+class QFractionLinearAnnealingConfig:
+    """
+    Config for the quantized fraction linear scheduler of Gradual Activation Quantization.
+    Args:
+         initial_q_fraction: initial quantized fraction
+         target_q_fraction: target quantized fraction
+         start_step: gradient step to begin annealing
+         end_step: gradient step to complete annealing. None means last step.
+    """
+    initial_q_fraction: float
+    target_q_fraction: float
+    start_step: int
+    end_step: Optional[int]
+    def __post_init__(self):
+        if not (0 <= self.initial_q_fraction < self.target_q_fraction <= 1):
+            raise ValueError(f'Expected 0 <= initial_q_fraction < target_q_fraction <= 1, received initial_q_fraction '
+                             f'{self.initial_q_fraction} and target_q_fraction {self.target_q_fraction}.')
+        if self.start_step < 0:
+            raise ValueError(f'Expected start_step >= 0. received {self.start_step}.')
+        if self.end_step is not None and self.end_step <= self.start_step:
+            raise ValueError('Expected start_step < end_step, '
+                             'received end_step {self.end_step} and start_step {self.start_stap}.')
+@dataclass
+class GradualActivationQuantizationConfig:
+    """ Configuration for Gradual Activation Quantization.
+        By default, the quantized fraction increases linearly from 0 to 1 throughout the training.
+        Args:
+            q_fraction_scheduler_policy: config for the scheduling of the quantized fraction.
+                Only linear annealing is currently supported.
+    """
+    q_fraction_scheduler_policy: QFractionLinearAnnealingConfig = field(
+        default_factory=lambda: QFractionLinearAnnealingConfig(initial_q_fraction=0,
+                                                               target_q_fraction=1,
+                                                               start_step=0,
+                                                               end_step=None)
+    )
+@dataclass
+class GradientPTQConfig:
+    """
+    Configuration to use for quantization with GradientPTQ.
+    Args:
+        n_epochs: Number of representative dataset epochs to train.
+        optimizer: Optimizer to use.
+        optimizer_rest: Optimizer to use for bias and quantizer parameters.
+        loss: The loss to use. See 'multiple_tensors_mse_loss' for the expected interface.
+        log_function: Function to log information about the GPTQ process.
+        train_bias: Whether to update the bias during the training or not.
+        rounding_type: An enum that defines the rounding type.
+        use_hessian_based_weights: Whether to use Hessian-based weights for weighted average loss.
+        optimizer_quantization_parameter: Optimizer to override the rest optimizer  for quantizer parameters.
+        optimizer_bias: Optimizer to override the rest optimizer for bias.
+        regularization_factor: A floating point number that defines the regularization factor.
+        hessian_weights_config: A configuration that include all necessary arguments to run a computation of
+            Hessian scores for the GPTQ loss.
+        gradual_activation_quantization_config: A configuration for Gradual Activation Quantization.
+        gptq_quantizer_params_override: A dictionary of parameters to override in GPTQ quantizer instantiation.
+    """
+    n_epochs: int
+    optimizer: Any
+    optimizer_rest: Any = None
+    loss: Callable = None
+    log_function: Callable = None
+    train_bias: bool = True
+    rounding_type: RoundingType = RoundingType.SoftQuantizer
+    use_hessian_based_weights: bool = True
+    optimizer_quantization_parameter: Any = None
+    optimizer_bias: Any = None
+    regularization_factor: float = REG_DEFAULT
+    hessian_weights_config: GPTQHessianScoresConfig = field(default_factory=GPTQHessianScoresConfig)
+    gradual_activation_quantization_config: Optional[GradualActivationQuantizationConfig] = None
+    gptq_quantizer_params_override: Dict[str, Any] = field(default_factory=dict)

{mct-nightly-2.2.0.20240916.525 → mct-nightly-2.2.0.20240918.448}/model_compression_toolkit/gptq/pytorch/gptq_training.py RENAMED Viewed

@@ -21,6 +21,8 @@ import copy
 import torch
 from model_compression_toolkit.core.common.hessian import HessianInfoService
+from model_compression_toolkit.gptq.pytorch.quantizer.gradual_activation_quantization import \
+    get_gradual_activation_quantizer_wrapper_factory
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
@@ -36,6 +38,7 @@ from model_compression_toolkit.gptq.pytorch.graph_info import get_gptq_trainable
 from model_compression_toolkit.gptq.pytorch.quantizer.quantization_builder import quantization_builder
 from model_compression_toolkit.gptq.pytorch.quantizer.regularization_factory import get_regularization
 from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
+from model_compression_toolkit.trainable_infrastructure.pytorch.util import get_total_grad_steps
 class PytorchGPTQTrainer(GPTQTrainer):
@@ -66,6 +69,13 @@ class PytorchGPTQTrainer(GPTQTrainer):
             representative_data_gen: Dataset to use for inputs of the models.
             hessian_info_service: HessianInfoService to fetch info based on the hessian approximation of the float model.
         """
+        def _get_total_grad_steps():
+            return get_total_grad_steps(representative_data_gen) * gptq_config.n_epochs
+        # must be set prior to model building in the base class constructor
+        self.gradual_act_quantizer_wrapper_factory = get_gradual_activation_quantizer_wrapper_factory(
+            gptq_config, _get_total_grad_steps)
         super().__init__(graph_float,
                          graph_quant,
                          gptq_config,
@@ -98,7 +108,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
         self.weights_for_average_loss = to_torch_tensor(self.compute_hessian_based_weights())
-        self.reg_func = get_regularization(self.gptq_config, representative_data_gen)
+        self.reg_func = get_regularization(self.gptq_config, _get_total_grad_steps)
     def _is_gptq_weights_trainable(self,
                                    node: BaseNode) -> bool:
@@ -145,7 +155,6 @@ class PytorchGPTQTrainer(GPTQTrainer):
     def get_activation_quantizer_holder(self, n: BaseNode) -> Callable:
         """
         Retrieve a PytorchActivationQuantizationHolder layer to use for activation quantization of a node.
-        If the layer is not supposed to be wrapped with an activation quantizer - return None.
         Args:
             n: Node to attach a PytorchActivationQuantizationHolder to its output.
         Returns:
@@ -153,13 +162,13 @@ class PytorchGPTQTrainer(GPTQTrainer):
         """
         _, activation_quantizers = quantization_builder(n, self.gptq_config)
         # Holder by definition uses a single quantizer for the activation quantization
-        # thus we make sure this is the only possible case (unless it's a node we no activation
-        # quantization, which in this case has an empty list).
-        if len(activation_quantizers) == 1:
-            return PytorchActivationQuantizationHolder(activation_quantizers[0])
-        Logger.critical(f"'PytorchActivationQuantizationHolder' requires exactly one quantizer, "
-                        f"but {len(activation_quantizers)} were found for node {n.name}. "
-                        f"Ensure the node is configured with a single activation quantizer.")
+        # thus we make sure this is the only possible case
+        if len(activation_quantizers) != 1:
+            Logger.critical(f"'PytorchActivationQuantizationHolder' requires exactly one quantizer, "
+                            f"but {len(activation_quantizers)} were found for node {n.name}. "
+                            f"Ensure the node is configured with a single activation quantizer.")
+        quantizer = self.gradual_act_quantizer_wrapper_factory(activation_quantizers[0])
+        return PytorchActivationQuantizationHolder(quantizer)
     def build_gptq_model(self):
         """

mct-nightly 2.2.0.20240916.525__tar.gz → 2.2.0.20240918.448__tar.gz

mct-nightly 2.2.0.20240916.525.tar.gz → 2.2.0.20240918.448.tar.gz