mct-nightly 2.4.0.20250617.613__py3-none-any.whl → 2.4.0.20250619.621__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. {mct_nightly-2.4.0.20250617.613.dist-info → mct_nightly-2.4.0.20250619.621.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.4.0.20250617.613.dist-info → mct_nightly-2.4.0.20250619.621.dist-info}/RECORD +123 -123
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/core/analyzer.py +2 -5
  5. model_compression_toolkit/core/common/back2framework/base_model_builder.py +0 -3
  6. model_compression_toolkit/core/common/framework_implementation.py +10 -22
  7. model_compression_toolkit/core/common/framework_info.py +105 -68
  8. model_compression_toolkit/core/common/graph/base_graph.py +15 -42
  9. model_compression_toolkit/core/common/graph/base_node.py +103 -42
  10. model_compression_toolkit/core/common/graph/functional_node.py +18 -1
  11. model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +7 -13
  12. model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +8 -18
  13. model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +4 -7
  14. model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +2 -3
  15. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +2 -5
  16. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +3 -6
  17. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +5 -10
  18. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +2 -5
  19. model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +4 -8
  20. model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +2 -7
  21. model_compression_toolkit/core/common/model_collector.py +10 -20
  22. model_compression_toolkit/core/common/model_validation.py +1 -4
  23. model_compression_toolkit/core/common/network_editors/actions.py +14 -38
  24. model_compression_toolkit/core/common/network_editors/edit_network.py +1 -4
  25. model_compression_toolkit/core/common/pruning/channels_grouping.py +1 -5
  26. model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +0 -6
  27. model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +5 -15
  28. model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +3 -7
  29. model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +2 -4
  30. model_compression_toolkit/core/common/pruning/memory_calculator.py +5 -13
  31. model_compression_toolkit/core/common/pruning/prune_graph.py +1 -4
  32. model_compression_toolkit/core/common/pruning/pruner.py +1 -6
  33. model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +5 -13
  34. model_compression_toolkit/core/common/pruning/pruning_section.py +9 -18
  35. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +2 -1
  36. model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +10 -12
  37. model_compression_toolkit/core/common/quantization/node_quantization_config.py +4 -3
  38. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +5 -11
  39. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +8 -22
  40. model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +1 -2
  41. model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +2 -3
  42. model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +5 -13
  43. model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +3 -9
  44. model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +3 -10
  45. model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +1 -6
  46. model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +2 -3
  47. model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +3 -6
  48. model_compression_toolkit/core/common/substitutions/scale_equalization.py +5 -21
  49. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -19
  50. model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +1 -3
  51. model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
  52. model_compression_toolkit/core/common/visualization/nn_visualizer.py +3 -8
  53. model_compression_toolkit/core/common/visualization/tensorboard_writer.py +6 -8
  54. model_compression_toolkit/core/graph_prep_runner.py +2 -16
  55. model_compression_toolkit/core/keras/back2framework/float_model_builder.py +0 -4
  56. model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +0 -5
  57. model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +8 -15
  58. model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +0 -4
  59. model_compression_toolkit/core/keras/default_framework_info.py +138 -87
  60. model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +2 -7
  61. model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +0 -1
  62. model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +3 -5
  63. model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +8 -16
  64. model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +1 -4
  65. model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +3 -13
  66. model_compression_toolkit/core/keras/keras_implementation.py +15 -35
  67. model_compression_toolkit/core/keras/keras_model_validation.py +6 -7
  68. model_compression_toolkit/core/keras/keras_node_prior_info.py +4 -13
  69. model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +11 -34
  70. model_compression_toolkit/core/keras/resource_utilization_data_facade.py +2 -2
  71. model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +0 -3
  72. model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +3 -12
  73. model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +9 -16
  74. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -5
  75. model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +2 -3
  76. model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +0 -4
  77. model_compression_toolkit/core/pytorch/default_framework_info.py +100 -74
  78. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +3 -4
  79. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +4 -8
  80. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +1 -4
  81. model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +3 -12
  82. model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +16 -41
  83. model_compression_toolkit/core/pytorch/pytorch_implementation.py +12 -32
  84. model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +1 -5
  85. model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +2 -2
  86. model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +0 -3
  87. model_compression_toolkit/core/quantization_prep_runner.py +4 -9
  88. model_compression_toolkit/core/runner.py +5 -15
  89. model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +8 -8
  90. model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +11 -11
  91. model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +2 -0
  92. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +19 -17
  93. model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +2 -0
  94. model_compression_toolkit/gptq/common/gptq_graph.py +5 -11
  95. model_compression_toolkit/gptq/common/gptq_training.py +1 -8
  96. model_compression_toolkit/gptq/keras/gptq_training.py +3 -9
  97. model_compression_toolkit/gptq/keras/graph_info.py +4 -6
  98. model_compression_toolkit/gptq/keras/quantization_facade.py +5 -8
  99. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +1 -3
  100. model_compression_toolkit/gptq/pytorch/gptq_training.py +3 -9
  101. model_compression_toolkit/gptq/pytorch/graph_info.py +1 -3
  102. model_compression_toolkit/gptq/pytorch/quantization_facade.py +5 -7
  103. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +1 -3
  104. model_compression_toolkit/gptq/runner.py +1 -7
  105. model_compression_toolkit/pruning/keras/pruning_facade.py +2 -3
  106. model_compression_toolkit/pruning/pytorch/pruning_facade.py +2 -3
  107. model_compression_toolkit/ptq/keras/quantization_facade.py +5 -10
  108. model_compression_toolkit/ptq/pytorch/quantization_facade.py +4 -8
  109. model_compression_toolkit/ptq/runner.py +1 -4
  110. model_compression_toolkit/qat/common/qat_config.py +2 -6
  111. model_compression_toolkit/qat/keras/quantization_facade.py +7 -10
  112. model_compression_toolkit/qat/pytorch/quantization_facade.py +6 -10
  113. model_compression_toolkit/xquant/common/core_report_generator.py +1 -1
  114. model_compression_toolkit/xquant/common/framework_report_utils.py +0 -3
  115. model_compression_toolkit/xquant/common/model_folding_utils.py +1 -6
  116. model_compression_toolkit/xquant/common/tensorboard_utils.py +1 -4
  117. model_compression_toolkit/xquant/keras/keras_report_utils.py +3 -8
  118. model_compression_toolkit/xquant/keras/tensorboard_utils.py +0 -3
  119. model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +5 -8
  120. model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +0 -3
  121. {mct_nightly-2.4.0.20250617.613.dist-info → mct_nightly-2.4.0.20250619.621.dist-info}/WHEEL +0 -0
  122. {mct_nightly-2.4.0.20250617.613.dist-info → mct_nightly-2.4.0.20250619.621.dist-info}/licenses/LICENSE.md +0 -0
  123. {mct_nightly-2.4.0.20250617.613.dist-info → mct_nightly-2.4.0.20250619.621.dist-info}/top_level.txt +0 -0
@@ -60,10 +60,10 @@ class ReduceLROnPlateauWithReset:
         # Attach optimizer
         if not isinstance(optimizer, Optimizer):
             Logger.critical('{} is not an Optimizer'.format(
-                type(optimizer).__name__))  # pragma: no cover
+                type(optimizer).__name__))  # pragma: no cover
         self.optimizer = optimizer
 
-        if isinstance(min_lr, (list, tuple)):
+        if isinstance(min_lr, (list, tuple)):  # pragma: no cover
             if len(min_lr) != len(optimizer.param_groups):
                 Logger.critical("expected {} min_lrs, got {}".format(
                     len(optimizer.param_groups), len(min_lr)))  # pragma: no cover
@@ -117,7 +117,7 @@ class ReduceLROnPlateauWithReset:
         self.num_bad_epochs += 1
 
         # Handle cooldown period
-        if self.in_cooldown:
+        if self.in_cooldown:  # pragma: no cover
             self.cooldown_counter -= 1
             self.num_bad_epochs = 0  # Ignore any bad epochs in cooldown
 
@@ -142,7 +142,7 @@ class ReduceLROnPlateauWithReset:
                 new_lr = max(old_lr * self.factor, self.min_lrs[i])
                 if old_lr - new_lr > self.eps:
                     param_group['lr'] = new_lr
-                    if self.verbose:
+                    if self.verbose:  # pragma: no cover
                         epoch_str = ("%.2f" if isinstance(epoch, float) else "%.5d") % epoch
                         print('Epoch {}: reducing learning rate'
                               ' of group {} to {:.4e}.'.format(epoch_str, i, new_lr))
@@ -168,19 +168,19 @@ class ReduceLROnPlateauWithReset:
         Returns:
             bool: True if the new value is better, False otherwise.
         """
-        if best is None:
+        if best is None:  # pragma: no cover
             return True
 
         if self.mode == 'min' and self.threshold_mode == 'rel':
             rel_epsilon = 1. - self.threshold
             return a < best * rel_epsilon
-        elif self.mode == 'min' and self.threshold_mode == 'abs':
+        elif self.mode == 'min' and self.threshold_mode == 'abs':  # pragma: no cover
             return a < best - self.threshold
-        elif self.mode == 'max' and self.threshold_mode == 'rel':
+        elif self.mode == 'max' and self.threshold_mode == 'rel':  # pragma: no cover
             rel_epsilon = self.threshold + 1.
             return a > best * rel_epsilon
         else:  # mode == 'max' and threshold_mode == 'abs':
-            return a > best + self.threshold
+            return a > best + self.threshold  # pragma: no cover
 
     def _init_is_better(self) -> None:
         """
@@ -197,9 +197,9 @@ class ReduceLROnPlateauWithReset:
         if self.mode == 'min':
             self.mode_worse = float('inf')
         else:  # mode == 'max':
-            self.mode_worse = float('-inf')
+            self.mode_worse = float('-inf')  # pragma: no cover
 
-    def state_dict(self) -> Dict[str, Any]:
+    def state_dict(self) -> Dict[str, Any]:  # pragma: no cover
         """
         Return the state of the scheduler as a dictionary.
 
@@ -208,7 +208,7 @@
         """
         return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}
 
-    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
+    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:  # pragma: no cover
         """
         Load the scheduler state.
 
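The hunks above mark rarely exercised branches of ReduceLROnPlateauWithReset with # pragma: no cover. The class follows the interface of the stock PyTorch plateau scheduler; a minimal, runnable sketch of that plateau pattern, shown with torch.optim.lr_scheduler.ReduceLROnPlateau for illustration (the MCT subclass adds reset behavior on top of this same interface, so exact constructor arguments may differ):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=2, threshold=1e-4, min_lr=1e-5)

for epoch in range(10):
    val_loss = 1.0  # a metric that never improves
    scheduler.step(val_loss)  # after `patience` bad epochs, lr is multiplied by `factor`

print(optimizer.param_groups[0]['lr'])  # reduced from the initial 0.1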
@@ -21,6 +21,7 @@ from model_compression_toolkit.logger import Logger
 
 if FOUND_TF:
     import keras
+    from model_compression_toolkit.core.keras.default_framework_info import set_keras_info
     from model_compression_toolkit.exporter.model_wrapper.keras.validate_layer import is_keras_layer_exportable
     from model_compression_toolkit.exporter.model_exporter.keras.fakely_quant_keras_exporter import \
         FakelyQuantKerasExporter
@@ -36,6 +37,7 @@ if FOUND_TF:
         KerasExportSerializationFormat.TFLITE: [QuantizationFormat.FAKELY_QUANT, QuantizationFormat.INT8]
     }
 
+    @set_keras_info
     def keras_export_model(model: keras.models.Model,
                            save_model_path: str,
                            is_layer_exportable_fn: Callable = is_keras_layer_exportable,
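The diff only shows @set_keras_info being applied; its body is not part of this changeset. A hypothetical, self-contained sketch of the pattern it suggests, where a decorator installs framework info into a process-wide registry before the wrapped API runs (the registry names here are assumptions, not MCT's actual internals):

from functools import wraps

_FW_INFO = None  # hypothetical process-wide registry slot

def get_fw_info():
    return _FW_INFO

def set_framework_info(info):
    """Install `info` into the registry before the wrapped API runs."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            global _FW_INFO
            _FW_INFO = info
            return func(*args, **kwargs)
        return wrapper
    return decorator

@set_framework_info({"framework": "keras"})
def export_model_stub():
    return get_fw_info()

assert export_model_stub() == {"framework": "keras"}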
@@ -73,23 +73,25 @@ if FOUND_ONNX:
         Returns:
             Fake-quant PyTorch model.
         """
-        # List all activation quantization holders with num_bits>8 and replace them with Identity, because
-        # ONNX doesn't support quantization of more than 8 bits for torch.fake_quantize_per_tensor_affine.
-        act_holder_list = [n for n, m in self.model.named_modules()
-                           if isinstance(m, PytorchActivationQuantizationHolder) and
-                           m.activation_holder_quantizer.num_bits > 8]
-        for act_holder in act_holder_list:  # pragma: no cover
-            obj = self.model
-            attrs = act_holder.split(".")
-            for a in attrs[:-1]:
-                obj = getattr(obj, a)
-            if hasattr(obj, attrs[-1]):
-                delattr(obj, attrs[-1])
-                setattr(obj, attrs[-1], torch.nn.Identity())
-            else:
-                Logger.info(f"During removal of activation quantization of a quantizer (with bits > 8) in ONNX FQ "
-                            f"export, deletion of activation holder '{act_holder}' failed — could not locate one or "
-                            f"more intermediate attributes in the path.")
+        # When exporting with the fakely-quant quantization format, list all activation quantization holders with
+        # num_bits>8 and replace them with Identity, because ONNX doesn't support quantization of more than 8 bits
+        # for torch.fake_quantize_per_tensor_affine.
+        if not self._use_onnx_custom_quantizer_ops:
+            act_holder_list = [n for n, m in self.model.named_modules()
+                               if isinstance(m, PytorchActivationQuantizationHolder) and
+                               m.activation_holder_quantizer.num_bits > 8]
+            for act_holder in act_holder_list:  # pragma: no cover
+                obj = self.model
+                attrs = act_holder.split(".")
+                for a in attrs[:-1]:
+                    obj = getattr(obj, a)
+                if hasattr(obj, attrs[-1]):
+                    delattr(obj, attrs[-1])
+                    setattr(obj, attrs[-1], torch.nn.Identity())
+                else:
+                    Logger.info(f"During removal of activation quantization of a quantizer (with bits > 8) in ONNX "
+                                f"FQ export, deletion of activation holder '{act_holder}' failed — could not locate "
+                                f"one or more intermediate attributes in the path.")
 
         for layer in self.model.children():
             self.is_layer_exportable_fn(layer)
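The loop above removes >8-bit activation quantizers by walking a dotted module path and swapping the leaf module for torch.nn.Identity. A self-contained sketch of that traversal (the model and path here are illustrative):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(8, 8), nn.ReLU())

def replace_with_identity(model: nn.Module, dotted_path: str) -> bool:
    obj = model
    attrs = dotted_path.split(".")
    for a in attrs[:-1]:   # descend to the parent module
        obj = getattr(obj, a)
    if hasattr(obj, attrs[-1]):
        setattr(obj, attrs[-1], nn.Identity())  # swap the leaf in place
        return True
    return False

replace_with_identity(model, "1")  # replaces the ReLU with Identity
assert isinstance(model[1], nn.Identity)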
@@ -27,6 +27,7 @@ DEFAULT_ONNX_OPSET_VERSION = 15
 
 if FOUND_TORCH:
     import torch.nn
+    from model_compression_toolkit.core.pytorch.default_framework_info import set_pytorch_info
     from model_compression_toolkit.exporter.model_exporter.pytorch.fakely_quant_onnx_pytorch_exporter import FakelyQuantONNXPyTorchExporter
     from model_compression_toolkit.exporter.model_exporter.pytorch.fakely_quant_torchscript_pytorch_exporter import FakelyQuantTorchScriptPyTorchExporter
     from model_compression_toolkit.exporter.model_wrapper.pytorch.validate_layer import is_pytorch_layer_exportable
@@ -41,6 +42,7 @@ if FOUND_TORCH:
         PytorchExportSerializationFormat.ONNX: [QuantizationFormat.FAKELY_QUANT, QuantizationFormat.MCTQ]
     }
 
+    @set_pytorch_info
     def pytorch_export_model(model: torch.nn.Module,
                              save_model_path: str,
                              repr_dataset: Callable,
@@ -14,8 +14,8 @@
 # ==============================================================================
 from typing import Tuple, List
 
-from model_compression_toolkit.core import FrameworkInfo
 from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.core.common.framework_info import get_fw_info
 from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 
@@ -40,8 +40,7 @@ def get_compare_points(input_graph: Graph) -> Tuple[List[BaseNode], List[str], L
     compare_points_name = []
     for n in input_graph.get_topo_sorted_nodes():
         # only nodes with kernel attribute are currently trained with GPTQ and are used as compare points
-        kernel_attr = input_graph.fw_info.get_kernel_op_attributes(n.type)[0]
-        if kernel_attr is not None and n.is_weights_quantization_enabled(kernel_attr) and not n.reuse:
+        if n.kernel_attr is not None and n.is_weights_quantization_enabled(n.kernel_attr) and not n.reuse:
             compare_points.append(n)
             compare_points_name.append(n.name)
             compare_points_std.append(n.prior_info.std_output)
@@ -49,20 +48,15 @@ def get_compare_points(input_graph: Graph) -> Tuple[List[BaseNode], List[str], L
     return compare_points, compare_points_name, compare_points_mean, compare_points_std
 
 
-def get_kernel_attribute_name_for_gptq(layer_type: type, fw_info: FrameworkInfo) -> str:
+def get_kernel_attribute_name_for_gptq(layer_type: type) -> str:
     """
     Returns a layer's kernel attribute name for GPTQ training purposes.
 
     Args:
         layer_type: A type of model's layer.
-        fw_info: A FrameworkInfo object.
 
     Returns: The name of the kernel attribute.
 
     """
-    kernel_attribute = fw_info.get_kernel_op_attributes(layer_type)
-    if len(kernel_attribute) != 1:
-        Logger.critical(  # pragma: no cover
-            f"In GPTQ training, only the kernel weights attribute should be trained. "
-            f"However, the number of kernel attributes is {len(kernel_attribute)}.")
-    return kernel_attribute[0]
+
+    return get_fw_info().get_kernel_op_attribute(layer_type)
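This is the recurring refactor in this release: FrameworkInfo is no longer threaded through call sites but resolved via a global accessor. The call-site migration, as it appears in the keras and pytorch graph_info hunks below:

# Before (...613): fw_info threaded explicitly through every call site
kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
                                                      fw_info=DEFAULT_KERAS_INFO)

# After (...621): the FrameworkInfo is resolved through the global registry
kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer))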
@@ -44,7 +44,6 @@ class GPTQTrainer(ABC):
                  graph_quant: Graph,
                  gptq_config: GradientPTQConfig,
                  fw_impl: GPTQFrameworkImplemantation,
-                 fw_info: FrameworkInfo,
                  representative_data_gen_fn: Callable[[], Generator],
                  hessian_info_service: HessianInfoService = None):
         """
@@ -58,7 +57,6 @@ class GPTQTrainer(ABC):
             graph_quant: Graph to build a quantized networks from.
             gptq_config: GradientPTQConfig with parameters about the tuning process.
             fw_impl: Framework implementation
-            fw_info: Framework information
             representative_data_gen_fn: factory for representative data generator.
             hessian_info_service: HessianInfoService for fetching and computing Hessian-approximation information.
         """
@@ -66,7 +64,6 @@ class GPTQTrainer(ABC):
         self.graph_quant = copy.deepcopy(graph_quant)
         self.gptq_config = gptq_config
         self.fw_impl = fw_impl
-        self.fw_info = fw_info
         self.representative_data_gen_fn = representative_data_gen_fn
 
         def _get_total_grad_steps():
@@ -83,8 +80,7 @@ class GPTQTrainer(ABC):
 
         self.float_model, self.float_user_info = fw_impl.model_builder(self.graph_float,
                                                                        mode=ModelBuilderMode.FLOAT,
-                                                                       append2output=self.compare_points,
-                                                                       fw_info=self.fw_info)
+                                                                       append2output=self.compare_points)
 
         self.fxp_model, self.gptq_user_info = self.build_gptq_model()
         if self.gptq_config.hessian_weights_config:
@@ -288,7 +284,6 @@ def gptq_training(graph_float: Graph,
                   gptq_config: GradientPTQConfig,
                   representative_data_gen: Callable,
                   fw_impl: GPTQFrameworkImplemantation,
-                  fw_info: FrameworkInfo,
                   hessian_info_service: HessianInfoService = None) -> Graph:
     """
     GPTQ training process using knowledge distillation with a teacher network (float model) and a student network (quantized model).
@@ -298,7 +293,6 @@ def gptq_training(graph_float: Graph,
         gptq_config: GradientPTQConfig with parameters about the tuning process.
         representative_data_gen: Dataset to use for inputs of the models.
         fw_impl: Framework implementation
-        fw_info: Framework information
         hessian_info_service: HessianInfoService to fetch information based on the Hessian approximation.
 
     Returns:
@@ -312,7 +306,6 @@ def gptq_training(graph_float: Graph,
                               graph_quant,
                               gptq_config,
                               fw_impl,
-                              fw_info,
                               representative_data_gen,
                               hessian_info_service=hessian_info_service)
 
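The same removal applies to the GPTQ entry point itself. Condensed from the signature hunks above (intermediate parameters elided, as they are not shown in this diff):

- def gptq_training(graph_float, ..., representative_data_gen, fw_impl, fw_info, hessian_info_service=None)
+ def gptq_training(graph_float, ..., representative_data_gen, fw_impl, hessian_info_service=None)

Callers that previously passed DEFAULT_KERAS_INFO or DEFAULT_PYTORCH_INFO now appear to rely on the @set_keras_info / @set_pytorch_info decorators on the public facades to populate the global FrameworkInfo.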
@@ -65,7 +65,6 @@ class KerasGPTQTrainer(GPTQTrainer):
                  graph_quant: Graph,
                  gptq_config: GradientPTQConfig,
                  fw_impl: FrameworkImplementation,
-                 fw_info: FrameworkInfo,
                  representative_data_gen: Callable,
                  hessian_info_service: HessianInfoService = None):
         """
@@ -79,7 +78,6 @@ class KerasGPTQTrainer(GPTQTrainer):
             graph_quant: Graph to build a quantized networks from.
             gptq_config: GradientPTQConfig with parameters about the tuning process.
             fw_impl: FrameworkImplementation object with a specific framework methods implementation.
-            fw_info: Framework information.
             representative_data_gen: Dataset to use for inputs of the models.
             hessian_info_service: HessianScoresService for fetching and computing Hessian's approximation scores.
 
@@ -94,7 +92,6 @@ class KerasGPTQTrainer(GPTQTrainer):
                          graph_quant,
                          gptq_config,
                          fw_impl,
-                         fw_info,
                          representative_data_gen_fn=representative_data_gen,
                          hessian_info_service=hessian_info_service)
 
@@ -210,8 +207,7 @@ class KerasGPTQTrainer(GPTQTrainer):
         Returns:
             A boolean whether the layer is to be wrapped with a QuantizeWrapper
         """
-        kernel_attr = self.fw_info.get_kernel_op_attributes(node.type)[0]
-        return kernel_attr is not None and node.is_weights_quantization_enabled(kernel_attr)
+        return node.kernel_attr is not None and node.is_weights_quantization_enabled(node.kernel_attr)
 
     def gptq_wrapper(self,
                      n: common.BaseNode,
@@ -230,7 +226,7 @@ class KerasGPTQTrainer(GPTQTrainer):
         # If we are here, then the node has a kernel attribute to quantize and training during GPTQ
         weights_quantizers, _ = quantization_builder(n,
                                                      self.gptq_config,  # TODO: split quantizers building into two functions: for weights and activations
-                                                     self.fw_info.get_kernel_op_attributes(n.type)[0])
+                                                     n.kernel_attr)
         if len(weights_quantizers) > 0:
             return KerasTrainableQuantizationWrapper(layer,
                                                      weights_quantizers=weights_quantizers)
@@ -271,7 +267,6 @@ class KerasGPTQTrainer(GPTQTrainer):
 
         gptq_model, gptq_user_info = KerasModelBuilder(graph=self.graph_quant,
                                                        append2output=self.compare_points,
-                                                       fw_info=self.fw_info,
                                                        return_float_outputs=True,
                                                        wrapper=self.gptq_wrapper,
                                                        get_activation_quantizer_holder_fn=self.get_activation_quantizer_holder).build_model()
@@ -431,8 +426,7 @@ class KerasGPTQTrainer(GPTQTrainer):
             Logger.critical(f"Unable to update the GPTQ graph because the layer named '{layer.layer.name}' could not be found. "
                             f"Verify that the layer names in the GPTQ model match those in the graph.")
         node = node[0]
-        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type,
-                                                              fw_info=self.fw_info)
+        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type)
         # TODO: only kernel attributes are currently trained in GPTQ, so only the kernel weights need to be updated.
         # To enable GPTQ for other attributes, this code needs to be modified.
         weights, weight_quant_config, activation_quant_config = \
@@ -16,8 +16,8 @@
 import tensorflow as tf
 from typing import Tuple, List
 from model_compression_toolkit.core.keras.constants import USE_BIAS
+from model_compression_toolkit.core.common.framework_info import get_fw_info
 from tensorflow.keras.models import Model
-from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
@@ -44,8 +44,7 @@ def get_gptq_trainable_parameters(fxp_model: Model,
 
     for layer in fxp_model.layers:
         if isinstance(layer, KerasTrainableQuantizationWrapper):
-            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
-                                                                  fw_info=DEFAULT_KERAS_INFO)
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer))
 
             # collect trainable weights per quantizer
             if kernel_attribute not in layer.weights_quantizers:
@@ -57,9 +56,8 @@ def get_gptq_trainable_parameters(fxp_model: Model,
             trainable_threshold.extend(quantizer_trainable_threshold)
 
             if add_bias:
-                kernel_ops_attrs = DEFAULT_KERAS_INFO.kernel_ops_attributes_mapping.get(type(layer.layer))
-                use_bias = kernel_ops_attrs is not None and kernel_ops_attrs[0] is not None \
-                    and layer.layer.get_config().get(USE_BIAS)
+                kernel_ops_attr = get_fw_info().get_kernel_op_attribute(type(layer.layer))
+                use_bias = kernel_ops_attr is not None and layer.layer.get_config().get(USE_BIAS)
                 if use_bias is not None and use_bias and layer.layer.bias is not None:
                     bias_weights.append([layer.layer.bias])
@@ -41,7 +41,7 @@ from model_compression_toolkit.metadata import create_model_metadata
 
 if FOUND_TF:
     import tensorflow as tf
-    from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
+    from model_compression_toolkit.core.keras.default_framework_info import set_keras_info
     from model_compression_toolkit.gptq.keras.gptq_keras_implementation import GPTQKerasImplemantation
     from model_compression_toolkit.core.keras.keras_model_validation import KerasModelValidation
     from tensorflow.keras.models import Model
@@ -152,6 +152,7 @@ if FOUND_TF:
             gradual_activation_quantization_config=gradual_quant_config)
 
 
+    @set_keras_info
     def keras_gradient_post_training_quantization(in_model: Model, representative_data_gen: Callable,
                                                   gptq_config: GradientPTQConfig,
                                                   gptq_representative_data_gen: Callable = None,
@@ -234,8 +235,7 @@ if FOUND_TF:
         if core_config.debug_config.bypass:
            return in_model, None
 
-        KerasModelValidation(model=in_model,
-                             fw_info=DEFAULT_KERAS_INFO).validate()
+        KerasModelValidation(model=in_model).validate()
 
         if core_config.is_mixed_precision_enabled:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
@@ -243,7 +243,7 @@ if FOUND_TF:
                                 "Ensure usage of the correct API for keras_post_training_quantization "
                                 "or provide a valid mixed-precision configuration.")  # pragma: no cover
 
-        tb_w = init_tensorboard_writer(DEFAULT_KERAS_INFO)
+        tb_w = init_tensorboard_writer()
 
         fw_impl = GPTQKerasImplemantation()
 
@@ -257,7 +257,6 @@ if FOUND_TF:
         tg, bit_widths_config, hessian_info_service, scheduling_info = core_runner(in_model=in_model,
                                                                                    representative_data_gen=representative_data_gen,
                                                                                    core_config=core_config,
-                                                                                   fw_info=DEFAULT_KERAS_INFO,
                                                                                    fw_impl=fw_impl,
                                                                                    fqc=framework_platform_capabilities,
                                                                                    target_resource_utilization=target_resource_utilization,
@@ -271,7 +270,6 @@ if FOUND_TF:
                                   gptq_config,
                                   representative_data_gen,
                                   gptq_representative_data_gen if gptq_representative_data_gen else representative_data_gen,
-                                  DEFAULT_KERAS_INFO,
                                   fw_impl,
                                   tb_w,
                                   hessian_info_service=hessian_info_service)
@@ -283,8 +281,7 @@ if FOUND_TF:
                                   tb_w,
                                   float_graph,
                                   tg_gptq,
-                                  fw_impl,
-                                  DEFAULT_KERAS_INFO)
+                                  fw_impl)
 
         exportable_model, user_info = get_exportable_keras_model(tg_gptq)
         if framework_platform_capabilities.tpc.add_metadata:
@@ -17,7 +17,6 @@ from typing import List, Callable
 import tensorflow as tf
 from keras import Model
 
-from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
 
@@ -66,8 +65,7 @@ class SoftQuantizerRegularization:
 
         # Compute the regularization term without concatenating
         for i, layer in enumerate(layers):
-            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
-                                                                  fw_info=DEFAULT_KERAS_INFO)
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer))
 
             st = layer.weights_quantizers[kernel_attribute].get_soft_targets()
 
@@ -54,7 +54,6 @@ class PytorchGPTQTrainer(GPTQTrainer):
                  graph_quant: Graph,
                  gptq_config: GradientPTQConfig,
                  fw_impl: FrameworkImplementation,
-                 fw_info: FrameworkInfo,
                  representative_data_gen: Callable,
                  hessian_info_service: HessianInfoService = None):
         """
@@ -68,7 +67,6 @@ class PytorchGPTQTrainer(GPTQTrainer):
             graph_quant: Graph to build a quantized networks from.
             gptq_config: GradientPTQConfigV2 with parameters about the tuning process.
             fw_impl: FrameworkImplementation object with a specific framework methods implementation.
-            fw_info: Framework information
             representative_data_gen: Dataset to use for inputs of the models.
             hessian_info_service: HessianInfoService to fetch info based on the hessian approximation of the float model.
         """
@@ -81,7 +79,6 @@ class PytorchGPTQTrainer(GPTQTrainer):
                          graph_quant,
                          gptq_config,
                          fw_impl,
-                         fw_info,
                          representative_data_gen_fn=representative_data_gen,
                          hessian_info_service=hessian_info_service)
 
@@ -167,8 +164,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
             A boolean whether the layer is to be wrapped with a Quantization Wrapper.
         """
 
-        kernel_attr = self.fw_info.get_kernel_op_attributes(node.type)[0]
-        return kernel_attr is not None and node.is_weights_quantization_enabled(kernel_attr)
+        return node.kernel_attr is not None and node.is_weights_quantization_enabled(node.kernel_attr)
 
     def gptq_wrapper(self,
                      n: BaseNode,
@@ -187,7 +183,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
         # If we are here, then the node has a kernel attribute to quantize and training during GPTQ
         weights_quantizers, _ = quantization_builder(n,
                                                      self.gptq_config,
-                                                     self.fw_info.get_kernel_op_attributes(n.type)[0])
+                                                     n.kernel_attr)
 
         if len(weights_quantizers) > 0:
             return PytorchQuantizationWrapper(layer,
@@ -224,7 +220,6 @@ class PytorchGPTQTrainer(GPTQTrainer):
         """
         gptq_model, gptq_user_info = PyTorchModelBuilder(graph=self.graph_quant,
                                                          append2output=self.compare_points,
-                                                         fw_info=self.fw_info,
                                                          wrapper=self.gptq_wrapper,
                                                          return_float_outputs=True,
                                                          get_activation_quantizer_holder_fn=self.get_activation_quantizer_holder).build_model()
@@ -340,8 +335,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
             Logger.critical(f"Cannot update GPTQ graph: Layer with name '{name}' is missing or not unique. "
                             f"Ensure each layer has a unique name and exists within the graph for updates.")
         node = node[0]
-        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type,
-                                                              fw_info=self.fw_info)
+        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type)
         # TODO: only kernel attributes are currently trained in GPTQ, so only the kernel weights need to be updated.
         # To enable GPTQ for other attributes, this code needs to be modified.
         weights, weight_quant_config, activation_quant_config = \
@@ -16,7 +16,6 @@ import torch
 import torch.nn as nn
 from typing import List
 from model_compression_toolkit.core.pytorch.constants import BIAS
-from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.logger import Logger
 from mct_quantizers import PytorchQuantizationWrapper
@@ -43,8 +42,7 @@ def get_gptq_trainable_parameters(fxp_model: nn.Module,
 
     for layer in fxp_model.modules():
         if isinstance(layer, PytorchQuantizationWrapper):
-            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
-                                                                  fw_info=DEFAULT_PYTORCH_INFO)
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer))
 
             # collect trainable weights per quantizer
             if kernel_attribute not in layer.weights_quantizers:
@@ -39,7 +39,7 @@ from model_compression_toolkit.verify_packages import FOUND_TORCH
 
 
 if FOUND_TORCH:
-    from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
+    from model_compression_toolkit.core.pytorch.default_framework_info import set_pytorch_info
     from model_compression_toolkit.gptq.pytorch.gptq_pytorch_implementation import GPTQPytorchImplemantation
     from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
     from model_compression_toolkit.gptq.pytorch.gptq_loss import multiple_tensors_mse_loss, sample_layer_attention_loss
@@ -142,6 +142,8 @@ if FOUND_TORCH:
             gradual_activation_quantization_config=gradual_quant_config,
             log_function=log_function)
 
+
+    @set_pytorch_info
     def pytorch_gradient_post_training_quantization(model: Module,
                                                     representative_data_gen: Callable,
                                                     target_resource_utilization: ResourceUtilization = None,
@@ -216,8 +218,7 @@ if FOUND_TORCH:
             Logger.critical("Given quantization config for mixed-precision is not of type 'MixedPrecisionQuantizationConfig'. "
                             "Ensure usage of the correct API for 'pytorch_gradient_post_training_quantization' "
                             "or provide a valid mixed-precision configuration.")
-
-        tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)
+        tb_w = init_tensorboard_writer()
 
         fw_impl = GPTQPytorchImplemantation()
 
@@ -233,7 +234,6 @@ if FOUND_TORCH:
         graph, bit_widths_config, hessian_info_service, scheduling_info = core_runner(in_model=model,
                                                                                       representative_data_gen=representative_data_gen,
                                                                                       core_config=core_config,
-                                                                                      fw_info=DEFAULT_PYTORCH_INFO,
                                                                                       fw_impl=fw_impl,
                                                                                       fqc=framework_quantization_capabilities,
                                                                                       target_resource_utilization=target_resource_utilization,
@@ -250,7 +250,6 @@ if FOUND_TORCH:
                                    gptq_config,
                                    representative_data_gen,
                                    gptq_representative_data_gen if gptq_representative_data_gen else representative_data_gen,
-                                   DEFAULT_PYTORCH_INFO,
                                    fw_impl,
                                    tb_w,
                                    hessian_info_service=hessian_info_service)
@@ -260,8 +259,7 @@ if FOUND_TORCH:
                                    tb_w,
                                    float_graph,
                                    graph_gptq,
-                                   fw_impl,
-                                   DEFAULT_PYTORCH_INFO)
+                                   fw_impl)
 
         exportable_model, user_info = get_exportable_pytorch_model(graph_gptq)
         if framework_quantization_capabilities.tpc.add_metadata:
@@ -18,7 +18,6 @@ import torch
 from torch import nn
 
 from mct_quantizers import PytorchQuantizationWrapper
-from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 
 
@@ -61,8 +60,7 @@ class SoftQuantizerRegularization:
         b = self.beta_scheduler(self.count_iter)
         reg = 0
         for layer, w in zip(layers, layer_weights):
-            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
-                                                                  fw_info=DEFAULT_PYTORCH_INFO)
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer))
 
             st = layer.weights_quantizers[kernel_attribute].get_soft_targets()
             soft_loss = (1 - torch.pow(torch.abs(st - .5) * 2, b)).sum()
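For reference, the regularization term in the loop above is the AdaRound-style soft-rounding penalty: a soft target near 0 or 1 costs nothing, a target stuck at 0.5 costs the most, and the exponent b (annealed by the beta scheduler) sharpens the penalty over training. A standalone numeric check of the same expression:

import torch

st = torch.tensor([0.02, 0.5, 0.93])  # soft rounding targets in [0, 1]
b = 4.0                               # annealed by the beta scheduler in practice
soft_loss = (1 - torch.pow(torch.abs(st - .5) * 2, b)).sum()
print(soft_loss)  # ~1.6; the mid-point target alone contributes a full 1.0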
@@ -37,7 +37,6 @@ def _apply_gptq(gptq_config: GradientPTQConfig,
                 tb_w: TensorboardWriter,
                 tg: Graph,
                 tg_bias: Graph,
-                fw_info: FrameworkInfo,
                 fw_impl: FrameworkImplementation,
                 hessian_info_service: HessianInfoService = None) -> Graph:
     """
@@ -52,7 +51,6 @@ def _apply_gptq(gptq_config: GradientPTQConfig,
         tb_w: TensorBoardWriter object to log events.
         tg: Float Reference Graph.
         tg_bias: Graph of quantized model.
-        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.).
         fw_impl: Framework implementation per framework
         hessian_info_service: HessianInfoService to fetch information based on the hessian approximation for the float model.
     Returns:
@@ -64,7 +62,6 @@ def _apply_gptq(gptq_config: GradientPTQConfig,
                             gptq_config,
                             representative_data_gen,
                             fw_impl,
-                            fw_info,
                             hessian_info_service=hessian_info_service)
 
     if tb_w is not None:
@@ -77,7 +74,6 @@ def gptq_runner(tg: Graph,
                 gptq_config: GradientPTQConfig,
                 representative_data_gen: Callable,
                 gptq_representative_data_gen: Callable,
-                fw_info: FrameworkInfo,
                 fw_impl: FrameworkImplementation,
                 tb_w: TensorboardWriter,
                 hessian_info_service: HessianInfoService = None) -> Graph:
@@ -91,7 +87,6 @@ def gptq_runner(tg: Graph,
         gptq_config: GradientPTQConfig with parameters about the tuning process.
         representative_data_gen: Dataset used for calibration.
         gptq_representative_data_gen: Dataset used for GPTQ training
-        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.)
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         tb_w: A TensorBoardWriter object initialized with the logger dir path if it was set, or None otherwise.
         hessian_info_service: HessianScoresService to fetch approximations of the hessian scores for the float model.
@@ -104,7 +99,7 @@ def gptq_runner(tg: Graph,
     #############################################
     # Apply Statistics Correction
     #############################################
-    tg_bias = apply_statistics_correction(tg, representative_data_gen, core_config, fw_info, fw_impl, tb_w)
+    tg_bias = apply_statistics_correction(tg, representative_data_gen, core_config, fw_impl, tb_w)
 
     if tb_w is not None:
         tb_w.add_graph(tg_bias, 'after_bias_correction')
@@ -117,7 +112,6 @@ def gptq_runner(tg: Graph,
                     tb_w,
                     tg,
                     tg_bias,
-                    fw_info,
                     fw_impl,
                     hessian_info_service=hessian_info_service)