mct-nightly 2.4.0.20250924.535__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/METADATA +6 -3
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/RECORD +165 -159
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/analyzer.py +5 -2
- model_compression_toolkit/core/common/back2framework/base_model_builder.py +4 -0
- model_compression_toolkit/core/common/collectors/base_collector.py +1 -4
- model_compression_toolkit/core/common/collectors/mean_collector.py +4 -7
- model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py +4 -7
- model_compression_toolkit/core/common/framework_implementation.py +22 -10
- model_compression_toolkit/core/common/framework_info.py +83 -93
- model_compression_toolkit/core/common/fusion/graph_fuser.py +9 -12
- model_compression_toolkit/core/common/graph/base_graph.py +72 -45
- model_compression_toolkit/core/common/graph/base_node.py +141 -121
- model_compression_toolkit/core/common/graph/functional_node.py +2 -19
- model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +21 -17
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +18 -8
- model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py +9 -14
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +21 -12
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +3 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +6 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +10 -5
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +9 -4
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +7 -2
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -7
- model_compression_toolkit/core/common/model_collector.py +18 -22
- model_compression_toolkit/core/common/model_validation.py +44 -0
- model_compression_toolkit/core/common/network_editors/__init__.py +1 -8
- model_compression_toolkit/core/common/network_editors/actions.py +130 -14
- model_compression_toolkit/core/common/network_editors/edit_network.py +4 -1
- model_compression_toolkit/core/common/pruning/channels_grouping.py +5 -1
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +6 -0
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +15 -5
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +7 -3
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +4 -2
- model_compression_toolkit/core/common/pruning/memory_calculator.py +13 -5
- model_compression_toolkit/core/common/pruning/prune_graph.py +4 -1
- model_compression_toolkit/core/common/pruning/pruner.py +6 -1
- model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +13 -5
- model_compression_toolkit/core/common/pruning/pruning_section.py +18 -9
- model_compression_toolkit/core/common/quantization/bit_width_config.py +10 -10
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +55 -116
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +14 -20
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +228 -43
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -0
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -21
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +78 -0
- model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py +5 -8
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +76 -91
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +66 -36
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +32 -61
- model_compression_toolkit/core/common/quantization/quantize_node.py +8 -8
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +412 -93
- model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +7 -3
- model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +19 -6
- model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +19 -11
- model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +15 -15
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +20 -4
- model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +9 -4
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +12 -8
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +6 -3
- model_compression_toolkit/core/common/substitutions/scale_equalization.py +21 -5
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +55 -43
- model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +3 -1
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
- model_compression_toolkit/core/common/visualization/nn_visualizer.py +8 -3
- model_compression_toolkit/core/common/visualization/tensorboard_writer.py +12 -8
- model_compression_toolkit/core/graph_prep_runner.py +35 -22
- model_compression_toolkit/core/keras/back2framework/float_model_builder.py +4 -0
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +5 -0
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +15 -8
- model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/keras/default_framework_info.py +91 -131
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +1 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +18 -29
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +16 -8
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +5 -4
- model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +13 -3
- model_compression_toolkit/core/keras/keras_implementation.py +37 -17
- model_compression_toolkit/core/keras/keras_model_validation.py +38 -0
- model_compression_toolkit/core/keras/keras_node_prior_info.py +13 -4
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +34 -19
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +12 -3
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +16 -9
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +5 -1
- model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +3 -2
- model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/pytorch/default_framework_info.py +79 -93
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +4 -3
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +5 -5
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +8 -4
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +4 -3
- model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +12 -3
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +41 -24
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +33 -13
- model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +5 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/quantization_prep_runner.py +11 -6
- model_compression_toolkit/core/runner.py +15 -5
- model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +8 -8
- model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +11 -11
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +0 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +1 -0
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +9 -13
- model_compression_toolkit/gptq/common/gptq_graph.py +11 -5
- model_compression_toolkit/gptq/common/gptq_training.py +8 -1
- model_compression_toolkit/gptq/keras/gptq_training.py +9 -3
- model_compression_toolkit/gptq/keras/graph_info.py +6 -4
- model_compression_toolkit/gptq/keras/quantization_facade.py +10 -4
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/pytorch/gptq_training.py +9 -3
- model_compression_toolkit/gptq/pytorch/graph_info.py +3 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +7 -5
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/runner.py +7 -1
- model_compression_toolkit/pruning/keras/pruning_facade.py +12 -7
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +8 -4
- model_compression_toolkit/ptq/keras/quantization_facade.py +13 -5
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -4
- model_compression_toolkit/ptq/runner.py +4 -1
- model_compression_toolkit/qat/common/qat_config.py +6 -2
- model_compression_toolkit/qat/keras/quantization_facade.py +13 -7
- model_compression_toolkit/qat/pytorch/quantization_facade.py +11 -7
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2pytorch.py +3 -3
- model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +2 -0
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +6 -0
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +4 -2
- model_compression_toolkit/xquant/__init__.py +1 -0
- model_compression_toolkit/xquant/common/constants.py +1 -0
- model_compression_toolkit/xquant/common/model_folding_utils.py +6 -1
- model_compression_toolkit/xquant/common/tensorboard_utils.py +4 -1
- model_compression_toolkit/xquant/common/xquant_config.py +27 -1
- model_compression_toolkit/xquant/{common → keras}/core_report_generator.py +2 -2
- model_compression_toolkit/xquant/keras/facade_xquant_report.py +1 -1
- model_compression_toolkit/xquant/{common → keras}/framework_report_utils.py +23 -2
- model_compression_toolkit/xquant/keras/keras_report_utils.py +10 -5
- model_compression_toolkit/xquant/keras/similarity_calculator.py +199 -0
- model_compression_toolkit/xquant/keras/tensorboard_utils.py +3 -0
- model_compression_toolkit/xquant/pytorch/core_detect_degrade_layer.py +77 -0
- model_compression_toolkit/xquant/pytorch/core_judge_troubleshoot.py +66 -0
- model_compression_toolkit/xquant/pytorch/core_report_generator.py +177 -0
- model_compression_toolkit/xquant/pytorch/detect_degrade_utils.py +78 -0
- model_compression_toolkit/xquant/pytorch/facade_xquant_report.py +41 -1
- model_compression_toolkit/xquant/pytorch/framework_report_utils.py +98 -0
- model_compression_toolkit/xquant/pytorch/judge_troubleshoot_utils.py +562 -0
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +10 -7
- model_compression_toolkit/xquant/{common → pytorch}/similarity_calculator.py +6 -1
- model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +3 -0
- model_compression_toolkit/core/keras/quantization/activation_quantization_fn_factory.py +0 -47
- model_compression_toolkit/core/pytorch/quantization/activation_quantization_fn_factory.py +0 -45
- model_compression_toolkit/quantization_preparation/__init__.py +0 -14
- model_compression_toolkit/quantization_preparation/load_fqc.py +0 -223
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/WHEEL +0 -0
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/lut_fake_quant.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/lut_fake_quant.py +0 -0
model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py
CHANGED
@@ -56,7 +56,7 @@ class ReduceLROnPlateau(tf.keras.callbacks.Callback):
         super(ReduceLROnPlateau, self).__init__()
 
         if factor >= 1.0:
-            Logger.critical('Factor should be < 1.0.')
+            Logger.critical('Factor should be < 1.0.')  # pragma: no cover
         self.factor = factor
 
         self.optimizer = optimizer
@@ -101,7 +101,7 @@ class ReduceLROnPlateau(tf.keras.callbacks.Callback):
         else:
             self.num_bad_epochs += 1
 
-        if self.in_cooldown:
+        if self.in_cooldown:
            self.cooldown_counter -= 1
            self.num_bad_epochs = 0  # Ignore any bad epochs in cooldown
 
@@ -122,7 +122,7 @@ class ReduceLROnPlateau(tf.keras.callbacks.Callback):
            new_lr = max(old_lr * self.factor, self.min_lr)
            if old_lr - new_lr > self.eps:
                tf.keras.backend.set_value(self.optimizer.learning_rate, new_lr)
-                if self.verbose:
+                if self.verbose:
                    print(f'Epoch {epoch:05d}: reducing learning rate to {new_lr:.4e}.')
 
    @property
@@ -152,13 +152,13 @@ class ReduceLROnPlateau(tf.keras.callbacks.Callback):
        if self.mode == 'min' and self.threshold_mode == 'rel':
            rel_epsilon = 1. - self.threshold
            return a < best * rel_epsilon
-        elif self.mode == 'min' and self.threshold_mode == 'abs':
+        elif self.mode == 'min' and self.threshold_mode == 'abs':
            return a < best - self.threshold
-        elif self.mode == 'max' and self.threshold_mode == 'rel':
+        elif self.mode == 'max' and self.threshold_mode == 'rel':
            rel_epsilon = self.threshold + 1.
            return a > best * rel_epsilon
        else:  # mode == 'max' and threshold_mode == 'abs':
-            return a > best + self.threshold
+            return a > best + self.threshold
 
    def _init_is_better(self, mode: str, threshold: float, threshold_mode: str) -> None:
        """
@@ -186,7 +186,7 @@ class ReduceLROnPlateau(tf.keras.callbacks.Callback):
        self.threshold = threshold
        self.threshold_mode = threshold_mode
 
-    def get_config(self) -> Dict:
+    def get_config(self) -> Dict:
        """
        Return the configuration of the scheduler as a dictionary.
 
@@ -207,7 +207,7 @@ class ReduceLROnPlateau(tf.keras.callbacks.Callback):
        base_config = super(ReduceLROnPlateau, self).get_config()
        return {**base_config, **config}
 
-    def set_config(self, config: Dict) -> None:
+    def set_config(self, config: Dict) -> None:
        """
        Set the configuration of the scheduler from a dictionary.
 
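For context, the four `mode`/`threshold_mode` branches touched by the @@ -152 hunk implement a standard plateau test. A minimal sketch of that comparison in plain Python (illustrative only, not the MCT class itself):

def is_better(a: float, best: float, mode: str, threshold: float, threshold_mode: str) -> bool:
    # Mirrors the branch structure shown in the hunk above.
    if mode == 'min' and threshold_mode == 'rel':
        rel_epsilon = 1. - threshold
        return a < best * rel_epsilon      # relative improvement under the best value
    elif mode == 'min' and threshold_mode == 'abs':
        return a < best - threshold        # absolute improvement under the best value
    elif mode == 'max' and threshold_mode == 'rel':
        rel_epsilon = threshold + 1.
        return a > best * rel_epsilon      # relative improvement over the best value
    else:  # mode == 'max' and threshold_mode == 'abs'
        return a > best + threshold        # absolute improvement over the best value

assert is_better(0.89, 1.0, 'min', 0.1, 'rel')      # 0.89 < 1.0 * 0.9
assert not is_better(0.95, 1.0, 'min', 0.1, 'rel')  # 0.95 misses the 10% margin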
model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py
CHANGED
@@ -60,10 +60,10 @@ class ReduceLROnPlateauWithReset:
        # Attach optimizer
        if not isinstance(optimizer, Optimizer):
            Logger.critical('{} is not an Optimizer'.format(
-                type(optimizer).__name__))
+                type(optimizer).__name__))  # pragma: no cover
        self.optimizer = optimizer
 
-        if isinstance(min_lr, (list, tuple)):
+        if isinstance(min_lr, (list, tuple)):
            if len(min_lr) != len(optimizer.param_groups):
                Logger.critical("expected {} min_lrs, got {}".format(
                    len(optimizer.param_groups), len(min_lr)))  # pragma: no cover
@@ -117,7 +117,7 @@ class ReduceLROnPlateauWithReset:
            self.num_bad_epochs += 1
 
        # Handle cooldown period
-        if self.in_cooldown:
+        if self.in_cooldown:
            self.cooldown_counter -= 1
            self.num_bad_epochs = 0  # Ignore any bad epochs in cooldown
 
@@ -142,7 +142,7 @@ class ReduceLROnPlateauWithReset:
            new_lr = max(old_lr * self.factor, self.min_lrs[i])
            if old_lr - new_lr > self.eps:
                param_group['lr'] = new_lr
-                if self.verbose:
+                if self.verbose:
                    epoch_str = ("%.2f" if isinstance(epoch, float) else "%.5d") % epoch
                    print('Epoch {}: reducing learning rate'
                          ' of group {} to {:.4e}.'.format(epoch_str, i, new_lr))
@@ -168,19 +168,19 @@ class ReduceLROnPlateauWithReset:
        Returns:
            bool: True if the new value is better, False otherwise.
        """
-        if best is None:
+        if best is None:
            return True
 
        if self.mode == 'min' and self.threshold_mode == 'rel':
            rel_epsilon = 1. - self.threshold
            return a < best * rel_epsilon
-        elif self.mode == 'min' and self.threshold_mode == 'abs':
+        elif self.mode == 'min' and self.threshold_mode == 'abs':
            return a < best - self.threshold
-        elif self.mode == 'max' and self.threshold_mode == 'rel':
+        elif self.mode == 'max' and self.threshold_mode == 'rel':
            rel_epsilon = self.threshold + 1.
            return a > best * rel_epsilon
        else:  # mode == 'max' and threshold_mode == 'abs':
-            return a > best + self.threshold
+            return a > best + self.threshold
 
    def _init_is_better(self) -> None:
        """
@@ -197,9 +197,9 @@ class ReduceLROnPlateauWithReset:
        if self.mode == 'min':
            self.mode_worse = float('inf')
        else:  # mode == 'max':
-            self.mode_worse = float('-inf')
+            self.mode_worse = float('-inf')
 
-    def state_dict(self) -> Dict[str, Any]:
+    def state_dict(self) -> Dict[str, Any]:
        """
        Return the state of the scheduler as a dictionary.
 
@@ -208,7 +208,7 @@ class ReduceLROnPlateauWithReset:
        """
        return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}
 
-    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
+    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
        """
        Load the scheduler state.
 
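The `state_dict`/`load_state_dict` pair shown above serializes every attribute except the attached optimizer. A minimal sketch of that round-trip pattern (the `load_state_dict` body via `__dict__.update` is an assumption following the usual torch-scheduler convention; the MCT source may differ):

from typing import Any, Dict

class SchedulerLike:
    # Illustrative stand-in for ReduceLROnPlateauWithReset's persistence logic.
    def __init__(self, optimizer: Any, factor: float = 0.1):
        self.optimizer = optimizer  # runtime reference, never serialized
        self.factor = factor
        self.num_bad_epochs = 0

    def state_dict(self) -> Dict[str, Any]:
        # Same expression as in the hunk above: drop the optimizer reference.
        return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}

    def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
        self.__dict__.update(state_dict)

s1 = SchedulerLike(optimizer=object(), factor=0.5)
s2 = SchedulerLike(optimizer=object())
s2.load_state_dict(s1.state_dict())
assert s2.factor == 0.5 and 'optimizer' not in s1.state_dict()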
model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py
CHANGED
@@ -21,7 +21,6 @@ from model_compression_toolkit.logger import Logger
 
 if FOUND_TF:
    import keras
-    from model_compression_toolkit.core.keras.default_framework_info import set_keras_info
    from model_compression_toolkit.exporter.model_wrapper.keras.validate_layer import is_keras_layer_exportable
    from model_compression_toolkit.exporter.model_exporter.keras.fakely_quant_keras_exporter import \
        FakelyQuantKerasExporter
@@ -37,7 +36,6 @@ if FOUND_TF:
        KerasExportSerializationFormat.TFLITE: [QuantizationFormat.FAKELY_QUANT, QuantizationFormat.INT8]
    }
 
-    @set_keras_info
    def keras_export_model(model: keras.models.Model,
                           save_model_path: str,
                           is_layer_exportable_fn: Callable = is_keras_layer_exportable,
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py
CHANGED
@@ -19,6 +19,7 @@ import torch.nn
 
 from mct_quantizers import PytorchActivationQuantizationHolder, PytorchQuantizationWrapper
 
+from model_compression_toolkit.core.pytorch.reader.node_holders import DummyPlaceHolder
 from model_compression_toolkit.verify_packages import FOUND_ONNX
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py
CHANGED
@@ -27,7 +27,6 @@ DEFAULT_ONNX_OPSET_VERSION = 15
 
 if FOUND_TORCH:
    import torch.nn
-    from model_compression_toolkit.core.pytorch.default_framework_info import set_pytorch_info
    from model_compression_toolkit.exporter.model_exporter.pytorch.fakely_quant_onnx_pytorch_exporter import FakelyQuantONNXPyTorchExporter
    from model_compression_toolkit.exporter.model_exporter.pytorch.fakely_quant_torchscript_pytorch_exporter import FakelyQuantTorchScriptPyTorchExporter
    from model_compression_toolkit.exporter.model_wrapper.pytorch.validate_layer import is_pytorch_layer_exportable
@@ -42,14 +41,13 @@ if FOUND_TORCH:
        PytorchExportSerializationFormat.ONNX: [QuantizationFormat.FAKELY_QUANT, QuantizationFormat.MCTQ]
    }
 
-    @set_pytorch_info
    def pytorch_export_model(model: torch.nn.Module,
                             save_model_path: str,
                             repr_dataset: Callable,
                             is_layer_exportable_fn: Callable = is_pytorch_layer_exportable,
                             serialization_format: PytorchExportSerializationFormat = PytorchExportSerializationFormat.ONNX,
                             quantization_format: QuantizationFormat = QuantizationFormat.MCTQ,
-                             onnx_opset_version=DEFAULT_ONNX_OPSET_VERSION,
+                             onnx_opset_version: int = DEFAULT_ONNX_OPSET_VERSION,
                             output_names: Optional[List[str]] = None) -> None:
        """
        Export a PyTorch quantized model to a torchscript or onnx model.
@@ -60,16 +58,14 @@ if FOUND_TORCH:
        (where the model will be saved to ONNX model).
 
        Args:
-            model: Model to export.
-            save_model_path: Path to save the model.
-            repr_dataset: Representative dataset for tracing the pytorch model (mandatory for exporting it).
-            is_layer_exportable_fn: Callable to check whether a layer can be exported or not.
-            serialization_format: Format to export the model according to (by default
-
-
-
-            output_names (Optional[List[str]]): Optional list of output node names for export compatibility.
-                This argument is relevant only when using PytorchExportSerializationFormat.ONNX.
+            model (Module): Model to export.
+            save_model_path (str): Path to save the model.
+            repr_dataset (Callable): Representative dataset for tracing the pytorch model (mandatory for exporting it).
+            is_layer_exportable_fn (Callable): Callable to check whether a layer can be exported or not.
+            serialization_format (PytorchExportSerializationFormat): Format to export the model according to (by default PytorchExportSerializationFormat.ONNX).
+            quantization_format (QuantizationFormat): Format of how quantizers are exported (fakely-quant, int8, MCTQ quantizers).
+            onnx_opset_version (int): ONNX opset version to use for exported ONNX model.
+            output_names (Optional[List[str]]): Optional list of output node names for export compatibility. This argument is relevant only when using PytorchExportSerializationFormat.ONNX.
 
        """
        # Ensure 'metadata' is available directly on the model, if present in submodules
model_compression_toolkit/gptq/common/gptq_graph.py
CHANGED
@@ -14,8 +14,8 @@
 # ==============================================================================
 from typing import Tuple, List
 
+from model_compression_toolkit.core import FrameworkInfo
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.framework_info import get_fw_info
 from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 
@@ -40,7 +40,8 @@ def get_compare_points(input_graph: Graph) -> Tuple[List[BaseNode], List[str], L
    compare_points_name = []
    for n in input_graph.get_topo_sorted_nodes():
        # only nodes with kernel attribute are currently trained with GPTQ and are used as compare points
-
+        kernel_attr = input_graph.fw_info.get_kernel_op_attributes(n.type)[0]
+        if kernel_attr is not None and n.is_weights_quantization_enabled(kernel_attr) and not n.reuse:
            compare_points.append(n)
            compare_points_name.append(n.name)
            compare_points_std.append(n.prior_info.std_output)
@@ -48,15 +49,20 @@ def get_compare_points(input_graph: Graph) -> Tuple[List[BaseNode], List[str], L
    return compare_points, compare_points_name, compare_points_mean, compare_points_std
 
 
-def get_kernel_attribute_name_for_gptq(layer_type: type) -> str:
+def get_kernel_attribute_name_for_gptq(layer_type: type, fw_info: FrameworkInfo) -> str:
    """
    Returns a layer's kernel attribute name for GPTQ training purposes.
 
    Args:
        layer_type: A type of model's layer.
+        fw_info: A FrameworkInfo object.
 
    Returns: The name of the kernel attribute.
 
    """
-
-
+    kernel_attribute = fw_info.get_kernel_op_attributes(layer_type)
+    if len(kernel_attribute) != 1:
+        Logger.critical(  # pragma: no cover
+            f"In GPTQ training, only the kernel weights attribute should be trained. "
+            f"However, the number of kernel attributes is {len(kernel_attribute)}.")
+    return kernel_attribute[0]
model_compression_toolkit/gptq/common/gptq_training.py
CHANGED
@@ -44,6 +44,7 @@ class GPTQTrainer(ABC):
                 graph_quant: Graph,
                 gptq_config: GradientPTQConfig,
                 fw_impl: GPTQFrameworkImplemantation,
+                 fw_info: FrameworkInfo,
                 representative_data_gen_fn: Callable[[], Generator],
                 hessian_info_service: HessianInfoService = None):
        """
@@ -57,6 +58,7 @@ class GPTQTrainer(ABC):
            graph_quant: Graph to build a quantized networks from.
            gptq_config: GradientPTQConfig with parameters about the tuning process.
            fw_impl: Framework implementation
+            fw_info: Framework information
            representative_data_gen_fn: factory for representative data generator.
            hessian_info_service: HessianInfoService for fetching and computing Hessian-approximation information.
        """
@@ -64,6 +66,7 @@ class GPTQTrainer(ABC):
        self.graph_quant = copy.deepcopy(graph_quant)
        self.gptq_config = gptq_config
        self.fw_impl = fw_impl
+        self.fw_info = fw_info
        self.representative_data_gen_fn = representative_data_gen_fn
 
        def _get_total_grad_steps():
@@ -80,7 +83,8 @@ class GPTQTrainer(ABC):
 
        self.float_model, self.float_user_info = fw_impl.model_builder(self.graph_float,
                                                                       mode=ModelBuilderMode.FLOAT,
-                                                                       append2output=self.compare_points
+                                                                       append2output=self.compare_points,
+                                                                       fw_info=self.fw_info)
 
        self.fxp_model, self.gptq_user_info = self.build_gptq_model()
        if self.gptq_config.hessian_weights_config:
@@ -284,6 +288,7 @@ def gptq_training(graph_float: Graph,
                  gptq_config: GradientPTQConfig,
                  representative_data_gen: Callable,
                  fw_impl: GPTQFrameworkImplemantation,
+                  fw_info: FrameworkInfo,
                  hessian_info_service: HessianInfoService = None) -> Graph:
    """
    GPTQ training process using knowledge distillation with a teacher network (float model) and a student network (quantized model).
@@ -293,6 +298,7 @@ def gptq_training(graph_float: Graph,
        gptq_config: GradientPTQConfig with parameters about the tuning process.
        representative_data_gen: Dataset to use for inputs of the models.
        fw_impl: Framework implementation
+        fw_info: Framework information
        hessian_info_service: HessianInfoService to fetch information based on the Hessian approximation.
 
    Returns:
@@ -306,6 +312,7 @@ def gptq_training(graph_float: Graph,
                              graph_quant,
                              gptq_config,
                              fw_impl,
+                              fw_info,
                              representative_data_gen,
                              hessian_info_service=hessian_info_service)
 
model_compression_toolkit/gptq/keras/gptq_training.py
CHANGED
@@ -65,6 +65,7 @@ class KerasGPTQTrainer(GPTQTrainer):
                 graph_quant: Graph,
                 gptq_config: GradientPTQConfig,
                 fw_impl: FrameworkImplementation,
+                 fw_info: FrameworkInfo,
                 representative_data_gen: Callable,
                 hessian_info_service: HessianInfoService = None):
        """
@@ -78,6 +79,7 @@ class KerasGPTQTrainer(GPTQTrainer):
            graph_quant: Graph to build a quantized networks from.
            gptq_config: GradientPTQConfig with parameters about the tuning process.
            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
+            fw_info: Framework information.
            representative_data_gen: Dataset to use for inputs of the models.
            hessian_info_service: HessianScoresService for fetching and computing Hessian's approximation scores.
 
@@ -92,6 +94,7 @@ class KerasGPTQTrainer(GPTQTrainer):
                         graph_quant,
                         gptq_config,
                         fw_impl,
+                         fw_info,
                         representative_data_gen_fn=representative_data_gen,
                         hessian_info_service=hessian_info_service)
 
@@ -207,7 +210,8 @@ class KerasGPTQTrainer(GPTQTrainer):
        Returns:
            A boolean whether the layer is to be wrapped with a QuantizeWrapper
        """
-
+        kernel_attr = self.fw_info.get_kernel_op_attributes(node.type)[0]
+        return kernel_attr is not None and node.is_weights_quantization_enabled(kernel_attr)
 
    def gptq_wrapper(self,
                     n: common.BaseNode,
@@ -226,7 +230,7 @@ class KerasGPTQTrainer(GPTQTrainer):
        # If we are here, then the node has a kernel attribute to quantize and training during GPTQ
        weights_quantizers, _ = quantization_builder(n,
                                                     self.gptq_config,  # TODO: split quantizers building into two functions: for weights and activations
-                                                     n.
+                                                     self.fw_info.get_kernel_op_attributes(n.type)[0])
        if len(weights_quantizers) > 0:
            return KerasTrainableQuantizationWrapper(layer,
                                                     weights_quantizers=weights_quantizers)
@@ -267,6 +271,7 @@ class KerasGPTQTrainer(GPTQTrainer):
 
        gptq_model, gptq_user_info = KerasModelBuilder(graph=self.graph_quant,
                                                       append2output=self.compare_points,
+                                                       fw_info=self.fw_info,
                                                       return_float_outputs=True,
                                                       wrapper=self.gptq_wrapper,
                                                       get_activation_quantizer_holder_fn=self.get_activation_quantizer_holder).build_model()
@@ -426,7 +431,8 @@ class KerasGPTQTrainer(GPTQTrainer):
            Logger.critical(f"Unable to update the GPTQ graph because the layer named '{layer.layer.name}' could not be found. "
                            f"Verify that the layer names in the GPTQ model match those in the graph.")
        node = node[0]
-        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type
+        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type,
+                                                              fw_info=self.fw_info)
        # TODO: only kernel attributes are currently trained in GPTQ, so only the kernel weights need to be updated.
        # To enable GPTQ for other attributes, this code needs to be modified.
        weights, weight_quant_config, activation_quant_config = \
model_compression_toolkit/gptq/keras/graph_info.py
CHANGED
@@ -16,8 +16,8 @@
 import tensorflow as tf
 from typing import Tuple, List
 from model_compression_toolkit.core.keras.constants import USE_BIAS
-from model_compression_toolkit.core.common.framework_info import get_fw_info
 from tensorflow.keras.models import Model
+from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
@@ -44,7 +44,8 @@ def get_gptq_trainable_parameters(fxp_model: Model,
 
    for layer in fxp_model.layers:
        if isinstance(layer, KerasTrainableQuantizationWrapper):
-            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer)
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
+                                                                  fw_info=DEFAULT_KERAS_INFO)
 
            # collect trainable weights per quantizer
            if kernel_attribute not in layer.weights_quantizers:
@@ -56,8 +57,9 @@ def get_gptq_trainable_parameters(fxp_model: Model,
            trainable_threshold.extend(quantizer_trainable_threshold)
 
            if add_bias:
-
-                use_bias =
+                kernel_ops_attrs = DEFAULT_KERAS_INFO.kernel_ops_attributes_mapping.get(type(layer.layer))
+                use_bias = kernel_ops_attrs is not None and kernel_ops_attrs[0] is not None \
+                           and layer.layer.get_config().get(USE_BIAS)
                if use_bias is not None and use_bias and layer.layer.bias is not None:
                    bias_weights.append([layer.layer.bias])
 
model_compression_toolkit/gptq/keras/quantization_facade.py
CHANGED
@@ -41,8 +41,9 @@ from model_compression_toolkit.metadata import create_model_metadata
 
 if FOUND_TF:
    import tensorflow as tf
-    from model_compression_toolkit.core.keras.default_framework_info import
+    from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
    from model_compression_toolkit.gptq.keras.gptq_keras_implementation import GPTQKerasImplemantation
+    from model_compression_toolkit.core.keras.keras_model_validation import KerasModelValidation
    from tensorflow.keras.models import Model
    from model_compression_toolkit.gptq.keras.gptq_loss import GPTQMultipleTensorsLoss, sample_layer_attention_loss
    from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
@@ -151,7 +152,6 @@ if FOUND_TF:
            gradual_activation_quantization_config=gradual_quant_config)
 
 
-    @set_keras_info
    def keras_gradient_post_training_quantization(in_model: Model, representative_data_gen: Callable,
                                                  gptq_config: GradientPTQConfig,
                                                  gptq_representative_data_gen: Callable = None,
@@ -234,13 +234,16 @@ if FOUND_TF:
        if core_config.debug_config.bypass:
            return in_model, None
 
+        KerasModelValidation(model=in_model,
+                             fw_info=DEFAULT_KERAS_INFO).validate()
+
        if core_config.is_mixed_precision_enabled:
            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                Logger.critical("Given quantization config for mixed-precision is not of type 'MixedPrecisionQuantizationConfig'. "
                                "Ensure usage of the correct API for keras_post_training_quantization "
                                "or provide a valid mixed-precision configuration.")  # pragma: no cover
 
-        tb_w = init_tensorboard_writer()
+        tb_w = init_tensorboard_writer(DEFAULT_KERAS_INFO)
 
        fw_impl = GPTQKerasImplemantation()
 
@@ -254,6 +257,7 @@ if FOUND_TF:
        tg, bit_widths_config, hessian_info_service, scheduling_info = core_runner(in_model=in_model,
                                                                                   representative_data_gen=representative_data_gen,
                                                                                   core_config=core_config,
+                                                                                   fw_info=DEFAULT_KERAS_INFO,
                                                                                   fw_impl=fw_impl,
                                                                                   fqc=framework_platform_capabilities,
                                                                                   target_resource_utilization=target_resource_utilization,
@@ -267,6 +271,7 @@ if FOUND_TF:
                                          gptq_config,
                                          representative_data_gen,
                                          gptq_representative_data_gen if gptq_representative_data_gen else representative_data_gen,
+                                          DEFAULT_KERAS_INFO,
                                          fw_impl,
                                          tb_w,
                                          hessian_info_service=hessian_info_service)
@@ -278,7 +283,8 @@ if FOUND_TF:
                                   tb_w,
                                   float_graph,
                                   tg_gptq,
-                                   fw_impl
+                                   fw_impl,
+                                   DEFAULT_KERAS_INFO)
 
        exportable_model, user_info = get_exportable_keras_model(tg_gptq)
        if framework_platform_capabilities.tpc.add_metadata:
model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py
CHANGED
@@ -17,6 +17,7 @@ from typing import List, Callable
 import tensorflow as tf
 from keras import Model
 
+from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
 
@@ -65,7 +66,8 @@ class SoftQuantizerRegularization:
 
        # Compute the regularization term without concatenating
        for i, layer in enumerate(layers):
-            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer)
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
+                                                                  fw_info=DEFAULT_KERAS_INFO)
 
            st = layer.weights_quantizers[kernel_attribute].get_soft_targets()
 
model_compression_toolkit/gptq/pytorch/gptq_training.py
CHANGED
@@ -54,6 +54,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
                 graph_quant: Graph,
                 gptq_config: GradientPTQConfig,
                 fw_impl: FrameworkImplementation,
+                 fw_info: FrameworkInfo,
                 representative_data_gen: Callable,
                 hessian_info_service: HessianInfoService = None):
        """
@@ -67,6 +68,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
            graph_quant: Graph to build a quantized networks from.
            gptq_config: GradientPTQConfigV2 with parameters about the tuning process.
            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
+            fw_info: Framework information
            representative_data_gen: Dataset to use for inputs of the models.
            hessian_info_service: HessianInfoService to fetch info based on the hessian approximation of the float model.
        """
@@ -79,6 +81,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
                         graph_quant,
                         gptq_config,
                         fw_impl,
+                         fw_info,
                         representative_data_gen_fn=representative_data_gen,
                         hessian_info_service=hessian_info_service)
 
@@ -164,7 +167,8 @@ class PytorchGPTQTrainer(GPTQTrainer):
            A boolean whether the layer is to be wrapped with a Quantization Wrapper.
        """
 
-
+        kernel_attr = self.fw_info.get_kernel_op_attributes(node.type)[0]
+        return kernel_attr is not None and node.is_weights_quantization_enabled(kernel_attr)
 
    def gptq_wrapper(self,
                     n: BaseNode,
@@ -183,7 +187,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
        # If we are here, then the node has a kernel attribute to quantize and training during GPTQ
        weights_quantizers, _ = quantization_builder(n,
                                                     self.gptq_config,
-                                                     n.
+                                                     self.fw_info.get_kernel_op_attributes(n.type)[0])
 
        if len(weights_quantizers) > 0:
            return PytorchQuantizationWrapper(layer,
@@ -220,6 +224,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
        """
        gptq_model, gptq_user_info = PyTorchModelBuilder(graph=self.graph_quant,
                                                         append2output=self.compare_points,
+                                                         fw_info=self.fw_info,
                                                         wrapper=self.gptq_wrapper,
                                                         return_float_outputs=True,
                                                         get_activation_quantizer_holder_fn=self.get_activation_quantizer_holder).build_model()
@@ -335,7 +340,8 @@ class PytorchGPTQTrainer(GPTQTrainer):
            Logger.critical(f"Cannot update GPTQ graph: Layer with name '{name}' is missing or not unique. "
                            f"Ensure each layer has a unique name and exists within the graph for updates.")
        node = node[0]
-        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type
+        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type,
+                                                              fw_info=self.fw_info)
        # TODO: only kernel attributes are currently trained in GPTQ, so only the kernel weights need to be updated.
        # To enable GPTQ for other attributes, this code needs to be modified.
        weights, weight_quant_config, activation_quant_config = \
model_compression_toolkit/gptq/pytorch/graph_info.py
CHANGED
@@ -16,6 +16,7 @@ import torch
 import torch.nn as nn
 from typing import List
 from model_compression_toolkit.core.pytorch.constants import BIAS
+from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.logger import Logger
 from mct_quantizers import PytorchQuantizationWrapper
@@ -42,7 +43,8 @@ def get_gptq_trainable_parameters(fxp_model: nn.Module,
 
    for layer in fxp_model.modules():
        if isinstance(layer, PytorchQuantizationWrapper):
-            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer)
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
+                                                                  fw_info=DEFAULT_PYTORCH_INFO)
 
            # collect trainable weights per quantizer
            if kernel_attribute not in layer.weights_quantizers:
model_compression_toolkit/gptq/pytorch/quantization_facade.py
CHANGED
@@ -39,7 +39,7 @@ from model_compression_toolkit.verify_packages import FOUND_TORCH
 
 
 if FOUND_TORCH:
-    from model_compression_toolkit.core.pytorch.default_framework_info import
+    from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
    from model_compression_toolkit.gptq.pytorch.gptq_pytorch_implementation import GPTQPytorchImplemantation
    from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
    from model_compression_toolkit.gptq.pytorch.gptq_loss import multiple_tensors_mse_loss, sample_layer_attention_loss
@@ -142,8 +142,6 @@ if FOUND_TORCH:
            gradual_activation_quantization_config=gradual_quant_config,
            log_function=log_function)
 
-
-    @set_pytorch_info
    def pytorch_gradient_post_training_quantization(model: Module,
                                                    representative_data_gen: Callable,
                                                    target_resource_utilization: ResourceUtilization = None,
@@ -218,7 +216,8 @@ if FOUND_TORCH:
                Logger.critical("Given quantization config for mixed-precision is not of type 'MixedPrecisionQuantizationConfig'. "
                                "Ensure usage of the correct API for 'pytorch_gradient_post_training_quantization' "
                                "or provide a valid mixed-precision configuration.")
-
+
+        tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)
 
        fw_impl = GPTQPytorchImplemantation()
 
@@ -234,6 +233,7 @@ if FOUND_TORCH:
        graph, bit_widths_config, hessian_info_service, scheduling_info = core_runner(in_model=model,
                                                                                      representative_data_gen=representative_data_gen,
                                                                                      core_config=core_config,
+                                                                                      fw_info=DEFAULT_PYTORCH_INFO,
                                                                                      fw_impl=fw_impl,
                                                                                      fqc=framework_quantization_capabilities,
                                                                                      target_resource_utilization=target_resource_utilization,
@@ -250,6 +250,7 @@ if FOUND_TORCH:
                                            gptq_config,
                                            representative_data_gen,
                                            gptq_representative_data_gen if gptq_representative_data_gen else representative_data_gen,
+                                            DEFAULT_PYTORCH_INFO,
                                            fw_impl,
                                            tb_w,
                                            hessian_info_service=hessian_info_service)
@@ -259,7 +260,8 @@ if FOUND_TORCH:
                                   tb_w,
                                   float_graph,
                                   graph_gptq,
-                                   fw_impl
+                                   fw_impl,
+                                   DEFAULT_PYTORCH_INFO)
 
        exportable_model, user_info = get_exportable_pytorch_model(graph_gptq)
        if framework_quantization_capabilities.tpc.add_metadata:
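As with the keras facade, `pytorch_gradient_post_training_quantization` keeps its public signature and only gains the internal `DEFAULT_PYTORCH_INFO` plumbing. A hedged usage sketch mirroring the keras example (model and dataset are placeholders; `mct.gptq` aliases assumed from MCT's public API):

import torch
import model_compression_toolkit as mct

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())  # placeholder model

def representative_data_gen():
    for _ in range(2):
        yield [torch.randn(1, 3, 32, 32)]

gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=5)
quantized_model, quantization_info = mct.gptq.pytorch_gradient_post_training_quantization(
    model, representative_data_gen, gptq_config=gptq_config)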
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py
CHANGED
@@ -18,6 +18,7 @@ import torch
 from torch import nn
 
 from mct_quantizers import PytorchQuantizationWrapper
+from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 
 
@@ -60,7 +61,8 @@ class SoftQuantizerRegularization:
        b = self.beta_scheduler(self.count_iter)
        reg = 0
        for layer, w in zip(layers, layer_weights):
-            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer)
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
+                                                                  fw_info=DEFAULT_PYTORCH_INFO)
 
            st = layer.weights_quantizers[kernel_attribute].get_soft_targets()
            soft_loss = (1 - torch.pow(torch.abs(st - .5) * 2, b)).sum()