mct-nightly 2.2.0.20241204.524__py3-none-any.whl → 2.2.0.20241206.524__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241204.524.dist-info → mct_nightly-2.2.0.20241206.524.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241204.524.dist-info → mct_nightly-2.2.0.20241206.524.dist-info}/RECORD +17 -17
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/keras/data_util.py +151 -18
- model_compression_toolkit/core/keras/hessian/activation_hessian_scores_calculator_keras.py +93 -86
- model_compression_toolkit/core/keras/hessian/hessian_scores_calculator_keras.py +17 -0
- model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +1 -2
- model_compression_toolkit/gptq/common/gptq_training.py +58 -0
- model_compression_toolkit/gptq/keras/gptq_loss.py +35 -2
- model_compression_toolkit/gptq/keras/gptq_training.py +137 -67
- model_compression_toolkit/gptq/keras/graph_info.py +1 -4
- model_compression_toolkit/gptq/keras/quantization_facade.py +24 -11
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +23 -11
- model_compression_toolkit/gptq/pytorch/gptq_training.py +4 -45
- {mct_nightly-2.2.0.20241204.524.dist-info → mct_nightly-2.2.0.20241206.524.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241204.524.dist-info → mct_nightly-2.2.0.20241206.524.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241204.524.dist-info → mct_nightly-2.2.0.20241206.524.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20241204.524.dist-info → mct_nightly-2.2.0.20241206.524.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
-model_compression_toolkit/__init__.py,sha256=
+model_compression_toolkit/__init__.py,sha256=KhP8R07jwQig7PMnV7NExSRFSjG_rAbMcGhuL8koQWc,1573
 model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -155,7 +155,7 @@ model_compression_toolkit/core/common/visualization/tensorboard_writer.py,sha256
 model_compression_toolkit/core/keras/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/constants.py,sha256=dh4elQWt6Q6NYRht5k5RiiOcnLAq1v0MMBCJqMJzzFk,3225
 model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
-model_compression_toolkit/core/keras/data_util.py,sha256=
+model_compression_toolkit/core/keras/data_util.py,sha256=sTEuHUrT8S3CpeAEG0XDlYA0bWZKISGPilObPlO0TA8,6833
 model_compression_toolkit/core/keras/default_framework_info.py,sha256=PYcER89eEXjKtR0T7-2Y4f7cckqoD5OQbpHePoRkMec,5030
 model_compression_toolkit/core/keras/keras_implementation.py,sha256=HwbIR7x4t-TBNbWHVvVNFk8z-KFt6zM0LWAUXQuNZrk,31753
 model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
@@ -193,9 +193,9 @@ model_compression_toolkit/core/keras/graph_substitutions/substitutions/softmax_s
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/virtual_activation_weights_composition.py,sha256=wH9ocMLL725-uUPU-zCxdd8NwT5nyd0ZShmI7iuTwF8,1462
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/weights_activation_split.py,sha256=rjIheZW7LbSPv9bzMSmC8wl6UUxaTkd4J2IHinObT-Y,1814
 model_compression_toolkit/core/keras/hessian/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
-model_compression_toolkit/core/keras/hessian/activation_hessian_scores_calculator_keras.py,sha256=
-model_compression_toolkit/core/keras/hessian/hessian_scores_calculator_keras.py,sha256=
-model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py,sha256=
+model_compression_toolkit/core/keras/hessian/activation_hessian_scores_calculator_keras.py,sha256=qGEyOzC1_NIcnBmvvjA-GT7o9-PWo0Ko66vcEyLixhw,9180
+model_compression_toolkit/core/keras/hessian/hessian_scores_calculator_keras.py,sha256=1o7X9GXSfpEmuB5ee2AaBQ2sN2xzX4-smbrq_0qOGRU,4454
+model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py,sha256=Rl6NNGkHMV0ioEM5bbM4XX7yHDqG6mMp4ifN2VQBDxE,12168
 model_compression_toolkit/core/keras/mixed_precision/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
 model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py,sha256=aW8wR13fK6P6xzbU9XGU60IO1yYzXSo_Hk4qeq486kg,5137
 model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py,sha256=Ziydik2j-LvNBXP3TSfUD6rEezPAikzQGib0_IXkmGM,6729
@@ -355,21 +355,21 @@ model_compression_toolkit/gptq/common/gptq_config.py,sha256=QwSEZZlC6OpnpoBQoAFf
 model_compression_toolkit/gptq/common/gptq_constants.py,sha256=8HB0yiX75zZ1IKgQUPWpFCM5sS8HAqslws5XrOhxJQ0,750
 model_compression_toolkit/gptq/common/gptq_framework_implementation.py,sha256=n3mSf4J92kFjekzyGyrJULylI-8Jf5OVWJ5AFoVnEx0,1266
 model_compression_toolkit/gptq/common/gptq_graph.py,sha256=-bL5HhPcKqV8nj4dZPXc5QmQJbFBel6etrioikP0tEo,3039
-model_compression_toolkit/gptq/common/gptq_training.py,sha256=
+model_compression_toolkit/gptq/common/gptq_training.py,sha256=vvrQH1MIW3w90yt9VKBW6jTMXkBrDY82JmCDwT8Kve8,17002
 model_compression_toolkit/gptq/common/gradual_activation_quantization.py,sha256=EgpzMs_aDoB0wQiTagqvcxCTfrgNUuCfdXEXmfNiyb0,3780
 model_compression_toolkit/gptq/common/regularization_factory.py,sha256=hyunpXepVeHyoAFJw6zNLK-3ZHBmiut3lmNisJN_L3E,2514
 model_compression_toolkit/gptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/gptq/keras/gptq_keras_implementation.py,sha256=axBwnCSjq5xk-xGymOwSOqjp39It-CVtGcCTRTf0E_4,1248
-model_compression_toolkit/gptq/keras/gptq_loss.py,sha256=
-model_compression_toolkit/gptq/keras/gptq_training.py,sha256=
-model_compression_toolkit/gptq/keras/graph_info.py,sha256=
-model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=
+model_compression_toolkit/gptq/keras/gptq_loss.py,sha256=2hzWzsbuVd5XcL85NM57YeOyHxRY0qMArKn8NvQ1UWw,7643
+model_compression_toolkit/gptq/keras/gptq_training.py,sha256=0WGiP7Gs4xX3FBs1PNaZ7w3hWRigwQXqYjBrs_-x32o,23241
+model_compression_toolkit/gptq/keras/graph_info.py,sha256=zwoeHX67nJJ5-zYLjzvMXS9TLsy9BsizARbZiDVjVSA,4473
+model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=PO-tNoCoWQpXgefVxqxBfAQ29kGe_DFBgiOQ2DLYato,18005
 model_compression_toolkit/gptq/keras/quantizer/__init__.py,sha256=-DK1CDXvlsnEbki4lukZLpl6Xrbo91_jcqxXlG5Eg6Q,963
 model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py,sha256=Rbl9urzkmACvVxICSEyJ02qFOBxWK0UQWtysFJzBVZw,4899
 model_compression_toolkit/gptq/keras/quantizer/quant_utils.py,sha256=Vt7Qb8i4JsE4sFtcjpfM4FTXTtfV1t6SwfoNH8a_Iaw,5055
 model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py,sha256=rst-u5EB9Xss4ndKqi297WvZ-9RVee2TAUVFelPVKhU,4663
 model_compression_toolkit/gptq/keras/quantizer/soft_rounding/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
-model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=
+model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=UTvEL5hN2cEsMwiGBDbpcE0kQr32VFKwlJBWlDg8HNA,3271
 model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=BBSDWLmeywjSM5N6oJkMgcuo7zrXTesB4zLwRGG8QB0,12159
 model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py,sha256=pyhlVpoauHM-zuixHsIGPHFgQoXppL8TlDFCjPE2RuY,10377
 model_compression_toolkit/gptq/keras/quantizer/ste_rounding/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
@@ -377,7 +377,7 @@ model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py,sha
 model_compression_toolkit/gptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=_07Zx_43bnNokwR5S8phIqeu5-_7_5VBT4DT-FCw7Do,3892
 model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py,sha256=tECPTavxn8EEwgLaP2zvxdJH6Vg9jC0YOIMJ7857Sdc,1268
-model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=
+model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=WtehnyiYXdUXf8-uNpV0mdsalF7YF7eKnL7tcFrzZoE,19549
 model_compression_toolkit/gptq/pytorch/graph_info.py,sha256=4mVM-VvnBaA64ACVdOe6wTGHdMSa2UTLIUe7nACLcdo,4008
 model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=hZFU_ZY-LYcpRZyzzX7NsJievkIYKGdkgBzEoB4rsRQ,16020
 model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
@@ -559,8 +559,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=bOc-hFL3gdoSM1Th_S2N_-9JJSlPGpZCTx_QLJHS6lg,3388
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
+mct_nightly-2.2.0.20241206.524.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.2.0.20241206.524.dist-info/METADATA,sha256=Q-MSMJXd4He0d0RJ_jhEABCs2FgxB6vZIGjv24boOnw,26446
+mct_nightly-2.2.0.20241206.524.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+mct_nightly-2.2.0.20241206.524.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.2.0.20241206.524.dist-info/RECORD,,
model_compression_toolkit/__init__.py
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
-__version__ = "2.2.0.
+__version__ = "2.2.0.20241206.000524"
model_compression_toolkit/core/keras/data_util.py
@@ -18,6 +18,27 @@ import tensorflow as tf
 
 from model_compression_toolkit.core.keras.tf_tensor_numpy import to_tf_tensor
 
+import tensorflow as tf
+from typing import Callable, Generator, Sequence, Any
+
+
+def get_tensor_spec(item, ignore_batch_dim=False):
+    """
+    Get the TensorFlow TensorSpec for an item, optionally ignoring the first dimension.
+
+    Args:
+        item: The input item, which could be a tensor, tuple, or list.
+        ignore_batch_dim (bool): Whether to ignore the first dimension of the tensor shape.
+
+    Returns:
+        TensorSpec or a tuple of TensorSpecs.
+    """
+    if isinstance(item, (tuple, list)):
+        return tuple(get_tensor_spec(sub_item, ignore_batch_dim) for sub_item in item)
+
+    shape = item.shape[1:] if ignore_batch_dim else item.shape
+    return tf.TensorSpec(shape=shape, dtype=item.dtype)
+
 
 def flat_gen_fn(data_gen_fn: Callable[[], Generator]):
     """
@@ -29,39 +50,151 @@ def flat_gen_fn(data_gen_fn: Callable[[], Generator]):
     Returns:
         A factory for a flattened data generator.
     """
+
     def gen():
         for inputs_batch in data_gen_fn():
             for sample in zip(*inputs_batch):
-                yield
-    return gen
+                yield tuple([tf.convert_to_tensor(s) for s in sample])
 
+    return gen
 
-# TODO in tf dataset and dataloader are combined within tf.data.Dataset. For advanced use cases such as gptq sla we
-# need to separate dataset from dataloader similarly to torch data_util.
 class TFDatasetFromGenerator:
-        raise TypeError(f'Representative data generator is expected to generate a list of tensors, '
-                        f'got {type(inputs)}')  # pragma: no cover
+    """
+    TensorFlow dataset from a data generator function, batched to a specified size.
+    """
 
+    def __init__(self, data_gen_fn: Callable[[], Generator]):
+        """
+        Args:
+            data_gen_fn: a factory function for data generator that yields lists of tensors.
+        """
+        inputs = next(data_gen_fn())
+        if not isinstance(inputs, list):
+            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')
         self.orig_batch_size = inputs[0].shape[0]
-
-        output_signature = tuple([tf.TensorSpec(shape=t.shape[1:], dtype=t.dtype) for t in inputs])
-        dataset = tf.data.Dataset.from_generator(flat_gen_fn(data_gen), output_signature=output_signature)
-        self.dataset = dataset.batch(batch_size)
         self._size = None
+
+        # TFDatasetFromGenerator flattens the dataset, thus we ignore the batch dimension
+        output_signature = get_tensor_spec(inputs, ignore_batch_dim=True)
+        self.dataset = tf.data.Dataset.from_generator(flat_gen_fn(data_gen_fn), output_signature=output_signature)
+
 
     def __iter__(self):
         return iter(self.dataset)
 
     def __len__(self):
        """ Returns the number of batches. """
        if self._size is None:
-            self.
-            return self.
+            self._size = sum(1 for _ in self.dataset)
+        return self._size
+
+
+class FixedTFDataset:
+    """
+    Fixed dataset containing samples from a generator, stored in memory.
+    """
+
+    def __init__(self, data_gen_fn: Callable[[], Generator], n_samples: int = None):
+        """
+        Args:
+            data_gen_fn: data generator function.
+            n_samples: number of samples to store in the dataset. If None, uses all samples in one pass.
+        """
+        inputs = next(data_gen_fn())
+        if not isinstance(inputs, list):
+            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')
+        self.orig_batch_size = inputs[0].shape[0]
+
+        samples = []
+        for batch in data_gen_fn():
+            samples.extend(zip(*[tf.convert_to_tensor(t) for t in batch]))
+            if n_samples is not None and len(samples) >= n_samples:
+                samples = samples[:n_samples]
+                break
+
+        if n_samples and len(samples) < n_samples:
+            raise ValueError(f'Not enough samples to create a dataset with {n_samples} samples')
+        self.samples = samples
+
+    def __len__(self):
+        return len(self.samples)
+
+    def __getitem__(self, index):
+        return self.samples[index]
+
+
+class FixedSampleInfoDataset:
+    """
+    Dataset for samples with additional info, each element is a tuple of (sample, sample_info).
+    """
+
+    def __init__(self, samples: Sequence, sample_info: Sequence):
+        if not all(len(info) == len(samples) for info in sample_info):
+            raise ValueError('Sample and additional info lengths must match')
+        self.samples = samples
+        self.sample_info = sample_info
+
+    def __len__(self):
+        return len(self.samples)
+
+    def __getitem__(self, index):
+        return self.samples[index], tuple([info[index] for info in self.sample_info])
+
+
+class IterableSampleWithConstInfoDataset:
+    """
+    Augments each sample in an iterable dataset with constant additional information.
+    """
+
+    def __init__(self, samples_dataset: tf.data.Dataset, *info: Any):
+        self.samples_dataset = samples_dataset
+        self.info = info
+
+    def __iter__(self):
+        for sample in self.samples_dataset:
+            yield (sample, *self.info)
+
+
+def data_gen_to_dataloader(data_gen_fn: Callable[[], Generator], batch_size: int):
+    """Create a DataLoader based on samples yielded by data_gen."""
+    ds = TFDatasetFromGenerator(data_gen_fn)
+    return create_tf_dataloader(dataset=ds, batch_size=batch_size)
+
+
+def create_tf_dataloader(dataset, batch_size, shuffle=False, collate_fn=None):
+    """
+    Creates a tf.data.Dataset with specified loading options.
+
+    Args:
+        dataset: The dataset container (e.g., FixedDatasetFromGenerator or FixedSampleInfoDataset).
+        batch_size: Number of samples per batch.
+        shuffle: Whether to shuffle the dataset.
+        collate_fn: A function to apply to each batch (e.g., add extra outputs like regularization weights).
+
+    Returns:
+        tf.data.Dataset: Configured for batching, shuffling, and custom transformations.
+    """
+    def generator():
+        for item in dataset:
+            yield item
+
+    dummy_input_tensors = next(generator())
+
+    output_signature = get_tensor_spec(dummy_input_tensors)
+
+    tf_dataset = tf.data.Dataset.from_generator(
+        generator,
+        output_signature=output_signature
+    )
+
+    if shuffle:
+        tf_dataset = tf_dataset.shuffle(buffer_size=len(dataset))
+
+    tf_dataset = tf_dataset.batch(batch_size)
 
+    # Apply collate function if provided
+    if collate_fn:
+        tf_dataset = tf_dataset.map(lambda *args: collate_fn(args))
 
-
-    """ Create DataLoader based on samples yielded by data_gen. """
-    return TFDatasetFromGenerator(data_gen_fn, batch_size)
+    return tf_dataset
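For orientation, a minimal sketch of how the new data utilities might be wired together. The representative generator, batch size, and shapes below are illustrative and not taken from the package; only the two imported names come from the diff above.

import numpy as np
import tensorflow as tf
from model_compression_toolkit.core.keras.data_util import TFDatasetFromGenerator, create_tf_dataloader

# Hypothetical representative-data generator: two batches of 8 random images.
def representative_data_gen():
    for _ in range(2):
        yield [np.random.rand(8, 32, 32, 3).astype(np.float32)]

# TFDatasetFromGenerator flattens the generator into single samples;
# batching is then re-applied by create_tf_dataloader.
ds = TFDatasetFromGenerator(representative_data_gen)
loader = create_tf_dataloader(dataset=ds, batch_size=4)

for batch in loader.take(1):
    print(batch[0].shape)  # (4, 32, 32, 3)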
model_compression_toolkit/core/keras/hessian/activation_hessian_scores_calculator_keras.py
@@ -60,96 +60,103 @@ class ActivationHessianScoresCalculatorKeras(HessianScoresCalculatorKeras):
         Returns:
             List[np.ndarray]: Scores based on the Hessian-approximation for the requested nodes.
         """
-        [... old lines 63-121 removed; their content is not shown in this diff view ...]
+        model_output_nodes = [ot.node for ot in self.graph.get_outputs()]
+
+        if len([n for n in self.hessian_request.target_nodes if n in model_output_nodes]) > 0:
+            Logger.critical("Trying to compute activation Hessian approximation with respect to the model output. "
+                            "This operation is not supported. "
+                            "Remove the output node from the set of node targets in the Hessian request.")
+
+        grad_model_outputs = self.hessian_request.target_nodes + model_output_nodes
+
+        # Building a model to run Hessian approximation on
+        model, _ = FloatKerasModelBuilder(graph=self.graph, append2output=grad_model_outputs).build_model()
+
+        # Record operations for automatic differentiation
+        with tf.GradientTape(persistent=True, watch_accessed_variables=False) as g:
+            g.watch(self.input_images)
+
+            if len(self.input_images) > 1:
+                outputs = model(self.input_images)
+            else:
+                outputs = model(*self.input_images)
+
+            if len(outputs) != len(grad_model_outputs):  # pragma: no cover
+                Logger.critical(
+                    f"Model for computing activation Hessian approximation expects {len(grad_model_outputs)} "
+                    f"outputs, but got {len(outputs)} output tensors.")
+
+            # Extracting the intermediate activation tensors and the model real output.
+            # Note that we do not allow computing Hessian for output nodes, so there shouldn't be an overlap.
+            num_target_nodes = len(self.hessian_request.target_nodes)
+            # Extract activation tensors of nodes for which we want to compute Hessian
+            target_activation_tensors = outputs[:num_target_nodes]
+            # Extract the model outputs
+            output_tensors = outputs[num_target_nodes:]
+
+            # Unfold and concatenate all outputs to form a single tensor
+            output = self._concat_tensors(output_tensors)
+
+            # List to store the Hessian-approximation scores for each interest point
+            ipts_hessian_approximations = [tf.Variable([0.0], dtype=tf.float32, trainable=True)
+                                           for _ in range(len(target_activation_tensors))]
+
+            # Loop through each interest point activation tensor
+            prev_mean_results = None
+            for j in tqdm(range(self.num_iterations_for_approximation)):  # Approximation iterations
+                # Generate random tensor of 1s and -1s
+                v = self._generate_random_vectors_batch(output.shape)
+                f_v = tf.reduce_sum(v * output)
+                for i, ipt in enumerate(target_activation_tensors):  # Per Interest point activation tensor
+                    interest_point_scores = []  # List to store scores for each interest point
+                    with g.stop_recording():
+                        # Computing the approximation by getting the gradient of (output * v)
+                        hess_v = g.gradient(f_v, ipt)
+
+                        if hess_v is None:
+                            # In case we have an output node, which is an interest point, but it is not
+                            # differentiable, we consider its Hessian to be the initial value 0.
+                            continue  # pragma: no cover
+
+                        if self.hessian_request.granularity == HessianScoresGranularity.PER_TENSOR:
                             # Mean over all dims but the batch (CXHXW for conv)
                             hessian_approx = tf.reduce_sum(hess_v ** 2.0,
                                                            axis=tuple(d for d in range(1, len(hess_v.shape))))
-                        [... old lines 125-141 removed; their content is not shown in this diff view ...]
+                        elif self.hessian_request.granularity == HessianScoresGranularity.PER_ELEMENT:
+                            hessian_approx = hess_v ** 2
+                        elif self.hessian_request.granularity == HessianScoresGranularity.PER_OUTPUT_CHANNEL:
+                            axes_to_sum = tuple(d for d in range(1, len(hess_v.shape)-1))
+                            hessian_approx = tf.reduce_sum(hess_v ** 2.0, axis=axes_to_sum)
+
+                        else:  # pragma: no cover
+                            Logger.critical(f"{self.hessian_request.granularity} "
+                                            f"is not supported for Keras activation hessian's approximation scores calculator.")
+
+                        # Free gradients
+                        del hess_v
+
+                        # Update node Hessian approximation mean over random iterations
+                        ipts_hessian_approximations[i] = (j * ipts_hessian_approximations[i] + hessian_approx) / (j + 1)
+
+                # If the change to the mean approximation is insignificant (to all outputs)
+                # we stop the calculation.
+                if j > MIN_HESSIAN_ITER and prev_mean_results is not None:
+                    new_mean_res = tf.reduce_mean(tf.stack(ipts_hessian_approximations), axis=1)
+                    relative_delta_per_node = (tf.abs(new_mean_res - prev_mean_results) /
+                                               (tf.abs(new_mean_res) + 1e-6))
+                    max_delta = tf.reduce_max(relative_delta_per_node)
+                    if max_delta < HESSIAN_COMP_TOLERANCE:
+                        break
+
+                if self.hessian_request.granularity == HessianScoresGranularity.PER_TENSOR:
                     prev_mean_results = tf.reduce_mean(tf.stack(ipts_hessian_approximations), axis=1)
 
-        [... old lines 144-149 removed; their content is not shown in this diff view ...]
+            # Convert results to list of numpy arrays
+            hessian_results = [h.numpy() for h in ipts_hessian_approximations]
+            # Extend the Hessian tensors shape to align with expected return type
+            # TODO: currently, only per-tensor Hessian is available for activation.
+            # Once implementing per-channel or per-element, this alignment needs to be verified and handled separately.
+            hessian_results = [h[..., np.newaxis] for h in hessian_results]
 
-
+            return hessian_results
 
-        else:  # pragma: no cover
-            Logger.critical(f"{self.hessian_request.granularity} "
-                            f"is not supported for Keras activation hessian's approximation scores calculator.")
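For context (my summary, not text from the package): the rewritten calculator is a Hutchinson-style estimator. With y the concatenated model output and a an interest-point activation, each iteration draws a Rademacher vector v and accumulates the squared gradient of the scalar v·y with respect to a, roughly

\widetilde{H}(a) \;\approx\; \frac{1}{J}\sum_{j=1}^{J}\Big\|\frac{\partial\,(v_j^{\top} y)}{\partial a}\Big\|^{2},\qquad (v_j)_k \in \{-1,+1\}\ \text{i.i.d.},

with the squared entries reduced per tensor, per output channel, or kept per element according to the requested granularity, and an early stop once the running mean changes by less than HESSIAN_COMP_TOLERANCE.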
model_compression_toolkit/core/keras/hessian/hessian_scores_calculator_keras.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+from tensorflow import TensorShape
 
 from model_compression_toolkit.core.common.hessian.hessian_scores_calculator import HessianScoresCalculator
 
@@ -77,3 +78,19 @@ class HessianScoresCalculatorKeras(HessianScoresCalculator):
                 "Unable to concatenate tensors for gradient calculation due to mismatched shapes along the first axis.")  # pragma: no cover
 
         return tf.concat(_r_tensors, axis=1)
+
+    def _generate_random_vectors_batch(self, shape: TensorShape) -> tf.Tensor:
+        """
+        Generate a batch of random vectors for Hutchinson estimation using Rademacher distribution.
+
+        Args:
+            shape: target shape.
+
+        Returns:
+            Random tensor.
+        """
+        v = tf.random.uniform(shape=shape, minval=0, maxval=2, dtype=tf.int32)
+        v = tf.where(v == 0, -1, 1)
+        v = tf.cast(v, tf.float32)
+        return v
+
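A standalone sketch of the same Rademacher sampling pattern, useful for checking its statistics outside the calculator (plain TensorFlow; the shape is arbitrary):

import tensorflow as tf

# Draw integers in {0, 1}, map them to {-1, +1}, and cast to float,
# mirroring the new _generate_random_vectors_batch helper.
shape = (4, 16)
v = tf.random.uniform(shape=shape, minval=0, maxval=2, dtype=tf.int32)
v = tf.cast(tf.where(v == 0, -1, 1), tf.float32)

print(tf.reduce_mean(v).numpy())                   # close to 0 for large shapes
print(tf.unique(tf.reshape(v, [-1]))[0].numpy())   # only -1.0 and 1.0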
model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py
@@ -89,8 +89,7 @@ class WeightsHessianScoresCalculatorKeras(HessianScoresCalculatorKeras):
         prev_mean_results = None
         tensors_original_shape = []
         for j in tqdm(range(self.num_iterations_for_approximation)):  # Approximation iterations
-
-            v = tf.random.normal(shape=output.shape)
+            v = self._generate_random_vectors_batch(output.shape)
             f_v = tf.reduce_sum(v * output)
 
             for i, ipt_node in enumerate(self.hessian_request.target_nodes):  # Per Interest point weights tensor
model_compression_toolkit/gptq/common/gptq_training.py
@@ -27,7 +27,11 @@ from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
 from model_compression_toolkit.gptq.common.gptq_constants import QUANT_PARAM_LEARNING_STR
 from model_compression_toolkit.gptq.common.gptq_framework_implementation import GPTQFrameworkImplemantation
 from model_compression_toolkit.gptq.common.gptq_graph import get_compare_points
+from model_compression_toolkit.gptq.common.gradual_activation_quantization import \
+    get_gradual_activation_quantizer_wrapper_factory
+from model_compression_toolkit.gptq.common.regularization_factory import get_regularization
 from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.trainable_infrastructure.common.util import get_total_grad_steps
 
 
 class GPTQTrainer(ABC):
@@ -64,6 +68,14 @@ class GPTQTrainer(ABC):
         self.fw_impl = fw_impl
         self.fw_info = fw_info
         self.representative_data_gen_fn = representative_data_gen_fn
+
+        def _get_total_grad_steps():
+            return get_total_grad_steps(representative_data_gen_fn) * gptq_config.n_epochs
+
+        self.gradual_act_quantizer_wrapper_factory = get_gradual_activation_quantizer_wrapper_factory(gptq_config,
+                                                                                                      _get_total_grad_steps,
+                                                                                                      self.fw_linear_annealing_scheduler)
+
         # ----------------------------------------------
         # Build two models and create compare nodes
         # ----------------------------------------------
@@ -81,6 +93,52 @@ class GPTQTrainer(ABC):
                             f"an 'HessianInfoService' object must be provided, but received: {hessian_info_service}.")  # pragma: no cover
         self.hessian_service = hessian_info_service
 
+        self.reg_func = get_regularization(self.gptq_config,
+                                           _get_total_grad_steps,
+                                           self.fw_soft_quantizer_regularization,
+                                           self.fw_linear_annealing_scheduler)
+        self.loss_list = []
+        self.input_scale = 1
+        if self.float_user_info.input_scale != self.gptq_user_info.input_scale:
+            Logger.critical("Input scale mismatch between float and GPTQ networks. "
+                            "Ensure both networks have matching input scales.")  # pragma: no cover
+        else:
+            self.input_scale = self.gptq_user_info.input_scale
+
+        trainable_weights, trainable_bias, trainable_threshold = self.fw_get_gptq_trainable_parameters_fn(
+            self.fxp_model,
+            add_bias=self.gptq_config.train_bias)
+        self.flp_weights_list, self.fxp_weights_list = self.fw_get_weights_for_loss_fn(self.fxp_model)
+
+        if not (len(self.compare_points) == len(trainable_weights) == len(self.flp_weights_list) == len(
+                self.fxp_weights_list)):
+            Logger.critical("Mismatch in the number of comparison points, layers with trainable weights, "
+                            "and the number of float and quantized weights for loss calculation. "
+                            "Ensure all these elements align to proceed with GPTQ training.")
+
+        # In Keras we need to flatten the weights first before attaching the optimizer
+        if len(trainable_weights) > 0 and isinstance(trainable_weights[0], (list, tuple)):
+            trainable_weights = [w for layer_weights in trainable_weights for w in layer_weights]
+        if len(trainable_bias) > 0 and isinstance(trainable_bias[0], (list, tuple)):
+            trainable_bias = [w for layer_weights in trainable_bias for w in layer_weights]
+
+        self.optimizer_with_param = self.get_optimizer_with_param(trainable_weights,
+                                                                  trainable_bias,
+                                                                  trainable_threshold)
+        hessian_cfg = self.gptq_config.hessian_weights_config
+
+        self.has_params_to_train = np.sum(
+            [len(optimizer_params_tuple[1]) for optimizer_params_tuple in self.optimizer_with_param]) > 0
+        self.use_sample_layer_attention = hessian_cfg and hessian_cfg.per_sample
+
+        if self.use_sample_layer_attention:
+            # normalization is currently not supported, make sure the config reflects it.
+            if hessian_cfg.norm_scores or hessian_cfg.log_norm or hessian_cfg.scale_log_norm:
+                raise NotImplementedError()
+            self.train_dataloader = self._prepare_train_dataloader_sla(representative_data_gen_fn)
+        else:
+            self.train_dataloader = self._prepare_train_dataloader_for_non_sla(representative_data_gen_fn)
+
     def get_optimizer_with_param(self,
                                  flattened_trainable_weights: List[Any],
                                  flattened_bias_weights: List[Any],
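The effect of this refactor is that the shared setup now lives in the common GPTQTrainer, and each framework trainer only hands it framework-specific hooks before calling the base constructor. A minimal sketch of that pattern; the My* names are placeholders I introduce for illustration, the fw_* attribute names and the GPTQTrainer import come from the diff, and the exact super().__init__ argument list is abridged:

from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer

class MyFrameworkGPTQTrainer(GPTQTrainer):
    def __init__(self, graph_float, graph_quant, gptq_config, fw_impl, fw_info,
                 representative_data_gen, hessian_info_service=None):
        # Hooks consumed by the common constructor shown in the diff above.
        self.fw_soft_quantizer_regularization = MySoftQuantizerRegularization      # placeholder
        self.fw_linear_annealing_scheduler = MyLinearAnnealingScheduler            # placeholder
        self.fw_get_gptq_trainable_parameters_fn = my_get_gptq_trainable_parameters  # placeholder
        self.fw_get_weights_for_loss_fn = my_get_weights_for_loss                  # placeholder

        super().__init__(graph_float, graph_quant, gptq_config, fw_impl, fw_info,
                         representative_data_gen_fn=representative_data_gen,
                         hessian_info_service=hessian_info_service)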
model_compression_toolkit/gptq/keras/gptq_loss.py
@@ -13,9 +13,8 @@
 # limitations under the License.
 # ==============================================================================
 
-from typing import Any, Tuple, List
-
 import tensorflow as tf
+from typing import List, Tuple
 
 
 def mse_loss(y: tf.Tensor, x: tf.Tensor, normalized: bool = True) -> tf.Tensor:
@@ -67,6 +66,40 @@ def multiple_tensors_mse_loss(y_list: List[tf.Tensor],
     else:
         return tf.reduce_mean(tf.stack(loss_values_list))
 
+
+def sample_layer_attention_loss(y_list: List[tf.Tensor],
+                                x_list: List[tf.Tensor],
+                                fxp_w_list,
+                                flp_w_list,
+                                act_bn_mean,
+                                act_bn_std,
+                                loss_weights: Tuple[tf.Tensor]) -> tf.Tensor:
+    """
+    Compute Sample Layer Attention loss between two lists of tensors using TensorFlow.
+
+    Args:
+        y_list: First list of tensors.
+        x_list: Second list of tensors.
+        fxp_w_list, flp_w_list, act_bn_mean, act_bn_std: unused (needed to comply with the interface).
+        loss_weights: layer-sample attention scores (a tuple with the same length as the number of layers, where each element is a tf.Tensor vector whose length is the number of samples).
+
+    Returns:
+        Sample Layer Attention loss (a scalar).
+    """
+    loss = 0
+    layers_mean_w = []
+    loss_weights = tf.stack(loss_weights, axis=1)
+
+    for i, (y, x) in enumerate(zip(y_list, x_list)):
+        norm = tf.reduce_sum(tf.square(y - x), axis=1)
+        if len(norm.shape) > 1:
+            norm = tf.reduce_mean(tf.reshape(norm, [norm.shape[0], -1]), axis=1)
+        w = loss_weights[:, i]
+        loss += tf.reduce_mean(w * norm)
+        layers_mean_w.append(tf.reduce_mean(w))
+
+    loss = loss / tf.reduce_max(tf.stack(layers_mean_w))
+    return loss
+
 
 def mse_loss_per_tensor(y: tf.Tensor,
                         x: tf.Tensor,
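To illustrate the expected shapes, a toy call of the new loss. The tensor sizes are made up, and the four unused interface arguments are passed as None:

import tensorflow as tf
from model_compression_toolkit.gptq.keras.gptq_loss import sample_layer_attention_loss

batch, features, n_layers = 4, 8, 2

# Per-layer activations of the quantized (y) and float (x) models.
y_list = [tf.random.normal((batch, features)) for _ in range(n_layers)]
x_list = [tf.random.normal((batch, features)) for _ in range(n_layers)]

# One attention weight per sample, per layer.
loss_weights = tuple(tf.random.uniform((batch,)) for _ in range(n_layers))

loss = sample_layer_attention_loss(y_list, x_list, None, None, None, None, loss_weights)
print(float(loss))  # scalar loss value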
model_compression_toolkit/gptq/keras/gptq_training.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Callable, List, Tuple, Union
+from typing import Callable, List, Tuple, Union, Generator
 
 import tensorflow as tf
 from keras import Model
@@ -20,11 +20,13 @@ from packaging import version
 from tensorflow.keras.layers import Layer
 from tqdm import tqdm
 
-from model_compression_toolkit.core.common.hessian import HessianInfoService
+from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresGranularity
 # As from Tensorflow 2.6, keras is a separate package and some classes should be imported differently.
 from model_compression_toolkit.core.common.user_info import UserInformation
 from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
-from model_compression_toolkit.core.keras.data_util import data_gen_to_dataloader
+from model_compression_toolkit.core.keras.data_util import data_gen_to_dataloader, \
+    FixedSampleInfoDataset, FixedTFDataset, create_tf_dataloader, TFDatasetFromGenerator, \
+    IterableSampleWithConstInfoDataset
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.gptq.common.gradual_activation_quantization import \
     get_gradual_activation_quantizer_wrapper_factory
@@ -83,13 +85,10 @@ class KerasGPTQTrainer(GPTQTrainer):
 
         """
 
-        [... old lines 86-89 removed; their content is not shown in this diff view ...]
-        # which occurs in the base constructor.
-        self.gradual_act_quantizer_wrapper_factory = get_gradual_activation_quantizer_wrapper_factory(
-            gptq_config, _get_total_grad_steps, KerasLinearAnnealingScheduler)
+        self.fw_soft_quantizer_regularization = SoftQuantizerRegularization
+        self.fw_linear_annealing_scheduler = KerasLinearAnnealingScheduler
+        self.fw_get_gptq_trainable_parameters_fn = get_gptq_trainable_parameters
+        self.fw_get_weights_for_loss_fn = get_weights_for_loss
 
         super().__init__(graph_float,
                          graph_quant,
@@ -99,53 +98,106 @@ class KerasGPTQTrainer(GPTQTrainer):
                          representative_data_gen_fn=representative_data_gen,
                          hessian_info_service=hessian_info_service)
 
-        self.loss_list = []
-        self.input_scale = 1
-
-        trainable_weights, bias_weights, trainable_threshold = get_gptq_trainable_parameters(
-            self.fxp_model,
-            fw_info,
-            add_bias=gptq_config.train_bias)
-
-        self.flp_weights_list, self.fxp_weights_list = get_weights_for_loss(self.fxp_model)
-
-        if not (len(self.compare_points) == len(trainable_weights) == len(self.flp_weights_list) == len(
-                self.fxp_weights_list)):
-            Logger.critical("Mismatch in the number of comparison points, layers with trainable weights, "
-                            "and the number of float and quantized weights for loss calculation. "
-                            "Ensure all these elements align to proceed with GPTQ training.")
-
-        flattened_trainable_weights = [w for layer_weights in trainable_weights for w in layer_weights]
-        flattened_bias_weights = [w for layer_weights in bias_weights for w in layer_weights]
-        trainable_quantization_parameters = trainable_threshold
-        self.optimizer_with_param = self.get_optimizer_with_param(flattened_trainable_weights,
-                                                                  flattened_bias_weights,
-                                                                  trainable_quantization_parameters)
-        self.has_params_to_train = np.sum(
-            [len(optimizer_params_tuple[1]) for optimizer_params_tuple in self.optimizer_with_param]) > 0
-
-        if self.float_user_info.input_scale != self.gptq_user_info.input_scale:
-            Logger.critical("Input scale mismatch detected between the float model and the GPTQ model. "
-                            "Confirm that the input scales for both models are correctly configured and aligned.")  # pragma: no cover
-        else:
-            self.input_scale = self.gptq_user_info.input_scale
-
-            SoftQuantizerRegularization,
-            KerasLinearAnnealingScheduler)
-        [... further old lines removed; their content is not shown in this diff view ...]
+    def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> tf.data.Dataset:
+        """
+        Computes Sample-Layer Attention score and builds a train dataloader in TensorFlow.
+
+        Args:
+            data_gen_fn: function for representative dataset generation.
+
+        Returns:
+            TensorFlow dataset yielding three outputs - samples, weights for the distillation loss,
+            and weights for regularization.
+        """
+        # Create a fixed dataset
+        fixed_dataset = FixedTFDataset(data_gen_fn)
+        orig_batch_size = fixed_dataset.orig_batch_size
+
+        # Prepare a separate loader for computing hessians over the whole dataset
+        hess_data_loader = create_tf_dataloader(
+            fixed_dataset,
+            batch_size=self.gptq_config.hessian_weights_config.hessian_batch_size,
+            shuffle=False
+        )
+
+        # Prepare request for Hessian computation
+        request = self._build_hessian_request(
+            granularity=HessianScoresGranularity.PER_OUTPUT_CHANNEL,
+            data_loader=hess_data_loader,
+            n_samples=None
+        )
+        layers_hessians = self.hessian_service.fetch_hessian(request, force_compute=True)
+
+        # Compute SLA score defined as max over elements
+        layers_hessians = {
+            layer: tf.convert_to_tensor(tf.reduce_max(hess, axis=tuple(range(1, len(hess.shape))))) for layer, hess in layers_hessians.items()
+        }
+
+        # Stack hessians for comparison points
+        hessians_tensor = tf.stack([layers_hessians[layer.name] for layer in self.compare_points])
+        assert hessians_tensor.shape[0] == len(self.compare_points)
+        loss_weights = list(hessians_tensor.numpy())  # Convert to a list for compatibility
+
+        # Prepare final dataset with samples and loss weights
+        sla_train_dataset = FixedSampleInfoDataset(fixed_dataset.samples, loss_weights)
+
+        # Calculate regularization weights as mean across samples
+        reg_weights = tf.reduce_mean(hessians_tensor, axis=1)
+
+        # Define a collate function to add regularization weights to each batch
+        def collate_fn(samples_with_loss_weights):
+            return *samples_with_loss_weights, reg_weights
+
+        # Create final dataset using the new dataloader with collate_fn
+        final_dataset = create_tf_dataloader(
+            dataset=sla_train_dataset,
+            batch_size=orig_batch_size,
+            shuffle=True,
+            collate_fn=collate_fn
+        )
+
+        return final_dataset
+
+    def _prepare_train_dataloader_for_non_sla(self,
+                                              data_gen_fn: Callable[[], Generator]) -> tf.data.Dataset:
+        """
+        Prepares a train dataloader for non-SLA tasks.
+
+        Args:
+            data_gen_fn: Factory for representative dataset generator.
+
+        Returns:
+            A `tf.data.Dataset` yielding samples with loss weights and regularization weights.
+        """
+        # Step 1: Create a dataset from the generator
+        dataset = TFDatasetFromGenerator(data_gen_fn)
         num_nodes = len(self.compare_points)
+
+        # Step 2: Compute loss weights
+        if self.gptq_config.hessian_weights_config:
+            hessian_dataset = create_tf_dataloader(dataset=dataset, batch_size=self.gptq_config.hessian_weights_config.hessian_batch_size)
+            hessian_weights = self.compute_hessian_based_weights(hessian_dataset)
+            loss_weights = tf.convert_to_tensor(hessian_weights, dtype=tf.float32)
+        else:
+            loss_weights = tf.ones(num_nodes, dtype=tf.float32) / num_nodes
+
+        # Step 3: Create a dataset with samples and loss weights
+        augmented_dataset = IterableSampleWithConstInfoDataset(dataset.dataset, loss_weights)
+
+        # Step 4: Add constant regularization weights
+        reg_weights = tf.ones(num_nodes, dtype=tf.float32)
+
+        def collate_fn(batch):
+            samples, loss_weights = batch
+            return samples, loss_weights, reg_weights
+
+        # Step 5: Create a tf.data.Dataset with collate_fn
+        train_dataloader = create_tf_dataloader(augmented_dataset,
+                                                batch_size=dataset.orig_batch_size,
+                                                collate_fn=collate_fn)
+
+        return train_dataloader
 
     def _is_gptq_weights_trainable(self,
                                    node: common.BaseNode) -> bool:
@@ -226,9 +278,13 @@ class KerasGPTQTrainer(GPTQTrainer):
 
         return gptq_model, gptq_user_info
 
-    def compute_gradients(self,
+    def compute_gradients(self,
+                          in_y_float: List[tf.Tensor],
+                          input_data: List[np.ndarray],
                           in_optimizer_with_param: List,
-                          training=True
+                          training=True,
+                          distill_loss_weights=None,
+                          reg_weights=None) -> Tuple[tf.Tensor, List[tf.Tensor]]:
         """
         Get outputs from both teacher and student networks. Compute the observed error,
         and use it to compute the gradients and applying them to the student weights.
@@ -253,9 +309,9 @@ class KerasGPTQTrainer(GPTQTrainer):
                                            self.flp_weights_list,
                                            self.compare_points_mean,
                                            self.compare_points_std,
-
+                                           distill_loss_weights)
 
-        reg_value = self.reg_func(self.fxp_model, self.gptq_config.regularization_factor)
+        reg_value = self.reg_func(self.fxp_model, self.gptq_config.regularization_factor, reg_weights)
 
         loss_value += reg_value
 
@@ -279,14 +335,19 @@ class KerasGPTQTrainer(GPTQTrainer):
         # Training loop
         # ----------------------------------------------
         if self.has_params_to_train:
-            self.micro_training_loop(
-                compute_gradients,
+            self.micro_training_loop(compute_gradients,
                                      self.optimizer_with_param,
                                      self.gptq_config.n_epochs,
                                      True)
 
     @tf.function
-    def nano_training_step(self,
+    def nano_training_step(self,
+                           input_data,
+                           in_compute_gradients,
+                           in_optimizer_with_param,
+                           is_training,
+                           distill_loss_weights,
+                           reg_weights):
         """
         This function run part of the training step, wrapped by a tf.function for acceleration.
         Args:
@@ -303,12 +364,15 @@ class KerasGPTQTrainer(GPTQTrainer):
         # run float model
         y_float = self.float_model(input_data)
         # rung quantized model and calculate loss & gradients
-        loss_value_step, grads = in_compute_gradients(y_float,
-
+        loss_value_step, grads = in_compute_gradients(y_float,
+                                                      input_data,
+                                                      in_optimizer_with_param,
+                                                      training=is_training,
+                                                      distill_loss_weights=distill_loss_weights,
+                                                      reg_weights=reg_weights)
         return loss_value_step, grads
 
     def micro_training_loop(self,
-                            data_function: Callable,
                             in_compute_gradients: Callable,
                             in_optimizer_with_param: List[Tuple[tf.keras.optimizers.Optimizer, List[tf.Tensor]]],
                             n_epochs: int,
@@ -316,7 +380,6 @@ class KerasGPTQTrainer(GPTQTrainer):
         """
         This function run a micro training loop on given set of parameters.
         Args:
-            data_function: A callable function that give a batch of samples.
             in_compute_gradients: A callable function that compute the gradients.
             in_optimizer_with_param: A list of optimizer classes to update with the corresponding parameters.
             n_epochs: Number of update iterations of representative dataset.
@@ -327,12 +390,19 @@ class KerasGPTQTrainer(GPTQTrainer):
         """
         with tqdm(range(n_epochs), "Running GPTQ optimization") as epochs_pbar:
             for _ in epochs_pbar:
-                with tqdm(
+                with tqdm(self.train_dataloader, position=1, leave=False) as data_pbar:
                     for data in data_pbar:
-                        input_data = [d * self.input_scale for d in data]
 
-
-
+                        input_data, distill_loss_weights, reg_weight = data
+
+                        input_data = [d * self.input_scale for d in input_data]
+
+                        loss_value_step, grads = self.nano_training_step(input_data,
+                                                                         in_compute_gradients,
+                                                                         in_optimizer_with_param,
+                                                                         is_training,
+                                                                         distill_loss_weights,
+                                                                         reg_weight)
                         # Run one step of gradient descent by updating
                         # the value of the variables to minimize the loss.
                         for i, (o, p) in enumerate(in_optimizer_with_param):
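After this change each batch delivered by self.train_dataloader is a triple: the input sample(s), the per-layer (per-sample in the SLA path) distillation-loss weights, and the per-layer regularization weights. A runnable toy stand-in for that structure, with made-up shapes and values, purely to show what the training loop unpacks:

import tensorflow as tf

n_layers, batch = 3, 4
samples = tf.data.Dataset.from_tensor_slices(tf.random.normal((8, 32, 32, 3))).batch(batch)
loss_w = tf.ones((n_layers,))   # distillation-loss weights (toy values)
reg_w = tf.ones((n_layers,))    # regularization weights (toy values)
train_dataloader = samples.map(lambda s: (s, loss_w, reg_w))

for input_data, distill_loss_weights, reg_weights in train_dataloader.take(1):
    print(input_data.shape, distill_loss_weights.shape, reg_weights.shape)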
model_compression_toolkit/gptq/keras/graph_info.py
@@ -16,7 +16,6 @@
 import tensorflow as tf
 from typing import Tuple, List
 from model_compression_toolkit.core.keras.constants import USE_BIAS
-from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from tensorflow.keras.models import Model
 from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
@@ -26,7 +25,6 @@ from model_compression_toolkit.trainable_infrastructure.common.base_trainable_qu
 
 
 def get_gptq_trainable_parameters(fxp_model: Model,
-                                  fw_info: FrameworkInfo,
                                   add_bias: bool = False) -> (
         List[tf.Variable], List[tf.Variable], List[tf.Variable]):
     """
@@ -34,7 +32,6 @@ def get_gptq_trainable_parameters(fxp_model: Model,
 
     Args:
         fxp_model: Model to get its trainable parameters.
-        fw_info: Framework information needed for keras kernel ops list.
         add_bias: Whether to include biases of the model (if there are) or not.
 
     Returns:
@@ -60,7 +57,7 @@ def get_gptq_trainable_parameters(fxp_model: Model,
         trainable_threshold.extend(quantizer_trainable_threshold)
 
         if add_bias:
-            kernel_ops_attrs =
+            kernel_ops_attrs = DEFAULT_KERAS_INFO.kernel_ops_attributes_mapping.get(type(layer.layer))
             use_bias = kernel_ops_attrs is not None and kernel_ops_attrs[0] is not None \
                        and layer.layer.get_config().get(USE_BIAS)
             if use_bias is not None and use_bias and layer.layer.bias is not None:
model_compression_toolkit/gptq/keras/quantization_facade.py
@@ -19,7 +19,7 @@ from packaging import version
 
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
 from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT, LR_DEFAULT, LR_REST_DEFAULT, \
-    LR_BIAS_DEFAULT, GPTQ_MOMENTUM
+    LR_BIAS_DEFAULT, GPTQ_MOMENTUM, REG_DEFAULT_SLA
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import TENSORFLOW, ACT_HESSIAN_DEFAULT_BATCH_SIZE, GPTQ_HESSIAN_NUM_SAMPLES
 from model_compression_toolkit.verify_packages import FOUND_TF
@@ -42,7 +42,7 @@ if FOUND_TF:
     from model_compression_toolkit.gptq.keras.gptq_keras_implementation import GPTQKerasImplemantation
     from model_compression_toolkit.core.keras.keras_model_validation import KerasModelValidation
     from tensorflow.keras.models import Model
-    from model_compression_toolkit.gptq.keras.gptq_loss import GPTQMultipleTensorsLoss
+    from model_compression_toolkit.gptq.keras.gptq_loss import GPTQMultipleTensorsLoss, sample_layer_attention_loss
     from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
     from model_compression_toolkit.exporter.model_wrapper import get_exportable_keras_model
     from model_compression_toolkit import get_target_platform_capabilities
@@ -61,11 +61,12 @@ if FOUND_TF:
     def get_keras_gptq_config(n_epochs: int,
                               optimizer: OptimizerV2 = None,
                               optimizer_rest: OptimizerV2 = None,
-                              loss: Callable =
+                              loss: Callable = None,
                               log_function: Callable = None,
                               use_hessian_based_weights: bool = True,
-                              regularization_factor: float =
+                              regularization_factor: float = None,
                               hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE,
+                              use_hessian_sample_attention: bool = False,
                               gradual_activation_quantization: Union[bool, GradualActivationQuantizationConfig] = False) -> GradientPTQConfig:
         """
         Create a GradientPTQConfig instance for Keras models.
@@ -79,6 +80,7 @@ if FOUND_TF:
             use_hessian_based_weights (bool): Whether to use Hessian-based weights for weighted average loss.
             regularization_factor (float): A floating point number that defines the regularization factor.
             hessian_batch_size (int): Batch size for Hessian computation in Hessian-based weights GPTQ.
+            use_hessian_sample_attention (bool): whether to use Sample-Layer Attention score for weighted loss.
             gradual_activation_quantization (bool, GradualActivationQuantizationConfig): If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.
 
         returns:
@@ -105,9 +107,25 @@ if FOUND_TF:
         """
         optimizer = optimizer or tf.keras.optimizers.Adam(learning_rate=LR_DEFAULT)
         optimizer_rest = optimizer_rest or tf.keras.optimizers.Adam(learning_rate=LR_REST_DEFAULT)
+        bias_optimizer = tf.keras.optimizers.SGD(learning_rate=LR_BIAS_DEFAULT, momentum=GPTQ_MOMENTUM)
 
-
-
+        if regularization_factor is None:
+            regularization_factor = REG_DEFAULT_SLA if use_hessian_sample_attention else REG_DEFAULT
+
+        loss = loss or GPTQMultipleTensorsLoss()
+        hessian_weights_config = None
+        if use_hessian_sample_attention:
+            if not use_hessian_based_weights:  # pragma: no cover
+                raise ValueError('use_hessian_based_weights must be set to True in order to use Sample Layer Attention.')
+
+            hessian_weights_config = GPTQHessianScoresConfig(per_sample=True,
+                                                             hessians_num_samples=None,
+                                                             hessian_batch_size=hessian_batch_size)
+            loss = loss or sample_layer_attention_loss
+        elif use_hessian_based_weights:
+            hessian_weights_config = GPTQHessianScoresConfig(per_sample=False,
+                                                             hessians_num_samples=GPTQ_HESSIAN_NUM_SAMPLES,
+                                                             hessian_batch_size=hessian_batch_size)
 
         if isinstance(gradual_activation_quantization, bool):
             gradual_quant_config = GradualActivationQuantizationConfig() if gradual_activation_quantization else None
@@ -117,11 +135,6 @@ if FOUND_TF:
             raise TypeError(f'gradual_activation_quantization argument should be bool or '
                             f'GradualActivationQuantizationConfig, received {type(gradual_activation_quantization)}')
 
-        hessian_weights_config = None
-        if use_hessian_based_weights:
-            hessian_weights_config = GPTQHessianScoresConfig(per_sample=False,
-                                                             hessians_num_samples=GPTQ_HESSIAN_NUM_SAMPLES,
-                                                             hessian_batch_size=hessian_batch_size)
         return GradientPTQConfig(n_epochs=n_epochs,
                                  optimizer=optimizer,
                                  optimizer_rest=optimizer_rest,
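As a usage illustration, the new flag can be enabled when building the GPTQ configuration. A hedged sketch: the model and data generator are placeholders, and keras_gradient_post_training_quantization is the facade this config is normally passed to (names assumed from the public MCT API, not from this diff):

import model_compression_toolkit as mct

# Hypothetical float Keras model and representative data generator.
# float_model = ...
# def representative_data_gen(): yield [images_batch]

gptq_config = mct.gptq.get_keras_gptq_config(
    n_epochs=5,
    use_hessian_based_weights=True,
    use_hessian_sample_attention=True,   # new argument in this release
    gradual_activation_quantization=True)

# quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(
#     float_model, representative_data_gen, gptq_config=gptq_config)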
model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py
@@ -40,30 +40,42 @@ class SoftQuantizerRegularization:
         self.count_iter = tf.Variable(0.)
 
 
-    def __call__(self, model: Model, entropy_reg: float):
+    def __call__(self, model: Model, entropy_reg: float, layer_weights: tf.Tensor):
         """
         Returns the soft quantizer regularization value for SoftRounding.
 
         Args:
             model: A model to be quantized with SoftRounding.
             entropy_reg: Entropy value to scale the quantizer regularization.
+            layer_weights: a vector of layers weights.
 
         Returns: Regularization value.
         """
-
+        layers = [l for l in model.layers if isinstance(l, KerasTrainableQuantizationWrapper)]
+
+        if layer_weights.shape[0] != len(layers):
+            raise ValueError(f'Expected weights.shape[0] to be {len(layers)}, '
+                             f'received shape {layer_weights.shape}.')  # pragma: no cover
+
         b = self.beta_scheduler(self.count_iter.value())
-        for layer in model.layers:
-            if isinstance(layer, KerasTrainableQuantizationWrapper):
-                kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
-                                                                      fw_info=DEFAULT_KERAS_INFO)
 
-
-
+        max_w = tf.reduce_max(layer_weights)
+
+        # Initialize reg to zero
+        reg = tf.constant(0.0, dtype=tf.float32)
+
+        # Compute the regularization term without concatenating
+        for i, layer in enumerate(layers):
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
+                                                                  fw_info=DEFAULT_KERAS_INFO)
+
+            st = layer.weights_quantizers[kernel_attribute].get_soft_targets()
 
-
+            soft_loss = tf.reduce_sum(1 - tf.pow(tf.math.abs(st - 0.5) * 2, b))
+            reg += layer_weights[i] * soft_loss
 
-
-
+        # Normalize reg by max_w
+        reg = reg / max_w
 
         self.count_iter.assign_add(1.0)
 
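In formula form (my notation, inferred from the code above), with soft targets st_l of layer l and annealed exponent β(t) from the beta scheduler, the weighted regularizer is roughly

R(t) \;=\; \frac{1}{\max_{l} w_{l}} \sum_{l} w_{l} \sum_{k} \Big(1 - \big|\,2\,(st_{l,k} - 0.5)\,\big|^{\beta(t)}\Big),

where w_l are the per-layer weights passed in as layer_weights: constant ones in the non-SLA path, and Hessian-derived per-layer means in the Sample-Layer Attention path.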
model_compression_toolkit/gptq/pytorch/gptq_training.py
@@ -21,9 +21,6 @@ from torch.nn import Module
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
-from model_compression_toolkit.gptq.common.gradual_activation_quantization import get_gradual_activation_quantizer_wrapper_factory
-from model_compression_toolkit.gptq.common.regularization_factory import get_regularization
-
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
@@ -41,7 +38,6 @@ from model_compression_toolkit.gptq.pytorch.graph_info import get_gptq_trainable
 from model_compression_toolkit.gptq.pytorch.quantizer.quantization_builder import quantization_builder
 
 from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
-from model_compression_toolkit.trainable_infrastructure.common.util import get_total_grad_steps
 from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import PytorchLinearAnnealingScheduler
 from model_compression_toolkit.gptq.pytorch.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization as PytorchSoftQuantizerRegularization
 
@@ -76,13 +72,10 @@ class PytorchGPTQTrainer(GPTQTrainer):
             representative_data_gen: Dataset to use for inputs of the models.
             hessian_info_service: HessianInfoService to fetch info based on the hessian approximation of the float model.
         """
-        [... old lines 79-82 removed; their content is not shown in this diff view ...]
-        # must be set prior to model building in the base class constructor
-        self.gradual_act_quantizer_wrapper_factory = get_gradual_activation_quantizer_wrapper_factory(
-            gptq_config, _get_total_grad_steps, PytorchLinearAnnealingScheduler)
+        self.fw_soft_quantizer_regularization = PytorchSoftQuantizerRegularization
+        self.fw_linear_annealing_scheduler = PytorchLinearAnnealingScheduler
+        self.fw_get_gptq_trainable_parameters_fn = get_gptq_trainable_parameters
+        self.fw_get_weights_for_loss_fn = get_weights_for_loss
 
         super().__init__(graph_float,
                          graph_quant,
@@ -92,40 +85,6 @@ class PytorchGPTQTrainer(GPTQTrainer):
                          representative_data_gen_fn=representative_data_gen,
                          hessian_info_service=hessian_info_service)
 
-        self.loss_list = []
-        self.input_scale = 1
-        if self.float_user_info.input_scale != self.gptq_user_info.input_scale:
-            Logger.critical("Input scale mismatch between float and GPTQ networks. "
-                            "Ensure both networks have matching input scales.")  # pragma: no cover
-        else:
-            self.input_scale = self.gptq_user_info.input_scale
-
-        trainable_weights, trainable_bias, trainable_threshold = get_gptq_trainable_parameters(
-            self.fxp_model,
-            add_bias=self.gptq_config.train_bias)
-
-        self.flp_weights_list, self.fxp_weights_list = get_weights_for_loss(self.fxp_model)
-        if not (len(self.compare_points) == len(trainable_weights) == len(self.flp_weights_list) == len(
-                self.fxp_weights_list)):
-            Logger.critical("GPTQ: Number of comparison points, layers with trainable weights, "
-                            "and float vs. quantized weights for loss calculation do not match. "
-                            "Verify consistency across these parameters for successful GPTQ training.")
-
-        self.optimizer_with_param = self.get_optimizer_with_param(trainable_weights,
-                                                                  trainable_bias,
-                                                                  trainable_threshold)
-        hessian_cfg = self.gptq_config.hessian_weights_config
-
-        self.use_sample_layer_attention = hessian_cfg and hessian_cfg.per_sample
-        if self.use_sample_layer_attention:
-            # normalization is currently not supported, make sure the config reflects it.
-            if hessian_cfg.norm_scores or hessian_cfg.log_norm or hessian_cfg.scale_log_norm:
-                raise NotImplementedError()
-            self.train_dataloader = self._prepare_train_dataloader_sla(representative_data_gen)
-        else:
-            self.train_dataloader = self._prepare_train_dataloader_for_non_sla(representative_data_gen)
-
-        self.reg_func = get_regularization(self.gptq_config, _get_total_grad_steps, PytorchSoftQuantizerRegularization, PytorchLinearAnnealingScheduler)
 
     def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> DataLoader:
         """
{mct_nightly-2.2.0.20241204.524.dist-info → mct_nightly-2.2.0.20241206.524.dist-info}/LICENSE.md
RENAMED
File without changes
{mct_nightly-2.2.0.20241204.524.dist-info → mct_nightly-2.2.0.20241206.524.dist-info}/top_level.txt
RENAMED
File without changes