mct-nightly 2.1.0.20240708.453__py3-none-any.whl → 2.1.0.20240709.429__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- {mct_nightly-2.1.0.20240708.453.dist-info → mct_nightly-2.1.0.20240709.429.dist-info}/METADATA +1 -1
- {mct_nightly-2.1.0.20240708.453.dist-info → mct_nightly-2.1.0.20240709.429.dist-info}/RECORD +31 -31
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/framework_implementation.py +12 -12
- model_compression_toolkit/core/common/hessian/__init__.py +1 -1
- model_compression_toolkit/core/common/hessian/hessian_info_service.py +74 -69
- model_compression_toolkit/core/common/hessian/hessian_info_utils.py +1 -1
- model_compression_toolkit/core/common/hessian/{trace_hessian_calculator.py → hessian_scores_calculator.py} +11 -11
- model_compression_toolkit/core/common/hessian/{trace_hessian_request.py → hessian_scores_request.py} +15 -15
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +2 -2
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +8 -8
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +5 -5
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +4 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +5 -5
- model_compression_toolkit/core/keras/hessian/{activation_trace_hessian_calculator_keras.py → activation_hessian_scores_calculator_keras.py} +26 -26
- model_compression_toolkit/core/keras/hessian/{trace_hessian_calculator_keras.py → hessian_scores_calculator_keras.py} +14 -14
- model_compression_toolkit/core/keras/hessian/{weights_trace_hessian_calculator_keras.py → weights_hessian_scores_calculator_keras.py} +27 -27
- model_compression_toolkit/core/keras/keras_implementation.py +30 -30
- model_compression_toolkit/core/pytorch/hessian/{activation_trace_hessian_calculator_pytorch.py → activation_hessian_scores_calculator_pytorch.py} +25 -25
- model_compression_toolkit/core/pytorch/hessian/{trace_hessian_calculator_pytorch.py → hessian_scores_calculator_pytorch.py} +14 -14
- model_compression_toolkit/core/pytorch/hessian/{weights_trace_hessian_calculator_pytorch.py → weights_hessian_scores_calculator_pytorch.py} +25 -25
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +30 -30
- model_compression_toolkit/core/quantization_prep_runner.py +1 -1
- model_compression_toolkit/gptq/common/gptq_training.py +30 -30
- model_compression_toolkit/gptq/keras/gptq_training.py +1 -1
- model_compression_toolkit/gptq/pytorch/gptq_training.py +1 -1
- model_compression_toolkit/gptq/runner.py +2 -2
- model_compression_toolkit/qat/pytorch/quantization_facade.py +1 -1
- {mct_nightly-2.1.0.20240708.453.dist-info → mct_nightly-2.1.0.20240709.429.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.1.0.20240708.453.dist-info → mct_nightly-2.1.0.20240709.429.dist-info}/WHEEL +0 -0
- {mct_nightly-2.1.0.20240708.453.dist-info → mct_nightly-2.1.0.20240709.429.dist-info}/top_level.txt +0 -0
@@ -26,8 +26,8 @@ from model_compression_toolkit.gptq.common.gptq_framework_implementation import
|
|
26
26
|
from model_compression_toolkit.gptq.common.gptq_graph import get_compare_points
|
27
27
|
from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
|
28
28
|
from model_compression_toolkit.logger import Logger
|
29
|
-
from model_compression_toolkit.core.common.hessian import HessianInfoService,
|
30
|
-
|
29
|
+
from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
|
30
|
+
HessianScoresGranularity
|
31
31
|
from model_compression_toolkit.core.common.hessian import hessian_info_utils as hessian_utils
|
32
32
|
|
33
33
|
|
@@ -55,7 +55,7 @@ class GPTQTrainer(ABC):
|
|
55
55
|
gptq_config: GradientPTQConfig with parameters about the tuning process.
|
56
56
|
fw_impl: Framework implementation
|
57
57
|
fw_info: Framework information
|
58
|
-
hessian_info_service: HessianInfoService for fetching and computing Hessian
|
58
|
+
hessian_info_service: HessianInfoService for fetching and computing Hessian-approximation information.
|
59
59
|
"""
|
60
60
|
self.graph_float = copy.deepcopy(graph_float)
|
61
61
|
self.graph_quant = copy.deepcopy(graph_quant)
|
@@ -132,10 +132,10 @@ class GPTQTrainer(ABC):
|
|
132
132
|
|
133
133
|
def compute_hessian_based_weights(self) -> np.ndarray:
|
134
134
|
"""
|
135
|
-
Computes
|
135
|
+
Computes scores based on the hessian approximation per layer w.r.t activations of the interest points.
|
136
136
|
|
137
137
|
Returns:
|
138
|
-
np.ndarray:
|
138
|
+
np.ndarray: Scores based on the hessian matrix approximation.
|
139
139
|
"""
|
140
140
|
if not self.gptq_config.use_hessian_based_weights:
|
141
141
|
# Return a default weight distribution based on the number of compare points
|
@@ -143,15 +143,15 @@ class GPTQTrainer(ABC):
|
|
143
143
|
return np.asarray([1 / num_nodes for _ in range(num_nodes)])
|
144
144
|
|
145
145
|
# Fetch hessian approximations for each target node
|
146
|
-
|
146
|
+
compare_point_to_hessian_approx_scores = self._fetch_hessian_approximations()
|
147
147
|
# Process the fetched hessian approximations to gather them per images
|
148
|
-
|
149
|
-
self._process_hessian_approximations(
|
148
|
+
hessian_approx_score_by_image = (
|
149
|
+
self._process_hessian_approximations(compare_point_to_hessian_approx_scores))
|
150
150
|
|
151
151
|
# Check if log normalization is enabled in the configuration
|
152
152
|
if self.gptq_config.hessian_weights_config.log_norm:
|
153
153
|
# Calculate the mean of the approximations across images
|
154
|
-
mean_approx_scores = np.mean(
|
154
|
+
mean_approx_scores = np.mean(hessian_approx_score_by_image, axis=0)
|
155
155
|
# Reduce unnecessary dims, should remain with one dimension for the number of nodes
|
156
156
|
mean_approx_scores = np.squeeze(mean_approx_scores)
|
157
157
|
# Handle zero values to avoid log(0)
|
@@ -170,7 +170,7 @@ class GPTQTrainer(ABC):
|
|
170
170
|
return log_weights - np.min(log_weights)
|
171
171
|
else:
|
172
172
|
# If log normalization is not enabled, return the mean of the approximations across images
|
173
|
-
return np.mean(
|
173
|
+
return np.mean(hessian_approx_score_by_image, axis=0)
|
174
174
|
|
175
175
|
def _fetch_hessian_approximations(self) -> Dict[BaseNode, List[List[float]]]:
|
176
176
|
"""
|
@@ -180,13 +180,13 @@ class GPTQTrainer(ABC):
|
|
180
180
|
Mapping of target nodes to their hessian approximations.
|
181
181
|
"""
|
182
182
|
approximations = {}
|
183
|
-
|
183
|
+
hessian_scores_request = HessianScoresRequest(
|
184
184
|
mode=HessianMode.ACTIVATION,
|
185
|
-
granularity=
|
185
|
+
granularity=HessianScoresGranularity.PER_TENSOR,
|
186
186
|
target_nodes=self.compare_points
|
187
187
|
)
|
188
188
|
node_approximations = self.hessian_service.fetch_hessian(
|
189
|
-
|
189
|
+
hessian_scores_request=hessian_scores_request,
|
190
190
|
required_size=self.gptq_config.hessian_weights_config.hessians_num_samples,
|
191
191
|
batch_size=self.gptq_config.hessian_weights_config.hessian_batch_size
|
192
192
|
)
|
@@ -203,21 +203,21 @@ class GPTQTrainer(ABC):
|
|
203
203
|
Returns list of lists where each inner list is the approximations per image to all interest points.
|
204
204
|
|
205
205
|
Args:
|
206
|
-
approximations: Hessian
|
206
|
+
approximations: Hessian scores approximations mapping to process.
|
207
207
|
Dictionary of Node to a list of the length of the number of images that were fetched.
|
208
208
|
|
209
209
|
Returns:
|
210
210
|
Processed approximations as a list of lists where each inner list is the approximations
|
211
211
|
per image to all interest points.
|
212
212
|
"""
|
213
|
-
|
213
|
+
hessian_approx_score_by_image = [[approximations[target_node][image_idx] for target_node in self.compare_points]
|
214
214
|
for image_idx in
|
215
215
|
range(self.gptq_config.hessian_weights_config.hessians_num_samples)]
|
216
216
|
|
217
217
|
if self.gptq_config.hessian_weights_config.norm_scores:
|
218
|
-
|
218
|
+
hessian_approx_score_by_image = hessian_utils.normalize_scores(hessian_approx_score_by_image)
|
219
219
|
|
220
|
-
return
|
220
|
+
return hessian_approx_score_by_image
|
221
221
|
|
222
222
|
def _get_approximations_by_interest_point(self, approximations: Dict, image_idx: int) -> List:
|
223
223
|
"""
|
@@ -232,25 +232,25 @@ class GPTQTrainer(ABC):
|
|
232
232
|
"""
|
233
233
|
approx_by_interest_point = []
|
234
234
|
for target_node in self.compare_points:
|
235
|
-
|
236
|
-
self.
|
237
|
-
approx_by_interest_point.append(
|
235
|
+
hessian_approx_scores = approximations[target_node][image_idx]
|
236
|
+
self._validate_scores_approximation(hessian_approx_scores)
|
237
|
+
approx_by_interest_point.append(hessian_approx_scores[0])
|
238
238
|
return approx_by_interest_point
|
239
239
|
|
240
240
|
@staticmethod
|
241
|
-
def
|
241
|
+
def _validate_scores_approximation(hessian_approx_scores: List):
|
242
242
|
"""
|
243
|
-
Validates the structure and length of the
|
243
|
+
Validates the structure and length of the Hessian-approximation scores.
|
244
244
|
|
245
245
|
Args:
|
246
|
-
|
246
|
+
hessian_approx_scores: Scores to validate.
|
247
247
|
"""
|
248
|
-
if not isinstance(
|
249
|
-
Logger.critical(f"
|
250
|
-
if len(
|
251
|
-
Logger.critical(f"
|
248
|
+
if not isinstance(hessian_approx_scores, list):
|
249
|
+
Logger.critical(f"Scores approximation was expected to be a list but is of type: {type(hessian_approx_scores)}.") # pragma: no cover
|
250
|
+
if len(hessian_approx_scores) != 1:
|
251
|
+
Logger.critical(f"Scores approximation was expected to have a length of 1 "
|
252
252
|
f"(for computations with granularity set to 'HessianInfoGranularity.PER_TENSOR') "
|
253
|
-
f"but has a length of {len(
|
253
|
+
f"but has a length of {len(hessian_approx_scores)}."
|
254
254
|
) # pragma: no cover
|
255
255
|
|
256
256
|
|
@@ -291,7 +291,7 @@ def gptq_training(graph_float: Graph,
|
|
291
291
|
representative_data_gen: Callable,
|
292
292
|
fw_impl: GPTQFrameworkImplemantation,
|
293
293
|
fw_info: FrameworkInfo,
|
294
|
-
hessian_info_service: HessianInfoService=None) -> Graph:
|
294
|
+
hessian_info_service: HessianInfoService = None) -> Graph:
|
295
295
|
"""
|
296
296
|
GPTQ training process using knowledge distillation with a teacher network (float model) and a student network (quantized model).
|
297
297
|
Args:
|
@@ -301,7 +301,7 @@ def gptq_training(graph_float: Graph,
|
|
301
301
|
representative_data_gen: Dataset to use for inputs of the models.
|
302
302
|
fw_impl: Framework implementation
|
303
303
|
fw_info: Framework information
|
304
|
-
hessian_info_service: HessianInfoService to fetch Hessian
|
304
|
+
hessian_info_service: HessianInfoService to fetch information based on the Hessian approximation.
|
305
305
|
|
306
306
|
Returns:
|
307
307
|
Quantized graph for export
|
@@ -74,7 +74,7 @@ class KerasGPTQTrainer(GPTQTrainer):
|
|
74
74
|
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
|
75
75
|
fw_info: Framework information.
|
76
76
|
representative_data_gen: Dataset to use for inputs of the models.
|
77
|
-
hessian_info_service:
|
77
|
+
hessian_info_service: HessianScoresService for fetching and computing Hessian's approximation scores.
|
78
78
|
|
79
79
|
"""
|
80
80
|
super().__init__(graph_float,
|
@@ -64,7 +64,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
|
|
64
64
|
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
|
65
65
|
fw_info: Framework information
|
66
66
|
representative_data_gen: Dataset to use for inputs of the models.
|
67
|
-
hessian_info_service: HessianInfoService to fetch
|
67
|
+
hessian_info_service: HessianInfoService to fetch info based on the hessian approximation of the float model.
|
68
68
|
"""
|
69
69
|
super().__init__(graph_float,
|
70
70
|
graph_quant,
|
@@ -54,7 +54,7 @@ def _apply_gptq(gptq_config: GradientPTQConfig,
|
|
54
54
|
tg_bias: Graph of quantized model.
|
55
55
|
fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.).
|
56
56
|
fw_impl: Framework implementation per framework
|
57
|
-
hessian_info_service: HessianInfoService to fetch
|
57
|
+
hessian_info_service: HessianInfoService to fetch information based on the hessian approximation for the float model.
|
58
58
|
Returns:
|
59
59
|
|
60
60
|
"""
|
@@ -94,7 +94,7 @@ def gptq_runner(tg: Graph,
|
|
94
94
|
fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.)
|
95
95
|
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
|
96
96
|
tb_w: A TensorBoardWriter object initialized with the logger dir path if it was set, or None otherwise.
|
97
|
-
hessian_info_service:
|
97
|
+
hessian_info_service: HessianScoresService to fetch approximations of the hessian scores for the float model.
|
98
98
|
|
99
99
|
Returns:
|
100
100
|
A graph after model weights GPTQ fine-tuning.
|
@@ -153,7 +153,7 @@ if FOUND_TORCH:
|
|
153
153
|
tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)
|
154
154
|
fw_impl = PytorchImplementation()
|
155
155
|
|
156
|
-
# Ignore
|
156
|
+
# Ignore hessian scores service as we do not use it here
|
157
157
|
tg, bit_widths_config, _ = core_runner(in_model=in_model,
|
158
158
|
representative_data_gen=representative_data_gen,
|
159
159
|
core_config=core_config,
|
{mct_nightly-2.1.0.20240708.453.dist-info → mct_nightly-2.1.0.20240709.429.dist-info}/LICENSE.md
RENAMED
File without changes
|
File without changes
|
{mct_nightly-2.1.0.20240708.453.dist-info → mct_nightly-2.1.0.20240709.429.dist-info}/top_level.txt
RENAMED
File without changes
|