mct-nightly 2.1.0.20240731.414__py3-none-any.whl → 2.1.0.20240802.429__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/METADATA +1 -1
- {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/RECORD +51 -47
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +2 -1
- model_compression_toolkit/core/common/framework_implementation.py +5 -9
- model_compression_toolkit/core/common/graph/base_graph.py +1 -23
- model_compression_toolkit/core/common/graph/base_node.py +52 -33
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +6 -6
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +12 -12
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +17 -38
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +6 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +19 -12
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +14 -14
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py +14 -9
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +5 -27
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +25 -17
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +10 -6
- model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +1 -65
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +12 -5
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +7 -5
- model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +3 -3
- model_compression_toolkit/core/keras/keras_implementation.py +21 -17
- model_compression_toolkit/core/keras/tf_tensor_numpy.py +2 -2
- model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +3 -3
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +15 -14
- model_compression_toolkit/core/pytorch/reader/node_holders.py +1 -1
- model_compression_toolkit/core/runner.py +1 -0
- model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +2 -2
- model_compression_toolkit/gptq/common/gptq_training.py +0 -35
- model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +1 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +32 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +5 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/__init__.py +16 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +235 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py +132 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +112 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +2 -0
- {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/WHEEL +0 -0
- {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py
CHANGED
@@ -60,33 +60,6 @@ def calculate_delta(threshold: np.ndarray,
     return threshold / (2 ** (n_bits - int(signed)))
 
 
-def calculate_min_max_values(threshold: np.ndarray,
-                             n_bits: int = 8,
-                             signed: bool = False) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    Compute the min/max values of a quantization range according to the threshold,
-    number of bits and whether its signed or unsigned.
-
-    Args:
-        threshold: Threshold of quantization range to compute its min/max values.
-        n_bits: Number of bits used in the quantization.
-        signed: Whether the quantization range is signed or not.
-
-    Returns:
-        Min/max values of quantization range.
-    """
-
-    delta = calculate_delta(threshold,
-                            n_bits=n_bits,
-                            signed=signed)
-
-    # If unsigned: min=0, otherwise its -threshold
-    min_value = int(signed) * -threshold
-    max_value = threshold - delta
-
-    return min_value, max_value
-
-
 def quantize_tensor(tensor_data: np.ndarray,
                     threshold: np.ndarray,
                     n_bits: int,
@@ -238,7 +211,7 @@ def get_tensor_max(tensor_data: np.ndarray,
 
     """
     if n_bits < 1:
-        Logger.critical(f"Parameter n_bits must be positive; however 'n_bits'={n_bits} was provided.")
+        Logger.critical(f"Parameter n_bits must be positive; however 'n_bits'={n_bits} was provided.")  # pragma: no cover
     if is_uniform_quantization:
         expansion_factor = 1.0
     elif n_bits == 1:
@@ -337,40 +310,3 @@ def get_output_shape(tensor_shape, channel_axis):
 
     """
     return [-1 if i is channel_axis else 1 for i in range(len(tensor_shape))]
-
-
-def get_range_bounds(tensor_min, tensor_max):
-    """
-    Gets bounds on the quantization range limits for the minimization process.
-    Calculates the bounds in a way that would leave a gap between the possible optimized values
-    and the tensor min-max values.
-
-    Args:
-        tensor_min: min value of a tensor.
-        tensor_max: max value of a tensor.
-
-    Returns: An array with (lbound, ubound) pairs on the quantization range limit values.
-
-    """
-    # choosing bounds that have some gap from the original tensor min/max values.
-    l_bound = tensor_min / 2 if tensor_min > 0 else tensor_min * 2
-    u_bound = tensor_max * 2 if tensor_max > 0 else tensor_min / 2
-    return [(l_bound, u_bound), (l_bound, u_bound)]
-
-
-def get_threshold_bounds(min_threshold, max_threshold):
-    """
-    Gets bounds on the threshold for the minimization process.
-    Calculates the bounds in a way that would leave a gap between the possible optimized threshold
-    and the tensor max values. We use min_threshold as lower-bound to prevent the selected threshold
-    from being zero or negative.
-
-    Args:
-        min_threshold: minimal threshold to use if threshold is too small (not used for this method).
-        max_threshold: maximal threshold to be used in quantization.
-
-    Returns: An array with a pair of (lbound, ubound) on the quantization threshold limit values.
-
-    """
-    max_threshold = max(min_threshold, max_threshold)
-    return [(min_threshold, 2 * max_threshold)]
model_compression_toolkit/core/common/quantization/set_node_quantization_config.py
CHANGED
@@ -64,6 +64,7 @@ def set_quantization_configuration_to_graph(graph: Graph,
 
     for n in graph.nodes:
         set_quantization_configs_to_node(node=n,
+                                         graph=graph,
                                          quant_config=quant_config,
                                          fw_info=graph.fw_info,
                                          tpc=graph.tpc,
@@ -72,6 +73,7 @@ def set_quantization_configuration_to_graph(graph: Graph,
 
 
 def set_quantization_configs_to_node(node: BaseNode,
+                                     graph: Graph,
                                      quant_config: QuantizationConfig,
                                      fw_info: FrameworkInfo,
                                      tpc: TargetPlatformCapabilities,
@@ -81,19 +83,22 @@ def set_quantization_configs_to_node(node: BaseNode,
 
     Args:
         node: Node to set its quantization configurations.
+        graph: Model's internal representation graph.
        quant_config: Quantization configuration to generate the node's configurations from.
        fw_info: Information needed for quantization about the specific framework.
        tpc: TargetPlatformCapabilities to get default OpQuantizationConfig.
        mixed_precision_enable: is mixed precision enabled.
    """
    node_qc_options = node.get_qco(tpc)
+    base_config, node_qc_options_list = node.filter_node_qco_by_graph(tpc, graph.get_next_nodes(node), node_qc_options)
 
     # Create QC candidates for weights and activation combined
     weight_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
     node.candidates_quantization_cfg = _create_node_candidates_qc(quant_config,
                                                                   fw_info,
                                                                   weight_channel_axis,
-                                                                  node_qc_options,
+                                                                  node_qc_options_list,
+                                                                  base_config,
                                                                   node,
                                                                   mixed_precision_enable=mixed_precision_enable)
 
@@ -186,7 +191,8 @@ def _create_node_single_candidate_qc(qc: QuantizationConfig,
 def _create_node_candidates_qc(qc: QuantizationConfig,
                                fw_info: FrameworkInfo,
                                weight_channel_axis: Tuple[int, int],
-                               node_qc_options: QuantizationConfigOptions,
+                               node_qc_options_list: List[OpQuantizationConfig],
+                               base_config: OpQuantizationConfig,
                                node: BaseNode,
                                mixed_precision_enable: bool = False) -> List[CandidateNodeQuantizationConfig]:
     """
@@ -196,7 +202,8 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
         qc: Quantization configuration the quantization process should follow.
         fw_info: Framework information (e.g., which layers should have their kernels' quantized).
         weight_channel_axis: (Output, Input) channel index of the node's kernel.
-        node_qc_options: QuantizationConfigOptions for the node.
+        node_qc_options_list: List of quantization configs of node.
+        base_config: Base quantization config for node.
         node: A node to set quantization configuration candidates to.
         mixed_precision_enable: is mixed precision enabled
 
@@ -208,7 +215,7 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
     node_attrs_list = node.get_node_weights_attributes()
 
     if mixed_precision_enable:
-        for op_cfg in node_qc_options.quantization_config_list:
+        for op_cfg in node_qc_options_list:
             candidate_qc = copy.deepcopy(qc)
             candidates.append(_create_node_single_candidate_qc(candidate_qc,
                                                                fw_info,
@@ -220,7 +227,7 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
         candidates.append(_create_node_single_candidate_qc(qc,
                                                            fw_info,
                                                            weight_channel_axis,
-                                                           node_qc_options.base_config,
+                                                           base_config,
                                                            node_attrs_list))
 
     return candidates
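The new `graph` argument exists so a node's quantization candidates can be checked against its consumers: `filter_node_qco_by_graph` receives `graph.get_next_nodes(node)`. That filter's body is not part of this diff; below is a minimal sketch of the idea, assuming it matches candidates against the `supported_input_activation_n_bits` field that `OpQuantizationConfig` gains later in this diff (the function name and everything else here is hypothetical):

```python
from typing import List

def filter_candidates_by_consumers(candidates: List["OpQuantizationConfig"],
                                   consumer_cfgs: List["OpQuantizationConfig"]
                                   ) -> List["OpQuantizationConfig"]:
    # Keep only candidates whose output activation bit-width every consumer
    # accepts as one of its supported input bit-widths.
    return [cfg for cfg in candidates
            if all(cfg.activation_n_bits in c.supported_input_activation_n_bits
                   for c in consumer_cfgs)]
```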
model_compression_toolkit/core/common/substitutions/shift_negative_activation.py
CHANGED
@@ -349,9 +349,15 @@ def shift_negative_function(graph: Graph,
                                   add_node.output_shape,
                                   pad_top, pad_btm, pad_left, pad_right)
 
+    # Insert a pad node between the add node to the op2d, and create statistics for the pad node
+    insert_node_before_node(graph,
+                            node_to_insert=pad_node,
+                            last_node=op2d_node)
+
     # Set quantization configuration to node, even though we do not quantize it:
     set_quantization_configs_to_node(fw_info=fw_info,
                                      node=pad_node,
+                                     graph=graph,
                                      quant_config=core_config.quantization_config,
                                      tpc=graph.tpc,
                                      mixed_precision_enable=core_config.mixed_precision_enable)
@@ -361,11 +367,6 @@ def shift_negative_function(graph: Graph,
     for attr in pad_node.get_node_weights_attributes():
         candidate_qc.weights_quantization_cfg.get_attr_config(attr).enable_weights_quantization = False
 
-    # Insert a pad node between the add node to the op2d, and create statistics for the pad node
-    insert_node_before_node(graph,
-                            node_to_insert=pad_node,
-                            last_node=op2d_node)
-
     graph.set_out_stats_collector_to_node(pad_node,
                                           add_node_stats_collector)  # We ignore the padding effect on statistics
 
@@ -373,6 +374,7 @@ def shift_negative_function(graph: Graph,
 
     set_quantization_configs_to_node(fw_info=fw_info,
                                      node=add_node,
+                                     graph=graph,
                                      quant_config=core_config.quantization_config,
                                      tpc=graph.tpc,
                                      mixed_precision_enable=core_config.mixed_precision_enable)
model_compression_toolkit/core/keras/back2framework/factory_model_builder.py
CHANGED
@@ -37,10 +37,10 @@ def get_keras_model_builder(mode: ModelBuilderMode) -> type:
         Keras model builder for the given mode.
     """
 
-    if not isinstance(mode, ModelBuilderMode):
+    if not isinstance(mode, ModelBuilderMode):  # pragma: no cover
         Logger.critical(f"Expected a ModelBuilderMode type for 'mode', but received {type(mode)} instead.")
-    if mode is None:
+    if mode is None:  # pragma: no cover
         Logger.critical(f"get_keras_model_builder received 'mode' is None")
-    if mode not in keras_model_builders.keys():
+    if mode not in keras_model_builders.keys():  # pragma: no cover
         Logger.critical(f"'mode' {mode} is not recognized in the Keras model builders factory.")
     return keras_model_builders.get(mode)
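These branches are defensive and hard to trigger in normal use, so they are now excluded from coverage measurement: coverage.py skips any line tagged `# pragma: no cover` (and the block that line opens) when computing the report. A generic illustration with hypothetical names:

```python
registered_builders = {"float": object, "quantized": object}

def get_builder(mode: str):
    if mode not in registered_builders:  # pragma: no cover
        # Defensive branch: not exercised by the test suite, excluded from the report.
        raise ValueError(f"Unknown mode: {mode}")
    return registered_builders[mode]
```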
model_compression_toolkit/core/keras/keras_implementation.py
CHANGED
@@ -21,6 +21,7 @@ from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantizationHolder
 from tensorflow.keras.models import Model
 
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
+from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode, HessianInfoService
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.remove_identity import RemoveIdentity
 from model_compression_toolkit.core.keras.hessian.activation_hessian_scores_calculator_keras import \
@@ -421,44 +422,47 @@ class KerasImplementation(FrameworkImplementation):
 
         return False
 
-    def get_mp_node_distance_fn(self,
-                                layer_class: type,
-                                framework_attrs: Dict[str, Any],
-                                axis: int = None,
-                                norm_mse: bool = False) -> Callable:
+    def get_mp_node_distance_fn(self, n: BaseNode,
+                                compute_distance_fn: Callable = None,
+                                norm_mse: bool = False) -> Tuple[Callable, int]:
         """
         A mapping between layers' types and a distance function for computing the distance between
         two tensors in mixed precision (for loss computation purposes). Returns a specific function if node of specific types is
         given, or a default (normalized MSE) function otherwise.
 
         Args:
-            layer_class: Class of the layer of the node.
-            framework_attrs: Framework attributes the layer had which the graph node holds.
+            n: Node to choose distance function for.
             compute_distance_fn: An optional distance function to use globally for all nodes.
-            axis: The axis on which the operation is preformed (if specified).
             norm_mse: whether to normalize mse distance function.
 
-        Returns: A distance function between two tensors.
+        Returns: A distance function between two tensors and a axis on which the distance is computed (if exists).
         """
 
+        axis = n.framework_attr.get(keras_constants.AXIS) \
+            if not isinstance(n, FunctionalNode) else n.op_call_kwargs.get(keras_constants.AXIS)
+
+        layer_class = n.layer_class
+        framework_attrs = n.framework_attr
+
         if compute_distance_fn is not None:
-            return compute_distance_fn
+            return compute_distance_fn, axis
 
         if layer_class == Activation:
             node_type_name = framework_attrs[ACTIVATION]
             if node_type_name == SOFTMAX and axis is not None:
-                return compute_kl_divergence
+                return compute_kl_divergence, axis
             elif node_type_name == SIGMOID:
-                return compute_cs
+                return compute_cs, axis
         elif axis is not None and (layer_class == tf.nn.softmax or layer_class == tf.keras.layers.Softmax
                                    or (layer_class == TFOpLambda and
                                        SOFTMAX in framework_attrs[keras_constants.FUNCTION])):
-            return compute_kl_divergence
-        elif layer_class == tf.nn.sigmoid:
-            return compute_cs
+            return compute_kl_divergence, axis
+        elif layer_class == tf.nn.sigmoid or (layer_class == TFOpLambda and
+                                              SIGMOID in framework_attrs[keras_constants.FUNCTION]):
+            return compute_cs, axis
         elif layer_class == Dense:
-            return compute_cs
-        return partial(compute_mse, norm=norm_mse)
+            return compute_cs, axis
+        return partial(compute_mse, norm=norm_mse), axis
 
     def get_hessian_scores_calculator(self,
                                       graph: Graph,
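The signature change moves axis resolution from the caller into the method itself: instead of receiving `layer_class`, `framework_attrs`, and `axis`, it takes the node and returns the distance function together with the axis. A condensed, runnable sketch of the new contract, using simplified stand-ins rather than MCT's classes:

```python
from functools import partial
from typing import Callable, Optional, Tuple
import numpy as np

def compute_mse(a: np.ndarray, b: np.ndarray, norm: bool = False) -> float:
    err = float(np.mean((a - b) ** 2))
    return err / float(np.mean(a ** 2)) if norm else err

def compute_kl_divergence(a: np.ndarray, b: np.ndarray) -> float:
    # Assumes a and b are probability distributions.
    return float(np.sum(a * np.log(a / b)))

def get_mp_node_distance_fn(layer_type: str,
                            axis: Optional[int],
                            norm_mse: bool = False) -> Tuple[Callable, Optional[int]]:
    # The axis now travels with the distance function instead of being an input.
    if layer_type == "softmax" and axis is not None:
        return compute_kl_divergence, axis
    return partial(compute_mse, norm=norm_mse), axis

distance_fn, axis = get_mp_node_distance_fn("softmax", axis=-1)
```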
model_compression_toolkit/core/keras/tf_tensor_numpy.py
CHANGED
@@ -36,7 +36,7 @@ def to_tf_tensor(tensor):
         return (to_tf_tensor(t) for t in tensor)
     elif isinstance(tensor, np.ndarray):
         return tf.convert_to_tensor(tensor.astype(np.float32))
-    else:
+    else:  # pragma: no cover
         Logger.critical(f'Unsupported type for conversion to TF tensor: {type(tensor)}.')
 
 
@@ -69,5 +69,5 @@ def tf_tensor_to_numpy(tensor: Union[List, Tuple, np.ndarray, tf.Tensor, float],
         return np.array([np_tensor]) if np.isscalar(np_tensor) else np_tensor
     elif isinstance(tensor, float):
         return np.array([tensor])
-    else:
+    else:  # pragma: no cover
         Logger.critical(f'Unsupported type for conversion to Numpy array: {type(tensor)}.')
model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py
CHANGED
@@ -37,10 +37,10 @@ def get_pytorch_model_builder(mode: ModelBuilderMode) -> type:
         PyTorch model builder for the given mode.
     """
 
-    if not isinstance(mode, ModelBuilderMode):
+    if not isinstance(mode, ModelBuilderMode):  # pragma: no cover
         Logger.critical(f"Expected a ModelBuilderMode type for 'mode' parameter; received {type(mode)} instead.")
-    if mode is None:
+    if mode is None:  # pragma: no cover
         Logger.critical(f"Received 'mode' parameter is None.")
-    if mode not in pytorch_model_builders.keys():
+    if mode not in pytorch_model_builders.keys():  # pragma: no cover
         Logger.critical(f"'mode' parameter {mode} is not supported by the PyTorch model builders factory.")
     return pytorch_model_builders.get(mode)
model_compression_toolkit/core/pytorch/pytorch_implementation.py
CHANGED
@@ -30,6 +30,7 @@ from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
+from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode, HessianInfoService
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
 from model_compression_toolkit.core.common.mixed_precision.set_layer_to_bitwidth import set_layer_to_bitwidth
@@ -403,36 +404,36 @@ class PytorchImplementation(FrameworkImplementation):
             return True
         return False
 
-    def get_mp_node_distance_fn(self,
-                                layer_class: type,
-                                framework_attrs: Dict[str, Any],
-                                axis: int = None,
-                                norm_mse: bool = False) -> Callable:
+    def get_mp_node_distance_fn(self, n: BaseNode,
+                                compute_distance_fn: Callable = None,
+                                norm_mse: bool = False) -> Tuple[Callable, int]:
         """
         A mapping between layers' types and a distance function for computing the distance between
         two tensors in mixed precision (for loss computation purposes). Returns a specific function if node of specific types is
         given, or a default (normalized MSE) function otherwise.
 
         Args:
-            layer_class: Class of the layer of the node.
-            framework_attrs: Framework attributes the layer had which the graph node holds.
+            n: Node to choose distance function for.
             compute_distance_fn: An optional distance function to use globally for all nodes.
-            axis: The axis on which the operation is preformed (if specified).
             norm_mse: whether to normalize mse distance function.
 
-        Returns: A distance function between two tensors.
+        Returns: A distance function between two tensors and a axis on which the distance is computed (if exists).
         """
+        axis = n.framework_attr.get(pytorch_constants.DIM) if not (
+            isinstance(n, FunctionalNode)) else n.op_call_kwargs.get(pytorch_constants.DIM)
+
+        layer_class = n.layer_class
 
         if compute_distance_fn is not None:
-            return compute_distance_fn
+            return compute_distance_fn, axis
 
         elif layer_class in [Softmax, softmax] and axis is not None:
-            return compute_kl_divergence
+            return compute_kl_divergence, axis
         elif layer_class in [Sigmoid, sigmoid]:
-            return compute_cs
+            return compute_cs, axis
         elif layer_class == Linear:
-            return compute_cs
-        return partial(compute_mse, norm=norm_mse)
+            return compute_cs, axis
+        return partial(compute_mse, norm=norm_mse), axis
 
     def is_output_node_compatible_for_hessian_score_computation(self,
                                                                 node: BaseNode) -> bool:
model_compression_toolkit/core/runner.py
CHANGED
@@ -222,5 +222,6 @@ def _set_final_resource_utilization(graph: Graph,
 
     final_ru = ResourceUtilization()
     final_ru.set_resource_utilization_by_target(final_ru_dict)
+    print(final_ru)
     graph.user_info.final_resource_utilization = final_ru
     graph.user_info.mixed_precision_cfg = final_bit_widths_config
model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py
CHANGED
@@ -24,7 +24,7 @@ if FOUND_TF:
     if version.parse(tf.__version__) >= version.parse("2.13"):
         from keras.src.engine.base_layer import Layer
         from keras.src.engine.input_layer import InputLayer
-    else:
+    else:  # pragma: no cover
         from keras.engine.base_layer import Layer
         from keras.engine.input_layer import InputLayer
 
@@ -57,7 +57,7 @@ if FOUND_TF:
                             f'KerasQuantizationWrapper must have a weights_quantizers but has a '
                             f'{type(layer.weights_quantizers)} object')  # pragma: no cover
 
-        if len(layer.weights_quantizers) == 0:
+        if len(layer.weights_quantizers) == 0:  # pragma: no cover
            Logger.critical(f'KerasQuantizationWrapper must have at least one weight quantizer, but found {len(layer.weights_quantizers)} quantizers. If layer is not quantized it should be a Keras layer.')
 
        for _, weights_quantizer in layer.weights_quantizers.items():
model_compression_toolkit/gptq/common/gptq_training.py
CHANGED
@@ -219,41 +219,6 @@ class GPTQTrainer(ABC):
 
         return hessian_approx_score_by_image
 
-    def _get_approximations_by_interest_point(self, approximations: Dict, image_idx: int) -> List:
-        """
-        Retrieves hessian approximations for a specific image index.
-
-        Args:
-            approximations (Dict): Hessian approximations.
-            image_idx (int): Image index.
-
-        Returns:
-            List: Hessian approximations for the given image index.
-        """
-        approx_by_interest_point = []
-        for target_node in self.compare_points:
-            hessian_approx_scores = approximations[target_node][image_idx]
-            self._validate_scores_approximation(hessian_approx_scores)
-            approx_by_interest_point.append(hessian_approx_scores[0])
-        return approx_by_interest_point
-
-    @staticmethod
-    def _validate_scores_approximation(hessian_approx_scores: List):
-        """
-        Validates the structure and length of the Hessian-approximation scores.
-
-        Args:
-            hessian_approx_scores: Scores to validate.
-        """
-        if not isinstance(hessian_approx_scores, list):
-            Logger.critical(f"Scores approximation was expected to be a list but is of type: {type(hessian_approx_scores)}.")  # pragma: no cover
-        if len(hessian_approx_scores) != 1:
-            Logger.critical(f"Scores approximation was expected to have a length of 1 "
-                            f"(for computations with granularity set to 'HessianInfoGranularity.PER_TENSOR') "
-                            f"but has a length of {len(hessian_approx_scores)}."
-                            )  # pragma: no cover
-
-
     @abstractmethod
     def build_gptq_model(self):
         """
model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py
CHANGED
@@ -38,7 +38,7 @@ if FOUND_TF:
 
         super().__init__(quantization_config)
 
-else:
+else:  # pragma: no cover
     class BaseKerasQATTrainableQuantizer(BaseKerasTrainableQuantizer):
         def __init__(self,
                      quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py
CHANGED
@@ -39,7 +39,7 @@ if FOUND_TORCH:
         """
         super().__init__(quantization_config)
 
-else:
+else:  # pragma: no cover
     class BasePytorchQATTrainableQuantizer(BasePytorchTrainableQuantizer):
         def __init__(self,
                      quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py
CHANGED
@@ -14,7 +14,7 @@
 # ==============================================================================
 
 import copy
-from typing import List, Dict, Union, Any
+from typing import List, Dict, Union, Any, Tuple
 
 from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
@@ -96,7 +96,7 @@ class AttributeQuantizationConfig:
             Whether this configuration is equal to another object or not.
         """
         if not isinstance(other, AttributeQuantizationConfig):
-            return False
+            return False  # pragma: no cover
         return self.weights_quantization_method == other.weights_quantization_method and \
                self.weights_n_bits == other.weights_n_bits and \
                self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
@@ -114,11 +114,13 @@ class OpQuantizationConfig:
                  attr_weights_configs_mapping: Dict[str, AttributeQuantizationConfig],
                  activation_quantization_method: QuantizationMethod,
                  activation_n_bits: int,
+                 supported_input_activation_n_bits: Union[int, Tuple[int]],
                  enable_activation_quantization: bool,
                  quantization_preserving: bool,
                  fixed_scale: float,
                  fixed_zero_point: int,
-                 simd_size: int
+                 simd_size: int,
+                 is_signed: bool = None
                  ):
         """
 
@@ -127,10 +129,12 @@ class OpQuantizationConfig:
             attr_weights_configs_mapping (Dict[str, AttributeQuantizationConfig]): A mapping between an op attribute name and its quantization configuration.
             activation_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for activation quantization.
             activation_n_bits (int): Number of bits to quantize the activations.
+            supported_input_activation_n_bits (int or Tuple[int]): Number of bits that operator accepts as input.
            enable_activation_quantization (bool): Whether to quantize the model activations or not.
            quantization_preserving (bool): Whether quantization parameters should be the same for an operator's input and output.
            fixed_scale (float): Scale to use for an operator quantization parameters.
            fixed_zero_point (int): Zero-point to use for an operator quantization parameters.
+            is_signed (bool): Force activation quantization signedness (None means don't force).
            simd_size (int): Per op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
 
        """
@@ -140,10 +144,17 @@ class OpQuantizationConfig:
 
         self.activation_quantization_method = activation_quantization_method
         self.activation_n_bits = activation_n_bits
+        if isinstance(supported_input_activation_n_bits, tuple):
+            self.supported_input_activation_n_bits = supported_input_activation_n_bits
+        elif isinstance(supported_input_activation_n_bits, int):
+            self.supported_input_activation_n_bits = (supported_input_activation_n_bits,)
+        else:
+            Logger.critical(f"Supported_input_activation_n_bits only accepts int or tuple of ints, but got {type(supported_input_activation_n_bits)}")  # pragma: no cover
         self.enable_activation_quantization = enable_activation_quantization
         self.quantization_preserving = quantization_preserving
         self.fixed_scale = fixed_scale
         self.fixed_zero_point = fixed_zero_point
+        self.is_signed = is_signed
         self.simd_size = simd_size
 
     def get_info(self):
@@ -152,7 +163,7 @@ class OpQuantizationConfig:
 
         Returns: Info about the quantization configuration as a dictionary.
 
         """
-        return self.__dict__
+        return self.__dict__  # pragma: no cover
 
     def clone_and_edit(self, attr_to_edit: Dict[str, Dict[str, Any]] = {}, **kwargs):
         """
@@ -188,14 +199,26 @@ class OpQuantizationConfig:
             Whether this configuration is equal to another object or not.
         """
         if not isinstance(other, OpQuantizationConfig):
-            return False
+            return False  # pragma: no cover
         return self.default_weight_attr_config == other.default_weight_attr_config and \
                self.attr_weights_configs_mapping == other.attr_weights_configs_mapping and \
                self.activation_quantization_method == other.activation_quantization_method and \
                self.activation_n_bits == other.activation_n_bits and \
+               self.supported_input_activation_n_bits == other.supported_input_activation_n_bits and \
               self.enable_activation_quantization == other.enable_activation_quantization and \
+               self.is_signed == other.is_signed and \
               self.simd_size == other.simd_size
 
+    @property
+    def max_input_activation_n_bits(self) -> int:
+        """
+        Get maximum supported input bit-width.
+
+        Returns: Maximum supported input bit-width.
+
+        """
+        return max(self.supported_input_activation_n_bits)
+
 
 class QuantizationConfigOptions:
     """
@@ -279,12 +302,12 @@ class QuantizationConfigOptions:
         if attrs is None:
             attrs_to_update = list(qc.attr_weights_configs_mapping.keys())
         else:
-            if not isinstance(attrs, List):
+            if not isinstance(attrs, List):  # pragma: no cover
                 Logger.critical(f"Expected a list of attributes but received {type(attrs)}.")
             attrs_to_update = attrs
 
         for attr in attrs_to_update:
-            if qc.attr_weights_configs_mapping.get(attr) is None:
+            if qc.attr_weights_configs_mapping.get(attr) is None:  # pragma: no cover
                 Logger.critical(f'Editing attributes is only possible for existing attributes in the configuration\'s '
                                 f'weights config mapping; {attr} does not exist in {qc}.')
             self.__edit_quantization_configuration(qc.attr_weights_configs_mapping[attr], kwargs)
@@ -310,6 +333,7 @@ class QuantizationConfigOptions:
         # If not, add base_config to the list of configurations to update
         cfgs_to_update = [cfg for cfg in qc_options.quantization_config_list]
         if not any(qc_options.base_config is cfg for cfg in cfgs_to_update):
+            # TODO: add test for this case
             cfgs_to_update.append(qc_options.base_config)
 
         for qc in cfgs_to_update:
@@ -319,7 +343,7 @@ class QuantizationConfigOptions:
             new_attr_mapping = {}
             for attr in list(qc.attr_weights_configs_mapping.keys()):
                 new_key = layer_attrs_mapping.get(attr)
-                if new_key is None:
+                if new_key is None:  # pragma: no cover
                     Logger.critical(f"Attribute \'{attr}\' does not exist in the provided attribute mapping.")
 
                 new_attr_mapping[new_key] = qc.attr_weights_configs_mapping.pop(attr)
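A condensed sketch of the new input-bit-width plumbing, mirroring the constructor logic above (a simplified stand-in class, not the full `OpQuantizationConfig`):

```python
from typing import Optional, Tuple, Union

class OpCfgSketch:
    def __init__(self,
                 activation_n_bits: int,
                 supported_input_activation_n_bits: Union[int, Tuple[int, ...]],
                 is_signed: Optional[bool] = None):
        self.activation_n_bits = activation_n_bits
        # Normalize a single int into a one-element tuple, as the real constructor does.
        if isinstance(supported_input_activation_n_bits, tuple):
            self.supported_input_activation_n_bits = supported_input_activation_n_bits
        elif isinstance(supported_input_activation_n_bits, int):
            self.supported_input_activation_n_bits = (supported_input_activation_n_bits,)
        else:
            raise TypeError(f"int or tuple of ints expected, got {type(supported_input_activation_n_bits)}")
        self.is_signed = is_signed  # None: signedness is not forced

    @property
    def max_input_activation_n_bits(self) -> int:
        return max(self.supported_input_activation_n_bits)

cfg = OpCfgSketch(activation_n_bits=8, supported_input_activation_n_bits=(8, 16))
assert cfg.max_input_activation_n_bits == 16
```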
model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py
CHANGED
@@ -96,7 +96,7 @@ class TargetPlatformModel(ImmutableClass):
         for op_set in self.operator_set:
             if operators_set_name == op_set.name:
                 return op_set.qc_options
-        return
+        return self.default_qco
 
     def get_default_op_quantization_config(self) -> OpQuantizationConfig:
         """
@@ -158,7 +158,7 @@ class TargetPlatformModel(ImmutableClass):
             self.fusing_patterns.append(tp_model_component)
         elif isinstance(tp_model_component, OperatorsSetBase):
             self.operator_set.append(tp_model_component)
-        else:
+        else:  # pragma: no cover
             Logger.critical(f'Attempted to append an unrecognized TargetPlatformModelComponent of type: {type(tp_model_component)}.')
 
     def __enter__(self):
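The first hunk is a behavioral change: looking up an unknown operator-set name now falls back to the model's default `QuantizationConfigOptions` instead of silently returning `None`. A minimal sketch of the new behavior (simplified stand-in types):

```python
class TPModelSketch:
    def __init__(self, default_qco, operator_set):
        self.default_qco = default_qco
        self.operator_set = operator_set  # objects exposing .name and .qc_options

    def get_config_options_by_operators_set(self, operators_set_name):
        for op_set in self.operator_set:
            if operators_set_name == op_set.name:
                return op_set.qc_options
        return self.default_qco  # previously a bare `return`, i.e. None
```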
|