mct-nightly 1.10.0.20231002.post426__py3-none-any.whl → 1.10.0.20231004.post404__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/METADATA +10 -1
- {mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/RECORD +6 -6
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +15 -14
- {mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/WHEEL +0 -0
- {mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/top_level.txt +0 -0
{mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 1.10.0.20231002.post426
+Version: 1.10.0.20231004.post404
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN
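This first hunk only bumps the Version field; the nightly scheme appears to be `<base version>.<YYYYMMDD>.post<build number>`. As a minimal sketch (assuming the wheel was installed under its published name `mct-nightly`), the installed build can be inspected with the standard library:

```python
# Minimal sketch: check which nightly build is installed.
# Assumes the package was installed as "mct-nightly" (e.g. via pip).
from importlib.metadata import version, PackageNotFoundError

try:
    v = version("mct-nightly")  # e.g. "1.10.0.20231004.post404"
    *base, build_date, post = v.split(".")
    print(f"base={'.'.join(base)} date={build_date} build={post}")
except PackageNotFoundError:
    print("mct-nightly is not installed")
```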
@@ -130,6 +130,14 @@ Main features:
 * <ins>Visualization:</ins> You can use TensorBoard to observe useful information for troubleshooting the quantized model's performance (for example, the model in different phases of the quantization, collected statistics, similarity between layers of the float and quantized model and bit-width configuration for mixed-precision quantization). For more details, please read the [visualization documentation](https://sony.github.io/model_optimization/docs/guidelines/visualization.html).
 * <ins>Target Platform Capabilities:</ins> The Target Platform Capabilities (TPC) describes the target platform (an edge device with dedicated hardware). For more details, please read the [TPC README](model_compression_toolkit/target_platform_capabilities/README.md).
 
+### Enhanced Post-Training Quantization (EPTQ)
+As part of the GPTQ we provide an advanced optimization algorithm called EPTQ.
+
+The specifications of the algorithm are detailed in the paper: _"**EPTQ: Enhanced Post-Training Quantization via Label-Free Hessian**"_ [4].
+
+More details on the how to use EPTQ via MCT can be found in the [EPTQ guidelines](model_compression_toolkit/gptq/README.md).
+
+
 
 #### Experimental features
 
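The added README section introduces EPTQ as part of MCT's GPTQ flow. Below is a hedged sketch of invoking that flow from Keras; the entry points `mct.gptq.get_keras_gptq_config` and `mct.gptq.keras_gradient_post_training_quantization_experimental` are assumptions based on the MCT 1.10 API line and should be verified against the EPTQ guidelines the README links to.

```python
# Hedged sketch of running MCT's Keras GPTQ flow (the family EPTQ
# belongs to). Entry-point names are assumptions for the 1.10 API line;
# check the EPTQ guidelines linked in the README above.
import numpy as np
import model_compression_toolkit as mct
from tensorflow import keras

float_model = keras.applications.MobileNetV2()

def representative_data_gen():
    # Replace with batches drawn from a real calibration set.
    yield [np.random.randn(1, 224, 224, 3).astype("float32")]

# The GPTQ config controls the gradient-based fine-tuning stage that the
# EPTQ optimization runs under.
gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=5)

quantized_model, quantization_info = \
    mct.gptq.keras_gradient_post_training_quantization_experimental(
        float_model,
        representative_data_gen,
        gptq_config=gptq_config)
```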
@@ -176,4 +184,5 @@ MCT aims at keeping a more up-to-date fork and welcomes contributions from anyon
 
 [3] [TORCHVISION.MODELS](https://pytorch.org/vision/stable/models.html)
 
+[4] Gordon, O., Habi, H. V., & Netzer, A., 2023. [EPTQ: Enhanced Post-Training Quantization via Label-Free Hessian. arXiv preprint](https://arxiv.org/abs/2309.11531)
 
{mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/RECORD

@@ -53,7 +53,7 @@ model_compression_toolkit/core/common/matchers/function.py,sha256=kMwcinxn_PInve
 model_compression_toolkit/core/common/matchers/node_matcher.py,sha256=63cMwa5YbQ5LKZy8-KFmdchVc3N7mpDJ6fNDt_uAQsk,2745
 model_compression_toolkit/core/common/matchers/walk_matcher.py,sha256=xqfLKk6xZt72hSnND_HoX5ESOooNMypb5VOZkVsJ_nw,1111
 model_compression_toolkit/core/common/mixed_precision/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
-model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py,sha256=_qwE1RlvDx4eGUfxpFHfM1Jo1pA6gSUUrswdgfs6YU8,6774
 model_compression_toolkit/core/common/mixed_precision/configurable_quant_id.py,sha256=LLDguK7afsbN742ucLpmJr5TUfTyFpK1vbf2bpVr1v0,882
 model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py,sha256=kmyBcqGh3qYqo42gIZzouQEljTNpF9apQt6cXEVkTQ0,3871
 model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=x0cweemRG3_7FlvAbxFK5Zi77qpoKAGqtGndY8MtgwM,2222
@@ -429,8 +429,8 @@ model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha
 model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=MVwXNymmFRB2NXIBx4e2mdJ1RfoHxRPYRgjb1MQP5kY,1797
 model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
 model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=SbvRlIdE32PEBsINt1bhSqvrKL_zbM9V-aeSkOn-sw4,3083
-mct_nightly-1.10.0.
-mct_nightly-1.10.0.
-mct_nightly-1.10.0.
-mct_nightly-1.10.0.
-mct_nightly-1.10.0.
+mct_nightly-1.10.0.20231004.post404.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-1.10.0.20231004.post404.dist-info/METADATA,sha256=6imuKBIiVkvsgOisTy671wf6-OChPZOr7D8ai_J2sVo,16303
+mct_nightly-1.10.0.20231004.post404.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+mct_nightly-1.10.0.20231004.post404.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-1.10.0.20231004.post404.dist-info/RECORD,,
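Each RECORD row has the form `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64 SHA-256 of the file with trailing `=` padding stripped (the standard wheel RECORD encoding). A minimal sketch for recomputing one entry from an unpacked wheel; the local path is illustrative:

```python
# Minimal sketch: recompute a RECORD-style entry for one file from an
# unpacked wheel. "sha256=<urlsafe-base64, unpadded>" is the standard
# wheel RECORD encoding; the path below is illustrative.
import base64
import hashlib
from pathlib import Path

def record_entry(path: Path) -> str:
    data = path.read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}"

print(record_entry(Path(
    "model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py")))
# For this release the RECORD above lists
# sha256=_qwE1RlvDx4eGUfxpFHfM1Jo1pA6gSUUrswdgfs6YU8 and size 6774.
```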
model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py

@@ -50,20 +50,21 @@ def set_bit_widths(mixed_precision_enable: bool,
                 _set_node_final_qc(bit_widths_config,
                                    node,
                                    node_index_in_graph)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            else:
+                if node.is_activation_quantization_enabled():
+                    # If we are here, this means that we are in weights-only mixed-precision
+                    # (i.e., activations are quantized with fixed bitwidth or not quantized)
+                    # and that this node doesn't have weights to quantize
+                    assert len(node.candidates_quantization_cfg) > 0, \
+                        "Node need to have at least one quantization configuration in order to quantize its activation"
+                    node.final_activation_quantization_cfg = copy.deepcopy(node.candidates_quantization_cfg[0].activation_quantization_cfg)
+                if node.is_weights_quantization_enabled():
+                    # If we are here, this means that we are in activation-only mixed-precision
+                    # (i.e., weights are quantized with fixed bitwidth or not quantized)
+                    # and that this node doesn't have activations to quantize
+                    assert len(node.candidates_quantization_cfg) > 0, \
+                        "Node need to have at least one quantization configuration in order to quantize its activation"
+                    node.final_weights_quantization_cfg = copy.deepcopy(node.candidates_quantization_cfg[0].weights_quantization_cfg)
 
     # When working in non-mixed-precision mode, there's only one bitwidth, and we simply set the
     # only candidate of the node as its final weight and activation quantization configuration.
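The added `else` branch handles nodes that are excluded from the mixed-precision search: such a node simply takes its single candidate configuration as final, for activations and weights independently. A self-contained sketch of that fallback pattern follows; `Node` and its fields are simplified stand-ins for MCT's graph node, not the real API.

```python
# Self-contained sketch of the single-candidate fallback added above.
# "Node" and "CandidateCfg" are simplified stand-ins that illustrate the
# control flow only; they are not MCT's actual classes.
import copy
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class CandidateCfg:
    activation_quantization_cfg: dict
    weights_quantization_cfg: dict

@dataclass
class Node:
    candidates: List[CandidateCfg]
    activation_enabled: bool = True
    weights_enabled: bool = True
    final_activation_cfg: Optional[dict] = None
    final_weights_cfg: Optional[dict] = None

def set_non_configurable_node_cfg(node: Node) -> None:
    # A node outside the mixed-precision search keeps its only candidate,
    # copied so later mutations don't alias the candidate list.
    assert len(node.candidates) > 0, \
        "Node needs at least one quantization configuration"
    if node.activation_enabled:
        node.final_activation_cfg = copy.deepcopy(
            node.candidates[0].activation_quantization_cfg)
    if node.weights_enabled:
        node.final_weights_cfg = copy.deepcopy(
            node.candidates[0].weights_quantization_cfg)

node = Node([CandidateCfg({"n_bits": 8}, {"n_bits": 8})])
set_non_configurable_node_cfg(node)
print(node.final_activation_cfg, node.final_weights_cfg)
```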
{mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/LICENSE.md
File without changes

{mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/WHEEL
File without changes

{mct_nightly-1.10.0.20231002.post426.dist-info → mct_nightly-1.10.0.20231004.post404.dist-info}/top_level.txt
File without changes