mct-nightly 1.10.0.20231129.post414__py3-none-any.whl → 1.10.0.20231201.post429__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mct-nightly
- Version: 1.10.0.20231129.post414
+ Version: 1.10.0.20231201.post429
  Summary: A Model Compression Toolkit for neural networks
  Home-page: UNKNOWN
  License: UNKNOWN
@@ -4,8 +4,9 @@ model_compression_toolkit/logger.py,sha256=b9DVktZ-LymFcRxv2aL_sdiE6S2sSrFGWltx6
  model_compression_toolkit/core/__init__.py,sha256=qnBA6aaojI7RpEQZU2vXWiELHfVJf-MnAP-4T0tcFDY,2008
  model_compression_toolkit/core/analyzer.py,sha256=dbsD61pakp_9JXNyAScLdtJvcXny9jr_cMbET0Bd3Sg,2975
  model_compression_toolkit/core/exporter.py,sha256=U_-ea-zYHsnIt2ydameMLZ_gzDaCMI1dRa5IjA8RUuc,4233
- model_compression_toolkit/core/graph_prep_runner.py,sha256=XDQhivlSiPRPP9WkYro4NDvHqu5rjCIo00IVbuzuR_Y,9995
- model_compression_toolkit/core/runner.py,sha256=_OyuUefAse2oEdEtFfvJghOvfk0WlWuh_ZkxtJR91LM,17989
+ model_compression_toolkit/core/graph_prep_runner.py,sha256=SHhFl0vpC9YpRu40xkApFzmw_dT-nfIz1MDjmKcon8Q,9913
+ model_compression_toolkit/core/quantization_prep_runner.py,sha256=npv55-QsJFR7bnbHj4tBMf13Y18Ns7QGa-UDSI6WJRE,6554
+ model_compression_toolkit/core/runner.py,sha256=D_5OZ1PHlvytjCwu_7nTPkYHdmthN-ZHANpirqrH5A8,12069
  model_compression_toolkit/core/common/__init__.py,sha256=Wh127PbXcETZX_d1PQqZ71ETK3J9XO5A-HpadGUbj6o,1447
  model_compression_toolkit/core/common/base_substitutions.py,sha256=xDFSmVVs_iFSZfajytI0cuQaNRNcwHX3uqOoHgVUvxQ,1666
  model_compression_toolkit/core/common/data_loader.py,sha256=7YF5Mqz64Xb4rVwY3knrdIZ4JEHybXxiQqx0deR_c5k,4017
@@ -447,8 +448,8 @@ model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha
  model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=MVwXNymmFRB2NXIBx4e2mdJ1RfoHxRPYRgjb1MQP5kY,1797
  model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
  model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=SbvRlIdE32PEBsINt1bhSqvrKL_zbM9V-aeSkOn-sw4,3083
- mct_nightly-1.10.0.20231129.post414.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
- mct_nightly-1.10.0.20231129.post414.dist-info/METADATA,sha256=BlcFLeMfxtrdsyKQxgLa5xkorVHJlwwK7qGId7PKegQ,16232
- mct_nightly-1.10.0.20231129.post414.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- mct_nightly-1.10.0.20231129.post414.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
- mct_nightly-1.10.0.20231129.post414.dist-info/RECORD,,
+ mct_nightly-1.10.0.20231201.post429.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+ mct_nightly-1.10.0.20231201.post429.dist-info/METADATA,sha256=FHlPhn8_PjyXcKC1lbAR_gtBwSTRSJ-bFKIzCzKMToE,16232
+ mct_nightly-1.10.0.20231201.post429.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ mct_nightly-1.10.0.20231201.post429.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+ mct_nightly-1.10.0.20231201.post429.dist-info/RECORD,,
@@ -1,4 +1,4 @@
- # Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+ # Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -41,23 +41,24 @@ def graph_preparation_runner(in_model: Any,
                                 tb_w: TensorboardWriter = None,
                                 mixed_precision_enable: bool = False) -> Graph:
      """
-     Quantize a trained model using post-training quantization.
-     First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding
-     layers).
-     Second, statistics (e.g. min/max, histogram, etc.) are collected for each layer's output
-     (and input, depends on the quantization configuration) using a given representative dataset.
-     Next, quantization parameters are calculated using the collected statistics
-     (both coefficients and activations by default).
+     Runs all required preparations in order to build a quantization graph from the given model,
+     quantization configuration, and target platform specifications.
+     This runner includes the following steps:
+     - Reading and building a graph from the given model.
+     - Setting a quantization configuration for each relevant node in the graph.
+     - Applying all necessary substitutions to finalize the graph for quantization.
+
      Args:
          in_model: Model to quantize.
          representative_data_gen: Dataset used for calibration.
-         core_config: CoreConfig containing parameters of how the model should be quantized
+         quantization_config: QuantizationConfig containing parameters of how the model should be quantized.
          fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
-         groups of layers by how they should be quantized, etc.).
+             groups of layers by how they should be quantized, etc.).
          fw_impl: FrameworkImplementation object with a specific framework methods implementation.
          tpc: TargetPlatformCapabilities object that models the inference target platform and
-         the attached framework operator's information.
+             the attached framework operator's information.
          tb_w: TensorboardWriter object for logging
+
      Returns:
          An internal graph representation of the input model.
      """
@@ -92,16 +93,18 @@ def get_finalized_graph(initial_graph: Graph,
      """
      Applies all edit operations (edit, substitutions, etc.) on the model's graph, to prepare it for the quantization
      process. All future graph substitutions and operations that change the graph should be added to this method.
+
      Args:
          initial_graph (Graph): Graph to apply the changes to.
          tpc (TargetPlatformCapabilities): TargetPlatformCapabilities object that describes the desired inference target platform (includes fusing patterns MCT should handle).
          quant_config (QuantizationConfig): QuantizationConfig containing parameters of how the model should be
-         quantized.
+             quantized.
          fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g.,
-         kernel channels indices, groups of layers by how they should be quantized, etc.)
+             kernel channels indices, groups of layers by how they should be quantized, etc.)
          tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc.
          fw_impl (FrameworkImplementation): FrameworkImplementation object with a specific framework methods implementation.
-     mixed_precision_enable: is mixed precision enabled.
+         mixed_precision_enable: Whether mixed precision is enabled.
+
      Returns: Graph object that represents the model, after applying all required modifications to it.
      """

@@ -173,6 +176,7 @@ def read_model_to_graph(in_model: Any,

      """
      Read a model into a graph object.
+
      Args:
          in_model: Model to optimize and prepare for quantization.
          representative_data_gen: Dataset used for calibration.
@@ -181,6 +185,7 @@ def read_model_to_graph(in_model: Any,
          fw_info: Information needed for quantization about the specific framework (e.g.,
              kernel channels indices, groups of layers by how they should be quantized, etc.)
          fw_impl: FrameworkImplementation object with a specific framework methods implementation.
+
      Returns:
          Graph object that represents the model.
      """
@@ -0,0 +1,134 @@
+ # Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+
+ from typing import Callable
+
+ from tqdm import tqdm
+
+ from model_compression_toolkit.core.common import FrameworkInfo
+ from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
+ from model_compression_toolkit.core.common.graph.base_graph import Graph
+ from model_compression_toolkit.core.common.model_collector import ModelCollector
+ from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
+ from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
+ from model_compression_toolkit.core.common.quantization.quantization_analyzer import analyzer_graph
+ from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_computation import \
+     calculate_quantization_params
+ from model_compression_toolkit.core.common.statistics_correction.statistics_correction import \
+     statistics_correction_runner
+ from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute
+
+ from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter
+
+
+ def quantization_preparation_runner(graph: Graph,
+                                     representative_data_gen: Callable,
+                                     core_config: CoreConfig,
+                                     fw_info: FrameworkInfo,
+                                     fw_impl: FrameworkImplementation,
+                                     tb_w: TensorboardWriter = None) -> Graph:
+     """
+     Prepares a trained model for post-training quantization.
+     First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers).
+     Second, statistics (e.g. min/max, histogram, etc.) are collected for each layer's output
+     (and input, depending on the quantization configuration) using a given representative dataset.
+     Next, quantization parameters are calculated using the collected statistics.
+     Finally, more transformations (based on the statistics) are applied to increase the model's performance.
+
+     Args:
+         graph: A graph representation of the model to be quantized.
+         representative_data_gen: Dataset used for calibration.
+         core_config: CoreConfig containing parameters of how the model should be quantized.
+         fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
+             groups of layers by how they should be quantized, etc.).
+         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
+         tb_w: TensorboardWriter object for logging.
+
+     Returns:
+         Graph object that represents the model, contains thresholds, and is ready for quantization.
+     """
+
+     ######################################
+     # Graph analyzing (attaching statistics collectors)
+     ######################################
+     analyzer_graph(fw_impl.attach_sc_to_node,
+                    graph,
+                    fw_info,
+                    core_config.quantization_config)  # Mark points for statistics collection
+
+     if tb_w is not None:
+         tb_w.add_graph(graph, 'after_analyzer_graph')
+
+     ######################################
+     # Statistic collection
+     ######################################
+     mi = ModelCollector(graph,
+                         fw_impl,
+                         fw_info)
+
+     for _data in tqdm(representative_data_gen()):
+         mi.infer(_data)
+
+     ######################################
+     # Edit network according to user
+     # specific settings
+     ######################################
+     # Notice that not all actions take effect at this stage (for example, actions that edit the final configuration,
+     # as there are no final configurations at this stage of the optimization). For this reason we edit the graph
+     # again at the end of the optimization process.
+     edit_network_graph(graph, fw_info, core_config.debug_config.network_editor)
+
+     ######################################
+     # Calculate quantization params
+     ######################################
+     calculate_quantization_params(graph,
+                                   fw_info,
+                                   fw_impl=fw_impl)
+
+     if tb_w is not None:
+         tb_w.add_graph(graph, 'thresholds_selection')
+         tb_w.add_all_statistics(graph, 'thresholds_selection')
+
+     ######################################
+     # Graph substitution (post statistics collection)
+     ######################################
+     transformed_graph = substitute(graph,
+                                    fw_impl.get_substitutions_post_statistics_collection(core_config.quantization_config))
+
+     ######################################
+     # Shift Negative Activations
+     ######################################
+     if core_config.quantization_config.shift_negative_activation_correction:
+         transformed_graph = fw_impl.shift_negative_correction(transformed_graph,
+                                                               core_config,
+                                                               fw_info)
+         if tb_w is not None:
+             tb_w.add_graph(transformed_graph, 'after_shift_negative_correction')
+             tb_w.add_all_statistics(transformed_graph, 'after_shift_negative_correction')
+
+     if tb_w is not None:
+         tb_w.add_graph(transformed_graph, 'post_statistics_collection_substitutions')
+         tb_w.add_all_statistics(transformed_graph, 'post_statistics_collection_substitutions')
+
+     ######################################
+     # Statistics Correction
+     ######################################
+     tg_with_bias = statistics_correction_runner(transformed_graph, core_config, fw_info, fw_impl, tb_w)
+
+     for n in tg_with_bias.nodes:
+         assert n.final_weights_quantization_cfg is None
+
+     return tg_with_bias
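
The entry point of this new module consumes the graph produced by graph_preparation_runner. Below is a minimal sketch of a direct call, using the keyword names from the signature above; the surrounding objects (graph, representative_data_gen, core_config, fw_info, fw_impl) are assumed to already exist, as they do inside core_runner:

    # Hedged sketch -- keyword names match the signature added above;
    # the input objects are assumed to be prepared by the caller.
    from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner

    tg = quantization_preparation_runner(graph=graph,
                                         representative_data_gen=representative_data_gen,
                                         core_config=core_config,
                                         fw_info=fw_info,
                                         fw_impl=fw_impl,
                                         tb_w=None)  # pass a TensorboardWriter to log intermediate graphs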
@@ -23,6 +23,7 @@ from tqdm import tqdm
  from model_compression_toolkit.core.common import FrameworkInfo
  from model_compression_toolkit.core.common.hessian.hessian_info_service import HessianInfoService
  from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
+ from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
  from model_compression_toolkit.core.common.graph.base_graph import Graph
@@ -47,6 +48,7 @@ from model_compression_toolkit.core.common.visualization.final_config_visualizer
      ActivationFinalBitwidthConfigVisualizer
  from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter

+
  def core_runner(in_model: Any,
                  representative_data_gen: Callable,
                  core_config: CoreConfig,
@@ -94,12 +96,12 @@ def core_runner(in_model: Any,
                                        representative_dataset=representative_data_gen,
                                        fw_impl=fw_impl)

-     tg = _prepare_model_for_quantization(graph,
-                                          representative_data_gen,
-                                          core_config,
-                                          fw_info,
-                                          tb_w,
-                                          fw_impl)
+     tg = quantization_preparation_runner(graph=graph,
+                                          representative_data_gen=representative_data_gen,
+                                          core_config=core_config,
+                                          fw_info=fw_info,
+                                          fw_impl=fw_impl,
+                                          tb_w=tb_w)

      ######################################
      # Finalize bit widths
@@ -179,131 +181,6 @@ def _init_tensorboard_writer(fw_info: FrameworkInfo) -> TensorboardWriter:
      return tb_w


- def read_model_to_graph(in_model: Any,
-                         representative_data_gen: Callable,
-                         tpc: TargetPlatformCapabilities,
-                         fw_info: FrameworkInfo = None,
-                         fw_impl: FrameworkImplementation = None) -> Graph:
-
-     """
-     Read a model into a graph object.
-     Args:
-         in_model: Model to optimize and prepare for quantization.
-         representative_data_gen: Dataset used for calibration.
-         tpc: TargetPlatformCapabilities object that models the inference target platform and
-             the attached framework operator's information.
-         fw_info: Information needed for quantization about the specific framework (e.g.,
-             kernel channels indices, groups of layers by how they should be quantized, etc.)
-         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
-     Returns:
-         Graph object that represents the model.
-     """
-     graph = fw_impl.model_reader(in_model,
-                                  representative_data_gen)
-     graph.set_fw_info(fw_info)
-     graph.set_tpc(tpc)
-     return graph
-
-
- def _prepare_model_for_quantization(transformed_graph: Graph,
-                                     representative_data_gen: Callable,
-                                     core_config: CoreConfig = CoreConfig(),
-                                     fw_info: FrameworkInfo = None,
-                                     tb_w: TensorboardWriter = None,
-                                     fw_impl: FrameworkImplementation = None) -> Graph:
-     """
-     Prepare a trained model for post-training quantization.
-     First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers).
-     Second, statistics (e.g. min/max, histogram, etc.) are collected for each layer's output
-     (and input, depends on the quantization configuration) using a given representative dataset.
-     Next, quantization parameters are calculated using the collected statistics.
-     Finally, more transformations (based on the statistics) are applied to increase the model's performance.
-
-     Args:
-         representative_data_gen (Callable): Dataset used for calibration.
-         core_config (CoreConfig): CoreConfig containing parameters of how the model should be quantized.
-         fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g.,
-             kernel channels indices, groups of layers by how they should be quantized, etc.)
-         tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc.
-         fw_impl (FrameworkImplementation): FrameworkImplementation object with a specific framework methods implementation.
-
-     Returns:
-         Graph object that represents the model, contains thresholds, and ready for quantization.
-     """
-
-     ######################################
-     # Graph analyzing (attaching statistics collectors)
-     ######################################
-     analyzer_graph(fw_impl.attach_sc_to_node,
-                    transformed_graph,
-                    fw_info,
-                    core_config.quantization_config)  # Mark points for statistics collection
-
-     if tb_w is not None:
-         tb_w.add_graph(transformed_graph, 'after_analyzer_graph')
-
-     ######################################
-     # Statistic collection
-     ######################################
-     mi = ModelCollector(transformed_graph,
-                         fw_impl,
-                         fw_info)
-
-     for _data in tqdm(representative_data_gen()):
-         mi.infer(_data)
-
-     ######################################
-     # Edit network according to user
-     # specific settings
-     ######################################
-     # Notice that not all actions affect at this stage (for example, actions that edit the final configuration as
-     # there are no final configurations at this stage of the optimization). For this reason we edit the graph
-     # again at the end of the optimization process.
-     edit_network_graph(transformed_graph, fw_info, core_config.debug_config.network_editor)
-
-     ######################################
-     # Calculate quantization params
-     ######################################
-     calculate_quantization_params(transformed_graph,
-                                   fw_info,
-                                   fw_impl=fw_impl)
-
-     if tb_w is not None:
-         tb_w.add_graph(transformed_graph, 'thresholds_selection')
-         tb_w.add_all_statistics(transformed_graph, 'thresholds_selection')
-
-     ######################################
-     # Graph substitution (post statistics collection)
-     ######################################
-     transformed_graph = substitute(transformed_graph,
-                                    fw_impl.get_substitutions_post_statistics_collection(core_config.quantization_config))
-
-     ######################################
-     # Shift Negative Activations
-     ######################################
-     if core_config.quantization_config.shift_negative_activation_correction:
-         transformed_graph = fw_impl.shift_negative_correction(transformed_graph,
-                                                               core_config,
-                                                               fw_info)
-         if tb_w is not None:
-             tb_w.add_graph(transformed_graph, 'after_shift_negative_correction')
-             tb_w.add_all_statistics(transformed_graph, 'after_shift_negative_correction')
-
-     if tb_w is not None:
-         tb_w.add_graph(transformed_graph, 'post_statistics_collection_substitutions')
-         tb_w.add_all_statistics(transformed_graph, 'post_statistics_collection_substitutions')
-
-     ######################################
-     # Statistics Correction
-     ######################################
-     tg_with_bias = statistics_correction_runner(transformed_graph, core_config, fw_info, fw_impl, tb_w)
-
-     for n in tg_with_bias.nodes:
-         assert n.final_weights_quantization_cfg is None
-
-     return tg_with_bias
-
-
  def _set_final_kpi(graph: Graph,
                     final_bit_widths_config: List[int],
                     kpi_functions_dict: Dict[KPITarget, Tuple[MpKpiMetric, MpKpiAggregation]],
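
Taken together, this release extracts runner.py's private _prepare_model_for_quantization into the public quantization_prep_runner module and moves read_model_to_graph into graph_prep_runner.py, leaving core_runner a thin orchestrator of two stages. A sketch of the resulting end-to-end flow (setup objects assumed to exist; the argument order for the first stage is inferred from its docstring):

    # Hedged sketch of the two-stage pipeline after this refactor.
    from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
    from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner

    # Stage 1: read the model and finalize the graph for quantization.
    graph = graph_preparation_runner(model, representative_data_gen,
                                     core_config.quantization_config,
                                     fw_info, fw_impl, tpc)

    # Stage 2: collect statistics, compute quantization parameters, and
    # apply statistics-based corrections.
    tg = quantization_preparation_runner(graph=graph,
                                         representative_data_gen=representative_data_gen,
                                         core_config=core_config,
                                         fw_info=fw_info,
                                         fw_impl=fw_impl)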