neural-compressor 3.2__tar.gz → 3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {neural_compressor-3.2 → neural_compressor-3.3}/PKG-INFO +35 -50
- {neural_compressor-3.2 → neural_compressor-3.3}/README.md +21 -48
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/adaptor.py +1 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/mxnet.py +1 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt.py +2 -2
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_util.py +1 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/gptq.py +6 -2
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/hawq_metric.py +0 -3
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py +2 -2
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/base_config.py +7 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/version.py +1 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/dummy_dataset_v2.py +1 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/accuracy.py +12 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/utils.py +17 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_util.py +1 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/common.py +6 -88
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/fp_utils.py +27 -38
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/measure.py +5 -2
- neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/patching_common.py +113 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/quant_dequant.py +98 -37
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/quantize.py +45 -34
- neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_hpu_ops.py +196 -0
- neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale.py +196 -0
- neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_handler.py +155 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/__init__.py +4 -4
- neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/ops_quantizer.py +311 -0
- neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/round_scales_function.py +65 -0
- neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/scale_method_factory.py +194 -0
- neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/scales_method.py +246 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/utils.py +37 -16
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py +448 -318
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/quant_config.py +18 -11
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/common.py +1 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/model_configs.py +1 -4
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/patched_module_base.py +47 -26
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/quantizer.py +6 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/save_load.py +371 -50
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/scripts/fix_measurements.py → neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/scripts/postprocessing_vllm_measurements.py +60 -28
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/utils/patched_module_restore_registry.py +9 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/core.py +3 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/half_precision_rewriter.py +46 -20
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/utility.py +2 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/autoround.py +6 -4
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/gptq.py +2 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/save_load.py +116 -28
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/autotune.py +12 -3
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/config.py +17 -3
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/save_load_entry.py +21 -11
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/constants.py +1 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/environ.py +8 -0
- neural_compressor-3.3/neural_compressor/torch/utils/llm_utility.py +126 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/utility.py +14 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/__init__.py +1 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/models/__init__.py +8 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/models/modeling_auto.py +46 -17
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/quantization/utils.py +110 -15
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/utils/quantization_config.py +13 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/version.py +1 -1
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/PKG-INFO +35 -50
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/SOURCES.txt +8 -4
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale.py +0 -500
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale_handler.py +0 -50
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/max_abs.py +0 -553
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/single_scale.py +0 -96
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/smooth_quant.py +0 -131
- {neural_compressor-3.2 → neural_compressor-3.3}/LICENSE +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/conv2d.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/dense.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/pool2d.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/quantizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/separable_conv2d.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/mxnet.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/mxnet_utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/mxnet_utils/util.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt_cuda.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt_dml.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt_dnnl.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt_trt.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/calibration.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/calibrator.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/activation.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/argmax.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/attention.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/binary_op.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/concat.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/gather.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/gemm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/lstm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/matmul.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/maxpool.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/norm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/ops.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/pad.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/pooling.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/reduce.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/resize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/split.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/unary_op.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/quantizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/smooth_quant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/util.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/weight_only.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/pytorch.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/pytorch_cpu.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/pytorch_gpu.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/pytorch_ipex.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/query.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tensorflow.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tensorflow.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tensorflow_itex.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_converter.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph_common.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/util.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/auto_round.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/awq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/bf16_convert.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/mixed_precision.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/model_wrapper.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/pattern_detector.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/symbolic_trace.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/teq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/util.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/auto_alpha.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/calibration.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/graph_trace.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/weight_only.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/algorithm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/fast_bias_correction.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/smooth_quant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/weight_correction.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/benchmark.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/base_tuning.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/benchmark.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/tuning_param.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/constants.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/logger.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/save_load.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/callbacks.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/distillation/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/distillation/criterions.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/distillation/optimizers.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/distillation/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/hpo/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/hpo/sa_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/hpo/search_algorithms.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/hpo/search_space.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/criteria.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/dsnot.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/model_slim/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/model_slim/auto_slim.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/model_slim/weight_slim.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/mha.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/ninm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/nxm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/basic.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/block_mask.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/mha.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/pattern_lock.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/progressive.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/retrain_free.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/sparse_gpt.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruning.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/regs.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/schedulers.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/tf_criteria.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/wanda/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/wanda/prune.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/wanda/utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/wanda/wrapper.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/config.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/contrib/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/contrib/strategy/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/contrib/strategy/sigopt.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/contrib/strategy/tpe.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/base_dataloader.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/dataloader.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/default_dataloader.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/fetcher.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/mxnet_dataloader.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/onnxrt_dataloader.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/pytorch_dataloader.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/sampler.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/tensorflow_dataloader.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/bert_dataset.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/coco_dataset.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/dataset.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/dummy_dataset.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/imagenet_dataset.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/style_transfer_dataset.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/filters/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/filters/coco_filter.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/filters/filter.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/coco_transform.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/imagenet_transform.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/postprocess.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/tokenization.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/transform.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/bigcode_eval/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/bigcode_eval/evaluator.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/hf_eval/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/hf_eval/evaluator.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/hf_eval/hf_datasets/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/hf_eval/hf_datasets/cnn_dailymail.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/models/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/models/huggingface.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/bleu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/bleu_util.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/coco_label_map.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/coco_tools.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/evaluate_squad.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/f1.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/metric.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/mix_precision.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/base_model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/keras_model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/mxnet_model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/nets_factory.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/onnx_model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/tensorflow_model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/torch_model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/objective.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/factory.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/onnx_parser/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/onnx_parser/factory.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/onnx_parser/parser.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/parser.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/result.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/tensorflow_parser/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/tensorflow_parser/factory.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/tensorflow_parser/parser.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/factory.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/onnxrt_profiler/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/onnxrt_profiler/utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/profiler.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/tensorflow_profiler/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/quantization.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/auto.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/auto_mixed_precision.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/basic.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/bayesian.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/conservative.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/exhaustive.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/hawq_v2.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/mse.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/mse_v2.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/random.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/strategy.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/constant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/tuning_sampler.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/tuning_space.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/tuning_structs.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/smoother/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/smoother/calibration.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/smoother/core.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/smoother/scaler.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/keras.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/keras.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow_itex.yaml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/conv2d.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/dense.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/layer_initializer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/pool2d.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/separable_conv2d.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/quantization/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/quantization/config.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/algorithm_entry.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/autotune.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/config.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/quantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_converter.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/bf16_convert.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_layout.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dilated_contraction.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_constant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_gelu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/grappler_pass.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/insert_print_node.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/split_shared_input.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/switch_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/graph_base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/scale_propagation.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/optimize_qdq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_bn.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_concatv2.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_conv.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_matmul.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_pooling.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph_common.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/bias_correction.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/graph_transform_base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/insert_logging.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/rerange_quantized_concat.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/constants.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/data.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/model_wrappers.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/base_algorithm.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/custom_config/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/observer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/prepare_model.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/scaling_method_base.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/scripts/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/utils/logger.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/layer_wise/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/layer_wise/load.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/layer_wise/modified_pickle.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/layer_wise/utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/gptq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot_utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_precision/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_precision/half_precision_convert.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_precision/module_wrappers.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mx_quant/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mx_quant/mx.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mx_quant/utils.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/save_load.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/smooth_quant/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/smooth_quant/save_load.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/smooth_quant/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/static_quant/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/static_quant/save_load.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/static_quant/static_quant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/static_quant/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/awq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/bitpack.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/config.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/core.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/optimizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/qtensor.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/quantizer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/modules.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/rtn.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/teq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/export/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/export/pt2e_export.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/algorithm_entry.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/quantize.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/auto_accelerator.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/bit_packer.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/block_wise.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/training.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/generation/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/generation/beam_search.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/generation/greedy_search.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/quantization/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/collect_layer_histogram.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/constant.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/create_obj_from_config.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/export/__init__.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/export/qlinear2qdq.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/export/tf2onnx.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/export/torch2onnx.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/kl_divergence.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/load_huggingface.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/logger.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/options.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/pytorch.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/utility.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/weights_details.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/dependency_links.txt +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/entry_points.txt +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/requires.txt +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/top_level.txt +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/pyproject.toml +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/setup.cfg +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/setup.py +0 -0
- {neural_compressor-3.2 → neural_compressor-3.3}/third-party-programs.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: neural_compressor
|
|
3
|
-
Version: 3.2
|
|
3
|
+
Version: 3.3
|
|
4
4
|
Summary: Repository of Intel® Neural Compressor
|
|
5
5
|
Home-page: https://github.com/intel/neural-compressor
|
|
6
6
|
Author: Intel AIPT Team
|
|
@@ -43,6 +43,18 @@ Requires-Dist: py-cpuinfo; extra == "tf"
|
|
|
43
43
|
Requires-Dist: pydantic; extra == "tf"
|
|
44
44
|
Requires-Dist: pyyaml; extra == "tf"
|
|
45
45
|
Requires-Dist: tensorflow; extra == "tf"
|
|
46
|
+
Dynamic: author
|
|
47
|
+
Dynamic: author-email
|
|
48
|
+
Dynamic: classifier
|
|
49
|
+
Dynamic: description
|
|
50
|
+
Dynamic: description-content-type
|
|
51
|
+
Dynamic: home-page
|
|
52
|
+
Dynamic: keywords
|
|
53
|
+
Dynamic: license
|
|
54
|
+
Dynamic: provides-extra
|
|
55
|
+
Dynamic: requires-dist
|
|
56
|
+
Dynamic: requires-python
|
|
57
|
+
Dynamic: summary
|
|
46
58
|
|
|
47
59
|
<div align="center">
|
|
48
60
|
|
|
@@ -51,7 +63,7 @@ Intel® Neural Compressor
|
|
|
51
63
|
<h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
|
|
52
64
|
|
|
53
65
|
[](https://github.com/intel/neural-compressor)
|
|
54
|
-
[](https://github.com/intel/neural-compressor/releases)
|
|
55
67
|
[](https://github.com/intel/neural-compressor/blob/master/LICENSE)
|
|
56
68
|
[](https://github.com/intel/neural-compressor)
|
|
57
69
|
[](https://pepy.tech/project/neural-compressor)
|
|
@@ -78,55 +90,33 @@ support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testi
|
|
|
78
90
|
* [2024/07] Performance optimizations and usability improvements on [client-side](./docs/source/3x/client_quant.md).
|
|
79
91
|
|
|
80
92
|
## Installation
|
|
93
|
+
Choose the necessary framework dependencies to install based on your deploy environment.
|
|
81
94
|
### Install Framework
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
95
|
+
* [Install intel_extension_for_pytorch for CPU](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/)
|
|
96
|
+
* [Install intel_extension_for_pytorch for XPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
|
|
97
|
+
* [Use Docker Image with torch installed for HPU](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click)
|
|
98
|
+
**Note**: There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
|
|
99
|
+
* [Install torch for other platform](https://pytorch.org/get-started/locally)
|
|
100
|
+
* [Install TensorFlow](https://www.tensorflow.org/install)
|
|
101
|
+
|
|
102
|
+
### Install Neural Compressor from pypi
|
|
85
103
|
```
|
|
86
|
-
#### Use Docker Image with torch installed for HPU
|
|
87
|
-
https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click
|
|
88
|
-
|
|
89
|
-
> **Note**:
|
|
90
|
-
> There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
|
|
91
|
-
|
|
92
|
-
#### Install torch/intel_extension_for_pytorch for Intel GPU
|
|
93
|
-
https://intel.github.io/intel-extension-for-pytorch/index.html#installation
|
|
94
|
-
|
|
95
|
-
#### Install torch for other platform
|
|
96
|
-
https://pytorch.org/get-started/locally
|
|
97
|
-
|
|
98
|
-
#### Install tensorflow
|
|
99
|
-
```Shell
|
|
100
|
-
pip install tensorflow
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
### Install from pypi
|
|
104
|
-
```Shell
|
|
105
104
|
# Install 2.X API + Framework extension API + PyTorch dependency
|
|
106
105
|
pip install neural-compressor[pt]
|
|
107
106
|
# Install 2.X API + Framework extension API + TensorFlow dependency
|
|
108
107
|
pip install neural-compressor[tf]
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
> Further installation methods can be found under [Installation Guide](./docs/source/installation_guide.md). check out our [FAQ](./docs/source/faq.md) for more details.
|
|
108
|
+
```
|
|
109
|
+
**Note**: Further installation methods can be found under [Installation Guide](./docs/source/installation_guide.md). check out our [FAQ](./docs/source/faq.md) for more details.
|
|
112
110
|
|
|
113
111
|
## Getting Started
|
|
112
|
+
After successfully installing these packages, try your first quantization program. **Following example code demonstrates FP8 Quantization**, it is supported by Intel Gaudi2 AI Accelerator.
|
|
113
|
+
To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
|
114
114
|
|
|
115
|
-
|
|
116
|
-
```bash
|
|
117
|
-
pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
|
|
115
|
+
Run a container with an interactive shell, [more info](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#docker-installation)
|
|
118
116
|
```
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
### [FP8 Quantization](./docs/source/3x/PT_FP8Quant.md)
|
|
122
|
-
Following example code demonstrates FP8 Quantization, it is supported by Intel Gaudi2 AI Accelerator.
|
|
123
|
-
|
|
124
|
-
To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
|
125
|
-
```bash
|
|
126
|
-
# Run a container with an interactive shell
|
|
127
|
-
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
|
|
117
|
+
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.20.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
|
|
128
118
|
```
|
|
129
|
-
Run the example
|
|
119
|
+
Run the example,
|
|
130
120
|
```python
|
|
131
121
|
from neural_compressor.torch.quantization import (
|
|
132
122
|
FP8Config,
|
|
@@ -148,12 +138,10 @@ model = convert(model)
|
|
|
148
138
|
|
|
149
139
|
output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
|
|
150
140
|
print(output.shape)
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
### Weight-Only Large Language Model Loading (LLMs)
|
|
154
|
-
|
|
155
|
-
Following example code demonstrates weight-only large language model loading on Intel Gaudi2 AI Accelerator.
|
|
141
|
+
```
|
|
142
|
+
More [FP8 quantization doc](./docs/source/3x/PT_FP8Quant.md).
|
|
156
143
|
|
|
144
|
+
**Following example code demonstrates weight-only large language model loading** on Intel Gaudi2 AI Accelerator.
|
|
157
145
|
```python
|
|
158
146
|
from neural_compressor.torch.quantization import load
|
|
159
147
|
|
|
@@ -165,10 +153,7 @@ model = load(
|
|
|
165
153
|
torch_dtype=torch.bfloat16,
|
|
166
154
|
)
|
|
167
155
|
```
|
|
168
|
-
|
|
169
|
-
**Note:**
|
|
170
|
-
|
|
171
|
-
Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
|
|
156
|
+
**Note:** Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
|
|
172
157
|
|
|
173
158
|
## Documentation
|
|
174
159
|
|
|
@@ -5,7 +5,7 @@ Intel® Neural Compressor
|
|
|
5
5
|
<h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
|
|
6
6
|
|
|
7
7
|
[](https://github.com/intel/neural-compressor)
|
|
8
|
-
[](https://github.com/intel/neural-compressor/releases)
|
|
9
9
|
[](https://github.com/intel/neural-compressor/blob/master/LICENSE)
|
|
10
10
|
[](https://github.com/intel/neural-compressor)
|
|
11
11
|
[](https://pepy.tech/project/neural-compressor)
|
|
@@ -32,55 +32,33 @@ support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testi
|
|
|
32
32
|
* [2024/07] Performance optimizations and usability improvements on [client-side](./docs/source/3x/client_quant.md).
|
|
33
33
|
|
|
34
34
|
## Installation
|
|
35
|
+
Choose the necessary framework dependencies to install based on your deploy environment.
|
|
35
36
|
### Install Framework
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
37
|
+
* [Install intel_extension_for_pytorch for CPU](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/)
|
|
38
|
+
* [Install intel_extension_for_pytorch for XPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
|
|
39
|
+
* [Use Docker Image with torch installed for HPU](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click)
|
|
40
|
+
**Note**: There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
|
|
41
|
+
* [Install torch for other platform](https://pytorch.org/get-started/locally)
|
|
42
|
+
* [Install TensorFlow](https://www.tensorflow.org/install)
|
|
43
|
+
|
|
44
|
+
### Install Neural Compressor from pypi
|
|
39
45
|
```
|
|
40
|
-
#### Use Docker Image with torch installed for HPU
|
|
41
|
-
https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click
|
|
42
|
-
|
|
43
|
-
> **Note**:
|
|
44
|
-
> There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
|
|
45
|
-
|
|
46
|
-
#### Install torch/intel_extension_for_pytorch for Intel GPU
|
|
47
|
-
https://intel.github.io/intel-extension-for-pytorch/index.html#installation
|
|
48
|
-
|
|
49
|
-
#### Install torch for other platform
|
|
50
|
-
https://pytorch.org/get-started/locally
|
|
51
|
-
|
|
52
|
-
#### Install tensorflow
|
|
53
|
-
```Shell
|
|
54
|
-
pip install tensorflow
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
### Install from pypi
|
|
58
|
-
```Shell
|
|
59
46
|
# Install 2.X API + Framework extension API + PyTorch dependency
|
|
60
47
|
pip install neural-compressor[pt]
|
|
61
48
|
# Install 2.X API + Framework extension API + TensorFlow dependency
|
|
62
49
|
pip install neural-compressor[tf]
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
> Further installation methods can be found under [Installation Guide](./docs/source/installation_guide.md). check out our [FAQ](./docs/source/faq.md) for more details.
|
|
50
|
+
```
|
|
51
|
+
**Note**: Further installation methods can be found under [Installation Guide](./docs/source/installation_guide.md). check out our [FAQ](./docs/source/faq.md) for more details.
|
|
66
52
|
|
|
67
53
|
## Getting Started
|
|
54
|
+
After successfully installing these packages, try your first quantization program. **Following example code demonstrates FP8 Quantization**, it is supported by Intel Gaudi2 AI Accelerator.
|
|
55
|
+
To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
|
68
56
|
|
|
69
|
-
|
|
70
|
-
```bash
|
|
71
|
-
pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
|
|
57
|
+
Run a container with an interactive shell, [more info](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#docker-installation)
|
|
72
58
|
```
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
### [FP8 Quantization](./docs/source/3x/PT_FP8Quant.md)
|
|
76
|
-
Following example code demonstrates FP8 Quantization, it is supported by Intel Gaudi2 AI Accelerator.
|
|
77
|
-
|
|
78
|
-
To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
|
79
|
-
```bash
|
|
80
|
-
# Run a container with an interactive shell
|
|
81
|
-
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
|
|
59
|
+
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.20.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
|
|
82
60
|
```
|
|
83
|
-
Run the example
|
|
61
|
+
Run the example,
|
|
84
62
|
```python
|
|
85
63
|
from neural_compressor.torch.quantization import (
|
|
86
64
|
FP8Config,
|
|
@@ -102,12 +80,10 @@ model = convert(model)
|
|
|
102
80
|
|
|
103
81
|
output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
|
|
104
82
|
print(output.shape)
|
|
105
|
-
```
|
|
106
|
-
|
|
107
|
-
### Weight-Only Large Language Model Loading (LLMs)
|
|
108
|
-
|
|
109
|
-
Following example code demonstrates weight-only large language model loading on Intel Gaudi2 AI Accelerator.
|
|
83
|
+
```
|
|
84
|
+
More [FP8 quantization doc](./docs/source/3x/PT_FP8Quant.md).
|
|
110
85
|
|
|
86
|
+
**Following example code demonstrates weight-only large language model loading** on Intel Gaudi2 AI Accelerator.
|
|
111
87
|
```python
|
|
112
88
|
from neural_compressor.torch.quantization import load
|
|
113
89
|
|
|
@@ -119,10 +95,7 @@ model = load(
|
|
|
119
95
|
torch_dtype=torch.bfloat16,
|
|
120
96
|
)
|
|
121
97
|
```
|
|
122
|
-
|
|
123
|
-
**Note:**
|
|
124
|
-
|
|
125
|
-
Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
|
|
98
|
+
**Note:** Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
|
|
126
99
|
|
|
127
100
|
## Documentation
|
|
128
101
|
|
|
@@ -49,7 +49,7 @@ class Adaptor(object):
|
|
|
49
49
|
|
|
50
50
|
@abstractmethod
|
|
51
51
|
def quantize(self, tune_cfg, model, dataloader, q_func=None):
|
|
52
|
-
"""The function is used to do calibration and
|
|
52
|
+
"""The function is used to do calibration and quantization in post-training quantization.
|
|
53
53
|
|
|
54
54
|
Args:
|
|
55
55
|
tune_cfg(dict): The chosen tuning configuration.
|
|
@@ -59,7 +59,7 @@ class MxNetAdaptor(Adaptor):
|
|
|
59
59
|
|
|
60
60
|
@dump_elapsed_time("Pass quantize model")
|
|
61
61
|
def quantize(self, tune_cfg, nc_model, dataloader, q_func=None):
|
|
62
|
-
"""The function is used to do MXNet calibration and
|
|
62
|
+
"""The function is used to do MXNet calibration and quantization in post-training
|
|
63
63
|
quantization.
|
|
64
64
|
|
|
65
65
|
Args:
|
|
@@ -252,7 +252,7 @@ class ONNXRUNTIMEAdaptor(Adaptor):
|
|
|
252
252
|
|
|
253
253
|
@dump_elapsed_time("Pass quantize model")
|
|
254
254
|
def quantize(self, tune_cfg, model, data_loader, q_func=None):
|
|
255
|
-
"""The function is used to do calibration and
|
|
255
|
+
"""The function is used to do calibration and quantization in post-training
|
|
256
256
|
quantization.
|
|
257
257
|
|
|
258
258
|
Args:
|
|
@@ -1853,7 +1853,7 @@ class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor):
|
|
|
1853
1853
|
|
|
1854
1854
|
@dump_elapsed_time("Pass quantize model")
|
|
1855
1855
|
def quantize(self, tune_cfg, model, data_loader, q_func=None):
|
|
1856
|
-
"""The function is used to do calibration and
|
|
1856
|
+
"""The function is used to do calibration and quantization in post-training
|
|
1857
1857
|
quantization.
|
|
1858
1858
|
|
|
1859
1859
|
Args:
|
{neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_util.py
RENAMED
|
@@ -212,7 +212,7 @@ class GraphAnalyzer:
|
|
|
212
212
|
return self._search_patterns(patterns)
|
|
213
213
|
|
|
214
214
|
def _search_patterns(self, input_pattern):
|
|
215
|
-
"""Search user specified patterns on internal
|
|
215
|
+
"""Search user specified patterns on internal graph structure.
|
|
216
216
|
|
|
217
217
|
Args:
|
|
218
218
|
input_pattern (list): The element of the pattern list could be string/list/tuple.
|
{neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/gptq.py
RENAMED
|
@@ -718,10 +718,12 @@ class GPTQuantizer(object):
|
|
|
718
718
|
for n, p in sub_layer.named_parameters():
|
|
719
719
|
param_name = full_layer_name + "." + n
|
|
720
720
|
if n == "weight":
|
|
721
|
-
set_module_tensor_to_device(self.model, param_name, self.device, Q)
|
|
721
|
+
set_module_tensor_to_device(self.model, param_name, self.device, Q, dtype=Q.dtype)
|
|
722
722
|
else:
|
|
723
723
|
value = load_value(self.model, param_name, model_path)
|
|
724
|
-
set_module_tensor_to_device(self.model, param_name, self.device, value)
|
|
724
|
+
set_module_tensor_to_device(
|
|
725
|
+
self.model, param_name, self.device, value, dtype=value.dtype
|
|
726
|
+
)
|
|
725
727
|
# sub_layer.weight.data = Q
|
|
726
728
|
torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
|
|
727
729
|
clean_module_weight(sub_layer)
|
|
@@ -745,6 +747,8 @@ class GPTQuantizer(object):
|
|
|
745
747
|
for j in range(len(self.dataloader)):
|
|
746
748
|
cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
|
|
747
749
|
cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
|
|
750
|
+
# breakpoint()
|
|
751
|
+
# transformer_block = transformer_block.to(getattr(torch, self.model.config.torch_dtype))
|
|
748
752
|
out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
|
|
749
753
|
out = self.track_hidden_states(out)
|
|
750
754
|
outs.append(out)
|
{neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/hawq_metric.py
RENAMED
|
@@ -23,14 +23,11 @@ import copy
|
|
|
23
23
|
import logging
|
|
24
24
|
|
|
25
25
|
import numpy as np
|
|
26
|
-
import torch.nn
|
|
27
|
-
import torch.nn as nn
|
|
28
26
|
from torch.quantization.quantize_fx import fuse_fx
|
|
29
27
|
|
|
30
28
|
logger = logging.getLogger(__name__)
|
|
31
29
|
from typing import Any, Callable, Dict, List, Optional, Set, Union
|
|
32
30
|
|
|
33
|
-
import torch
|
|
34
31
|
import tqdm
|
|
35
32
|
|
|
36
33
|
|
|
@@ -221,7 +221,7 @@ def load_module(model, module_name, path, device="cpu"):
|
|
|
221
221
|
for n, p in module.named_parameters():
|
|
222
222
|
param_name = module_name + "." + n
|
|
223
223
|
value = load_value(model, param_name, path)
|
|
224
|
-
set_module_tensor_to_device(model, param_name, device, value)
|
|
224
|
+
set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)
|
|
225
225
|
|
|
226
226
|
|
|
227
227
|
def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_path=None):
|
|
@@ -239,7 +239,7 @@ def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_pa
|
|
|
239
239
|
value = state_dict[n]
|
|
240
240
|
else:
|
|
241
241
|
value = load_value(model, param_name, path)
|
|
242
|
-
set_module_tensor_to_device(model, param_name, device, value)
|
|
242
|
+
set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)
|
|
243
243
|
|
|
244
244
|
return hook
|
|
245
245
|
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
|
+
import copy
|
|
21
22
|
import inspect
|
|
22
23
|
import json
|
|
23
24
|
import os
|
|
@@ -539,6 +540,7 @@ class BaseConfig(ABC):
|
|
|
539
540
|
tuning_param_pair = dict(zip(tuning_param_name_lst, params_values))
|
|
540
541
|
tmp_params_dict = {**not_tuning_param_pair, **tuning_param_pair}
|
|
541
542
|
new_config = self.__class__(**tmp_params_dict)
|
|
543
|
+
new_config.local_config = copy.deepcopy(self.local_config)
|
|
542
544
|
logger.info(new_config.to_dict())
|
|
543
545
|
config_list.append(new_config)
|
|
544
546
|
logger.info("Expanded the %s and got %d configs.", self.__class__.name, len(config_list))
|
|
@@ -629,9 +631,13 @@ class BaseConfig(ABC):
|
|
|
629
631
|
"""
|
|
630
632
|
if not isinstance(other, type(self)):
|
|
631
633
|
return False
|
|
632
|
-
|
|
634
|
+
|
|
635
|
+
params_equal = self.params_list == other.params_list and all(
|
|
633
636
|
getattr(self, str(attr)) == getattr(other, str(attr)) for attr in self.params_list
|
|
634
637
|
)
|
|
638
|
+
local_config_equal = self.local_config == other.local_config
|
|
639
|
+
global_config_equal = self.global_config == other.global_config
|
|
640
|
+
return params_equal and local_config_equal and global_config_equal
|
|
635
641
|
|
|
636
642
|
|
|
637
643
|
class ComposableConfig(BaseConfig):
|
{neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/dummy_dataset_v2.py
RENAMED
|
@@ -236,7 +236,7 @@ class SparseDummyDataset(IterableDataset): # pragma: no cover
|
|
|
236
236
|
self.label_shape = len(self.dense_shape) * self.label_shape
|
|
237
237
|
assert len(self.label_shape) == len(
|
|
238
238
|
self.dense_shape
|
|
239
|
-
), "length of dense_shape should be
|
|
239
|
+
), "length of dense_shape should be equal to length of label_shape"
|
|
240
240
|
self.label_dim = len(self.label_shape)
|
|
241
241
|
|
|
242
242
|
self.input_dim = 1 if isinstance(dense_shape, tuple) else len(dense_shape)
|
{neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/accuracy.py
RENAMED
|
@@ -39,7 +39,6 @@ from typing import Union
|
|
|
39
39
|
import lm_eval
|
|
40
40
|
import numpy as np
|
|
41
41
|
from lm_eval import evaluator, utils
|
|
42
|
-
from lm_eval.loggers import WandbLogger
|
|
43
42
|
from lm_eval.tasks import TaskManager
|
|
44
43
|
from lm_eval.utils import make_table, simple_parse_args_string
|
|
45
44
|
|
|
@@ -67,6 +66,17 @@ def _handle_non_serializable(o):
|
|
|
67
66
|
|
|
68
67
|
def cli_evaluate(args) -> None:
|
|
69
68
|
if args.wandb_args:
|
|
69
|
+
try:
|
|
70
|
+
# For 0.4.3 and above
|
|
71
|
+
from lm_eval.loggers import WandbLogger
|
|
72
|
+
except ImportError:
|
|
73
|
+
try:
|
|
74
|
+
# For 0.4.2
|
|
75
|
+
from lm_eval.logging_utils import WandbLogger
|
|
76
|
+
except ImportError:
|
|
77
|
+
raise ImportError("Import of WandbLogger failed. Please install wandb to use this feature.")
|
|
78
|
+
except Exception as e:
|
|
79
|
+
raise RuntimeError(f"An unexpected error occurred: {e}")
|
|
70
80
|
wandb_logger = WandbLogger(**simple_parse_args_string(args.wandb_args))
|
|
71
81
|
|
|
72
82
|
eval_logger = utils.eval_logger
|
|
@@ -200,6 +210,7 @@ def cli_evaluate(args) -> None:
|
|
|
200
210
|
)
|
|
201
211
|
lm.pad_to_buckets = args.pad_to_buckets
|
|
202
212
|
lm.buckets = args.buckets
|
|
213
|
+
lm.add_bos_token = args.add_bos_token
|
|
203
214
|
|
|
204
215
|
results = evaluator.simple_evaluate(
|
|
205
216
|
model=lm,
|
{neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/utils.py
RENAMED
|
@@ -20,6 +20,8 @@ try:
|
|
|
20
20
|
except:
|
|
21
21
|
_hpex_available = False
|
|
22
22
|
|
|
23
|
+
from neural_compressor.common import logger
|
|
24
|
+
|
|
23
25
|
|
|
24
26
|
class LMEvalParser:
|
|
25
27
|
def __init__(
|
|
@@ -50,6 +52,7 @@ class LMEvalParser:
|
|
|
50
52
|
trust_remote_code=False,
|
|
51
53
|
pad_to_buckets=None, # used by HPU to align input length for performance.
|
|
52
54
|
buckets=[32, 64, 128, 256, 512, 1024, 2048, 4096], # used by HPU to limit input length range.
|
|
55
|
+
add_bos_token=False,
|
|
53
56
|
):
|
|
54
57
|
self.model = model
|
|
55
58
|
self.tasks = tasks
|
|
@@ -83,3 +86,17 @@ class LMEvalParser:
|
|
|
83
86
|
else:
|
|
84
87
|
self.pad_to_buckets = pad_to_buckets
|
|
85
88
|
self.buckets = buckets
|
|
89
|
+
self.add_bos_token = add_bos_token
|
|
90
|
+
self._post_init()
|
|
91
|
+
|
|
92
|
+
def _check_add_bos_token(self):
|
|
93
|
+
if not self.add_bos_token:
|
|
94
|
+
logger.warning(
|
|
95
|
+
(
|
|
96
|
+
"`add_bos_token` is set to False. "
|
|
97
|
+
"If the model was trained or fine-tuned with a BOS token, this may lead to incorrect results."
|
|
98
|
+
)
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
def _post_init(self):
|
|
102
|
+
self._check_add_bos_token()
|
|
@@ -212,7 +212,7 @@ class GraphAnalyzer:
|
|
|
212
212
|
return self._search_patterns(patterns)
|
|
213
213
|
|
|
214
214
|
def _search_patterns(self, input_pattern):
|
|
215
|
-
"""Search user specified patterns on internal
|
|
215
|
+
"""Search user specified patterns on internal graph structure.
|
|
216
216
|
|
|
217
217
|
Args:
|
|
218
218
|
input_pattern (list): The element of the pattern list could be string/list/tuple.
|
|
@@ -13,39 +13,24 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import functools
|
|
16
|
-
import importlib.util
|
|
17
16
|
import json
|
|
18
17
|
import os
|
|
19
18
|
|
|
20
19
|
import numpy as np
|
|
21
20
|
import torch
|
|
21
|
+
from enum import Enum, auto
|
|
22
22
|
|
|
23
|
-
from .._quant_common.helper_modules import *
|
|
24
23
|
from .._quant_common.quant_config import get_hqt_config
|
|
25
24
|
from ..utils.logger import logger
|
|
26
|
-
from neural_compressor.torch.algorithms.fp8_quant.model_configs import (
|
|
27
|
-
ModuleInfo,
|
|
28
|
-
ModuleConfig,
|
|
29
|
-
ModuleType,
|
|
30
|
-
ModuleExtraConfig,
|
|
31
|
-
get_patched_module_table,
|
|
32
|
-
get_patched_module_type_table,
|
|
33
|
-
)
|
|
34
|
-
from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator
|
|
35
|
-
deepspeed_exists = False
|
|
36
|
-
if importlib.util.find_spec("deepspeed"): # check if deepspeed is installed
|
|
37
|
-
deepspeed_exists = True
|
|
25
|
+
from neural_compressor.torch.algorithms.fp8_quant.model_configs import ModuleConfig
|
|
38
26
|
|
|
39
27
|
UNMEASURED_MODELS = "UnmeasuredModels"
|
|
40
28
|
|
|
41
29
|
|
|
42
|
-
_mod_types = {
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
"softmax": ModuleType(1, [], 1, True),
|
|
47
|
-
"fused_sdpa": ModuleType(3, [], 2, True),
|
|
48
|
-
}
|
|
30
|
+
class QuantTensorType(Enum):
|
|
31
|
+
MEASUREMENTS = auto()
|
|
32
|
+
CONST = auto()
|
|
33
|
+
DYNAMIC = auto()
|
|
49
34
|
|
|
50
35
|
|
|
51
36
|
class ShapeList:
|
|
@@ -196,73 +181,6 @@ format_functions = {
|
|
|
196
181
|
|
|
197
182
|
format_functions_rec = lambda k: functools.partial(rec_fn, fn=format_functions[k])
|
|
198
183
|
|
|
199
|
-
_mod_default_dict = {
|
|
200
|
-
"Matmul": ModuleInfo("matmul", PatchedMatmul),
|
|
201
|
-
"Linear": ModuleInfo("linear", PatchedLinear),
|
|
202
|
-
"RowParallelLinear": ModuleInfo("linear", PatchedRowParallelLinear),
|
|
203
|
-
"ColumnParallelLinear": ModuleInfo("linear", PatchedColumnParallelLinear),
|
|
204
|
-
"MergedColumnParallelLinear": ModuleInfo("linear", PatchedColumnParallelLinear),
|
|
205
|
-
"QKVParallelLinear": ModuleInfo("linear", PatchedColumnParallelLinear),
|
|
206
|
-
"FalconLinear": ModuleInfo("linear", PatchedLinear),
|
|
207
|
-
"KVCache": ModuleInfo("kv_cache", PatchedKVCache),
|
|
208
|
-
"VLLMKVCache": ModuleInfo("kv_cache", PatchedVLLMKVCache),
|
|
209
|
-
"Conv2d": ModuleInfo("linear", PatchedConv2d),
|
|
210
|
-
"LoRACompatibleLinear": ModuleInfo("linear", PatchedLoRACompatibleLinear),
|
|
211
|
-
"LoRACompatibleConv": ModuleInfo("linear", PatchedLoRACompatibleConv),
|
|
212
|
-
"Softmax": ModuleInfo("softmax", PatchedSoftmax),
|
|
213
|
-
"ModuleFusedSDPA": ModuleInfo("fused_sdpa", PatchedModuleFusedSDPA),
|
|
214
|
-
"MoeMatmul": ModuleInfo("linear", PatchedMoeMatmul),
|
|
215
|
-
"ReplicatedLinear": ModuleInfo("linear", PatchedReplicatedLinear),
|
|
216
|
-
"FusedMoE": ModuleInfo("linear", PatchedMixtralMoE, False),
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if deepspeed_exists:
|
|
221
|
-
_mod_default_dict.update(
|
|
222
|
-
{
|
|
223
|
-
"LinearLayer": ModuleInfo("linear", PatchedLinear),
|
|
224
|
-
"LinearAllreduce": ModuleInfo("linear", PatchedLinearAllReduce),
|
|
225
|
-
"ScopedLinearAllReduce": ModuleInfo("linear", PatchedLinearAllReduce),
|
|
226
|
-
"LmHeadLinearAllreduce": ModuleInfo("linear", PatchedLmHeadLinearAllreduce),
|
|
227
|
-
}
|
|
228
|
-
)
|
|
229
|
-
|
|
230
|
-
@functools.lru_cache(maxsize=None)
|
|
231
|
-
def _import_hpu_modules():
|
|
232
|
-
from neural_compressor.torch.algorithms.fp8_quant.patched_module_base import (
|
|
233
|
-
PATCHED_MODULE_TABLE, PATCHED_MODULE_TYPES_TABLE
|
|
234
|
-
)
|
|
235
|
-
cur_accelerator = auto_detect_accelerator()
|
|
236
|
-
if not cur_accelerator.current_device_name().startswith("hpu"):
|
|
237
|
-
return
|
|
238
|
-
PATCHED_MODULE_TABLE["hpu"].update(_mod_default_dict)
|
|
239
|
-
PATCHED_MODULE_TYPES_TABLE["hpu"].update(_mod_types)
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
_import_hpu_modules()
|
|
243
|
-
|
|
244
|
-
mod_default_dict = get_patched_module_table()
|
|
245
|
-
mod_types = get_patched_module_type_table()
|
|
246
|
-
|
|
247
|
-
def get_white_list():
|
|
248
|
-
return list(mod_default_dict.keys())
|
|
249
|
-
|
|
250
|
-
class ModInstInfo:
|
|
251
|
-
def __init__(self, name, parent):
|
|
252
|
-
self.name = name
|
|
253
|
-
self.parent = parent
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
parent_child_mod_dict = {}
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
def generate_model_info(model):
|
|
260
|
-
def create_mod_info_recursion(parent):
|
|
261
|
-
for name, mod in parent.named_children():
|
|
262
|
-
parent_child_mod_dict[mod] = ModInstInfo(name, parent)
|
|
263
|
-
create_mod_info_recursion(mod)
|
|
264
|
-
|
|
265
|
-
create_mod_info_recursion(model)
|
|
266
184
|
|
|
267
185
|
def get_device_type_for_scales(mod):
|
|
268
186
|
config = get_hqt_config(mod).cfg
|