neural-compressor 3.1__tar.gz → 3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {neural_compressor-3.1 → neural_compressor-3.2}/PKG-INFO +12 -5
- {neural_compressor-3.1 → neural_compressor-3.2}/README.md +11 -4
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/smooth_quant.py +3 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/pytorch.py +2 -2
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/weight_only.py +3 -3
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/__init__.py +1 -1
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/constants.py +1 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/logger.py +9 -0
- {neural_compressor-3.1/neural_compressor → neural_compressor-3.2/neural_compressor/common}/version.py +1 -1
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/accuracy.py +1 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/models/huggingface.py +19 -2
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/utils.py +2 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/__init__.py +21 -1
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/common.py +43 -43
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/fp_utils.py +10 -9
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/measure.py +24 -173
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/quant_dequant.py +74 -12
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/quantize.py +253 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale.py +126 -96
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_handler.py +14 -3
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/max_abs.py +159 -83
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/single_scale.py +10 -15
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/smooth_quant.py +6 -11
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/utils.py +1 -1
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py +305 -130
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/quant_config.py +104 -33
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/common.py +2 -2
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/model_configs.py +143 -0
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/observer.py +228 -0
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/patched_module_base.py +197 -0
- neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/fp8_quant.py → neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/quantizer.py +1 -1
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/save_load.py +452 -0
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/scaling_method_base.py +79 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/scripts/fix_measurements.py +2 -6
- neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/utils/__init__.py +18 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/layer_wise/utils.py +57 -62
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/static_quant/utility.py +0 -1
- neural_compressor-3.2/neural_compressor/torch/algorithms/weight_only/autoround.py +426 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/gptq.py +62 -28
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/modules.py +2 -12
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/save_load.py +55 -47
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/utility.py +1 -1
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/__init__.py +2 -1
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/algorithm_entry.py +23 -2
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/config.py +98 -58
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/quantize.py +8 -6
- neural_compressor-3.1/neural_compressor/torch/quantization/load_entry.py → neural_compressor-3.2/neural_compressor/torch/quantization/save_load_entry.py +50 -15
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/auto_accelerator.py +4 -8
- neural_compressor-3.2/neural_compressor/torch/utils/block_wise.py +220 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/constants.py +1 -1
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/environ.py +113 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/utility.py +145 -63
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/models/modeling_auto.py +7 -5
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/quantization/utils.py +11 -10
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/utils/quantization_config.py +5 -5
- neural_compressor-3.2/neural_compressor/version.py +18 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/PKG-INFO +12 -5
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/SOURCES.txt +10 -5
- {neural_compressor-3.1 → neural_compressor-3.2}/setup.py +0 -11
- neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/_core/quantize.py +0 -147
- neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/scripts/regression_detection/regression_detection.py +0 -132
- neural_compressor-3.1/neural_compressor/torch/algorithms/mixed_low_precision/__init__.py +0 -13
- neural_compressor-3.1/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/__init__.py +0 -13
- neural_compressor-3.1/neural_compressor/torch/algorithms/weight_only/autoround.py +0 -246
- {neural_compressor-3.1 → neural_compressor-3.2}/LICENSE +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/adaptor.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/conv2d.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/dense.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/pool2d.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/quantizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/separable_conv2d.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/mxnet.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/mxnet.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/mxnet_utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/mxnet_utils/util.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt_cuda.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt_dml.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt_dnnl.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt_trt.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/calibration.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/calibrator.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/activation.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/argmax.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/attention.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/binary_op.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/concat.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/gather.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/gemm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/lstm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/matmul.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/maxpool.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/norm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/ops.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/pad.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/pooling.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/reduce.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/resize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/split.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/unary_op.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/quantizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/util.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/weight_only.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/pytorch_cpu.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/pytorch_gpu.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/pytorch_ipex.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/query.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tensorflow.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tensorflow.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tensorflow_itex.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_converter.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_util.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph_common.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/util.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/auto_round.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/awq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/bf16_convert.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/gptq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/hawq_metric.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/mixed_precision.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/model_wrapper.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/pattern_detector.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/symbolic_trace.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/teq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/util.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/auto_alpha.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/calibration.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/graph_trace.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/algorithm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/fast_bias_correction.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/smooth_quant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/weight_correction.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/benchmark.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/base_config.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/base_tuning.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/benchmark.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/tuning_param.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/save_load.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/utility.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/callbacks.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/distillation/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/distillation/criterions.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/distillation/optimizers.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/distillation/utility.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/hpo/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/hpo/sa_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/hpo/search_algorithms.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/hpo/search_space.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/criteria.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/dsnot.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/model_slim/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/model_slim/auto_slim.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/model_slim/weight_slim.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/mha.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/ninm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/nxm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/basic.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/block_mask.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/mha.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/pattern_lock.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/progressive.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/retrain_free.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/sparse_gpt.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruning.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/regs.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/schedulers.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/tf_criteria.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/wanda/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/wanda/prune.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/wanda/utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/wanda/wrapper.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/config.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/contrib/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/contrib/strategy/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/contrib/strategy/sigopt.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/contrib/strategy/tpe.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/base_dataloader.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/dataloader.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/default_dataloader.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/fetcher.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/mxnet_dataloader.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/onnxrt_dataloader.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/pytorch_dataloader.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/sampler.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/tensorflow_dataloader.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/bert_dataset.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/coco_dataset.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/dataset.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/dummy_dataset.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/dummy_dataset_v2.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/imagenet_dataset.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/style_transfer_dataset.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/filters/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/filters/coco_filter.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/filters/filter.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/coco_transform.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/imagenet_transform.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/postprocess.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/tokenization.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/transform.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/bigcode_eval/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/bigcode_eval/evaluator.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/hf_eval/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/hf_eval/evaluator.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/hf_eval/hf_datasets/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/hf_eval/hf_datasets/cnn_dailymail.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/models/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/bleu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/bleu_util.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/coco_label_map.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/coco_tools.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/evaluate_squad.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/f1.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/metric.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/mix_precision.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/base_model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/keras_model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/mxnet_model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/nets_factory.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/onnx_model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/tensorflow_model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/torch_model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/objective.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/factory.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/onnx_parser/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/onnx_parser/factory.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/onnx_parser/parser.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/parser.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/result.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/tensorflow_parser/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/tensorflow_parser/factory.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/tensorflow_parser/parser.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/factory.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/onnxrt_profiler/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/onnxrt_profiler/utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/profiler.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/tensorflow_profiler/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/quantization.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/auto.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/auto_mixed_precision.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/basic.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/bayesian.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/conservative.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/exhaustive.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/hawq_v2.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/mse.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/mse_v2.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/random.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/strategy.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/constant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/tuning_sampler.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/tuning_space.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/tuning_structs.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/utility.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/smoother/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/smoother/calibration.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/smoother/core.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/smoother/scaler.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/keras.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/keras.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow_itex.yaml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/conv2d.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/dense.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/layer_initializer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/pool2d.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/separable_conv2d.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/quantization/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/quantization/config.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/algorithm_entry.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/autotune.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/config.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/quantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_converter.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/bf16_convert.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_layout.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dilated_contraction.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_constant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_gelu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/grappler_pass.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/insert_print_node.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/split_shared_input.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/switch_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/graph_base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/scale_propagation.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_util.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/optimize_qdq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_bn.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_concatv2.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_conv.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_matmul.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_pooling.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph_common.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/bias_correction.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/graph_transform_base.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/insert_logging.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/rerange_quantized_concat.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/utility.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/constants.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/data.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/model_wrappers.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/utility.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/base_algorithm.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/custom_config/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/prepare_model.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/scripts/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/utils/logger.py +0 -0
- /neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/helper_modules.py → /neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/utils/patched_module_restore_registry.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/layer_wise/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/layer_wise/load.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/layer_wise/modified_pickle.py +0 -0
- {neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/scripts/regression_detection → neural_compressor-3.2/neural_compressor/torch/algorithms/mixed_low_precision}/__init__.py +0 -0
- {neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/utils → neural_compressor-3.2/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods}/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/gptq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot_utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_precision/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_precision/half_precision_convert.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_precision/module_wrappers.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mx_quant/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mx_quant/mx.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mx_quant/utils.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/core.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/half_precision_rewriter.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/save_load.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/utility.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/smooth_quant/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/smooth_quant/save_load.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/smooth_quant/utility.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/static_quant/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/static_quant/save_load.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/static_quant/static_quant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/awq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/bitpack.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/config.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/core.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/optimizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/qtensor.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/quantizer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/rtn.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/teq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/export/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/export/pt2e_export.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/autotune.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/bit_packer.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/training.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/generation/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/generation/beam_search.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/generation/greedy_search.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/models/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/quantization/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/collect_layer_histogram.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/constant.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/create_obj_from_config.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/export/__init__.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/export/qlinear2qdq.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/export/tf2onnx.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/export/torch2onnx.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/kl_divergence.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/load_huggingface.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/logger.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/options.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/pytorch.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/utility.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/weights_details.py +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/dependency_links.txt +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/entry_points.txt +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/requires.txt +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/top_level.txt +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/pyproject.toml +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/setup.cfg +0 -0
- {neural_compressor-3.1 → neural_compressor-3.2}/third-party-programs.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: neural_compressor
|
|
3
|
-
Version: 3.1
|
|
3
|
+
Version: 3.2
|
|
4
4
|
Summary: Repository of Intel® Neural Compressor
|
|
5
5
|
Home-page: https://github.com/intel/neural-compressor
|
|
6
6
|
Author: Intel AIPT Team
|
|
@@ -51,7 +51,7 @@ Intel® Neural Compressor
|
|
|
51
51
|
<h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
|
|
52
52
|
|
|
53
53
|
[](https://github.com/intel/neural-compressor)
|
|
54
|
-
[](https://github.com/intel/neural-compressor/releases)
|
|
55
55
|
[](https://github.com/intel/neural-compressor/blob/master/LICENSE)
|
|
56
56
|
[](https://github.com/intel/neural-compressor)
|
|
57
57
|
[](https://pepy.tech/project/neural-compressor)
|
|
@@ -124,7 +124,7 @@ Following example code demonstrates FP8 Quantization, it is supported by Intel G
|
|
|
124
124
|
To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
|
125
125
|
```bash
|
|
126
126
|
# Run a container with an interactive shell
|
|
127
|
-
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.
|
|
127
|
+
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
|
|
128
128
|
```
|
|
129
129
|
Run the example:
|
|
130
130
|
```python
|
|
@@ -133,14 +133,21 @@ from neural_compressor.torch.quantization import (
|
|
|
133
133
|
prepare,
|
|
134
134
|
convert,
|
|
135
135
|
)
|
|
136
|
+
|
|
137
|
+
import torch
|
|
136
138
|
import torchvision.models as models
|
|
137
139
|
|
|
138
140
|
model = models.resnet18()
|
|
139
141
|
qconfig = FP8Config(fp8_config="E4M3")
|
|
140
142
|
model = prepare(model, qconfig)
|
|
141
|
-
|
|
142
|
-
|
|
143
|
+
|
|
144
|
+
# Customer defined calibration. Below is a dummy calibration
|
|
145
|
+
model(torch.randn(1, 3, 224, 224).to("hpu"))
|
|
146
|
+
|
|
143
147
|
model = convert(model)
|
|
148
|
+
|
|
149
|
+
output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
|
|
150
|
+
print(output.shape)
|
|
144
151
|
```
|
|
145
152
|
|
|
146
153
|
### Weight-Only Large Language Model Loading (LLMs)
|
|
@@ -5,7 +5,7 @@ Intel® Neural Compressor
|
|
|
5
5
|
<h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
|
|
6
6
|
|
|
7
7
|
[](https://github.com/intel/neural-compressor)
|
|
8
|
-
[](https://github.com/intel/neural-compressor/releases)
|
|
9
9
|
[](https://github.com/intel/neural-compressor/blob/master/LICENSE)
|
|
10
10
|
[](https://github.com/intel/neural-compressor)
|
|
11
11
|
[](https://pepy.tech/project/neural-compressor)
|
|
@@ -78,7 +78,7 @@ Following example code demonstrates FP8 Quantization, it is supported by Intel G
|
|
|
78
78
|
To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
|
79
79
|
```bash
|
|
80
80
|
# Run a container with an interactive shell
|
|
81
|
-
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.
|
|
81
|
+
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
|
|
82
82
|
```
|
|
83
83
|
Run the example:
|
|
84
84
|
```python
|
|
@@ -87,14 +87,21 @@ from neural_compressor.torch.quantization import (
|
|
|
87
87
|
prepare,
|
|
88
88
|
convert,
|
|
89
89
|
)
|
|
90
|
+
|
|
91
|
+
import torch
|
|
90
92
|
import torchvision.models as models
|
|
91
93
|
|
|
92
94
|
model = models.resnet18()
|
|
93
95
|
qconfig = FP8Config(fp8_config="E4M3")
|
|
94
96
|
model = prepare(model, qconfig)
|
|
95
|
-
|
|
96
|
-
|
|
97
|
+
|
|
98
|
+
# Customer defined calibration. Below is a dummy calibration
|
|
99
|
+
model(torch.randn(1, 3, 224, 224).to("hpu"))
|
|
100
|
+
|
|
97
101
|
model = convert(model)
|
|
102
|
+
|
|
103
|
+
output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
|
|
104
|
+
print(output.shape)
|
|
98
105
|
```
|
|
99
106
|
|
|
100
107
|
### Weight-Only Large Language Model Loading (LLMs)
|
{neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/smooth_quant.py
RENAMED
|
@@ -295,6 +295,9 @@ class ORTSmoothQuant:
|
|
|
295
295
|
return False
|
|
296
296
|
for inp in node.input:
|
|
297
297
|
if self.model.get_initializer(inp) is not None:
|
|
298
|
+
# Ensure that mul operators with shared initializer will not be absorbed.
|
|
299
|
+
if self.model.get_initializer_share_num(inp) > 1:
|
|
300
|
+
return False
|
|
298
301
|
key = node.input[0].split("_smooth_output")[0]
|
|
299
302
|
tensor = self.model.get_initializer(inp)
|
|
300
303
|
new_tensor = (
|
|
@@ -4926,7 +4926,7 @@ class PyTorchWeightOnlyAdaptor(TemplateAdaptor):
|
|
|
4926
4926
|
act_group_size = self.recipes["autoround_args"].get("act_group_size", None)
|
|
4927
4927
|
act_sym = self.recipes["autoround_args"].get("act_sym", None)
|
|
4928
4928
|
act_dynamic = self.recipes["autoround_args"].get("act_dynamic", True)
|
|
4929
|
-
|
|
4929
|
+
to_quant_block_names = self.recipes["autoround_args"].get("to_quant_block_names", None)
|
|
4930
4930
|
use_layer_wise = self.recipes["autoround_args"].get("use_layer_wise", False)
|
|
4931
4931
|
|
|
4932
4932
|
if dataloader is not None:
|
|
@@ -4959,7 +4959,7 @@ class PyTorchWeightOnlyAdaptor(TemplateAdaptor):
|
|
|
4959
4959
|
dynamic_max_gap=dynamic_max_gap,
|
|
4960
4960
|
data_type=data_type,
|
|
4961
4961
|
scale_dtype=scale_dtype,
|
|
4962
|
-
|
|
4962
|
+
to_quant_block_names=to_quant_block_names,
|
|
4963
4963
|
act_bits=act_bits,
|
|
4964
4964
|
act_group_size=act_group_size,
|
|
4965
4965
|
act_sym=act_sym,
|
{neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/weight_only.py
RENAMED
|
@@ -706,7 +706,7 @@ def autoround_quantize(
|
|
|
706
706
|
dynamic_max_gap: int = -1,
|
|
707
707
|
data_type: str = "int", ##only support int for now
|
|
708
708
|
scale_dtype: str = "fp16",
|
|
709
|
-
|
|
709
|
+
to_quant_block_names: list = None,
|
|
710
710
|
act_bits: int = 32,
|
|
711
711
|
act_group_size: int = None,
|
|
712
712
|
act_sym: bool = None,
|
|
@@ -761,7 +761,7 @@ def autoround_quantize(
|
|
|
761
761
|
data_type (str): The data type to be used (default is "int").
|
|
762
762
|
scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
|
|
763
763
|
have different choices.
|
|
764
|
-
|
|
764
|
+
to_quant_block_names (list): A list whose elements are list of block's layer names to be quantized.
|
|
765
765
|
act_bits (int): Number of bits for activation quantization. Default is 32.
|
|
766
766
|
act_group_size (int): Group size for activation quantization. Default is None.
|
|
767
767
|
act_sym (bool): Whether to use symmetric activation quantization. Default is None.
|
|
@@ -800,7 +800,7 @@ def autoround_quantize(
|
|
|
800
800
|
dynamic_max_gap=dynamic_max_gap,
|
|
801
801
|
data_type=data_type, ## only support data_type
|
|
802
802
|
scale_dtype=scale_dtype,
|
|
803
|
-
|
|
803
|
+
to_quant_block_names=to_quant_block_names,
|
|
804
804
|
act_bits=act_bits,
|
|
805
805
|
act_group_size=act_group_size,
|
|
806
806
|
act_sym=act_sym,
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"""Logger: handles logging functionalities."""
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
import functools
|
|
20
21
|
import logging
|
|
21
22
|
import os
|
|
22
23
|
|
|
@@ -137,6 +138,12 @@ class Logger(object):
|
|
|
137
138
|
else:
|
|
138
139
|
Logger().get_logger().warning(msg, *args, **kwargs)
|
|
139
140
|
|
|
141
|
+
@functools.lru_cache(None)
|
|
142
|
+
def warning_once(msg, *args, **kwargs):
|
|
143
|
+
"""Output log with the warning level only once."""
|
|
144
|
+
Logger.warning("Below warning will be shown only once:")
|
|
145
|
+
Logger.warning(msg, *args, **kwargs)
|
|
146
|
+
|
|
140
147
|
|
|
141
148
|
level = Logger().get_logger().level
|
|
142
149
|
level_name = logging.getLevelName(level)
|
|
@@ -152,6 +159,8 @@ def _get_log_msg(mode):
|
|
|
152
159
|
log_msg = "Preparation"
|
|
153
160
|
elif mode == Mode.CONVERT: # pragma: no cover
|
|
154
161
|
log_msg = "Conversion"
|
|
162
|
+
elif mode == Mode.LOAD: # pragma: no cover
|
|
163
|
+
log_msg = "Loading"
|
|
155
164
|
return log_msg
|
|
156
165
|
|
|
157
166
|
|
|
@@ -116,11 +116,14 @@ class HFLM(TemplateLM):
|
|
|
116
116
|
peft: Optional[str] = None,
|
|
117
117
|
autogptq: Optional[Union[bool, str]] = False,
|
|
118
118
|
pad_to_buckets: Optional[Union[bool]] = False,
|
|
119
|
+
buckets: Optional[list] = [32, 64, 128, 256, 512, 1024, 2048, 4096],
|
|
119
120
|
model_format: Optional[str] = "torch",
|
|
120
121
|
**kwargs,
|
|
121
122
|
) -> None:
|
|
122
123
|
super().__init__()
|
|
123
124
|
self.pad_to_buckets = pad_to_buckets
|
|
125
|
+
self.buckets = buckets
|
|
126
|
+
self.last_bucket = -1
|
|
124
127
|
self.model_format = model_format
|
|
125
128
|
# optionally: take in an already-initialized transformers.PreTrainedModel
|
|
126
129
|
if not isinstance(pretrained, str):
|
|
@@ -874,6 +877,19 @@ class HFLM(TemplateLM):
|
|
|
874
877
|
elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
|
|
875
878
|
return self.tokenizer.decode(tokens, skip_special_tokens=skip_special_tokens)
|
|
876
879
|
|
|
880
|
+
def find_bucket(self, length):
|
|
881
|
+
suitable_buckets = [b for b in self.buckets if b >= length]
|
|
882
|
+
if len(suitable_buckets) == 0:
|
|
883
|
+
eval_logger.error(f"The input_length={length} exceeds the maximum value in buckets={self.buckets}")
|
|
884
|
+
eval_logger.error("Please add a higher value into the buckets list for this case.")
|
|
885
|
+
exit(0)
|
|
886
|
+
else:
|
|
887
|
+
if self.last_bucket != suitable_buckets[0]:
|
|
888
|
+
if hasattr(self.model, "clear_cache"):
|
|
889
|
+
self.model.clear_cache() # clear HPU graph cache to avoid OOM
|
|
890
|
+
self.last_bucket = suitable_buckets[0]
|
|
891
|
+
return self.last_bucket
|
|
892
|
+
|
|
877
893
|
def _model_call(self, inps, attn_mask=None, labels=None):
|
|
878
894
|
"""
|
|
879
895
|
:param inps: torch.Tensor
|
|
@@ -943,8 +959,7 @@ class HFLM(TemplateLM):
|
|
|
943
959
|
if self.pad_to_buckets: # use buckets to pad inputs
|
|
944
960
|
bs, seq_length = inps.shape
|
|
945
961
|
padding_length = 0
|
|
946
|
-
|
|
947
|
-
bucket_length = [b for b in buckets if b >= seq_length][0]
|
|
962
|
+
bucket_length = self.find_bucket(seq_length)
|
|
948
963
|
padding_length = bucket_length - seq_length
|
|
949
964
|
inps = F.pad(inps, (0, padding_length), value=self.model.config.pad_token_id)
|
|
950
965
|
output = self.model(inps)
|
|
@@ -954,6 +969,8 @@ class HFLM(TemplateLM):
|
|
|
954
969
|
output = output.logits
|
|
955
970
|
if self.pad_to_buckets and padding_length != 0: # use buckets to pad inputs
|
|
956
971
|
output = output[:, :-padding_length, :]
|
|
972
|
+
if "hpu" in output.device.type: # make sure return fp32 tensor for HPU, TODO: root cause
|
|
973
|
+
output = output.to(torch.float32)
|
|
957
974
|
return output
|
|
958
975
|
|
|
959
976
|
def _model_generate(self, context, max_length, stop, **generation_kwargs):
|
{neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/utils.py
RENAMED
|
@@ -49,6 +49,7 @@ class LMEvalParser:
|
|
|
49
49
|
seed=[0, 1234, 1234],
|
|
50
50
|
trust_remote_code=False,
|
|
51
51
|
pad_to_buckets=None, # used by HPU to align input length for performance.
|
|
52
|
+
buckets=[32, 64, 128, 256, 512, 1024, 2048, 4096], # used by HPU to limit input length range.
|
|
52
53
|
):
|
|
53
54
|
self.model = model
|
|
54
55
|
self.tasks = tasks
|
|
@@ -81,3 +82,4 @@ class LMEvalParser:
|
|
|
81
82
|
self.pad_to_buckets = False
|
|
82
83
|
else:
|
|
83
84
|
self.pad_to_buckets = pad_to_buckets
|
|
85
|
+
self.buckets = buckets
|
|
@@ -19,4 +19,24 @@ from neural_compressor.torch.algorithms.fp8_quant.common import (
|
|
|
19
19
|
with_patched_module,
|
|
20
20
|
)
|
|
21
21
|
from neural_compressor.torch.algorithms.fp8_quant.prepare_quant.prepare_model import finish_measurements, prep_model
|
|
22
|
-
from neural_compressor.torch.algorithms.fp8_quant.
|
|
22
|
+
from neural_compressor.torch.algorithms.fp8_quant.quantizer import FP8Quantizer
|
|
23
|
+
from neural_compressor.torch.algorithms.fp8_quant.patched_module_base import (
|
|
24
|
+
PatchedModuleBase,
|
|
25
|
+
register_patched_module,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
from neural_compressor.torch.algorithms.fp8_quant.scaling_method_base import (
|
|
29
|
+
ScalingMethodBase,
|
|
30
|
+
register_scaling_methods,
|
|
31
|
+
)
|
|
32
|
+
from neural_compressor.torch.algorithms.fp8_quant.observer import (
|
|
33
|
+
ObserverBase,
|
|
34
|
+
register_observer,
|
|
35
|
+
)
|
|
36
|
+
from neural_compressor.torch.algorithms.fp8_quant.model_configs import (
|
|
37
|
+
ModuleConfig,
|
|
38
|
+
ModuleInfo,
|
|
39
|
+
ModuleType,
|
|
40
|
+
ModuleExtraConfig
|
|
41
|
+
)
|
|
42
|
+
from neural_compressor.torch.algorithms.fp8_quant.save_load import save, load
|
|
@@ -23,7 +23,15 @@ import torch
|
|
|
23
23
|
from .._quant_common.helper_modules import *
|
|
24
24
|
from .._quant_common.quant_config import get_hqt_config
|
|
25
25
|
from ..utils.logger import logger
|
|
26
|
-
|
|
26
|
+
from neural_compressor.torch.algorithms.fp8_quant.model_configs import (
|
|
27
|
+
ModuleInfo,
|
|
28
|
+
ModuleConfig,
|
|
29
|
+
ModuleType,
|
|
30
|
+
ModuleExtraConfig,
|
|
31
|
+
get_patched_module_table,
|
|
32
|
+
get_patched_module_type_table,
|
|
33
|
+
)
|
|
34
|
+
from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator
|
|
27
35
|
deepspeed_exists = False
|
|
28
36
|
if importlib.util.find_spec("deepspeed"): # check if deepspeed is installed
|
|
29
37
|
deepspeed_exists = True
|
|
@@ -31,38 +39,7 @@ if importlib.util.find_spec("deepspeed"): # check if deepspeed is installed
|
|
|
31
39
|
UNMEASURED_MODELS = "UnmeasuredModels"
|
|
32
40
|
|
|
33
41
|
|
|
34
|
-
|
|
35
|
-
def __init__(self, type, patched_module, should_measure=True):
|
|
36
|
-
self.type = type
|
|
37
|
-
self.patched_module = patched_module
|
|
38
|
-
self.should_measure = should_measure
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class ModuleConfig:
|
|
42
|
-
def __init__(self, inputs=(None,), outputs=(None,), params=None):
|
|
43
|
-
self.inputs = inputs
|
|
44
|
-
self.outputs = outputs
|
|
45
|
-
self.params = params if params is not None else {}
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class ModuleExtraConfig:
|
|
49
|
-
def __init__(self, inputs=(None,), outputs=(None,), params=None, scale=None, config_params=None):
|
|
50
|
-
self.inputs = inputs
|
|
51
|
-
self.outputs = outputs
|
|
52
|
-
self.params = params if params is not None else {}
|
|
53
|
-
self.scale = scale
|
|
54
|
-
self.config_params = config_params if config_params is not None else {}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class ModuleType:
|
|
58
|
-
def __init__(self, num_inputs, param_names, num_outputs, required_output):
|
|
59
|
-
self.num_inputs = num_inputs
|
|
60
|
-
self.param_names = param_names
|
|
61
|
-
self.num_outputs = num_outputs
|
|
62
|
-
self.required_output = required_output
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
mod_types = {
|
|
42
|
+
_mod_types = {
|
|
66
43
|
"linear": ModuleType(1, ["weight"], 1, False),
|
|
67
44
|
"matmul": ModuleType(2, [], 1, False),
|
|
68
45
|
"kv_cache": ModuleType(1, [], 1, False),
|
|
@@ -110,7 +87,7 @@ def save_file(model, d, source_format, fname, mode):
|
|
|
110
87
|
config = get_hqt_config(model)
|
|
111
88
|
logger.debug("Saving %s file: %s", mode, fname)
|
|
112
89
|
ext = os.path.splitext(fname)[1]
|
|
113
|
-
target_format = file_functions[ext][
|
|
90
|
+
target_format = file_functions[ext]['format']
|
|
114
91
|
dc = rec_fn(d, format_functions[(source_format, target_format)])
|
|
115
92
|
df = {
|
|
116
93
|
"GlobalRank": config.cfg["global_rank"],
|
|
@@ -119,7 +96,7 @@ def save_file(model, d, source_format, fname, mode):
|
|
|
119
96
|
"Nodes": dc,
|
|
120
97
|
}
|
|
121
98
|
try:
|
|
122
|
-
file_functions[ext][
|
|
99
|
+
file_functions[ext]['save'](df, fname)
|
|
123
100
|
except:
|
|
124
101
|
pass
|
|
125
102
|
|
|
@@ -127,10 +104,10 @@ def save_file(model, d, source_format, fname, mode):
|
|
|
127
104
|
def load_file(fname, target_format, fail_on_file_not_exist):
|
|
128
105
|
logger.debug("Loading file: %s", fname)
|
|
129
106
|
ext = os.path.splitext(fname)[1]
|
|
130
|
-
source_format = file_functions[ext][
|
|
107
|
+
source_format = file_functions[ext]['format']
|
|
131
108
|
d = {}
|
|
132
109
|
if os.path.isfile(fname):
|
|
133
|
-
d = file_functions[ext][
|
|
110
|
+
d = file_functions[ext]['load'](fname)
|
|
134
111
|
elif fail_on_file_not_exist:
|
|
135
112
|
raise FileNotFoundError(f"Failed to load file {fname}")
|
|
136
113
|
if "Nodes" in d:
|
|
@@ -190,17 +167,17 @@ def load_scales(fname, target_format):
|
|
|
190
167
|
return d
|
|
191
168
|
|
|
192
169
|
|
|
193
|
-
def convert_scales_to_tensors_dict(scales_obj, scales_file_format, hp_dtype):
|
|
170
|
+
def convert_scales_to_tensors_dict(scales_obj, scales_file_format, hp_dtype, device="hpu"):
|
|
194
171
|
scales_temp = {k: scales_obj[k].__dict__ for k in scales_obj}
|
|
195
172
|
scales_temp = format_functions_rec((scales_file_format, torch.Tensor))(scales_temp)
|
|
196
|
-
scales_temp = rec_fn(scales_temp, lambda x: x.to(dtype=hp_dtype, device=
|
|
173
|
+
scales_temp = rec_fn(scales_temp, lambda x: x.to(dtype=hp_dtype, device=device))
|
|
197
174
|
scales = {k: ModuleConfig(**scales_temp[k]) for k in scales_temp}
|
|
198
175
|
return scales
|
|
199
176
|
|
|
200
177
|
|
|
201
178
|
file_functions = {
|
|
202
|
-
".json": {
|
|
203
|
-
".npz": {
|
|
179
|
+
".json": {'format': list, 'save': save_json, 'load': load_json},
|
|
180
|
+
".npz": {'format': np.ndarray, 'save': save_npz, 'load': load_npz}
|
|
204
181
|
}
|
|
205
182
|
|
|
206
183
|
format_functions = {
|
|
@@ -219,7 +196,7 @@ format_functions = {
|
|
|
219
196
|
|
|
220
197
|
format_functions_rec = lambda k: functools.partial(rec_fn, fn=format_functions[k])
|
|
221
198
|
|
|
222
|
-
|
|
199
|
+
_mod_default_dict = {
|
|
223
200
|
"Matmul": ModuleInfo("matmul", PatchedMatmul),
|
|
224
201
|
"Linear": ModuleInfo("linear", PatchedLinear),
|
|
225
202
|
"RowParallelLinear": ModuleInfo("linear", PatchedRowParallelLinear),
|
|
@@ -241,7 +218,7 @@ mod_default_dict = {
|
|
|
241
218
|
|
|
242
219
|
|
|
243
220
|
if deepspeed_exists:
|
|
244
|
-
|
|
221
|
+
_mod_default_dict.update(
|
|
245
222
|
{
|
|
246
223
|
"LinearLayer": ModuleInfo("linear", PatchedLinear),
|
|
247
224
|
"LinearAllreduce": ModuleInfo("linear", PatchedLinearAllReduce),
|
|
@@ -250,6 +227,25 @@ if deepspeed_exists:
|
|
|
250
227
|
}
|
|
251
228
|
)
|
|
252
229
|
|
|
230
|
+
@functools.lru_cache(maxsize=None)
|
|
231
|
+
def _import_hpu_modules():
|
|
232
|
+
from neural_compressor.torch.algorithms.fp8_quant.patched_module_base import (
|
|
233
|
+
PATCHED_MODULE_TABLE, PATCHED_MODULE_TYPES_TABLE
|
|
234
|
+
)
|
|
235
|
+
cur_accelerator = auto_detect_accelerator()
|
|
236
|
+
if not cur_accelerator.current_device_name().startswith("hpu"):
|
|
237
|
+
return
|
|
238
|
+
PATCHED_MODULE_TABLE["hpu"].update(_mod_default_dict)
|
|
239
|
+
PATCHED_MODULE_TYPES_TABLE["hpu"].update(_mod_types)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
_import_hpu_modules()
|
|
243
|
+
|
|
244
|
+
mod_default_dict = get_patched_module_table()
|
|
245
|
+
mod_types = get_patched_module_type_table()
|
|
246
|
+
|
|
247
|
+
def get_white_list():
|
|
248
|
+
return list(mod_default_dict.keys())
|
|
253
249
|
|
|
254
250
|
class ModInstInfo:
|
|
255
251
|
def __init__(self, name, parent):
|
|
@@ -267,3 +263,7 @@ def generate_model_info(model):
|
|
|
267
263
|
create_mod_info_recursion(mod)
|
|
268
264
|
|
|
269
265
|
create_mod_info_recursion(model)
|
|
266
|
+
|
|
267
|
+
def get_device_type_for_scales(mod):
|
|
268
|
+
config = get_hqt_config(mod).cfg
|
|
269
|
+
return config["device_for_scales"]
|
|
@@ -12,12 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import torch
|
|
15
16
|
import habana_frameworks.torch.core as htcore
|
|
16
17
|
import habana_frameworks.torch.utils.experimental as htexp
|
|
17
|
-
import torch
|
|
18
|
-
|
|
19
18
|
from .common import ModuleConfig
|
|
20
|
-
from .quant_dequant import
|
|
19
|
+
from .quant_dequant import cast_to_fp8_fcn, cast_fcn, descale_fcn, scale_fcn
|
|
20
|
+
from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator
|
|
21
|
+
cur_accelerator = auto_detect_accelerator()
|
|
21
22
|
|
|
22
23
|
GAUDI2 = htexp.synDeviceType.synDeviceGaudi2
|
|
23
24
|
GAUDI3 = htexp.synDeviceType.synDeviceGaudi3
|
|
@@ -116,9 +117,9 @@ def scale_to_pow2(scale):
|
|
|
116
117
|
# for Gaudi2 the range is 16^-2..16^1 so we change 2 with 16 and remember that:
|
|
117
118
|
# 16 = 2^4, log16(m)=log2(m)/log2(16)=log2(m)/4, and we get:
|
|
118
119
|
# we choose s=16^ciel(log16(m))=2^4^ciel(log2(m)/4)=2^(4*ciel(log2(m)/4))=2^(ciel(log2(m)/4)*4)
|
|
119
|
-
def scale_to_pow2_hw(scale,
|
|
120
|
+
def scale_to_pow2_hw(scale, device_for_scales):
|
|
120
121
|
scale_pow2 = scale_to_pow2(scale)
|
|
121
|
-
min_scale, max_scale, scale_factor = FP8_143_SCALES_TRAITS[
|
|
122
|
+
min_scale, max_scale, scale_factor = FP8_143_SCALES_TRAITS[device_for_scales]
|
|
122
123
|
scale_pow2_hw = torch.minimum(
|
|
123
124
|
torch.maximum(
|
|
124
125
|
2 ** (torch.ceil(torch.log2(scale_pow2) / scale_factor) * scale_factor),
|
|
@@ -142,13 +143,13 @@ def mmse_scale_multi(x, ref_scale, scales, lp_dtype, hp_dtype):
|
|
|
142
143
|
xscales = rs * sv
|
|
143
144
|
y = scale_fcn(x, xscales)
|
|
144
145
|
y = cast_to_fp8_fcn(y, lp_dtype)
|
|
145
|
-
|
|
146
|
+
cur_accelerator.synchronize() # we are measuring the error so we want to avoid fusion of the converts
|
|
146
147
|
y = cast_fcn(y, hp_dtype)
|
|
147
148
|
y = descale_fcn(y, xscales)
|
|
148
149
|
err = torch.sum((x - y) ** 2, dim=sum_axis)
|
|
149
150
|
opt_scale = torch.where(err < opt_err, sv, opt_scale)
|
|
150
151
|
opt_err = torch.where(err < opt_err, err, opt_err)
|
|
151
|
-
|
|
152
|
+
cur_accelerator.synchronize()
|
|
152
153
|
return opt_scale * ref_scale
|
|
153
154
|
|
|
154
155
|
|
|
@@ -160,13 +161,13 @@ def mmse_scale(x, scales, lp_dtype, hp_dtype):
|
|
|
160
161
|
for s in scales:
|
|
161
162
|
y = scale_fcn(x, s)
|
|
162
163
|
y = cast_to_fp8_fcn(y, lp_dtype)
|
|
163
|
-
|
|
164
|
+
cur_accelerator.synchronize() # we are measuring the error so we want to avoid fusion of the converts
|
|
164
165
|
y = cast_fcn(y, hp_dtype)
|
|
165
166
|
y = descale_fcn(y, s)
|
|
166
167
|
err = torch.norm(x - y)
|
|
167
168
|
opt_scale = torch.where(err <= opt_err, s, opt_scale)
|
|
168
169
|
opt_err = torch.where(err <= opt_err, err, opt_err)
|
|
169
|
-
|
|
170
|
+
cur_accelerator.synchronize()
|
|
170
171
|
return opt_scale
|
|
171
172
|
|
|
172
173
|
|