neural-compressor 2.4__tar.gz → 2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {neural_compressor-2.4 → neural_compressor-2.5}/PKG-INFO +76 -24
- {neural_compressor-2.4 → neural_compressor-2.5}/README.md +70 -18
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras.py +8 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/mxnet_utils/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/mxnet_utils/util.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt.py +51 -32
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt_cuda.yaml +21 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/calibration.py +135 -39
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/split.py +4 -1
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/quantizer.py +82 -57
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/smooth_quant.py +11 -11
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/util.py +32 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/weight_only.py +81 -41
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/pytorch.py +199 -99
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/pytorch_cpu.yaml +1 -1
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_converter.py +10 -6
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py +5 -3
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +35 -21
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py +7 -3
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py +5 -3
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py +1 -1
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/auto_round.py +25 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/__init__.py +15 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/autoround.py +1718 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/export.py +99 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/model_wrapper.py +346 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/sign_sgd.py +389 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/awq.py +3 -4
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/gptq.py +39 -7
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py +1 -1
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/model_wrapper.py +47 -45
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/teq.py +6 -3
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/util.py +13 -15
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/__init__.py +17 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/auto_alpha.py +665 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/calibration.py +114 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/graph_trace.py +232 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py +595 -0
- neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/utils.py +428 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/weight_only.py +229 -98
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/distillation/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/hpo/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/hpo/sa_optimizer.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/criteria.py +22 -10
- neural_compressor-2.5/neural_compressor/compression/pruner/dsnot.py +354 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/model_slim/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/model_slim/auto_slim.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/model_slim/weight_slim.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/base.py +18 -7
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/mha.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/ninm.py +40 -14
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/nxm.py +49 -19
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/base.py +8 -3
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/basic.py +6 -1
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/block_mask.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/mha.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/pattern_lock.py +5 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/progressive.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/retrain_free.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruning.py +26 -21
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/regs.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/schedulers.py +2 -3
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/tf_criteria.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/utils.py +105 -33
- neural_compressor-2.5/neural_compressor/compression/pruner/wanda/__init__.py +20 -0
- neural_compressor-2.5/neural_compressor/compression/pruner/wanda/prune.py +242 -0
- neural_compressor-2.5/neural_compressor/compression/pruner/wanda/utils.py +62 -0
- neural_compressor-2.5/neural_compressor/compression/pruner/wanda/wrapper.py +75 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/config.py +9 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/dataset.py +3 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/compression/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/contrib/strategy/tpe.py +5 -3
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/dataset.py +3 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_recipes/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_recipes/patterns/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_recipes/patterns/pattern.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_recipes/patterns/tile_pattern.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_v2.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/__init__.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/logger.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/patterns.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/prune_utils.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/pruner.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/pruning.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/scheduler.py +1 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/strategy.py +15 -9
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/mix_precision.py +2 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/model.py +2 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/onnx_model.py +2 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/tensorflow_model.py +138 -45
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/torch_model.py +16 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/objective.py +15 -9
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/quantization.py +2 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/auto.py +1 -1
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/conservative.py +4 -4
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/strategy.py +44 -12
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/tuning_space.py +0 -8
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/training.py +2 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/load_huggingface.py +36 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/pytorch.py +31 -19
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/utility.py +6 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/version.py +1 -1
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/PKG-INFO +77 -25
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/SOURCES.txt +17 -1
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/requires.txt +4 -4
- {neural_compressor-2.4 → neural_compressor-2.5}/setup.py +49 -2
- {neural_compressor-2.4 → neural_compressor-2.5}/third-party-programs.txt +2 -0
- neural_compressor-2.4/neural_compressor/adaptor/torch_utils/smooth_quant.py +0 -1607
- {neural_compressor-2.4 → neural_compressor-2.5}/LICENSE +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/__main__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/intel_extension_for_transformers.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/keras_inc.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_bf16.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_bf16_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_bf16_ipex.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_bf16_ipex_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_fp32_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_fp32_ipex.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_fp32_ipex_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_gpu_to_cpu.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_int8.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_bf16.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_bf16_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_bf16_ipex.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_fp32.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_fp32_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_fp32_ipex.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_onnxruntime_fp32.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_openvino_fp32.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_openvino_int8.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/onnx_inc_dynamic_quant.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/onnx_inc_static_quant_qdq.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/onnx_inc_static_quant_qlinear.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_aliblade.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_benchmark.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_channels_last.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_bf16.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_dynamic_quant.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_dynamic_quant_fp8.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_huggingface_optimum_dynamic.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_huggingface_optimum_static.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_static_quant_fx.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_static_quant_ipex.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_ipex_bf16.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_ipex_fp32.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_ipex_int8_dynamic_quant.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_ipex_int8_static_quant.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_jit_script.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_jit_script_ofi.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_jit_trace.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_jit_trace_ofi.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_mixed_precision_cpu.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_mixed_precision_cuda.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_mixed_precision_intel_gpu.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_torchdynamo_jit_script.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_torchdynamo_jit_script_ofi.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_torchdynamo_jit_trace.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_torchdynamo_jit_trace_ofi.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/template.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/autoinc_harness.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/calib_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/domain.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/eval_func.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/batch_size.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/cuda_to_cpu.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/dummy_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/harness.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/lightning.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/reclaim_inputs.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/tensorflow/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/tensorflow/amp.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/tensorflow/inc.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/transform.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/globals.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/code_line.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/function.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/model.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/preloads/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/preloads/transformers.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/interface.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/launcher.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/common.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/cpu_info.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/device.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/handle_user_input.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/line_operation.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/numa_launcher.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/pdf_report.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/version.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/adaptor.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/conv2d.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/dense.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/pool2d.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/quantizer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/separable_conv2d.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/mxnet.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/mxnet.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt_dml.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt_dnnl.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt_trt.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/calibrator.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/activation.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/argmax.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/attention.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/binary_op.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/concat.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/conv.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/gather.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/gemm.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/lstm.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/matmul.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/maxpool.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/norm.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/ops.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/pad.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/pooling.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/reduce.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/resize.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/unary_op.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/pytorch_gpu.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/pytorch_ipex.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/query.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tensorflow.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tensorflow.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tensorflow_itex.yaml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_util.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph_common.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/util.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/bf16_convert.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/hawq_metric.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/mixed_precision.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/pattern_detector.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/symbolic_trace.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/algorithm.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/fast_bias_correction.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/smooth_quant.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/weight_correction.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/benchmark.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/callbacks.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/distillation/criterions.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/distillation/optimizers.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/distillation/utility.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/hpo/search_algorithms.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/hpo/search_space.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/sparse_gpt.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/conf/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/conf/config.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/conf/dotdict.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/conf/pythonic_config.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/contrib/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/contrib/strategy/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/contrib/strategy/sigopt.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/contrib/strategy/tpe.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/base_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/default_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/fetcher.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/mxnet_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/onnxrt_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/pytorch_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/sampler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/tensorflow_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/bert_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/coco_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/dummy_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/dummy_dataset_v2.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/imagenet_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/style_transfer_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/filters/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/filters/coco_filter.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/filters/filter.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/coco_transform.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/imagenet_transform.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/postprocess.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/tokenization.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/transform.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/benchmark.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/criterion.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/metric.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/model.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/optimizer.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/postprocess.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/torch_utils.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/component.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/contrib/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/contrib/strategy/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/contrib/strategy/sigopt.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/base_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/default_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/fetcher.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/mxnet_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/onnxrt_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/sampler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/tensorflow_dataloader.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/bert_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/coco_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/dummy_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/dummy_dataset_v2.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/imagenet_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/style_transfer_dataset.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/filters/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/filters/coco_filter.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/filters/filter.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/transforms/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/transforms/imagenet_transform.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/transforms/tokenization.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/transforms/transform.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/distillation.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/export/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/export/qlinear2qdq.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/export/tf2onnx.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/export/torch2onnx.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/graph_optimization.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/bleu.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/bleu_util.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/coco_label_map.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/coco_tools.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/evaluate_squad.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/f1.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/metric.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/mixed_precision.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/model_conversion.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/basic_nas.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/dynas.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/nas.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/nas_utils.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/search_algorithms.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/gradient_sensitivity.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/group_lasso.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/magnitude.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/pattern_lock.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/pruner.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/quantization.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/scheduler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/auto_mixed_precision.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/basic.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/bayesian.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/exhaustive.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/mse.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/mse_v2.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/random.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/constant.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/tuning_sampler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/tuning_space.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/tuning_structs.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/utility.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/bleu.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/bleu_util.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/coco_label_map.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/coco_tools.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/evaluate_squad.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/f1.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/metric.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/base_model.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/keras_model.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/mxnet_model.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/nets_factory.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/factory.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/onnx_parser/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/onnx_parser/factory.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/onnx_parser/parser.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/parser.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/result.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/tensorflow_parser/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/tensorflow_parser/factory.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/tensorflow_parser/parser.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/factory.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/onnxrt_profiler/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/onnxrt_profiler/utils.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/profiler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/tensorflow_profiler/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/auto_mixed_precision.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/basic.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/bayesian.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/exhaustive.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/hawq_v2.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/mse.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/mse_v2.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/random.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/constant.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/tuning_sampler.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/tuning_structs.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/utility.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/__init__.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/collect_layer_histogram.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/constant.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/create_obj_from_config.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/kl_divergence.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/logger.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/neural_insights_utils.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/options.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/weights_details.py +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/dependency_links.txt +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/top_level.txt +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/pyproject.toml +0 -0
- {neural_compressor-2.4 → neural_compressor-2.5}/setup.cfg +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: neural_compressor
|
|
3
|
-
Version: 2.4
|
|
3
|
+
Version: 2.5
|
|
4
4
|
Summary: Repository of Intel® Neural Compressor
|
|
5
5
|
Home-page: https://github.com/intel/neural-compressor
|
|
6
|
-
Author: Intel
|
|
6
|
+
Author: Intel AIPT Team
|
|
7
7
|
Author-email: feng.tian@intel.com, haihao.shen@intel.com, suyue.chen@intel.com
|
|
8
8
|
License: Apache 2.0
|
|
9
9
|
Keywords: quantization,auto-tuning,post-training static quantization,post-training dynamic quantization,quantization-aware training
|
|
@@ -16,7 +16,7 @@ Description-Content-Type: text/markdown
|
|
|
16
16
|
License-File: LICENSE
|
|
17
17
|
License-File: third-party-programs.txt
|
|
18
18
|
Requires-Dist: deprecated>=1.2.13
|
|
19
|
-
Requires-Dist: numpy
|
|
19
|
+
Requires-Dist: numpy<2.0
|
|
20
20
|
Requires-Dist: opencv-python-headless
|
|
21
21
|
Requires-Dist: pandas
|
|
22
22
|
Requires-Dist: Pillow
|
|
@@ -30,11 +30,11 @@ Requires-Dist: requests
|
|
|
30
30
|
Requires-Dist: schema
|
|
31
31
|
Requires-Dist: scikit-learn
|
|
32
32
|
Provides-Extra: pt
|
|
33
|
-
Requires-Dist: neural_compressor_3x_pt==2.4
|
|
33
|
+
Requires-Dist: neural_compressor_3x_pt==2.5; extra == "pt"
|
|
34
34
|
Provides-Extra: tf
|
|
35
|
-
Requires-Dist: neural_compressor_3x_tf==2.4
|
|
35
|
+
Requires-Dist: neural_compressor_3x_tf==2.5; extra == "tf"
|
|
36
36
|
Provides-Extra: ort
|
|
37
|
-
Requires-Dist: neural_compressor_3x_ort==2.4
|
|
37
|
+
Requires-Dist: neural_compressor_3x_ort==2.5; extra == "ort"
|
|
38
38
|
|
|
39
39
|
<div align="center">
|
|
40
40
|
|
|
@@ -43,12 +43,12 @@ Intel® Neural Compressor
|
|
|
43
43
|
<h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, ONNX Runtime, and MXNet)</h3>
|
|
44
44
|
|
|
45
45
|
[](https://github.com/intel/neural-compressor)
|
|
46
|
-
[](https://github.com/intel/neural-compressor/releases)
|
|
47
47
|
[](https://github.com/intel/neural-compressor/blob/master/LICENSE)
|
|
48
48
|
[](https://github.com/intel/neural-compressor)
|
|
49
49
|
[](https://pepy.tech/project/neural-compressor)
|
|
50
50
|
|
|
51
|
-
[Architecture](./docs/source/design.md#architecture) | [Workflow](./docs/source/design.md#workflow) | [
|
|
51
|
+
[Architecture](./docs/source/design.md#architecture) | [Workflow](./docs/source/design.md#workflow) | [LLMs Recipes](./docs/source/llm_recipes.md) | [Results](./docs/source/validated_model_list.md) | [Documentations](https://intel.github.io/neural-compressor)
|
|
52
52
|
|
|
53
53
|
---
|
|
54
54
|
<div align="left">
|
|
@@ -63,6 +63,9 @@ In particular, the tool provides the key features, typical examples, and open co
|
|
|
63
63
|
|
|
64
64
|
* Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)
|
|
65
65
|
|
|
66
|
+
## What's New
|
|
67
|
+
* [2024/03] A new SOTA approach [AutoRound](https://github.com/intel/auto-round) Weight-Only Quantization on [Intel Gaudi2 AI accelerator](https://habana.ai/products/gaudi2/) is available for LLMs.
|
|
68
|
+
|
|
66
69
|
## Installation
|
|
67
70
|
|
|
68
71
|
### Install from pypi
|
|
@@ -73,29 +76,77 @@ pip install neural-compressor
|
|
|
73
76
|
> More installation methods can be found at [Installation Guide](https://github.com/intel/neural-compressor/blob/master/docs/source/installation_guide.md). Please check out our [FAQ](https://github.com/intel/neural-compressor/blob/master/docs/source/faq.md) for more details.
|
|
74
77
|
|
|
75
78
|
## Getting Started
|
|
76
|
-
### Quantization with Python API
|
|
77
79
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
pip install neural-compressor
|
|
81
|
-
pip install tensorflow
|
|
82
|
-
# Prepare fp32 model
|
|
83
|
-
wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb
|
|
80
|
+
Setting up the environment:
|
|
81
|
+
```bash
|
|
82
|
+
pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
|
|
84
83
|
```
|
|
84
|
+
After successfully installing these packages, try your first quantization program.
|
|
85
|
+
|
|
86
|
+
### Weight-Only Quantization (LLMs)
|
|
87
|
+
The following example code demonstrates Weight-Only Quantization on LLMs. It supports Intel CPU, Intel Gaudi2 AI Accelerator, and Nvidia GPU; the best device will be selected automatically.
|
|
88
|
+
|
|
89
|
+
To try it on Intel Gaudi2, a Docker image with the Gaudi Software Stack is recommended; please refer to the following script for environment setup. More details can be found in the [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
|
90
|
+
```bash
|
|
91
|
+
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04//habanalabs/pytorch-installer-2.1.1:latest
|
|
92
|
+
|
|
93
|
+
# Check the container ID
|
|
94
|
+
docker ps
|
|
95
|
+
|
|
96
|
+
# Login into container
|
|
97
|
+
docker exec -it <container_id> bash
|
|
98
|
+
|
|
99
|
+
# Install the optimum-habana
|
|
100
|
+
pip install --upgrade-strategy eager optimum[habana]
|
|
101
|
+
|
|
102
|
+
# Install INC/auto_round
|
|
103
|
+
pip install neural-compressor auto_round
|
|
104
|
+
```
|
|
105
|
+
Run the example:
|
|
85
106
|
```python
|
|
86
|
-
from
|
|
107
|
+
from transformers import AutoModel, AutoTokenizer
|
|
108
|
+
|
|
87
109
|
from neural_compressor.config import PostTrainingQuantConfig
|
|
110
|
+
from neural_compressor.quantization import fit
|
|
111
|
+
from neural_compressor.adaptor.torch_utils.auto_round import get_dataloader
|
|
112
|
+
|
|
113
|
+
model_name = "EleutherAI/gpt-neo-125m"
|
|
114
|
+
float_model = AutoModel.from_pretrained(model_name)
|
|
115
|
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
116
|
+
dataloader = get_dataloader(tokenizer, seqlen=2048)
|
|
117
|
+
|
|
118
|
+
woq_conf = PostTrainingQuantConfig(
|
|
119
|
+
approach="weight_only",
|
|
120
|
+
op_type_dict={
|
|
121
|
+
".*": { # match all ops
|
|
122
|
+
"weight": {
|
|
123
|
+
"dtype": "int",
|
|
124
|
+
"bits": 4,
|
|
125
|
+
"algorithm": "AUTOROUND",
|
|
126
|
+
},
|
|
127
|
+
}
|
|
128
|
+
},
|
|
129
|
+
)
|
|
130
|
+
quantized_model = fit(model=float_model, conf=woq_conf, calib_dataloader=dataloader)
|
|
131
|
+
```
|
|
132
|
+
**Note:**
|
|
133
|
+
|
|
134
|
+
To try INT4 model inference, please directly use [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), which leverages Intel Neural Compressor for model quantization.
|
|
88
135
|
|
|
89
|
-
|
|
90
|
-
dataloader = DataLoader(framework="tensorflow", dataset=dataset)
|
|
136
|
+
### Static Quantization (Non-LLMs)
|
|
91
137
|
|
|
138
|
+
```python
|
|
139
|
+
from torchvision import models
|
|
140
|
+
|
|
141
|
+
from neural_compressor.config import PostTrainingQuantConfig
|
|
142
|
+
from neural_compressor.data import DataLoader, Datasets
|
|
92
143
|
from neural_compressor.quantization import fit
|
|
93
144
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
)
|
|
145
|
+
float_model = models.resnet18()
|
|
146
|
+
dataset = Datasets("pytorch")["dummy"](shape=(1, 3, 224, 224))
|
|
147
|
+
calib_dataloader = DataLoader(framework="pytorch", dataset=dataset)
|
|
148
|
+
static_quant_conf = PostTrainingQuantConfig()
|
|
149
|
+
quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloader=calib_dataloader)
|
|
99
150
|
```
|
|
100
151
|
|
|
101
152
|
## Documentation
|
|
@@ -110,8 +161,9 @@ q_model = fit(
|
|
|
110
161
|
<tr>
|
|
111
162
|
<td colspan="2" align="center"><a href="./docs/source/design.md#architecture">Architecture</a></td>
|
|
112
163
|
<td colspan="2" align="center"><a href="./docs/source/design.md#workflow">Workflow</a></td>
|
|
164
|
+
<td colspan="1" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
|
|
165
|
+
<td colspan="1" align="center"><a href="./docs/source/llm_recipes.md">LLMs Recipes</a></td>
|
|
113
166
|
<td colspan="2" align="center"><a href="examples/README.md">Examples</a></td>
|
|
114
|
-
<td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
|
|
115
167
|
</tr>
|
|
116
168
|
</tbody>
|
|
117
169
|
<thead>
|
|
@@ -5,12 +5,12 @@ Intel® Neural Compressor
|
|
|
5
5
|
<h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, ONNX Runtime, and MXNet)</h3>
|
|
6
6
|
|
|
7
7
|
[](https://github.com/intel/neural-compressor)
|
|
8
|
-
[](https://github.com/intel/neural-compressor/releases)
|
|
9
9
|
[](https://github.com/intel/neural-compressor/blob/master/LICENSE)
|
|
10
10
|
[](https://github.com/intel/neural-compressor)
|
|
11
11
|
[](https://pepy.tech/project/neural-compressor)
|
|
12
12
|
|
|
13
|
-
[Architecture](./docs/source/design.md#architecture) | [Workflow](./docs/source/design.md#workflow) | [
|
|
13
|
+
[Architecture](./docs/source/design.md#architecture) | [Workflow](./docs/source/design.md#workflow) | [LLMs Recipes](./docs/source/llm_recipes.md) | [Results](./docs/source/validated_model_list.md) | [Documentations](https://intel.github.io/neural-compressor)
|
|
14
14
|
|
|
15
15
|
---
|
|
16
16
|
<div align="left">
|
|
@@ -25,6 +25,9 @@ In particular, the tool provides the key features, typical examples, and open co
|
|
|
25
25
|
|
|
26
26
|
* Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)
|
|
27
27
|
|
|
28
|
+
## What's New
|
|
29
|
+
* [2024/03] A new SOTA approach [AutoRound](https://github.com/intel/auto-round) Weight-Only Quantization on [Intel Gaudi2 AI accelerator](https://habana.ai/products/gaudi2/) is available for LLMs.
|
|
30
|
+
|
|
28
31
|
## Installation
|
|
29
32
|
|
|
30
33
|
### Install from pypi
|
|
@@ -35,29 +38,77 @@ pip install neural-compressor
|
|
|
35
38
|
> More installation methods can be found at [Installation Guide](https://github.com/intel/neural-compressor/blob/master/docs/source/installation_guide.md). Please check out our [FAQ](https://github.com/intel/neural-compressor/blob/master/docs/source/faq.md) for more details.
|
|
36
39
|
|
|
37
40
|
## Getting Started
|
|
38
|
-
### Quantization with Python API
|
|
39
41
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
pip install neural-compressor
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
42
|
+
Setting up the environment:
|
|
43
|
+
```bash
|
|
44
|
+
pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
|
|
45
|
+
```
|
|
46
|
+
After successfully installing these packages, try your first quantization program.
|
|
47
|
+
|
|
48
|
+
### Weight-Only Quantization (LLMs)
|
|
49
|
+
The following example code demonstrates Weight-Only Quantization on LLMs. It supports Intel CPU, Intel Gaudi2 AI Accelerator, and Nvidia GPU; the best device will be selected automatically.
|
|
50
|
+
|
|
51
|
+
To try it on Intel Gaudi2, a Docker image with the Gaudi Software Stack is recommended; please refer to the following script for environment setup. More details can be found in the [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
|
52
|
+
```bash
|
|
53
|
+
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04//habanalabs/pytorch-installer-2.1.1:latest
|
|
54
|
+
|
|
55
|
+
# Check the container ID
|
|
56
|
+
docker ps
|
|
57
|
+
|
|
58
|
+
# Login into container
|
|
59
|
+
docker exec -it <container_id> bash
|
|
60
|
+
|
|
61
|
+
# Install the optimum-habana
|
|
62
|
+
pip install --upgrade-strategy eager optimum[habana]
|
|
63
|
+
|
|
64
|
+
# Install INC/auto_round
|
|
65
|
+
pip install neural-compressor auto_round
|
|
46
66
|
```
|
|
67
|
+
Run the example:
|
|
47
68
|
```python
|
|
48
|
-
from
|
|
69
|
+
from transformers import AutoModel, AutoTokenizer
|
|
70
|
+
|
|
49
71
|
from neural_compressor.config import PostTrainingQuantConfig
|
|
72
|
+
from neural_compressor.quantization import fit
|
|
73
|
+
from neural_compressor.adaptor.torch_utils.auto_round import get_dataloader
|
|
74
|
+
|
|
75
|
+
model_name = "EleutherAI/gpt-neo-125m"
|
|
76
|
+
float_model = AutoModel.from_pretrained(model_name)
|
|
77
|
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
78
|
+
dataloader = get_dataloader(tokenizer, seqlen=2048)
|
|
79
|
+
|
|
80
|
+
woq_conf = PostTrainingQuantConfig(
|
|
81
|
+
approach="weight_only",
|
|
82
|
+
op_type_dict={
|
|
83
|
+
".*": { # match all ops
|
|
84
|
+
"weight": {
|
|
85
|
+
"dtype": "int",
|
|
86
|
+
"bits": 4,
|
|
87
|
+
"algorithm": "AUTOROUND",
|
|
88
|
+
},
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
)
|
|
92
|
+
quantized_model = fit(model=float_model, conf=woq_conf, calib_dataloader=dataloader)
|
|
93
|
+
```
|
|
94
|
+
**Note:**
|
|
95
|
+
|
|
96
|
+
To try INT4 model inference, please directly use [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), which leverages Intel Neural Compressor for model quantization.
|
|
50
97
|
|
|
51
|
-
|
|
52
|
-
dataloader = DataLoader(framework="tensorflow", dataset=dataset)
|
|
98
|
+
### Static Quantization (Non-LLMs)
|
|
53
99
|
|
|
100
|
+
```python
|
|
101
|
+
from torchvision import models
|
|
102
|
+
|
|
103
|
+
from neural_compressor.config import PostTrainingQuantConfig
|
|
104
|
+
from neural_compressor.data import DataLoader, Datasets
|
|
54
105
|
from neural_compressor.quantization import fit
|
|
55
106
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
)
|
|
107
|
+
float_model = models.resnet18()
|
|
108
|
+
dataset = Datasets("pytorch")["dummy"](shape=(1, 3, 224, 224))
|
|
109
|
+
calib_dataloader = DataLoader(framework="pytorch", dataset=dataset)
|
|
110
|
+
static_quant_conf = PostTrainingQuantConfig()
|
|
111
|
+
quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloader=calib_dataloader)
|
|
61
112
|
```
|
|
62
113
|
|
|
63
114
|
## Documentation
|
|
@@ -72,8 +123,9 @@ q_model = fit(
|
|
|
72
123
|
<tr>
|
|
73
124
|
<td colspan="2" align="center"><a href="./docs/source/design.md#architecture">Architecture</a></td>
|
|
74
125
|
<td colspan="2" align="center"><a href="./docs/source/design.md#workflow">Workflow</a></td>
|
|
126
|
+
<td colspan="1" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
|
|
127
|
+
<td colspan="1" align="center"><a href="./docs/source/llm_recipes.md">LLMs Recipes</a></td>
|
|
75
128
|
<td colspan="2" align="center"><a href="examples/README.md">Examples</a></td>
|
|
76
|
-
<td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
|
|
77
129
|
</tr>
|
|
78
130
|
</tbody>
|
|
79
131
|
<thead>
|
|
@@ -42,6 +42,7 @@ from .adaptor import Adaptor, adaptor_registry
|
|
|
42
42
|
from .query import QueryBackendCapability
|
|
43
43
|
|
|
44
44
|
tf = LazyImport("tensorflow")
|
|
45
|
+
keras = LazyImport("keras")
|
|
45
46
|
|
|
46
47
|
|
|
47
48
|
def _add_supported_quantized_objects(custom_objects):
|
|
@@ -519,6 +520,13 @@ class KerasAdaptor(Adaptor):
|
|
|
519
520
|
def _restore_model_from_json(self, json_model):
|
|
520
521
|
from tensorflow.keras.models import model_from_json
|
|
521
522
|
|
|
523
|
+
from neural_compressor.utils.utility import version1_gte_version2
|
|
524
|
+
|
|
525
|
+
if version1_gte_version2(keras.__version__, "2.13.1"):
|
|
526
|
+
from keras.src.saving import serialization_lib
|
|
527
|
+
|
|
528
|
+
serialization_lib.enable_unsafe_deserialization()
|
|
529
|
+
|
|
522
530
|
custom_objects = {}
|
|
523
531
|
# We need to keep a dictionary of custom objects as our quantized library
|
|
524
532
|
# is not recognized by keras.
|
|
@@ -417,15 +417,21 @@ class ONNXRUNTIMEAdaptor(Adaptor):
|
|
|
417
417
|
self.quantizable_op_types,
|
|
418
418
|
self.query_handler.get_fallback_list(),
|
|
419
419
|
self.reduce_range,
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
420
|
+
(
|
|
421
|
+
options.onnxrt.qdq_setting.AddQDQPairToWeight
|
|
422
|
+
if "add_qdq_pair_to_weight" not in self.recipes
|
|
423
|
+
else self.recipes.get("add_qdq_pair_to_weight", False)
|
|
424
|
+
),
|
|
425
|
+
(
|
|
426
|
+
options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
|
|
427
|
+
if "optypes_to_exclude_output_quant" not in self.recipes
|
|
428
|
+
else self.recipes.get("optypes_to_exclude_output_quant", [])
|
|
429
|
+
),
|
|
430
|
+
(
|
|
431
|
+
options.onnxrt.qdq_setting.DedicatedQDQPair
|
|
432
|
+
if "dedicated_qdq_pair" not in self.recipes
|
|
433
|
+
else self.recipes.get("dedicated_qdq_pair", False)
|
|
434
|
+
),
|
|
429
435
|
self.backend,
|
|
430
436
|
)
|
|
431
437
|
quantizer.quantize_model()
|
|
@@ -502,15 +508,21 @@ class ONNXRUNTIMEAdaptor(Adaptor):
|
|
|
502
508
|
self.quantizable_op_types,
|
|
503
509
|
self.query_handler.get_fallback_list(),
|
|
504
510
|
self.reduce_range,
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
511
|
+
(
|
|
512
|
+
options.onnxrt.qdq_setting.AddQDQPairToWeight
|
|
513
|
+
if "add_qdq_pair_to_weight" not in self.recipes
|
|
514
|
+
else self.recipes.get("add_qdq_pair_to_weight", False)
|
|
515
|
+
),
|
|
516
|
+
(
|
|
517
|
+
options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
|
|
518
|
+
if "optypes_to_exclude_output_quant" not in self.recipes
|
|
519
|
+
else self.recipes.get("optypes_to_exclude_output_quant", [])
|
|
520
|
+
),
|
|
521
|
+
(
|
|
522
|
+
options.onnxrt.qdq_setting.DedicatedQDQPair
|
|
523
|
+
if "dedicated_qdq_pair" not in self.recipes
|
|
524
|
+
else self.recipes.get("dedicated_qdq_pair", False)
|
|
525
|
+
),
|
|
514
526
|
self.backend,
|
|
515
527
|
)
|
|
516
528
|
quantizer.quantize_model()
|
|
@@ -657,15 +669,21 @@ class ONNXRUNTIMEAdaptor(Adaptor):
|
|
|
657
669
|
self.quantizable_op_types,
|
|
658
670
|
self.query_handler.get_fallback_list(),
|
|
659
671
|
self.reduce_range,
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
672
|
+
(
|
|
673
|
+
options.onnxrt.qdq_setting.AddQDQPairToWeight
|
|
674
|
+
if not options.onnxrt.qdq_setting.AddQDQPairToWeight
|
|
675
|
+
else self.recipes.get("add_qdq_pair_to_weight", False)
|
|
676
|
+
),
|
|
677
|
+
(
|
|
678
|
+
options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
|
|
679
|
+
if options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin is not None
|
|
680
|
+
else self.recipes.get("optypes_to_exclude_output_quant", [])
|
|
681
|
+
),
|
|
682
|
+
(
|
|
683
|
+
options.onnxrt.qdq_setting.DedicatedQDQPair
|
|
684
|
+
if not options.onnxrt.qdq_setting.DedicatedQDQPair
|
|
685
|
+
else self.recipes.get("dedicated_qdq_pair", False)
|
|
686
|
+
),
|
|
669
687
|
)
|
|
670
688
|
|
|
671
689
|
quantizer.quantize_model()
|
|
@@ -765,7 +783,7 @@ class ONNXRUNTIMEAdaptor(Adaptor):
|
|
|
765
783
|
black_nodes=black_nodes,
|
|
766
784
|
white_nodes=white_nodes,
|
|
767
785
|
iterations=list(range(0, iterations)),
|
|
768
|
-
backend=self.backend
|
|
786
|
+
backend=self.backend,
|
|
769
787
|
reduce_range=self.reduce_range,
|
|
770
788
|
**kwargs,
|
|
771
789
|
)
|
|
@@ -979,12 +997,10 @@ class ONNXRUNTIMEAdaptor(Adaptor):
|
|
|
979
997
|
sess_options.register_custom_ops_library(get_library_path())
|
|
980
998
|
|
|
981
999
|
if not model.is_large_model:
|
|
982
|
-
sess = ort.InferenceSession(
|
|
983
|
-
model.model.SerializeToString(), sess_options, providers=["CPUExecutionProvider"]
|
|
984
|
-
)
|
|
1000
|
+
sess = ort.InferenceSession(model.model.SerializeToString(), sess_options, providers=[self.backend])
|
|
985
1001
|
elif model.model_path is not None: # pragma: no cover
|
|
986
1002
|
model.model = onnx.ModelProto() # clean memory for large model
|
|
987
|
-
sess = ort.InferenceSession(model.model_path, sess_options, providers=[
|
|
1003
|
+
sess = ort.InferenceSession(model.model_path, sess_options, providers=[self.backend])
|
|
988
1004
|
else: # pragma: no cover
|
|
989
1005
|
logger.warning("Please use model path instead of onnx model object to quantize")
|
|
990
1006
|
del sess
|
|
@@ -1914,6 +1930,7 @@ class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor):
|
|
|
1914
1930
|
mse=mse,
|
|
1915
1931
|
perchannel=perchannel,
|
|
1916
1932
|
accuracy_level=accuracy_level,
|
|
1933
|
+
providers=[self.backend],
|
|
1917
1934
|
)
|
|
1918
1935
|
if "AWQ" in algos:
|
|
1919
1936
|
from neural_compressor.adaptor.ox_utils.weight_only import awq_quantize
|
|
@@ -1931,6 +1948,7 @@ class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor):
|
|
|
1931
1948
|
enable_auto_scale=enable_auto_scale,
|
|
1932
1949
|
enable_mse_search=enable_mse_search,
|
|
1933
1950
|
accuracy_level=accuracy_level,
|
|
1951
|
+
providers=[self.backend],
|
|
1934
1952
|
)
|
|
1935
1953
|
elif "RTN" in algos:
|
|
1936
1954
|
from neural_compressor.adaptor.ox_utils.weight_only import rtn_quantize
|
|
@@ -1940,6 +1958,7 @@ class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor):
|
|
|
1940
1958
|
tmp_model,
|
|
1941
1959
|
quant_config,
|
|
1942
1960
|
accuracy_level=accuracy_level,
|
|
1961
|
+
providers=[self.backend],
|
|
1943
1962
|
)
|
|
1944
1963
|
tmp_model.q_config = copy.deepcopy(quant_config)
|
|
1945
1964
|
self._dump_model_op_stats(tmp_model, tune_cfg)
|
|
@@ -17,6 +17,20 @@
|
|
|
17
17
|
-
|
|
18
18
|
version:
|
|
19
19
|
name: '1.6.0'
|
|
20
|
+
weight_only_integer: &cap_weight_only {
|
|
21
|
+
'MatMul': &cap_weight_only_matmul {
|
|
22
|
+
'weight': {
|
|
23
|
+
'dtype': ['int'], # no need to care uint
|
|
24
|
+
'bits': [4, 3, 8], # [1-8]
|
|
25
|
+
'group_size': [32, -1, 1, 16, 64, 128, 256, 512, 1024], # [1-inf]
|
|
26
|
+
'scheme': ['sym', 'asym'], # sym, no ZP
|
|
27
|
+
'algorithm': ['RTN', 'AWQ', 'GPTQ']
|
|
28
|
+
},
|
|
29
|
+
'activation': {
|
|
30
|
+
'dtype': ['fp32']
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
}
|
|
20
34
|
int8: &ref_1_6 {
|
|
21
35
|
'static': &ref_1_6_static {
|
|
22
36
|
'Conv': {
|
|
@@ -114,6 +128,7 @@
|
|
|
114
128
|
-
|
|
115
129
|
version:
|
|
116
130
|
name: '1.7.0'
|
|
131
|
+
weight_only_integer: *cap_weight_only
|
|
117
132
|
int8: {
|
|
118
133
|
'static': {
|
|
119
134
|
'FusedConv': {
|
|
@@ -155,6 +170,7 @@
|
|
|
155
170
|
-
|
|
156
171
|
version:
|
|
157
172
|
name: '1.8.0'
|
|
173
|
+
weight_only_integer: *cap_weight_only
|
|
158
174
|
int8: {
|
|
159
175
|
'static': {
|
|
160
176
|
'FusedConv': {
|
|
@@ -224,6 +240,7 @@
|
|
|
224
240
|
-
|
|
225
241
|
version:
|
|
226
242
|
name: '1.9.0'
|
|
243
|
+
weight_only_integer: *cap_weight_only
|
|
227
244
|
int8: {
|
|
228
245
|
'static': {
|
|
229
246
|
'FusedConv': {
|
|
@@ -300,6 +317,7 @@
|
|
|
300
317
|
-
|
|
301
318
|
version:
|
|
302
319
|
name: '1.10.0'
|
|
320
|
+
weight_only_integer: *cap_weight_only
|
|
303
321
|
int8: {
|
|
304
322
|
'static': {
|
|
305
323
|
'FusedConv': {
|
|
@@ -356,6 +374,7 @@
|
|
|
356
374
|
-
|
|
357
375
|
version:
|
|
358
376
|
name: '1.11.0'
|
|
377
|
+
weight_only_integer: *cap_weight_only
|
|
359
378
|
int8: &ref_1_11 {
|
|
360
379
|
'static': {
|
|
361
380
|
'FusedConv': {
|
|
@@ -427,6 +446,7 @@
|
|
|
427
446
|
-
|
|
428
447
|
version:
|
|
429
448
|
name: '1.12.0'
|
|
449
|
+
weight_only_integer: *cap_weight_only
|
|
430
450
|
int8: *ref_1_11
|
|
431
451
|
fp16: *common_fp16
|
|
432
452
|
bf16: *common_bf16
|
|
@@ -436,6 +456,7 @@
|
|
|
436
456
|
-
|
|
437
457
|
version:
|
|
438
458
|
name: 'default'
|
|
459
|
+
weight_only_integer: *cap_weight_only
|
|
439
460
|
int8: *ref_1_6
|
|
440
461
|
fp16: *common_fp16
|
|
441
462
|
bf16: *common_bf16
|