neural-compressor 3.1__tar.gz → 3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (599)
  1. {neural_compressor-3.1 → neural_compressor-3.2}/PKG-INFO +12 -5
  2. {neural_compressor-3.1 → neural_compressor-3.2}/README.md +11 -4
  3. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/smooth_quant.py +3 -0
  4. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/pytorch.py +2 -2
  5. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/weight_only.py +3 -3
  6. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/__init__.py +1 -1
  7. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/constants.py +1 -0
  8. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/logger.py +9 -0
  9. {neural_compressor-3.1/neural_compressor → neural_compressor-3.2/neural_compressor/common}/version.py +1 -1
  10. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/accuracy.py +1 -0
  11. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/models/huggingface.py +19 -2
  12. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/utils.py +2 -0
  13. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/__init__.py +21 -1
  14. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/common.py +43 -43
  15. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/fp_utils.py +10 -9
  16. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/measure.py +24 -173
  17. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/quant_dequant.py +74 -12
  18. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/quantize.py +253 -0
  19. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale.py +126 -96
  20. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_handler.py +14 -3
  21. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/max_abs.py +159 -83
  22. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/single_scale.py +10 -15
  23. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/smooth_quant.py +6 -11
  24. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/utils.py +1 -1
  25. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py +305 -130
  26. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/quant_config.py +104 -33
  27. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/common.py +2 -2
  28. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/model_configs.py +143 -0
  29. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/observer.py +228 -0
  30. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/patched_module_base.py +197 -0
  31. neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/fp8_quant.py → neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/quantizer.py +1 -1
  32. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/save_load.py +452 -0
  33. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/scaling_method_base.py +79 -0
  34. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/scripts/fix_measurements.py +2 -6
  35. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/utils/__init__.py +18 -0
  36. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/layer_wise/utils.py +57 -62
  37. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/static_quant/utility.py +0 -1
  38. neural_compressor-3.2/neural_compressor/torch/algorithms/weight_only/autoround.py +426 -0
  39. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/gptq.py +62 -28
  40. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/modules.py +2 -12
  41. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/save_load.py +55 -47
  42. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/utility.py +1 -1
  43. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/__init__.py +2 -1
  44. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/algorithm_entry.py +23 -2
  45. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/config.py +98 -58
  46. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/quantize.py +8 -6
  47. neural_compressor-3.1/neural_compressor/torch/quantization/load_entry.py → neural_compressor-3.2/neural_compressor/torch/quantization/save_load_entry.py +50 -15
  48. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/auto_accelerator.py +4 -8
  49. neural_compressor-3.2/neural_compressor/torch/utils/block_wise.py +220 -0
  50. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/constants.py +1 -1
  51. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/environ.py +113 -0
  52. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/utility.py +145 -63
  53. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/models/modeling_auto.py +7 -5
  54. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/quantization/utils.py +11 -10
  55. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/utils/quantization_config.py +5 -5
  56. neural_compressor-3.2/neural_compressor/version.py +18 -0
  57. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/PKG-INFO +12 -5
  58. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/SOURCES.txt +10 -5
  59. {neural_compressor-3.1 → neural_compressor-3.2}/setup.py +0 -11
  60. neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/_core/quantize.py +0 -147
  61. neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/scripts/regression_detection/regression_detection.py +0 -132
  62. neural_compressor-3.1/neural_compressor/torch/algorithms/mixed_low_precision/__init__.py +0 -13
  63. neural_compressor-3.1/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/__init__.py +0 -13
  64. neural_compressor-3.1/neural_compressor/torch/algorithms/weight_only/autoround.py +0 -246
  65. {neural_compressor-3.1 → neural_compressor-3.2}/LICENSE +0 -0
  66. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/__init__.py +0 -0
  67. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/__init__.py +0 -0
  68. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/adaptor.py +0 -0
  69. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras.py +0 -0
  70. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras.yaml +0 -0
  71. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/__init__.py +0 -0
  72. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/conv2d.py +0 -0
  73. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/dense.py +0 -0
  74. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py +0 -0
  75. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/pool2d.py +0 -0
  76. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/quantizer.py +0 -0
  77. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/keras_utils/separable_conv2d.py +0 -0
  78. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/mxnet.py +0 -0
  79. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/mxnet.yaml +0 -0
  80. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/mxnet_utils/__init__.py +0 -0
  81. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/mxnet_utils/util.py +0 -0
  82. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt.py +0 -0
  83. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt.yaml +0 -0
  84. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt_cuda.yaml +0 -0
  85. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt_dml.yaml +0 -0
  86. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt_dnnl.yaml +0 -0
  87. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/onnxrt_trt.yaml +0 -0
  88. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/__init__.py +0 -0
  89. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/calibration.py +0 -0
  90. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/calibrator.py +0 -0
  91. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/__init__.py +0 -0
  92. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/activation.py +0 -0
  93. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/argmax.py +0 -0
  94. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/attention.py +0 -0
  95. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/binary_op.py +0 -0
  96. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/concat.py +0 -0
  97. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/conv.py +0 -0
  98. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +0 -0
  99. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +0 -0
  100. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/gather.py +0 -0
  101. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +0 -0
  102. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/gemm.py +0 -0
  103. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/lstm.py +0 -0
  104. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/matmul.py +0 -0
  105. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/maxpool.py +0 -0
  106. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/norm.py +0 -0
  107. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/ops.py +0 -0
  108. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/pad.py +0 -0
  109. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/pooling.py +0 -0
  110. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/reduce.py +0 -0
  111. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/resize.py +0 -0
  112. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/split.py +0 -0
  113. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/operators/unary_op.py +0 -0
  114. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/quantizer.py +0 -0
  115. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/util.py +0 -0
  116. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/ox_utils/weight_only.py +0 -0
  117. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/pytorch_cpu.yaml +0 -0
  118. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/pytorch_gpu.yaml +0 -0
  119. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/pytorch_ipex.yaml +0 -0
  120. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/query.py +0 -0
  121. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tensorflow.py +0 -0
  122. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tensorflow.yaml +0 -0
  123. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tensorflow_itex.yaml +0 -0
  124. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/__init__.py +0 -0
  125. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_converter.py +0 -0
  126. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +0 -0
  127. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/__init__.py +0 -0
  128. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/__init__.py +0 -0
  129. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py +0 -0
  130. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
  131. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/__init__.py +0 -0
  132. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
  133. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py +0 -0
  134. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
  135. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
  136. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
  137. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py +0 -0
  138. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
  139. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
  140. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
  141. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
  142. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py +0 -0
  143. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
  144. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
  145. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
  146. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
  147. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
  148. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +0 -0
  149. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
  150. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
  151. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
  152. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
  153. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
  154. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py +0 -0
  155. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py +0 -0
  156. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
  157. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +0 -0
  158. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
  159. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
  160. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py +0 -0
  161. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
  162. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
  163. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py +0 -0
  164. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py +0 -0
  165. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/__init__.py +0 -0
  166. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
  167. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py +0 -0
  168. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py +0 -0
  169. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
  170. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
  171. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
  172. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
  173. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
  174. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
  175. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
  176. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py +0 -0
  177. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py +0 -0
  178. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py +0 -0
  179. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +0 -0
  180. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +0 -0
  181. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +0 -0
  182. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +0 -0
  183. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/__init__.py +0 -0
  184. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
  185. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
  186. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
  187. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/graph_util.py +0 -0
  188. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/__init__.py +0 -0
  189. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py +0 -0
  190. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py +0 -0
  191. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py +0 -0
  192. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py +0 -0
  193. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py +0 -0
  194. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py +0 -0
  195. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +0 -0
  196. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +0 -0
  197. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +0 -0
  198. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py +0 -0
  199. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/__init__.py +0 -0
  200. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
  201. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
  202. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
  203. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
  204. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
  205. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
  206. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
  207. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py +0 -0
  208. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py +0 -0
  209. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py +0 -0
  210. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py +0 -0
  211. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py +0 -0
  212. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
  213. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py +0 -0
  214. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py +0 -0
  215. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/quantize_graph_common.py +0 -0
  216. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py +0 -0
  217. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py +0 -0
  218. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +0 -0
  219. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/__init__.py +0 -0
  220. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py +0 -0
  221. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py +0 -0
  222. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py +0 -0
  223. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py +0 -0
  224. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/tf_utils/util.py +0 -0
  225. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/__init__.py +0 -0
  226. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/auto_round.py +0 -0
  227. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/awq.py +0 -0
  228. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/bf16_convert.py +0 -0
  229. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/gptq.py +0 -0
  230. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/hawq_metric.py +0 -0
  231. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py +0 -0
  232. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py +0 -0
  233. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py +0 -0
  234. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py +0 -0
  235. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py +0 -0
  236. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/mixed_precision.py +0 -0
  237. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/model_wrapper.py +0 -0
  238. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/pattern_detector.py +0 -0
  239. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/symbolic_trace.py +0 -0
  240. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/teq.py +0 -0
  241. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/util.py +0 -0
  242. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/__init__.py +0 -0
  243. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/auto_alpha.py +0 -0
  244. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/calibration.py +0 -0
  245. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/graph_trace.py +0 -0
  246. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py +0 -0
  247. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/adaptor/torch_utils/waq/utils.py +0 -0
  248. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/__init__.py +0 -0
  249. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/algorithm.py +0 -0
  250. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/fast_bias_correction.py +0 -0
  251. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/smooth_quant.py +0 -0
  252. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/algorithm/weight_correction.py +0 -0
  253. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/benchmark.py +0 -0
  254. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/base_config.py +0 -0
  255. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/base_tuning.py +0 -0
  256. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/benchmark.py +0 -0
  257. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/tuning_param.py +0 -0
  258. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/__init__.py +0 -0
  259. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/save_load.py +0 -0
  260. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/common/utils/utility.py +0 -0
  261. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/__init__.py +0 -0
  262. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/callbacks.py +0 -0
  263. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/distillation/__init__.py +0 -0
  264. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/distillation/criterions.py +0 -0
  265. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/distillation/optimizers.py +0 -0
  266. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/distillation/utility.py +0 -0
  267. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/hpo/__init__.py +0 -0
  268. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/hpo/sa_optimizer.py +0 -0
  269. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/hpo/search_algorithms.py +0 -0
  270. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/hpo/search_space.py +0 -0
  271. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/__init__.py +0 -0
  272. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/criteria.py +0 -0
  273. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/dsnot.py +0 -0
  274. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/model_slim/__init__.py +0 -0
  275. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/model_slim/auto_slim.py +0 -0
  276. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py +0 -0
  277. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/model_slim/weight_slim.py +0 -0
  278. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/__init__.py +0 -0
  279. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/base.py +0 -0
  280. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/mha.py +0 -0
  281. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/ninm.py +0 -0
  282. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/patterns/nxm.py +0 -0
  283. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/__init__.py +0 -0
  284. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/base.py +0 -0
  285. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/basic.py +0 -0
  286. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/block_mask.py +0 -0
  287. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/mha.py +0 -0
  288. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/pattern_lock.py +0 -0
  289. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/progressive.py +0 -0
  290. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/retrain_free.py +0 -0
  291. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruners/sparse_gpt.py +0 -0
  292. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/pruning.py +0 -0
  293. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/regs.py +0 -0
  294. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/schedulers.py +0 -0
  295. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/tf_criteria.py +0 -0
  296. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/utils.py +0 -0
  297. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/wanda/__init__.py +0 -0
  298. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/wanda/prune.py +0 -0
  299. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/wanda/utils.py +0 -0
  300. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/compression/pruner/wanda/wrapper.py +0 -0
  301. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/config.py +0 -0
  302. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/contrib/__init__.py +0 -0
  303. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/contrib/strategy/__init__.py +0 -0
  304. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/contrib/strategy/sigopt.py +0 -0
  305. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/contrib/strategy/tpe.py +0 -0
  306. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/__init__.py +0 -0
  307. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/__init__.py +0 -0
  308. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/base_dataloader.py +0 -0
  309. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/dataloader.py +0 -0
  310. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/default_dataloader.py +0 -0
  311. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/fetcher.py +0 -0
  312. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/mxnet_dataloader.py +0 -0
  313. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/onnxrt_dataloader.py +0 -0
  314. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/pytorch_dataloader.py +0 -0
  315. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/sampler.py +0 -0
  316. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/dataloaders/tensorflow_dataloader.py +0 -0
  317. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/__init__.py +0 -0
  318. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/bert_dataset.py +0 -0
  319. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/coco_dataset.py +0 -0
  320. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/dataset.py +0 -0
  321. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/dummy_dataset.py +0 -0
  322. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/dummy_dataset_v2.py +0 -0
  323. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/imagenet_dataset.py +0 -0
  324. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/datasets/style_transfer_dataset.py +0 -0
  325. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/filters/__init__.py +0 -0
  326. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/filters/coco_filter.py +0 -0
  327. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/filters/filter.py +0 -0
  328. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/__init__.py +0 -0
  329. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/coco_transform.py +0 -0
  330. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/imagenet_transform.py +0 -0
  331. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/postprocess.py +0 -0
  332. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/tokenization.py +0 -0
  333. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/data/transforms/transform.py +0 -0
  334. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/__init__.py +0 -0
  335. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/bigcode_eval/__init__.py +0 -0
  336. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/bigcode_eval/evaluator.py +0 -0
  337. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/hf_eval/__init__.py +0 -0
  338. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/hf_eval/evaluator.py +0 -0
  339. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/hf_eval/hf_datasets/__init__.py +0 -0
  340. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/hf_eval/hf_datasets/cnn_dailymail.py +0 -0
  341. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/__init__.py +0 -0
  342. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/evaluation/lm_eval/models/__init__.py +0 -0
  343. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/__init__.py +0 -0
  344. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/bleu.py +0 -0
  345. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/bleu_util.py +0 -0
  346. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/coco_label_map.py +0 -0
  347. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/coco_tools.py +0 -0
  348. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/evaluate_squad.py +0 -0
  349. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/f1.py +0 -0
  350. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/metric/metric.py +0 -0
  351. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/mix_precision.py +0 -0
  352. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/__init__.py +0 -0
  353. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/base_model.py +0 -0
  354. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/keras_model.py +0 -0
  355. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/model.py +0 -0
  356. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/mxnet_model.py +0 -0
  357. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/nets_factory.py +0 -0
  358. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/onnx_model.py +0 -0
  359. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/tensorflow_model.py +0 -0
  360. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/model/torch_model.py +0 -0
  361. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/objective.py +0 -0
  362. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/__init__.py +0 -0
  363. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/__init__.py +0 -0
  364. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/factory.py +0 -0
  365. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/onnx_parser/__init__.py +0 -0
  366. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/onnx_parser/factory.py +0 -0
  367. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/onnx_parser/parser.py +0 -0
  368. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/parser.py +0 -0
  369. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/result.py +0 -0
  370. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/tensorflow_parser/__init__.py +0 -0
  371. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/tensorflow_parser/factory.py +0 -0
  372. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/parser/tensorflow_parser/parser.py +0 -0
  373. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/__init__.py +0 -0
  374. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/factory.py +0 -0
  375. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/onnxrt_profiler/__init__.py +0 -0
  376. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +0 -0
  377. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +0 -0
  378. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/onnxrt_profiler/utils.py +0 -0
  379. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/profiler.py +0 -0
  380. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/tensorflow_profiler/__init__.py +0 -0
  381. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +0 -0
  382. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +0 -0
  383. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py +0 -0
  384. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/quantization.py +0 -0
  385. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/__init__.py +0 -0
  386. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/auto.py +0 -0
  387. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/auto_mixed_precision.py +0 -0
  388. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/basic.py +0 -0
  389. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/bayesian.py +0 -0
  390. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/conservative.py +0 -0
  391. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/exhaustive.py +0 -0
  392. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/hawq_v2.py +0 -0
  393. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/mse.py +0 -0
  394. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/mse_v2.py +0 -0
  395. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/random.py +0 -0
  396. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/strategy.py +0 -0
  397. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/__init__.py +0 -0
  398. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/constant.py +0 -0
  399. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/tuning_sampler.py +0 -0
  400. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/tuning_space.py +0 -0
  401. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/tuning_structs.py +0 -0
  402. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/strategy/utils/utility.py +0 -0
  403. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/__init__.py +0 -0
  404. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/__init__.py +0 -0
  405. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/smoother/__init__.py +0 -0
  406. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/smoother/calibration.py +0 -0
  407. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/smoother/core.py +0 -0
  408. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/smoother/scaler.py +0 -0
  409. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/__init__.py +0 -0
  410. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/keras.py +0 -0
  411. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/keras.yaml +0 -0
  412. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +0 -0
  413. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.yaml +0 -0
  414. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow_itex.yaml +0 -0
  415. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/__init__.py +0 -0
  416. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/__init__.py +0 -0
  417. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/conv2d.py +0 -0
  418. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/dense.py +0 -0
  419. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py +0 -0
  420. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/layer_initializer.py +0 -0
  421. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/pool2d.py +0 -0
  422. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/layers/separable_conv2d.py +0 -0
  423. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/quantization/__init__.py +0 -0
  424. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/keras/quantization/config.py +0 -0
  425. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/__init__.py +0 -0
  426. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/algorithm_entry.py +0 -0
  427. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/autotune.py +0 -0
  428. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/config.py +0 -0
  429. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/quantize.py +0 -0
  430. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/__init__.py +0 -0
  431. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_converter.py +0 -0
  432. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/__init__.py +0 -0
  433. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/__init__.py +0 -0
  434. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/bf16_convert.py +0 -0
  435. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
  436. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/__init__.py +0 -0
  437. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
  438. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_layout.py +0 -0
  439. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
  440. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
  441. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
  442. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dilated_contraction.py +0 -0
  443. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
  444. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
  445. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
  446. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
  447. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_constant.py +0 -0
  448. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
  449. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
  450. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
  451. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
  452. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
  453. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_gelu.py +0 -0
  454. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
  455. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
  456. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
  457. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
  458. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
  459. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/grappler_pass.py +0 -0
  460. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/insert_print_node.py +0 -0
  461. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
  462. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py +0 -0
  463. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
  464. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
  465. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/split_shared_input.py +0 -0
  466. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
  467. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
  468. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/switch_optimizer.py +0 -0
  469. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/graph_base.py +0 -0
  470. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/__init__.py +0 -0
  471. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
  472. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py +0 -0
  473. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
  474. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
  475. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
  476. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
  477. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
  478. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
  479. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
  480. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/scale_propagation.py +0 -0
  481. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/__init__.py +0 -0
  482. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
  483. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
  484. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
  485. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/graph_util.py +0 -0
  486. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/__init__.py +0 -0
  487. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/__init__.py +0 -0
  488. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
  489. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
  490. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
  491. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
  492. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
  493. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
  494. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
  495. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/optimize_qdq.py +0 -0
  496. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_base.py +0 -0
  497. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_bn.py +0 -0
  498. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_concatv2.py +0 -0
  499. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_conv.py +0 -0
  500. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
  501. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_matmul.py +0 -0
  502. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_pooling.py +0 -0
  503. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/quantize_graph_common.py +0 -0
  504. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/__init__.py +0 -0
  505. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/bias_correction.py +0 -0
  506. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/graph_transform_base.py +0 -0
  507. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/insert_logging.py +0 -0
  508. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/transform_graph/rerange_quantized_concat.py +0 -0
  509. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/quantization/utils/utility.py +0 -0
  510. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/__init__.py +0 -0
  511. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/constants.py +0 -0
  512. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/data.py +0 -0
  513. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/model.py +0 -0
  514. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/model_wrappers.py +0 -0
  515. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/tensorflow/utils/utility.py +0 -0
  516. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/__init__.py +0 -0
  517. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/__init__.py +0 -0
  518. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/base_algorithm.py +0 -0
  519. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/__init__.py +0 -0
  520. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/__init__.py +0 -0
  521. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/__init__.py +0 -0
  522. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/custom_config/__init__.py +0 -0
  523. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/__init__.py +0 -0
  524. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/prepare_model.py +0 -0
  525. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/scripts/__init__.py +0 -0
  526. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/fp8_quant/utils/logger.py +0 -0
  527. /neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/helper_modules.py → /neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/utils/patched_module_restore_registry.py +0 -0
  528. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/layer_wise/__init__.py +0 -0
  529. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/layer_wise/load.py +0 -0
  530. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/layer_wise/modified_pickle.py +0 -0
  531. {neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/scripts/regression_detection → neural_compressor-3.2/neural_compressor/torch/algorithms/mixed_low_precision}/__init__.py +0 -0
  532. {neural_compressor-3.1/neural_compressor/torch/algorithms/fp8_quant/utils → neural_compressor-3.2/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods}/__init__.py +0 -0
  533. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/gptq.py +0 -0
  534. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot.py +0 -0
  535. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot_utils.py +0 -0
  536. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_precision/__init__.py +0 -0
  537. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_precision/half_precision_convert.py +0 -0
  538. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mixed_precision/module_wrappers.py +0 -0
  539. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mx_quant/__init__.py +0 -0
  540. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mx_quant/mx.py +0 -0
  541. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/mx_quant/utils.py +0 -0
  542. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/__init__.py +0 -0
  543. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/core.py +0 -0
  544. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/half_precision_rewriter.py +0 -0
  545. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/save_load.py +0 -0
  546. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/pt2e_quant/utility.py +0 -0
  547. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/smooth_quant/__init__.py +0 -0
  548. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/smooth_quant/save_load.py +0 -0
  549. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py +0 -0
  550. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/smooth_quant/utility.py +0 -0
  551. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/static_quant/__init__.py +0 -0
  552. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/static_quant/save_load.py +0 -0
  553. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/static_quant/static_quant.py +0 -0
  554. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/__init__.py +0 -0
  555. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/awq.py +0 -0
  556. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/__init__.py +0 -0
  557. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/bitpack.py +0 -0
  558. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/config.py +0 -0
  559. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/core.py +0 -0
  560. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/optimizer.py +0 -0
  561. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/qtensor.py +0 -0
  562. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/hqq/quantizer.py +0 -0
  563. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/rtn.py +0 -0
  564. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/algorithms/weight_only/teq.py +0 -0
  565. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/export/__init__.py +0 -0
  566. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/export/pt2e_export.py +0 -0
  567. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/quantization/autotune.py +0 -0
  568. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/__init__.py +0 -0
  569. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/torch/utils/bit_packer.py +0 -0
  570. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/training.py +0 -0
  571. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/__init__.py +0 -0
  572. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/generation/__init__.py +0 -0
  573. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/generation/beam_search.py +0 -0
  574. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/generation/greedy_search.py +0 -0
  575. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/models/__init__.py +0 -0
  576. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/quantization/__init__.py +0 -0
  577. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/transformers/utils/__init__.py +0 -0
  578. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/__init__.py +0 -0
  579. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/collect_layer_histogram.py +0 -0
  580. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/constant.py +0 -0
  581. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/create_obj_from_config.py +0 -0
  582. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/export/__init__.py +0 -0
  583. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/export/qlinear2qdq.py +0 -0
  584. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/export/tf2onnx.py +0 -0
  585. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/export/torch2onnx.py +0 -0
  586. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/kl_divergence.py +0 -0
  587. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/load_huggingface.py +0 -0
  588. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/logger.py +0 -0
  589. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/options.py +0 -0
  590. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/pytorch.py +0 -0
  591. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/utility.py +0 -0
  592. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor/utils/weights_details.py +0 -0
  593. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/dependency_links.txt +0 -0
  594. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/entry_points.txt +0 -0
  595. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/requires.txt +0 -0
  596. {neural_compressor-3.1 → neural_compressor-3.2}/neural_compressor.egg-info/top_level.txt +0 -0
  597. {neural_compressor-3.1 → neural_compressor-3.2}/pyproject.toml +0 -0
  598. {neural_compressor-3.1 → neural_compressor-3.2}/setup.cfg +0 -0
  599. {neural_compressor-3.1 → neural_compressor-3.2}/third-party-programs.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: neural_compressor
3
- Version: 3.1
3
+ Version: 3.2
4
4
  Summary: Repository of Intel® Neural Compressor
5
5
  Home-page: https://github.com/intel/neural-compressor
6
6
  Author: Intel AIPT Team
@@ -51,7 +51,7 @@ Intel® Neural Compressor
51
51
  <h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
52
52
 
53
53
  [![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
54
- [![version](https://img.shields.io/badge/release-3.1-green)](https://github.com/intel/neural-compressor/releases)
54
+ [![version](https://img.shields.io/badge/release-3.2-green)](https://github.com/intel/neural-compressor/releases)
55
55
  [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
56
56
  [![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
57
57
  [![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
@@ -124,7 +124,7 @@ Following example code demonstrates FP8 Quantization, it is supported by Intel G
124
124
  To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
125
125
  ```bash
126
126
  # Run a container with an interactive shell
127
- docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
127
+ docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
128
128
  ```
129
129
  Run the example:
130
130
  ```python
@@ -133,14 +133,21 @@ from neural_compressor.torch.quantization import (
133
133
  prepare,
134
134
  convert,
135
135
  )
136
+
137
+ import torch
136
138
  import torchvision.models as models
137
139
 
138
140
  model = models.resnet18()
139
141
  qconfig = FP8Config(fp8_config="E4M3")
140
142
  model = prepare(model, qconfig)
141
- # customer defined calibration
142
- calib_func(model)
143
+
144
+ # Customer defined calibration. Below is a dummy calibration
145
+ model(torch.randn(1, 3, 224, 224).to("hpu"))
146
+
143
147
  model = convert(model)
148
+
149
+ output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
150
+ print(output.shape)
144
151
  ```
145
152
 
146
153
  ### Weight-Only Large Language Model Loading (LLMs)
@@ -5,7 +5,7 @@ Intel® Neural Compressor
5
5
  <h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
6
6
 
7
7
  [![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
8
- [![version](https://img.shields.io/badge/release-3.1-green)](https://github.com/intel/neural-compressor/releases)
8
+ [![version](https://img.shields.io/badge/release-3.2-green)](https://github.com/intel/neural-compressor/releases)
9
9
  [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
10
10
  [![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
11
11
  [![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
@@ -78,7 +78,7 @@ Following example code demonstrates FP8 Quantization, it is supported by Intel G
78
78
  To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
79
79
  ```bash
80
80
  # Run a container with an interactive shell
81
- docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest
81
+ docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
82
82
  ```
83
83
  Run the example:
84
84
  ```python
@@ -87,14 +87,21 @@ from neural_compressor.torch.quantization import (
87
87
  prepare,
88
88
  convert,
89
89
  )
90
+
91
+ import torch
90
92
  import torchvision.models as models
91
93
 
92
94
  model = models.resnet18()
93
95
  qconfig = FP8Config(fp8_config="E4M3")
94
96
  model = prepare(model, qconfig)
95
- # customer defined calibration
96
- calib_func(model)
97
+
98
+ # Customer defined calibration. Below is a dummy calibration
99
+ model(torch.randn(1, 3, 224, 224).to("hpu"))
100
+
97
101
  model = convert(model)
102
+
103
+ output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
104
+ print(output.shape)
98
105
  ```
99
106
 
100
107
  ### Weight-Only Large Language Model Loading (LLMs)
@@ -295,6 +295,9 @@ class ORTSmoothQuant:
295
295
  return False
296
296
  for inp in node.input:
297
297
  if self.model.get_initializer(inp) is not None:
298
+ # Ensure that mul operators with shared initializer will not be absorbed.
299
+ if self.model.get_initializer_share_num(inp) > 1:
300
+ return False
298
301
  key = node.input[0].split("_smooth_output")[0]
299
302
  tensor = self.model.get_initializer(inp)
300
303
  new_tensor = (
@@ -4926,7 +4926,7 @@ class PyTorchWeightOnlyAdaptor(TemplateAdaptor):
4926
4926
  act_group_size = self.recipes["autoround_args"].get("act_group_size", None)
4927
4927
  act_sym = self.recipes["autoround_args"].get("act_sym", None)
4928
4928
  act_dynamic = self.recipes["autoround_args"].get("act_dynamic", True)
4929
- quant_block_list = self.recipes["autoround_args"].get("quant_block_list", None)
4929
+ to_quant_block_names = self.recipes["autoround_args"].get("to_quant_block_names", None)
4930
4930
  use_layer_wise = self.recipes["autoround_args"].get("use_layer_wise", False)
4931
4931
 
4932
4932
  if dataloader is not None:
@@ -4959,7 +4959,7 @@ class PyTorchWeightOnlyAdaptor(TemplateAdaptor):
4959
4959
  dynamic_max_gap=dynamic_max_gap,
4960
4960
  data_type=data_type,
4961
4961
  scale_dtype=scale_dtype,
4962
- quant_block_list=quant_block_list,
4962
+ to_quant_block_names=to_quant_block_names,
4963
4963
  act_bits=act_bits,
4964
4964
  act_group_size=act_group_size,
4965
4965
  act_sym=act_sym,
@@ -706,7 +706,7 @@ def autoround_quantize(
706
706
  dynamic_max_gap: int = -1,
707
707
  data_type: str = "int", ##only support int for now
708
708
  scale_dtype: str = "fp16",
709
- quant_block_list: list = None,
709
+ to_quant_block_names: list = None,
710
710
  act_bits: int = 32,
711
711
  act_group_size: int = None,
712
712
  act_sym: bool = None,
@@ -761,7 +761,7 @@ def autoround_quantize(
761
761
  data_type (str): The data type to be used (default is "int").
762
762
  scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
763
763
  have different choices.
764
- quant_block_list (list): A list whose elements are list of block's layer names to be quantized.
764
+ to_quant_block_names (list): A list whose elements are list of block's layer names to be quantized.
765
765
  act_bits (int): Number of bits for activation quantization. Default is 32.
766
766
  act_group_size (int): Group size for activation quantization. Default is None.
767
767
  act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -800,7 +800,7 @@ def autoround_quantize(
800
800
  dynamic_max_gap=dynamic_max_gap,
801
801
  data_type=data_type, ## only support data_type
802
802
  scale_dtype=scale_dtype,
803
- quant_block_list=quant_block_list,
803
+ to_quant_block_names=to_quant_block_names,
804
804
  act_bits=act_bits,
805
805
  act_group_size=act_group_size,
806
806
  act_sym=act_sym,
@@ -27,7 +27,7 @@ from neural_compressor.common.utils import (
27
27
  dump_elapsed_time,
28
28
  )
29
29
  from neural_compressor.common.base_config import options
30
-
30
+ from neural_compressor.common.version import __version__
31
31
 
32
32
  __all__ = [
33
33
  "options",
@@ -56,6 +56,7 @@ class Mode(Enum):
56
56
  PREPARE = "prepare"
57
57
  CONVERT = "convert"
58
58
  QUANTIZE = "quantize"
59
+ LOAD = "load"
59
60
 
60
61
 
61
62
  SERVER_PROCESSOR_BRAND_KEY_WORLD_LST = ["Xeon"]
@@ -17,6 +17,7 @@
17
17
  """Logger: handles logging functionalities."""
18
18
 
19
19
 
20
+ import functools
20
21
  import logging
21
22
  import os
22
23
 
@@ -137,6 +138,12 @@ class Logger(object):
137
138
  else:
138
139
  Logger().get_logger().warning(msg, *args, **kwargs)
139
140
 
141
+ @functools.lru_cache(None)
142
+ def warning_once(msg, *args, **kwargs):
143
+ """Output log with the warning level only once."""
144
+ Logger.warning("Below warning will be shown only once:")
145
+ Logger.warning(msg, *args, **kwargs)
146
+
140
147
 
141
148
  level = Logger().get_logger().level
142
149
  level_name = logging.getLevelName(level)
@@ -152,6 +159,8 @@ def _get_log_msg(mode):
152
159
  log_msg = "Preparation"
153
160
  elif mode == Mode.CONVERT: # pragma: no cover
154
161
  log_msg = "Conversion"
162
+ elif mode == Mode.LOAD: # pragma: no cover
163
+ log_msg = "Loading"
155
164
  return log_msg
156
165
 
157
166
 
@@ -15,4 +15,4 @@
15
15
  # See the License for the specific language governing permissions and
16
16
  # limitations under the License.
17
17
  """Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques."""
18
- __version__ = "3.1"
18
+ __version__ = "3.2"
@@ -199,6 +199,7 @@ def cli_evaluate(args) -> None:
199
199
  },
200
200
  )
201
201
  lm.pad_to_buckets = args.pad_to_buckets
202
+ lm.buckets = args.buckets
202
203
 
203
204
  results = evaluator.simple_evaluate(
204
205
  model=lm,
@@ -116,11 +116,14 @@ class HFLM(TemplateLM):
116
116
  peft: Optional[str] = None,
117
117
  autogptq: Optional[Union[bool, str]] = False,
118
118
  pad_to_buckets: Optional[Union[bool]] = False,
119
+ buckets: Optional[list] = [32, 64, 128, 256, 512, 1024, 2048, 4096],
119
120
  model_format: Optional[str] = "torch",
120
121
  **kwargs,
121
122
  ) -> None:
122
123
  super().__init__()
123
124
  self.pad_to_buckets = pad_to_buckets
125
+ self.buckets = buckets
126
+ self.last_bucket = -1
124
127
  self.model_format = model_format
125
128
  # optionally: take in an already-initialized transformers.PreTrainedModel
126
129
  if not isinstance(pretrained, str):
@@ -874,6 +877,19 @@ class HFLM(TemplateLM):
874
877
  elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM:
875
878
  return self.tokenizer.decode(tokens, skip_special_tokens=skip_special_tokens)
876
879
 
880
+ def find_bucket(self, length):
881
+ suitable_buckets = [b for b in self.buckets if b >= length]
882
+ if len(suitable_buckets) == 0:
883
+ eval_logger.error(f"The input_length={length} exceeds the maximum value in buckets={self.buckets}")
884
+ eval_logger.error("Please add a higher value into the buckets list for this case.")
885
+ exit(0)
886
+ else:
887
+ if self.last_bucket != suitable_buckets[0]:
888
+ if hasattr(self.model, "clear_cache"):
889
+ self.model.clear_cache() # clear HPU graph cache to avoid OOM
890
+ self.last_bucket = suitable_buckets[0]
891
+ return self.last_bucket
892
+
877
893
  def _model_call(self, inps, attn_mask=None, labels=None):
878
894
  """
879
895
  :param inps: torch.Tensor
@@ -943,8 +959,7 @@ class HFLM(TemplateLM):
943
959
  if self.pad_to_buckets: # use buckets to pad inputs
944
960
  bs, seq_length = inps.shape
945
961
  padding_length = 0
946
- buckets = [64, 128, 256, 512, 1024, 2048, 4096, 8192]
947
- bucket_length = [b for b in buckets if b >= seq_length][0]
962
+ bucket_length = self.find_bucket(seq_length)
948
963
  padding_length = bucket_length - seq_length
949
964
  inps = F.pad(inps, (0, padding_length), value=self.model.config.pad_token_id)
950
965
  output = self.model(inps)
@@ -954,6 +969,8 @@ class HFLM(TemplateLM):
954
969
  output = output.logits
955
970
  if self.pad_to_buckets and padding_length != 0: # use buckets to pad inputs
956
971
  output = output[:, :-padding_length, :]
972
+ if "hpu" in output.device.type: # make sure return fp32 tensor for HPU, TODO: root cause
973
+ output = output.to(torch.float32)
957
974
  return output
958
975
 
959
976
  def _model_generate(self, context, max_length, stop, **generation_kwargs):
@@ -49,6 +49,7 @@ class LMEvalParser:
49
49
  seed=[0, 1234, 1234],
50
50
  trust_remote_code=False,
51
51
  pad_to_buckets=None, # used by HPU to align input length for performance.
52
+ buckets=[32, 64, 128, 256, 512, 1024, 2048, 4096], # used by HPU to limit input length range.
52
53
  ):
53
54
  self.model = model
54
55
  self.tasks = tasks
@@ -81,3 +82,4 @@ class LMEvalParser:
81
82
  self.pad_to_buckets = False
82
83
  else:
83
84
  self.pad_to_buckets = pad_to_buckets
85
+ self.buckets = buckets
@@ -19,4 +19,24 @@ from neural_compressor.torch.algorithms.fp8_quant.common import (
19
19
  with_patched_module,
20
20
  )
21
21
  from neural_compressor.torch.algorithms.fp8_quant.prepare_quant.prepare_model import finish_measurements, prep_model
22
- from neural_compressor.torch.algorithms.fp8_quant.fp8_quant import FP8Quantizer
22
+ from neural_compressor.torch.algorithms.fp8_quant.quantizer import FP8Quantizer
23
+ from neural_compressor.torch.algorithms.fp8_quant.patched_module_base import (
24
+ PatchedModuleBase,
25
+ register_patched_module,
26
+ )
27
+
28
+ from neural_compressor.torch.algorithms.fp8_quant.scaling_method_base import (
29
+ ScalingMethodBase,
30
+ register_scaling_methods,
31
+ )
32
+ from neural_compressor.torch.algorithms.fp8_quant.observer import (
33
+ ObserverBase,
34
+ register_observer,
35
+ )
36
+ from neural_compressor.torch.algorithms.fp8_quant.model_configs import (
37
+ ModuleConfig,
38
+ ModuleInfo,
39
+ ModuleType,
40
+ ModuleExtraConfig
41
+ )
42
+ from neural_compressor.torch.algorithms.fp8_quant.save_load import save, load
@@ -23,7 +23,15 @@ import torch
23
23
  from .._quant_common.helper_modules import *
24
24
  from .._quant_common.quant_config import get_hqt_config
25
25
  from ..utils.logger import logger
26
-
26
+ from neural_compressor.torch.algorithms.fp8_quant.model_configs import (
27
+ ModuleInfo,
28
+ ModuleConfig,
29
+ ModuleType,
30
+ ModuleExtraConfig,
31
+ get_patched_module_table,
32
+ get_patched_module_type_table,
33
+ )
34
+ from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator
27
35
  deepspeed_exists = False
28
36
  if importlib.util.find_spec("deepspeed"): # check if deepspeed is installed
29
37
  deepspeed_exists = True
@@ -31,38 +39,7 @@ if importlib.util.find_spec("deepspeed"): # check if deepspeed is installed
31
39
  UNMEASURED_MODELS = "UnmeasuredModels"
32
40
 
33
41
 
34
- class ModuleInfo:
35
- def __init__(self, type, patched_module, should_measure=True):
36
- self.type = type
37
- self.patched_module = patched_module
38
- self.should_measure = should_measure
39
-
40
-
41
- class ModuleConfig:
42
- def __init__(self, inputs=(None,), outputs=(None,), params=None):
43
- self.inputs = inputs
44
- self.outputs = outputs
45
- self.params = params if params is not None else {}
46
-
47
-
48
- class ModuleExtraConfig:
49
- def __init__(self, inputs=(None,), outputs=(None,), params=None, scale=None, config_params=None):
50
- self.inputs = inputs
51
- self.outputs = outputs
52
- self.params = params if params is not None else {}
53
- self.scale = scale
54
- self.config_params = config_params if config_params is not None else {}
55
-
56
-
57
- class ModuleType:
58
- def __init__(self, num_inputs, param_names, num_outputs, required_output):
59
- self.num_inputs = num_inputs
60
- self.param_names = param_names
61
- self.num_outputs = num_outputs
62
- self.required_output = required_output
63
-
64
-
65
- mod_types = {
42
+ _mod_types = {
66
43
  "linear": ModuleType(1, ["weight"], 1, False),
67
44
  "matmul": ModuleType(2, [], 1, False),
68
45
  "kv_cache": ModuleType(1, [], 1, False),
@@ -110,7 +87,7 @@ def save_file(model, d, source_format, fname, mode):
110
87
  config = get_hqt_config(model)
111
88
  logger.debug("Saving %s file: %s", mode, fname)
112
89
  ext = os.path.splitext(fname)[1]
113
- target_format = file_functions[ext]["format"]
90
+ target_format = file_functions[ext]['format']
114
91
  dc = rec_fn(d, format_functions[(source_format, target_format)])
115
92
  df = {
116
93
  "GlobalRank": config.cfg["global_rank"],
@@ -119,7 +96,7 @@ def save_file(model, d, source_format, fname, mode):
119
96
  "Nodes": dc,
120
97
  }
121
98
  try:
122
- file_functions[ext]["save"](df, fname)
99
+ file_functions[ext]['save'](df, fname)
123
100
  except:
124
101
  pass
125
102
 
@@ -127,10 +104,10 @@ def save_file(model, d, source_format, fname, mode):
127
104
  def load_file(fname, target_format, fail_on_file_not_exist):
128
105
  logger.debug("Loading file: %s", fname)
129
106
  ext = os.path.splitext(fname)[1]
130
- source_format = file_functions[ext]["format"]
107
+ source_format = file_functions[ext]['format']
131
108
  d = {}
132
109
  if os.path.isfile(fname):
133
- d = file_functions[ext]["load"](fname)
110
+ d = file_functions[ext]['load'](fname)
134
111
  elif fail_on_file_not_exist:
135
112
  raise FileNotFoundError(f"Failed to load file {fname}")
136
113
  if "Nodes" in d:
@@ -190,17 +167,17 @@ def load_scales(fname, target_format):
190
167
  return d
191
168
 
192
169
 
193
- def convert_scales_to_tensors_dict(scales_obj, scales_file_format, hp_dtype):
170
+ def convert_scales_to_tensors_dict(scales_obj, scales_file_format, hp_dtype, device="hpu"):
194
171
  scales_temp = {k: scales_obj[k].__dict__ for k in scales_obj}
195
172
  scales_temp = format_functions_rec((scales_file_format, torch.Tensor))(scales_temp)
196
- scales_temp = rec_fn(scales_temp, lambda x: x.to(dtype=hp_dtype, device="hpu"))
173
+ scales_temp = rec_fn(scales_temp, lambda x: x.to(dtype=hp_dtype, device=device))
197
174
  scales = {k: ModuleConfig(**scales_temp[k]) for k in scales_temp}
198
175
  return scales
199
176
 
200
177
 
201
178
  file_functions = {
202
- ".json": {"format": list, "save": save_json, "load": load_json},
203
- ".npz": {"format": np.ndarray, "save": save_npz, "load": load_npz},
179
+ ".json": {'format': list, 'save': save_json, 'load': load_json},
180
+ ".npz": {'format': np.ndarray, 'save': save_npz, 'load': load_npz}
204
181
  }
205
182
 
206
183
  format_functions = {
@@ -219,7 +196,7 @@ format_functions = {
219
196
 
220
197
  format_functions_rec = lambda k: functools.partial(rec_fn, fn=format_functions[k])
221
198
 
222
- mod_default_dict = {
199
+ _mod_default_dict = {
223
200
  "Matmul": ModuleInfo("matmul", PatchedMatmul),
224
201
  "Linear": ModuleInfo("linear", PatchedLinear),
225
202
  "RowParallelLinear": ModuleInfo("linear", PatchedRowParallelLinear),
@@ -241,7 +218,7 @@ mod_default_dict = {
241
218
 
242
219
 
243
220
  if deepspeed_exists:
244
- mod_default_dict.update(
221
+ _mod_default_dict.update(
245
222
  {
246
223
  "LinearLayer": ModuleInfo("linear", PatchedLinear),
247
224
  "LinearAllreduce": ModuleInfo("linear", PatchedLinearAllReduce),
@@ -250,6 +227,25 @@ if deepspeed_exists:
250
227
  }
251
228
  )
252
229
 
230
+ @functools.lru_cache(maxsize=None)
231
+ def _import_hpu_modules():
232
+ from neural_compressor.torch.algorithms.fp8_quant.patched_module_base import (
233
+ PATCHED_MODULE_TABLE, PATCHED_MODULE_TYPES_TABLE
234
+ )
235
+ cur_accelerator = auto_detect_accelerator()
236
+ if not cur_accelerator.current_device_name().startswith("hpu"):
237
+ return
238
+ PATCHED_MODULE_TABLE["hpu"].update(_mod_default_dict)
239
+ PATCHED_MODULE_TYPES_TABLE["hpu"].update(_mod_types)
240
+
241
+
242
+ _import_hpu_modules()
243
+
244
+ mod_default_dict = get_patched_module_table()
245
+ mod_types = get_patched_module_type_table()
246
+
247
+ def get_white_list():
248
+ return list(mod_default_dict.keys())
253
249
 
254
250
  class ModInstInfo:
255
251
  def __init__(self, name, parent):
@@ -267,3 +263,7 @@ def generate_model_info(model):
267
263
  create_mod_info_recursion(mod)
268
264
 
269
265
  create_mod_info_recursion(model)
266
+
267
+ def get_device_type_for_scales(mod):
268
+ config = get_hqt_config(mod).cfg
269
+ return config["device_for_scales"]
@@ -12,12 +12,13 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import torch
15
16
  import habana_frameworks.torch.core as htcore
16
17
  import habana_frameworks.torch.utils.experimental as htexp
17
- import torch
18
-
19
18
  from .common import ModuleConfig
20
- from .quant_dequant import cast_fcn, cast_to_fp8_fcn, descale_fcn, scale_fcn
19
+ from .quant_dequant import cast_to_fp8_fcn, cast_fcn, descale_fcn, scale_fcn
20
+ from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator
21
+ cur_accelerator = auto_detect_accelerator()
21
22
 
22
23
  GAUDI2 = htexp.synDeviceType.synDeviceGaudi2
23
24
  GAUDI3 = htexp.synDeviceType.synDeviceGaudi3
@@ -116,9 +117,9 @@ def scale_to_pow2(scale):
116
117
  # for Gaudi2 the range is 16^-2..16^1 so we change 2 with 16 and remember that:
117
118
  # 16 = 2^4, log16(m)=log2(m)/log2(16)=log2(m)/4, and we get:
118
119
  # we choose s=16^ciel(log16(m))=2^4^ciel(log2(m)/4)=2^(4*ciel(log2(m)/4))=2^(ciel(log2(m)/4)*4)
119
- def scale_to_pow2_hw(scale, device_type):
120
+ def scale_to_pow2_hw(scale, device_for_scales):
120
121
  scale_pow2 = scale_to_pow2(scale)
121
- min_scale, max_scale, scale_factor = FP8_143_SCALES_TRAITS[device_type]
122
+ min_scale, max_scale, scale_factor = FP8_143_SCALES_TRAITS[device_for_scales]
122
123
  scale_pow2_hw = torch.minimum(
123
124
  torch.maximum(
124
125
  2 ** (torch.ceil(torch.log2(scale_pow2) / scale_factor) * scale_factor),
@@ -142,13 +143,13 @@ def mmse_scale_multi(x, ref_scale, scales, lp_dtype, hp_dtype):
142
143
  xscales = rs * sv
143
144
  y = scale_fcn(x, xscales)
144
145
  y = cast_to_fp8_fcn(y, lp_dtype)
145
- htcore.mark_step() # we are measuring the error so we want to avoid fusion of the converts
146
+ cur_accelerator.synchronize() # we are measuring the error so we want to avoid fusion of the converts
146
147
  y = cast_fcn(y, hp_dtype)
147
148
  y = descale_fcn(y, xscales)
148
149
  err = torch.sum((x - y) ** 2, dim=sum_axis)
149
150
  opt_scale = torch.where(err < opt_err, sv, opt_scale)
150
151
  opt_err = torch.where(err < opt_err, err, opt_err)
151
- htcore.mark_step()
152
+ cur_accelerator.synchronize()
152
153
  return opt_scale * ref_scale
153
154
 
154
155
 
@@ -160,13 +161,13 @@ def mmse_scale(x, scales, lp_dtype, hp_dtype):
160
161
  for s in scales:
161
162
  y = scale_fcn(x, s)
162
163
  y = cast_to_fp8_fcn(y, lp_dtype)
163
- htcore.mark_step() # we are measuring the error so we want to avoid fusion of the converts
164
+ cur_accelerator.synchronize() # we are measuring the error so we want to avoid fusion of the converts
164
165
  y = cast_fcn(y, hp_dtype)
165
166
  y = descale_fcn(y, s)
166
167
  err = torch.norm(x - y)
167
168
  opt_scale = torch.where(err <= opt_err, s, opt_scale)
168
169
  opt_err = torch.where(err <= opt_err, err, opt_err)
169
- htcore.mark_step()
170
+ cur_accelerator.synchronize()
170
171
  return opt_scale
171
172
 
172
173