neural-compressor 3.2__tar.gz → 3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (603) hide show
  1. {neural_compressor-3.2 → neural_compressor-3.3}/PKG-INFO +35 -50
  2. {neural_compressor-3.2 → neural_compressor-3.3}/README.md +21 -48
  3. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/adaptor.py +1 -1
  4. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/mxnet.py +1 -1
  5. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt.py +2 -2
  6. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_util.py +1 -1
  7. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/gptq.py +6 -2
  8. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/hawq_metric.py +0 -3
  9. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py +2 -2
  10. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/base_config.py +7 -1
  11. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/version.py +1 -1
  12. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/dummy_dataset_v2.py +1 -1
  13. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/accuracy.py +12 -1
  14. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/utils.py +17 -0
  15. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_util.py +1 -1
  16. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/common.py +6 -88
  17. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/fp_utils.py +27 -38
  18. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/measure.py +5 -2
  19. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/patching_common.py +113 -0
  20. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/quant_dequant.py +98 -37
  21. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/quantize.py +45 -34
  22. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_hpu_ops.py +196 -0
  23. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale.py +196 -0
  24. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_handler.py +155 -0
  25. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/__init__.py +4 -4
  26. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/ops_quantizer.py +311 -0
  27. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/round_scales_function.py +65 -0
  28. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/scale_method_factory.py +194 -0
  29. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/scales_method.py +246 -0
  30. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/utils.py +37 -16
  31. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py +448 -318
  32. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/quant_config.py +18 -11
  33. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/common.py +1 -1
  34. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/model_configs.py +1 -4
  35. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/patched_module_base.py +47 -26
  36. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/quantizer.py +6 -0
  37. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/save_load.py +371 -50
  38. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/scripts/fix_measurements.py → neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/scripts/postprocessing_vllm_measurements.py +60 -28
  39. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/utils/patched_module_restore_registry.py +9 -0
  40. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/core.py +3 -1
  41. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/half_precision_rewriter.py +46 -20
  42. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/utility.py +2 -1
  43. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/autoround.py +6 -4
  44. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/gptq.py +2 -1
  45. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/save_load.py +116 -28
  46. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/autotune.py +12 -3
  47. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/config.py +17 -3
  48. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/save_load_entry.py +21 -11
  49. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/constants.py +1 -0
  50. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/environ.py +8 -0
  51. neural_compressor-3.3/neural_compressor/torch/utils/llm_utility.py +126 -0
  52. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/utility.py +14 -1
  53. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/__init__.py +1 -0
  54. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/models/__init__.py +8 -1
  55. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/models/modeling_auto.py +46 -17
  56. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/quantization/utils.py +110 -15
  57. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/utils/quantization_config.py +13 -0
  58. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/version.py +1 -1
  59. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/PKG-INFO +35 -50
  60. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/SOURCES.txt +8 -4
  61. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale.py +0 -500
  62. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale_handler.py +0 -50
  63. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/max_abs.py +0 -553
  64. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/single_scale.py +0 -96
  65. neural_compressor-3.2/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/smooth_quant.py +0 -131
  66. {neural_compressor-3.2 → neural_compressor-3.3}/LICENSE +0 -0
  67. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/__init__.py +0 -0
  68. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/__init__.py +0 -0
  69. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras.py +0 -0
  70. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras.yaml +0 -0
  71. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/__init__.py +0 -0
  72. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/conv2d.py +0 -0
  73. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/dense.py +0 -0
  74. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py +0 -0
  75. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/pool2d.py +0 -0
  76. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/quantizer.py +0 -0
  77. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/keras_utils/separable_conv2d.py +0 -0
  78. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/mxnet.yaml +0 -0
  79. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/mxnet_utils/__init__.py +0 -0
  80. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/mxnet_utils/util.py +0 -0
  81. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt.yaml +0 -0
  82. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt_cuda.yaml +0 -0
  83. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt_dml.yaml +0 -0
  84. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt_dnnl.yaml +0 -0
  85. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/onnxrt_trt.yaml +0 -0
  86. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/__init__.py +0 -0
  87. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/calibration.py +0 -0
  88. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/calibrator.py +0 -0
  89. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/__init__.py +0 -0
  90. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/activation.py +0 -0
  91. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/argmax.py +0 -0
  92. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/attention.py +0 -0
  93. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/binary_op.py +0 -0
  94. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/concat.py +0 -0
  95. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/conv.py +0 -0
  96. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +0 -0
  97. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +0 -0
  98. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/gather.py +0 -0
  99. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +0 -0
  100. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/gemm.py +0 -0
  101. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/lstm.py +0 -0
  102. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/matmul.py +0 -0
  103. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/maxpool.py +0 -0
  104. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/norm.py +0 -0
  105. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/ops.py +0 -0
  106. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/pad.py +0 -0
  107. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/pooling.py +0 -0
  108. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/reduce.py +0 -0
  109. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/resize.py +0 -0
  110. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/split.py +0 -0
  111. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/operators/unary_op.py +0 -0
  112. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/quantizer.py +0 -0
  113. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/smooth_quant.py +0 -0
  114. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/util.py +0 -0
  115. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/ox_utils/weight_only.py +0 -0
  116. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/pytorch.py +0 -0
  117. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/pytorch_cpu.yaml +0 -0
  118. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/pytorch_gpu.yaml +0 -0
  119. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/pytorch_ipex.yaml +0 -0
  120. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/query.py +0 -0
  121. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tensorflow.py +0 -0
  122. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tensorflow.yaml +0 -0
  123. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tensorflow_itex.yaml +0 -0
  124. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/__init__.py +0 -0
  125. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_converter.py +0 -0
  126. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +0 -0
  127. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/__init__.py +0 -0
  128. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/__init__.py +0 -0
  129. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py +0 -0
  130. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
  131. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/__init__.py +0 -0
  132. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
  133. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py +0 -0
  134. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
  135. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
  136. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
  137. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py +0 -0
  138. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
  139. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
  140. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
  141. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
  142. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py +0 -0
  143. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
  144. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
  145. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
  146. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
  147. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
  148. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +0 -0
  149. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
  150. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
  151. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
  152. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
  153. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
  154. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py +0 -0
  155. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py +0 -0
  156. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
  157. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +0 -0
  158. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
  159. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
  160. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py +0 -0
  161. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
  162. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
  163. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py +0 -0
  164. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py +0 -0
  165. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/__init__.py +0 -0
  166. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
  167. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py +0 -0
  168. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py +0 -0
  169. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
  170. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
  171. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
  172. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
  173. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
  174. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
  175. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
  176. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py +0 -0
  177. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py +0 -0
  178. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py +0 -0
  179. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +0 -0
  180. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +0 -0
  181. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +0 -0
  182. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +0 -0
  183. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/__init__.py +0 -0
  184. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
  185. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
  186. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
  187. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/__init__.py +0 -0
  188. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py +0 -0
  189. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py +0 -0
  190. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py +0 -0
  191. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py +0 -0
  192. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py +0 -0
  193. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py +0 -0
  194. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +0 -0
  195. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +0 -0
  196. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +0 -0
  197. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py +0 -0
  198. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/__init__.py +0 -0
  199. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
  200. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
  201. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
  202. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
  203. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
  204. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
  205. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
  206. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py +0 -0
  207. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py +0 -0
  208. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py +0 -0
  209. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py +0 -0
  210. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py +0 -0
  211. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
  212. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py +0 -0
  213. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py +0 -0
  214. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/quantize_graph_common.py +0 -0
  215. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py +0 -0
  216. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py +0 -0
  217. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +0 -0
  218. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/__init__.py +0 -0
  219. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py +0 -0
  220. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py +0 -0
  221. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py +0 -0
  222. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py +0 -0
  223. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/tf_utils/util.py +0 -0
  224. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/__init__.py +0 -0
  225. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/auto_round.py +0 -0
  226. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/awq.py +0 -0
  227. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/bf16_convert.py +0 -0
  228. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py +0 -0
  229. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py +0 -0
  230. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py +0 -0
  231. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py +0 -0
  232. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/mixed_precision.py +0 -0
  233. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/model_wrapper.py +0 -0
  234. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/pattern_detector.py +0 -0
  235. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/symbolic_trace.py +0 -0
  236. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/teq.py +0 -0
  237. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/util.py +0 -0
  238. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/__init__.py +0 -0
  239. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/auto_alpha.py +0 -0
  240. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/calibration.py +0 -0
  241. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/graph_trace.py +0 -0
  242. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py +0 -0
  243. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/waq/utils.py +0 -0
  244. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/adaptor/torch_utils/weight_only.py +0 -0
  245. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/__init__.py +0 -0
  246. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/algorithm.py +0 -0
  247. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/fast_bias_correction.py +0 -0
  248. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/smooth_quant.py +0 -0
  249. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/algorithm/weight_correction.py +0 -0
  250. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/benchmark.py +0 -0
  251. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/__init__.py +0 -0
  252. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/base_tuning.py +0 -0
  253. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/benchmark.py +0 -0
  254. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/tuning_param.py +0 -0
  255. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/__init__.py +0 -0
  256. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/constants.py +0 -0
  257. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/logger.py +0 -0
  258. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/save_load.py +0 -0
  259. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/common/utils/utility.py +0 -0
  260. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/__init__.py +0 -0
  261. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/callbacks.py +0 -0
  262. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/distillation/__init__.py +0 -0
  263. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/distillation/criterions.py +0 -0
  264. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/distillation/optimizers.py +0 -0
  265. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/distillation/utility.py +0 -0
  266. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/hpo/__init__.py +0 -0
  267. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/hpo/sa_optimizer.py +0 -0
  268. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/hpo/search_algorithms.py +0 -0
  269. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/hpo/search_space.py +0 -0
  270. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/__init__.py +0 -0
  271. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/criteria.py +0 -0
  272. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/dsnot.py +0 -0
  273. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/model_slim/__init__.py +0 -0
  274. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/model_slim/auto_slim.py +0 -0
  275. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py +0 -0
  276. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/model_slim/weight_slim.py +0 -0
  277. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/__init__.py +0 -0
  278. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/base.py +0 -0
  279. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/mha.py +0 -0
  280. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/ninm.py +0 -0
  281. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/patterns/nxm.py +0 -0
  282. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/__init__.py +0 -0
  283. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/base.py +0 -0
  284. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/basic.py +0 -0
  285. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/block_mask.py +0 -0
  286. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/mha.py +0 -0
  287. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/pattern_lock.py +0 -0
  288. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/progressive.py +0 -0
  289. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/retrain_free.py +0 -0
  290. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruners/sparse_gpt.py +0 -0
  291. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/pruning.py +0 -0
  292. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/regs.py +0 -0
  293. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/schedulers.py +0 -0
  294. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/tf_criteria.py +0 -0
  295. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/utils.py +0 -0
  296. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/wanda/__init__.py +0 -0
  297. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/wanda/prune.py +0 -0
  298. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/wanda/utils.py +0 -0
  299. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/compression/pruner/wanda/wrapper.py +0 -0
  300. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/config.py +0 -0
  301. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/contrib/__init__.py +0 -0
  302. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/contrib/strategy/__init__.py +0 -0
  303. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/contrib/strategy/sigopt.py +0 -0
  304. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/contrib/strategy/tpe.py +0 -0
  305. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/__init__.py +0 -0
  306. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/__init__.py +0 -0
  307. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/base_dataloader.py +0 -0
  308. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/dataloader.py +0 -0
  309. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/default_dataloader.py +0 -0
  310. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/fetcher.py +0 -0
  311. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/mxnet_dataloader.py +0 -0
  312. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/onnxrt_dataloader.py +0 -0
  313. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/pytorch_dataloader.py +0 -0
  314. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/sampler.py +0 -0
  315. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/dataloaders/tensorflow_dataloader.py +0 -0
  316. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/__init__.py +0 -0
  317. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/bert_dataset.py +0 -0
  318. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/coco_dataset.py +0 -0
  319. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/dataset.py +0 -0
  320. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/dummy_dataset.py +0 -0
  321. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/imagenet_dataset.py +0 -0
  322. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/datasets/style_transfer_dataset.py +0 -0
  323. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/filters/__init__.py +0 -0
  324. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/filters/coco_filter.py +0 -0
  325. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/filters/filter.py +0 -0
  326. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/__init__.py +0 -0
  327. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/coco_transform.py +0 -0
  328. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/imagenet_transform.py +0 -0
  329. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/postprocess.py +0 -0
  330. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/tokenization.py +0 -0
  331. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/data/transforms/transform.py +0 -0
  332. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/__init__.py +0 -0
  333. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/bigcode_eval/__init__.py +0 -0
  334. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/bigcode_eval/evaluator.py +0 -0
  335. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/hf_eval/__init__.py +0 -0
  336. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/hf_eval/evaluator.py +0 -0
  337. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/hf_eval/hf_datasets/__init__.py +0 -0
  338. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/hf_eval/hf_datasets/cnn_dailymail.py +0 -0
  339. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/__init__.py +0 -0
  340. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/models/__init__.py +0 -0
  341. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/evaluation/lm_eval/models/huggingface.py +0 -0
  342. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/__init__.py +0 -0
  343. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/bleu.py +0 -0
  344. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/bleu_util.py +0 -0
  345. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/coco_label_map.py +0 -0
  346. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/coco_tools.py +0 -0
  347. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/evaluate_squad.py +0 -0
  348. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/f1.py +0 -0
  349. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/metric/metric.py +0 -0
  350. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/mix_precision.py +0 -0
  351. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/__init__.py +0 -0
  352. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/base_model.py +0 -0
  353. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/keras_model.py +0 -0
  354. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/model.py +0 -0
  355. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/mxnet_model.py +0 -0
  356. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/nets_factory.py +0 -0
  357. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/onnx_model.py +0 -0
  358. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/tensorflow_model.py +0 -0
  359. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/model/torch_model.py +0 -0
  360. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/objective.py +0 -0
  361. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/__init__.py +0 -0
  362. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/__init__.py +0 -0
  363. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/factory.py +0 -0
  364. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/onnx_parser/__init__.py +0 -0
  365. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/onnx_parser/factory.py +0 -0
  366. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/onnx_parser/parser.py +0 -0
  367. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/parser.py +0 -0
  368. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/result.py +0 -0
  369. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/tensorflow_parser/__init__.py +0 -0
  370. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/tensorflow_parser/factory.py +0 -0
  371. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/parser/tensorflow_parser/parser.py +0 -0
  372. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/__init__.py +0 -0
  373. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/factory.py +0 -0
  374. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/onnxrt_profiler/__init__.py +0 -0
  375. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +0 -0
  376. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +0 -0
  377. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/onnxrt_profiler/utils.py +0 -0
  378. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/profiler.py +0 -0
  379. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/tensorflow_profiler/__init__.py +0 -0
  380. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +0 -0
  381. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +0 -0
  382. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py +0 -0
  383. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/quantization.py +0 -0
  384. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/__init__.py +0 -0
  385. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/auto.py +0 -0
  386. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/auto_mixed_precision.py +0 -0
  387. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/basic.py +0 -0
  388. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/bayesian.py +0 -0
  389. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/conservative.py +0 -0
  390. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/exhaustive.py +0 -0
  391. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/hawq_v2.py +0 -0
  392. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/mse.py +0 -0
  393. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/mse_v2.py +0 -0
  394. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/random.py +0 -0
  395. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/strategy.py +0 -0
  396. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/__init__.py +0 -0
  397. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/constant.py +0 -0
  398. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/tuning_sampler.py +0 -0
  399. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/tuning_space.py +0 -0
  400. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/tuning_structs.py +0 -0
  401. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/strategy/utils/utility.py +0 -0
  402. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/__init__.py +0 -0
  403. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/__init__.py +0 -0
  404. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/smoother/__init__.py +0 -0
  405. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/smoother/calibration.py +0 -0
  406. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/smoother/core.py +0 -0
  407. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/smoother/scaler.py +0 -0
  408. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/__init__.py +0 -0
  409. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/keras.py +0 -0
  410. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/keras.yaml +0 -0
  411. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +0 -0
  412. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.yaml +0 -0
  413. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow_itex.yaml +0 -0
  414. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/__init__.py +0 -0
  415. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/__init__.py +0 -0
  416. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/conv2d.py +0 -0
  417. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/dense.py +0 -0
  418. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py +0 -0
  419. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/layer_initializer.py +0 -0
  420. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/pool2d.py +0 -0
  421. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/layers/separable_conv2d.py +0 -0
  422. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/quantization/__init__.py +0 -0
  423. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/keras/quantization/config.py +0 -0
  424. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/__init__.py +0 -0
  425. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/algorithm_entry.py +0 -0
  426. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/autotune.py +0 -0
  427. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/config.py +0 -0
  428. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/quantize.py +0 -0
  429. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/__init__.py +0 -0
  430. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_converter.py +0 -0
  431. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/__init__.py +0 -0
  432. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/__init__.py +0 -0
  433. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/bf16_convert.py +0 -0
  434. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
  435. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/__init__.py +0 -0
  436. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
  437. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_layout.py +0 -0
  438. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
  439. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
  440. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
  441. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dilated_contraction.py +0 -0
  442. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
  443. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
  444. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
  445. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
  446. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_constant.py +0 -0
  447. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
  448. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
  449. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
  450. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
  451. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
  452. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_gelu.py +0 -0
  453. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
  454. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
  455. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
  456. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
  457. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
  458. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/grappler_pass.py +0 -0
  459. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/insert_print_node.py +0 -0
  460. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
  461. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py +0 -0
  462. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
  463. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
  464. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/split_shared_input.py +0 -0
  465. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
  466. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
  467. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/switch_optimizer.py +0 -0
  468. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/graph_base.py +0 -0
  469. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/__init__.py +0 -0
  470. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
  471. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py +0 -0
  472. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
  473. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
  474. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
  475. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
  476. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
  477. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
  478. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
  479. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/scale_propagation.py +0 -0
  480. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/__init__.py +0 -0
  481. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
  482. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
  483. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
  484. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/__init__.py +0 -0
  485. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/__init__.py +0 -0
  486. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
  487. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
  488. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
  489. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
  490. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
  491. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
  492. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
  493. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/optimize_qdq.py +0 -0
  494. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_base.py +0 -0
  495. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_bn.py +0 -0
  496. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_concatv2.py +0 -0
  497. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_conv.py +0 -0
  498. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
  499. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_matmul.py +0 -0
  500. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_pooling.py +0 -0
  501. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/quantize_graph_common.py +0 -0
  502. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/__init__.py +0 -0
  503. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/bias_correction.py +0 -0
  504. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/graph_transform_base.py +0 -0
  505. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/insert_logging.py +0 -0
  506. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/transform_graph/rerange_quantized_concat.py +0 -0
  507. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/quantization/utils/utility.py +0 -0
  508. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/__init__.py +0 -0
  509. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/constants.py +0 -0
  510. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/data.py +0 -0
  511. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/model.py +0 -0
  512. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/model_wrappers.py +0 -0
  513. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/tensorflow/utils/utility.py +0 -0
  514. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/__init__.py +0 -0
  515. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/__init__.py +0 -0
  516. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/base_algorithm.py +0 -0
  517. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/__init__.py +0 -0
  518. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_core/__init__.py +0 -0
  519. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/__init__.py +0 -0
  520. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/custom_config/__init__.py +0 -0
  521. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/observer.py +0 -0
  522. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/__init__.py +0 -0
  523. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/prepare_model.py +0 -0
  524. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/scaling_method_base.py +0 -0
  525. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/scripts/__init__.py +0 -0
  526. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/utils/__init__.py +0 -0
  527. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/fp8_quant/utils/logger.py +0 -0
  528. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/layer_wise/__init__.py +0 -0
  529. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/layer_wise/load.py +0 -0
  530. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/layer_wise/modified_pickle.py +0 -0
  531. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/layer_wise/utils.py +0 -0
  532. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/__init__.py +0 -0
  533. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/__init__.py +0 -0
  534. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/gptq.py +0 -0
  535. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot.py +0 -0
  536. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot_utils.py +0 -0
  537. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_precision/__init__.py +0 -0
  538. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_precision/half_precision_convert.py +0 -0
  539. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mixed_precision/module_wrappers.py +0 -0
  540. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mx_quant/__init__.py +0 -0
  541. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mx_quant/mx.py +0 -0
  542. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/mx_quant/utils.py +0 -0
  543. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/__init__.py +0 -0
  544. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/pt2e_quant/save_load.py +0 -0
  545. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/smooth_quant/__init__.py +0 -0
  546. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/smooth_quant/save_load.py +0 -0
  547. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py +0 -0
  548. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/smooth_quant/utility.py +0 -0
  549. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/static_quant/__init__.py +0 -0
  550. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/static_quant/save_load.py +0 -0
  551. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/static_quant/static_quant.py +0 -0
  552. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/static_quant/utility.py +0 -0
  553. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/__init__.py +0 -0
  554. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/awq.py +0 -0
  555. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/__init__.py +0 -0
  556. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/bitpack.py +0 -0
  557. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/config.py +0 -0
  558. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/core.py +0 -0
  559. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/optimizer.py +0 -0
  560. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/qtensor.py +0 -0
  561. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/hqq/quantizer.py +0 -0
  562. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/modules.py +0 -0
  563. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/rtn.py +0 -0
  564. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/teq.py +0 -0
  565. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/algorithms/weight_only/utility.py +0 -0
  566. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/export/__init__.py +0 -0
  567. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/export/pt2e_export.py +0 -0
  568. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/__init__.py +0 -0
  569. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/algorithm_entry.py +0 -0
  570. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/quantization/quantize.py +0 -0
  571. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/__init__.py +0 -0
  572. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/auto_accelerator.py +0 -0
  573. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/bit_packer.py +0 -0
  574. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/torch/utils/block_wise.py +0 -0
  575. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/training.py +0 -0
  576. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/generation/__init__.py +0 -0
  577. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/generation/beam_search.py +0 -0
  578. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/generation/greedy_search.py +0 -0
  579. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/quantization/__init__.py +0 -0
  580. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/transformers/utils/__init__.py +0 -0
  581. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/__init__.py +0 -0
  582. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/collect_layer_histogram.py +0 -0
  583. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/constant.py +0 -0
  584. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/create_obj_from_config.py +0 -0
  585. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/export/__init__.py +0 -0
  586. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/export/qlinear2qdq.py +0 -0
  587. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/export/tf2onnx.py +0 -0
  588. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/export/torch2onnx.py +0 -0
  589. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/kl_divergence.py +0 -0
  590. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/load_huggingface.py +0 -0
  591. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/logger.py +0 -0
  592. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/options.py +0 -0
  593. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/pytorch.py +0 -0
  594. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/utility.py +0 -0
  595. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor/utils/weights_details.py +0 -0
  596. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/dependency_links.txt +0 -0
  597. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/entry_points.txt +0 -0
  598. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/requires.txt +0 -0
  599. {neural_compressor-3.2 → neural_compressor-3.3}/neural_compressor.egg-info/top_level.txt +0 -0
  600. {neural_compressor-3.2 → neural_compressor-3.3}/pyproject.toml +0 -0
  601. {neural_compressor-3.2 → neural_compressor-3.3}/setup.cfg +0 -0
  602. {neural_compressor-3.2 → neural_compressor-3.3}/setup.py +0 -0
  603. {neural_compressor-3.2 → neural_compressor-3.3}/third-party-programs.txt +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: neural_compressor
3
- Version: 3.2
3
+ Version: 3.3
4
4
  Summary: Repository of Intel® Neural Compressor
5
5
  Home-page: https://github.com/intel/neural-compressor
6
6
  Author: Intel AIPT Team
@@ -43,6 +43,18 @@ Requires-Dist: py-cpuinfo; extra == "tf"
43
43
  Requires-Dist: pydantic; extra == "tf"
44
44
  Requires-Dist: pyyaml; extra == "tf"
45
45
  Requires-Dist: tensorflow; extra == "tf"
46
+ Dynamic: author
47
+ Dynamic: author-email
48
+ Dynamic: classifier
49
+ Dynamic: description
50
+ Dynamic: description-content-type
51
+ Dynamic: home-page
52
+ Dynamic: keywords
53
+ Dynamic: license
54
+ Dynamic: provides-extra
55
+ Dynamic: requires-dist
56
+ Dynamic: requires-python
57
+ Dynamic: summary
46
58
 
47
59
  <div align="center">
48
60
 
@@ -51,7 +63,7 @@ Intel® Neural Compressor
51
63
  <h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
52
64
 
53
65
  [![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
54
- [![version](https://img.shields.io/badge/release-3.2-green)](https://github.com/intel/neural-compressor/releases)
66
+ [![version](https://img.shields.io/badge/release-3.3-green)](https://github.com/intel/neural-compressor/releases)
55
67
  [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
56
68
  [![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
57
69
  [![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
@@ -78,55 +90,33 @@ support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testi
78
90
  * [2024/07] Performance optimizations and usability improvements on [client-side](./docs/source/3x/client_quant.md).
79
91
 
80
92
  ## Installation
93
+ Choose the necessary framework dependencies to install based on your deployment environment.
81
94
  ### Install Framework
82
- #### Install torch for CPU
83
- ```Shell
84
- pip install torch --index-url https://download.pytorch.org/whl/cpu
95
+ * [Install intel_extension_for_pytorch for CPU](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/)
96
+ * [Install intel_extension_for_pytorch for XPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
97
+ * [Use Docker Image with torch installed for HPU](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click)
98
+ **Note**: There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
99
+ * [Install torch for other platform](https://pytorch.org/get-started/locally)
100
+ * [Install TensorFlow](https://www.tensorflow.org/install)
101
+
102
+ ### Install Neural Compressor from pypi
85
103
  ```
86
- #### Use Docker Image with torch installed for HPU
87
- https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click
88
-
89
- > **Note**:
90
- > There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
91
-
92
- #### Install torch/intel_extension_for_pytorch for Intel GPU
93
- https://intel.github.io/intel-extension-for-pytorch/index.html#installation
94
-
95
- #### Install torch for other platform
96
- https://pytorch.org/get-started/locally
97
-
98
- #### Install tensorflow
99
- ```Shell
100
- pip install tensorflow
101
- ```
102
-
103
- ### Install from pypi
104
- ```Shell
105
104
  # Install 2.X API + Framework extension API + PyTorch dependency
106
105
  pip install neural-compressor[pt]
107
106
  # Install 2.X API + Framework extension API + TensorFlow dependency
108
107
  pip install neural-compressor[tf]
109
- ```
110
- > **Note**:
111
- > Further installation methods can be found under [Installation Guide](./docs/source/installation_guide.md). check out our [FAQ](./docs/source/faq.md) for more details.
108
+ ```
109
+ **Note**: Further installation methods can be found in the [Installation Guide](./docs/source/installation_guide.md). Check out our [FAQ](./docs/source/faq.md) for more details.
112
110
 
113
111
  ## Getting Started
112
+ After successfully installing these packages, try your first quantization program. **The following example code demonstrates FP8 Quantization**, which is supported by the Intel Gaudi2 AI Accelerator.
113
+ To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
114
114
 
115
- Setting up the environment:
116
- ```bash
117
- pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
115
+ Run a container with an interactive shell, [more info](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#docker-installation)
118
116
  ```
119
- After successfully installing these packages, try your first quantization program.
120
-
121
- ### [FP8 Quantization](./docs/source/3x/PT_FP8Quant.md)
122
- Following example code demonstrates FP8 Quantization, it is supported by Intel Gaudi2 AI Accelerator.
123
-
124
- To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
125
- ```bash
126
- # Run a container with an interactive shell
127
- docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
117
+ docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.20.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
128
118
  ```
129
- Run the example:
119
+ Run the example,
130
120
  ```python
131
121
  from neural_compressor.torch.quantization import (
132
122
  FP8Config,
@@ -148,12 +138,10 @@ model = convert(model)
148
138
 
149
139
  output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
150
140
  print(output.shape)
151
- ```
152
-
153
- ### Weight-Only Large Language Model Loading (LLMs)
154
-
155
- Following example code demonstrates weight-only large language model loading on Intel Gaudi2 AI Accelerator.
141
+ ```
142
+ More [FP8 quantization doc](./docs/source/3x/PT_FP8Quant.md).
156
143
 
144
+ **Following example code demonstrates weight-only large language model loading** on Intel Gaudi2 AI Accelerator.
157
145
  ```python
158
146
  from neural_compressor.torch.quantization import load
159
147
 
@@ -165,10 +153,7 @@ model = load(
165
153
  torch_dtype=torch.bfloat16,
166
154
  )
167
155
  ```
168
-
169
- **Note:**
170
-
171
- Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
156
+ **Note:** Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
172
157
 
173
158
  ## Documentation
174
159
 
@@ -5,7 +5,7 @@ Intel® Neural Compressor
5
5
  <h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
6
6
 
7
7
  [![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
8
- [![version](https://img.shields.io/badge/release-3.2-green)](https://github.com/intel/neural-compressor/releases)
8
+ [![version](https://img.shields.io/badge/release-3.3-green)](https://github.com/intel/neural-compressor/releases)
9
9
  [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
10
10
  [![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
11
11
  [![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
@@ -32,55 +32,33 @@ support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testi
32
32
  * [2024/07] Performance optimizations and usability improvements on [client-side](./docs/source/3x/client_quant.md).
33
33
 
34
34
  ## Installation
35
+ Choose the necessary framework dependencies to install based on your deployment environment.
35
36
  ### Install Framework
36
- #### Install torch for CPU
37
- ```Shell
38
- pip install torch --index-url https://download.pytorch.org/whl/cpu
37
+ * [Install intel_extension_for_pytorch for CPU](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/)
38
+ * [Install intel_extension_for_pytorch for XPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
39
+ * [Use Docker Image with torch installed for HPU](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click)
40
+ **Note**: There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
41
+ * [Install torch for other platform](https://pytorch.org/get-started/locally)
42
+ * [Install TensorFlow](https://www.tensorflow.org/install)
43
+
44
+ ### Install Neural Compressor from pypi
39
45
  ```
40
- #### Use Docker Image with torch installed for HPU
41
- https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click
42
-
43
- > **Note**:
44
- > There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
45
-
46
- #### Install torch/intel_extension_for_pytorch for Intel GPU
47
- https://intel.github.io/intel-extension-for-pytorch/index.html#installation
48
-
49
- #### Install torch for other platform
50
- https://pytorch.org/get-started/locally
51
-
52
- #### Install tensorflow
53
- ```Shell
54
- pip install tensorflow
55
- ```
56
-
57
- ### Install from pypi
58
- ```Shell
59
46
  # Install 2.X API + Framework extension API + PyTorch dependency
60
47
  pip install neural-compressor[pt]
61
48
  # Install 2.X API + Framework extension API + TensorFlow dependency
62
49
  pip install neural-compressor[tf]
63
- ```
64
- > **Note**:
65
- > Further installation methods can be found under [Installation Guide](./docs/source/installation_guide.md). check out our [FAQ](./docs/source/faq.md) for more details.
50
+ ```
51
+ **Note**: Further installation methods can be found in the [Installation Guide](./docs/source/installation_guide.md). Check out our [FAQ](./docs/source/faq.md) for more details.
66
52
 
67
53
  ## Getting Started
54
+ After successfully installing these packages, try your first quantization program. **The following example code demonstrates FP8 Quantization**, which is supported by the Intel Gaudi2 AI Accelerator.
55
+ To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
68
56
 
69
- Setting up the environment:
70
- ```bash
71
- pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
57
+ Run a container with an interactive shell, [more info](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#docker-installation)
72
58
  ```
73
- After successfully installing these packages, try your first quantization program.
74
-
75
- ### [FP8 Quantization](./docs/source/3x/PT_FP8Quant.md)
76
- Following example code demonstrates FP8 Quantization, it is supported by Intel Gaudi2 AI Accelerator.
77
-
78
- To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, please refer to following script for environment setup. More details can be found in [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
79
- ```bash
80
- # Run a container with an interactive shell
81
- docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
59
+ docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.20.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
82
60
  ```
83
- Run the example:
61
+ Run the example,
84
62
  ```python
85
63
  from neural_compressor.torch.quantization import (
86
64
  FP8Config,
@@ -102,12 +80,10 @@ model = convert(model)
102
80
 
103
81
  output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
104
82
  print(output.shape)
105
- ```
106
-
107
- ### Weight-Only Large Language Model Loading (LLMs)
108
-
109
- Following example code demonstrates weight-only large language model loading on Intel Gaudi2 AI Accelerator.
83
+ ```
84
+ More [FP8 quantization doc](./docs/source/3x/PT_FP8Quant.md).
110
85
 
86
+ **Following example code demonstrates weight-only large language model loading** on Intel Gaudi2 AI Accelerator.
111
87
  ```python
112
88
  from neural_compressor.torch.quantization import load
113
89
 
@@ -119,10 +95,7 @@ model = load(
119
95
  torch_dtype=torch.bfloat16,
120
96
  )
121
97
  ```
122
-
123
- **Note:**
124
-
125
- Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
98
+ **Note:** Intel Neural Compressor will convert the model format from auto-gptq to hpu format on the first load and save hpu_model.safetensors to the local cache directory for the next load. So it may take a while to load for the first time.
126
99
 
127
100
  ## Documentation
128
101
 
@@ -49,7 +49,7 @@ class Adaptor(object):
49
49
 
50
50
  @abstractmethod
51
51
  def quantize(self, tune_cfg, model, dataloader, q_func=None):
52
- """The function is used to do calibration and quanitization in post-training quantization.
52
+ """The function is used to do calibration and quantization in post-training quantization.
53
53
 
54
54
  Args:
55
55
  tune_cfg(dict): The chosen tuning configuration.
@@ -59,7 +59,7 @@ class MxNetAdaptor(Adaptor):
59
59
 
60
60
  @dump_elapsed_time("Pass quantize model")
61
61
  def quantize(self, tune_cfg, nc_model, dataloader, q_func=None):
62
- """The function is used to do MXNet calibration and quanitization in post-training
62
+ """The function is used to do MXNet calibration and quantization in post-training
63
63
  quantization.
64
64
 
65
65
  Args:
@@ -252,7 +252,7 @@ class ONNXRUNTIMEAdaptor(Adaptor):
252
252
 
253
253
  @dump_elapsed_time("Pass quantize model")
254
254
  def quantize(self, tune_cfg, model, data_loader, q_func=None):
255
- """The function is used to do calibration and quanitization in post-training
255
+ """The function is used to do calibration and quantization in post-training
256
256
  quantization.
257
257
 
258
258
  Args:
@@ -1853,7 +1853,7 @@ class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor):
1853
1853
 
1854
1854
  @dump_elapsed_time("Pass quantize model")
1855
1855
  def quantize(self, tune_cfg, model, data_loader, q_func=None):
1856
- """The function is used to do calibration and quanitization in post-training
1856
+ """The function is used to do calibration and quantization in post-training
1857
1857
  quantization.
1858
1858
 
1859
1859
  Args:
@@ -212,7 +212,7 @@ class GraphAnalyzer:
212
212
  return self._search_patterns(patterns)
213
213
 
214
214
  def _search_patterns(self, input_pattern):
215
- """Search user specified patterns on internal grpah structure.
215
+ """Search user specified patterns on internal graph structure.
216
216
 
217
217
  Args:
218
218
  input_pattern (list): The element of the pattern list could be string/list/tuple.
@@ -718,10 +718,12 @@ class GPTQuantizer(object):
718
718
  for n, p in sub_layer.named_parameters():
719
719
  param_name = full_layer_name + "." + n
720
720
  if n == "weight":
721
- set_module_tensor_to_device(self.model, param_name, self.device, Q)
721
+ set_module_tensor_to_device(self.model, param_name, self.device, Q, dtype=Q.dtype)
722
722
  else:
723
723
  value = load_value(self.model, param_name, model_path)
724
- set_module_tensor_to_device(self.model, param_name, self.device, value)
724
+ set_module_tensor_to_device(
725
+ self.model, param_name, self.device, value, dtype=value.dtype
726
+ )
725
727
  # sub_layer.weight.data = Q
726
728
  torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
727
729
  clean_module_weight(sub_layer)
@@ -745,6 +747,8 @@ class GPTQuantizer(object):
745
747
  for j in range(len(self.dataloader)):
746
748
  cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
747
749
  cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
750
+ # breakpoint()
751
+ # transformer_block = transformer_block.to(getattr(torch, self.model.config.torch_dtype))
748
752
  out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
749
753
  out = self.track_hidden_states(out)
750
754
  outs.append(out)
@@ -23,14 +23,11 @@ import copy
23
23
  import logging
24
24
 
25
25
  import numpy as np
26
- import torch.nn
27
- import torch.nn as nn
28
26
  from torch.quantization.quantize_fx import fuse_fx
29
27
 
30
28
  logger = logging.getLogger(__name__)
31
29
  from typing import Any, Callable, Dict, List, Optional, Set, Union
32
30
 
33
- import torch
34
31
  import tqdm
35
32
 
36
33
 
@@ -221,7 +221,7 @@ def load_module(model, module_name, path, device="cpu"):
221
221
  for n, p in module.named_parameters():
222
222
  param_name = module_name + "." + n
223
223
  value = load_value(model, param_name, path)
224
- set_module_tensor_to_device(model, param_name, device, value)
224
+ set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)
225
225
 
226
226
 
227
227
  def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_path=None):
@@ -239,7 +239,7 @@ def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_pa
239
239
  value = state_dict[n]
240
240
  else:
241
241
  value = load_value(model, param_name, path)
242
- set_module_tensor_to_device(model, param_name, device, value)
242
+ set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)
243
243
 
244
244
  return hook
245
245
 
@@ -18,6 +18,7 @@
18
18
 
19
19
  from __future__ import annotations
20
20
 
21
+ import copy
21
22
  import inspect
22
23
  import json
23
24
  import os
@@ -539,6 +540,7 @@ class BaseConfig(ABC):
539
540
  tuning_param_pair = dict(zip(tuning_param_name_lst, params_values))
540
541
  tmp_params_dict = {**not_tuning_param_pair, **tuning_param_pair}
541
542
  new_config = self.__class__(**tmp_params_dict)
543
+ new_config.local_config = copy.deepcopy(self.local_config)
542
544
  logger.info(new_config.to_dict())
543
545
  config_list.append(new_config)
544
546
  logger.info("Expanded the %s and got %d configs.", self.__class__.name, len(config_list))
@@ -629,9 +631,13 @@ class BaseConfig(ABC):
629
631
  """
630
632
  if not isinstance(other, type(self)):
631
633
  return False
632
- return self.params_list == other.params_list and all(
634
+
635
+ params_equal = self.params_list == other.params_list and all(
633
636
  getattr(self, str(attr)) == getattr(other, str(attr)) for attr in self.params_list
634
637
  )
638
+ local_config_equal = self.local_config == other.local_config
639
+ global_config_equal = self.global_config == other.global_config
640
+ return params_equal and local_config_equal and global_config_equal
635
641
 
636
642
 
637
643
  class ComposableConfig(BaseConfig):
@@ -15,4 +15,4 @@
15
15
  # See the License for the specific language governing permissions and
16
16
  # limitations under the License.
17
17
  """Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques."""
18
- __version__ = "3.2"
18
+ __version__ = "3.3"
@@ -236,7 +236,7 @@ class SparseDummyDataset(IterableDataset): # pragma: no cover
236
236
  self.label_shape = len(self.dense_shape) * self.label_shape
237
237
  assert len(self.label_shape) == len(
238
238
  self.dense_shape
239
- ), "length of dense_shape should be euqal to length of label_shape"
239
+ ), "length of dense_shape should be equal to length of label_shape"
240
240
  self.label_dim = len(self.label_shape)
241
241
 
242
242
  self.input_dim = 1 if isinstance(dense_shape, tuple) else len(dense_shape)
@@ -39,7 +39,6 @@ from typing import Union
39
39
  import lm_eval
40
40
  import numpy as np
41
41
  from lm_eval import evaluator, utils
42
- from lm_eval.loggers import WandbLogger
43
42
  from lm_eval.tasks import TaskManager
44
43
  from lm_eval.utils import make_table, simple_parse_args_string
45
44
 
@@ -67,6 +66,17 @@ def _handle_non_serializable(o):
67
66
 
68
67
  def cli_evaluate(args) -> None:
69
68
  if args.wandb_args:
69
+ try:
70
+ # For 0.4.3 and above
71
+ from lm_eval.loggers import WandbLogger
72
+ except ImportError:
73
+ try:
74
+ # For 0.4.2
75
+ from lm_eval.logging_utils import WandbLogger
76
+ except ImportError:
77
+ raise ImportError("Import of WandbLogger failed. Please install wandb to use this feature.")
78
+ except Exception as e:
79
+ raise RuntimeError(f"An unexpected error occurred: {e}")
70
80
  wandb_logger = WandbLogger(**simple_parse_args_string(args.wandb_args))
71
81
 
72
82
  eval_logger = utils.eval_logger
@@ -200,6 +210,7 @@ def cli_evaluate(args) -> None:
200
210
  )
201
211
  lm.pad_to_buckets = args.pad_to_buckets
202
212
  lm.buckets = args.buckets
213
+ lm.add_bos_token = args.add_bos_token
203
214
 
204
215
  results = evaluator.simple_evaluate(
205
216
  model=lm,
@@ -20,6 +20,8 @@ try:
20
20
  except:
21
21
  _hpex_available = False
22
22
 
23
+ from neural_compressor.common import logger
24
+
23
25
 
24
26
  class LMEvalParser:
25
27
  def __init__(
@@ -50,6 +52,7 @@ class LMEvalParser:
50
52
  trust_remote_code=False,
51
53
  pad_to_buckets=None, # used by HPU to align input length for performance.
52
54
  buckets=[32, 64, 128, 256, 512, 1024, 2048, 4096], # used by HPU to limit input length range.
55
+ add_bos_token=False,
53
56
  ):
54
57
  self.model = model
55
58
  self.tasks = tasks
@@ -83,3 +86,17 @@ class LMEvalParser:
83
86
  else:
84
87
  self.pad_to_buckets = pad_to_buckets
85
88
  self.buckets = buckets
89
+ self.add_bos_token = add_bos_token
90
+ self._post_init()
91
+
92
+ def _check_add_bos_token(self):
93
+ if not self.add_bos_token:
94
+ logger.warning(
95
+ (
96
+ "`add_bos_token` is set to False. "
97
+ "If the model was trained or fine-tuned with a BOS token, this may lead to incorrect results."
98
+ )
99
+ )
100
+
101
+ def _post_init(self):
102
+ self._check_add_bos_token()
@@ -212,7 +212,7 @@ class GraphAnalyzer:
212
212
  return self._search_patterns(patterns)
213
213
 
214
214
  def _search_patterns(self, input_pattern):
215
- """Search user specified patterns on internal grpah structure.
215
+ """Search user specified patterns on internal graph structure.
216
216
 
217
217
  Args:
218
218
  input_pattern (list): The element of the pattern list could be string/list/tuple.
@@ -13,39 +13,24 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import functools
16
- import importlib.util
17
16
  import json
18
17
  import os
19
18
 
20
19
  import numpy as np
21
20
  import torch
21
+ from enum import Enum, auto
22
22
 
23
- from .._quant_common.helper_modules import *
24
23
  from .._quant_common.quant_config import get_hqt_config
25
24
  from ..utils.logger import logger
26
- from neural_compressor.torch.algorithms.fp8_quant.model_configs import (
27
- ModuleInfo,
28
- ModuleConfig,
29
- ModuleType,
30
- ModuleExtraConfig,
31
- get_patched_module_table,
32
- get_patched_module_type_table,
33
- )
34
- from neural_compressor.torch.utils.auto_accelerator import auto_detect_accelerator
35
- deepspeed_exists = False
36
- if importlib.util.find_spec("deepspeed"): # check if deepspeed is installed
37
- deepspeed_exists = True
25
+ from neural_compressor.torch.algorithms.fp8_quant.model_configs import ModuleConfig
38
26
 
39
27
  UNMEASURED_MODELS = "UnmeasuredModels"
40
28
 
41
29
 
42
- _mod_types = {
43
- "linear": ModuleType(1, ["weight"], 1, False),
44
- "matmul": ModuleType(2, [], 1, False),
45
- "kv_cache": ModuleType(1, [], 1, False),
46
- "softmax": ModuleType(1, [], 1, True),
47
- "fused_sdpa": ModuleType(3, [], 2, True),
48
- }
30
+ class QuantTensorType(Enum):
31
+ MEASUREMENTS = auto()
32
+ CONST = auto()
33
+ DYNAMIC = auto()
49
34
 
50
35
 
51
36
  class ShapeList:
@@ -196,73 +181,6 @@ format_functions = {
196
181
 
197
182
  format_functions_rec = lambda k: functools.partial(rec_fn, fn=format_functions[k])
198
183
 
199
- _mod_default_dict = {
200
- "Matmul": ModuleInfo("matmul", PatchedMatmul),
201
- "Linear": ModuleInfo("linear", PatchedLinear),
202
- "RowParallelLinear": ModuleInfo("linear", PatchedRowParallelLinear),
203
- "ColumnParallelLinear": ModuleInfo("linear", PatchedColumnParallelLinear),
204
- "MergedColumnParallelLinear": ModuleInfo("linear", PatchedColumnParallelLinear),
205
- "QKVParallelLinear": ModuleInfo("linear", PatchedColumnParallelLinear),
206
- "FalconLinear": ModuleInfo("linear", PatchedLinear),
207
- "KVCache": ModuleInfo("kv_cache", PatchedKVCache),
208
- "VLLMKVCache": ModuleInfo("kv_cache", PatchedVLLMKVCache),
209
- "Conv2d": ModuleInfo("linear", PatchedConv2d),
210
- "LoRACompatibleLinear": ModuleInfo("linear", PatchedLoRACompatibleLinear),
211
- "LoRACompatibleConv": ModuleInfo("linear", PatchedLoRACompatibleConv),
212
- "Softmax": ModuleInfo("softmax", PatchedSoftmax),
213
- "ModuleFusedSDPA": ModuleInfo("fused_sdpa", PatchedModuleFusedSDPA),
214
- "MoeMatmul": ModuleInfo("linear", PatchedMoeMatmul),
215
- "ReplicatedLinear": ModuleInfo("linear", PatchedReplicatedLinear),
216
- "FusedMoE": ModuleInfo("linear", PatchedMixtralMoE, False),
217
- }
218
-
219
-
220
- if deepspeed_exists:
221
- _mod_default_dict.update(
222
- {
223
- "LinearLayer": ModuleInfo("linear", PatchedLinear),
224
- "LinearAllreduce": ModuleInfo("linear", PatchedLinearAllReduce),
225
- "ScopedLinearAllReduce": ModuleInfo("linear", PatchedLinearAllReduce),
226
- "LmHeadLinearAllreduce": ModuleInfo("linear", PatchedLmHeadLinearAllreduce),
227
- }
228
- )
229
-
230
- @functools.lru_cache(maxsize=None)
231
- def _import_hpu_modules():
232
- from neural_compressor.torch.algorithms.fp8_quant.patched_module_base import (
233
- PATCHED_MODULE_TABLE, PATCHED_MODULE_TYPES_TABLE
234
- )
235
- cur_accelerator = auto_detect_accelerator()
236
- if not cur_accelerator.current_device_name().startswith("hpu"):
237
- return
238
- PATCHED_MODULE_TABLE["hpu"].update(_mod_default_dict)
239
- PATCHED_MODULE_TYPES_TABLE["hpu"].update(_mod_types)
240
-
241
-
242
- _import_hpu_modules()
243
-
244
- mod_default_dict = get_patched_module_table()
245
- mod_types = get_patched_module_type_table()
246
-
247
- def get_white_list():
248
- return list(mod_default_dict.keys())
249
-
250
- class ModInstInfo:
251
- def __init__(self, name, parent):
252
- self.name = name
253
- self.parent = parent
254
-
255
-
256
- parent_child_mod_dict = {}
257
-
258
-
259
- def generate_model_info(model):
260
- def create_mod_info_recursion(parent):
261
- for name, mod in parent.named_children():
262
- parent_child_mod_dict[mod] = ModInstInfo(name, parent)
263
- create_mod_info_recursion(mod)
264
-
265
- create_mod_info_recursion(model)
266
184
 
267
185
  def get_device_type_for_scales(mod):
268
186
  config = get_hqt_config(mod).cfg