neural-compressor 3.3__tar.gz → 3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (607) hide show
  1. {neural_compressor-3.3 → neural_compressor-3.4}/PKG-INFO +12 -24
  2. {neural_compressor-3.3 → neural_compressor-3.4}/README.md +9 -8
  3. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/__init__.py +15 -13
  4. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/adaptor.py +1 -1
  5. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras.py +2 -0
  6. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/onnxrt.py +2 -0
  7. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/pytorch.py +6 -2
  8. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tensorflow.py +2 -0
  9. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py +1 -1
  10. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/benchmark.py +0 -2
  11. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/base_config.py +16 -1
  12. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/utils/constants.py +3 -1
  13. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/utils/logger.py +2 -0
  14. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/utils/save_load.py +13 -1
  15. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/utils/utility.py +4 -4
  16. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/version.py +1 -1
  17. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/config.py +6 -22
  18. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/dataloader.py +1 -4
  19. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/tensorflow_dataloader.py +1 -1
  20. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/datasets/coco_dataset.py +2 -3
  21. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/datasets/dataset.py +13 -131
  22. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/datasets/dummy_dataset.py +1 -3
  23. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/datasets/dummy_dataset_v2.py +2 -3
  24. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/datasets/imagenet_dataset.py +0 -14
  25. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/filters/coco_filter.py +1 -1
  26. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/filters/filter.py +4 -19
  27. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/transforms/transform.py +30 -294
  28. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/metric/metric.py +6 -66
  29. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/mix_precision.py +0 -2
  30. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/model/base_model.py +2 -3
  31. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/model/model.py +3 -17
  32. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/quantization.py +0 -2
  33. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/strategy.py +0 -2
  34. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/utils/utility.py +4 -4
  35. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/common.py +13 -1
  36. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/fp_utils.py +24 -13
  37. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/measure.py +9 -1
  38. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/patching_common.py +33 -8
  39. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/quant_dequant.py +11 -18
  40. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/quantize.py +7 -3
  41. neural_compressor-3.4/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/__init__.py +22 -0
  42. neural_compressor-3.4/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/hpu/__init__.py +13 -0
  43. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_hpu_ops.py → neural_compressor-3.4/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/hpu/hpu_quantized_func_wrapper.py +38 -62
  44. neural_compressor-3.4/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/quantized_func_wrapper.py +86 -0
  45. neural_compressor-3.4/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/quantized_func_wrapper_api.py +45 -0
  46. neural_compressor-3.4/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/xpu/__init__.py +13 -0
  47. neural_compressor-3.4/neural_compressor/torch/algorithms/fp8_quant/_core/quantized_func_wrappers/xpu/xpu_quantized_func_wrapper.py +75 -0
  48. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/scale.py +4 -3
  49. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_handler.py +3 -0
  50. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/ops_quantizer.py +136 -61
  51. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/round_scales_function.py +2 -2
  52. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/scale_method_factory.py +10 -5
  53. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/scales_method.py +8 -7
  54. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/utils.py +29 -4
  55. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py +288 -86
  56. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/quant_config.py +37 -23
  57. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/common.py +2 -12
  58. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/model_configs.py +24 -0
  59. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/patched_module_base.py +13 -24
  60. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/quantizer.py +15 -10
  61. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/save_load.py +210 -45
  62. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/layer_wise/modified_pickle.py +1 -1
  63. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/layer_wise/utils.py +138 -6
  64. neural_compressor-3.4/neural_compressor/torch/algorithms/mixed_low_precision/__init__.py +15 -0
  65. neural_compressor-3.4/neural_compressor/torch/algorithms/mixed_low_precision/modules.py +150 -0
  66. neural_compressor-3.4/neural_compressor/torch/algorithms/mixed_low_precision/quantizer.py +64 -0
  67. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py +3 -1
  68. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/static_quant/static_quant.py +3 -1
  69. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/static_quant/utility.py +2 -2
  70. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/autoround.py +11 -2
  71. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/gptq.py +108 -31
  72. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/modules.py +40 -1
  73. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/save_load.py +115 -44
  74. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/export/pt2e_export.py +9 -4
  75. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/quantization/__init__.py +1 -0
  76. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/quantization/algorithm_entry.py +30 -3
  77. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/quantization/config.py +48 -1
  78. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/quantization/quantize.py +70 -54
  79. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/quantization/save_load_entry.py +2 -1
  80. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/utils/auto_accelerator.py +36 -0
  81. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/utils/environ.py +10 -0
  82. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/utils/utility.py +2 -0
  83. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/models/modeling_auto.py +239 -196
  84. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/quantization/utils.py +10 -9
  85. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/load_huggingface.py +3 -76
  86. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/options.py +0 -1
  87. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/utility.py +4 -11
  88. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/version.py +1 -1
  89. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor.egg-info/PKG-INFO +12 -24
  90. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor.egg-info/SOURCES.txt +9 -8
  91. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor.egg-info/requires.txt +1 -6
  92. {neural_compressor-3.3 → neural_compressor-3.4}/setup.py +12 -9
  93. neural_compressor-3.3/neural_compressor/adaptor/mxnet.py +0 -505
  94. neural_compressor-3.3/neural_compressor/adaptor/mxnet.yaml +0 -355
  95. neural_compressor-3.3/neural_compressor/adaptor/mxnet_utils/__init__.py +0 -18
  96. neural_compressor-3.3/neural_compressor/adaptor/mxnet_utils/util.py +0 -925
  97. neural_compressor-3.3/neural_compressor/data/dataloaders/mxnet_dataloader.py +0 -57
  98. neural_compressor-3.3/neural_compressor/model/mxnet_model.py +0 -76
  99. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/utils/__init__.py +0 -18
  100. neural_compressor-3.3/neural_compressor/torch/algorithms/fp8_quant/utils/patched_module_restore_registry.py +0 -142
  101. {neural_compressor-3.3 → neural_compressor-3.4}/LICENSE +0 -0
  102. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/__init__.py +0 -0
  103. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras.yaml +0 -0
  104. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras_utils/__init__.py +0 -0
  105. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras_utils/conv2d.py +0 -0
  106. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras_utils/dense.py +0 -0
  107. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py +0 -0
  108. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras_utils/pool2d.py +0 -0
  109. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras_utils/quantizer.py +0 -0
  110. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/keras_utils/separable_conv2d.py +0 -0
  111. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/onnxrt.yaml +0 -0
  112. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/onnxrt_cuda.yaml +0 -0
  113. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/onnxrt_dml.yaml +0 -0
  114. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/onnxrt_dnnl.yaml +0 -0
  115. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/onnxrt_trt.yaml +0 -0
  116. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/__init__.py +0 -0
  117. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/calibration.py +0 -0
  118. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/calibrator.py +0 -0
  119. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/__init__.py +0 -0
  120. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/activation.py +0 -0
  121. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/argmax.py +0 -0
  122. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/attention.py +0 -0
  123. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/binary_op.py +0 -0
  124. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/concat.py +0 -0
  125. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/conv.py +0 -0
  126. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +0 -0
  127. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +0 -0
  128. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/gather.py +0 -0
  129. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +0 -0
  130. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/gemm.py +0 -0
  131. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/lstm.py +0 -0
  132. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/matmul.py +0 -0
  133. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/maxpool.py +0 -0
  134. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/norm.py +0 -0
  135. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/ops.py +0 -0
  136. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/pad.py +0 -0
  137. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/pooling.py +0 -0
  138. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/reduce.py +0 -0
  139. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/resize.py +0 -0
  140. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/split.py +0 -0
  141. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/operators/unary_op.py +0 -0
  142. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/quantizer.py +0 -0
  143. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/smooth_quant.py +0 -0
  144. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/util.py +0 -0
  145. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/ox_utils/weight_only.py +0 -0
  146. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/pytorch_cpu.yaml +0 -0
  147. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/pytorch_gpu.yaml +0 -0
  148. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/pytorch_ipex.yaml +0 -0
  149. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/query.py +0 -0
  150. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tensorflow.yaml +0 -0
  151. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tensorflow_itex.yaml +0 -0
  152. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/__init__.py +0 -0
  153. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_converter.py +0 -0
  154. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +0 -0
  155. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/__init__.py +0 -0
  156. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/__init__.py +0 -0
  157. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py +0 -0
  158. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
  159. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/__init__.py +0 -0
  160. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
  161. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py +0 -0
  162. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
  163. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
  164. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
  165. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py +0 -0
  166. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
  167. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
  168. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
  169. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
  170. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py +0 -0
  171. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
  172. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
  173. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
  174. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
  175. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
  176. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +0 -0
  177. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
  178. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
  179. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
  180. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
  181. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
  182. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py +0 -0
  183. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py +0 -0
  184. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
  185. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +0 -0
  186. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
  187. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
  188. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py +0 -0
  189. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
  190. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
  191. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py +0 -0
  192. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py +0 -0
  193. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/__init__.py +0 -0
  194. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
  195. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py +0 -0
  196. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py +0 -0
  197. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
  198. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
  199. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
  200. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
  201. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
  202. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
  203. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
  204. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py +0 -0
  205. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py +0 -0
  206. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py +0 -0
  207. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +0 -0
  208. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +0 -0
  209. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +0 -0
  210. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +0 -0
  211. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/__init__.py +0 -0
  212. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
  213. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
  214. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
  215. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/graph_util.py +0 -0
  216. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/__init__.py +0 -0
  217. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py +0 -0
  218. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py +0 -0
  219. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py +0 -0
  220. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py +0 -0
  221. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py +0 -0
  222. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py +0 -0
  223. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +0 -0
  224. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +0 -0
  225. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +0 -0
  226. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py +0 -0
  227. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/__init__.py +0 -0
  228. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
  229. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
  230. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
  231. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
  232. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
  233. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
  234. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
  235. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py +0 -0
  236. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py +0 -0
  237. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py +0 -0
  238. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py +0 -0
  239. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py +0 -0
  240. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
  241. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py +0 -0
  242. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py +0 -0
  243. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/quantize_graph_common.py +0 -0
  244. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py +0 -0
  245. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py +0 -0
  246. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +0 -0
  247. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/transform_graph/__init__.py +0 -0
  248. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py +0 -0
  249. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py +0 -0
  250. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py +0 -0
  251. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py +0 -0
  252. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/tf_utils/util.py +0 -0
  253. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/__init__.py +0 -0
  254. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/auto_round.py +0 -0
  255. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/awq.py +0 -0
  256. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/bf16_convert.py +0 -0
  257. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/gptq.py +0 -0
  258. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/hawq_metric.py +0 -0
  259. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py +0 -0
  260. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py +0 -0
  261. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py +0 -0
  262. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py +0 -0
  263. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/mixed_precision.py +0 -0
  264. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/model_wrapper.py +0 -0
  265. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/pattern_detector.py +0 -0
  266. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/symbolic_trace.py +0 -0
  267. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/teq.py +0 -0
  268. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/util.py +0 -0
  269. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/waq/__init__.py +0 -0
  270. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/waq/auto_alpha.py +0 -0
  271. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/waq/calibration.py +0 -0
  272. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/waq/graph_trace.py +0 -0
  273. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py +0 -0
  274. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/waq/utils.py +0 -0
  275. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/adaptor/torch_utils/weight_only.py +0 -0
  276. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/algorithm/__init__.py +0 -0
  277. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/algorithm/algorithm.py +0 -0
  278. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/algorithm/fast_bias_correction.py +0 -0
  279. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/algorithm/smooth_quant.py +0 -0
  280. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/algorithm/weight_correction.py +0 -0
  281. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/__init__.py +0 -0
  282. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/base_tuning.py +0 -0
  283. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/benchmark.py +0 -0
  284. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/tuning_param.py +0 -0
  285. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/common/utils/__init__.py +0 -0
  286. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/__init__.py +0 -0
  287. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/callbacks.py +0 -0
  288. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/distillation/__init__.py +0 -0
  289. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/distillation/criterions.py +0 -0
  290. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/distillation/optimizers.py +0 -0
  291. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/distillation/utility.py +0 -0
  292. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/hpo/__init__.py +0 -0
  293. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/hpo/sa_optimizer.py +0 -0
  294. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/hpo/search_algorithms.py +0 -0
  295. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/hpo/search_space.py +0 -0
  296. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/__init__.py +0 -0
  297. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/criteria.py +0 -0
  298. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/dsnot.py +0 -0
  299. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/model_slim/__init__.py +0 -0
  300. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/model_slim/auto_slim.py +0 -0
  301. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py +0 -0
  302. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/model_slim/weight_slim.py +0 -0
  303. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/patterns/__init__.py +0 -0
  304. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/patterns/base.py +0 -0
  305. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/patterns/mha.py +0 -0
  306. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/patterns/ninm.py +0 -0
  307. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/patterns/nxm.py +0 -0
  308. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/__init__.py +0 -0
  309. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/base.py +0 -0
  310. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/basic.py +0 -0
  311. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/block_mask.py +0 -0
  312. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/mha.py +0 -0
  313. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/pattern_lock.py +0 -0
  314. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/progressive.py +0 -0
  315. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/retrain_free.py +0 -0
  316. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruners/sparse_gpt.py +0 -0
  317. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/pruning.py +0 -0
  318. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/regs.py +0 -0
  319. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/schedulers.py +0 -0
  320. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/tf_criteria.py +0 -0
  321. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/utils.py +0 -0
  322. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/wanda/__init__.py +0 -0
  323. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/wanda/prune.py +0 -0
  324. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/wanda/utils.py +0 -0
  325. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/compression/pruner/wanda/wrapper.py +0 -0
  326. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/contrib/__init__.py +0 -0
  327. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/contrib/strategy/__init__.py +0 -0
  328. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/contrib/strategy/sigopt.py +0 -0
  329. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/contrib/strategy/tpe.py +0 -0
  330. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/__init__.py +0 -0
  331. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/__init__.py +0 -0
  332. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/base_dataloader.py +0 -0
  333. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/default_dataloader.py +0 -0
  334. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/fetcher.py +0 -0
  335. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/onnxrt_dataloader.py +0 -0
  336. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/pytorch_dataloader.py +0 -0
  337. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/dataloaders/sampler.py +0 -0
  338. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/datasets/__init__.py +0 -0
  339. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/datasets/bert_dataset.py +0 -0
  340. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/datasets/style_transfer_dataset.py +0 -0
  341. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/filters/__init__.py +0 -0
  342. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/transforms/__init__.py +0 -0
  343. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/transforms/coco_transform.py +0 -0
  344. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/transforms/imagenet_transform.py +0 -0
  345. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/transforms/postprocess.py +0 -0
  346. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/data/transforms/tokenization.py +0 -0
  347. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/__init__.py +0 -0
  348. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/bigcode_eval/__init__.py +0 -0
  349. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/bigcode_eval/evaluator.py +0 -0
  350. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/hf_eval/__init__.py +0 -0
  351. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/hf_eval/evaluator.py +0 -0
  352. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/hf_eval/hf_datasets/__init__.py +0 -0
  353. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/hf_eval/hf_datasets/cnn_dailymail.py +0 -0
  354. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/lm_eval/__init__.py +0 -0
  355. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/lm_eval/accuracy.py +0 -0
  356. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/lm_eval/models/__init__.py +0 -0
  357. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/lm_eval/models/huggingface.py +0 -0
  358. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/evaluation/lm_eval/utils.py +0 -0
  359. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/metric/__init__.py +0 -0
  360. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/metric/bleu.py +0 -0
  361. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/metric/bleu_util.py +0 -0
  362. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/metric/coco_label_map.py +0 -0
  363. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/metric/coco_tools.py +0 -0
  364. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/metric/evaluate_squad.py +0 -0
  365. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/metric/f1.py +0 -0
  366. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/model/__init__.py +0 -0
  367. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/model/keras_model.py +0 -0
  368. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/model/nets_factory.py +0 -0
  369. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/model/onnx_model.py +0 -0
  370. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/model/tensorflow_model.py +0 -0
  371. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/model/torch_model.py +0 -0
  372. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/objective.py +0 -0
  373. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/__init__.py +0 -0
  374. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/__init__.py +0 -0
  375. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/factory.py +0 -0
  376. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/onnx_parser/__init__.py +0 -0
  377. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/onnx_parser/factory.py +0 -0
  378. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/onnx_parser/parser.py +0 -0
  379. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/parser.py +0 -0
  380. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/result.py +0 -0
  381. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/tensorflow_parser/__init__.py +0 -0
  382. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/tensorflow_parser/factory.py +0 -0
  383. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/parser/tensorflow_parser/parser.py +0 -0
  384. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/__init__.py +0 -0
  385. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/factory.py +0 -0
  386. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/onnxrt_profiler/__init__.py +0 -0
  387. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +0 -0
  388. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +0 -0
  389. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/onnxrt_profiler/utils.py +0 -0
  390. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/profiler.py +0 -0
  391. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/tensorflow_profiler/__init__.py +0 -0
  392. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +0 -0
  393. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +0 -0
  394. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py +0 -0
  395. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/__init__.py +0 -0
  396. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/auto.py +0 -0
  397. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/auto_mixed_precision.py +0 -0
  398. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/basic.py +0 -0
  399. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/bayesian.py +0 -0
  400. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/conservative.py +0 -0
  401. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/exhaustive.py +0 -0
  402. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/hawq_v2.py +0 -0
  403. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/mse.py +0 -0
  404. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/mse_v2.py +0 -0
  405. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/random.py +0 -0
  406. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/utils/__init__.py +0 -0
  407. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/utils/constant.py +0 -0
  408. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/utils/tuning_sampler.py +0 -0
  409. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/utils/tuning_space.py +0 -0
  410. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/utils/tuning_structs.py +0 -0
  411. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/strategy/utils/utility.py +0 -0
  412. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/__init__.py +0 -0
  413. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/__init__.py +0 -0
  414. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/smoother/__init__.py +0 -0
  415. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/smoother/calibration.py +0 -0
  416. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/smoother/core.py +0 -0
  417. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/smoother/scaler.py +0 -0
  418. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/static_quant/__init__.py +0 -0
  419. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/static_quant/keras.py +0 -0
  420. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/static_quant/keras.yaml +0 -0
  421. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +0 -0
  422. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.yaml +0 -0
  423. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/algorithms/static_quant/tensorflow_itex.yaml +0 -0
  424. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/__init__.py +0 -0
  425. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/layers/__init__.py +0 -0
  426. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/layers/conv2d.py +0 -0
  427. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/layers/dense.py +0 -0
  428. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/layers/depthwise_conv2d.py +0 -0
  429. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/layers/layer_initializer.py +0 -0
  430. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/layers/pool2d.py +0 -0
  431. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/layers/separable_conv2d.py +0 -0
  432. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/quantization/__init__.py +0 -0
  433. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/keras/quantization/config.py +0 -0
  434. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/__init__.py +0 -0
  435. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/algorithm_entry.py +0 -0
  436. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/autotune.py +0 -0
  437. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/config.py +0 -0
  438. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/quantize.py +0 -0
  439. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/__init__.py +0 -0
  440. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_converter.py +0 -0
  441. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/__init__.py +0 -0
  442. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/__init__.py +0 -0
  443. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/bf16_convert.py +0 -0
  444. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
  445. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/__init__.py +0 -0
  446. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
  447. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_layout.py +0 -0
  448. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
  449. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
  450. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
  451. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dilated_contraction.py +0 -0
  452. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/dummy_biasadd.py +0 -0
  453. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
  454. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
  455. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
  456. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fold_constant.py +0 -0
  457. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
  458. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
  459. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
  460. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
  461. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
  462. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_gelu.py +0 -0
  463. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
  464. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
  465. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
  466. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
  467. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
  468. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/grappler_pass.py +0 -0
  469. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/insert_print_node.py +0 -0
  470. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
  471. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/pre_optimize.py +0 -0
  472. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
  473. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
  474. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/split_shared_input.py +0 -0
  475. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
  476. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
  477. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/generic/switch_optimizer.py +0 -0
  478. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/graph_base.py +0 -0
  479. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/__init__.py +0 -0
  480. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
  481. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/freeze_value.py +0 -0
  482. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
  483. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_conv_requantize.py +0 -0
  484. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
  485. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/fuse_matmul_requantize.py +0 -0
  486. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
  487. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
  488. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
  489. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/int8/scale_propagation.py +0 -0
  490. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/__init__.py +0 -0
  491. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
  492. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
  493. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
  494. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/graph_util.py +0 -0
  495. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/__init__.py +0 -0
  496. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/__init__.py +0 -0
  497. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
  498. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
  499. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
  500. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
  501. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
  502. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
  503. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
  504. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/qdq/optimize_qdq.py +0 -0
  505. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_base.py +0 -0
  506. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_bn.py +0 -0
  507. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_concatv2.py +0 -0
  508. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_conv.py +0 -0
  509. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
  510. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_matmul.py +0 -0
  511. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph/quantize_graph_pooling.py +0 -0
  512. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/quantize_graph_common.py +0 -0
  513. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/transform_graph/__init__.py +0 -0
  514. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/transform_graph/bias_correction.py +0 -0
  515. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/transform_graph/graph_transform_base.py +0 -0
  516. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/transform_graph/insert_logging.py +0 -0
  517. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/transform_graph/rerange_quantized_concat.py +0 -0
  518. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/quantization/utils/utility.py +0 -0
  519. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/utils/__init__.py +0 -0
  520. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/utils/constants.py +0 -0
  521. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/utils/data.py +0 -0
  522. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/utils/model.py +0 -0
  523. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/tensorflow/utils/model_wrappers.py +0 -0
  524. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/__init__.py +0 -0
  525. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/__init__.py +0 -0
  526. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/base_algorithm.py +0 -0
  527. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/__init__.py +0 -0
  528. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/__init__.py +0 -0
  529. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_core/scale_methods/__init__.py +0 -0
  530. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/_quant_common/__init__.py +0 -0
  531. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/custom_config/__init__.py +0 -0
  532. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/observer.py +0 -0
  533. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/__init__.py +0 -0
  534. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/prepare_quant/prepare_model.py +0 -0
  535. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/scaling_method_base.py +0 -0
  536. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/scripts/__init__.py +0 -0
  537. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/scripts/postprocessing_vllm_measurements.py +0 -0
  538. {neural_compressor-3.3/neural_compressor/torch/algorithms/mixed_low_precision → neural_compressor-3.4/neural_compressor/torch/algorithms/fp8_quant/utils}/__init__.py +0 -0
  539. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/fp8_quant/utils/logger.py +0 -0
  540. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/layer_wise/__init__.py +0 -0
  541. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/layer_wise/load.py +0 -0
  542. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/__init__.py +0 -0
  543. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/gptq.py +0 -0
  544. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot.py +0 -0
  545. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mixed_low_precision/custom_methods/quarot_utils.py +0 -0
  546. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mixed_precision/__init__.py +0 -0
  547. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mixed_precision/half_precision_convert.py +0 -0
  548. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mixed_precision/module_wrappers.py +0 -0
  549. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mx_quant/__init__.py +0 -0
  550. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mx_quant/mx.py +0 -0
  551. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/mx_quant/utils.py +0 -0
  552. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/pt2e_quant/__init__.py +0 -0
  553. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/pt2e_quant/core.py +0 -0
  554. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/pt2e_quant/half_precision_rewriter.py +0 -0
  555. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/pt2e_quant/save_load.py +0 -0
  556. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/pt2e_quant/utility.py +0 -0
  557. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/smooth_quant/__init__.py +0 -0
  558. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/smooth_quant/save_load.py +0 -0
  559. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/smooth_quant/utility.py +0 -0
  560. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/static_quant/__init__.py +0 -0
  561. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/static_quant/save_load.py +0 -0
  562. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/__init__.py +0 -0
  563. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/awq.py +0 -0
  564. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/hqq/__init__.py +0 -0
  565. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/hqq/bitpack.py +0 -0
  566. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/hqq/config.py +0 -0
  567. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/hqq/core.py +0 -0
  568. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/hqq/optimizer.py +0 -0
  569. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/hqq/qtensor.py +0 -0
  570. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/hqq/quantizer.py +0 -0
  571. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/rtn.py +0 -0
  572. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/teq.py +0 -0
  573. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/algorithms/weight_only/utility.py +0 -0
  574. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/export/__init__.py +0 -0
  575. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/quantization/autotune.py +0 -0
  576. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/utils/__init__.py +0 -0
  577. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/utils/bit_packer.py +0 -0
  578. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/utils/block_wise.py +0 -0
  579. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/utils/constants.py +0 -0
  580. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/torch/utils/llm_utility.py +0 -0
  581. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/training.py +0 -0
  582. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/__init__.py +0 -0
  583. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/generation/__init__.py +0 -0
  584. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/generation/beam_search.py +0 -0
  585. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/generation/greedy_search.py +0 -0
  586. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/models/__init__.py +0 -0
  587. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/quantization/__init__.py +0 -0
  588. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/utils/__init__.py +0 -0
  589. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/transformers/utils/quantization_config.py +0 -0
  590. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/__init__.py +0 -0
  591. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/collect_layer_histogram.py +0 -0
  592. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/constant.py +0 -0
  593. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/create_obj_from_config.py +0 -0
  594. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/export/__init__.py +0 -0
  595. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/export/qlinear2qdq.py +0 -0
  596. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/export/tf2onnx.py +0 -0
  597. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/export/torch2onnx.py +0 -0
  598. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/kl_divergence.py +0 -0
  599. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/logger.py +0 -0
  600. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/pytorch.py +0 -0
  601. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor/utils/weights_details.py +0 -0
  602. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor.egg-info/dependency_links.txt +0 -0
  603. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor.egg-info/entry_points.txt +0 -0
  604. {neural_compressor-3.3 → neural_compressor-3.4}/neural_compressor.egg-info/top_level.txt +0 -0
  605. {neural_compressor-3.3 → neural_compressor-3.4}/pyproject.toml +0 -0
  606. {neural_compressor-3.3 → neural_compressor-3.4}/setup.cfg +0 -0
  607. {neural_compressor-3.3 → neural_compressor-3.4}/third-party-programs.txt +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.1
2
2
  Name: neural_compressor
3
- Version: 3.3
3
+ Version: 3.4
4
4
  Summary: Repository of Intel® Neural Compressor
5
5
  Home-page: https://github.com/intel/neural-compressor
6
6
  Author: Intel AIPT Team
@@ -30,8 +30,7 @@ Requires-Dist: requests
30
30
  Requires-Dist: schema
31
31
  Requires-Dist: scikit-learn
32
32
  Provides-Extra: pt
33
- Requires-Dist: numpy==1.23.5; python_version < "3.12" and extra == "pt"
34
- Requires-Dist: numpy<2.0; python_version >= "3.12" and extra == "pt"
33
+ Requires-Dist: numpy; extra == "pt"
35
34
  Requires-Dist: prettytable; extra == "pt"
36
35
  Requires-Dist: psutil; extra == "pt"
37
36
  Requires-Dist: py-cpuinfo; extra == "pt"
@@ -43,18 +42,6 @@ Requires-Dist: py-cpuinfo; extra == "tf"
43
42
  Requires-Dist: pydantic; extra == "tf"
44
43
  Requires-Dist: pyyaml; extra == "tf"
45
44
  Requires-Dist: tensorflow; extra == "tf"
46
- Dynamic: author
47
- Dynamic: author-email
48
- Dynamic: classifier
49
- Dynamic: description
50
- Dynamic: description-content-type
51
- Dynamic: home-page
52
- Dynamic: keywords
53
- Dynamic: license
54
- Dynamic: provides-extra
55
- Dynamic: requires-dist
56
- Dynamic: requires-python
57
- Dynamic: summary
58
45
 
59
46
  <div align="center">
60
47
 
@@ -63,7 +50,7 @@ Intel® Neural Compressor
63
50
  <h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
64
51
 
65
52
  [![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
66
- [![version](https://img.shields.io/badge/release-3.3-green)](https://github.com/intel/neural-compressor/releases)
53
+ [![version](https://img.shields.io/badge/release-3.4-green)](https://github.com/intel/neural-compressor/releases)
67
54
  [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
68
55
  [![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
69
56
  [![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
@@ -93,7 +80,7 @@ support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testi
93
80
  Choose the necessary framework dependencies to install based on your deploy environment.
94
81
  ### Install Framework
95
82
  * [Install intel_extension_for_pytorch for CPU](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/)
96
- * [Install intel_extension_for_pytorch for XPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
83
+ * [Install intel_extension_for_pytorch for Intel GPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
97
84
  * [Use Docker Image with torch installed for HPU](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click)
98
85
  **Note**: There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
99
86
  * [Install torch for other platform](https://pytorch.org/get-started/locally)
@@ -114,8 +101,11 @@ To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, p
114
101
 
115
102
  Run a container with an interactive shell, [more info](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#docker-installation)
116
103
  ```
117
- docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.20.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
104
+ docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.21.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
118
105
  ```
106
+
107
+ > Note: Since Habana software >= 1.21.0, `PT_HPU_LAZY_MODE=0` is the default setting. However, most low-precision functions (such as `convert_from_uint4`) do not support this setting. Therefore, we recommend setting `PT_HPU_LAZY_MODE=1` to maintain compatibility.
108
+
119
109
  Run the example,
120
110
  ```python
121
111
  from neural_compressor.torch.quantization import (
@@ -231,12 +221,10 @@ model = load(
231
221
 
232
222
  ## Selected Publications/Events
233
223
 
224
+ * arXiv: [Faster Inference of LLMs using FP8 on the Intel Gaudi](https://arxiv.org/abs/2503.09975) (Mar 2025)
225
+ * PyTorch landscape: [PyTorch general optimizations](https://landscape.pytorch.org/) (Mar 2025)
226
+ * Blog on SqueezeBits: [[Intel Gaudi] #4. FP8 Quantization](https://blog.squeezebits.com/intel-gaudi-4-fp8-quantization--40269) (Jan 2025)
234
227
  * EMNLP'2024: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2024)
235
- * Blog on Medium: [Quantization on Intel Gaudi Series AI Accelerators](https://medium.com/intel-analytics-software/intel-neural-compressor-v3-0-a-quantization-tool-across-intel-hardware-9856adee6f11) (Aug 2024)
236
- * Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024)
237
- * Blog by Intel: [Optimization of Intel AI Solutions for Alibaba Cloud’s Qwen2 Large Language Models](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-ai-solutions-accelerate-alibaba-qwen2-llms.html) (June 2024)
238
- * Blog by Intel: [Accelerate Meta* Llama 3 with Intel AI Solutions](https://www.intel.com/content/www/us/en/developer/articles/technical/accelerate-meta-llama3-with-intel-ai-solutions.html) (Apr 2024)
239
- * EMNLP'2023 (Under Review): [TEQ: Trainable Equivalent Transformation for Quantization of LLMs](https://openreview.net/forum?id=iaI8xEINAf&referrer=%5BAuthor%20Console%5D) (Sep 2023)
240
228
  * arXiv: [Efficient Post-training Quantization with FP8 Formats](https://arxiv.org/abs/2309.14592) (Sep 2023)
241
229
  * arXiv: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2023)
242
230
 
@@ -5,7 +5,7 @@ Intel® Neural Compressor
5
5
  <h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
6
6
 
7
7
  [![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
8
- [![version](https://img.shields.io/badge/release-3.3-green)](https://github.com/intel/neural-compressor/releases)
8
+ [![version](https://img.shields.io/badge/release-3.4-green)](https://github.com/intel/neural-compressor/releases)
9
9
  [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
10
10
  [![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
11
11
  [![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
@@ -35,7 +35,7 @@ support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testi
35
35
  Choose the necessary framework dependencies to install based on your deploy environment.
36
36
  ### Install Framework
37
37
  * [Install intel_extension_for_pytorch for CPU](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/)
38
- * [Install intel_extension_for_pytorch for XPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
38
+ * [Install intel_extension_for_pytorch for Intel GPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
39
39
  * [Use Docker Image with torch installed for HPU](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click)
40
40
  **Note**: There is a version mapping between Intel Neural Compressor and Gaudi Software Stack, please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
41
41
  * [Install torch for other platform](https://pytorch.org/get-started/locally)
@@ -56,8 +56,11 @@ To try on Intel Gaudi2, docker image with Gaudi Software Stack is recommended, p
56
56
 
57
57
  Run a container with an interactive shell, [more info](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#docker-installation)
58
58
  ```
59
- docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.20.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
59
+ docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.21.0/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
60
60
  ```
61
+
62
+ > Note: Since Habana software >= 1.21.0, `PT_HPU_LAZY_MODE=0` is the default setting. However, most low-precision functions (such as `convert_from_uint4`) do not support this setting. Therefore, we recommend setting `PT_HPU_LAZY_MODE=1` to maintain compatibility.
63
+
61
64
  Run the example,
62
65
  ```python
63
66
  from neural_compressor.torch.quantization import (
@@ -173,12 +176,10 @@ model = load(
173
176
 
174
177
  ## Selected Publications/Events
175
178
 
179
+ * arXiv: [Faster Inference of LLMs using FP8 on the Intel Gaudi](https://arxiv.org/abs/2503.09975) (Mar 2025)
180
+ * PyTorch landscape: [PyTorch general optimizations](https://landscape.pytorch.org/) (Mar 2025)
181
+ * Blog on SqueezeBits: [[Intel Gaudi] #4. FP8 Quantization](https://blog.squeezebits.com/intel-gaudi-4-fp8-quantization--40269) (Jan 2025)
176
182
  * EMNLP'2024: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2024)
177
- * Blog on Medium: [Quantization on Intel Gaudi Series AI Accelerators](https://medium.com/intel-analytics-software/intel-neural-compressor-v3-0-a-quantization-tool-across-intel-hardware-9856adee6f11) (Aug 2024)
178
- * Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024)
179
- * Blog by Intel: [Optimization of Intel AI Solutions for Alibaba Cloud’s Qwen2 Large Language Models](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-ai-solutions-accelerate-alibaba-qwen2-llms.html) (June 2024)
180
- * Blog by Intel: [Accelerate Meta* Llama 3 with Intel AI Solutions](https://www.intel.com/content/www/us/en/developer/articles/technical/accelerate-meta-llama3-with-intel-ai-solutions.html) (Apr 2024)
181
- * EMNLP'2023 (Under Review): [TEQ: Trainable Equivalent Transformation for Quantization of LLMs](https://openreview.net/forum?id=iaI8xEINAf&referrer=%5BAuthor%20Console%5D) (Sep 2023)
182
183
  * arXiv: [Efficient Post-training Quantization with FP8 Formats](https://arxiv.org/abs/2309.14592) (Sep 2023)
183
184
  * arXiv: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2023)
184
185
 
@@ -17,16 +17,18 @@
17
17
  """Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques."""
18
18
  from .version import __version__
19
19
 
20
- # we need to set a global 'NA' backend, or Model can't be used
21
- from .config import (
22
- DistillationConfig,
23
- PostTrainingQuantConfig,
24
- WeightPruningConfig,
25
- QuantizationAwareTrainingConfig,
26
- MixedPrecisionConfig,
27
- )
28
- from .contrib import *
29
- from .model import *
30
- from .metric import *
31
- from .utils import options
32
- from .utils.utility import set_random_seed, set_tensorboard, set_workspace, set_resume_from
20
+ import os
21
+
22
+ if not (os.environ.get("INC_PT_ONLY", False) or os.environ.get("INC_TF_ONLY", False)):
23
+ from .config import (
24
+ DistillationConfig,
25
+ PostTrainingQuantConfig,
26
+ WeightPruningConfig,
27
+ QuantizationAwareTrainingConfig,
28
+ MixedPrecisionConfig,
29
+ )
30
+ from .contrib import *
31
+ from .model import *
32
+ from .metric import *
33
+ from .utils import options
34
+ from .utils.utility import set_random_seed, set_tensorboard, set_workspace, set_resume_from
@@ -17,7 +17,7 @@
17
17
 
18
18
  from abc import abstractmethod
19
19
 
20
- """The framework backends supported by neural_compressor, including tensorflow, mxnet and pytorch.
20
+ """The framework backends supported by neural_compressor, including tensorflow and pytorch.
21
21
 
22
22
  User could add new backend support by implementing new Adaptor subclass under this directory.
23
23
  The naming convention of new Adaptor subclass should be something like ABCAdaptor, user
@@ -23,6 +23,7 @@ from collections import OrderedDict, UserDict
23
23
 
24
24
  import numpy as np
25
25
  import yaml
26
+ from deprecated import deprecated
26
27
 
27
28
  from ..data.dataloaders.base_dataloader import BaseDataLoader
28
29
  from ..utils import logger
@@ -68,6 +69,7 @@ def _add_supported_quantized_objects(custom_objects):
68
69
  return custom_objects
69
70
 
70
71
 
72
+ @deprecated(reason="KerasAdaptor is deprecated and may be removed in future versions.", version="3.4")
71
73
  @adaptor_registry
72
74
  class KerasAdaptor(Adaptor):
73
75
  """The keras class of framework adaptor layer."""
@@ -30,6 +30,7 @@ from typing import Dict
30
30
 
31
31
  import numpy as np
32
32
  import yaml
33
+ from deprecated import deprecated
33
34
  from packaging.version import Version
34
35
 
35
36
  from neural_compressor.adaptor.adaptor import Adaptor, adaptor_registry
@@ -48,6 +49,7 @@ ONNXRT112_VERSION = Version("1.12.0")
48
49
  logger = logging.getLogger("neural_compressor")
49
50
 
50
51
 
52
+ @deprecated(reason="ONNXRUNTIMEAdaptor is deprecated and may be removed in future versions.", version="3.4")
51
53
  @adaptor_registry
52
54
  class ONNXRUNTIMEAdaptor(Adaptor):
53
55
  """The ONNXRT adaptor layer, do onnx-rt quantization, calibration, inspect layer tensors.
@@ -4170,8 +4170,12 @@ class PyTorch_FXAdaptor(TemplateAdaptor):
4170
4170
  sub_name = node.target
4171
4171
  if not hasattr(model, node.target):
4172
4172
  continue
4173
- if "scale" in node.target:
4174
- tune_cfg["get_attr"][sub_name] = float(getattr(model, node.target))
4173
+ # Improved scale detection logic
4174
+ if "scale" in node.target and not any(exclude in node.target for exclude in ["layer_scale", "gamma"]):
4175
+ try:
4176
+ tune_cfg["get_attr"][sub_name] = getattr(model, node.target).tolist()
4177
+ except Exception as e:
4178
+ logger.warning(f"Could not convert {node.target} to list, skipping... Error: {str(e)}")
4175
4179
  elif "zero_point" in node.target:
4176
4180
  tune_cfg["get_attr"][sub_name] = int(getattr(model, node.target))
4177
4181
  else:
@@ -23,6 +23,7 @@ from collections import OrderedDict, UserDict
23
23
 
24
24
  import numpy as np
25
25
  import yaml
26
+ from deprecated import deprecated
26
27
 
27
28
  from ..data.dataloaders.base_dataloader import BaseDataLoader
28
29
  from ..utils import logger
@@ -55,6 +56,7 @@ spr_base_verions = (
55
56
  )
56
57
 
57
58
 
59
+ @deprecated(reason="TensorFlowAdaptor is deprecated and may be removed in future versions.", version="3.4")
58
60
  @adaptor_registry
59
61
  class TensorFlowAdaptor(Adaptor):
60
62
  """Adaptor Layer for stock tensorflow and spr-base."""
@@ -483,7 +483,7 @@ class _Pickler: # pragma: no cover
483
483
  The memo is the data structure that remembers which objects the
484
484
  pickler has already seen, so that shared or recursive objects
485
485
  are pickled by reference and not by value. This method is
486
- useful when re-using picklers.
486
+ useful when reusing picklers.
487
487
  """
488
488
  self.memo.clear()
489
489
 
@@ -166,8 +166,6 @@ def run_instance(model, conf, b_dataloader=None, b_func=None):
166
166
  )
167
167
  if framework == "keras":
168
168
  framework_specific_info.update({"workspace_path": options.workspace})
169
- if framework == "mxnet":
170
- framework_specific_info.update({"b_dataloader": b_dataloader})
171
169
  if "onnx" in framework:
172
170
  framework_specific_info.update(
173
171
  {"workspace_path": options.workspace, "graph_optimization": OPTIONS[framework].graph_optimization}
@@ -189,6 +189,7 @@ class BaseConfig(ABC):
189
189
 
190
190
  name = BASE_CONFIG
191
191
  params_list = []
192
+ _is_initialized = False
192
193
 
193
194
  def __init__(self, white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST) -> None:
194
195
  """Initialize the BaseConfig.
@@ -220,6 +221,14 @@ class BaseConfig(ABC):
220
221
  f"The white list should be one of {DEFAULT_WHITE_LIST}, {EMPTY_WHITE_LIST},"
221
222
  " a not empty list, but got {self.white_list}"
222
223
  )
224
+ self._is_initialized = True
225
+
226
+ def __setattr__(self, name, value):
227
+ """Override the setattr function to propagate updates."""
228
+ super().__setattr__(name, value)
229
+ if self._is_initialized and name in self.params_list:
230
+ self._is_initialized = False
231
+ self._post_init()
223
232
 
224
233
  @property
225
234
  def white_list(self):
@@ -683,6 +692,13 @@ class ComposableConfig(BaseConfig):
683
692
  self.config_list.append(other)
684
693
  return self
685
694
 
695
+ def __setattr__(self, name, value):
696
+ """Override the setattr function to propagate updates."""
697
+ ABC.__setattr__(self, name, value)
698
+ for config in self.config_list:
699
+ if hasattr(config, name):
700
+ setattr(config, name, value)
701
+
686
702
  def to_dict(self, params_list=[], operator2str=None):
687
703
  """Converts the configuration object to a dictionary.
688
704
 
@@ -884,7 +900,6 @@ class Options:
884
900
 
885
901
  def __init__(self, random_seed=1978, workspace=DEFAULT_WORKSPACE, resume_from=None, tensorboard=False):
886
902
  """Init an Option object."""
887
- os.makedirs(workspace, exist_ok=True)
888
903
  self.random_seed = random_seed
889
904
  self.workspace = workspace
890
905
  self.resume_from = resume_from
@@ -34,6 +34,7 @@ HQQ = "hqq" # pragma: no cover
34
34
  TEQ = "teq" # pragma: no cover
35
35
  AUTOROUND = "autoround"
36
36
  FP8_QUANT = "fp8_quant"
37
+ HYBRID_GPTQ = "hybrid_gptq"
37
38
  MX_QUANT = "mx_quant"
38
39
  MIXED_PRECISION = "mixed_precision"
39
40
 
@@ -51,12 +52,13 @@ from enum import Enum
51
52
 
52
53
 
53
54
  class Mode(Enum):
54
- """Enumeration class representing different modes of the quantizer execution."""
55
+ """Enumeration class representing different modes of the quantization."""
55
56
 
56
57
  PREPARE = "prepare"
57
58
  CONVERT = "convert"
58
59
  QUANTIZE = "quantize"
59
60
  LOAD = "load"
61
+ SAVE = "save"
60
62
 
61
63
 
62
64
  SERVER_PROCESSOR_BRAND_KEY_WORLD_LST = ["Xeon"]
@@ -161,6 +161,8 @@ def _get_log_msg(mode):
161
161
  log_msg = "Conversion"
162
162
  elif mode == Mode.LOAD: # pragma: no cover
163
163
  log_msg = "Loading"
164
+ elif mode == Mode.SAVE: # pragma: no cover
165
+ log_msg = "Saving"
164
166
  return log_msg
165
167
 
166
168
 
@@ -47,11 +47,23 @@ def load_config_mapping(qconfig_file_path, config_name_mapping): # pragma: no c
47
47
  Returns:
48
48
  config_mapping (dict): config mapping.
49
49
  """
50
+
51
+ def _fetch_from_string(key):
52
+ """Return op_name and op_type from key, such as "('transformer.h.0.attn.k_proj', 'Linear')"."""
53
+ import re
54
+
55
+ match = re.match(r"\('(.+)', '(.+)'\)", key)
56
+ if match:
57
+ op_name, op_type = match.groups()
58
+ return op_name, op_type
59
+ else:
60
+ raise ValueError(f"Invalid key format: {key}. Expected format: \"('op_name', 'op_type')\".")
61
+
50
62
  config_mapping = {}
51
63
  with open(qconfig_file_path, "r") as f:
52
64
  per_op_qconfig = json.load(f)
53
65
  for key, value in per_op_qconfig.items():
54
- op_name, op_type = eval(key)
66
+ op_name, op_type = _fetch_from_string(key)
55
67
  # value here is a dict, so we convert it to an object with config_name_mapping,
56
68
  # which is defined in a specific framework.
57
69
  config_name = next(iter(value))
@@ -108,13 +108,13 @@ class CpuInfo(object):
108
108
  max_extension_support = cpuid.get_max_extension_support()
109
109
  if max_extension_support >= 7:
110
110
  ecx = cpuid._run_asm(
111
- b"\x31\xC9", # xor ecx, ecx
112
- b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\x89\xC8" b"\xC3", # mov eax, 7 # cpuid # mov ax, cx # ret
111
+ b"\x31\xc9", # xor ecx, ecx
112
+ b"\xb8\x07\x00\x00\x00" b"\x0f\xa2" b"\x89\xc8" b"\xc3", # mov eax, 7 # cpuid # mov ax, cx # ret
113
113
  )
114
114
  self._vnni = bool(ecx & (1 << 11))
115
115
  eax = cpuid._run_asm(
116
- b"\xB9\x01\x00\x00\x00", # mov ecx, 1
117
- b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\xC3", # mov eax, 7 # cpuid # ret
116
+ b"\xb9\x01\x00\x00\x00", # mov ecx, 1
117
+ b"\xb8\x07\x00\x00\x00" b"\x0f\xa2" b"\xc3", # mov eax, 7 # cpuid # ret
118
118
  )
119
119
  self._bf16 = bool(eax & (1 << 5))
120
120
  self._info = info
@@ -15,4 +15,4 @@
15
15
  # See the License for the specific language governing permissions and
16
16
  # limitations under the License.
17
17
  """Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques."""
18
- __version__ = "3.3"
18
+ __version__ = "3.4"
@@ -2361,11 +2361,11 @@ class NASConfig:
2361
2361
  self._search = search
2362
2362
 
2363
2363
 
2364
- class MXNet:
2365
- """Base config class for MXNet."""
2364
+ class PyTorch:
2365
+ """Base config class for PyTorch."""
2366
2366
 
2367
2367
  def __init__(self, precisions=None):
2368
- """Init an MXNet object."""
2368
+ """Init a PyTorch object."""
2369
2369
  self._precisions = precisions
2370
2370
 
2371
2371
  @property
@@ -2383,7 +2383,7 @@ class MXNet:
2383
2383
  self._precisions = precisions
2384
2384
 
2385
2385
 
2386
- class ONNX(MXNet):
2386
+ class ONNX(PyTorch):
2387
2387
  """Config class for ONNX."""
2388
2388
 
2389
2389
  def __init__(self, graph_optimization_level=None, precisions=None):
@@ -2408,7 +2408,7 @@ class ONNX(MXNet):
2408
2408
  self._graph_optimization_level = graph_optimization_level
2409
2409
 
2410
2410
 
2411
- class TensorFlow(MXNet):
2411
+ class TensorFlow(PyTorch):
2412
2412
  """Config class for TensorFlow."""
2413
2413
 
2414
2414
  def __init__(self, precisions=None):
@@ -2416,7 +2416,7 @@ class TensorFlow(MXNet):
2416
2416
  super().__init__(precisions)
2417
2417
 
2418
2418
 
2419
- class Keras(MXNet):
2419
+ class Keras(PyTorch):
2420
2420
  """Config class for Keras."""
2421
2421
 
2422
2422
  def __init__(self, precisions=None):
@@ -2424,14 +2424,6 @@ class Keras(MXNet):
2424
2424
  super().__init__(precisions)
2425
2425
 
2426
2426
 
2427
- class PyTorch(MXNet):
2428
- """Config class for PyTorch."""
2429
-
2430
- def __init__(self, precisions=None):
2431
- """Init a PyTorch object."""
2432
- super().__init__(precisions)
2433
-
2434
-
2435
2427
  quantization = PostTrainingQuantConfig()
2436
2428
  benchmark = BenchmarkConfig()
2437
2429
  options = Options()
@@ -2443,7 +2435,6 @@ onnxruntime_config = ONNX()
2443
2435
  tensorflow_config = TensorFlow()
2444
2436
  keras_config = Keras()
2445
2437
  pytorch_config = PyTorch()
2446
- mxnet_config = MXNet()
2447
2438
 
2448
2439
 
2449
2440
  class _Config:
@@ -2460,7 +2451,6 @@ class _Config:
2460
2451
  onnxruntime=onnxruntime_config,
2461
2452
  tensorflow=tensorflow_config,
2462
2453
  pytorch=pytorch_config,
2463
- mxnet=mxnet_config,
2464
2454
  keras=keras_config,
2465
2455
  ):
2466
2456
  """Init a config object."""
@@ -2473,7 +2463,6 @@ class _Config:
2473
2463
  self._nas = nas
2474
2464
  self._tensorflow = tensorflow
2475
2465
  self._pytorch = pytorch
2476
- self._mxnet = mxnet
2477
2466
  self._keras = keras
2478
2467
 
2479
2468
  @property
@@ -2501,11 +2490,6 @@ class _Config:
2501
2490
  """Get the pytorch object."""
2502
2491
  return self._pytorch
2503
2492
 
2504
- @property
2505
- def mxnet(self):
2506
- """Get the mxnet object."""
2507
- return self._mxnet
2508
-
2509
2493
  @property
2510
2494
  def pruning(self):
2511
2495
  """Get the pruning object."""
@@ -15,7 +15,6 @@
15
15
  # See the License for the specific language governing permissions and
16
16
  # limitations under the License.
17
17
  """Built-in dataloaders for multiple framework backends."""
18
- from .mxnet_dataloader import MXNetDataLoader
19
18
  from .onnxrt_dataloader import ONNXRTDataLoader
20
19
  from .pytorch_dataloader import PyTorchDataLoader
21
20
  from .tensorflow_dataloader import TensorflowDataLoader
@@ -24,7 +23,6 @@ DATALOADERS = {
24
23
  "tensorflow": TensorflowDataLoader,
25
24
  "tensorflow_itex": TensorflowDataLoader,
26
25
  "keras": TensorflowDataLoader,
27
- "mxnet": MXNetDataLoader,
28
26
  "pytorch": PyTorchDataLoader,
29
27
  "pytorch_ipex": PyTorchDataLoader,
30
28
  "pytorch_fx": PyTorchDataLoader,
@@ -89,8 +87,7 @@ class DataLoader(object):
89
87
  "onnxrt_qdqops",
90
88
  "onnxrt_qlinearops",
91
89
  "onnxrt_integerops",
92
- "mxnet",
93
- ), "framework support tensorflow pytorch mxnet onnxruntime"
90
+ ), "framework support tensorflow pytorch onnxruntime"
94
91
  return DATALOADERS[framework](
95
92
  dataset=dataset,
96
93
  batch_size=batch_size,
@@ -41,7 +41,7 @@ class TFDataDataLoader(BaseDataLoader): # pragma: no cover
41
41
 
42
42
  In tensorflow1.x dataloader is coupled with the graph, but it also supports feed_dict
43
43
  method to do session run, this dataloader is designed to satisfy the usage of feed dict
44
- in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch.
44
+ in tf1.x, although it's a general dataloader and can also be used in PyTorch.
45
45
 
46
46
  Args:
47
47
  dataset: obj. wrapper of needed data.
@@ -38,7 +38,6 @@ from neural_compressor.utils.utility import LazyImport
38
38
  from .dataset import Dataset, IterableDataset, dataset_registry
39
39
 
40
40
  tf = LazyImport("tensorflow")
41
- mx = LazyImport("mxnet")
42
41
  torch = LazyImport("torch")
43
42
 
44
43
 
@@ -160,7 +159,7 @@ class COCORecordDataset(IterableDataset): # pragma: no cover
160
159
  @dataset_registry(
161
160
  dataset_type="COCORaw",
162
161
  framework="onnxrt_qlinearops, \
163
- onnxrt_integerops, pytorch, mxnet, tensorflow, \
162
+ onnxrt_integerops, pytorch, tensorflow, \
164
163
  tensorflow_itex",
165
164
  dataset_format="",
166
165
  )
@@ -263,7 +262,7 @@ class COCORaw(Dataset): # pragma: no cover
263
262
  @dataset_registry(
264
263
  dataset_type="COCONpy",
265
264
  framework="onnxrt_qlinearops, \
266
- onnxrt_integerops, pytorch, mxnet, tensorflow, \
265
+ onnxrt_integerops, pytorch, tensorflow, \
267
266
  tensorflow_itex",
268
267
  dataset_format="",
269
268
  )