neural-compressor 2.4__tar.gz → 2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (554)
  1. {neural_compressor-2.4 → neural_compressor-2.5}/PKG-INFO +76 -24
  2. {neural_compressor-2.4 → neural_compressor-2.5}/README.md +70 -18
  3. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras.py +8 -0
  4. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/mxnet_utils/__init__.py +1 -0
  5. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/mxnet_utils/util.py +1 -0
  6. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt.py +51 -32
  7. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt_cuda.yaml +21 -0
  8. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/calibration.py +135 -39
  9. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/split.py +4 -1
  10. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/quantizer.py +82 -57
  11. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/smooth_quant.py +11 -11
  12. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/util.py +32 -2
  13. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/weight_only.py +81 -41
  14. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/pytorch.py +199 -99
  15. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/pytorch_cpu.yaml +1 -1
  16. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_converter.py +10 -6
  17. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dummy_biasadd.py +5 -3
  18. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_requantize.py +35 -21
  19. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py +7 -3
  20. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_base.py +5 -3
  21. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/smooth_quant_calibration.py +1 -1
  22. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/auto_round.py +25 -0
  23. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/__init__.py +15 -0
  24. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/autoround.py +1718 -0
  25. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/export.py +99 -0
  26. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/model_wrapper.py +346 -0
  27. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/autoround/sign_sgd.py +389 -0
  28. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/awq.py +3 -4
  29. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/gptq.py +39 -7
  30. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/quantize.py +1 -1
  31. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/model_wrapper.py +47 -45
  32. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/teq.py +6 -3
  33. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/util.py +13 -15
  34. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/__init__.py +17 -0
  35. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/auto_alpha.py +665 -0
  36. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/calibration.py +114 -0
  37. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/graph_trace.py +232 -0
  38. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/smooth_quant.py +595 -0
  39. neural_compressor-2.5/neural_compressor/adaptor/torch_utils/waq/utils.py +428 -0
  40. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/weight_only.py +229 -98
  41. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/distillation/__init__.py +1 -0
  42. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/hpo/__init__.py +1 -0
  43. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/hpo/sa_optimizer.py +1 -0
  44. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/__init__.py +1 -0
  45. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/criteria.py +22 -10
  46. neural_compressor-2.5/neural_compressor/compression/pruner/dsnot.py +354 -0
  47. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/model_slim/__init__.py +1 -0
  48. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/model_slim/auto_slim.py +1 -0
  49. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/model_slim/pattern_analyzer.py +1 -0
  50. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/model_slim/weight_slim.py +1 -0
  51. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/__init__.py +1 -0
  52. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/base.py +18 -7
  53. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/mha.py +1 -0
  54. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/ninm.py +40 -14
  55. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/patterns/nxm.py +49 -19
  56. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/__init__.py +1 -0
  57. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/base.py +8 -3
  58. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/basic.py +6 -1
  59. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/block_mask.py +1 -0
  60. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/mha.py +1 -0
  61. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/pattern_lock.py +5 -0
  62. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/progressive.py +1 -0
  63. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/retrain_free.py +1 -0
  64. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruning.py +26 -21
  65. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/regs.py +1 -0
  66. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/schedulers.py +2 -3
  67. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/tf_criteria.py +1 -0
  68. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/utils.py +105 -33
  69. neural_compressor-2.5/neural_compressor/compression/pruner/wanda/__init__.py +20 -0
  70. neural_compressor-2.5/neural_compressor/compression/pruner/wanda/prune.py +242 -0
  71. neural_compressor-2.5/neural_compressor/compression/pruner/wanda/utils.py +62 -0
  72. neural_compressor-2.5/neural_compressor/compression/pruner/wanda/wrapper.py +75 -0
  73. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/config.py +9 -2
  74. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/dataset.py +3 -0
  75. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/__init__.py +1 -0
  76. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/compression/__init__.py +1 -0
  77. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/contrib/strategy/tpe.py +5 -3
  78. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/dataset.py +3 -0
  79. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/__init__.py +1 -0
  80. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning.py +1 -0
  81. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_recipes/__init__.py +1 -0
  82. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_recipes/patterns/__init__.py +1 -0
  83. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_recipes/patterns/pattern.py +1 -0
  84. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_recipes/patterns/tile_pattern.py +1 -0
  85. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruning_v2.py +1 -0
  86. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/__init__.py +1 -0
  87. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/logger.py +1 -0
  88. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/patterns.py +1 -0
  89. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/prune_utils.py +1 -0
  90. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/pruner.py +1 -0
  91. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/pruning.py +1 -0
  92. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pytorch_pruner/scheduler.py +1 -0
  93. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/strategy.py +15 -9
  94. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/mix_precision.py +2 -2
  95. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/model.py +2 -2
  96. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/onnx_model.py +2 -2
  97. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/tensorflow_model.py +138 -45
  98. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/torch_model.py +16 -0
  99. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/objective.py +15 -9
  100. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/quantization.py +2 -2
  101. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/auto.py +1 -1
  102. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/conservative.py +4 -4
  103. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/strategy.py +44 -12
  104. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/tuning_space.py +0 -8
  105. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/training.py +2 -2
  106. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/load_huggingface.py +36 -0
  107. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/pytorch.py +31 -19
  108. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/utility.py +6 -2
  109. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/version.py +1 -1
  110. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/PKG-INFO +77 -25
  111. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/SOURCES.txt +17 -1
  112. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/requires.txt +4 -4
  113. {neural_compressor-2.4 → neural_compressor-2.5}/setup.py +49 -2
  114. {neural_compressor-2.4 → neural_compressor-2.5}/third-party-programs.txt +2 -0
  115. neural_compressor-2.4/neural_compressor/adaptor/torch_utils/smooth_quant.py +0 -1607
  116. {neural_compressor-2.4 → neural_compressor-2.5}/LICENSE +0 -0
  117. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/__init__.py +0 -0
  118. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/__main__.py +0 -0
  119. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/__init__.py +0 -0
  120. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/intel_extension_for_transformers.yaml +0 -0
  121. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/keras_inc.yaml +0 -0
  122. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_bf16.yaml +0 -0
  123. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_bf16_channels_last.yaml +0 -0
  124. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_bf16_ipex.yaml +0 -0
  125. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_bf16_ipex_channels_last.yaml +0 -0
  126. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_fp32_channels_last.yaml +0 -0
  127. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_fp32_ipex.yaml +0 -0
  128. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_fp32_ipex_channels_last.yaml +0 -0
  129. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_gpu_to_cpu.yaml +0 -0
  130. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_int8.yaml +0 -0
  131. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_bf16.yaml +0 -0
  132. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_bf16_channels_last.yaml +0 -0
  133. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_bf16_ipex.yaml +0 -0
  134. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_bf16_ipex_channels_last.yaml +0 -0
  135. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_fp32.yaml +0 -0
  136. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_fp32_channels_last.yaml +0 -0
  137. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_fp32_ipex.yaml +0 -0
  138. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_jit_fp32_ipex_channels_last.yaml +0 -0
  139. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_onnxruntime_fp32.yaml +0 -0
  140. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_onnxruntime_int8_qlinear.yaml +0 -0
  141. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_openvino_fp32.yaml +0 -0
  142. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/nano_openvino_int8.yaml +0 -0
  143. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/onnx_inc_dynamic_quant.yaml +0 -0
  144. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/onnx_inc_static_quant_qdq.yaml +0 -0
  145. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/onnx_inc_static_quant_qlinear.yaml +0 -0
  146. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_aliblade.yaml +0 -0
  147. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_benchmark.yaml +0 -0
  148. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_channels_last.yaml +0 -0
  149. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_bf16.yaml +0 -0
  150. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_dynamic_quant.yaml +0 -0
  151. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_dynamic_quant_fp8.yaml +0 -0
  152. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_huggingface_optimum_dynamic.yaml +0 -0
  153. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_huggingface_optimum_static.yaml +0 -0
  154. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_static_quant_fx.yaml +0 -0
  155. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_static_quant_fx_fp8.yaml +0 -0
  156. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_static_quant_ipex.yaml +0 -0
  157. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml +0 -0
  158. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_ipex_bf16.yaml +0 -0
  159. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_ipex_fp32.yaml +0 -0
  160. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_ipex_int8_dynamic_quant.yaml +0 -0
  161. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_ipex_int8_static_quant.yaml +0 -0
  162. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_jit_script.yaml +0 -0
  163. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_jit_script_ofi.yaml +0 -0
  164. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_jit_trace.yaml +0 -0
  165. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_jit_trace_ofi.yaml +0 -0
  166. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_mixed_precision_cpu.yaml +0 -0
  167. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_mixed_precision_cuda.yaml +0 -0
  168. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_mixed_precision_intel_gpu.yaml +0 -0
  169. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_torchdynamo_jit_script.yaml +0 -0
  170. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_torchdynamo_jit_script_ofi.yaml +0 -0
  171. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_torchdynamo_jit_trace.yaml +0 -0
  172. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/pytorch_torchdynamo_jit_trace_ofi.yaml +0 -0
  173. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/backends/template.yaml +0 -0
  174. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/__init__.py +0 -0
  175. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/__init__.py +0 -0
  176. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/autoinc_harness.py +0 -0
  177. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/calib_dataloader.py +0 -0
  178. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/domain.py +0 -0
  179. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/autoinc/eval_func.py +0 -0
  180. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/__init__.py +0 -0
  181. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/batch_size.py +0 -0
  182. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py +0 -0
  183. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/cuda_to_cpu.py +0 -0
  184. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/dummy_dataloader.py +0 -0
  185. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/harness.py +0 -0
  186. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/lightning.py +0 -0
  187. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/reclaim_inference_transformers_trainer.py +0 -0
  188. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/pytorch/reclaim_inputs.py +0 -0
  189. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/tensorflow/__init__.py +0 -0
  190. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/tensorflow/amp.py +0 -0
  191. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/tensorflow/inc.py +0 -0
  192. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/coders/transform.py +0 -0
  193. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/globals.py +0 -0
  194. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/__init__.py +0 -0
  195. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/code_line.py +0 -0
  196. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/function.py +0 -0
  197. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/model.py +0 -0
  198. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/preloads/__init__.py +0 -0
  199. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/graphers/preloads/transformers.yaml +0 -0
  200. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/interface.py +0 -0
  201. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/launcher.py +0 -0
  202. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/__init__.py +0 -0
  203. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/common.py +0 -0
  204. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/cpu_info.py +0 -0
  205. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/device.py +0 -0
  206. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/handle_user_input.py +0 -0
  207. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/line_operation.py +0 -0
  208. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/numa_launcher.py +0 -0
  209. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/utils/pdf_report.py +0 -0
  210. {neural_compressor-2.4 → neural_compressor-2.5}/neural_coder/version.py +0 -0
  211. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/__init__.py +0 -0
  212. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/__init__.py +0 -0
  213. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/adaptor.py +0 -0
  214. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras.yaml +0 -0
  215. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/__init__.py +0 -0
  216. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/conv2d.py +0 -0
  217. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/dense.py +0 -0
  218. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/depthwise_conv2d.py +0 -0
  219. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/pool2d.py +0 -0
  220. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/quantizer.py +0 -0
  221. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/keras_utils/separable_conv2d.py +0 -0
  222. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/mxnet.py +0 -0
  223. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/mxnet.yaml +0 -0
  224. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt.yaml +0 -0
  225. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt_dml.yaml +0 -0
  226. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt_dnnl.yaml +0 -0
  227. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/onnxrt_trt.yaml +0 -0
  228. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/__init__.py +0 -0
  229. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/calibrator.py +0 -0
  230. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/__init__.py +0 -0
  231. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/activation.py +0 -0
  232. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/argmax.py +0 -0
  233. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/attention.py +0 -0
  234. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/binary_op.py +0 -0
  235. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/concat.py +0 -0
  236. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/conv.py +0 -0
  237. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/direct_q8.py +0 -0
  238. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/embed_layernorm.py +0 -0
  239. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/gather.py +0 -0
  240. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/gavgpool.py +0 -0
  241. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/gemm.py +0 -0
  242. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/lstm.py +0 -0
  243. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/matmul.py +0 -0
  244. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/maxpool.py +0 -0
  245. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/norm.py +0 -0
  246. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/ops.py +0 -0
  247. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/pad.py +0 -0
  248. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/pooling.py +0 -0
  249. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/reduce.py +0 -0
  250. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/resize.py +0 -0
  251. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/ox_utils/operators/unary_op.py +0 -0
  252. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/pytorch_gpu.yaml +0 -0
  253. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/pytorch_ipex.yaml +0 -0
  254. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/query.py +0 -0
  255. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tensorflow.py +0 -0
  256. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tensorflow.yaml +0 -0
  257. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tensorflow_itex.yaml +0 -0
  258. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/__init__.py +0 -0
  259. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_converter_without_calib.py +0 -0
  260. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/__init__.py +0 -0
  261. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/__init__.py +0 -0
  262. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py +0 -0
  263. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/dequantize_cast_optimizer.py +0 -0
  264. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/__init__.py +0 -0
  265. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_add_to_biasadd.py +0 -0
  266. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_layout.py +0 -0
  267. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_leakyrelu.py +0 -0
  268. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_nan_to_random.py +0 -0
  269. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/convert_placeholder_to_const.py +0 -0
  270. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dilated_contraction.py +0 -0
  271. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/expanddims_optimizer.py +0 -0
  272. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fetch_weight_from_reshape.py +0 -0
  273. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_batch_norm.py +0 -0
  274. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fold_constant.py +0 -0
  275. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_biasadd_add.py +0 -0
  276. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_column_wise_mul.py +0 -0
  277. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_conv_with_math.py +0 -0
  278. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_bn.py +0 -0
  279. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py +0 -0
  280. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_gelu.py +0 -0
  281. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_layer_norm.py +0 -0
  282. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_conv.py +0 -0
  283. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_pad_with_fp32_conv.py +0 -0
  284. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_reshape_transpose.py +0 -0
  285. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/graph_cse_optimizer.py +0 -0
  286. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/grappler_pass.py +0 -0
  287. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/insert_print_node.py +0 -0
  288. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/move_squeeze_after_relu.py +0 -0
  289. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +0 -0
  290. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/remove_training_nodes.py +0 -0
  291. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/rename_batch_norm.py +0 -0
  292. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/split_shared_input.py +0 -0
  293. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_equivalent_nodes.py +0 -0
  294. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/strip_unused_nodes.py +0 -0
  295. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/switch_optimizer.py +0 -0
  296. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/graph_base.py +0 -0
  297. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/__init__.py +0 -0
  298. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_fake_quant.py +0 -0
  299. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py +0 -0
  300. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value_without_calib.py +0 -0
  301. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_conv_redundant_dequantize.py +0 -0
  302. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_redundant_dequantize.py +0 -0
  303. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/meta_op_optimizer.py +0 -0
  304. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_hostconst_converter.py +0 -0
  305. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/post_quantized_op_cse.py +0 -0
  306. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/rnn_convert.py +0 -0
  307. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/scale_propagation.py +0 -0
  308. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/__init__.py +0 -0
  309. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_graph.py +0 -0
  310. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_node.py +0 -0
  311. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/onnx_schema.py +0 -0
  312. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/onnx/tf2onnx_utils.py +0 -0
  313. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/__init__.py +0 -0
  314. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py +0 -0
  315. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/merge_duplicated_qdq.py +0 -0
  316. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/share_qdq_y_pattern.py +0 -0
  317. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/graph_util.py +0 -0
  318. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/__init__.py +0 -0
  319. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/__init__.py +0 -0
  320. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/fake_quantize.py +0 -0
  321. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_config.py +0 -0
  322. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_helper.py +0 -0
  323. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/__init__.py +0 -0
  324. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/optimize_layer.py +0 -0
  325. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_add.py +0 -0
  326. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_base.py +0 -0
  327. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_layers/quantize_layer_bn.py +0 -0
  328. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qat/quantize_wrapper.py +0 -0
  329. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/__init__.py +0 -0
  330. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_bn.py +0 -0
  331. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_concatv2.py +0 -0
  332. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_conv.py +0 -0
  333. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_deconv.py +0 -0
  334. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py +0 -0
  335. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py +0 -0
  336. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_pooling.py +0 -0
  337. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/optimize_qdq.py +0 -0
  338. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_bn.py +0 -0
  339. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_concatv2.py +0 -0
  340. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_conv.py +0 -0
  341. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_for_intel_cpu.py +0 -0
  342. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py +0 -0
  343. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_pooling.py +0 -0
  344. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/quantize_graph_common.py +0 -0
  345. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/smooth_quant_scaler.py +0 -0
  346. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/tf2onnx_converter.py +0 -0
  347. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/__init__.py +0 -0
  348. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/bias_correction.py +0 -0
  349. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/graph_transform_base.py +0 -0
  350. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/insert_logging.py +0 -0
  351. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/transform_graph/rerange_quantized_concat.py +0 -0
  352. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/tf_utils/util.py +0 -0
  353. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/__init__.py +0 -0
  354. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/bf16_convert.py +0 -0
  355. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/hawq_metric.py +0 -0
  356. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/__init__.py +0 -0
  357. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/modified_pickle.py +0 -0
  358. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/torch_load.py +0 -0
  359. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py +0 -0
  360. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/mixed_precision.py +0 -0
  361. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/pattern_detector.py +0 -0
  362. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/adaptor/torch_utils/symbolic_trace.py +0 -0
  363. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/__init__.py +0 -0
  364. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/algorithm.py +0 -0
  365. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/fast_bias_correction.py +0 -0
  366. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/smooth_quant.py +0 -0
  367. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/algorithm/weight_correction.py +0 -0
  368. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/benchmark.py +0 -0
  369. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/__init__.py +0 -0
  370. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/callbacks.py +0 -0
  371. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/distillation/criterions.py +0 -0
  372. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/distillation/optimizers.py +0 -0
  373. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/distillation/utility.py +0 -0
  374. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/hpo/search_algorithms.py +0 -0
  375. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/hpo/search_space.py +0 -0
  376. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/compression/pruner/pruners/sparse_gpt.py +0 -0
  377. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/conf/__init__.py +0 -0
  378. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/conf/config.py +0 -0
  379. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/conf/dotdict.py +0 -0
  380. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/conf/pythonic_config.py +0 -0
  381. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/contrib/__init__.py +0 -0
  382. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/contrib/strategy/__init__.py +0 -0
  383. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/contrib/strategy/sigopt.py +0 -0
  384. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/contrib/strategy/tpe.py +0 -0
  385. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/__init__.py +0 -0
  386. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/__init__.py +0 -0
  387. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/base_dataloader.py +0 -0
  388. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/dataloader.py +0 -0
  389. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/default_dataloader.py +0 -0
  390. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/fetcher.py +0 -0
  391. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/mxnet_dataloader.py +0 -0
  392. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/onnxrt_dataloader.py +0 -0
  393. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/pytorch_dataloader.py +0 -0
  394. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/sampler.py +0 -0
  395. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/dataloaders/tensorflow_dataloader.py +0 -0
  396. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/__init__.py +0 -0
  397. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/bert_dataset.py +0 -0
  398. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/coco_dataset.py +0 -0
  399. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/dummy_dataset.py +0 -0
  400. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/dummy_dataset_v2.py +0 -0
  401. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/imagenet_dataset.py +0 -0
  402. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/datasets/style_transfer_dataset.py +0 -0
  403. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/filters/__init__.py +0 -0
  404. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/filters/coco_filter.py +0 -0
  405. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/filters/filter.py +0 -0
  406. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/__init__.py +0 -0
  407. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/coco_transform.py +0 -0
  408. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/imagenet_transform.py +0 -0
  409. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/postprocess.py +0 -0
  410. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/tokenization.py +0 -0
  411. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/data/transforms/transform.py +0 -0
  412. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/__init__.py +0 -0
  413. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/benchmark.py +0 -0
  414. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/criterion.py +0 -0
  415. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/dataloader.py +0 -0
  416. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/metric.py +0 -0
  417. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/model.py +0 -0
  418. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/optimizer.py +0 -0
  419. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/postprocess.py +0 -0
  420. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/common/torch_utils.py +0 -0
  421. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/component.py +0 -0
  422. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/contrib/__init__.py +0 -0
  423. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/contrib/strategy/__init__.py +0 -0
  424. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/contrib/strategy/sigopt.py +0 -0
  425. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/__init__.py +0 -0
  426. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/__init__.py +0 -0
  427. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/base_dataloader.py +0 -0
  428. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/dataloader.py +0 -0
  429. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/default_dataloader.py +0 -0
  430. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/fetcher.py +0 -0
  431. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/mxnet_dataloader.py +0 -0
  432. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/onnxrt_dataloader.py +0 -0
  433. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/pytorch_dataloader.py +0 -0
  434. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/sampler.py +0 -0
  435. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/dataloaders/tensorflow_dataloader.py +0 -0
  436. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/__init__.py +0 -0
  437. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/bert_dataset.py +0 -0
  438. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/coco_dataset.py +0 -0
  439. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/dummy_dataset.py +0 -0
  440. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/dummy_dataset_v2.py +0 -0
  441. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/imagenet_dataset.py +0 -0
  442. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/datasets/style_transfer_dataset.py +0 -0
  443. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/filters/__init__.py +0 -0
  444. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/filters/coco_filter.py +0 -0
  445. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/filters/filter.py +0 -0
  446. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/transforms/__init__.py +0 -0
  447. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/transforms/imagenet_transform.py +0 -0
  448. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/transforms/tokenization.py +0 -0
  449. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/data/transforms/transform.py +0 -0
  450. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/distillation.py +0 -0
  451. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/export/__init__.py +0 -0
  452. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/export/qlinear2qdq.py +0 -0
  453. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/export/tf2onnx.py +0 -0
  454. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/export/torch2onnx.py +0 -0
  455. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/graph_optimization.py +0 -0
  456. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/__init__.py +0 -0
  457. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/bleu.py +0 -0
  458. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/bleu_util.py +0 -0
  459. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/coco_label_map.py +0 -0
  460. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/coco_tools.py +0 -0
  461. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/evaluate_squad.py +0 -0
  462. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/f1.py +0 -0
  463. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/metric/metric.py +0 -0
  464. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/mixed_precision.py +0 -0
  465. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/model_conversion.py +0 -0
  466. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/basic_nas.py +0 -0
  467. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/dynas.py +0 -0
  468. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/nas.py +0 -0
  469. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/nas_utils.py +0 -0
  470. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/nas/search_algorithms.py +0 -0
  471. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/__init__.py +0 -0
  472. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/gradient_sensitivity.py +0 -0
  473. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/group_lasso.py +0 -0
  474. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/magnitude.py +0 -0
  475. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/pattern_lock.py +0 -0
  476. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/pruner_legacy/pruner.py +0 -0
  477. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/quantization.py +0 -0
  478. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/scheduler.py +0 -0
  479. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/__init__.py +0 -0
  480. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/auto_mixed_precision.py +0 -0
  481. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/basic.py +0 -0
  482. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/bayesian.py +0 -0
  483. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/exhaustive.py +0 -0
  484. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/mse.py +0 -0
  485. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/mse_v2.py +0 -0
  486. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/random.py +0 -0
  487. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/__init__.py +0 -0
  488. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/constant.py +0 -0
  489. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/tuning_sampler.py +0 -0
  490. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/tuning_space.py +0 -0
  491. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/tuning_structs.py +0 -0
  492. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/experimental/strategy/utils/utility.py +0 -0
  493. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/__init__.py +0 -0
  494. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/bleu.py +0 -0
  495. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/bleu_util.py +0 -0
  496. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/coco_label_map.py +0 -0
  497. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/coco_tools.py +0 -0
  498. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/evaluate_squad.py +0 -0
  499. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/f1.py +0 -0
  500. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/metric/metric.py +0 -0
  501. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/__init__.py +0 -0
  502. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/base_model.py +0 -0
  503. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/keras_model.py +0 -0
  504. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/mxnet_model.py +0 -0
  505. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/model/nets_factory.py +0 -0
  506. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/__init__.py +0 -0
  507. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/__init__.py +0 -0
  508. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/factory.py +0 -0
  509. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/onnx_parser/__init__.py +0 -0
  510. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/onnx_parser/factory.py +0 -0
  511. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/onnx_parser/parser.py +0 -0
  512. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/parser.py +0 -0
  513. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/result.py +0 -0
  514. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/tensorflow_parser/__init__.py +0 -0
  515. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/tensorflow_parser/factory.py +0 -0
  516. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/parser/tensorflow_parser/parser.py +0 -0
  517. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/__init__.py +0 -0
  518. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/factory.py +0 -0
  519. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/onnxrt_profiler/__init__.py +0 -0
  520. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +0 -0
  521. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +0 -0
  522. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/onnxrt_profiler/utils.py +0 -0
  523. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/profiler.py +0 -0
  524. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/tensorflow_profiler/__init__.py +0 -0
  525. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +0 -0
  526. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +0 -0
  527. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/profiling/profiler/tensorflow_profiler/utils.py +0 -0
  528. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/__init__.py +0 -0
  529. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/auto_mixed_precision.py +0 -0
  530. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/basic.py +0 -0
  531. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/bayesian.py +0 -0
  532. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/exhaustive.py +0 -0
  533. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/hawq_v2.py +0 -0
  534. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/mse.py +0 -0
  535. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/mse_v2.py +0 -0
  536. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/random.py +0 -0
  537. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/__init__.py +0 -0
  538. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/constant.py +0 -0
  539. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/tuning_sampler.py +0 -0
  540. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/tuning_structs.py +0 -0
  541. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/strategy/utils/utility.py +0 -0
  542. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/__init__.py +0 -0
  543. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/collect_layer_histogram.py +0 -0
  544. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/constant.py +0 -0
  545. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/create_obj_from_config.py +0 -0
  546. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/kl_divergence.py +0 -0
  547. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/logger.py +0 -0
  548. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/neural_insights_utils.py +0 -0
  549. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/options.py +0 -0
  550. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor/utils/weights_details.py +0 -0
  551. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/dependency_links.txt +0 -0
  552. {neural_compressor-2.4 → neural_compressor-2.5}/neural_compressor.egg-info/top_level.txt +0 -0
  553. {neural_compressor-2.4 → neural_compressor-2.5}/pyproject.toml +0 -0
  554. {neural_compressor-2.4 → neural_compressor-2.5}/setup.cfg +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: neural_compressor
3
- Version: 2.4
3
+ Version: 2.5
4
4
  Summary: Repository of Intel® Neural Compressor
5
5
  Home-page: https://github.com/intel/neural-compressor
6
- Author: Intel AIA Team
6
+ Author: Intel AIPT Team
7
7
  Author-email: feng.tian@intel.com, haihao.shen@intel.com, suyue.chen@intel.com
8
8
  License: Apache 2.0
9
9
  Keywords: quantization,auto-tuning,post-training static quantization,post-training dynamic quantization,quantization-aware training
@@ -16,7 +16,7 @@ Description-Content-Type: text/markdown
16
16
  License-File: LICENSE
17
17
  License-File: third-party-programs.txt
18
18
  Requires-Dist: deprecated>=1.2.13
19
- Requires-Dist: numpy
19
+ Requires-Dist: numpy<2.0
20
20
  Requires-Dist: opencv-python-headless
21
21
  Requires-Dist: pandas
22
22
  Requires-Dist: Pillow
@@ -30,11 +30,11 @@ Requires-Dist: requests
30
30
  Requires-Dist: schema
31
31
  Requires-Dist: scikit-learn
32
32
  Provides-Extra: pt
33
- Requires-Dist: neural_compressor_3x_pt==2.4; extra == "pt"
33
+ Requires-Dist: neural_compressor_3x_pt==2.5; extra == "pt"
34
34
  Provides-Extra: tf
35
- Requires-Dist: neural_compressor_3x_tf==2.4; extra == "tf"
35
+ Requires-Dist: neural_compressor_3x_tf==2.5; extra == "tf"
36
36
  Provides-Extra: ort
37
- Requires-Dist: neural_compressor_3x_ort==2.4; extra == "ort"
37
+ Requires-Dist: neural_compressor_3x_ort==2.5; extra == "ort"
38
38
 
39
39
  <div align="center">
40
40
 
@@ -43,12 +43,12 @@ Intel® Neural Compressor
43
43
  <h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, ONNX Runtime, and MXNet)</h3>
44
44
 
45
45
  [![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
46
- [![version](https://img.shields.io/badge/release-2.4-green)](https://github.com/intel/neural-compressor/releases)
46
+ [![version](https://img.shields.io/badge/release-2.5-green)](https://github.com/intel/neural-compressor/releases)
47
47
  [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
48
48
  [![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
49
49
  [![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
50
50
 
51
- [Architecture](./docs/source/design.md#architecture)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Workflow](./docs/source/design.md#workflow)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Results](./docs/source/validated_model_list.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Examples](./examples/README.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Documentations](https://intel.github.io/neural-compressor)
51
+ [Architecture](./docs/source/design.md#architecture)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Workflow](./docs/source/design.md#workflow)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[LLMs Recipes](./docs/source/llm_recipes.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Results](./docs/source/validated_model_list.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Documentations](https://intel.github.io/neural-compressor)
52
52
 
53
53
  ---
54
54
  <div align="left">
@@ -63,6 +63,9 @@ In particular, the tool provides the key features, typical examples, and open co
63
63
 
64
64
  * Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)
65
65
 
66
+ ## What's New
67
+ * [2024/03] A new SOTA approach [AutoRound](https://github.com/intel/auto-round) Weight-Only Quantization on [Intel Gaudi2 AI accelerator](https://habana.ai/products/gaudi2/) is available for LLMs.
68
+
66
69
  ## Installation
67
70
 
68
71
  ### Install from pypi
@@ -73,29 +76,77 @@ pip install neural-compressor
73
76
  > More installation methods can be found at [Installation Guide](https://github.com/intel/neural-compressor/blob/master/docs/source/installation_guide.md). Please check out our [FAQ](https://github.com/intel/neural-compressor/blob/master/docs/source/faq.md) for more details.
74
77
 
75
78
  ## Getting Started
76
- ### Quantization with Python API
77
79
 
78
- ```shell
79
- # Install Intel Neural Compressor and TensorFlow
80
- pip install neural-compressor
81
- pip install tensorflow
82
- # Prepare fp32 model
83
- wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb
80
+ Setting up the environment:
81
+ ```bash
82
+ pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
84
83
  ```
84
+ After successfully installing these packages, try your first quantization program.
85
+
86
+ ### Weight-Only Quantization (LLMs)
87
+ The following example code demonstrates Weight-Only Quantization on LLMs. It supports Intel CPU, Intel Gaudi2 AI Accelerator, and Nvidia GPU; the best device will be selected automatically.
88
+
89
+ To try it on Intel Gaudi2, a docker image with the Gaudi Software Stack is recommended; please refer to the following script for environment setup. More details can be found in the [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
90
+ ```bash
91
+ docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04//habanalabs/pytorch-installer-2.1.1:latest
92
+
93
+ # Check the container ID
94
+ docker ps
95
+
96
+ # Log in to the container
97
+ docker exec -it <container_id> bash
98
+
99
+ # Install the optimum-habana
100
+ pip install --upgrade-strategy eager optimum[habana]
101
+
102
+ # Install INC/auto_round
103
+ pip install neural-compressor auto_round
104
+ ```
105
+ Run the example:
85
106
  ```python
86
- from neural_compressor.data import DataLoader, Datasets
107
+ from transformers import AutoModel, AutoTokenizer
108
+
87
109
  from neural_compressor.config import PostTrainingQuantConfig
110
+ from neural_compressor.quantization import fit
111
+ from neural_compressor.adaptor.torch_utils.auto_round import get_dataloader
112
+
113
+ model_name = "EleutherAI/gpt-neo-125m"
114
+ float_model = AutoModel.from_pretrained(model_name)
115
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
116
+ dataloader = get_dataloader(tokenizer, seqlen=2048)
117
+
118
+ woq_conf = PostTrainingQuantConfig(
119
+ approach="weight_only",
120
+ op_type_dict={
121
+ ".*": { # match all ops
122
+ "weight": {
123
+ "dtype": "int",
124
+ "bits": 4,
125
+ "algorithm": "AUTOROUND",
126
+ },
127
+ }
128
+ },
129
+ )
130
+ quantized_model = fit(model=float_model, conf=woq_conf, calib_dataloader=dataloader)
131
+ ```
132
+ **Note:**
133
+
134
+ To try INT4 model inference, please directly use [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), which leverages Intel Neural Compressor for model quantization.
88
135
 
89
- dataset = Datasets("tensorflow")["dummy"](shape=(1, 224, 224, 3))
90
- dataloader = DataLoader(framework="tensorflow", dataset=dataset)
136
+ ### Static Quantization (Non-LLMs)
91
137
 
138
+ ```python
139
+ from torchvision import models
140
+
141
+ from neural_compressor.config import PostTrainingQuantConfig
142
+ from neural_compressor.data import DataLoader, Datasets
92
143
  from neural_compressor.quantization import fit
93
144
 
94
- q_model = fit(
95
- model="./mobilenet_v1_1.0_224_frozen.pb",
96
- conf=PostTrainingQuantConfig(),
97
- calib_dataloader=dataloader,
98
- )
145
+ float_model = models.resnet18()
146
+ dataset = Datasets("pytorch")["dummy"](shape=(1, 3, 224, 224))
147
+ calib_dataloader = DataLoader(framework="pytorch", dataset=dataset)
148
+ static_quant_conf = PostTrainingQuantConfig()
149
+ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloader=calib_dataloader)
99
150
  ```
100
151
 
101
152
  ## Documentation
@@ -110,8 +161,9 @@ q_model = fit(
110
161
  <tr>
111
162
  <td colspan="2" align="center"><a href="./docs/source/design.md#architecture">Architecture</a></td>
112
163
  <td colspan="2" align="center"><a href="./docs/source/design.md#workflow">Workflow</a></td>
164
+ <td colspan="1" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
165
+ <td colspan="1" align="center"><a href="./docs/source/llm_recipes.md">LLMs Recipes</a></td>
113
166
  <td colspan="2" align="center"><a href="examples/README.md">Examples</a></td>
114
- <td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
115
167
  </tr>
116
168
  </tbody>
117
169
  <thead>
@@ -5,12 +5,12 @@ Intel® Neural Compressor
5
5
  <h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, ONNX Runtime, and MXNet)</h3>
6
6
 
7
7
  [![python](https://img.shields.io/badge/python-3.8%2B-blue)](https://github.com/intel/neural-compressor)
8
- [![version](https://img.shields.io/badge/release-2.4-green)](https://github.com/intel/neural-compressor/releases)
8
+ [![version](https://img.shields.io/badge/release-2.5-green)](https://github.com/intel/neural-compressor/releases)
9
9
  [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/neural-compressor/blob/master/LICENSE)
10
10
  [![coverage](https://img.shields.io/badge/coverage-85%25-green)](https://github.com/intel/neural-compressor)
11
11
  [![Downloads](https://static.pepy.tech/personalized-badge/neural-compressor?period=total&units=international_system&left_color=grey&right_color=green&left_text=downloads)](https://pepy.tech/project/neural-compressor)
12
12
 
13
- [Architecture](./docs/source/design.md#architecture)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Workflow](./docs/source/design.md#workflow)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Results](./docs/source/validated_model_list.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Examples](./examples/README.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Documentations](https://intel.github.io/neural-compressor)
13
+ [Architecture](./docs/source/design.md#architecture)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Workflow](./docs/source/design.md#workflow)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[LLMs Recipes](./docs/source/llm_recipes.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Results](./docs/source/validated_model_list.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Documentations](https://intel.github.io/neural-compressor)
14
14
 
15
15
  ---
16
16
  <div align="left">
@@ -25,6 +25,9 @@ In particular, the tool provides the key features, typical examples, and open co
25
25
 
26
26
  * Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)
27
27
 
28
+ ## What's New
29
+ * [2024/03] A new SOTA approach [AutoRound](https://github.com/intel/auto-round) Weight-Only Quantization on [Intel Gaudi2 AI accelerator](https://habana.ai/products/gaudi2/) is available for LLMs.
30
+
28
31
  ## Installation
29
32
 
30
33
  ### Install from pypi
@@ -35,29 +38,77 @@ pip install neural-compressor
35
38
  > More installation methods can be found at [Installation Guide](https://github.com/intel/neural-compressor/blob/master/docs/source/installation_guide.md). Please check out our [FAQ](https://github.com/intel/neural-compressor/blob/master/docs/source/faq.md) for more details.
36
39
 
37
40
  ## Getting Started
38
- ### Quantization with Python API
39
41
 
40
- ```shell
41
- # Install Intel Neural Compressor and TensorFlow
42
- pip install neural-compressor
43
- pip install tensorflow
44
- # Prepare fp32 model
45
- wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/mobilenet_v1_1.0_224_frozen.pb
42
+ Setting up the environment:
43
+ ```bash
44
+ pip install "neural-compressor>=2.3" "transformers>=4.34.0" torch torchvision
45
+ ```
46
+ After successfully installing these packages, try your first quantization program.
47
+
48
+ ### Weight-Only Quantization (LLMs)
49
+ The following example code demonstrates Weight-Only Quantization on LLMs. It supports Intel CPU, Intel Gaudi2 AI Accelerator, and Nvidia GPU; the best device will be selected automatically.
50
+
51
+ To try it on Intel Gaudi2, a docker image with the Gaudi Software Stack is recommended; please refer to the following script for environment setup. More details can be found in the [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
52
+ ```bash
53
+ docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04//habanalabs/pytorch-installer-2.1.1:latest
54
+
55
+ # Check the container ID
56
+ docker ps
57
+
58
+ # Log in to the container
59
+ docker exec -it <container_id> bash
60
+
61
+ # Install the optimum-habana
62
+ pip install --upgrade-strategy eager optimum[habana]
63
+
64
+ # Install INC/auto_round
65
+ pip install neural-compressor auto_round
46
66
  ```
67
+ Run the example:
47
68
  ```python
48
- from neural_compressor.data import DataLoader, Datasets
69
+ from transformers import AutoModel, AutoTokenizer
70
+
49
71
  from neural_compressor.config import PostTrainingQuantConfig
72
+ from neural_compressor.quantization import fit
73
+ from neural_compressor.adaptor.torch_utils.auto_round import get_dataloader
74
+
75
+ model_name = "EleutherAI/gpt-neo-125m"
76
+ float_model = AutoModel.from_pretrained(model_name)
77
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
78
+ dataloader = get_dataloader(tokenizer, seqlen=2048)
79
+
80
+ woq_conf = PostTrainingQuantConfig(
81
+ approach="weight_only",
82
+ op_type_dict={
83
+ ".*": { # match all ops
84
+ "weight": {
85
+ "dtype": "int",
86
+ "bits": 4,
87
+ "algorithm": "AUTOROUND",
88
+ },
89
+ }
90
+ },
91
+ )
92
+ quantized_model = fit(model=float_model, conf=woq_conf, calib_dataloader=dataloader)
93
+ ```
94
+ **Note:**
95
+
96
+ To try INT4 model inference, please directly use [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers), which leverages Intel Neural Compressor for model quantization.
50
97
 
51
- dataset = Datasets("tensorflow")["dummy"](shape=(1, 224, 224, 3))
52
- dataloader = DataLoader(framework="tensorflow", dataset=dataset)
98
+ ### Static Quantization (Non-LLMs)
53
99
 
100
+ ```python
101
+ from torchvision import models
102
+
103
+ from neural_compressor.config import PostTrainingQuantConfig
104
+ from neural_compressor.data import DataLoader, Datasets
54
105
  from neural_compressor.quantization import fit
55
106
 
56
- q_model = fit(
57
- model="./mobilenet_v1_1.0_224_frozen.pb",
58
- conf=PostTrainingQuantConfig(),
59
- calib_dataloader=dataloader,
60
- )
107
+ float_model = models.resnet18()
108
+ dataset = Datasets("pytorch")["dummy"](shape=(1, 3, 224, 224))
109
+ calib_dataloader = DataLoader(framework="pytorch", dataset=dataset)
110
+ static_quant_conf = PostTrainingQuantConfig()
111
+ quantized_model = fit(model=float_model, conf=static_quant_conf, calib_dataloader=calib_dataloader)
61
112
  ```
62
113
 
63
114
  ## Documentation
@@ -72,8 +123,9 @@ q_model = fit(
72
123
  <tr>
73
124
  <td colspan="2" align="center"><a href="./docs/source/design.md#architecture">Architecture</a></td>
74
125
  <td colspan="2" align="center"><a href="./docs/source/design.md#workflow">Workflow</a></td>
126
+ <td colspan="1" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
127
+ <td colspan="1" align="center"><a href="./docs/source/llm_recipes.md">LLMs Recipes</a></td>
75
128
  <td colspan="2" align="center"><a href="examples/README.md">Examples</a></td>
76
- <td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
77
129
  </tr>
78
130
  </tbody>
79
131
  <thead>
@@ -42,6 +42,7 @@ from .adaptor import Adaptor, adaptor_registry
42
42
  from .query import QueryBackendCapability
43
43
 
44
44
  tf = LazyImport("tensorflow")
45
+ keras = LazyImport("keras")
45
46
 
46
47
 
47
48
  def _add_supported_quantized_objects(custom_objects):
@@ -519,6 +520,13 @@ class KerasAdaptor(Adaptor):
519
520
  def _restore_model_from_json(self, json_model):
520
521
  from tensorflow.keras.models import model_from_json
521
522
 
523
+ from neural_compressor.utils.utility import version1_gte_version2
524
+
525
+ if version1_gte_version2(keras.__version__, "2.13.1"):
526
+ from keras.src.saving import serialization_lib
527
+
528
+ serialization_lib.enable_unsafe_deserialization()
529
+
522
530
  custom_objects = {}
523
531
  # We need to keep a dictionary of custom objects as our quantized library
524
532
  # is not recognized by keras.
@@ -1,4 +1,5 @@
1
1
  """Mxnet util init."""
2
+
2
3
  #!/usr/bin/env python
3
4
  # -*- coding: utf-8 -*-
4
5
  #
@@ -1,4 +1,5 @@
1
1
  """Mxnet util module."""
2
+
2
3
  #!/usr/bin/env python
3
4
  # -*- coding: utf-8 -*-
4
5
  #
@@ -417,15 +417,21 @@ class ONNXRUNTIMEAdaptor(Adaptor):
417
417
  self.quantizable_op_types,
418
418
  self.query_handler.get_fallback_list(),
419
419
  self.reduce_range,
420
- options.onnxrt.qdq_setting.AddQDQPairToWeight
421
- if "add_qdq_pair_to_weight" not in self.recipes
422
- else self.recipes.get("add_qdq_pair_to_weight", False),
423
- options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
424
- if "optypes_to_exclude_output_quant" not in self.recipes
425
- else self.recipes.get("optypes_to_exclude_output_quant", []),
426
- options.onnxrt.qdq_setting.DedicatedQDQPair
427
- if "dedicated_qdq_pair" not in self.recipes
428
- else self.recipes.get("dedicated_qdq_pair", False),
420
+ (
421
+ options.onnxrt.qdq_setting.AddQDQPairToWeight
422
+ if "add_qdq_pair_to_weight" not in self.recipes
423
+ else self.recipes.get("add_qdq_pair_to_weight", False)
424
+ ),
425
+ (
426
+ options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
427
+ if "optypes_to_exclude_output_quant" not in self.recipes
428
+ else self.recipes.get("optypes_to_exclude_output_quant", [])
429
+ ),
430
+ (
431
+ options.onnxrt.qdq_setting.DedicatedQDQPair
432
+ if "dedicated_qdq_pair" not in self.recipes
433
+ else self.recipes.get("dedicated_qdq_pair", False)
434
+ ),
429
435
  self.backend,
430
436
  )
431
437
  quantizer.quantize_model()
@@ -502,15 +508,21 @@ class ONNXRUNTIMEAdaptor(Adaptor):
502
508
  self.quantizable_op_types,
503
509
  self.query_handler.get_fallback_list(),
504
510
  self.reduce_range,
505
- options.onnxrt.qdq_setting.AddQDQPairToWeight
506
- if "add_qdq_pair_to_weight" not in self.recipes
507
- else self.recipes.get("add_qdq_pair_to_weight", False),
508
- options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
509
- if "optypes_to_exclude_output_quant" not in self.recipes
510
- else self.recipes.get("optypes_to_exclude_output_quant", []),
511
- options.onnxrt.qdq_setting.DedicatedQDQPair
512
- if "dedicated_qdq_pair" not in self.recipes
513
- else self.recipes.get("dedicated_qdq_pair", False),
511
+ (
512
+ options.onnxrt.qdq_setting.AddQDQPairToWeight
513
+ if "add_qdq_pair_to_weight" not in self.recipes
514
+ else self.recipes.get("add_qdq_pair_to_weight", False)
515
+ ),
516
+ (
517
+ options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
518
+ if "optypes_to_exclude_output_quant" not in self.recipes
519
+ else self.recipes.get("optypes_to_exclude_output_quant", [])
520
+ ),
521
+ (
522
+ options.onnxrt.qdq_setting.DedicatedQDQPair
523
+ if "dedicated_qdq_pair" not in self.recipes
524
+ else self.recipes.get("dedicated_qdq_pair", False)
525
+ ),
514
526
  self.backend,
515
527
  )
516
528
  quantizer.quantize_model()
@@ -657,15 +669,21 @@ class ONNXRUNTIMEAdaptor(Adaptor):
657
669
  self.quantizable_op_types,
658
670
  self.query_handler.get_fallback_list(),
659
671
  self.reduce_range,
660
- options.onnxrt.qdq_setting.AddQDQPairToWeight
661
- if not options.onnxrt.qdq_setting.AddQDQPairToWeight
662
- else self.recipes.get("add_qdq_pair_to_weight", False),
663
- options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
664
- if options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin is not None
665
- else self.recipes.get("optypes_to_exclude_output_quant", []),
666
- options.onnxrt.qdq_setting.DedicatedQDQPair
667
- if not options.onnxrt.qdq_setting.DedicatedQDQPair
668
- else self.recipes.get("dedicated_qdq_pair", False),
672
+ (
673
+ options.onnxrt.qdq_setting.AddQDQPairToWeight
674
+ if not options.onnxrt.qdq_setting.AddQDQPairToWeight
675
+ else self.recipes.get("add_qdq_pair_to_weight", False)
676
+ ),
677
+ (
678
+ options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin
679
+ if options.onnxrt.qdq_setting.OpTypesToExcludeOutputQuantizatioin is not None
680
+ else self.recipes.get("optypes_to_exclude_output_quant", [])
681
+ ),
682
+ (
683
+ options.onnxrt.qdq_setting.DedicatedQDQPair
684
+ if not options.onnxrt.qdq_setting.DedicatedQDQPair
685
+ else self.recipes.get("dedicated_qdq_pair", False)
686
+ ),
669
687
  )
670
688
 
671
689
  quantizer.quantize_model()
@@ -765,7 +783,7 @@ class ONNXRUNTIMEAdaptor(Adaptor):
765
783
  black_nodes=black_nodes,
766
784
  white_nodes=white_nodes,
767
785
  iterations=list(range(0, iterations)),
768
- backend=self.backend if self.backend != "DmlExecutionProvider" else "CPUExecutionProvider",
786
+ backend=self.backend,
769
787
  reduce_range=self.reduce_range,
770
788
  **kwargs,
771
789
  )
@@ -979,12 +997,10 @@ class ONNXRUNTIMEAdaptor(Adaptor):
979
997
  sess_options.register_custom_ops_library(get_library_path())
980
998
 
981
999
  if not model.is_large_model:
982
- sess = ort.InferenceSession(
983
- model.model.SerializeToString(), sess_options, providers=["CPUExecutionProvider"]
984
- )
1000
+ sess = ort.InferenceSession(model.model.SerializeToString(), sess_options, providers=[self.backend])
985
1001
  elif model.model_path is not None: # pragma: no cover
986
1002
  model.model = onnx.ModelProto() # clean memory for large model
987
- sess = ort.InferenceSession(model.model_path, sess_options, providers=["CPUExecutionProvider"])
1003
+ sess = ort.InferenceSession(model.model_path, sess_options, providers=[self.backend])
988
1004
  else: # pragma: no cover
989
1005
  logger.warning("Please use model path instead of onnx model object to quantize")
990
1006
  del sess
@@ -1914,6 +1930,7 @@ class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor):
1914
1930
  mse=mse,
1915
1931
  perchannel=perchannel,
1916
1932
  accuracy_level=accuracy_level,
1933
+ providers=[self.backend],
1917
1934
  )
1918
1935
  if "AWQ" in algos:
1919
1936
  from neural_compressor.adaptor.ox_utils.weight_only import awq_quantize
@@ -1931,6 +1948,7 @@ class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor):
1931
1948
  enable_auto_scale=enable_auto_scale,
1932
1949
  enable_mse_search=enable_mse_search,
1933
1950
  accuracy_level=accuracy_level,
1951
+ providers=[self.backend],
1934
1952
  )
1935
1953
  elif "RTN" in algos:
1936
1954
  from neural_compressor.adaptor.ox_utils.weight_only import rtn_quantize
@@ -1940,6 +1958,7 @@ class ONNXRT_WeightOnlyAdaptor(ONNXRUNTIMEAdaptor):
1940
1958
  tmp_model,
1941
1959
  quant_config,
1942
1960
  accuracy_level=accuracy_level,
1961
+ providers=[self.backend],
1943
1962
  )
1944
1963
  tmp_model.q_config = copy.deepcopy(quant_config)
1945
1964
  self._dump_model_op_stats(tmp_model, tune_cfg)
@@ -17,6 +17,20 @@
17
17
  -
18
18
  version:
19
19
  name: '1.6.0'
20
+ weight_only_integer: &cap_weight_only {
21
+ 'MatMul': &cap_weight_only_matmul {
22
+ 'weight': {
23
+ 'dtype': ['int'], # no need to care about uint
24
+ 'bits': [4, 3, 8], # [1-8]
25
+ 'group_size': [32, -1, 1, 16, 64, 128, 256, 512, 1024], # [1-inf]
26
+ 'scheme': ['sym', 'asym'], # sym, no ZP
27
+ 'algorithm': ['RTN', 'AWQ', 'GPTQ']
28
+ },
29
+ 'activation': {
30
+ 'dtype': ['fp32']
31
+ }
32
+ },
33
+ }
20
34
  int8: &ref_1_6 {
21
35
  'static': &ref_1_6_static {
22
36
  'Conv': {
@@ -114,6 +128,7 @@
114
128
  -
115
129
  version:
116
130
  name: '1.7.0'
131
+ weight_only_integer: *cap_weight_only
117
132
  int8: {
118
133
  'static': {
119
134
  'FusedConv': {
@@ -155,6 +170,7 @@
155
170
  -
156
171
  version:
157
172
  name: '1.8.0'
173
+ weight_only_integer: *cap_weight_only
158
174
  int8: {
159
175
  'static': {
160
176
  'FusedConv': {
@@ -224,6 +240,7 @@
224
240
  -
225
241
  version:
226
242
  name: '1.9.0'
243
+ weight_only_integer: *cap_weight_only
227
244
  int8: {
228
245
  'static': {
229
246
  'FusedConv': {
@@ -300,6 +317,7 @@
300
317
  -
301
318
  version:
302
319
  name: '1.10.0'
320
+ weight_only_integer: *cap_weight_only
303
321
  int8: {
304
322
  'static': {
305
323
  'FusedConv': {
@@ -356,6 +374,7 @@
356
374
  -
357
375
  version:
358
376
  name: '1.11.0'
377
+ weight_only_integer: *cap_weight_only
359
378
  int8: &ref_1_11 {
360
379
  'static': {
361
380
  'FusedConv': {
@@ -427,6 +446,7 @@
427
446
  -
428
447
  version:
429
448
  name: '1.12.0'
449
+ weight_only_integer: *cap_weight_only
430
450
  int8: *ref_1_11
431
451
  fp16: *common_fp16
432
452
  bf16: *common_bf16
@@ -436,6 +456,7 @@
436
456
  -
437
457
  version:
438
458
  name: 'default'
459
+ weight_only_integer: *cap_weight_only
439
460
  int8: *ref_1_6
440
461
  fp16: *common_fp16
441
462
  bf16: *common_bf16