compressed-tensors 0.12.3a20251023__tar.gz → 0.12.3a20251028__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162):
  1. {compressed_tensors-0.12.3a20251023/src/compressed_tensors.egg-info → compressed_tensors-0.12.3a20251028}/PKG-INFO +1 -1
  2. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/quant_args.py +9 -3
  3. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/utils/__init__.py +1 -0
  4. compressed_tensors-0.12.3a20251028/src/compressed_tensors/quantization/utils/mxfp4_utils.py +97 -0
  5. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/version.py +1 -1
  6. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  7. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
  8. compressed_tensors-0.12.3a20251028/tests/test_quantization/test_utils/test_mxfp4_utils.py +79 -0
  9. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/.gitkeep +0 -0
  10. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/actions/test/action.yml +0 -0
  11. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/scripts/step-status +0 -0
  12. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/build-test.yml +0 -0
  13. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/build.yml +0 -0
  14. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/post-release-nightly-build.yml +0 -0
  15. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/quality-check.yaml +0 -0
  16. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/test-check.yaml +0 -0
  17. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/test.yml +0 -0
  18. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/trigger-all.yml +0 -0
  19. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.gitignore +0 -0
  20. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/LICENSE +0 -0
  21. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/Makefile +0 -0
  22. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/README.md +0 -0
  23. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  24. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/bit_packing/int4_config.json +0 -0
  25. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/bitmask_compression.ipynb +0 -0
  26. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  27. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  28. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/llama_1.1b/example_quant_config.json +0 -0
  29. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  30. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/quantize_and_pack_int4.ipynb +0 -0
  31. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/pyproject.toml +0 -0
  32. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/setup.cfg +0 -0
  33. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/setup.py +0 -0
  34. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/__init__.py +0 -0
  35. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/README.md +0 -0
  36. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/__init__.py +0 -0
  37. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/base.py +0 -0
  38. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/__init__.py +0 -0
  39. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/base.py +0 -0
  40. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/helpers.py +0 -0
  41. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  42. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  43. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  44. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  45. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +0 -0
  46. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  47. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  48. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  49. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  50. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  51. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  52. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  53. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  54. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  55. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/__init__.py +0 -0
  56. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/base.py +0 -0
  57. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/dense.py +0 -0
  58. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/format.py +0 -0
  59. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  60. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  61. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/linear/__init__.py +0 -0
  62. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  63. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/logger.py +0 -0
  64. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/modeling/__init__.py +0 -0
  65. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/modeling/attention.py +0 -0
  66. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/modeling/kvcache.py +0 -0
  67. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/__init__.py +0 -0
  68. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  69. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  70. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  71. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  72. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  73. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  74. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/quant_config.py +0 -0
  75. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
  76. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  77. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  78. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/registry/__init__.py +0 -0
  79. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/registry/registry.py +0 -0
  80. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/__init__.py +0 -0
  81. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/apply.py +0 -0
  82. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  83. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/base.py +0 -0
  84. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  85. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  86. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  87. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/transform_args.py +0 -0
  88. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/transform_config.py +0 -0
  89. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  90. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  91. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  92. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  93. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  94. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/__init__.py +0 -0
  95. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/helpers.py +0 -0
  96. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/internal.py +0 -0
  97. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/match.py +0 -0
  98. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/offload.py +0 -0
  99. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/permutations_24.py +0 -0
  100. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  101. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  102. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/type.py +0 -0
  103. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  104. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors.egg-info/requires.txt +0 -0
  105. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  106. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/__init__.py +0 -0
  107. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/conftest.py +0 -0
  108. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/mock_observer.py +0 -0
  109. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/__init__.py +0 -0
  110. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/model_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  112. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  113. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
  114. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  115. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  116. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  117. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  118. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  119. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  120. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  121. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  122. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_configs/__init__.py +0 -0
  123. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_configs/test_base.py +0 -0
  124. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_configs/test_infer_quant.py +0 -0
  125. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  126. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_linear/__init__.py +0 -0
  127. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_linear/test_compressed_linear.py +0 -0
  128. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_modeling/test_attention_and_cache.py +0 -0
  129. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/__init__.py +0 -0
  130. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/__init__.py +0 -0
  131. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/conftest.py +0 -0
  132. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  133. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  134. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  135. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  136. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  137. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  138. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
  139. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_configs/__init__.py +0 -0
  140. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  141. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  142. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_quant_args.py +0 -0
  143. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_quant_config.py +0 -0
  144. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_quant_scheme.py +0 -0
  145. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  146. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_registry.py +0 -0
  147. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/conftest.py +0 -0
  148. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/factory/test_correctness.py +0 -0
  149. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/factory/test_memory.py +0 -0
  150. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/factory/test_serialization.py +0 -0
  151. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/test_transform_args.py +0 -0
  152. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/test_transform_config.py +0 -0
  153. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/test_transform_scheme.py +0 -0
  154. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/utils/test_hadamard.py +0 -0
  155. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/__init__.py +0 -0
  156. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_helpers.py +0 -0
  157. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_match.py +0 -0
  158. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_offload.py +0 -0
  159. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_safetensors_load.py +0 -0
  160. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_type.py +0 -0
  161. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/testing_utils.py +0 -0
  162. {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251023
3
+ Version: 0.12.3a20251028
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/vllm-project/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -25,6 +25,7 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_valida
25
25
  __all__ = [
26
26
  "FP8_E4M3_DATA",
27
27
  "FP4_E2M1_DATA",
28
+ "BFLOAT16_DATA",
28
29
  "FloatArgs",
29
30
  "QuantizationType",
30
31
  "QuantizationStrategy",
@@ -38,9 +39,9 @@ __all__ = [
38
39
  class FloatArgs:
39
40
  exponent: int
40
41
  mantissa: int
41
- bits: int
42
- max: float
43
- min: float
42
+ bits: Optional[int] = None
43
+ max: Optional[float] = None
44
+ min: Optional[float] = None
44
45
  dtype: Optional[torch.dtype] = None
45
46
 
46
47
 
@@ -76,6 +77,11 @@ class FP8_E4M3_DATA(FloatArgs):
76
77
  dtype = torch.float8_e4m3fn
77
78
 
78
79
 
80
+ class BFLOAT16_DATA(FloatArgs):
81
+ exponent = 8
82
+ mantissa = 7
83
+
84
+
79
85
  class QuantizationType(str, Enum):
80
86
  """
81
87
  Enum storing quantization type options
@@ -14,3 +14,4 @@
14
14
 
15
15
  # flake8: noqa
16
16
  from .helpers import *
17
+ from .mxfp4_utils import *
@@ -0,0 +1,97 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import torch
16
+ from compressed_tensors.quantization.quant_args import BFLOAT16_DATA, FP4_E2M1_DATA
17
+
18
+
19
+ __all__ = ["convert_mxfp4_exp_scale", "generate_mxfp4_scales", "round_to_power_2"]
20
+
21
+ # Reference: https://github.com/vllm-project/vllm/blob/main/tests/quantization/reference_mxfp4.py # noqa: E501
22
+
23
+
24
+ def convert_mxfp4_exp_scale(
25
+ scale: torch.Tensor, dtype: torch.dtype = torch.bfloat16
26
+ ) -> torch.Tensor:
27
+ """
28
+ Converts mxfp4 scales. Scales are powers of 2, with the
29
+ exponents stored in uint8. Converts to dense dtype so that
30
+ they can be applied to the weights and activations during QDQ
31
+
32
+ :param scale: uint8 exponent scale
33
+ :param dtype: dense dtype
34
+ """
35
+ assert scale.dtype == torch.uint8
36
+ scale_exp = scale.to(torch.int32) - 127
37
+ scale = 2.00 ** (scale_exp.to(torch.float))
38
+ return scale.to(dtype)
39
+
40
+
41
+ def round_to_power_2(x: torch.Tensor) -> torch.Tensor:
42
+ """
43
+ Round values to the closest power of 2.
44
+ This is done by masking the values with BFLOAT16_SIGN_EXPONENT_MASK
45
+ which essentially removes the mantissa and keeps the exponent.
46
+ i.e the closest power of 2 for the input_value.
47
+
48
+ E.g:
49
+ 0.0825 = 1.32 (mantissa) x 2**-4 (exponent)
50
+ 0.0825 ==> -4 (exponent) + 127 = 123 = 01111011 (8 bits for bfloat16)
51
+ 0.0825 ==> 0.32 (mantissa) = 0101001 (7 bits for bfloat16)
52
+ 0.0825 == 0b01111011_0101001 (bfloat16)
53
+ 0b01111011_0101001 & 111111111_0000000 == 0b01111011_0000000
54
+ Keep the exponent + sign bit to give you the closest power of 2, 0.0625
55
+
56
+ :param x: tensor to round to closest power of 2
57
+ """
58
+ assert x.dtype == torch.bfloat16
59
+ x = x.view(torch.uint16).to(torch.int32)
60
+
61
+ # Find closest power of 2
62
+ BFLOAT16_VAL_TO_ADD = 1 << (BFLOAT16_DATA.mantissa - FP4_E2M1_DATA.mantissa - 1)
63
+ # Add value to push the value to the next exponent
64
+ BFLOAT16_SIGN_EXPONENT_MASK = (
65
+ (1 << (BFLOAT16_DATA.exponent + 1)) - 1
66
+ ) << BFLOAT16_DATA.mantissa
67
+ # mask to only keep exponent - we conservatively round down
68
+ # to better represent smaller numbers / prevent overflow
69
+ block_max_uint = torch.bitwise_and(
70
+ x + BFLOAT16_VAL_TO_ADD, BFLOAT16_SIGN_EXPONENT_MASK
71
+ )
72
+ return block_max_uint.to(torch.uint16).view(torch.bfloat16)
73
+
74
+
75
+ def generate_mxfp4_scales(x: torch.Tensor) -> torch.Tensor:
76
+ """
77
+ Generate mxfp4 scales. The scales require the following steps
78
+ 1. Round to the closest power of 2
79
+ 2. Convert to exponent
80
+ 3. Store in uint8
81
+
82
+ Called when calculating qparams using observers.
83
+
84
+ :param x: tensor to round to closest power of 2
85
+ :returns uint8 scales as exponents
86
+ """
87
+ # Round to closest power of 2
88
+ scale_power_2 = round_to_power_2(x)
89
+ # Convert to exponent
90
+ scale_exp = 127 + torch.floor(torch.log2(scale_power_2)).to(torch.int32) - 2
91
+ # Clamp and store in uint8, as expected by mxfp4
92
+ scale_exp = torch.clamp(
93
+ scale_exp,
94
+ max=torch.iinfo(torch.uint8).max,
95
+ min=torch.iinfo(torch.uint8).min,
96
+ )
97
+ return scale_exp.to(torch.uint8)
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.12.3.a20251023'
20
+ __version__ = version = '0.12.3.a20251028'
21
21
  __version_tuple__ = version_tuple = (0, 12, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251023
3
+ Version: 0.12.3a20251028
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/vllm-project/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -75,6 +75,7 @@ src/compressed_tensors/quantization/lifecycle/helpers.py
75
75
  src/compressed_tensors/quantization/lifecycle/initialize.py
76
76
  src/compressed_tensors/quantization/utils/__init__.py
77
77
  src/compressed_tensors/quantization/utils/helpers.py
78
+ src/compressed_tensors/quantization/utils/mxfp4_utils.py
78
79
  src/compressed_tensors/registry/__init__.py
79
80
  src/compressed_tensors/registry/registry.py
80
81
  src/compressed_tensors/transform/__init__.py
@@ -142,6 +143,7 @@ tests/test_quantization/test_configs/__init__.py
142
143
  tests/test_quantization/test_configs/test_bit_depths.py
143
144
  tests/test_quantization/test_configs/test_strategies.py
144
145
  tests/test_quantization/test_utils/test_helpers.py
146
+ tests/test_quantization/test_utils/test_mxfp4_utils.py
145
147
  tests/test_transform/conftest.py
146
148
  tests/test_transform/test_transform_args.py
147
149
  tests/test_transform/test_transform_config.py
@@ -0,0 +1,79 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import torch
16
+ from compressed_tensors.quantization.utils import (
17
+ convert_mxfp4_exp_scale,
18
+ generate_mxfp4_scales,
19
+ round_to_power_2,
20
+ )
21
+
22
+
23
+ def test_round_power_2_noise():
24
+ powers = torch.Tensor(
25
+ [
26
+ [2**-10, 2**-9, 2**-8, 2**-7, 2**-6],
27
+ [2**-5, 2**-4, 2**-3, 2**-2, 2**-1],
28
+ [2**0, 2**1, 2**-10, 2**-9, 2**-8],
29
+ [2**-7, 2**-6, 2**-5, 2**-4, 2**-3],
30
+ [2**-2, 2**-1, 2**0, 2**1, 2**-10],
31
+ ]
32
+ ).to(torch.bfloat16)
33
+
34
+ noise = torch.rand_like(powers) * 0.2
35
+ powers_noisy = powers * (1 + noise)
36
+ rounded = round_to_power_2(powers_noisy)
37
+ assert torch.equal(rounded, powers)
38
+
39
+
40
+ def test_round_power_2():
41
+ x = torch.Tensor(
42
+ (
43
+ [5.687891, -8.291567, -1.540329, -0.315635, 0.965272],
44
+ [-6.944130, 0.073246, -0.451778, 8.571118, -9.856593],
45
+ [-0.040571, -0.708509, 2.485657, -4.003352, -0.995600],
46
+ [0.224199, 5.032586, -1.309816, -0.621958, 7.290238],
47
+ [-9.848001, -0.290731, 1.501562, 0.379829, -5.312081],
48
+ )
49
+ ).to(torch.bfloat16)
50
+ x_rounded = torch.Tensor(
51
+ (
52
+ [4.000000, -8.000000, -1.000000, -0.250000, 1.000000],
53
+ [-4.000000, 0.062500, -0.500000, 8.000000, -8.000000],
54
+ [-0.0312, -0.500000, 2.000000, -4.000000, -1.000000],
55
+ [0.250000, 4.000000, -1.000000, -0.500000, 8.000000],
56
+ [-8.000000, -0.250000, 1.000000, 0.250000, -4.000000],
57
+ )
58
+ ).to(torch.bfloat16)
59
+ rounded = round_to_power_2(x)
60
+ assert torch.equal(rounded, x_rounded)
61
+
62
+
63
+ def test_mxfp4_scales_e2e():
64
+ mock_weight = torch.normal(mean=0.0002, std=0.0576, size=(2880, 2880))
65
+
66
+ x = mock_weight.reshape(*mock_weight.shape[:-1], -1, 32).to(torch.bfloat16)
67
+ min_vals = torch.amin(x, dim=-1)
68
+ max_vals = torch.amax(x, dim=-1)
69
+
70
+ min_vals = torch.min(min_vals, torch.zeros_like(min_vals))
71
+ max_vals = torch.max(max_vals, torch.zeros_like(max_vals))
72
+ block_max = torch.max(torch.abs(min_vals), torch.abs(max_vals))
73
+
74
+ scales_generated = generate_mxfp4_scales(block_max)
75
+ converted_ct = convert_mxfp4_exp_scale(scales_generated)
76
+
77
+ scales_exp = torch.log2(converted_ct)
78
+ block_max_exp = torch.floor(torch.log2(round_to_power_2(block_max))) - 2
79
+ assert torch.equal(scales_exp, block_max_exp)