compressed-tensors 0.10.3a20250716__tar.gz → 0.10.3a20250721__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. {compressed_tensors-0.10.3a20250716/src/compressed_tensors.egg-info → compressed_tensors-0.10.3a20250721}/PKG-INFO +1 -1
  2. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/lifecycle/forward.py +68 -5
  3. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/quant_args.py +31 -8
  4. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/quant_scheme.py +41 -0
  5. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/utils/helpers.py +11 -2
  6. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/version.py +1 -1
  7. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  8. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_examples/test_bitmask_compression_ipynb.py +3 -1
  9. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/test_forward.py +50 -0
  10. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/test_quant_args.py +2 -1
  11. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/test_utils/test_helpers.py +28 -1
  12. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/.gitkeep +0 -0
  13. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/actions/test/action.yml +0 -0
  14. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/scripts/step-status +0 -0
  15. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/workflows/build-test.yml +0 -0
  16. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/workflows/build.yml +0 -0
  17. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/workflows/report.yml +0 -0
  18. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/workflows/test-check.yaml +0 -0
  19. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/workflows/test.yml +0 -0
  20. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/workflows/trigger-all.yml +0 -0
  21. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.github/workflows/upload.yml +0 -0
  22. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/.gitignore +0 -0
  23. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/LICENSE +0 -0
  24. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/Makefile +0 -0
  25. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/README.md +0 -0
  26. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  27. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/examples/bit_packing/int4_config.json +0 -0
  28. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/examples/bitmask_compression.ipynb +0 -0
  29. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  30. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  31. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/examples/llama_1.1b/example_quant_config.json +0 -0
  32. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  33. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/examples/quantize_and_pack_int4.ipynb +0 -0
  34. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/pyproject.toml +0 -0
  35. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/setup.cfg +0 -0
  36. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/setup.py +0 -0
  37. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/__init__.py +0 -0
  38. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/README.md +0 -0
  39. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/__init__.py +0 -0
  40. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/base.py +0 -0
  41. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/__init__.py +0 -0
  42. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/base.py +0 -0
  43. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/helpers.py +0 -0
  44. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  45. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  46. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  47. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  48. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  49. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  50. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  51. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  52. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  53. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  54. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  55. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  56. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  57. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  58. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/config/__init__.py +0 -0
  59. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/config/base.py +0 -0
  60. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/config/dense.py +0 -0
  61. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  62. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  63. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/linear/__init__.py +0 -0
  64. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  65. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/__init__.py +0 -0
  66. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  67. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  68. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  69. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  70. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  71. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/quant_config.py +0 -0
  72. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  73. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/registry/__init__.py +0 -0
  74. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/registry/registry.py +0 -0
  75. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/__init__.py +0 -0
  76. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/apply.py +0 -0
  77. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  78. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/factory/base.py +0 -0
  79. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  80. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  81. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  82. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/transform_args.py +0 -0
  83. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/transform_config.py +0 -0
  84. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  85. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  86. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  87. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  88. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  89. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/utils/__init__.py +0 -0
  90. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/utils/helpers.py +0 -0
  91. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/utils/internal.py +0 -0
  92. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/utils/offload.py +0 -0
  93. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/utils/permutations_24.py +0 -0
  94. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/utils/permute.py +0 -0
  95. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  96. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  97. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  98. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  99. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors.egg-info/requires.txt +0 -0
  100. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  101. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/__init__.py +0 -0
  102. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/conftest.py +0 -0
  103. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/__init__.py +0 -0
  104. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/model_compressors/__init__.py +0 -0
  105. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  106. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  107. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  108. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  109. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  110. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  111. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  112. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  113. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  114. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  115. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  116. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_configs/__init__.py +0 -0
  117. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_configs/test_base.py +0 -0
  118. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_linear/__init__.py +0 -0
  119. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_linear/test_compressed_linear.py +0 -0
  120. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/__init__.py +0 -0
  121. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/__init__.py +0 -0
  122. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/conftest.py +0 -0
  123. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  124. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  125. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  126. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  127. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  128. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  129. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/test_configs/__init__.py +0 -0
  130. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  131. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  132. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/test_quant_config.py +0 -0
  133. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_quantization/test_quant_scheme.py +0 -0
  134. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_registry.py +0 -0
  135. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_transform/conftest.py +0 -0
  136. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_transform/factory/test_correctness.py +0 -0
  137. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_transform/factory/test_memory.py +0 -0
  138. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_transform/test_transform_args.py +0 -0
  139. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_transform/test_transform_config.py +0 -0
  140. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_transform/test_transform_scheme.py +0 -0
  141. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_transform/utils/test_hadamard.py +0 -0
  142. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_utils/__init__.py +0 -0
  143. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_utils/test_helpers.py +0 -0
  144. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_utils/test_offload.py +0 -0
  145. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/test_utils/test_safetensors_load.py +0 -0
  146. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/tests/testing_utils.py +0 -0
  147. {compressed_tensors-0.10.3a20250716 → compressed_tensors-0.10.3a20250721}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250716
3
+ Version: 0.10.3a20250721
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -111,11 +111,18 @@ def dequantize(
111
111
  elif scale.ndim == 2:
112
112
  if scale.shape[1] == 1:
113
113
  args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
114
- else:
114
+ # Scale height matches input or is 1 -> group quantization across columns
115
+ #
116
+ # Example 1: scale.shape[0] == 1
117
+ # x_q: (4, 8), scale: (1, 4) -> 2 columns per group
118
+ #
119
+ # Example 2: scale.shape[0] == x_q.shape[0]
120
+ # x_q: (4, 8), scale: (4, 4) -> 2 elements per group (per row)
121
+ elif (scale.shape[0] == 1) or (scale.shape[0] == x_q.shape[0]):
115
122
  group_size = int(x_q.shape[1] / scale.shape[1])
116
- args = QuantizationArgs(
117
- strategy=QuantizationStrategy.GROUP, group_size=group_size
118
- )
123
+ args = QuantizationArgs(strategy=QuantizationStrategy.GROUP, group_size=group_size)
124
+ else:
125
+ args = QuantizationArgs(strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape)
119
126
  else:
120
127
  raise ValueError(
121
128
  f"Could not infer a quantization strategy from scale with {scale.ndim} "
@@ -189,7 +196,63 @@ def _process_quantization(
189
196
  q_min, q_max = calculate_range(args, x.device)
190
197
  group_size = args.group_size
191
198
 
192
- if args.strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP):
199
+ # blockwise FP8: quantize per 2D block, supports block_structure for static block quant
200
+ if args.strategy == QuantizationStrategy.BLOCK:
201
+ original_shape = x.shape
202
+ rows, cols = x.shape[-2], x.shape[-1]
203
+ block_height, block_width = args.block_structure
204
+
205
+ # Ensure exact division (tensor dimensions must be divisible by block size)
206
+ if rows % block_height != 0:
207
+ raise ValueError(
208
+ f"Tensor height {rows} is not divisible by block_height {block_height}. "
209
+ f"Block quantization requires exact division."
210
+ )
211
+ if cols % block_width != 0:
212
+ raise ValueError(
213
+ f"Tensor width {cols} is not divisible by block_width {block_width}. "
214
+ f"Block quantization requires exact division."
215
+ )
216
+
217
+ # reshape into blocks and transpose to make each block contiguous
218
+ num_rows_blocks = rows // block_height
219
+ num_cols_blocks = cols // block_width
220
+ x_blocks = x.reshape(
221
+ num_rows_blocks,
222
+ block_height,
223
+ num_cols_blocks,
224
+ block_width,
225
+ ).transpose(1, 2)
226
+
227
+ # expand scale/zero_point for blocks
228
+ sb = scale.unsqueeze(-1).unsqueeze(-1)
229
+ zb = zero_point.unsqueeze(-1).unsqueeze(-1) if zero_point is not None else None
230
+ if do_quantize:
231
+ # quantize blocks
232
+ x_blocks = _quantize(
233
+ x=x_blocks,
234
+ scale=sb,
235
+ zero_point=zb,
236
+ q_min=q_min,
237
+ q_max=q_max,
238
+ args=args,
239
+ dtype=dtype,
240
+ global_scale=global_scale,
241
+ )
242
+ if do_dequantize:
243
+ # dequantize blocks
244
+ x_blocks = _dequantize(
245
+ x_q=x_blocks,
246
+ scale=sb,
247
+ zero_point=zb,
248
+ global_scale=global_scale,
249
+ )
250
+ # restore original shape
251
+ output = x_blocks.transpose(1, 2).reshape(original_shape)
252
+ elif args.strategy in (
253
+ QuantizationStrategy.GROUP,
254
+ QuantizationStrategy.TENSOR_GROUP,
255
+ ):
193
256
  n_dims = x.shape
194
257
  if len(n_dims) > 2:
195
258
  x = x.squeeze(0)
@@ -14,7 +14,7 @@
14
14
 
15
15
  import warnings
16
16
  from enum import Enum
17
- from typing import Any, Dict, Optional, Union
17
+ from typing import Any, Dict, List, Optional, Union
18
18
 
19
19
  import torch
20
20
  from compressed_tensors.utils import Aliasable
@@ -153,8 +153,8 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
153
153
  :param symmetric: whether or not quantization scale is symmetric about zero-point
154
154
  :param strategy: string id determining the scope of scale/zero-point to apply
155
155
  :param group_size: group length to use for the group strategy
156
- :param block_structure: 2d block structure to use for the block strategy, must be
157
- of the format "2x4", "8x16", etc.
156
+ :param block_structure: 2d block structure to use for the block strategy; must be
157
+ a list of two ints [rows, cols] like [128, 128].
158
158
  :param dynamic: set True to perform dynamic quantization - values will not be
159
159
  calibrated during calibration phase, instead during inference new quantization
160
160
  ranges will be observed with every sample. Defaults to False for static
@@ -169,7 +169,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
169
169
  symmetric: bool = True
170
170
  group_size: Optional[int] = None
171
171
  strategy: Optional[QuantizationStrategy] = None
172
- block_structure: Optional[str] = None
172
+ block_structure: Optional[List[int]] = None
173
173
  dynamic: Union[DynamicType, bool] = False
174
174
  actorder: Union[ActivationOrdering, bool, None] = None
175
175
  observer: Optional[str] = Field(
@@ -207,6 +207,28 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
207
207
 
208
208
  return value
209
209
 
210
+ @field_validator("block_structure", mode="before")
211
+ def validate_block_structure(cls, value) -> Optional[List[int]]:
212
+ if value is None:
213
+ return value
214
+ # For backward compatibility, allow string format "2x4", "8x16", etc.
215
+ if isinstance(value, str):
216
+ try:
217
+ return [int(x) for x in value.split("x")]
218
+ except Exception:
219
+ raise ValueError(
220
+ f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
221
+ )
222
+ if isinstance(value, (list, tuple)):
223
+ if len(value) != 2 or not all(isinstance(v, int) for v in value):
224
+ raise ValueError(
225
+ f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
226
+ )
227
+ return list(value)
228
+ raise ValueError(
229
+ f"Invalid block_structure '{value}'. Must be a list of two ints [rows, cols]."
230
+ )
231
+
210
232
  @field_validator("strategy", mode="before")
211
233
  def validate_strategy(cls, value) -> Union[QuantizationStrategy, None]:
212
234
  if isinstance(value, str):
@@ -277,14 +299,15 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
277
299
 
278
300
  # infer observer w.r.t. dynamic
279
301
  if dynamic:
280
- if strategy not in (
302
+ supported_strategies = (
281
303
  QuantizationStrategy.TOKEN,
282
304
  QuantizationStrategy.TENSOR,
283
305
  QuantizationStrategy.TENSOR_GROUP,
284
- ):
306
+ QuantizationStrategy.GROUP,
307
+ )
308
+ if strategy not in supported_strategies:
285
309
  raise ValueError(
286
- f"One of {(QuantizationStrategy.TOKEN, QuantizationStrategy.TENSOR, QuantizationStrategy.TENSOR_GROUP)} "
287
- "must be used for dynamic quantization",
310
+ f"One of {supported_strategies} must be used for dynamic quantization"
288
311
  )
289
312
 
290
313
  if (
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import warnings
15
16
  from copy import deepcopy
16
17
  from typing import Any, Dict, List, Optional
17
18
 
@@ -52,6 +53,7 @@ class QuantizationScheme(BaseModel):
52
53
  def validate_model_after(model: "QuantizationScheme") -> "QuantizationScheme":
53
54
  inputs = model.input_activations
54
55
  outputs = model.output_activations
56
+ weights = model.weights
55
57
 
56
58
  if inputs is not None:
57
59
  if inputs.actorder is not None:
@@ -61,6 +63,21 @@ class QuantizationScheme(BaseModel):
61
63
  if outputs.actorder is not None:
62
64
  raise ValueError("Cannot apply actorder to output activations")
63
65
 
66
+ if (
67
+ inputs and weights
68
+ and weights.strategy == QuantizationStrategy.GROUP
69
+ and inputs.strategy == QuantizationStrategy.GROUP
70
+ and weights.group_size != inputs.group_size
71
+ ):
72
+ warnings.warn(
73
+ "Using GROUP strategy for both weights and input_activations "
74
+ f"with different group sizes ({weights.group_size} vs {inputs.group_size}) "
75
+ "may complicate fused kernel implementations. Consider using "
76
+ "TENSOR_GROUP strategy for both or matching group sizes.",
77
+ UserWarning,
78
+ stacklevel=2
79
+ )
80
+
64
81
  return model
65
82
 
66
83
 
@@ -243,6 +260,29 @@ FP8_DYNAMIC = dict(
243
260
  ),
244
261
  )
245
262
 
263
+ # Block‐wise FP8 (deepseekv3-style quantization):
264
+ # static 128x128 per‐block weights and
265
+ # dynamic per‐token‐group activations
266
+ FP8_BLOCK = dict(
267
+ weights=QuantizationArgs(
268
+ num_bits=8,
269
+ type=QuantizationType.FLOAT,
270
+ strategy=QuantizationStrategy.BLOCK,
271
+ symmetric=True,
272
+ dynamic=False,
273
+ block_structure=[128, 128],
274
+ ),
275
+ input_activations=QuantizationArgs(
276
+ num_bits=8,
277
+ type=QuantizationType.FLOAT,
278
+ strategy=QuantizationStrategy.GROUP,
279
+ symmetric=True,
280
+ dynamic=True,
281
+ observer=None,
282
+ group_size=128,
283
+ ),
284
+ )
285
+
246
286
  PRESET_SCHEMES = {
247
287
  # Unquantized (no-op)
248
288
  "UNQUANTIZED": UNQUANTIZED,
@@ -257,6 +297,7 @@ PRESET_SCHEMES = {
257
297
  # Float weight and activation schemes
258
298
  "FP8": FP8,
259
299
  "FP8_DYNAMIC": FP8_DYNAMIC,
300
+ "FP8_BLOCK": FP8_BLOCK,
260
301
  "NVFP4A16": NVFP4A16,
261
302
  "NVFP4": NVFP4,
262
303
  }
@@ -171,7 +171,10 @@ def compute_dynamic_scales_and_zp(
171
171
  reduce_dims = tuple(idx for idx in range(value.ndim) if idx not in dim)
172
172
  elif args.strategy == QuantizationStrategy.TENSOR:
173
173
  reduce_dims = None
174
- elif args.strategy == QuantizationStrategy.TENSOR_GROUP:
174
+ elif args.strategy in (
175
+ QuantizationStrategy.TENSOR_GROUP,
176
+ QuantizationStrategy.GROUP,
177
+ ):
175
178
  if len(value.shape) > 2:
176
179
  value = value.squeeze(0)
177
180
 
@@ -187,9 +190,15 @@ def compute_dynamic_scales_and_zp(
187
190
  ),
188
191
  )
189
192
  else:
193
+ supported_strategies = (
194
+ QuantizationStrategy.TOKEN,
195
+ QuantizationStrategy.TENSOR,
196
+ QuantizationStrategy.TENSOR_GROUP,
197
+ QuantizationStrategy.GROUP,
198
+ )
190
199
  raise ValueError(
191
200
  "Dynamic quantization is only supported for ",
192
- f"{QuantizationStrategy.TOKEN, QuantizationStrategy.TENSOR, QuantizationStrategy.TENSOR_GROUP}",
201
+ f"{supported_strategies}",
193
202
  )
194
203
 
195
204
  if not reduce_dims:
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.3.a20250716'
20
+ __version__ = version = '0.10.3.a20250721'
21
21
  __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250716
3
+ Version: 0.10.3a20250721
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -12,8 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import nbformat
16
15
  import pytest
16
+
17
+
18
+ nbformat = pytest.importorskip("nbformat")
17
19
  from nbconvert.preprocessors import ExecutePreprocessor
18
20
 
19
21
 
@@ -13,9 +13,12 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
+ import math
17
+
16
18
  import pytest
17
19
  import torch
18
20
  from compressed_tensors.quantization.lifecycle.forward import (
21
+ _process_quantization,
19
22
  dequantize,
20
23
  forward_quantize,
21
24
  quantize,
@@ -29,6 +32,7 @@ from compressed_tensors.quantization.quant_args import (
29
32
  QuantizationStrategy,
30
33
  )
31
34
  from compressed_tensors.quantization.quant_config import QuantizationStatus
35
+ from compressed_tensors.quantization.utils.helpers import calculate_range
32
36
  from torch.nn import Linear
33
37
 
34
38
 
@@ -203,3 +207,49 @@ def test_dequantize(num_bits, type, strategy, group_size, scale, zero_point, g_i
203
207
  dtype=None,
204
208
  g_idx=g_idx,
205
209
  )
210
+
211
+
212
+ def test_process_quantization_block_static():
213
+ """
214
+ Static block quantization (QuantizationStrategy.BLOCK) should split a 2D tensor
215
+ into blocks, quantize each block, and reassemble without changing shape.
216
+ """
217
+ rows, cols = 8, 8
218
+ bh, bw = 2, 4
219
+ x = torch.randn(rows, cols)
220
+ args = QuantizationArgs(
221
+ num_bits=8,
222
+ type="float",
223
+ strategy=QuantizationStrategy.BLOCK,
224
+ symmetric=True,
225
+ dynamic=False,
226
+ block_structure=[bh, bw],
227
+ )
228
+ num_rb = math.ceil(rows / bh)
229
+ num_cb = math.ceil(cols / bw)
230
+ scale = torch.rand(num_rb, num_cb) + 0.1
231
+ zp = torch.zeros_like(scale)
232
+ q_min, q_max = calculate_range(args, x.device)
233
+ out = _process_quantization(
234
+ x=x,
235
+ scale=scale,
236
+ zero_point=zp,
237
+ args=args,
238
+ do_quantize=True,
239
+ do_dequantize=False,
240
+ dtype=None,
241
+ global_scale=None,
242
+ )
243
+ assert out.shape == x.shape
244
+ # full fake-quantize roundtrip
245
+ out2 = _process_quantization(
246
+ x=x,
247
+ scale=scale,
248
+ zero_point=zp,
249
+ args=args,
250
+ do_quantize=True,
251
+ do_dequantize=True,
252
+ dtype=None,
253
+ global_scale=None,
254
+ )
255
+ assert out2.shape == x.shape
@@ -59,7 +59,8 @@ def test_block():
59
59
 
60
60
  block = QuantizationArgs(**kwargs)
61
61
  assert block.strategy == QuantizationStrategy.BLOCK
62
- assert block.block_structure == kwargs["block_structure"]
62
+ assert block.block_structure == [2, 4]
63
+ assert block.block_structure != kwargs["block_structure"] # "2x4" != [2, 4]
63
64
 
64
65
 
65
66
  def test_infer_strategy():
@@ -20,7 +20,11 @@ from compressed_tensors.quantization import (
20
20
  QuantizationArgs,
21
21
  QuantizationStrategy,
22
22
  )
23
- from compressed_tensors.quantization.utils import calculate_qparams, generate_gparam
23
+ from compressed_tensors.quantization.utils import (
24
+ calculate_qparams,
25
+ compute_dynamic_scales_and_zp,
26
+ generate_gparam,
27
+ )
24
28
 
25
29
 
26
30
  @pytest.mark.parametrize(
@@ -73,3 +77,26 @@ def test_fused_global_scales():
73
77
  assert max_tensor_value.item() == pytest.approx(
74
78
  FP4_E2M1_DATA.max * FP8_E4M3_DATA.max / global_scale, abs=0.001
75
79
  )
80
+
81
+
82
+ @pytest.mark.parametrize(
83
+ "shape,group_size,exp_shape",
84
+ [
85
+ # Only batch size =1 is supported for dynamic GROUP quantization
86
+ ((1, 4, 8), 4, torch.Size([4, 2])),
87
+ ],
88
+ )
89
+ def test_compute_dynamic_scales_and_zp_group(shape, group_size, exp_shape):
90
+ """
91
+ Dynamic group quantization should reduce activations in groups, producing
92
+ scales and zero points of shape [batch, num_groups].
93
+ """
94
+ value = torch.randn(*shape)
95
+ args = QuantizationArgs(
96
+ strategy=QuantizationStrategy.GROUP,
97
+ group_size=group_size,
98
+ dynamic=True,
99
+ )
100
+ scale, zp = compute_dynamic_scales_and_zp(value, args, module=torch.nn.Module())
101
+ assert scale.shape == exp_shape
102
+ assert zp.shape == exp_shape