compressed-tensors 0.10.3a20250812__tar.gz → 0.10.3a20250815__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. {compressed_tensors-0.10.3a20250812/src/compressed_tensors.egg-info → compressed_tensors-0.10.3a20250815}/PKG-INFO +1 -1
  2. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +93 -25
  3. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/config/base.py +1 -0
  4. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/quant_config.py +6 -0
  5. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/quant_scheme.py +9 -0
  6. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/match.py +16 -8
  7. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/version.py +1 -1
  8. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  9. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/model_compressors/test_model_compressor.py +46 -5
  10. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/test_quant_scheme.py +5 -1
  11. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_utils/test_match.py +14 -8
  12. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/.gitkeep +0 -0
  13. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/actions/test/action.yml +0 -0
  14. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/scripts/step-status +0 -0
  15. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/workflows/build-test.yml +0 -0
  16. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/workflows/build.yml +0 -0
  17. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/workflows/report.yml +0 -0
  18. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/workflows/test-check.yaml +0 -0
  19. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/workflows/test.yml +0 -0
  20. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/workflows/trigger-all.yml +0 -0
  21. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.github/workflows/upload.yml +0 -0
  22. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/.gitignore +0 -0
  23. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/LICENSE +0 -0
  24. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/Makefile +0 -0
  25. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/README.md +0 -0
  26. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  27. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/examples/bit_packing/int4_config.json +0 -0
  28. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/examples/bitmask_compression.ipynb +0 -0
  29. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  30. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  31. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/examples/llama_1.1b/example_quant_config.json +0 -0
  32. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  33. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/examples/quantize_and_pack_int4.ipynb +0 -0
  34. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/pyproject.toml +0 -0
  35. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/setup.cfg +0 -0
  36. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/setup.py +0 -0
  37. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/__init__.py +0 -0
  38. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/README.md +0 -0
  39. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/__init__.py +0 -0
  40. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/base.py +0 -0
  41. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/__init__.py +0 -0
  42. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/base.py +0 -0
  43. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/helpers.py +0 -0
  44. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  45. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  46. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  47. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  48. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  49. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  50. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  51. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  52. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  53. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  54. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  55. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  56. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  57. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/config/__init__.py +0 -0
  58. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/config/dense.py +0 -0
  59. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  60. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  61. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/linear/__init__.py +0 -0
  62. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  63. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/__init__.py +0 -0
  64. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  65. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  66. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  67. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  68. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  69. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  70. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/quant_args.py +0 -0
  71. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  72. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  73. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/registry/__init__.py +0 -0
  74. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/registry/registry.py +0 -0
  75. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/__init__.py +0 -0
  76. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/apply.py +0 -0
  77. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  78. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/factory/base.py +0 -0
  79. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  80. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  81. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  82. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/transform_args.py +0 -0
  83. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/transform_config.py +0 -0
  84. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  85. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  86. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  87. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  88. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  89. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/__init__.py +0 -0
  90. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/helpers.py +0 -0
  91. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/internal.py +0 -0
  92. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/offload.py +0 -0
  93. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/permutations_24.py +0 -0
  94. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/permute.py +0 -0
  95. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  96. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  97. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors/utils/type.py +0 -0
  98. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  99. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  100. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors.egg-info/requires.txt +0 -0
  101. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  102. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/__init__.py +0 -0
  103. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/conftest.py +0 -0
  104. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/__init__.py +0 -0
  105. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/model_compressors/__init__.py +0 -0
  106. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  107. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  108. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  109. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  110. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  111. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  112. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  113. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  114. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  115. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  116. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_configs/__init__.py +0 -0
  117. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_configs/test_base.py +0 -0
  118. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  119. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_linear/__init__.py +0 -0
  120. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_linear/test_compressed_linear.py +0 -0
  121. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/__init__.py +0 -0
  122. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/__init__.py +0 -0
  123. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/conftest.py +0 -0
  124. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  125. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  126. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  127. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  128. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  129. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  130. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  131. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/test_configs/__init__.py +0 -0
  132. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  133. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  134. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/test_quant_args.py +0 -0
  135. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/test_quant_config.py +0 -0
  136. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  137. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_registry.py +0 -0
  138. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_transform/conftest.py +0 -0
  139. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_transform/factory/test_correctness.py +0 -0
  140. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_transform/factory/test_memory.py +0 -0
  141. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_transform/factory/test_serialization.py +0 -0
  142. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_transform/test_transform_args.py +0 -0
  143. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_transform/test_transform_config.py +0 -0
  144. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_transform/test_transform_scheme.py +0 -0
  145. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_transform/utils/test_hadamard.py +0 -0
  146. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_utils/__init__.py +0 -0
  147. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_utils/test_helpers.py +0 -0
  148. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_utils/test_offload.py +0 -0
  149. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_utils/test_safetensors_load.py +0 -0
  150. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/test_utils/test_type.py +0 -0
  151. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/tests/testing_utils.py +0 -0
  152. {compressed_tensors-0.10.3a20250812 → compressed_tensors-0.10.3a20250815}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250812
3
+ Version: 0.10.3a20250815
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -169,7 +169,7 @@ class ModelCompressor:
169
169
  cls,
170
170
  model: Module,
171
171
  sparsity_config: Union[SparsityCompressionConfig, str, None] = None,
172
- quantization_format: Optional[str] = None,
172
+ quantization_format: Optional[Union[str, List[str]]] = None,
173
173
  ) -> Optional["ModelCompressor"]:
174
174
  """
175
175
  Given a pytorch model and optional sparsity and/or quantization configs,
@@ -182,7 +182,6 @@ class ModelCompressor:
182
182
  algorithm
183
183
  :return: compressor for the configs, or None if model is not compressed
184
184
  """
185
- # reconstruct config from schemes attached to modules
186
185
  quantization_config = QuantizationConfig.from_pretrained(
187
186
  model, format=quantization_format
188
187
  )
@@ -203,6 +202,9 @@ class ModelCompressor:
203
202
  sparsity_config=sparsity_config,
204
203
  quantization_config=quantization_config,
205
204
  transform_config=transform_config,
205
+ compression_formats=[quantization_format]
206
+ if isinstance(quantization_format, str)
207
+ else quantization_format,
206
208
  )
207
209
 
208
210
  @staticmethod
@@ -263,19 +265,39 @@ class ModelCompressor:
263
265
 
264
266
  return quantization_config
265
267
 
268
+ def _fetch_unique_quantization_formats(self) -> List[str]:
269
+ """
270
+ Get all unique compression formats present in a model.
271
+ :return: list of quantization formats
272
+ """
273
+ quantization_formats = []
274
+ for _, scheme in self.quantization_config.config_groups.items():
275
+ if scheme.format is not None and scheme.format not in quantization_formats:
276
+ quantization_formats.append(scheme.format)
277
+
278
+ if (
279
+ len(quantization_formats) == 0
280
+ and self.quantization_config.format
281
+ != CompressionFormat.mixed_precision.value
282
+ ):
283
+ quantization_formats.append(self.quantization_config.format)
284
+ return quantization_formats
285
+
266
286
  def __init__(
267
287
  self,
268
288
  sparsity_config: Optional[SparsityCompressionConfig] = None,
269
289
  quantization_config: Optional[QuantizationConfig] = None,
270
290
  transform_config: Optional[TransformConfig] = None,
291
+ compression_formats: Optional[List[str]] = None,
271
292
  ):
272
293
  self.sparsity_config = sparsity_config
273
294
  self.quantization_config = quantization_config
274
295
  self.transform_config = transform_config
296
+ self.compression_formats = compression_formats
275
297
 
276
298
  self.sparsity_compressor = None
277
299
  self.quantization_compressor: Optional[
278
- Union[BaseQuantizationCompressor, DenseCompressor]
300
+ Dict[str, Union[BaseQuantizationCompressor, DenseCompressor]]
279
301
  ] = None
280
302
  # no transform compressor is required
281
303
 
@@ -283,10 +305,21 @@ class ModelCompressor:
283
305
  self.sparsity_compressor = BaseCompressor.load_from_registry(
284
306
  sparsity_config.format, config=sparsity_config
285
307
  )
308
+
286
309
  if quantization_config is not None:
287
- self.quantization_compressor = BaseCompressor.load_from_registry(
288
- quantization_config.format, config=quantization_config
289
- )
310
+ # If a list of compression_format is not provided, we resolve the
311
+ # relevant quantization formats using the config groups from the config
312
+ # and if those are not defined, we fall-back to the global quantization format
313
+ if not self.compression_formats:
314
+ self.compression_formats = self._fetch_unique_quantization_formats()
315
+
316
+ self.quantization_compressor = {}
317
+ for format in self.compression_formats:
318
+ self.quantization_compressor[
319
+ format
320
+ ] = BaseCompressor.load_from_registry(
321
+ format, config=quantization_config
322
+ )
290
323
 
291
324
  # ----- used by hf quantizer ----- #
292
325
 
@@ -381,12 +414,13 @@ class ModelCompressor:
381
414
  targets=scheme.targets,
382
415
  ignore=self.quantization_config.ignore,
383
416
  )
384
- unexpected_keys.update(
385
- merge_names(target, param)
386
- for target in quant_targets
387
- for param in self.quantization_compressor.compression_param_names
388
- if param != "weight"
389
- )
417
+ for quant_compressor in self.quantization_compressor.values():
418
+ unexpected_keys.update(
419
+ merge_names(target, param)
420
+ for target in quant_targets
421
+ for param in quant_compressor.compression_param_names
422
+ if param != "weight"
423
+ )
390
424
 
391
425
  return list(unexpected_keys)
392
426
 
@@ -424,7 +458,21 @@ class ModelCompressor:
424
458
 
425
459
  # quantization first
426
460
  if prefix in module_to_scheme:
427
- state_dict = self.quantization_compressor.compress(
461
+ if (
462
+ not hasattr(module.quantization_scheme, "format")
463
+ or module.quantization_scheme.format is None
464
+ ):
465
+ if len(self.compression_formats) > 1:
466
+ raise ValueError(
467
+ "Applying multiple compressors without defining "
468
+ "per module formats is not supported "
469
+ )
470
+ format = self.compression_formats[0]
471
+ else:
472
+ format = module.quantization_scheme.format
473
+
474
+ quant_compressor = self.quantization_compressor.get(format)
475
+ state_dict = quant_compressor.compress(
428
476
  state_dict,
429
477
  names_to_scheme=module_to_scheme,
430
478
  show_progress=False,
@@ -495,12 +543,24 @@ class ModelCompressor:
495
543
 
496
544
  # quantization second
497
545
  if prefix in module_to_scheme:
498
- state_dict = (
499
- self.quantization_compressor.decompress_module_from_state_dict(
500
- prefix,
501
- state_dict,
502
- scheme=module_to_scheme[prefix],
503
- )
546
+
547
+ if (
548
+ not hasattr(module.quantization_scheme, "format")
549
+ or module.quantization_scheme.format is None
550
+ ):
551
+ if len(self.compression_formats) > 1:
552
+ raise ValueError(
553
+ "Applying multiple compressors without defining "
554
+ "per module formats is not supported "
555
+ )
556
+ format = self.compression_formats[0]
557
+ else:
558
+ format = module.quantization_scheme.format
559
+ quant_compressor = self.quantization_compressor.get(format)
560
+ state_dict = quant_compressor.decompress_module_from_state_dict(
561
+ prefix,
562
+ state_dict,
563
+ scheme=module_to_scheme[prefix],
504
564
  )
505
565
 
506
566
  # remove any existing parameters
@@ -539,7 +599,9 @@ class ModelCompressor:
539
599
 
540
600
  if self.quantization_compressor is not None:
541
601
  module_to_scheme = map_module_to_scheme(model)
542
- state_dict = self.quantization_compressor.compress(
602
+ # Note - compress only supports one compression format atm
603
+ quant_compressor = next(iter(self.quantization_compressor.values()))
604
+ state_dict = quant_compressor.compress(
543
605
  state_dict,
544
606
  names_to_scheme=module_to_scheme,
545
607
  show_progress=show_progress,
@@ -588,14 +650,20 @@ class ModelCompressor:
588
650
  """
589
651
  model_path = get_safetensors_folder(model_path)
590
652
  sparse_decompressed = False
653
+ quant_compressor = (
654
+ next(iter(self.quantization_compressor.values()))
655
+ if self.quantization_compressor is not None
656
+ else None
657
+ )
591
658
 
592
659
  if (
593
660
  self.sparsity_compressor is not None
594
661
  and self.sparsity_config.format != CompressionFormat.dense.value
595
662
  ):
663
+ # note - decompress only supports one compressor atm
596
664
  params_to_ignore = None
597
- if self.quantization_compressor is not None:
598
- params_to_ignore = self.quantization_compressor.compression_param_names
665
+ if quant_compressor is not None:
666
+ params_to_ignore = quant_compressor.compression_param_names
599
667
  # Sparse decompression is applied on the model_path
600
668
  # The compressor will try and load any quantization parameters as well
601
669
  # params_to_skip_load will skip over quantization params from being loaded
@@ -606,7 +674,7 @@ class ModelCompressor:
606
674
  setattr(model, SPARSITY_CONFIG_NAME, self.sparsity_compressor.config)
607
675
  sparse_decompressed = True
608
676
 
609
- if self.quantization_compressor is not None:
677
+ if quant_compressor is not None:
610
678
  # Temporarily set quantization status to FROZEN to prevent
611
679
  # quantization during apply_quantization_config. This ensures
612
680
  # that the dtypes of the weights are not unintentionally updated.
@@ -629,7 +697,7 @@ class ModelCompressor:
629
697
  # including initialization
630
698
  load_weight_quantization=(
631
699
  sparse_decompressed
632
- or isinstance(self.quantization_compressor, DenseCompressor)
700
+ or isinstance(quant_compressor, DenseCompressor)
633
701
  ),
634
702
  )
635
703
 
@@ -637,7 +705,7 @@ class ModelCompressor:
637
705
  model.state_dict() if sparse_decompressed else model_path
638
706
  )
639
707
 
640
- dense_gen = self.quantization_compressor.decompress(
708
+ dense_gen = quant_compressor.decompress(
641
709
  model_path_or_state_dict, names_to_scheme=names_to_scheme
642
710
  )
643
711
  # TODO: all weight quantization params will be moved to the compressor
@@ -32,6 +32,7 @@ class CompressionFormat(Enum):
32
32
  naive_quantized = "naive-quantized"
33
33
  pack_quantized = "pack-quantized"
34
34
  marlin_24 = "marlin-24"
35
+ mixed_precision = "mixed-precision"
35
36
  nvfp4_pack_quantized = "nvfp4-pack-quantized"
36
37
 
37
38
 
@@ -234,6 +234,12 @@ class QuantizationConfig(BaseModel):
234
234
  format = CompressionFormat.int_quantized.value
235
235
  else:
236
236
  format = CompressionFormat.dense.value
237
+ elif isinstance(format, list):
238
+ format = (
239
+ CompressionFormat.mixed_precision.value
240
+ if len(format) > 1
241
+ else format[0]
242
+ )
237
243
 
238
244
  return QuantizationConfig(
239
245
  config_groups=config_groups,
@@ -16,6 +16,7 @@ import warnings
16
16
  from copy import deepcopy
17
17
  from typing import List, Optional
18
18
 
19
+ from compressed_tensors.config import CompressionFormat
19
20
  from compressed_tensors.quantization.quant_args import (
20
21
  DynamicType,
21
22
  QuantizationArgs,
@@ -42,18 +43,21 @@ class QuantizationScheme(BaseModel):
42
43
  :param weights: quantization config for layer weights
43
44
  :param input_activations: quantization config for layer inputs
44
45
  :param output_activations: quantization config for layer outputs
46
+ :param format: CompressionFormat for the layer
45
47
  """
46
48
 
47
49
  targets: List[str]
48
50
  weights: Optional[QuantizationArgs] = None
49
51
  input_activations: Optional[QuantizationArgs] = None
50
52
  output_activations: Optional[QuantizationArgs] = None
53
+ format: Optional[str] = None
51
54
 
52
55
  @model_validator(mode="after")
53
56
  def validate_model_after(model: "QuantizationScheme") -> "QuantizationScheme":
54
57
  inputs = model.input_activations
55
58
  outputs = model.output_activations
56
59
  weights = model.weights
60
+ format = model.format
57
61
 
58
62
  if inputs is not None:
59
63
  if inputs.actorder is not None:
@@ -63,6 +67,11 @@ class QuantizationScheme(BaseModel):
63
67
  if outputs.actorder is not None:
64
68
  raise ValueError("Cannot apply actorder to output activations")
65
69
 
70
+ if format == CompressionFormat.mixed_precision.value:
71
+ raise ValueError(
72
+ "mixed-precision cannot be set as a format for a QuantizationScheme"
73
+ )
74
+
66
75
  if (
67
76
  inputs
68
77
  and weights
@@ -15,7 +15,7 @@
15
15
  import logging
16
16
  import re
17
17
  from collections.abc import Generator
18
- from typing import Iterable, Mapping, Optional, Tuple
18
+ from typing import Iterable, List, Mapping, Optional, Tuple, Union
19
19
 
20
20
  import torch
21
21
  from compressed_tensors.utils.internal import InternalModule
@@ -57,10 +57,10 @@ def match_named_modules(
57
57
  unmatched_targets = set(targets)
58
58
  for name, module in model.named_modules():
59
59
  for target in targets:
60
- if is_match(name, module, target, fused):
60
+ if is_match(name, module, target, fused=fused):
61
61
  unmatched_targets -= {target}
62
62
 
63
- if not any(is_match(name, module, ign, fused) for ign in ignore):
63
+ if not is_match(name, module, ignore, fused=fused):
64
64
  yield name, module
65
65
 
66
66
  if warn_on_fail:
@@ -155,9 +155,7 @@ def match_modules_set(
155
155
  for name, module in model.named_modules():
156
156
  # match until we get a full set
157
157
  for target in targets:
158
- if is_match(name, module, target) and not any(
159
- is_match(name, module, ign) for ign in ignore
160
- ):
158
+ if is_match(name, module, target, ignore):
161
159
  if matches[target] is not None:
162
160
  raise ValueError(f"Matched a {target} twice before completing set")
163
161
  matches[target] = module
@@ -176,7 +174,8 @@ def match_modules_set(
176
174
  def is_match(
177
175
  name: str,
178
176
  module: torch.nn.Module,
179
- target: str,
177
+ targets: Union[str, Iterable[str]],
178
+ ignore: Union[str, Iterable[str]] = tuple(),
180
179
  fused: Optional[FusedMappping] = None,
181
180
  ) -> bool:
182
181
  """
@@ -198,8 +197,17 @@ def is_match(
198
197
  :fused: optional mapping from suffixes of fused modules to the suffixes of their
199
198
  corresponding shards
200
199
  """
200
+ targets = [targets] if isinstance(targets, str) else targets
201
+ ignore = [ignore] if isinstance(ignore, str) else ignore
202
+
201
203
  return not isinstance(module, InternalModule) and (
202
- _match_name(name, target, fused) or _match_class(module, target)
204
+ any(
205
+ _match_name(name, target, fused) or _match_class(module, target)
206
+ for target in targets
207
+ )
208
+ and not any(
209
+ _match_name(name, ign, fused) or _match_class(module, ign) for ign in ignore
210
+ )
203
211
  )
204
212
 
205
213
 
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.3.a20250812'
20
+ __version__ = version = '0.10.3.a20250815'
21
21
  __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250812
3
+ Version: 0.10.3a20250815
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -20,8 +20,12 @@ import pytest
20
20
  import torch
21
21
  import torch.nn as nn
22
22
  from compressed_tensors.compressors import ModelCompressor
23
- from compressed_tensors.config import SparsityCompressionConfig
24
- from compressed_tensors.quantization import QuantizationConfig
23
+ from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
24
+ from compressed_tensors.quantization import (
25
+ QuantizationArgs,
26
+ QuantizationConfig,
27
+ QuantizationScheme,
28
+ )
25
29
  from safetensors.torch import save_file
26
30
  from tests.testing_utils import induce_sparsity, requires_hf_quantizer
27
31
  from transformers import AutoModelForCausalLM
@@ -395,7 +399,7 @@ def _get_combined_config(s_config, q_config):
395
399
  )
396
400
  def test_compress_model(model_stub, q_format, s_config, tmpdir):
397
401
  model = AutoModelForCausalLM.from_pretrained(model_stub, torch_dtype=torch.float32)
398
- compressor = ModelCompressor.from_pretrained_model(model, s_config, q_format)
402
+ compressor = ModelCompressor.from_pretrained_model(model, s_config, [q_format])
399
403
 
400
404
  # compress model by eagerly compressing state dict
401
405
  true_compressed = dict(compressor.compress(model))
@@ -443,7 +447,7 @@ def test_compress_model_meta(model_stub, q_format, s_config):
443
447
  model_stub, torch_dtype=torch.float32
444
448
  )
445
449
  reference_compressor = ModelCompressor.from_pretrained_model(
446
- cpu_model, s_config, q_format
450
+ cpu_model, s_config, [q_format]
447
451
  )
448
452
  # Only stores dtype because meta model does not store values
449
453
  expected = {k: v.dtype for k, v in reference_compressor.compress(cpu_model).items()}
@@ -459,7 +463,7 @@ def test_compress_model_meta(model_stub, q_format, s_config):
459
463
  module.to_empty(device="meta")
460
464
 
461
465
  # Compress in-place on meta model
462
- compressor = ModelCompressor.from_pretrained_model(meta_model, s_config, q_format)
466
+ compressor = ModelCompressor.from_pretrained_model(meta_model, s_config, [q_format])
463
467
  compressor.compress_model(meta_model)
464
468
 
465
469
  # Compare keys and dtypes
@@ -469,6 +473,43 @@ def test_compress_model_meta(model_stub, q_format, s_config):
469
473
  assert compressed[key].dtype == dtype, f"{key} has incorrect dtype"
470
474
 
471
475
 
476
+ def test_multiple_quant_compressors():
477
+ model = torch.nn.Sequential(torch.nn.Linear(1, 2), torch.nn.Linear(2, 3))
478
+ input_activations = QuantizationArgs(num_bits=8, type="float")
479
+ weights = QuantizationArgs(num_bits=8, type="float")
480
+
481
+ scheme_fp8 = QuantizationScheme(
482
+ targets=["Linear"],
483
+ weights=weights,
484
+ input_activations=input_activations,
485
+ format=CompressionFormat.float_quantized.value,
486
+ )
487
+
488
+ input_activations = QuantizationArgs(num_bits=4, type="float")
489
+ weights = QuantizationArgs(num_bits=4, type="float")
490
+
491
+ scheme_nvfp4 = QuantizationScheme(
492
+ targets=["Linear"],
493
+ weights=weights,
494
+ input_activations=input_activations,
495
+ format=CompressionFormat.nvfp4_pack_quantized.value,
496
+ )
497
+
498
+ model[0].quantization_scheme = scheme_fp8
499
+ model[0].quantization_status = "frozen"
500
+ model[1].quantization_scheme = scheme_nvfp4
501
+ model[1].quantization_status = "frozen"
502
+
503
+ formats = [scheme_fp8.format, scheme_nvfp4.format]
504
+
505
+ compressor = ModelCompressor.from_pretrained_model(model, None, formats)
506
+ assert isinstance(compressor.quantization_compressor, dict)
507
+ assert (
508
+ compressor.quantization_config.format == CompressionFormat.mixed_precision.value
509
+ )
510
+ assert all(format in compressor.quantization_compressor for format in formats)
511
+
512
+
472
513
  @pytest.mark.parametrize(
473
514
  "model_stub,comp_stub",
474
515
  [
@@ -26,12 +26,13 @@ def test_basic_scheme():
26
26
  assert scheme.weights == weights
27
27
  assert scheme.input_activations is None
28
28
  assert scheme.output_activations is None
29
+ assert scheme.format is None
29
30
 
30
31
 
31
32
  def test_full_scheme():
32
33
  targets = ["Linear"]
33
34
  weights = QuantizationArgs()
34
- input_activations = QuantizationArgs(num_bits=4)
35
+ input_activations = QuantizationArgs(num_bits=8)
35
36
  output_activations = QuantizationArgs(num_bits=8, type="float", symmetric=False)
36
37
 
37
38
  scheme = QuantizationScheme(
@@ -39,11 +40,13 @@ def test_full_scheme():
39
40
  weights=weights,
40
41
  input_activations=input_activations,
41
42
  output_activations=output_activations,
43
+ format="float-quantized",
42
44
  )
43
45
  assert scheme.targets == targets
44
46
  assert scheme.weights == weights
45
47
  assert scheme.input_activations == input_activations
46
48
  assert scheme.output_activations == output_activations
49
+ assert scheme.format is "float-quantized"
47
50
 
48
51
 
49
52
  def test_needs_targets():
@@ -57,3 +60,4 @@ def test_defaults():
57
60
  assert output.weights is None
58
61
  assert output.input_activations is None
59
62
  assert output.output_activations is None
63
+ assert output.format is None
@@ -201,14 +201,20 @@ class TestIsMatch:
201
201
  "gate_up_proj": ["gate_proj", "up_proj"],
202
202
  }
203
203
 
204
- assert is_match("dummy.qkv_proj", linear, "re:.*q_proj", mapping) == True
205
- assert is_match("dummy.qkv_proj", linear, "re:.*k_proj", mapping) == True
206
- assert is_match("dummy.qkv_proj", linear, "re:.*v_proj", mapping) == True
207
- assert is_match("dummy.qkv_proj", linear, "Linear", mapping) == True
208
-
209
- assert is_match("dummy.gate_up_proj", linear, "re:.*gate_proj", mapping) == True
210
- assert is_match("dummy.gate_up_proj", linear, "re:.*up_proj", mapping) == True
211
- assert is_match("dummy.gate_up_proj", linear, "Linear", mapping) == True
204
+ assert is_match("dummy.qkv_proj", linear, "re:.*q_proj", fused=mapping) == True
205
+ assert is_match("dummy.qkv_proj", linear, "re:.*k_proj", fused=mapping) == True
206
+ assert is_match("dummy.qkv_proj", linear, "re:.*v_proj", fused=mapping) == True
207
+ assert is_match("dummy.qkv_proj", linear, "Linear", fused=mapping) == True
208
+
209
+ assert (
210
+ is_match("dummy.gate_up_proj", linear, "re:.*gate_proj", fused=mapping)
211
+ == True
212
+ )
213
+ assert (
214
+ is_match("dummy.gate_up_proj", linear, "re:.*up_proj", fused=mapping)
215
+ == True
216
+ )
217
+ assert is_match("dummy.gate_up_proj", linear, "Linear", fused=mapping) == True
212
218
 
213
219
 
214
220
  class TestMatchNamedModules: