compressed-tensors 0.13.1a20260130__tar.gz → 0.13.1a20260203__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. {compressed_tensors-0.13.1a20260130/src/compressed_tensors.egg-info → compressed_tensors-0.13.1a20260203}/PKG-INFO +1 -1
  2. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/base.py +11 -11
  3. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/helpers.py +8 -8
  4. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +28 -28
  5. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/base.py +16 -15
  6. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +14 -16
  7. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +10 -12
  8. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +13 -13
  9. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/base.py +9 -9
  10. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/dense.py +9 -8
  11. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +7 -8
  12. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +4 -6
  13. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +7 -7
  14. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/__init__.py +3 -3
  15. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/base.py +3 -3
  16. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/dispatch.py +4 -4
  17. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/utils.py +2 -2
  18. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/quant_args.py +6 -0
  19. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +19 -10
  20. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/registry/registry.py +14 -18
  21. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/base.py +3 -4
  22. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/hadamard.py +3 -5
  23. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/matrix_multiply.py +1 -3
  24. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/transform_args.py +2 -3
  25. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/transform_config.py +1 -3
  26. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/transform_scheme.py +2 -4
  27. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/utils/hadamard.py +2 -3
  28. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/utils/matrix.py +1 -3
  29. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/version.py +1 -1
  30. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  31. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/cache/helpers.py +3 -3
  32. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_utils/test_mxfp4_utils.py +4 -2
  33. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/.gitkeep +0 -0
  34. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/actions/test/action.yml +0 -0
  35. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/mergify.yml +0 -0
  36. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/scripts/step-status +0 -0
  37. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/workflows/quality-check.yaml +0 -0
  38. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/workflows/stale.yml +0 -0
  39. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/workflows/test-check.yaml +0 -0
  40. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.gitignore +0 -0
  41. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/LICENSE +0 -0
  42. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/Makefile +0 -0
  43. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/README.md +0 -0
  44. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  45. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/bit_packing/int4_config.json +0 -0
  46. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/bitmask_compression.ipynb +0 -0
  47. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  48. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  49. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/llama_1.1b/example_quant_config.json +0 -0
  50. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  51. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/quantize_and_pack_int4.ipynb +0 -0
  52. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/pyproject.toml +0 -0
  53. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/setup.cfg +0 -0
  54. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/setup.py +0 -0
  55. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/__init__.py +0 -0
  56. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/README.md +0 -0
  57. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/__init__.py +0 -0
  58. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/base.py +0 -0
  59. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/__init__.py +0 -0
  60. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  61. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  62. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  63. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  64. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/__init__.py +0 -0
  65. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/base.py +0 -0
  66. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/dense.py +0 -0
  67. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/format.py +0 -0
  68. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  69. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  70. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/linear/__init__.py +0 -0
  71. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  72. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/logger.py +0 -0
  73. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/modeling/__init__.py +0 -0
  74. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/modeling/attention.py +0 -0
  75. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/modeling/kvcache.py +0 -0
  76. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/__init__.py +0 -0
  77. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/cpu.py +0 -0
  78. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/device.py +0 -0
  79. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/dist_cpu.py +0 -0
  80. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/module.py +0 -0
  81. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/__init__.py +0 -0
  82. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  83. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  84. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  85. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  86. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  87. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  88. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/quant_config.py +0 -0
  89. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
  90. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  91. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  92. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  93. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/registry/__init__.py +0 -0
  94. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/__init__.py +0 -0
  95. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/apply.py +0 -0
  96. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  97. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  98. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  99. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  100. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/__init__.py +0 -0
  101. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/binary_search.py +0 -0
  102. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/helpers.py +0 -0
  103. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/internal.py +0 -0
  104. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/match.py +0 -0
  105. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/offload.py +0 -0
  106. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/permutations_24.py +0 -0
  107. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  108. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  109. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/type.py +0 -0
  110. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  111. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  112. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors.egg-info/requires.txt +0 -0
  113. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  114. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/__init__.py +0 -0
  115. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/conftest.py +0 -0
  116. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/mock_observer.py +0 -0
  117. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/__init__.py +0 -0
  118. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/model_compressors/__init__.py +0 -0
  119. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  120. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  121. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
  122. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  123. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  124. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  125. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
  126. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  127. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  128. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  129. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  130. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  131. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_configs/__init__.py +0 -0
  132. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_configs/test_base.py +0 -0
  133. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_configs/test_infer_quant.py +0 -0
  134. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  135. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_linear/__init__.py +0 -0
  136. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_linear/test_compressed_linear.py +0 -0
  137. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_modeling/test_attention_and_cache.py +0 -0
  138. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_modeling/test_deepseekv3_kvcache_quant.py +0 -0
  139. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/cache/test_cpu.py +0 -0
  140. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/cache/test_dist_cpu.py +0 -0
  141. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/conftest.py +0 -0
  142. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/test_dispatch.py +0 -0
  143. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/test_interface.py +0 -0
  144. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/test_module.py +0 -0
  145. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/__init__.py +0 -0
  146. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/__init__.py +0 -0
  147. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/conftest.py +0 -0
  148. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  149. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  150. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  151. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  152. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  153. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  154. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
  155. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_configs/__init__.py +0 -0
  156. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  157. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  158. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_quant_args.py +0 -0
  159. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_quant_config.py +0 -0
  160. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_quant_scheme.py +0 -0
  161. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  162. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_registry.py +0 -0
  163. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/conftest.py +0 -0
  164. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/factory/test_correctness.py +0 -0
  165. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/factory/test_memory.py +0 -0
  166. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/factory/test_serialization.py +0 -0
  167. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/test_transform_args.py +0 -0
  168. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/test_transform_config.py +0 -0
  169. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/test_transform_scheme.py +0 -0
  170. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/utils/test_hadamard.py +0 -0
  171. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/__init__.py +0 -0
  172. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/test_helpers.py +0 -0
  173. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/test_match.py +0 -0
  174. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/test_safetensors_load.py +0 -0
  175. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/test_type.py +0 -0
  176. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/testing_utils.py +0 -0
  177. {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.13.1a20260130
3
+ Version: 0.13.1a20260203
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/vllm-project/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from abc import ABC, abstractmethod
16
- from typing import Dict, Generator, Optional, Tuple, Union
16
+ from collections.abc import Generator
17
17
 
18
18
  import torch
19
19
  from compressed_tensors.config import SparsityCompressionConfig
@@ -59,15 +59,15 @@ class BaseCompressor(RegistryMixin, ABC):
59
59
  """
60
60
 
61
61
  def __init__(
62
- self, config: Union[SparsityCompressionConfig, QuantizationConfig, None] = None
62
+ self, config: SparsityCompressionConfig | QuantizationConfig | None = None
63
63
  ):
64
64
  self.config = config
65
65
 
66
66
  def compression_param_info(
67
67
  self,
68
68
  weight_shape: torch.Size,
69
- quantization_args: Optional[QuantizationArgs] = None,
70
- ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
69
+ quantization_args: QuantizationArgs | None = None,
70
+ ) -> dict[str, tuple[torch.Size, torch.dtype]]:
71
71
  """
72
72
  Creates a dictionary of expected shapes and dtypes for each compression
73
73
  parameter used by the compressor
@@ -80,7 +80,7 @@ class BaseCompressor(RegistryMixin, ABC):
80
80
 
81
81
  @property
82
82
  @abstractmethod
83
- def compression_param_names(self) -> Tuple[str]:
83
+ def compression_param_names(self) -> tuple[str, ...]:
84
84
  """
85
85
  Returns a tuple of compression parameter names introduced by
86
86
  the compressor during compression
@@ -90,9 +90,9 @@ class BaseCompressor(RegistryMixin, ABC):
90
90
  @abstractmethod
91
91
  def compress(
92
92
  self,
93
- model_state: Dict[str, Tensor],
93
+ model_state: dict[str, Tensor],
94
94
  **kwargs,
95
- ) -> Dict[str, Tensor]:
95
+ ) -> dict[str, Tensor]:
96
96
  """
97
97
  Compresses a dense state dict
98
98
 
@@ -108,7 +108,7 @@ class BaseCompressor(RegistryMixin, ABC):
108
108
  path_to_model_or_tensors: str,
109
109
  device: str = "cpu",
110
110
  **kwargs,
111
- ) -> Generator[Tuple[str, Tensor], None, None]:
111
+ ) -> Generator[tuple[str, Tensor], None, None]:
112
112
  """
113
113
  Reads a compressed state dict located at path_to_model_or_tensors
114
114
  and returns a generator for sequentially decompressing back to a
@@ -122,7 +122,7 @@ class BaseCompressor(RegistryMixin, ABC):
122
122
  """
123
123
  raise NotImplementedError()
124
124
 
125
- def compress_module(self, module: Module) -> Optional[Dict[str, torch.Tensor]]:
125
+ def compress_module(self, module: Module) -> dict[str, torch.Tensor] | None:
126
126
  """
127
127
  Compresses a single quantized leaf PyTorch module. If the module is not
128
128
  quantized, this function has no effect.
@@ -153,7 +153,7 @@ class BaseCompressor(RegistryMixin, ABC):
153
153
  self,
154
154
  weight: Tensor,
155
155
  **kwargs,
156
- ) -> Dict[str, torch.Tensor]:
156
+ ) -> dict[str, torch.Tensor]:
157
157
  """
158
158
  Compresses a single uncompressed weight
159
159
 
@@ -196,7 +196,7 @@ class BaseCompressor(RegistryMixin, ABC):
196
196
  return decompressed_weight
197
197
 
198
198
  def decompress_weight(
199
- self, compressed_data: Dict[str, Tensor], **kwargs
199
+ self, compressed_data: dict[str, Tensor], **kwargs
200
200
  ) -> torch.Tensor:
201
201
  """
202
202
  Decompresses a single compressed weight
@@ -12,8 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from collections.abc import Generator
15
16
  from pathlib import Path
16
- from typing import Dict, Generator, Optional, Tuple, Union
17
17
 
18
18
  import torch
19
19
  from compressed_tensors.compressors import BaseCompressor
@@ -32,9 +32,9 @@ __all__ = [
32
32
 
33
33
 
34
34
  def save_compressed(
35
- tensors: Dict[str, Tensor],
36
- save_path: Union[str, Path],
37
- compression_format: Optional[CompressionFormat] = None,
35
+ tensors: dict[str, Tensor],
36
+ save_path: str | Path,
37
+ compression_format: CompressionFormat | None = None,
38
38
  ):
39
39
  """
40
40
  Save compressed tensors to disk. If tensors are not compressed,
@@ -68,10 +68,10 @@ def save_compressed(
68
68
 
69
69
 
70
70
  def load_compressed(
71
- compressed_tensors: Union[str, Path],
71
+ compressed_tensors: str | Path,
72
72
  compression_config: SparsityCompressionConfig = None,
73
- device: Optional[str] = "cpu",
74
- ) -> Generator[Tuple[str, Tensor], None, None]:
73
+ device: str | None = "cpu",
74
+ ) -> Generator[tuple[str, Tensor], None, None]:
75
75
  """
76
76
  Load compressed tensors from disk.
77
77
  If tensors are not compressed, load them as is.
@@ -111,7 +111,7 @@ def load_compressed(
111
111
  def save_compressed_model(
112
112
  model: torch.nn.Module,
113
113
  filename: str,
114
- compression_format: Optional[CompressionFormat] = None,
114
+ compression_format: CompressionFormat | None = None,
115
115
  force_contiguous: bool = True,
116
116
  ):
117
117
  """
@@ -18,7 +18,7 @@ import operator
18
18
  import os
19
19
  import re
20
20
  from copy import deepcopy
21
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, TypeVar, Union
21
+ from typing import TYPE_CHECKING, Any, TypeVar
22
22
 
23
23
  import compressed_tensors
24
24
  import torch
@@ -109,16 +109,16 @@ class ModelCompressor:
109
109
  :param quantization_config: config specifying quantization compression parameters
110
110
  """
111
111
 
112
- sparsity_config: Optional[SparsityCompressionConfig] = None
113
- quantization_config: Optional[QuantizationConfig] = None
114
- transform_config: Optional[TransformConfig] = None
112
+ sparsity_config: SparsityCompressionConfig | None = None
113
+ quantization_config: QuantizationConfig | None = None
114
+ transform_config: TransformConfig | None = None
115
115
 
116
116
  @classmethod
117
117
  def from_pretrained(
118
118
  cls,
119
119
  pretrained_model_name_or_path: str,
120
120
  **kwargs,
121
- ) -> Optional["ModelCompressor"]:
121
+ ) -> "ModelCompressor | None":
122
122
  """
123
123
  Given a path to a model config, extract the sparsity and/or quantization
124
124
  configs and load a ModelCompressor
@@ -133,7 +133,7 @@ class ModelCompressor:
133
133
  @classmethod
134
134
  def from_compression_config(
135
135
  cls,
136
- compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
136
+ compression_config: "dict[str, Any] | CompressedTensorsConfig",
137
137
  ):
138
138
  """
139
139
  :param compression_config:
@@ -172,10 +172,10 @@ class ModelCompressor:
172
172
  def from_pretrained_model(
173
173
  cls,
174
174
  model: Module,
175
- sparsity_config_or_format: Union[SparsityCompressionConfig, str, None] = None,
176
- quantization_format: Optional[str] = None,
177
- sparsity_config: Union[SparsityCompressionConfig, str, None] = None,
178
- ) -> Optional["ModelCompressor"]:
175
+ sparsity_config_or_format: SparsityCompressionConfig | str | None = None,
176
+ quantization_format: str | None = None,
177
+ sparsity_config: SparsityCompressionConfig | str | None = None,
178
+ ) -> "ModelCompressor | None":
179
179
  """
180
180
  Given a pytorch model and optional sparsity and/or quantization configs,
181
181
  load the appropriate compressors
@@ -232,8 +232,8 @@ class ModelCompressor:
232
232
 
233
233
  @staticmethod
234
234
  def parse_sparsity_config(
235
- compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
236
- ) -> Union[Dict[str, Any], None]:
235
+ compression_config: "dict[str, Any] | CompressedTensorsConfig",
236
+ ) -> dict[str, Any] | None:
237
237
  """
238
238
  Parse sparsity config from quantization/compression config. Sparsity
239
239
  config is nested inside q/c config
@@ -253,8 +253,8 @@ class ModelCompressor:
253
253
 
254
254
  @staticmethod
255
255
  def parse_quantization_config(
256
- compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
257
- ) -> Union[Dict[str, Any], None]:
256
+ compression_config: "dict[str, Any] | CompressedTensorsConfig",
257
+ ) -> dict[str, Any] | None:
258
258
  """
259
259
  Parse quantization config from quantization/compression config. The
260
260
  quantization are all the fields that are not the sparsity config or
@@ -289,7 +289,7 @@ class ModelCompressor:
289
289
 
290
290
  return quantization_config
291
291
 
292
- def _fetch_unique_quantization_formats(self) -> List[str]:
292
+ def _fetch_unique_quantization_formats(self) -> list[str]:
293
293
  """
294
294
  Get all unique compression formats present in a model.
295
295
  :return: list of quantization formats
@@ -309,10 +309,10 @@ class ModelCompressor:
309
309
 
310
310
  def __init__(
311
311
  self,
312
- sparsity_config: Optional[SparsityCompressionConfig] = None,
313
- quantization_config: Optional[QuantizationConfig] = None,
314
- transform_config: Optional[TransformConfig] = None,
315
- compression_formats: Optional[List[str]] = None,
312
+ sparsity_config: SparsityCompressionConfig | None = None,
313
+ quantization_config: QuantizationConfig | None = None,
314
+ transform_config: TransformConfig | None = None,
315
+ compression_formats: list[str] | None = None,
316
316
  ):
317
317
  self.sparsity_config = sparsity_config
318
318
  self.quantization_config = quantization_config
@@ -320,8 +320,8 @@ class ModelCompressor:
320
320
  self.compression_formats = compression_formats
321
321
 
322
322
  self.sparsity_compressor = None
323
- self.quantization_compressor: Optional[
324
- Dict[str, Union[BaseQuantizationCompressor, DenseCompressor]]
323
+ self.quantization_compressor: dict[
324
+ str, BaseQuantizationCompressor | DenseCompressor
325
325
  ] = None
326
326
  # no transform compressor is required
327
327
 
@@ -345,7 +345,7 @@ class ModelCompressor:
345
345
  format, config=quantization_config
346
346
  )
347
347
 
348
- def get_missing_module_keys(self, model: Module) -> List[str]:
348
+ def get_missing_module_keys(self, model: Module) -> list[str]:
349
349
  """
350
350
  Identifies the expected missing weight keys in the compressed state_dict.
351
351
 
@@ -394,7 +394,7 @@ class ModelCompressor:
394
394
 
395
395
  return list(missing_keys)
396
396
 
397
- def get_unexpected_file_keys(self, model: Module) -> List[str]:
397
+ def get_unexpected_file_keys(self, model: Module) -> list[str]:
398
398
  """
399
399
  Identifies extra keys introduced by the compression process in the
400
400
  compressed state_dict that are not expected by the model graph.
@@ -625,9 +625,9 @@ class ModelCompressor:
625
625
  def compress(
626
626
  self,
627
627
  model: Module,
628
- state_dict: Optional[Dict[str, Tensor]] = None,
628
+ state_dict: dict[str, Tensor] | None = None,
629
629
  show_progress: bool = False,
630
- ) -> Dict[str, Tensor]:
630
+ ) -> dict[str, Tensor]:
631
631
  """
632
632
  Compresses a dense state dict or model with sparsity and/or quantization
633
633
 
@@ -656,7 +656,7 @@ class ModelCompressor:
656
656
  )
657
657
 
658
658
  if self.sparsity_compressor is not None:
659
- sparse_compression_targets: Set[str] = {
659
+ sparse_compression_targets: set[str] = {
660
660
  module_name
661
661
  for module_name, _module in match_named_modules(
662
662
  model=model,
@@ -732,7 +732,7 @@ class ModelCompressor:
732
732
  QuantizationStatus.FROZEN,
733
733
  ):
734
734
  apply_quantization_config(model, self.quantization_config)
735
- names_to_scheme: Set[QuantizationScheme] = {
735
+ names_to_scheme: dict[str, QuantizationScheme] = {
736
736
  name: getattr(module, "quantization_scheme")
737
737
  for name, module in model.named_modules()
738
738
  if getattr(module, "quantization_scheme", None) is not None
@@ -897,7 +897,7 @@ class ModelCompressor:
897
897
  update_parameter_data(module, param_data, param_name)
898
898
 
899
899
 
900
- def map_module_to_scheme(model: Module) -> Dict[str, QuantizationScheme]:
900
+ def map_module_to_scheme(model: Module) -> dict[str, QuantizationScheme]:
901
901
  """
902
902
  Returns a dictionary which maps quantized module names to their quantization
903
903
  schemes. Only includes modules with weight quantization
@@ -13,8 +13,9 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import logging
16
+ from collections.abc import Generator
16
17
  from pathlib import Path
17
- from typing import Any, Dict, Generator, Tuple, Union
18
+ from typing import Any
18
19
 
19
20
  import torch
20
21
  from compressed_tensors.compressors.base import BaseCompressor
@@ -68,12 +69,12 @@ class BaseQuantizationCompressor(BaseCompressor):
68
69
 
69
70
  def compress(
70
71
  self,
71
- model_state: Dict[str, Tensor],
72
- names_to_scheme: Dict[str, QuantizationScheme],
72
+ model_state: dict[str, Tensor],
73
+ names_to_scheme: dict[str, QuantizationScheme],
73
74
  show_progress: bool = False,
74
75
  compression_device: str = "cpu",
75
76
  **kwargs,
76
- ) -> Dict[str, Tensor]:
77
+ ) -> dict[str, Tensor]:
77
78
  """
78
79
  Compresses a dense state dict
79
80
 
@@ -141,7 +142,7 @@ class BaseQuantizationCompressor(BaseCompressor):
141
142
  return isinstance(self, NVFP4PackedCompressor)
142
143
 
143
144
  def _skip_zp(
144
- self, name: str, names_to_scheme: Dict[str, QuantizationScheme]
145
+ self, name: str, names_to_scheme: dict[str, QuantizationScheme]
145
146
  ) -> bool:
146
147
  from compressed_tensors.compressors import PackedQuantizationCompressor
147
148
 
@@ -169,10 +170,10 @@ class BaseQuantizationCompressor(BaseCompressor):
169
170
 
170
171
  def decompress(
171
172
  self,
172
- path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
173
- names_to_scheme: Dict[str, QuantizationScheme],
173
+ path_to_model_or_tensors: str | Path | dict[str, Any],
174
+ names_to_scheme: dict[str, QuantizationScheme],
174
175
  device: str = "cpu",
175
- ) -> Generator[Tuple[str, Tensor], None, None]:
176
+ ) -> Generator[tuple[str, Tensor], None, None]:
176
177
  """
177
178
  Reads a compressed state dict located at path_to_model_or_tensors
178
179
  and returns a generator for sequentially decompressing back to a
@@ -196,8 +197,8 @@ class BaseQuantizationCompressor(BaseCompressor):
196
197
 
197
198
  def _decompress_from_path(
198
199
  self,
199
- path_to_model: Union[str, Path, Dict[str, Any]],
200
- names_to_scheme: Dict[str, QuantizationScheme],
200
+ path_to_model: str | Path | dict[str, Any],
201
+ names_to_scheme: dict[str, QuantizationScheme],
201
202
  device: str,
202
203
  ):
203
204
  weight_mappings = get_nested_weight_mappings(
@@ -219,9 +220,9 @@ class BaseQuantizationCompressor(BaseCompressor):
219
220
 
220
221
  def decompress_from_state_dict(
221
222
  self,
222
- state_dict: Dict[str, torch.Tensor],
223
- names_to_scheme: Dict[str, QuantizationScheme],
224
- ) -> Generator[Tuple[str, Dict[str, torch.Tensor]], None, None]:
223
+ state_dict: dict[str, torch.Tensor],
224
+ names_to_scheme: dict[str, QuantizationScheme],
225
+ ) -> Generator[tuple[str, dict[str, torch.Tensor]], None, None]:
225
226
  weight_mappings = get_nested_mappings_from_state_dict(
226
227
  state_dict, self.compression_param_names
227
228
  )
@@ -239,9 +240,9 @@ class BaseQuantizationCompressor(BaseCompressor):
239
240
  def decompress_module_from_state_dict(
240
241
  self,
241
242
  prefix: str,
242
- state_dict: Dict[str, torch.Tensor],
243
+ state_dict: dict[str, torch.Tensor],
243
244
  scheme: QuantizationScheme,
244
- ) -> Dict[str, torch.Tensor]:
245
+ ) -> dict[str, torch.Tensor]:
245
246
  """
246
247
  Only used by in-memory decompression pathways to decompress the parameters of
247
248
  one module
@@ -13,8 +13,6 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
- from typing import Dict, Optional, Tuple
17
-
18
16
  import torch
19
17
  from compressed_tensors.compressors.base import BaseCompressor
20
18
  from compressed_tensors.compressors.quantized_compressors.base import (
@@ -48,7 +46,7 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
48
46
  """
49
47
 
50
48
  @property
51
- def compression_param_names(self) -> Tuple[str]:
49
+ def compression_param_names(self) -> tuple[str, ...]:
52
50
  """
53
51
  Returns a tuple of compression parameter names introduced by
54
52
  the compressor during compression
@@ -63,8 +61,8 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
63
61
  def compression_param_info(
64
62
  self,
65
63
  weight_shape: torch.Size,
66
- quantization_args: Optional[QuantizationArgs] = None,
67
- ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
64
+ quantization_args: QuantizationArgs | None = None,
65
+ ) -> dict[str, tuple[torch.Size, torch.dtype]]:
68
66
  """
69
67
  Creates a dictionary of expected shapes and dtypes for each compression
70
68
  parameter used by the compressor
@@ -85,7 +83,7 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
85
83
  self,
86
84
  scale: Tensor,
87
85
  quantization_args: QuantizationArgs,
88
- ) -> Dict[str, torch.Tensor]:
86
+ ) -> dict[str, torch.Tensor]:
89
87
  assert quantization_args.scale_dtype is not None
90
88
  return scale.to(quantization_args.scale_dtype)
91
89
 
@@ -95,10 +93,10 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
95
93
  scale: Tensor,
96
94
  global_scale: Tensor,
97
95
  quantization_args: QuantizationArgs,
98
- device: Optional[torch.device] = None,
99
- zero_point: Optional[torch.Tensor] = None,
100
- g_idx: Optional[torch.Tensor] = None,
101
- ) -> Dict[str, torch.Tensor]:
96
+ device: torch.device | None = None,
97
+ zero_point: torch.Tensor | None = None,
98
+ g_idx: torch.Tensor | None = None,
99
+ ) -> dict[str, torch.Tensor]:
102
100
  quantized_weight = quantize(
103
101
  x=weight,
104
102
  scale=scale,
@@ -118,8 +116,8 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
118
116
 
119
117
  def decompress_weight(
120
118
  self,
121
- compressed_data: Dict[str, Tensor],
122
- quantization_args: Optional[QuantizationArgs] = None,
119
+ compressed_data: dict[str, Tensor],
120
+ quantization_args: QuantizationArgs | None = None,
123
121
  ) -> torch.Tensor:
124
122
  weight = compressed_data["weight_packed"]
125
123
  global_scale = compressed_data["weight_global_scale"]
@@ -149,15 +147,15 @@ class MXFP4PackedCompressor(NVFP4PackedCompressor):
149
147
  self,
150
148
  scale: Tensor,
151
149
  quantization_args: QuantizationArgs,
152
- ) -> Dict[str, torch.Tensor]:
150
+ ) -> dict[str, torch.Tensor]:
153
151
  assert quantization_args.scale_dtype is not None
154
152
  scale_exp = 127 + torch.floor(torch.log2(scale)).to(torch.int32)
155
153
  return scale_exp.to(quantization_args.scale_dtype)
156
154
 
157
155
  def decompress_weight(
158
156
  self,
159
- compressed_data: Dict[str, Tensor],
160
- quantization_args: Optional[QuantizationArgs] = None,
157
+ compressed_data: dict[str, Tensor],
158
+ quantization_args: QuantizationArgs | None = None,
161
159
  ) -> torch.Tensor:
162
160
  raise NotImplementedError("MXFP4 Decompression is currently not supported")
163
161
 
@@ -216,7 +214,7 @@ kE2M1ToFloat = torch.tensor(
216
214
  # reference: : https://github.com/vllm-project/vllm/pull/16362
217
215
  @torch.compile(fullgraph=True, dynamic=True)
218
216
  def unpack_fp4_from_uint8(
219
- a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
217
+ a: torch.Tensor, m: int, n: int, dtype: torch.dtype | None = torch.bfloat16
220
218
  ) -> torch.Tensor:
221
219
  """
222
220
  Unpacks uint8 values into fp4. Each uint8 consists of two fp4 values
@@ -12,8 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Dict, Optional, Tuple
16
-
17
15
  import torch
18
16
  from compressed_tensors.compressors.base import BaseCompressor
19
17
  from compressed_tensors.compressors.quantized_compressors.base import (
@@ -42,7 +40,7 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
42
40
  """
43
41
 
44
42
  @property
45
- def compression_param_names(self) -> Tuple[str]:
43
+ def compression_param_names(self) -> tuple[str, ...]:
46
44
  """
47
45
  Returns a tuple of compression parameter names introduced by
48
46
  the compressor during compression
@@ -57,8 +55,8 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
57
55
  def compression_param_info(
58
56
  self,
59
57
  weight_shape: torch.Size,
60
- quantization_args: Optional[QuantizationArgs] = None,
61
- ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
58
+ quantization_args: QuantizationArgs | None = None,
59
+ ) -> dict[str, tuple[torch.Size, torch.dtype]]:
62
60
  """
63
61
  Creates a dictionary of expected shapes and dtypes for each compression
64
62
  parameter used by the compressor
@@ -75,11 +73,11 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
75
73
  weight: Tensor,
76
74
  scale: Tensor,
77
75
  quantization_args: QuantizationArgs,
78
- zero_point: Optional[Tensor] = None,
79
- g_idx: Optional[torch.Tensor] = None,
80
- device: Optional[torch.device] = None,
81
- global_scale: Optional[torch.Tensor] = None,
82
- ) -> Dict[str, torch.Tensor]:
76
+ zero_point: Tensor | None = None,
77
+ g_idx: torch.Tensor | None = None,
78
+ device: torch.device | None = None,
79
+ global_scale: torch.Tensor | None = None,
80
+ ) -> dict[str, torch.Tensor]:
83
81
  """
84
82
  Compresses a single uncompressed weight
85
83
 
@@ -115,8 +113,8 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
115
113
 
116
114
  def decompress_weight(
117
115
  self,
118
- compressed_data: Dict[str, Tensor],
119
- quantization_args: Optional[QuantizationArgs] = None,
116
+ compressed_data: dict[str, Tensor],
117
+ quantization_args: QuantizationArgs | None = None,
120
118
  ) -> torch.Tensor:
121
119
  """
122
120
  Decompresses a single compressed weight
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import math
15
- from typing import Dict, Literal, Optional, Tuple, Union
15
+ from typing import Literal
16
16
 
17
17
  import torch
18
18
  from compressed_tensors.compressors.base import BaseCompressor
@@ -36,7 +36,7 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
36
36
  """
37
37
 
38
38
  @property
39
- def compression_param_names(self) -> Tuple[str]:
39
+ def compression_param_names(self) -> tuple[str, ...]:
40
40
  """
41
41
  Returns a tuple of compression parameter names introduced by
42
42
  the compressor during compression
@@ -52,8 +52,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
52
52
  def compression_param_info(
53
53
  self,
54
54
  weight_shape: torch.Size,
55
- quantization_args: Optional[QuantizationArgs] = None,
56
- ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
55
+ quantization_args: QuantizationArgs | None = None,
56
+ ) -> dict[str, tuple[torch.Size, torch.dtype]]:
57
57
  """
58
58
  Creates a dictionary of expected shapes and dtypes for each compression
59
59
  parameter used by the compressor
@@ -90,11 +90,11 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
90
90
  weight: Tensor,
91
91
  scale: Tensor,
92
92
  quantization_args: QuantizationArgs,
93
- zero_point: Optional[Tensor] = None,
94
- g_idx: Optional[torch.Tensor] = None,
95
- device: Optional[torch.device] = None,
96
- global_scale: Optional[torch.Tensor] = None,
97
- ) -> Dict[str, torch.Tensor]:
93
+ zero_point: Tensor | None = None,
94
+ g_idx: torch.Tensor | None = None,
95
+ device: torch.device | None = None,
96
+ global_scale: torch.Tensor | None = None,
97
+ ) -> dict[str, torch.Tensor]:
98
98
  """
99
99
  Compresses a single uncompressed weight
100
100
 
@@ -146,8 +146,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
146
146
 
147
147
  def decompress_weight(
148
148
  self,
149
- compressed_data: Dict[str, Tensor],
150
- quantization_args: Optional[QuantizationArgs] = None,
149
+ compressed_data: dict[str, Tensor],
150
+ quantization_args: QuantizationArgs | None = None,
151
151
  ) -> torch.Tensor:
152
152
  """
153
153
  Decompresses a single compressed weight
@@ -190,7 +190,7 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
190
190
  def pack_to_int32(
191
191
  value: torch.Tensor,
192
192
  num_bits: int,
193
- packed_dim: Union[Literal[0], Literal[1]] = 1,
193
+ packed_dim: Literal[0, 1] = 1,
194
194
  ) -> torch.Tensor:
195
195
  """
196
196
  Packs a tensor of quantized weights stored in int8 into int32s with padding
@@ -254,7 +254,7 @@ def unpack_from_int32(
254
254
  value: torch.Tensor,
255
255
  num_bits: int,
256
256
  shape: torch.Size,
257
- packed_dim: Union[Literal[0], Literal[1]] = 1,
257
+ packed_dim: Literal[0, 1] = 1,
258
258
  ) -> torch.Tensor:
259
259
  """
260
260
  Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import logging
16
- from typing import Dict, Generator, Optional, Set, Tuple
16
+ from collections.abc import Generator
17
17
 
18
18
  from compressed_tensors.compressors.base import BaseCompressor
19
19
  from compressed_tensors.utils import (
@@ -65,10 +65,10 @@ class BaseSparseCompressor(BaseCompressor):
65
65
 
66
66
  def compress(
67
67
  self,
68
- model_state: Dict[str, Tensor],
69
- compression_targets: Optional[Set[str]] = None,
68
+ model_state: dict[str, Tensor],
69
+ compression_targets: set[str] | None = None,
70
70
  show_progress: bool = False,
71
- ) -> Dict[str, Tensor]:
71
+ ) -> dict[str, Tensor]:
72
72
  """
73
73
  Compresses a dense state dict using bitmask compression
74
74
 
@@ -110,9 +110,9 @@ class BaseSparseCompressor(BaseCompressor):
110
110
  self,
111
111
  path_to_model_or_tensors: str,
112
112
  device: str = "cpu",
113
- params_to_skip_load: Optional[Tuple] = None,
113
+ params_to_skip_load: tuple | None = None,
114
114
  **kwargs,
115
- ) -> Generator[Tuple[str, Tensor], None, None]:
115
+ ) -> Generator[tuple[str, Tensor], None, None]:
116
116
  """
117
117
  Reads a bitmask compressed state dict located
118
118
  at path_to_model_or_tensors and returns a generator
@@ -157,8 +157,8 @@ class BaseSparseCompressor(BaseCompressor):
157
157
 
158
158
  def decompress_from_state_dict(
159
159
  self,
160
- state_dict: Dict[str, Tensor],
161
- ) -> Generator[Tuple[str, Dict[str, Tensor]], None, None]:
160
+ state_dict: dict[str, Tensor],
161
+ ) -> Generator[tuple[str, dict[str, Tensor]], None, None]:
162
162
  """
163
163
  Decompress the state dict of a module (or model)
164
164
 
@@ -185,7 +185,7 @@ class BaseSparseCompressor(BaseCompressor):
185
185
  yield ignored_param_path, ignored_param_value
186
186
 
187
187
  @staticmethod
188
- def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
188
+ def should_compress(name: str, expanded_targets: set[str] | None = None) -> bool:
189
189
  """
190
190
  Check if a parameter should be compressed.
191
191
  Currently, this only returns True for weight parameters.