compressed-tensors 0.13.1a20260123__tar.gz → 0.13.1a20260127__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/actions/test/action.yml +1 -1
  2. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/workflows/test-check.yaml +1 -1
  3. {compressed_tensors-0.13.1a20260123/src/compressed_tensors.egg-info → compressed_tensors-0.13.1a20260127}/PKG-INFO +4 -2
  4. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/setup.py +2 -2
  5. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/linear/compressed_linear.py +0 -6
  6. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/__init__.py +7 -6
  7. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/dispatch.py +1 -1
  8. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/forward.py +18 -19
  9. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/initialize.py +2 -2
  10. compressed_tensors-0.13.1a20260127/src/compressed_tensors/transform/apply.py +36 -0
  11. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/base.py +3 -11
  12. compressed_tensors-0.13.1a20260127/src/compressed_tensors/utils/offload.py +195 -0
  13. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/version.py +1 -1
  14. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127/src/compressed_tensors.egg-info}/PKG-INFO +4 -2
  15. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/SOURCES.txt +0 -1
  16. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/requires.txt +3 -1
  17. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_apply.py +2 -12
  18. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_initialize.py +4 -5
  19. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/factory/test_correctness.py +6 -15
  20. compressed_tensors-0.13.1a20260127/tests/test_transform/factory/test_memory.py +74 -0
  21. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/factory/test_serialization.py +8 -16
  22. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_match.py +28 -34
  23. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/testing_utils.py +0 -18
  24. compressed_tensors-0.13.1a20260123/src/compressed_tensors/transform/apply.py +0 -71
  25. compressed_tensors-0.13.1a20260123/src/compressed_tensors/utils/offload.py +0 -672
  26. compressed_tensors-0.13.1a20260123/tests/test_transform/factory/test_memory.py +0 -92
  27. compressed_tensors-0.13.1a20260123/tests/test_utils/test_offload.py +0 -540
  28. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/.gitkeep +0 -0
  29. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/mergify.yml +0 -0
  30. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/scripts/step-status +0 -0
  31. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/workflows/quality-check.yaml +0 -0
  32. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.github/workflows/stale.yml +0 -0
  33. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/.gitignore +0 -0
  34. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/LICENSE +0 -0
  35. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/Makefile +0 -0
  36. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/README.md +0 -0
  37. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  38. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/bit_packing/int4_config.json +0 -0
  39. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/bitmask_compression.ipynb +0 -0
  40. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  41. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  42. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/example_quant_config.json +0 -0
  43. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  44. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/examples/quantize_and_pack_int4.ipynb +0 -0
  45. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/pyproject.toml +0 -0
  46. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/setup.cfg +0 -0
  47. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/__init__.py +0 -0
  48. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/README.md +0 -0
  49. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/__init__.py +0 -0
  50. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/base.py +0 -0
  51. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/__init__.py +0 -0
  52. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/base.py +0 -0
  53. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/helpers.py +0 -0
  54. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  55. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  56. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  57. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  58. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +0 -0
  59. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  60. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  61. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  62. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  63. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  64. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  65. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  66. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  67. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  68. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/__init__.py +0 -0
  69. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/base.py +0 -0
  70. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/dense.py +0 -0
  71. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/format.py +0 -0
  72. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  73. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  74. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/linear/__init__.py +0 -0
  75. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/logger.py +0 -0
  76. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/__init__.py +0 -0
  77. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/attention.py +0 -0
  78. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/kvcache.py +0 -0
  79. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/cache/__init__.py +0 -0
  80. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/cache/base.py +0 -0
  81. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/cache/cpu.py +0 -0
  82. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/cache/device.py +0 -0
  83. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/module.py +0 -0
  84. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/offload/utils.py +0 -0
  85. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/__init__.py +0 -0
  86. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  87. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  88. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  89. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  90. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_args.py +0 -0
  91. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_config.py +0 -0
  92. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
  93. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  94. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  95. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  96. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +0 -0
  97. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/registry/__init__.py +0 -0
  98. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/registry/registry.py +0 -0
  99. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/__init__.py +0 -0
  100. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  101. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  102. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  103. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  104. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_args.py +0 -0
  105. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_config.py +0 -0
  106. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  107. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  108. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  109. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  110. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  111. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/__init__.py +0 -0
  112. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/binary_search.py +0 -0
  113. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/helpers.py +0 -0
  114. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/internal.py +0 -0
  115. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/match.py +0 -0
  116. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/permutations_24.py +0 -0
  117. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  118. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  119. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/type.py +0 -0
  120. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  121. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  122. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/__init__.py +0 -0
  123. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/conftest.py +0 -0
  124. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/mock_observer.py +0 -0
  125. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/__init__.py +0 -0
  126. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/model_compressors/__init__.py +0 -0
  127. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  128. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  129. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
  130. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  131. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  132. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  133. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
  134. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  135. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  136. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  137. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  138. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  139. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_configs/__init__.py +0 -0
  140. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_configs/test_base.py +0 -0
  141. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_configs/test_infer_quant.py +0 -0
  142. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  143. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_linear/__init__.py +0 -0
  144. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_linear/test_compressed_linear.py +0 -0
  145. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_modeling/test_attention_and_cache.py +0 -0
  146. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_offload/cache/test_cpu.py +0 -0
  147. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_offload/test_dispatch.py +0 -0
  148. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_offload/test_interface.py +0 -0
  149. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_offload/test_module.py +0 -0
  150. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/__init__.py +0 -0
  151. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/__init__.py +0 -0
  152. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/conftest.py +0 -0
  153. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  154. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  155. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  156. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  157. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
  158. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/__init__.py +0 -0
  159. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  160. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  161. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_args.py +0 -0
  162. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_config.py +0 -0
  163. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_scheme.py +0 -0
  164. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  165. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_utils/test_mxfp4_utils.py +0 -0
  166. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_registry.py +0 -0
  167. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/conftest.py +0 -0
  168. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_args.py +0 -0
  169. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_config.py +0 -0
  170. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_scheme.py +0 -0
  171. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_transform/utils/test_hadamard.py +0 -0
  172. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/__init__.py +0 -0
  173. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_helpers.py +0 -0
  174. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_safetensors_load.py +0 -0
  175. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_type.py +0 -0
  176. {compressed_tensors-0.13.1a20260123 → compressed_tensors-0.13.1a20260127}/utils/copyright.py +0 -0
.github/actions/test/action.yml
@@ -23,7 +23,7 @@ runs:
       with:
         venv: ${{ inputs.venv }}
         name: compressed
-        extra: "[dev,accelerate]"
+        extra: "[dev]"

     - name: clean up
       run: |
.github/workflows/test-check.yaml
@@ -30,7 +30,7 @@ jobs:
       - name: Set Env
         run: pip3 install --upgrade pip setuptools
       - name: "⚙️ Install dependencies"
-        run: pip3 install .[dev,accelerate]
+        run: pip3 install .[dev]
       - name: clean up
         run: |
           echo "cleaning up disk space as GHA runner has limited disk size."
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.13.1a20260123
+Version: 0.13.1a20260127
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/vllm-project/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -9,7 +9,7 @@ License: Apache 2.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch<=2.9.1,>=1.7.0
-Requires-Dist: transformers
+Requires-Dist: transformers<5.0.0
 Requires-Dist: pydantic>=2.0
 Requires-Dist: loguru
 Provides-Extra: dev
@@ -19,6 +19,8 @@ Requires-Dist: wheel>=0.36.2; extra == "dev"
 Requires-Dist: flake8>=3.8.3; extra == "dev"
 Requires-Dist: pytest>=6.0.0; extra == "dev"
 Requires-Dist: nbconvert>=7.16.3; extra == "dev"
+Requires-Dist: transformers<5.0; extra == "dev"
+Requires-Dist: accelerate; extra == "dev"
 Provides-Extra: accelerate
 Requires-Dist: accelerate; extra == "accelerate"
 Dynamic: author
setup.py
@@ -88,11 +88,11 @@ def _setup_packages() -> List:
     )

 def _setup_install_requires() -> List:
-    return ["torch>=1.7.0,<=2.9.1", "transformers", "pydantic>=2.0", "loguru"]
+    return ["torch>=1.7.0,<=2.9.1", "transformers<5.0.0", "pydantic>=2.0", "loguru"]

 def _setup_extras() -> Dict:
     return {
-        "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3"],
+        "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3", "transformers<5.0", "accelerate"],
         "accelerate": ["accelerate"]
     }

src/compressed_tensors/linear/compressed_linear.py
@@ -87,12 +87,6 @@ class CompressedLinear(Linear):
         # mark module as compressed
         module.quantization_status = QuantizationStatus.COMPRESSED

-        # handles case where forward is wrapped in new_forward by accelerate hooks
-        if hasattr(module, "_old_forward"):
-            module._old_forward = CompressedLinear.forward.__get__(
-                module, CompressedLinear
-            )
-
         return module

     def forward(self, input: Tensor) -> Tensor:
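Editor's note: the deleted block existed because accelerate's hooks replace `module.forward` with a `new_forward` wrapper and stash the original bound method on `_old_forward`; after swapping the instance's class to `CompressedLinear`, that stashed reference also had to be rebound. A standalone sketch of the descriptor rebinding involved, using a hypothetical `DoubledLinear` stand-in rather than this package's code:

    import torch
    from torch.nn import Linear


    # hypothetical subclass standing in for CompressedLinear
    class DoubledLinear(Linear):
        def forward(self, input: torch.Tensor) -> torch.Tensor:
            return super().forward(input) * 2


    module = Linear(4, 4)
    module._old_forward = module.forward  # what an accelerate hook would stash
    module.__class__ = DoubledLinear      # swap the class, as from_linear effectively does

    # the removed shim, approximately: rebind the new class's forward onto the
    # instance so the hook's stashed reference calls the compressed forward
    if hasattr(module, "_old_forward"):
        module._old_forward = DoubledLinear.forward.__get__(module, DoubledLinear)

    assert torch.equal(module._old_forward(torch.ones(4)), module(torch.ones(4)))

With accelerate hooks removed from this code path, the rebinding no longer has anything to patch.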
src/compressed_tensors/offload/__init__.py
@@ -135,9 +135,7 @@ def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.M
     """
     cache = base._parameters
     if isinstance(cache, OffloadCache):
-        offload_module(
-            module, cache.onload_device, cache.offload_device, no_split=False
-        )
+        offload_module(module, cache.onload_device, cache.offload_device)

     base.register_module(name, module)

@@ -178,9 +176,12 @@ def align_module_device(
     if isinstance(module._parameters, OffloadCache):
         assert isinstance(module._buffers, OffloadCache)
         with module._parameters.disable_offloading():
-            with patch_attr(
-                module._parameters, "onload_device", execution_device
-            ), patch_attr(module._buffers, "onload_device", execution_device):
+            if execution_device is not None:
+                with patch_attr(
+                    module._parameters, "onload_device", execution_device
+                ), patch_attr(module._buffers, "onload_device", execution_device):
+                    yield
+            else:
                 yield

     else:
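Editor's note: the second hunk makes `execution_device=None` meaningful. A minimal sketch of the two behaviors, assuming the accelerate-style `(module, execution_device=None)` signature and an available CUDA device:

    import torch
    from compressed_tensors.offload import align_module_device, offload_model

    layer = torch.nn.Linear(8, 8)
    offload_model(layer, onload_device="cuda:0")  # weights offload to CPU by default

    # execution_device=None now just disables offloading and onloads to the
    # module's existing onload device, instead of patching onload_device to None
    with align_module_device(layer, execution_device=None):
        assert layer.weight.device.type == "cuda"

    # an explicit execution_device still temporarily overrides the onload device
    with align_module_device(layer, execution_device=torch.device("cpu")):
        assert layer.weight.device.type == "cpu"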
src/compressed_tensors/offload/dispatch.py
@@ -39,7 +39,7 @@ ModelType = TypeVar("ModelType", bound=torch.nn.Module)
 def offload_model(
     model: ModelType,
     onload_device: torch.device | str,
-    offload_device: Optional[torch.device | str | Literal["disk"]] = None,
+    offload_device: torch.device | str | Literal["disk"] = torch.device("cpu"),
 ) -> ModelType:
     """
     Offload a model to the `offload_device`. During forward passes, model weights will
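Editor's note: `offload_device` now defaults to `torch.device("cpu")` rather than an `Optional` `None`, so the common case needs only the onload device. A minimal sketch of the new call shape:

    import torch
    from compressed_tensors.offload import offload_model

    model = torch.nn.Sequential(torch.nn.Linear(32, 32), torch.nn.ReLU())
    model = offload_model(model, onload_device="cuda:0")

    # equivalent to the previously explicit form:
    # model = offload_model(model, "cuda:0", offload_device=torch.device("cpu"))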
src/compressed_tensors/quantization/lifecycle/forward.py
@@ -14,7 +14,6 @@

 from functools import wraps
 from math import ceil
-from typing import Optional

 import torch
 from compressed_tensors.quantization.quant_args import (
@@ -47,9 +46,9 @@ def quantize(
     scale: torch.Tensor,
     zero_point: torch.Tensor,
     args: QuantizationArgs,
-    dtype: Optional[torch.dtype] = None,
-    g_idx: Optional[torch.Tensor] = None,
-    global_scale: Optional[torch.Tensor] = None,
+    dtype: torch.dtype | None = None,
+    g_idx: torch.Tensor | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
     """
     Quantize the input tensor x using the QuantizationStrategy specified in args.
@@ -85,11 +84,11 @@ def quantize(
 def dequantize(
     x_q: torch.Tensor,
     scale: torch.Tensor,
-    zero_point: Optional[torch.Tensor] = None,
-    args: Optional[QuantizationArgs] = None,
-    dtype: Optional[torch.dtype] = None,
-    g_idx: Optional[torch.Tensor] = None,
-    global_scale: Optional[torch.Tensor] = None,
+    zero_point: torch.Tensor | None = None,
+    args: QuantizationArgs | None = None,
+    dtype: torch.dtype | None = None,
+    g_idx: torch.Tensor | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
     """
     Dequantize a quantized input tensor x_q based on the strategy specified in args. If
@@ -159,8 +158,8 @@ def fake_quantize(
     scale: torch.Tensor,
     zero_point: torch.Tensor,
     args: QuantizationArgs,
-    g_idx: Optional[torch.Tensor] = None,
-    global_scale: Optional[torch.Tensor] = None,
+    g_idx: torch.Tensor | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
     """
     Fake quantize the input tensor x by quantizing then dequantizing with
@@ -195,11 +194,11 @@ def _process_quantization(
     scale: torch.Tensor,
     zero_point: torch.Tensor,
     args: QuantizationArgs,
-    g_idx: Optional[torch.Tensor] = None,
-    dtype: Optional[torch.dtype] = None,
+    g_idx: torch.Tensor | None = None,
+    dtype: torch.dtype | None = None,
     do_quantize: bool = True,
     do_dequantize: bool = True,
-    global_scale: Optional[torch.Tensor] = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
     q_min, q_max = calculate_range(args, x.device)
     group_size = args.group_size
@@ -457,8 +456,8 @@ def _quantize(
     q_min: torch.Tensor,
     q_max: torch.Tensor,
     args: QuantizationArgs,
-    dtype: Optional[torch.dtype] = None,
-    global_scale: Optional[torch.Tensor] = None,
+    dtype: torch.dtype | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:

     # if a global scale is optionally provided, use it
@@ -486,9 +485,9 @@ def _quantize(
 def _dequantize(
     x_q: torch.Tensor,
     scale: torch.Tensor,
-    zero_point: torch.Tensor = None,
-    dtype: Optional[torch.dtype] = None,
-    global_scale: Optional[torch.Tensor] = None,
+    zero_point: torch.Tensor | None = None,
+    dtype: torch.dtype | None = None,
+    global_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:

     # if a global scale is optionally provided, use it
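Editor's note: this churn is a mechanical migration from `typing.Optional[X]` to PEP 604 `X | None` (it also fixes `_dequantize`'s previously mis-annotated `zero_point: torch.Tensor = None`); the new syntax requires Python 3.10+ when annotations are evaluated at runtime. A minimal round-trip sketch of the public entry points touched above; the int8 per-tensor args are an illustrative assumption, not taken from this diff:

    import torch
    from compressed_tensors.quantization import QuantizationArgs
    from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize

    # illustrative symmetric int8 per-tensor quantization arguments
    args = QuantizationArgs(num_bits=8, type="int", symmetric=True, strategy="tensor")

    x = torch.randn(4, 8)
    scale = x.abs().max() / 127          # per-tensor scale, broadcastable
    zero_point = torch.zeros_like(scale, dtype=torch.int8)

    x_q = quantize(x, scale, zero_point, args)       # dtype: torch.dtype | None
    x_dq = dequantize(x_q, scale, zero_point, args)  # approximately reconstructs x
    assert torch.allclose(x, x_dq, atol=float(scale))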
src/compressed_tensors/quantization/lifecycle/initialize.py
@@ -23,6 +23,7 @@ from compressed_tensors.modeling import (
     QuantizedAttentionImpl,
     QuantizedKVCache,
 )
+from compressed_tensors.offload import unwrap_offload_forward
 from compressed_tensors.quantization import (
     ActivationOrdering,
     DynamicType,
@@ -37,7 +38,6 @@ from compressed_tensors.quantization.lifecycle.forward import (
 )
 from compressed_tensors.quantization.utils import strategy_cdiv
 from compressed_tensors.utils import (
-    disable_hf_hook,
     get_execution_device,
     get_head_dim,
     get_num_attn_heads,
@@ -134,7 +134,7 @@ def initialize_module_for_quantization(
         force_zero_point=force_zero_point,
     )

-    with disable_hf_hook(module):
+    with unwrap_offload_forward(module):
        # wrap forward call of module to perform
        # quantized actions based on calltime status
        wrap_module_forward_quantized(module, scheme)
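Editor's note: the accelerate-era `disable_hf_hook` context is replaced by the package's own `unwrap_offload_forward` while wrapping the module's forward. A minimal sketch of the call site this hunk touches; the scheme fields are illustrative assumptions:

    import torch
    from compressed_tensors.quantization import (
        QuantizationArgs,
        QuantizationScheme,
        QuantizationStatus,
    )
    from compressed_tensors.quantization.lifecycle.initialize import (
        initialize_module_for_quantization,
    )

    layer = torch.nn.Linear(16, 16)
    scheme = QuantizationScheme(
        targets=["Linear"],
        weights=QuantizationArgs(num_bits=8, symmetric=True),
    )
    initialize_module_for_quantization(layer, scheme)

    # forward is now wrapped (inside the unwrap_offload_forward context above)
    # to perform quantized actions based on calltime status
    assert layer.quantization_status == QuantizationStatus.INITIALIZED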
src/compressed_tensors/transform/apply.py (new file)
@@ -0,0 +1,36 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from compressed_tensors import TRANSFORM_CONFIG_NAME
+from compressed_tensors.transform import TransformConfig, TransformFactory
+
+
+__all__ = ["apply_transform_config"]
+
+
+def apply_transform_config(model: torch.nn.Module, config: TransformConfig):
+    """
+    Apply a transform config to a model. Weight transforms are fused into weights, while
+    activation transforms are attached as submodules and trigger via pytorch hooks
+
+    :param model: model to apply config to
+    :param config: transform config to apply
+    """
+    for name, scheme in config.config_groups.items():
+        factory = TransformFactory.from_scheme(scheme, name=name)
+        factory.apply_to_model(model)
+
+    # attach config to model for compression/serialization
+    setattr(model, TRANSFORM_CONFIG_NAME, config)
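Editor's note: a minimal usage sketch of the re-added entry point, assuming `apply_transform_config` is re-exported from `compressed_tensors.transform`; the hadamard scheme and target below are illustrative, drawn from the factory names elsewhere in this package:

    import torch
    from compressed_tensors import TRANSFORM_CONFIG_NAME
    from compressed_tensors.transform import (
        TransformArgs,
        TransformConfig,
        TransformScheme,
        apply_transform_config,
    )

    # illustrative config: fuse a hadamard rotation into Linear weight inputs
    config = TransformConfig(
        config_groups={
            "u": TransformScheme(
                type="hadamard",
                apply=[TransformArgs(targets=["Linear"], location="weight_input")],
            )
        }
    )

    model = torch.nn.Sequential(torch.nn.Linear(64, 64))
    apply_transform_config(model, config)  # fuses weight transforms, attaches hooks
    assert getattr(model, TRANSFORM_CONFIG_NAME) is config  # kept for serialization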
src/compressed_tensors/transform/factory/base.py
@@ -26,6 +26,7 @@ from compressed_tensors.modeling.kvcache import (
     initialize_hooked_kv_cache,
     register_key_hook,
 )
+from compressed_tensors.offload import OffloadCache
 from compressed_tensors.registry.registry import RegistryMixin, T
 from compressed_tensors.transform import (
     TransformArgs,
@@ -34,8 +35,6 @@ from compressed_tensors.transform import (
 )
 from compressed_tensors.utils import (
     align_module_device,
-    delete_offload_module,
-    has_offloaded_params,
     match_named_modules,
     patch_attr,
     register_offload_module,
@@ -116,13 +115,6 @@ class TransformFactory(RegistryMixin, ABC):
         :param module: target module to apply transforms to
         :param args: defines how the transform will be applied to the target module
         """
-        if has_offloaded_params(module):
-            if module._hf_hook.place_submodules:
-                raise NotImplementedError(
-                    "Applying transforms to offloaded submodules with "
-                    "`place_submodules=True` is not supported"
-                )
-
         # create transform as submodule
         transform_name = f"{self.name}_{args.location}"
         transform = self.create_transform(module, args)
@@ -150,13 +142,13 @@ class TransformFactory(RegistryMixin, ABC):
             if self.scheme.requires_grad:
                 # for training, the weight changes with every forward pass
                 # so we can leverage parametrization to propagate the gradient
-                if has_offloaded_params(module):
+                if isinstance(module._parameters, OffloadCache):
                     raise ValueError("Offloaded training is not supported")
                 P.register_parametrization(module, "weight", transform)

             else:
                 # transform is no longer needed (unfusing is not supported)
-                delete_offload_module(module, transform_name)
+                delattr(module, transform_name)

         # register output transformation hook
         elif args.location == TransformLocation.OUTPUT:
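Editor's note: the accelerate-era `has_offloaded_params`/`_hf_hook` checks give way to inspecting the module's parameter dict directly, exactly as this hunk does. A short sketch of the new idiom; the `is_offloaded` helper name is hypothetical:

    import torch
    from compressed_tensors.offload import OffloadCache

    def is_offloaded(module: torch.nn.Module) -> bool:
        # a module counts as offloaded when its parameter dict has been
        # swapped for an OffloadCache (the check used verbatim in this diff)
        return isinstance(module._parameters, OffloadCache)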
src/compressed_tensors/utils/offload.py (new file)
@@ -0,0 +1,195 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Utilities associated with offloading functionality
+
+| ------------------------------------------------------------------------------------------------------ | # noqa: E501
+| Operation  | Without offloading support             | With offloading support                          | # noqa: E501
+| ---------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
+| Update     | module.name.data.copy_(new_data)       | update_offload_parameter(module, name, new_data) | # noqa: E501
+| ------------------------------------------------------------------------------------------------------ | # noqa: E501
+"""
+
+import contextlib
+from typing import Literal, Optional
+
+import torch
+from compressed_tensors.offload import (
+    align_module_device,
+    align_modules,
+    disable_offloading,
+    get_execution_device,
+    get_offloaded_device,
+    offload_model,
+    register_offload_module,
+    remove_dispatch,
+    update_offload_parameter,
+)
+from compressed_tensors.utils.helpers import deprecated
+
+
+__all__ = [
+    "get_execution_device",
+    "get_offloaded_device",
+    "update_parameter_data",
+    "register_offload_parameter",
+    "update_offload_parameter",
+    "delete_offload_parameter",
+    "has_offloaded_params",
+    "disable_hf_hook",
+    "disable_offload",
+    "align_modules",
+    "align_module_device",
+    "register_offload_module",
+    "delete_offload_module",
+    "offloaded_dispatch",
+    "disable_offloading",
+    "remove_dispatch",
+    "cast_to_device",
+    "offload_to_weights_map",
+    "delete_from_weights_map",
+]
+
+
+def update_parameter_data(
+    module: torch.nn.Module, new_param_data: torch.Tensor, param_name: str
+):
+    """
+    Update the data of an existing parameter and its offload dict. Supports both
+    parameters of offloaded modules and non-offloaded modules
+
+    :param module: module containing the parameter to update
+    :param new_param_data: tensor to update parameter with
+    :param param_name: name of module parameter to update
+    """
+    update_offload_parameter(module, param_name, new_param_data)
+
+
+""" Candidates for Upstreaming """
+
+
+@deprecated()
+def cast_to_device(device_spec: int | torch.device) -> torch.device:
+    """
+    Convert an integer device index or torch.device into a torch.device object.
+
+    :param device_spec: Device index (int) or torch.device object.
+        Negative integers map to CPU.
+    :return: torch.device corresponding to the given device specification.
+    """
+    if isinstance(device_spec, int):
+        return torch.device(f"cuda:{device_spec}" if device_spec >= 0 else "cpu")
+    return device_spec
+
+
+@deprecated("module.register_parameter(name, parameter)")
+def register_offload_parameter(
+    module: torch.nn.Module,
+    name: str,
+    parameter: torch.nn.Parameter,
+    offload_device: Optional[torch.device | Literal["disk"]] = None,
+):
+    """
+    Register a parameter to the given module which may be offloaded
+
+    :param module: maybe offloaded module
+    :param name: name of newly registered parameter
+    :param parameter: parameter being registered
+    :param offload_device: device on which weight will be offloaded to. If None is
+        provided, then infer device from parameters on module
+    """
+    if offload_device == "disk":
+        raise NotImplementedError("Disk offloading is not currently supported")
+
+    module.register_parameter(name, parameter)
+
+
+@deprecated("delattr(module, name)")
+def delete_offload_parameter(module: torch.nn.Module, name: str):
+    """
+    Delete a parameter from a module which may be offloaded,
+    including any metadata in _hf_hook
+
+    :param module: maybe offloaded module
+    :param name: name of parameter being deleted
+    """
+    delattr(module, name)
+
+
+@deprecated("compressed_tensors.offload::unwrap_offload")
+@contextlib.contextmanager
+def disable_hf_hook(module: torch.nn.Module):
+    raise ValueError()
+
+
+@deprecated("delattr(base, name)")
+def delete_offload_module(base: torch.nn.Module, name: str):
+    """
+    Delete a submodule from a model which may contain offloading
+    :param base: parent module to delete submodule from
+    :param name: name of submodule on parent
+    """
+    delattr(base, name)
+
+
+@deprecated("compressed_tensors.offload::offload_model")
+def offloaded_dispatch(
+    module: torch.nn.Module,
+    execution_device: torch.device,
+    offload_device: Optional[torch.device | Literal["disk"]] = None,
+) -> torch.nn.Module:
+    """
+    Dispatch a model, keeping device parameters offloaded on their current device
+
+    :param module: module containing parameters to offload
+    :param execution_device: device that modules will onload and execute on
+    :param offload_device: device that module parameters will offload to
+    :return: module with offloading device hooks
+    """
+    if offload_device is not None:
+        raise ValueError(
+            "Passing offload_device to offloaded_dispatch is no longer supported"
+        )
+    offload_model(module, execution_device)
+
+
+@deprecated("compressed_tensors.offload::align_module_device")
+def disable_offload(module: torch.nn.Module):
+    raise ValueError()
+
+
+@deprecated()
+def offload_to_weights_map(*args, **kwargs):
+    raise ValueError()
+
+
+@deprecated()
+def delete_from_weights_map(*args, **kwargs):
+    raise ValueError()
+
+
+@deprecated()
+def has_offloaded_params(module: torch.nn.Module) -> bool:
+    """
+    Checks if a module has offloaded parameters by checking if the given module has a
+    AlignDevicesHook attached with offloading enabled
+
+    Args:
+        module (`torch.nn.Module`): The module to check for an offload hook.
+
+    Returns:
+        bool: `True` if the module has an offload hook and offloading is enabled,
+        `False` otherwise.
+    """
+    return False
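Editor's note: this new `utils/offload.py` is a thin deprecation shim replacing the deleted 672-line accelerate-based implementation; most helpers now either delegate to plain torch calls or raise. A minimal sketch of the one shim that still does real work, assuming the usual star re-export from `compressed_tensors.utils`:

    import torch
    from compressed_tensors.utils import update_parameter_data

    # routes straight to compressed_tensors.offload.update_offload_parameter
    linear = torch.nn.Linear(16, 16)
    update_parameter_data(linear, torch.zeros(16, 16), "weight")
    assert torch.all(linear.weight == 0)

    # the deprecated helpers point at their replacements:
    #   register_offload_parameter(module, name, p) -> module.register_parameter(name, p)
    #   delete_offload_parameter(module, name)      -> delattr(module, name)
    #   delete_offload_module(base, name)           -> delattr(base, name)
    #   offloaded_dispatch(module, device)          -> compressed_tensors.offload.offload_model
    #   disable_hf_hook(module)                     -> compressed_tensors.offload::unwrap_offload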
src/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '0.13.1.a20260123'
+__version__ = version = '0.13.1.a20260127'
 __version_tuple__ = version_tuple = (0, 13, 1)
src/compressed_tensors.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.13.1a20260123
+Version: 0.13.1a20260127
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/vllm-project/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -9,7 +9,7 @@ License: Apache 2.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch<=2.9.1,>=1.7.0
-Requires-Dist: transformers
+Requires-Dist: transformers<5.0.0
 Requires-Dist: pydantic>=2.0
 Requires-Dist: loguru
 Provides-Extra: dev
@@ -19,6 +19,8 @@ Requires-Dist: wheel>=0.36.2; extra == "dev"
 Requires-Dist: flake8>=3.8.3; extra == "dev"
 Requires-Dist: pytest>=6.0.0; extra == "dev"
 Requires-Dist: nbconvert>=7.16.3; extra == "dev"
+Requires-Dist: transformers<5.0; extra == "dev"
+Requires-Dist: accelerate; extra == "dev"
 Provides-Extra: accelerate
 Requires-Dist: accelerate; extra == "accelerate"
 Dynamic: author
src/compressed_tensors.egg-info/SOURCES.txt
@@ -166,7 +166,6 @@ tests/test_transform/utils/test_hadamard.py
 tests/test_utils/__init__.py
 tests/test_utils/test_helpers.py
 tests/test_utils/test_match.py
-tests/test_utils/test_offload.py
 tests/test_utils/test_safetensors_load.py
 tests/test_utils/test_type.py
 utils/copyright.py
src/compressed_tensors.egg-info/requires.txt
@@ -1,5 +1,5 @@
 torch<=2.9.1,>=1.7.0
-transformers
+transformers<5.0.0
 pydantic>=2.0
 loguru

@@ -13,3 +13,5 @@ wheel>=0.36.2
 flake8>=3.8.3
 pytest>=6.0.0
 nbconvert>=7.16.3
+transformers<5.0
+accelerate
tests/test_quantization/lifecycle/test_apply.py
@@ -32,7 +32,6 @@ from compressed_tensors.quantization import (
 )
 from compressed_tensors.quantization.lifecycle import apply_quantization_config
 from compressed_tensors.utils import is_match, match_named_modules
-from tests.testing_utils import requires_accelerate
 from transformers import AutoModelForCausalLM


@@ -322,7 +321,6 @@ def get_sample_tinyllama_quant_config(
     return QuantizationConfig.model_validate(config_dict)


-@requires_accelerate()
 @pytest.mark.parametrize(
     "target,should_raise_warning",
     [
@@ -462,12 +460,8 @@ def test_multi_apply_quantization_config():
     )


-@requires_accelerate()
 def test_apply_kv_cache():
-    from accelerate import init_empty_weights
-
-    with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
+    model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")

     args = QuantizationArgs(
         num_bits=8,
@@ -486,12 +480,8 @@ def test_apply_kv_cache():
     assert hasattr(layer.self_attn, "v_scale")


-@requires_accelerate()
 def test_apply_attention():
-    from accelerate import init_empty_weights
-
-    with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
+    model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")

     scheme = QuantizationScheme(
         targets=["LlamaAttention"],
tests/test_quantization/lifecycle/test_initialize.py
@@ -17,6 +17,7 @@ import math

 import pytest
 import torch
+from compressed_tensors.offload import offload_model
 from compressed_tensors.quantization import (
     FP8_E4M3_DATA,
     ActivationOrdering,
@@ -28,7 +29,7 @@ from compressed_tensors.quantization import (
 from compressed_tensors.quantization.lifecycle.initialize import (
     initialize_module_for_quantization,
 )
-from tests.testing_utils import requires_accelerate
+from tests.testing_utils import requires_gpu
 from torch.nn import Linear


@@ -98,7 +99,7 @@ def test_initialize_module_for_quantization(
     assert layer.quantization_status == QuantizationStatus.INITIALIZED


-@requires_accelerate()
+@requires_gpu
 @pytest.mark.parametrize(
     "weights,input_activations",
     [
@@ -119,9 +120,7 @@ def test_initialize_module_for_quantization(
 def test_initialize_module_for_quantization_offloaded(
     create_quantization_scheme, weights, input_activations, layer
 ):
-    from accelerate.hooks import attach_align_device_hook
-
-    attach_align_device_hook(layer, offload=True)
+    offload_model(layer, "cuda:0")

     test_initialize_module_for_quantization(
         create_quantization_scheme,