compressed-tensors 0.13.1a20260116__tar.gz → 0.13.1a20260127__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/.github/actions/test/action.yml +1 -1
  2. compressed_tensors-0.13.1a20260127/.github/mergify.yml +64 -0
  3. compressed_tensors-0.13.1a20260127/.github/workflows/stale.yml +44 -0
  4. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/.github/workflows/test-check.yaml +1 -1
  5. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/Makefile +2 -2
  6. {compressed_tensors-0.13.1a20260116/src/compressed_tensors.egg-info → compressed_tensors-0.13.1a20260127}/PKG-INFO +5 -3
  7. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/setup.py +2 -2
  8. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/__init__.py +10 -1
  9. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +7 -0
  10. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/format.py +7 -0
  11. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/linear/compressed_linear.py +0 -6
  12. compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/__init__.py +198 -0
  13. compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/cache/__init__.py +17 -0
  14. compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/cache/base.py +231 -0
  15. compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/cache/cpu.py +43 -0
  16. compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/cache/device.py +48 -0
  17. compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/dispatch.py +228 -0
  18. compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/module.py +103 -0
  19. compressed_tensors-0.13.1a20260127/src/compressed_tensors/offload/utils.py +158 -0
  20. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/forward.py +26 -29
  21. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/initialize.py +2 -2
  22. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_metadata.py +24 -1
  23. compressed_tensors-0.13.1a20260127/src/compressed_tensors/transform/apply.py +36 -0
  24. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/base.py +3 -11
  25. compressed_tensors-0.13.1a20260127/src/compressed_tensors/utils/binary_search.py +52 -0
  26. compressed_tensors-0.13.1a20260127/src/compressed_tensors/utils/offload.py +195 -0
  27. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/version.py +1 -1
  28. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127/src/compressed_tensors.egg-info}/PKG-INFO +5 -3
  29. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/SOURCES.txt +15 -1
  30. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/requires.txt +4 -2
  31. compressed_tensors-0.13.1a20260127/tests/test_offload/cache/test_cpu.py +138 -0
  32. compressed_tensors-0.13.1a20260127/tests/test_offload/test_dispatch.py +215 -0
  33. compressed_tensors-0.13.1a20260127/tests/test_offload/test_interface.py +174 -0
  34. compressed_tensors-0.13.1a20260127/tests/test_offload/test_module.py +213 -0
  35. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_apply.py +2 -12
  36. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_initialize.py +4 -5
  37. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_transform/factory/test_correctness.py +6 -15
  38. compressed_tensors-0.13.1a20260127/tests/test_transform/factory/test_memory.py +74 -0
  39. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_transform/factory/test_serialization.py +8 -16
  40. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_match.py +28 -34
  41. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/testing_utils.py +0 -18
  42. compressed_tensors-0.13.1a20260116/src/compressed_tensors/transform/apply.py +0 -71
  43. compressed_tensors-0.13.1a20260116/src/compressed_tensors/utils/offload.py +0 -663
  44. compressed_tensors-0.13.1a20260116/tests/test_transform/factory/test_memory.py +0 -92
  45. compressed_tensors-0.13.1a20260116/tests/test_utils/test_offload.py +0 -540
  46. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/.github/.gitkeep +0 -0
  47. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/.github/scripts/step-status +0 -0
  48. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/.github/workflows/quality-check.yaml +0 -0
  49. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/.gitignore +0 -0
  50. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/LICENSE +0 -0
  51. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/README.md +0 -0
  52. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  53. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/examples/bit_packing/int4_config.json +0 -0
  54. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/examples/bitmask_compression.ipynb +0 -0
  55. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  56. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  57. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/example_quant_config.json +0 -0
  58. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  59. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/examples/quantize_and_pack_int4.ipynb +0 -0
  60. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/pyproject.toml +0 -0
  61. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/setup.cfg +0 -0
  62. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/__init__.py +0 -0
  63. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/README.md +0 -0
  64. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/base.py +0 -0
  65. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/__init__.py +0 -0
  66. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/base.py +0 -0
  67. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/helpers.py +0 -0
  68. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  69. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  70. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  71. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  72. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +0 -0
  73. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  74. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  75. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  76. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  77. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  78. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  79. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  80. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  81. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/__init__.py +0 -0
  82. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/base.py +0 -0
  83. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/dense.py +0 -0
  84. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  85. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  86. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/linear/__init__.py +0 -0
  87. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/logger.py +0 -0
  88. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/__init__.py +0 -0
  89. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/attention.py +0 -0
  90. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/modeling/kvcache.py +0 -0
  91. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/__init__.py +0 -0
  92. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  93. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  94. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  95. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  96. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_args.py +0 -0
  97. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_config.py +0 -0
  98. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  99. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  100. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  101. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +0 -0
  102. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/registry/__init__.py +0 -0
  103. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/registry/registry.py +0 -0
  104. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/__init__.py +0 -0
  105. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  106. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  107. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  108. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  109. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_args.py +0 -0
  110. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_config.py +0 -0
  111. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  112. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  113. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  114. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  115. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  116. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/__init__.py +0 -0
  117. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/helpers.py +0 -0
  118. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/internal.py +0 -0
  119. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/match.py +0 -0
  120. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/permutations_24.py +0 -0
  121. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  122. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  123. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors/utils/type.py +0 -0
  124. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  125. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  126. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/__init__.py +0 -0
  127. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/conftest.py +0 -0
  128. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/mock_observer.py +0 -0
  129. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/__init__.py +0 -0
  130. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/model_compressors/__init__.py +0 -0
  131. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  132. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  133. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
  134. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  135. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  136. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  137. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
  138. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  139. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  140. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  141. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  142. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  143. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_configs/__init__.py +0 -0
  144. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_configs/test_base.py +0 -0
  145. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_configs/test_infer_quant.py +0 -0
  146. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  147. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_linear/__init__.py +0 -0
  148. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_linear/test_compressed_linear.py +0 -0
  149. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_modeling/test_attention_and_cache.py +0 -0
  150. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/__init__.py +0 -0
  151. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/__init__.py +0 -0
  152. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/conftest.py +0 -0
  153. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  154. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  155. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  156. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  157. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
  158. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/__init__.py +0 -0
  159. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  160. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  161. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_args.py +0 -0
  162. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_config.py +0 -0
  163. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_quant_scheme.py +0 -0
  164. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  165. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_quantization/test_utils/test_mxfp4_utils.py +0 -0
  166. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_registry.py +0 -0
  167. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_transform/conftest.py +0 -0
  168. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_args.py +0 -0
  169. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_config.py +0 -0
  170. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_transform/test_transform_scheme.py +0 -0
  171. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_transform/utils/test_hadamard.py +0 -0
  172. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_utils/__init__.py +0 -0
  173. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_helpers.py +0 -0
  174. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_safetensors_load.py +0 -0
  175. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/tests/test_utils/test_type.py +0 -0
  176. {compressed_tensors-0.13.1a20260116 → compressed_tensors-0.13.1a20260127}/utils/copyright.py +0 -0
@@ -23,7 +23,7 @@ runs:
23
23
  with:
24
24
  venv: ${{ inputs.venv }}
25
25
  name: compressed
26
- extra: "[dev,accelerate]"
26
+ extra: "[dev]"
27
27
 
28
28
  - name: clean up
29
29
  run: |
@@ -0,0 +1,64 @@
1
+ pull_request_rules:
2
+ - name: label-documentation
3
+ description: Automatically apply documentation label
4
+ conditions:
5
+ - label != stale
6
+ - -closed
7
+ - or:
8
+ - files~=^[^/]+\.md$
9
+ - files~=^docs/
10
+ - files~=^examples/
11
+ actions:
12
+ label:
13
+ add:
14
+ - documentation
15
+
16
+ - name: ping author on conflicts and add 'needs-rebase' label
17
+ conditions:
18
+ - label != stale
19
+ - conflict
20
+ - -closed
21
+ actions:
22
+ label:
23
+ add:
24
+ - needs-rebase
25
+ comment:
26
+ message: |
27
+ This pull request has merge conflicts that must be resolved before it can be
28
+ merged. Please rebase the PR, @{{author}}.
29
+
30
+ https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork
31
+
32
+ - name: remove 'needs-rebase' label when conflict is resolved
33
+ conditions:
34
+ - -conflict
35
+ - -closed
36
+ actions:
37
+ label:
38
+ remove:
39
+ - needs-rebase
40
+
41
+ - name: add quality-failed label
42
+ conditions:
43
+ - label != stale
44
+ - check-failure = quality-check
45
+ - -closed
46
+ actions:
47
+ label:
48
+ add:
49
+ - quality-failed
50
+ comment:
51
+ message: |
52
+ The quality checks have failed. Please run `make style` and `make quality` under
53
+ the root directory to address the lint failures. You will need to install the
54
+ dev optional dependencies to get the required linting packages.
55
+
56
+ - name: remove quality-failed label
57
+ conditions:
58
+ - label != stale
59
+ - -check-failure = quality-check
60
+ - -closed
61
+ actions:
62
+ label:
63
+ remove:
64
+ - quality-failed
@@ -0,0 +1,44 @@
1
+ name: 'Close inactive PRs'
2
+
3
+ on:
4
+ schedule:
5
+ - cron: '0 17 * * *'
6
+
7
+ jobs:
8
+ close-pull-requests:
9
+ if: github.repository == 'vllm-project/compressed-tensors'
10
+ permissions:
11
+ issues: write
12
+ pull-requests: write
13
+ actions: write
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d
17
+ with:
18
+ operations-per-run: 1000
19
+ exempt-draft-pr: true
20
+ exempt-issue-labels: 'keep-open'
21
+ exempt-pr-labels: 'keep-open'
22
+
23
+ days-before-issue-stale: 90
24
+ days-before-issue-close: 30
25
+ stale-issue-label: 'stale'
26
+ stale-issue-message: >
27
+ This issue has been automatically marked as stale because it has not
28
+ had any activity within 90 days. It will be automatically closed if no
29
+ further activity occurs within 30 days. Leave a comment if
30
+ you feel this issue should remain open. Thank you!
31
+ close-issue-message: >
32
+ This issue has been automatically closed due to inactivity. Please
33
+ feel free to reopen if you feel it is still relevant. Thank you!
34
+
35
+ days-before-pr-stale: 90
36
+ days-before-pr-close: 30
37
+ stale-pr-label: 'stale'
38
+ stale-pr-message: >
39
+ This pull request has been automatically marked as stale because it
40
+ has not had any activity within 90 days. It will be automatically
41
+ closed if no further activity occurs within 30 days.
42
+ close-pr-message: >
43
+ This pull request has been automatically closed due to inactivity.
44
+ Please feel free to reopen if you intend to continue working on it.
@@ -30,7 +30,7 @@ jobs:
30
30
  - name: Set Env
31
31
  run: pip3 install --upgrade pip setuptools
32
32
  - name: "⚙️ Install dependencies"
33
- run: pip3 install .[dev,accelerate]
33
+ run: pip3 install .[dev]
34
34
  - name: clean up
35
35
  run: |
36
36
  echo "cleaning up disk space as GHA runner has limited disk size."
@@ -8,7 +8,7 @@ quality:
8
8
  @echo "Running copyright checks";
9
9
  python utils/copyright.py quality $(PYCHECKGLOBS)
10
10
  @echo "Running python quality checks";
11
- black --check $(PYCHECKDIRS);
11
+ black --target-version py310 --check $(PYCHECKDIRS);
12
12
  isort --check-only $(PYCHECKDIRS);
13
13
  flake8 $(PYCHECKDIRS);
14
14
 
@@ -17,7 +17,7 @@ style:
17
17
  @echo "Running copyright style";
18
18
  python utils/copyright.py style $(PYCHECKGLOBS)
19
19
  @echo "Running python styling";
20
- black $(PYCHECKDIRS);
20
+ black --target-version py310 $(PYCHECKDIRS);
21
21
  isort $(PYCHECKDIRS);
22
22
 
23
23
  # run tests for the repo
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.13.1a20260116
3
+ Version: 0.13.1a20260127
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/vllm-project/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -8,8 +8,8 @@ Author-email: support@neuralmagic.com
8
8
  License: Apache 2.0
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
- Requires-Dist: torch>=1.7.0
12
- Requires-Dist: transformers
11
+ Requires-Dist: torch<=2.9.1,>=1.7.0
12
+ Requires-Dist: transformers<5.0.0
13
13
  Requires-Dist: pydantic>=2.0
14
14
  Requires-Dist: loguru
15
15
  Provides-Extra: dev
@@ -19,6 +19,8 @@ Requires-Dist: wheel>=0.36.2; extra == "dev"
19
19
  Requires-Dist: flake8>=3.8.3; extra == "dev"
20
20
  Requires-Dist: pytest>=6.0.0; extra == "dev"
21
21
  Requires-Dist: nbconvert>=7.16.3; extra == "dev"
22
+ Requires-Dist: transformers<5.0; extra == "dev"
23
+ Requires-Dist: accelerate; extra == "dev"
22
24
  Provides-Extra: accelerate
23
25
  Requires-Dist: accelerate; extra == "accelerate"
24
26
  Dynamic: author
@@ -88,11 +88,11 @@ def _setup_packages() -> List:
88
88
  )
89
89
 
90
90
  def _setup_install_requires() -> List:
91
- return ["torch>=1.7.0", "transformers", "pydantic>=2.0", "loguru"]
91
+ return ["torch>=1.7.0,<=2.9.1", "transformers<5.0.0", "pydantic>=2.0", "loguru"]
92
92
 
93
93
  def _setup_extras() -> Dict:
94
94
  return {
95
- "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3"],
95
+ "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3", "transformers<5.0", "accelerate"],
96
96
  "accelerate": ["accelerate"]
97
97
  }
98
98
 
@@ -20,5 +20,14 @@ from .base import *
20
20
  from .compressors import *
21
21
  from .config import *
22
22
  from .quantization import QuantizationConfig, QuantizationStatus
23
- from .utils import *
23
+
24
+ # avoid resolving compressed_tensors.offload as compressed_tensors.utils.offload
25
+ from .utils.offload import *
26
+ from .utils.helpers import *
27
+ from .utils.internal import *
28
+ from .utils.match import *
29
+ from .utils.permutations_24 import *
30
+ from .utils.safetensors_load import *
31
+ from .utils.semi_structured_conversions import *
32
+ from .utils.type import *
24
33
  from .version import *
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import logging
16
+ import warnings
16
17
  from typing import Dict, Generator, Tuple
17
18
 
18
19
  import numpy as np
@@ -138,6 +139,12 @@ class Marlin24Compressor(BaseCompressor):
138
139
  :param show_progress: whether to show tqdm progress
139
140
  :return: compressed state dict
140
141
  """
142
+ warnings.warn(
143
+ "The marlin24 format is deprecated and will be removed in a "
144
+ "future release. vLLM no longer supports marlin24 models.",
145
+ DeprecationWarning,
146
+ stacklevel=2,
147
+ )
141
148
  self.validate_quant_compatability(names_to_scheme)
142
149
 
143
150
  compressed_dict = {}
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import warnings
15
16
  from typing import List, Optional
16
17
 
17
18
  import torch
@@ -68,6 +69,12 @@ def _get_quant_compression_format(
68
69
  ):
69
70
  # marlin24 kernel only applicable for channel/group quantization
70
71
  # Note: vLLM may only support group quant for marlin24
72
+ warnings.warn(
73
+ "The marlin24 format is deprecated and will be removed in a "
74
+ "future release. vLLM no longer supports marlin24 models.",
75
+ DeprecationWarning,
76
+ stacklevel=2,
77
+ )
71
78
  return CompressionFormat.marlin_24
72
79
  return CompressionFormat.pack_quantized
73
80
 
@@ -87,12 +87,6 @@ class CompressedLinear(Linear):
87
87
  # mark module as compressed
88
88
  module.quantization_status = QuantizationStatus.COMPRESSED
89
89
 
90
- # handles case where forward is wrapped in new_forward by accelerate hooks
91
- if hasattr(module, "_old_forward"):
92
- module._old_forward = CompressedLinear.forward.__get__(
93
- module, CompressedLinear
94
- )
95
-
96
90
  return module
97
91
 
98
92
  def forward(self, input: Tensor) -> Tensor:
@@ -0,0 +1,198 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import contextlib
16
+ from typing import Iterable, Optional
17
+
18
+ import torch
19
+ from compressed_tensors.offload.cache import OffloadCache
20
+ from compressed_tensors.offload.dispatch import ( # noqa: F401
21
+ dispatch_model,
22
+ offload_model,
23
+ remove_dispatch,
24
+ )
25
+ from compressed_tensors.offload.module import offload_module, unwrap_offload_forward
26
+ from compressed_tensors.offload.utils import get_module_device, move_module_tensor
27
+ from compressed_tensors.utils.helpers import patch_attr
28
+
29
+
30
+ __all__ = [
31
+ # dispatch models
32
+ "offload_model",
33
+ "dispatch_model",
34
+ "remove_dispatch",
35
+ # control movement
36
+ "disable_onloading",
37
+ "disable_offloading",
38
+ # manipulate parameters
39
+ "update_offload_parameter",
40
+ "get_execution_device",
41
+ "get_offloaded_device",
42
+ "register_offload_module",
43
+ # manipulate forward
44
+ "unwrap_offload_forward",
45
+ # backwards compatibility: should be deprecated
46
+ "align_modules",
47
+ "align_module_device",
48
+ ]
49
+
50
+
51
@contextlib.contextmanager
def disable_offloading():
    """
    Keep onloaded tensors resident in memory until the context exits.

    Thin module-level wrapper around `OffloadCache.disable_offloading`

    ```
    with disable_offloading():
        ... = cache["weight"]
        ... = cache["weight"]  # cache hit
        ... = cache["weight"]  # cache hit

    # upon exit, all onloaded weights are released
    ```
    """
    keep_onloaded = OffloadCache.disable_offloading()
    with keep_onloaded:
        yield
67
+
68
+
69
@contextlib.contextmanager
def disable_onloading():
    """
    Disable onloading for the duration of the context: tensors are returned
    without being onloaded on access, and assigned tensors are stored as given.
    This is mostly used to disable device movement for debugging.

    Thin module-level wrapper around `OffloadCache.disable_onloading`

    ```
    with disable_onloading():
        tensor = ...
        cache["weight"] = tensor   # assignments do not trigger device movement
        cache["weight"] is tensor  # tensor remains offloaded
    ```
    """
    no_onload = OffloadCache.disable_onloading()
    with no_onload:
        yield
84
+
85
+
86
def update_offload_parameter(module: torch.nn.Module, name: str, data: torch.Tensor):
    """
    Update the data of an existing parameter and its offload dict. Supports both
    parameters of offloaded modules and non-offloaded modules

    The in-place copy runs under `torch.no_grad()`, so parameters which require
    gradients can be updated regardless of the caller's grad mode

    :param module: module containing the parameter to update
    :param name: name of module parameter to update
    :param data: tensor to update parameter with
    """
    # an in-place write to a leaf tensor that requires grad raises a RuntimeError
    # while autograd is recording; this is a data update, not a differentiable op
    with torch.no_grad():
        if isinstance(module._parameters, OffloadCache):
            # operate on the stored (offloaded) tensor directly, without onloading
            with module._parameters.disable_onloading():
                value = getattr(module, name)
                value.copy_(module._parameters.offload(data))
                setattr(module, name, value)

        else:
            getattr(module, name).copy_(data)
103
+
104
+
105
def get_execution_device(module: torch.nn.Module) -> torch.device | str:
    """
    Return the device that inputs should be moved to before executing `module`.

    :param module: module to check, may be offloaded
    :return: the `onload_device` of the module's offload cache if the module is
        offloaded, otherwise the result of `get_module_device`
    """
    params = module._parameters
    if not isinstance(params, OffloadCache):
        # not offloaded: the module executes wherever it currently lives
        return get_module_device(module)

    return params.onload_device
117
+
118
+
119
def get_offloaded_device(module: torch.nn.Module) -> torch.device:
    """
    :param module: module to check
    :return: device which the module is offloaded onto after the forward pass
    """
    # query the device with onloading disabled so the lookup itself does not
    # onload the module's tensors
    with disable_onloading():
        offloaded_device = get_module_device(module)
    return offloaded_device
126
+
127
+
128
def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.Module):
    """
    Attach `module` to `base` under `name`. If `base` is offloaded, `module` is
    first offloaded using the same onload and offload devices as `base`

    :param base: module to attach submodule to
    :param name: name of submodule
    :param module: submodule to attach
    """
    parent_cache = base._parameters
    if isinstance(parent_cache, OffloadCache):
        onload_device = parent_cache.onload_device
        offload_device = parent_cache.offload_device
        offload_module(module, onload_device, offload_device)

    base.register_module(name, module)
141
+
142
+
143
+ """ Implemented for backwards compatibility """
144
+
145
+
146
+ @contextlib.contextmanager
147
+ def align_modules(
148
+ modules: torch.nn.Module | Iterable[torch.nn.Module],
149
+ execution_device: Optional[torch.device] = None,
150
+ ):
151
+ """
152
+ Context manager for onloading modules to a device, and disabling onload and offload
153
+ attempts triggered by forward calls. Used for sequential onloading of layers
154
+
155
+ :param modules: `torch.nn.Module` or iterable of `torch.nn.Module`s to onload
156
+ :param execution_device: device to onload to
157
+ """
158
+ with contextlib.ExitStack() as stack:
159
+ for module in modules:
160
+ stack.enter_context(align_module_device(module, execution_device))
161
+ yield
162
+
163
+
164
@contextlib.contextmanager
def align_module_device(
    module: torch.nn.Module, execution_device: Optional[torch.device] = None
):
    """
    Context manager that moves a module's parameters to the specified execution device.

    For offloaded modules, offloading is disabled within the context and, if
    `execution_device` is provided, the onload device of the parameter and buffer
    caches is patched to it. For non-offloaded modules, the module's direct
    parameters are moved to `execution_device` and moved back to their recorded
    devices on exit, including when the body or the initial move raises

    :param module: Module with parameters to align
    :param execution_device: If provided, overrides the module's execution device
        within the context
    """

    if isinstance(module._parameters, OffloadCache):
        assert isinstance(module._buffers, OffloadCache)
        with module._parameters.disable_offloading():
            if execution_device is not None:
                with patch_attr(
                    module._parameters, "onload_device", execution_device
                ), patch_attr(module._buffers, "onload_device", execution_device):
                    yield
            else:
                yield

    else:
        # NOTE(review): when `execution_device` is None it is forwarded as-is to
        # `move_module_tensor`; confirm that helper treats None as a no-op
        original_device = {}
        try:
            # the move happens inside `try` so that a failure part-way through
            # still restores the parameters which were already moved
            for name, param in module.named_parameters(recurse=False):
                original_device[name] = param.device
                move_module_tensor(module, name, execution_device)

            yield
        finally:
            # restore only what was recorded on entry: parameters registered inside
            # the context have no recorded device, and parameters removed inside
            # the context no longer exist to be moved
            for name, device in original_device.items():
                if module._parameters.get(name) is not None:
                    move_module_tensor(module, name, device)
@@ -0,0 +1,17 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # flake8: noqa
15
+
16
+ from .base import OffloadCache
17
+ from .cpu import CPUCache