compressed-tensors 0.11.1a20250821__tar.gz → 0.11.1a20250902__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/workflows/test.yml +10 -0
  2. {compressed_tensors-0.11.1a20250821/src/compressed_tensors.egg-info → compressed_tensors-0.11.1a20250902}/PKG-INFO +1 -1
  3. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/examples/llama_1.1b/ex_llmcompressor_quantization.py +1 -1
  4. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +6 -3
  5. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +5 -4
  6. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/lifecycle/apply.py +14 -19
  7. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/version.py +1 -1
  8. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  9. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/.gitkeep +0 -0
  10. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/actions/test/action.yml +0 -0
  11. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/scripts/step-status +0 -0
  12. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/workflows/build-test.yml +0 -0
  13. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/workflows/build.yml +0 -0
  14. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/workflows/quality-check.yaml +0 -0
  15. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/workflows/report.yml +0 -0
  16. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/workflows/test-check.yaml +0 -0
  17. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/workflows/trigger-all.yml +0 -0
  18. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.github/workflows/upload.yml +0 -0
  19. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/.gitignore +0 -0
  20. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/LICENSE +0 -0
  21. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/Makefile +0 -0
  22. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/README.md +0 -0
  23. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  24. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/examples/bit_packing/int4_config.json +0 -0
  25. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/examples/bitmask_compression.ipynb +0 -0
  26. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  27. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/examples/llama_1.1b/example_quant_config.json +0 -0
  28. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  29. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/examples/quantize_and_pack_int4.ipynb +0 -0
  30. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/pyproject.toml +0 -0
  31. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/setup.cfg +0 -0
  32. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/setup.py +0 -0
  33. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/__init__.py +0 -0
  34. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/README.md +0 -0
  35. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/__init__.py +0 -0
  36. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/base.py +0 -0
  37. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/__init__.py +0 -0
  38. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/base.py +0 -0
  39. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/helpers.py +0 -0
  40. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  41. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  42. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  43. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  44. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  45. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  46. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  47. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  48. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  49. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  50. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  51. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  52. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/config/__init__.py +0 -0
  53. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/config/base.py +0 -0
  54. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/config/dense.py +0 -0
  55. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  56. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  57. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/linear/__init__.py +0 -0
  58. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  59. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/__init__.py +0 -0
  60. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  61. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  62. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  63. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  64. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  65. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/quant_args.py +0 -0
  66. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/quant_config.py +0 -0
  67. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  68. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  69. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  70. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/registry/__init__.py +0 -0
  71. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/registry/registry.py +0 -0
  72. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/__init__.py +0 -0
  73. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/apply.py +0 -0
  74. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  75. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/factory/base.py +0 -0
  76. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  77. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  78. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  79. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/transform_args.py +0 -0
  80. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/transform_config.py +0 -0
  81. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  82. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  83. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  84. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  85. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  86. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/__init__.py +0 -0
  87. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/helpers.py +0 -0
  88. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/internal.py +0 -0
  89. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/match.py +0 -0
  90. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/offload.py +0 -0
  91. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/permutations_24.py +0 -0
  92. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/permute.py +0 -0
  93. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  94. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  95. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors/utils/type.py +0 -0
  96. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  97. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  98. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors.egg-info/requires.txt +0 -0
  99. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  100. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/__init__.py +0 -0
  101. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/conftest.py +0 -0
  102. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/__init__.py +0 -0
  103. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/model_compressors/__init__.py +0 -0
  104. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  105. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  106. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  107. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  108. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  109. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  110. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  112. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  113. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  114. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  115. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_configs/__init__.py +0 -0
  116. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_configs/test_base.py +0 -0
  117. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  118. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_linear/__init__.py +0 -0
  119. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_linear/test_compressed_linear.py +0 -0
  120. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/__init__.py +0 -0
  121. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/__init__.py +0 -0
  122. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/conftest.py +0 -0
  123. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  124. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  125. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  126. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  127. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  128. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  129. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  130. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/test_configs/__init__.py +0 -0
  131. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  132. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  133. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/test_quant_args.py +0 -0
  134. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/test_quant_config.py +0 -0
  135. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/test_quant_scheme.py +0 -0
  136. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  137. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_registry.py +0 -0
  138. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_transform/conftest.py +0 -0
  139. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_transform/factory/test_correctness.py +0 -0
  140. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_transform/factory/test_memory.py +0 -0
  141. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_transform/factory/test_serialization.py +0 -0
  142. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_transform/test_transform_args.py +0 -0
  143. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_transform/test_transform_config.py +0 -0
  144. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_transform/test_transform_scheme.py +0 -0
  145. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_transform/utils/test_hadamard.py +0 -0
  146. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_utils/__init__.py +0 -0
  147. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_utils/test_helpers.py +0 -0
  148. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_utils/test_match.py +0 -0
  149. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_utils/test_offload.py +0 -0
  150. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_utils/test_safetensors_load.py +0 -0
  151. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/test_utils/test_type.py +0 -0
  152. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/tests/testing_utils.py +0 -0
  153. {compressed_tensors-0.11.1a20250821 → compressed_tensors-0.11.1a20250902}/utils/copyright.py +0 -0
@@ -89,6 +89,16 @@ jobs:
89
89
  with:
90
90
  python-version: ${{ inputs.python }}
91
91
 
92
+ - name: install system dependencies
93
+ run: |
94
+ if command -v g++ >/dev/null 2>&1; then
95
+ echo "found g++ compiler"
96
+ else
97
+ echo "installing g++ etc compilers..."
98
+ sudo apt update && sudo apt install -y g++ gcc
99
+ fi
100
+ shell: bash
101
+
92
102
  - name: checkout code
93
103
  id: checkout
94
104
  uses: actions/checkout@v4
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.11.1a20250821
3
+ Version: 0.11.1a20250902
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -24,7 +24,7 @@
24
24
  from pathlib import Path
25
25
 
26
26
  import torch
27
- from llmcompressor.transformers import oneshot
27
+ from llmcompressor import oneshot
28
28
 
29
29
 
30
30
  recipe = str(Path(__file__).parent / "example_quant_recipe.yaml")
@@ -703,9 +703,12 @@ class ModelCompressor:
703
703
  with override_quantization_status(
704
704
  self.quantization_config, QuantizationStatus.FROZEN
705
705
  ):
706
- names_to_scheme = apply_quantization_config(
707
- model, self.quantization_config
708
- )
706
+ apply_quantization_config(model, self.quantization_config)
707
+ names_to_scheme: Set[QuantizationScheme] = {
708
+ name: getattr(module, "quantization_scheme")
709
+ for name, module in model.named_modules()
710
+ if getattr(module, "quantization_scheme", None) is not None
711
+ }
709
712
  # Load activation scales/zp or any other quantization parameters
710
713
  # Conditionally load the weight quantization parameters if we have a
711
714
  # dense compressor or if a sparsity compressor has already been applied
@@ -123,6 +123,7 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
123
123
  return decompressed_weight
124
124
 
125
125
 
126
+ @torch.compile(fullgraph=True, dynamic=True)
126
127
  def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
127
128
  """
128
129
  Packs a tensor with values in the fp4 range into uint8.
@@ -145,12 +146,11 @@ def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
145
146
 
146
147
  # Find closest valid FP4 value index for each element
147
148
  abs_x = torch.abs(x)
148
- abs_indices = torch.zeros_like(abs_x, dtype=torch.long)
149
- for i, val in enumerate(kE2M1):
150
- abs_indices = torch.where(torch.isclose(abs_x, val), i, abs_indices)
149
+ abs_diff_x = torch.abs(abs_x.unsqueeze(-1) - kE2M1) # [m, n, 8]
150
+ abs_indices = torch.argmin(abs_diff_x, dim=-1) # [m, n]
151
151
 
152
152
  # Apply sign bit (bit 3) to get final 4-bit representation
153
- indices = abs_indices + (torch.signbit(x) << 3).to(torch.long)
153
+ indices = abs_indices + (torch.signbit(x).to(torch.long) << 3)
154
154
 
155
155
  # Reshape to prepare for packing pairs of values
156
156
  indices = indices.reshape(-1)
@@ -174,6 +174,7 @@ kE2M1ToFloat = torch.tensor(
174
174
 
175
175
 
176
176
  # reference: : https://github.com/vllm-project/vllm/pull/16362
177
+ @torch.compile(fullgraph=True, dynamic=True)
177
178
  def unpack_fp4_from_uint8(
178
179
  a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
179
180
  ) -> torch.Tensor:
@@ -115,7 +115,7 @@ def load_pretrained_quantization_parameters(
115
115
 
116
116
  def apply_quantization_config(
117
117
  model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
118
- ) -> Dict[str, QuantizationScheme]:
118
+ ):
119
119
  """
120
120
  Initializes the model for quantization in-place based on the given config.
121
121
  Optionally coverts quantizable modules to compressed_linear modules
@@ -125,26 +125,22 @@ def apply_quantization_config(
125
125
  :param run_compressed: Whether the model will be run in compressed mode or
126
126
  decompressed fully on load
127
127
  """
128
- # Workaround for when HF Quantizer passes None, see PR #180
129
- if config is None:
130
- return dict()
128
+ from compressed_tensors.linear.compressed_linear import CompressedLinear
131
129
 
132
- # remove reference to the original `config`
133
- # argument. This function can mutate it, and we'd
134
- # like to keep the original `config` as it is.
135
130
  config = deepcopy(config)
131
+ if config is None: # see PR #180
132
+ return dict()
133
+
134
+ # preprocess to support kv cache scheme
135
+ config = process_quantization_config(config)
136
+
136
137
  # build mapping of targets to schemes for easier matching
137
138
  # use ordered dict to preserve target ordering in config
138
139
  target_to_scheme = OrderedDict()
139
- config = process_quantization_config(config)
140
- names_to_scheme = dict()
141
140
  for scheme in config.config_groups.values():
142
141
  for target in scheme.targets:
143
142
  target_to_scheme[target] = scheme
144
143
 
145
- if run_compressed:
146
- from compressed_tensors.linear.compressed_linear import CompressedLinear
147
-
148
144
  # mark appropriate layers for quantization by setting their quantization schemes
149
145
  for name, submodule in match_named_modules(
150
146
  model, target_to_scheme, config.ignore, warn_on_fail=True
@@ -153,7 +149,12 @@ def apply_quantization_config(
153
149
  # quant scheme to the matching layers
154
150
  matched_targets = match_targets(name, submodule, target_to_scheme)
155
151
  scheme = _scheme_from_targets(target_to_scheme, matched_targets, name)
156
- if run_compressed:
152
+ # target matched - add layer and scheme to target list
153
+ submodule.quantization_scheme = scheme
154
+
155
+ # replace with run compressed if applicable
156
+ # FUTURE: move this to model compressor
157
+ if isinstance(submodule, torch.nn.Linear) and run_compressed:
157
158
  format = config.format
158
159
  if format != CompressionFormat.dense.value:
159
160
  if isinstance(submodule, torch.nn.Linear):
@@ -165,14 +166,8 @@ def apply_quantization_config(
165
166
  )
166
167
  replace_module(model, name, compressed_linear)
167
168
 
168
- # target matched - add layer and scheme to target list
169
- submodule.quantization_scheme = scheme
170
-
171
- names_to_scheme[name] = submodule.quantization_scheme
172
-
173
169
  # apply current quantization status across all targeted layers
174
170
  apply_quantization_status(model, config.quantization_status)
175
- return names_to_scheme
176
171
 
177
172
 
178
173
  def process_quantization_config(config: QuantizationConfig) -> QuantizationConfig:
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.11.1.a20250821'
20
+ __version__ = version = '0.11.1.a20250902'
21
21
  __version_tuple__ = version_tuple = (0, 11, 1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.11.1a20250821
3
+ Version: 0.11.1a20250902
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.