compressed-tensors 0.12.3a20251214__tar.gz → 0.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. {compressed_tensors-0.12.3a20251214/src/compressed_tensors.egg-info → compressed_tensors-0.13.0}/PKG-INFO +1 -1
  2. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/examples/quantize_and_pack_int4.ipynb +51 -93
  3. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/version.py +2 -2
  4. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  5. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/.github/.gitkeep +0 -0
  6. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/.github/actions/test/action.yml +0 -0
  7. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/.github/scripts/step-status +0 -0
  8. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/.github/workflows/quality-check.yaml +0 -0
  9. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/.github/workflows/test-check.yaml +0 -0
  10. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/.gitignore +0 -0
  11. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/LICENSE +0 -0
  12. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/Makefile +0 -0
  13. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/README.md +0 -0
  14. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  15. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/examples/bit_packing/int4_config.json +0 -0
  16. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/examples/bitmask_compression.ipynb +0 -0
  17. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  18. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  19. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/examples/llama_1.1b/example_quant_config.json +0 -0
  20. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  21. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/pyproject.toml +0 -0
  22. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/setup.cfg +0 -0
  23. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/setup.py +0 -0
  24. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/__init__.py +0 -0
  25. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/README.md +0 -0
  26. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/__init__.py +0 -0
  27. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/base.py +0 -0
  28. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/__init__.py +0 -0
  29. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/base.py +0 -0
  30. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/helpers.py +0 -0
  31. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  32. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  33. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  34. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  35. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +0 -0
  36. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  37. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  38. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  39. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  40. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  41. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  42. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  43. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  44. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  45. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/config/__init__.py +0 -0
  46. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/config/base.py +0 -0
  47. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/config/dense.py +0 -0
  48. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/config/format.py +0 -0
  49. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  50. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  51. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/linear/__init__.py +0 -0
  52. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  53. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/logger.py +0 -0
  54. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/modeling/__init__.py +0 -0
  55. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/modeling/attention.py +0 -0
  56. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/modeling/kvcache.py +0 -0
  57. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/__init__.py +0 -0
  58. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  59. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  60. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  61. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  62. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  63. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  64. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/quant_args.py +0 -0
  65. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/quant_config.py +0 -0
  66. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
  67. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  68. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  69. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  70. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +0 -0
  71. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/registry/__init__.py +0 -0
  72. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/registry/registry.py +0 -0
  73. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/__init__.py +0 -0
  74. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/apply.py +0 -0
  75. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  76. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/factory/base.py +0 -0
  77. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  78. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  79. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  80. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/transform_args.py +0 -0
  81. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/transform_config.py +0 -0
  82. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  83. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  84. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  85. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  86. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  87. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/__init__.py +0 -0
  88. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/helpers.py +0 -0
  89. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/internal.py +0 -0
  90. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/match.py +0 -0
  91. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/offload.py +0 -0
  92. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/permutations_24.py +0 -0
  93. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  94. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  95. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors/utils/type.py +0 -0
  96. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  97. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  98. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors.egg-info/requires.txt +0 -0
  99. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  100. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/__init__.py +0 -0
  101. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/conftest.py +0 -0
  102. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/mock_observer.py +0 -0
  103. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/__init__.py +0 -0
  104. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/model_compressors/__init__.py +0 -0
  105. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  106. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  107. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
  108. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  109. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  110. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  111. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
  112. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  113. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  114. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  115. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  116. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  117. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_configs/__init__.py +0 -0
  118. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_configs/test_base.py +0 -0
  119. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_configs/test_infer_quant.py +0 -0
  120. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  121. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_linear/__init__.py +0 -0
  122. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_linear/test_compressed_linear.py +0 -0
  123. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_modeling/test_attention_and_cache.py +0 -0
  124. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/__init__.py +0 -0
  125. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/__init__.py +0 -0
  126. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/conftest.py +0 -0
  127. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  128. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  129. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  130. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  131. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  132. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  133. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
  134. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/test_configs/__init__.py +0 -0
  135. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  136. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  137. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/test_quant_args.py +0 -0
  138. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/test_quant_config.py +0 -0
  139. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/test_quant_scheme.py +0 -0
  140. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  141. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_quantization/test_utils/test_mxfp4_utils.py +0 -0
  142. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_registry.py +0 -0
  143. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_transform/conftest.py +0 -0
  144. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_transform/factory/test_correctness.py +0 -0
  145. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_transform/factory/test_memory.py +0 -0
  146. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_transform/factory/test_serialization.py +0 -0
  147. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_transform/test_transform_args.py +0 -0
  148. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_transform/test_transform_config.py +0 -0
  149. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_transform/test_transform_scheme.py +0 -0
  150. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_transform/utils/test_hadamard.py +0 -0
  151. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_utils/__init__.py +0 -0
  152. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_utils/test_helpers.py +0 -0
  153. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_utils/test_match.py +0 -0
  154. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_utils/test_offload.py +0 -0
  155. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_utils/test_safetensors_load.py +0 -0
  156. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/test_utils/test_type.py +0 -0
  157. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/tests/testing_utils.py +0 -0
  158. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.13.0}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251214
3
+ Version: 0.13.0
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/vllm-project/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -15,7 +15,7 @@
15
15
  },
16
16
  {
17
17
  "cell_type": "code",
18
- "execution_count": 2,
18
+ "execution_count": 12,
19
19
  "metadata": {},
20
20
  "outputs": [],
21
21
  "source": [
@@ -25,8 +25,7 @@
25
25
  "from compressed_tensors.quantization import (\n",
26
26
  " QuantizationConfig,\n",
27
27
  " QuantizationStatus,\n",
28
- " apply_quantization_config,\n",
29
- " compress_quantized_weights\n",
28
+ " apply_quantization_config\n",
30
29
  ")\n",
31
30
  "from compressed_tensors.compressors import ModelCompressor\n",
32
31
  "from transformers import AutoModelForCausalLM, AutoTokenizer, DefaultDataCollator\n",
@@ -37,51 +36,9 @@
37
36
  },
38
37
  {
39
38
  "cell_type": "code",
40
- "execution_count": 3,
39
+ "execution_count": 13,
41
40
  "metadata": {},
42
41
  "outputs": [
43
- {
44
- "data": {
45
- "application/vnd.jupyter.widget-view+json": {
46
- "model_id": "c883cdc8ecd04866bd01d61796b81c26",
47
- "version_major": 2,
48
- "version_minor": 0
49
- },
50
- "text/plain": [
51
- "config.json: 0%| | 0.00/560 [00:00<?, ?B/s]"
52
- ]
53
- },
54
- "metadata": {},
55
- "output_type": "display_data"
56
- },
57
- {
58
- "data": {
59
- "application/vnd.jupyter.widget-view+json": {
60
- "model_id": "32b18b14b6774ce7b61d2854a1ed5f49",
61
- "version_major": 2,
62
- "version_minor": 0
63
- },
64
- "text/plain": [
65
- "model.safetensors: 0%| | 0.00/4.40G [00:00<?, ?B/s]"
66
- ]
67
- },
68
- "metadata": {},
69
- "output_type": "display_data"
70
- },
71
- {
72
- "data": {
73
- "application/vnd.jupyter.widget-view+json": {
74
- "model_id": "370c6d18521a4b65833a411728be1ed7",
75
- "version_major": 2,
76
- "version_minor": 0
77
- },
78
- "text/plain": [
79
- "generation_config.json: 0%| | 0.00/129 [00:00<?, ?B/s]"
80
- ]
81
- },
82
- "metadata": {},
83
- "output_type": "display_data"
84
- },
85
42
  {
86
43
  "data": {
87
44
  "text/plain": [
@@ -113,7 +70,7 @@
113
70
  ")"
114
71
  ]
115
72
  },
116
- "execution_count": 3,
73
+ "execution_count": 13,
117
74
  "metadata": {},
118
75
  "output_type": "execute_result"
119
76
  }
@@ -122,7 +79,7 @@
122
79
  "# load a dense, unquantized tiny llama model\n",
123
80
  "device = \"cuda:0\"\n",
124
81
  "model_name = \"TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\"\n",
125
- "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=\"auto\")\n",
82
+ "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16)\n",
126
83
  "model"
127
84
  ]
128
85
  },
@@ -139,7 +96,7 @@
139
96
  },
140
97
  {
141
98
  "cell_type": "code",
142
- "execution_count": 23,
99
+ "execution_count": 14,
143
100
  "metadata": {},
144
101
  "outputs": [],
145
102
  "source": [
@@ -164,7 +121,7 @@
164
121
  },
165
122
  {
166
123
  "cell_type": "code",
167
- "execution_count": null,
124
+ "execution_count": 15,
168
125
  "metadata": {},
169
126
  "outputs": [],
170
127
  "source": [
@@ -177,7 +134,7 @@
177
134
  },
178
135
  {
179
136
  "cell_type": "code",
180
- "execution_count": null,
137
+ "execution_count": 16,
181
138
  "metadata": {},
182
139
  "outputs": [],
183
140
  "source": [
@@ -198,14 +155,14 @@
198
155
  },
199
156
  {
200
157
  "cell_type": "code",
201
- "execution_count": 28,
158
+ "execution_count": 17,
202
159
  "metadata": {},
203
160
  "outputs": [
204
161
  {
205
162
  "name": "stderr",
206
163
  "output_type": "stream",
207
164
  "text": [
208
- "Running calibration: 512it [00:33, 15.42it/s]\n"
165
+ "Running calibration: 512it [00:58, 8.82it/s]\n"
209
166
  ]
210
167
  }
211
168
  ],
@@ -233,20 +190,24 @@
233
190
  "\n",
234
191
  "Notice that at this point, the weight itself is still a floating point and has not been quantized. \n",
235
192
  "\n",
236
- "To convert the weights to an integer type, we need to apply the `compress_quantized_weights` function. After compressing the weights, a forward pass of the model can no longer be run in PyTorch"
193
+ "To convert the weights to an integer type, we need to apply the `compress_model` function. After compressing the weights, a forward pass of the model can no longer be run in PyTorch.\n",
194
+ "\n",
195
+ "After compressing the quantized model with the `pack-quantized` format, weights are represented as logical int4 values packed into int32 containers ( `weight_packed` ), with the original shape recorded in `weight_shape`.\n",
196
+ "\n",
197
+ "This packed representation is what gets saved to disk when using ModelCompressor.compress_model(model)."
237
198
  ]
238
199
  },
239
200
  {
240
201
  "cell_type": "code",
241
- "execution_count": 29,
202
+ "execution_count": 18,
242
203
  "metadata": {},
243
204
  "outputs": [
244
205
  {
245
206
  "name": "stdout",
246
207
  "output_type": "stream",
247
208
  "text": [
248
- "Scale: tensor([17296.], device='cuda:4', dtype=torch.float16), Zero Point: tensor([0], device='cuda:4', dtype=torch.int8)\n",
249
- "Weight min: -1.587890625 max: 1.0283203125 dtype: torch.float16\n"
209
+ "Scale: tensor([-3.0465e+26], device='cuda:0', dtype=torch.bfloat16), Zero Point: tensor([0], device='cuda:0', dtype=torch.int8)\n",
210
+ "Weight min: -1.5859375 max: 1.03125 dtype: torch.bfloat16\n"
250
211
  ]
251
212
  }
252
213
  ],
@@ -262,64 +223,62 @@
262
223
  },
263
224
  {
264
225
  "cell_type": "code",
265
- "execution_count": 30,
226
+ "execution_count": 19,
266
227
  "metadata": {},
267
228
  "outputs": [
229
+ {
230
+ "name": "stderr",
231
+ "output_type": "stream",
232
+ "text": [
233
+ "Compressing model: 154it [00:02, 59.75it/s]"
234
+ ]
235
+ },
268
236
  {
269
237
  "name": "stdout",
270
238
  "output_type": "stream",
271
239
  "text": [
272
- "Scale: tensor([17296.], device='cuda:4', dtype=torch.float16), Zero Point: tensor([0], device='cuda:4', dtype=torch.int8)\n",
273
- "Weight min: 0 max: 0 dtype: torch.int8\n"
240
+ "Compressed weight scale: tensor([-3.0465e+26], device='cuda:0', dtype=torch.bfloat16), zero point: tensor([0], device='cuda:0', dtype=torch.int8)\n",
241
+ "Compressed weight dtype: torch.int32\n",
242
+ "Compressed weight shape: torch.Size([2048, 256])\n",
243
+ "Uncompressed weight shape: tensor([2048, 2048], device='cuda:0')\n"
244
+ ]
245
+ },
246
+ {
247
+ "name": "stderr",
248
+ "output_type": "stream",
249
+ "text": [
250
+ "\n"
274
251
  ]
275
252
  }
276
253
  ],
277
254
  "source": [
278
255
  "# convert quantized weights to integers\n",
279
- "model.apply(compress_quantized_weights)\n",
256
+ "compressor = ModelCompressor(quantization_config=config)\n",
257
+ "compressor.compress_model(model)\n",
280
258
  "\n",
281
259
  "state_dict = model.state_dict()\n",
282
260
  "example_layer = \"model.layers.0.self_attn.q_proj.weight\"\n",
283
261
  "scale = state_dict[example_layer + \"_scale\"]\n",
284
262
  "zero_point = state_dict[example_layer + \"_zero_point\"]\n",
285
- "weight = state_dict[example_layer]\n",
286
- "print(f\"Scale: {scale}, Zero Point: {zero_point}\")\n",
287
- "print(f\"Weight min: {torch.min(weight)} max: {torch.max(weight)} dtype: {weight.dtype}\")"
288
- ]
289
- },
290
- {
291
- "cell_type": "markdown",
292
- "metadata": {},
293
- "source": [
294
- "After compressing the quantized model, the weight matrix has a range of int4 but is stored in an int8. \n",
295
- "\n",
296
- "We can further compress the model on disk using the `pack-quantized` format we specified in the config. This compression format will pack the int4 weights into int32"
263
+ "weight = state_dict[example_layer + \"_packed\"]\n",
264
+ "shape = state_dict[example_layer + \"_shape\"]\n",
265
+ "print(f\"Compressed weight scale: {scale}, zero point: {zero_point}\")\n",
266
+ "print(f\"Compressed weight dtype: {weight.dtype}\")\n",
267
+ "print(f\"Compressed weight shape: {weight.shape}\")\n",
268
+ "print(f\"Uncompressed weight shape: {shape}\")"
297
269
  ]
298
270
  },
299
271
  {
300
272
  "cell_type": "code",
301
- "execution_count": 31,
273
+ "execution_count": 20,
302
274
  "metadata": {},
303
275
  "outputs": [
304
276
  {
305
277
  "name": "stdout",
306
278
  "output_type": "stream",
307
279
  "text": [
308
- "Compression format: pack-quantized\n"
309
- ]
310
- },
311
- {
312
- "name": "stderr",
313
- "output_type": "stream",
314
- "text": [
315
- "Quantized Compression: 100%|██████████| 509/509 [00:03<00:00, 153.70it/s]\n"
316
- ]
317
- },
318
- {
319
- "name": "stdout",
320
- "output_type": "stream",
321
- "text": [
322
- "Size of the model's weights on disk using safetensors: 712.23 MB\n"
280
+ "Compression format: pack-quantized\n",
281
+ "Size of the model's weights on disk using safetensors: 712.25 MB\n"
323
282
  ]
324
283
  }
325
284
  ],
@@ -330,9 +289,8 @@
330
289
  "compression_format = config.format\n",
331
290
  "print(f\"Compression format: {compression_format}\")\n",
332
291
  "\n",
333
- "compressor = ModelCompressor(quantization_config=config)\n",
334
- "compressed_state_dict = compressor.compress(model)\n",
335
- "model.save_pretrained(output_dir, state_dict=compressed_state_dict)\n",
292
+ "\n",
293
+ "model.save_pretrained(output_dir, state_dict=model.state_dict())\n",
336
294
  "compressor.update_config(output_dir)\n",
337
295
  "\n",
338
296
  "compressed_size_on_disk_mb = os.path.getsize(os.path.join(output_dir, \"model.safetensors\")) / 1024 / 1024\n",
@@ -356,7 +314,7 @@
356
314
  "name": "python",
357
315
  "nbconvert_exporter": "python",
358
316
  "pygments_lexer": "ipython3",
359
- "version": "3.10.12"
317
+ "version": "3.12.12"
360
318
  }
361
319
  },
362
320
  "nbformat": 4,
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.12.3.a20251214'
21
- __version_tuple__ = version_tuple = (0, 12, 3)
20
+ __version__ = version = '0.13.0'
21
+ __version_tuple__ = version_tuple = (0, 13, 0)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251214
3
+ Version: 0.13.0
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/vllm-project/compressed-tensors
6
6
  Author: Neuralmagic, Inc.