compressed-tensors 0.12.3a20251214__tar.gz → 0.12.3a20251215__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. {compressed_tensors-0.12.3a20251214/src/compressed_tensors.egg-info → compressed_tensors-0.12.3a20251215}/PKG-INFO +1 -1
  2. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/examples/quantize_and_pack_int4.ipynb +51 -93
  3. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/version.py +1 -1
  4. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  5. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/.github/.gitkeep +0 -0
  6. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/.github/actions/test/action.yml +0 -0
  7. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/.github/scripts/step-status +0 -0
  8. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/.github/workflows/quality-check.yaml +0 -0
  9. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/.github/workflows/test-check.yaml +0 -0
  10. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/.gitignore +0 -0
  11. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/LICENSE +0 -0
  12. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/Makefile +0 -0
  13. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/README.md +0 -0
  14. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  15. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/examples/bit_packing/int4_config.json +0 -0
  16. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/examples/bitmask_compression.ipynb +0 -0
  17. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  18. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  19. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/examples/llama_1.1b/example_quant_config.json +0 -0
  20. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  21. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/pyproject.toml +0 -0
  22. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/setup.cfg +0 -0
  23. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/setup.py +0 -0
  24. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/__init__.py +0 -0
  25. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/README.md +0 -0
  26. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/__init__.py +0 -0
  27. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/base.py +0 -0
  28. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/__init__.py +0 -0
  29. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/base.py +0 -0
  30. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/helpers.py +0 -0
  31. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  32. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  33. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  34. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  35. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +0 -0
  36. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  37. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  38. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  39. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  40. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  41. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  42. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  43. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  44. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  45. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/config/__init__.py +0 -0
  46. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/config/base.py +0 -0
  47. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/config/dense.py +0 -0
  48. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/config/format.py +0 -0
  49. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  50. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  51. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/linear/__init__.py +0 -0
  52. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  53. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/logger.py +0 -0
  54. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/modeling/__init__.py +0 -0
  55. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/modeling/attention.py +0 -0
  56. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/modeling/kvcache.py +0 -0
  57. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/__init__.py +0 -0
  58. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  59. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  60. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  61. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  62. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  63. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  64. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/quant_args.py +0 -0
  65. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/quant_config.py +0 -0
  66. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
  67. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  68. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  69. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  70. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +0 -0
  71. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/registry/__init__.py +0 -0
  72. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/registry/registry.py +0 -0
  73. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/__init__.py +0 -0
  74. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/apply.py +0 -0
  75. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  76. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/factory/base.py +0 -0
  77. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  78. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  79. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  80. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/transform_args.py +0 -0
  81. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/transform_config.py +0 -0
  82. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  83. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  84. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  85. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  86. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  87. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/__init__.py +0 -0
  88. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/helpers.py +0 -0
  89. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/internal.py +0 -0
  90. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/match.py +0 -0
  91. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/offload.py +0 -0
  92. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/permutations_24.py +0 -0
  93. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  94. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  95. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors/utils/type.py +0 -0
  96. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  97. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  98. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors.egg-info/requires.txt +0 -0
  99. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  100. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/__init__.py +0 -0
  101. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/conftest.py +0 -0
  102. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/mock_observer.py +0 -0
  103. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/__init__.py +0 -0
  104. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/model_compressors/__init__.py +0 -0
  105. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  106. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  107. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
  108. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  109. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  110. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  111. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
  112. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  113. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  114. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  115. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  116. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  117. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_configs/__init__.py +0 -0
  118. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_configs/test_base.py +0 -0
  119. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_configs/test_infer_quant.py +0 -0
  120. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  121. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_linear/__init__.py +0 -0
  122. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_linear/test_compressed_linear.py +0 -0
  123. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_modeling/test_attention_and_cache.py +0 -0
  124. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/__init__.py +0 -0
  125. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/__init__.py +0 -0
  126. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/conftest.py +0 -0
  127. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  128. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  129. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  130. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  131. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  132. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  133. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
  134. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/test_configs/__init__.py +0 -0
  135. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  136. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  137. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/test_quant_args.py +0 -0
  138. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/test_quant_config.py +0 -0
  139. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/test_quant_scheme.py +0 -0
  140. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  141. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_quantization/test_utils/test_mxfp4_utils.py +0 -0
  142. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_registry.py +0 -0
  143. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_transform/conftest.py +0 -0
  144. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_transform/factory/test_correctness.py +0 -0
  145. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_transform/factory/test_memory.py +0 -0
  146. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_transform/factory/test_serialization.py +0 -0
  147. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_transform/test_transform_args.py +0 -0
  148. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_transform/test_transform_config.py +0 -0
  149. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_transform/test_transform_scheme.py +0 -0
  150. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_transform/utils/test_hadamard.py +0 -0
  151. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_utils/__init__.py +0 -0
  152. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_utils/test_helpers.py +0 -0
  153. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_utils/test_match.py +0 -0
  154. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_utils/test_offload.py +0 -0
  155. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_utils/test_safetensors_load.py +0 -0
  156. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/test_utils/test_type.py +0 -0
  157. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/tests/testing_utils.py +0 -0
  158. {compressed_tensors-0.12.3a20251214 → compressed_tensors-0.12.3a20251215}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.12.3a20251214
+ Version: 0.12.3a20251215
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/vllm-project/compressed-tensors
  Author: Neuralmagic, Inc.
@@ -15,7 +15,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 12,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -25,8 +25,7 @@
  "from compressed_tensors.quantization import (\n",
  " QuantizationConfig,\n",
  " QuantizationStatus,\n",
- " apply_quantization_config,\n",
- " compress_quantized_weights\n",
+ " apply_quantization_config\n",
  ")\n",
  "from compressed_tensors.compressors import ModelCompressor\n",
  "from transformers import AutoModelForCausalLM, AutoTokenizer, DefaultDataCollator\n",
@@ -37,51 +36,9 @@
  },
  {
  "cell_type": "code",
- "execution_count": 3,
+ "execution_count": 13,
  "metadata": {},
  "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "c883cdc8ecd04866bd01d61796b81c26",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "config.json: 0%| | 0.00/560 [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "32b18b14b6774ce7b61d2854a1ed5f49",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "model.safetensors: 0%| | 0.00/4.40G [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "370c6d18521a4b65833a411728be1ed7",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "generation_config.json: 0%| | 0.00/129 [00:00<?, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
  {
  "data": {
  "text/plain": [
@@ -113,7 +70,7 @@
  ")"
  ]
  },
- "execution_count": 3,
+ "execution_count": 13,
  "metadata": {},
  "output_type": "execute_result"
  }
@@ -122,7 +79,7 @@
  "# load a dense, unquantized tiny llama model\n",
  "device = \"cuda:0\"\n",
  "model_name = \"TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\"\n",
- "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=\"auto\")\n",
+ "model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16)\n",
  "model"
  ]
  },
@@ -139,7 +96,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 23,
+ "execution_count": 14,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -164,7 +121,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -177,7 +134,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -198,14 +155,14 @@
  },
  {
  "cell_type": "code",
- "execution_count": 28,
+ "execution_count": 17,
  "metadata": {},
  "outputs": [
  {
  "name": "stderr",
  "output_type": "stream",
  "text": [
- "Running calibration: 512it [00:33, 15.42it/s]\n"
+ "Running calibration: 512it [00:58, 8.82it/s]\n"
  ]
  }
  ],
@@ -233,20 +190,24 @@
  "\n",
  "Notice that at this point, the weight itself is still a floating point and has not been quantized. \n",
  "\n",
- "To convert the weights to an integer type, we need to apply the `compress_quantized_weights` function. After compressing the weights, a forward pass of the model can no longer be run in PyTorch"
+ "To convert the weights to an integer type, we need to apply the `compress_model` function. After compressing the weights, a forward pass of the model can no longer be run in PyTorch.\n",
+ "\n",
+ "After compressing the quantized model with the `pack-quantized` format, weights are represented as logical int4 values packed into int32 containers ( `weight_packed` ), with the original shape recorded in `weight_shape`.\n",
+ "\n",
+ "This packed representation is what gets saved to disk when using ModelCompressor.compress_model(model)."
  ]
  },
  {
  "cell_type": "code",
- "execution_count": 29,
+ "execution_count": 18,
  "metadata": {},
  "outputs": [
  {
  "name": "stdout",
  "output_type": "stream",
  "text": [
- "Scale: tensor([17296.], device='cuda:4', dtype=torch.float16), Zero Point: tensor([0], device='cuda:4', dtype=torch.int8)\n",
- "Weight min: -1.587890625 max: 1.0283203125 dtype: torch.float16\n"
+ "Scale: tensor([-3.0465e+26], device='cuda:0', dtype=torch.bfloat16), Zero Point: tensor([0], device='cuda:0', dtype=torch.int8)\n",
+ "Weight min: -1.5859375 max: 1.03125 dtype: torch.bfloat16\n"
  ]
  }
  ],
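The markdown added in the hunk above says the pack-quantized format stores logical int4 values in int32 containers. As a rough illustration of that arithmetic (the nibble ordering below is an assumption for illustration, not the compressor's documented bit layout), eight int4 values occupy one int32, so the last weight dimension shrinks by 8x:

    # Illustrative packing sketch only; the library's real bit layout may differ.
    import torch

    rows, cols = 2048, 2048
    qweight = torch.randint(-8, 8, (rows, cols), dtype=torch.int32)  # logical int4 values

    packed = torch.zeros(rows, cols // 8, dtype=torch.int32)
    for i in range(8):
        nibble = (qweight[:, i::8] + 8) & 0xF  # map -8..7 into an unsigned 4-bit field
        packed |= nibble << (4 * i)            # place field i at bits 4*i .. 4*i+3

    print(packed.shape)  # torch.Size([2048, 256]), matching the compressed shape shown below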
@@ -262,64 +223,62 @@
  ]
  },
  "cell_type": "code",
- "execution_count": 30,
+ "execution_count": 19,
  "metadata": {},
  "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Compressing model: 154it [00:02, 59.75it/s]"
+ ]
+ },
  {
  "name": "stdout",
  "output_type": "stream",
  "text": [
- "Scale: tensor([17296.], device='cuda:4', dtype=torch.float16), Zero Point: tensor([0], device='cuda:4', dtype=torch.int8)\n",
- "Weight min: 0 max: 0 dtype: torch.int8\n"
+ "Compressed weight scale: tensor([-3.0465e+26], device='cuda:0', dtype=torch.bfloat16), zero point: tensor([0], device='cuda:0', dtype=torch.int8)\n",
+ "Compressed weight dtype: torch.int32\n",
+ "Compressed weight shape: torch.Size([2048, 256])\n",
+ "Uncompressed weight shape: tensor([2048, 2048], device='cuda:0')\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
  ]
  }
  ],
  "source": [
  "# convert quantized weights to integers\n",
- "model.apply(compress_quantized_weights)\n",
+ "compressor = ModelCompressor(quantization_config=config)\n",
+ "compressor.compress_model(model)\n",
  "\n",
  "state_dict = model.state_dict()\n",
  "example_layer = \"model.layers.0.self_attn.q_proj.weight\"\n",
  "scale = state_dict[example_layer + \"_scale\"]\n",
  "zero_point = state_dict[example_layer + \"_zero_point\"]\n",
- "weight = state_dict[example_layer]\n",
- "print(f\"Scale: {scale}, Zero Point: {zero_point}\")\n",
- "print(f\"Weight min: {torch.min(weight)} max: {torch.max(weight)} dtype: {weight.dtype}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "After compressing the quantized model, the weight matrix has a range of int4 but is stored in an int8. \n",
- "\n",
- "We can further compress the model on disk using the `pack-quantized` format we specified in the config. This compression format will pack the int4 weights into int32"
+ "weight = state_dict[example_layer + \"_packed\"]\n",
+ "shape = state_dict[example_layer + \"_shape\"]\n",
+ "print(f\"Compressed weight scale: {scale}, zero point: {zero_point}\")\n",
+ "print(f\"Compressed weight dtype: {weight.dtype}\")\n",
+ "print(f\"Compressed weight shape: {weight.shape}\")\n",
+ "print(f\"Uncompressed weight shape: {shape}\")"
  ]
  },
  {
  "cell_type": "code",
- "execution_count": 31,
+ "execution_count": 20,
  "metadata": {},
  "outputs": [
  {
  "name": "stdout",
  "output_type": "stream",
  "text": [
- "Compression format: pack-quantized\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Quantized Compression: 100%|██████████| 509/509 [00:03<00:00, 153.70it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Size of the model's weights on disk using safetensors: 712.23 MB\n"
+ "Compression format: pack-quantized\n",
+ "Size of the model's weights on disk using safetensors: 712.25 MB\n"
  ]
  }
  ],
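For the q_proj layer inspected in the cell above, the shape change also quantifies the saving: 2048 x 2048 bf16 values take about 8 MiB, while the packed 2048 x 256 int32 tensor takes about 2 MiB plus the quantization scale and zero-point tensors. A quick, illustrative check of that arithmetic:

    bf16_bytes = 2048 * 2048 * 2      # 8,388,608 bytes (~8 MiB) before compression
    packed_bytes = 2048 * 256 * 4     # 2,097,152 bytes (~2 MiB) after int4 packing
    print(packed_bytes / bf16_bytes)  # 0.25, i.e. 4x smaller for this tensor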
@@ -330,9 +289,8 @@
  "compression_format = config.format\n",
  "print(f\"Compression format: {compression_format}\")\n",
  "\n",
- "compressor = ModelCompressor(quantization_config=config)\n",
- "compressed_state_dict = compressor.compress(model)\n",
- "model.save_pretrained(output_dir, state_dict=compressed_state_dict)\n",
+ "\n",
+ "model.save_pretrained(output_dir, state_dict=model.state_dict())\n",
  "compressor.update_config(output_dir)\n",
  "\n",
  "compressed_size_on_disk_mb = os.path.getsize(os.path.join(output_dir, \"model.safetensors\")) / 1024 / 1024\n",
@@ -356,7 +314,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.10.12"
+ "version": "3.12.12"
  }
  },
  "nbformat": 4,
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.12.3.a20251214'
+ __version__ = version = '0.12.3.a20251215'
  __version_tuple__ = version_tuple = (0, 12, 3)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.12.3a20251214
+ Version: 0.12.3a20251215
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/vllm-project/compressed-tensors
  Author: Neuralmagic, Inc.