compressed-tensors 0.12.3a20251008__tar.gz → 0.12.3a20251010__tar.gz

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (158)
  1. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/test.yml +1 -1
  2. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/trigger-all.yml +1 -1
  3. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/PKG-INFO +1 -1
  4. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/base.py +0 -3
  5. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/forward.py +1 -1
  6. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/initialize.py +10 -3
  7. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/quant_args.py +8 -0
  8. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/quant_scheme.py +1 -0
  9. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/version.py +1 -1
  10. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/PKG-INFO +1 -1
  11. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
  12. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/conftest.py +0 -21
  13. compressed_tensors-0.12.3a20251010/tests/mock_observer.py +173 -0
  14. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_initialize.py +0 -7
  15. compressed_tensors-0.12.3a20251010/tests/test_quantization/lifecycle/test_static_lifecycle.py +388 -0
  16. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_configs/test_strategies.py +0 -31
  17. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_utils/test_helpers.py +0 -1
  18. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/.gitkeep +0 -0
  19. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/actions/test/action.yml +0 -0
  20. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/scripts/step-status +0 -0
  21. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/build-test.yml +0 -0
  22. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/build.yml +0 -0
  23. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/post-release-nightly-build.yml +0 -0
  24. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/quality-check.yaml +0 -0
  25. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/report.yml +0 -0
  26. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/test-check.yaml +0 -0
  27. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.github/workflows/upload.yml +0 -0
  28. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/.gitignore +0 -0
  29. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/LICENSE +0 -0
  30. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/Makefile +0 -0
  31. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/README.md +0 -0
  32. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  33. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/examples/bit_packing/int4_config.json +0 -0
  34. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/examples/bitmask_compression.ipynb +0 -0
  35. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  36. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  37. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/examples/llama_1.1b/example_quant_config.json +0 -0
  38. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  39. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/examples/quantize_and_pack_int4.ipynb +0 -0
  40. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/pyproject.toml +0 -0
  41. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/setup.cfg +0 -0
  42. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/setup.py +0 -0
  43. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/__init__.py +0 -0
  44. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/README.md +0 -0
  45. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/__init__.py +0 -0
  46. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/__init__.py +0 -0
  47. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/base.py +0 -0
  48. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/helpers.py +0 -0
  49. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  50. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  51. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  52. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  53. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  54. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  55. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  56. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  57. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  58. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  59. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  60. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  61. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  62. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  63. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/__init__.py +0 -0
  64. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/base.py +0 -0
  65. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/dense.py +0 -0
  66. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/format.py +0 -0
  67. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  68. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  69. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/linear/__init__.py +0 -0
  70. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  71. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/logger.py +0 -0
  72. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/__init__.py +0 -0
  73. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  74. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  75. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  76. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  77. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/quant_config.py +0 -0
  78. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
  79. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  80. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  81. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/registry/__init__.py +0 -0
  82. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/registry/registry.py +0 -0
  83. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/__init__.py +0 -0
  84. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/apply.py +0 -0
  85. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  86. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/base.py +0 -0
  87. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  88. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  89. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  90. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/transform_args.py +0 -0
  91. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/transform_config.py +0 -0
  92. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  93. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  94. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  95. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  96. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  97. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/__init__.py +0 -0
  98. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/helpers.py +0 -0
  99. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/internal.py +0 -0
  100. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/match.py +0 -0
  101. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/offload.py +0 -0
  102. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/permutations_24.py +0 -0
  103. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  104. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  105. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors/utils/type.py +0 -0
  106. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  107. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/requires.txt +0 -0
  108. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  109. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/__init__.py +0 -0
  110. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/model_compressors/__init__.py +0 -0
  112. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  113. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  114. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  115. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  116. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  117. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  118. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  119. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  120. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  121. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  122. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  123. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_configs/__init__.py +0 -0
  124. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_configs/test_base.py +0 -0
  125. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_configs/test_infer_quant.py +0 -0
  126. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  127. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_linear/__init__.py +0 -0
  128. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_linear/test_compressed_linear.py +0 -0
  129. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/__init__.py +0 -0
  130. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/__init__.py +0 -0
  131. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/conftest.py +0 -0
  132. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  133. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  134. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  135. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  136. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  137. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_configs/__init__.py +0 -0
  138. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  139. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_quant_args.py +0 -0
  140. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_quant_config.py +0 -0
  141. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_quantization/test_quant_scheme.py +0 -0
  142. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_registry.py +0 -0
  143. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_transform/conftest.py +0 -0
  144. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_transform/factory/test_correctness.py +0 -0
  145. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_transform/factory/test_memory.py +0 -0
  146. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_transform/factory/test_serialization.py +0 -0
  147. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_transform/test_transform_args.py +0 -0
  148. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_transform/test_transform_config.py +0 -0
  149. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_transform/test_transform_scheme.py +0 -0
  150. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_transform/utils/test_hadamard.py +0 -0
  151. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_utils/__init__.py +0 -0
  152. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_helpers.py +0 -0
  153. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_match.py +0 -0
  154. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_offload.py +0 -0
  155. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_safetensors_load.py +0 -0
  156. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/test_utils/test_type.py +0 -0
  157. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/tests/testing_utils.py +0 -0
  158. {compressed_tensors-0.12.3a20251008 → compressed_tensors-0.12.3a20251010}/utils/copyright.py +0 -0
@@ -82,7 +82,7 @@ jobs:
82
82
 
83
83
  - name: set python
84
84
  id: set_python
85
- uses: actions/setup-python@v5
85
+ uses: actions/setup-python@v6
86
86
  with:
87
87
  python-version: ${{ inputs.python }}
88
88
 
@@ -49,6 +49,6 @@ jobs:
49
49
  push_to_pypi: ${{ (github.event.schedule == '30 0 * * *') || inputs.push_to_pypi || false }}
50
50
  test_configs: '[{"python":"3.11.4","label":"k8s-util","timeout":"40","code_coverage":true},
51
51
  {"python":"3.10.12","label":"k8s-util","timeout":"40"},
52
- {"python":"3.9.17","label":"k8s-h100-solo","timeout":"40"},
52
+ {"python":"3.13","label":"k8s-h100-solo","timeout":"40"},
53
53
  {"python":"3.12.6","label":"k8s-a100-duo","timeout":"40"}]'
54
54
  secrets: inherit
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251008
3
+ Version: 0.12.3a20251010
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -20,6 +20,3 @@ TRANSFORM_CONFIG_NAME = "transform_config"
20
20
  # required fields
21
21
  COMPRESSION_VERSION_NAME = "version"
22
22
  QUANTIZATION_METHOD_NAME = "quant_method"
23
-
24
- # auxillary configs
25
- KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
@@ -330,7 +330,7 @@ def _process_quantization(
330
330
  inv_perm = torch.argsort(perm)
331
331
  output = output.index_select(-1, inv_perm)
332
332
 
333
- else: # covers channel, token and tensor strategies
333
+ else: # covers tensor, channel, token, and attn_head strategies
334
334
  if do_quantize:
335
335
  output = _quantize(
336
336
  x=x,
@@ -14,7 +14,7 @@
14
14
 
15
15
 
16
16
  import logging
17
- from typing import Optional, Tuple
17
+ from typing import Optional, Tuple, Union
18
18
 
19
19
  import torch
20
20
  from compressed_tensors.quantization import (
@@ -152,7 +152,7 @@ def initialize_qparams(
152
152
  module: Module,
153
153
  base_name: str,
154
154
  quantization_args: QuantizationArgs,
155
- observed_shape: Tuple[int],
155
+ observed_shape: Tuple[Union[int, None]],
156
156
  observed_dtype: torch.dtype,
157
157
  force_zero_point: bool = True,
158
158
  ):
@@ -199,7 +199,7 @@ def initialize_qparams(
199
199
  expected_shape = (1,)
200
200
 
201
201
  elif strategy == QuantizationStrategy.TOKEN:
202
- expected_shape = (1, 1)
202
+ raise ValueError("Cannot perform static token quantization")
203
203
 
204
204
  elif strategy == QuantizationStrategy.CHANNEL:
205
205
  if len(observed_shape) < 2:
@@ -234,6 +234,13 @@ def initialize_qparams(
234
234
  num_cols = strategy_cdiv(observed_shape[-1], block_structure[-1], strategy)
235
235
  expected_shape = (num_rows, num_cols)
236
236
 
237
+ elif strategy == QuantizationStrategy.ATTN_HEAD:
238
+ # (batch_size, num_attention_heads, seq_len, head_dim)
239
+ if len(observed_shape) < 3:
240
+ raise ValueError("Attention quant requires at least 3 observed dimensions")
241
+
242
+ expected_shape = (observed_shape[-3], 1, 1)
243
+
237
244
  else:
238
245
  assert False, f"Unknown strategy {strategy}"
239
246
 
@@ -101,6 +101,7 @@ class QuantizationStrategy(str, Enum):
101
101
  BLOCK = "block"
102
102
  TOKEN = "token"
103
103
  TENSOR_GROUP = "tensor_group"
104
+ ATTN_HEAD = "attn_head"
104
105
 
105
106
 
106
107
  class DynamicType(str, Enum):
@@ -263,6 +264,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
263
264
  actorder = model.actorder
264
265
  dynamic = model.dynamic
265
266
  observer = model.observer
267
+ dynamic = model.dynamic
266
268
 
267
269
  # infer strategy
268
270
  if strategy is None:
@@ -278,6 +280,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
278
280
  "strategy='group' and group_size = -1 for 'channel'"
279
281
  )
280
282
 
283
+ # validate token strategy
284
+ if strategy == QuantizationStrategy.TOKEN and not dynamic:
285
+ raise ValueError(
286
+ "Cannot perform static token quantization, please use `dynamic=True`"
287
+ )
288
+
281
289
  # validate group strategy
282
290
  if strategy == QuantizationStrategy.GROUP:
283
291
  if group_size is None or group_size <= 0:
@@ -65,6 +65,7 @@ class QuantizationScheme(BaseModel):
65
65
  QuantizationStrategy.TENSOR,
66
66
  QuantizationStrategy.GROUP,
67
67
  QuantizationStrategy.TENSOR_GROUP,
68
+ QuantizationStrategy.ATTN_HEAD,
68
69
  ):
69
70
  if (
70
71
  inputs.strategy == QuantizationStrategy.GROUP
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.12.3.a20251008'
20
+ __version__ = version = '0.12.3.a20251010'
21
21
  __version_tuple__ = version_tuple = (0, 12, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251008
3
+ Version: 0.12.3a20251010
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -101,6 +101,7 @@ src/compressed_tensors/utils/semi_structured_conversions.py
101
101
  src/compressed_tensors/utils/type.py
102
102
  tests/__init__.py
103
103
  tests/conftest.py
104
+ tests/mock_observer.py
104
105
  tests/test_registry.py
105
106
  tests/testing_utils.py
106
107
  tests/test_compressors/__init__.py
@@ -134,6 +135,7 @@ tests/test_quantization/lifecycle/test_enabled.py
134
135
  tests/test_quantization/lifecycle/test_forward.py
135
136
  tests/test_quantization/lifecycle/test_initialize.py
136
137
  tests/test_quantization/lifecycle/test_lifecycle.py
138
+ tests/test_quantization/lifecycle/test_static_lifecycle.py
137
139
  tests/test_quantization/test_configs/__init__.py
138
140
  tests/test_quantization/test_configs/test_bit_depths.py
139
141
  tests/test_quantization/test_configs/test_strategies.py
@@ -29,27 +29,6 @@ def _get_dim(dim: int, value: torch.Tensor):
29
29
  return reduce_dims
30
30
 
31
31
 
32
- @pytest.fixture
33
- def mock_per_token_calibration():
34
- def update_scale_zp(module: torch.nn.Module, base_name: str, value: torch.Tensor):
35
- quantization_scheme = getattr(module, "quantization_scheme", None)
36
- if not quantization_scheme:
37
- # no quantization scheme nothing to do
38
- return
39
-
40
- arg_name = "weights" if base_name == "weight" else f"{base_name}_activations"
41
- args = getattr(quantization_scheme, arg_name, None)
42
-
43
- dim = _get_dim({0, 1}, value)
44
- min_val = torch.amin(value, dim=dim, keepdims=True)
45
- max_val = torch.amax(value, dim=dim, keepdims=True)
46
- scale, zp = calculate_qparams(min_val, max_val, args)
47
- update_parameter_data(module, scale, f"{base_name}_scale")
48
- update_parameter_data(module, zp, f"{base_name}_zero_point")
49
-
50
- return update_scale_zp
51
-
52
-
53
32
  @pytest.fixture
54
33
  def mock_per_group_calibration():
55
34
  def update_scale_zp(
@@ -0,0 +1,173 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Tuple
16
+ from weakref import ref
17
+
18
+ import torch
19
+ from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
20
+ from compressed_tensors.quantization.utils import (
21
+ calculate_qparams,
22
+ generate_gparam,
23
+ strategy_cdiv,
24
+ )
25
+
26
+
27
+ class MockMinMaxObserver(torch.nn.Module):
28
+ def __init__(self, base_name: str, args: QuantizationArgs, module: torch.nn.Module):
29
+ super().__init__()
30
+ self.parent = ref(module)
31
+ self.base_name = base_name
32
+ self.args = args
33
+
34
+ # used for testing
35
+ self.min_vals = None
36
+ self.max_vals = None
37
+
38
+ def get_min_max(self, observed: torch.Tensor):
39
+ min_vals = torch.amin(observed, dim=(0, -1))
40
+ max_vals = torch.amax(observed, dim=(0, -1))
41
+
42
+ return min_vals, max_vals
43
+
44
+ def forward(self, observed: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
45
+ observed = flatten_for_quantization(observed, self.base_name, self.args)
46
+
47
+ self.min_vals, self.max_vals = self.get_min_max(observed)
48
+
49
+ scales, zero_points = calculate_qparams(
50
+ min_vals=self.min_vals,
51
+ max_vals=self.max_vals,
52
+ quantization_args=self.args,
53
+ global_scale=getattr(self.parent(), f"{self.base_name}_global_scale", None),
54
+ )
55
+
56
+ return scales, zero_points
57
+
58
+ def get_global_scale(self, observed: torch.Tensor):
59
+ observed = observed.reshape((1, 1, -1)) # per tensor reshape
60
+ min_vals, max_vals = self.get_min_max(observed)
61
+ global_scale = generate_gparam(min_vals, max_vals)
62
+
63
+ return global_scale
64
+
65
+
66
+ def flatten_for_quantization(
67
+ value: torch.Tensor, base_name: str, args: QuantizationArgs
68
+ ) -> torch.Tensor:
69
+ if base_name == "weight":
70
+ return flatten_weight_for_quantization(value, args)
71
+ elif base_name in ("input", "output"):
72
+ return flatten_activation_for_quantization(value, args)
73
+ elif base_name in ("q", "k", "v"):
74
+ return flatten_attention_for_quantization(value, args)
75
+ else:
76
+ raise ValueError(f"Unknown quantization base name: {base_name}")
77
+
78
+
79
+ def flatten_weight_for_quantization(value: torch.Tensor, args: QuantizationArgs):
80
+ # value.shape = (num_rows, num_cols)
81
+
82
+ if args.strategy == QuantizationStrategy.TENSOR:
83
+ # (1, 1, num_weight_elems)
84
+ return value.reshape((1, 1, -1))
85
+
86
+ if args.strategy == QuantizationStrategy.TOKEN:
87
+ raise ValueError("Token quantization cannot be applied to weights")
88
+
89
+ if args.strategy == QuantizationStrategy.CHANNEL:
90
+ # (1, num_rows, 1, num_cols)
91
+ return value.unsqueeze(-2).unsqueeze(0)
92
+
93
+ if args.strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP):
94
+ # (1, num_rows, num_groups, group_size)
95
+ return value.unflatten(-1, (-1, args.group_size)).unsqueeze(0)
96
+
97
+ if args.strategy == QuantizationStrategy.BLOCK:
98
+ # (1, num_block_rows, num_block_cols, block_width * block_height)
99
+ block_height, block_width = args.block_structure
100
+ num_rows, num_cols = value.shape
101
+ num_block_rows = strategy_cdiv(num_rows, block_height, args.strategy)
102
+ num_block_cols = strategy_cdiv(num_cols, block_width, args.strategy)
103
+ return (
104
+ value.reshape(
105
+ num_block_rows,
106
+ block_height,
107
+ num_block_cols,
108
+ block_width,
109
+ )
110
+ .transpose(1, 2)
111
+ .flatten(-2, -1)
112
+ .unsqueeze(0)
113
+ )
114
+
115
+ if args.strategy == QuantizationStrategy.ATTN_HEAD:
116
+ raise ValueError("attention head quantization cannot be applied to weights")
117
+
118
+ assert False, f"Unknown strategy {args.strategy}"
119
+
120
+
121
+ def flatten_activation_for_quantization(value: torch.Tensor, args: QuantizationArgs):
122
+ # value.shape = (batch_size, seq_len, hidden_dim)
123
+
124
+ if args.strategy == QuantizationStrategy.TENSOR:
125
+ # (batch_size * seq_len, 1, hidden_dim)
126
+ return value.reshape((-1, 1, value.size(-1)))
127
+
128
+ if args.strategy == QuantizationStrategy.TOKEN:
129
+ # (batch_size, seq_len, hidden_dim)
130
+ # warning: token quantization uses `compute_dynamic_scales_and_zp`
131
+ return value.flatten(2, -1)
132
+
133
+ if args.strategy == QuantizationStrategy.CHANNEL:
134
+ raise ValueError("Channel quantization cannot be applied to activations")
135
+
136
+ if args.strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP):
137
+ # (batch_size * seq_len, num_groups, group_size)
138
+ # warning: group activation quantization uses compute_dynamic_scales_and_zp
139
+ return value.flatten(0, 1).unflatten(-1, (-1, args.group_size))
140
+
141
+ if args.strategy == QuantizationStrategy.BLOCK:
142
+ raise ValueError("Block quantization cannot be applied to activations")
143
+
144
+ if args.strategy == QuantizationStrategy.ATTN_HEAD:
145
+ raise ValueError("attention head quantization cannot be applied to linear acts")
146
+
147
+ assert False, f"Unknown strategy {args.strategy}"
148
+
149
+
150
+ def flatten_attention_for_quantization(value: torch.Tensor, args: QuantizationArgs):
151
+ # value.shape = (batch_size, num_heads, seq_len, head_dim)
152
+
153
+ if args.strategy == QuantizationStrategy.TENSOR:
154
+ # (batch_size * seq_len, 1, num_heads * head_dim)
155
+ return value.transpose(1, 2).flatten(0, 1).flatten(-2, -1).unsqueeze(-2)
156
+
157
+ if args.strategy == QuantizationStrategy.TOKEN:
158
+ raise ValueError("Token quantization cannot be applied to attention")
159
+
160
+ if args.strategy == QuantizationStrategy.CHANNEL:
161
+ raise ValueError("Channel quantization cannot be applied to attention")
162
+
163
+ if args.strategy in (QuantizationStrategy.GROUP, QuantizationStrategy.TENSOR_GROUP):
164
+ raise ValueError("Group quantization cannot be applied to attention")
165
+
166
+ if args.strategy == QuantizationStrategy.BLOCK:
167
+ raise ValueError("Block quantization cannot be applied to attention")
168
+
169
+ if args.strategy == QuantizationStrategy.ATTN_HEAD:
170
+ # (batch_size * seq_len, num_heads, 1, 1, head_dim)
171
+ return value.transpose(1, 2).flatten(0, 1).unsqueeze(-2).unsqueeze(-2)
172
+
173
+ assert False, f"Unknown strategy {args.strategy}"
@@ -176,10 +176,6 @@ def test_initialize_module_for_quantization_offloaded(
176
176
  QuantizationArgs(strategy="block", block_structure=[2, 4]),
177
177
  None,
178
178
  ),
179
- (
180
- QuantizationArgs(strategy="token"),
181
- QuantizationArgs(strategy="token"),
182
- ),
183
179
  ],
184
180
  )
185
181
  def test_initialize_quantization_parameters(weights, input_activations):
@@ -238,9 +234,6 @@ def test_initialize_quantization_parameters(weights, input_activations):
238
234
  # For activations or when block_structure is None
239
235
  expected_shape = (1,)
240
236
 
241
- elif args.strategy == QuantizationStrategy.TOKEN:
242
- expected_shape = (1, 1)
243
-
244
237
  if not args.dynamic:
245
238
  assert getattr(layer, f"{q_param_name}_scale").shape == expected_shape
246
239
  assert getattr(layer, f"{q_param_name}_zero_point").shape == expected_shape