compressed-tensors 0.10.3a20250721__tar.gz → 0.10.3a20250728__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. {compressed_tensors-0.10.3a20250721/src/compressed_tensors.egg-info → compressed_tensors-0.10.3a20250728}/PKG-INFO +1 -1
  2. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +12 -6
  3. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/forward.py +8 -4
  4. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/initialize.py +37 -2
  5. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/quant_scheme.py +4 -3
  6. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/base.py +3 -4
  7. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/hadamard.py +6 -5
  8. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/__init__.py +1 -0
  9. compressed_tensors-0.10.3a20250728/src/compressed_tensors/utils/match.py +196 -0
  10. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/version.py +1 -1
  11. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  12. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
  13. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_initialize.py +13 -3
  14. compressed_tensors-0.10.3a20250728/tests/test_utils/test_match.py +465 -0
  15. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/.gitkeep +0 -0
  16. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/actions/test/action.yml +0 -0
  17. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/scripts/step-status +0 -0
  18. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/build-test.yml +0 -0
  19. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/build.yml +0 -0
  20. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/report.yml +0 -0
  21. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/test-check.yaml +0 -0
  22. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/test.yml +0 -0
  23. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/trigger-all.yml +0 -0
  24. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.github/workflows/upload.yml +0 -0
  25. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/.gitignore +0 -0
  26. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/LICENSE +0 -0
  27. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/Makefile +0 -0
  28. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/README.md +0 -0
  29. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  30. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/bit_packing/int4_config.json +0 -0
  31. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/bitmask_compression.ipynb +0 -0
  32. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  33. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  34. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/llama_1.1b/example_quant_config.json +0 -0
  35. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  36. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/examples/quantize_and_pack_int4.ipynb +0 -0
  37. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/pyproject.toml +0 -0
  38. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/setup.cfg +0 -0
  39. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/setup.py +0 -0
  40. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/__init__.py +0 -0
  41. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/README.md +0 -0
  42. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/__init__.py +0 -0
  43. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/base.py +0 -0
  44. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/__init__.py +0 -0
  45. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/base.py +0 -0
  46. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/helpers.py +0 -0
  47. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  48. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  49. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  50. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  51. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  52. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  53. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  54. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  55. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  56. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  57. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  58. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  59. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  60. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/__init__.py +0 -0
  61. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/base.py +0 -0
  62. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/dense.py +0 -0
  63. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  64. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  65. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/linear/__init__.py +0 -0
  66. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  67. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/__init__.py +0 -0
  68. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  69. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  70. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  71. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  72. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/quant_args.py +0 -0
  73. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/quant_config.py +0 -0
  74. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  75. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  76. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/registry/__init__.py +0 -0
  77. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/registry/registry.py +0 -0
  78. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/__init__.py +0 -0
  79. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/apply.py +0 -0
  80. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  81. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  82. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  83. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/transform_args.py +0 -0
  84. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/transform_config.py +0 -0
  85. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  86. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  87. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  88. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  89. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  90. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/helpers.py +0 -0
  91. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/internal.py +0 -0
  92. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/offload.py +0 -0
  93. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/permutations_24.py +0 -0
  94. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/permute.py +0 -0
  95. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  96. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  97. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  98. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors.egg-info/requires.txt +0 -0
  99. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  100. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/__init__.py +0 -0
  101. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/conftest.py +0 -0
  102. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/__init__.py +0 -0
  103. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/model_compressors/__init__.py +0 -0
  104. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  105. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  106. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  107. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  108. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  109. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  110. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  111. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  112. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  113. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  114. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  115. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_configs/__init__.py +0 -0
  116. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_configs/test_base.py +0 -0
  117. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  118. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_linear/__init__.py +0 -0
  119. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_linear/test_compressed_linear.py +0 -0
  120. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/__init__.py +0 -0
  121. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/__init__.py +0 -0
  122. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/conftest.py +0 -0
  123. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  124. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  125. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  126. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  127. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  128. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  129. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_configs/__init__.py +0 -0
  130. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  131. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  132. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_quant_args.py +0 -0
  133. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_quant_config.py +0 -0
  134. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_quant_scheme.py +0 -0
  135. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  136. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_registry.py +0 -0
  137. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/conftest.py +0 -0
  138. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/factory/test_correctness.py +0 -0
  139. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/factory/test_memory.py +0 -0
  140. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/test_transform_args.py +0 -0
  141. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/test_transform_config.py +0 -0
  142. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/test_transform_scheme.py +0 -0
  143. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_transform/utils/test_hadamard.py +0 -0
  144. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_utils/__init__.py +0 -0
  145. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_utils/test_helpers.py +0 -0
  146. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_utils/test_offload.py +0 -0
  147. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/test_utils/test_safetensors_load.py +0 -0
  148. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/tests/testing_utils.py +0 -0
  149. {compressed_tensors-0.10.3a20250721 → compressed_tensors-0.10.3a20250728}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250721
3
+ Version: 0.10.3a20250728
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -400,7 +400,10 @@ class ModelCompressor:
400
400
 
401
401
  # in the future, support compression on same device
402
402
  with align_module_device(module, execution_device=exec_device):
403
- state_dict = module.state_dict(prefix=f"{prefix}.")
403
+ state_dict = {
404
+ f"{prefix}.{name}": param
405
+ for name, param in module.named_parameters(recurse=False)
406
+ }
404
407
 
405
408
  # quantization first
406
409
  if prefix in module_to_scheme:
@@ -421,7 +424,7 @@ class ModelCompressor:
421
424
 
422
425
  # remove any existing parameters
423
426
  offload_device = get_offloaded_device(module)
424
- for name, _ in list(module.named_parameters()):
427
+ for name, _ in list(module.named_parameters(recurse=False)):
425
428
  delete_offload_parameter(module, name)
426
429
 
427
430
  # replace with compressed parameters
@@ -458,7 +461,10 @@ class ModelCompressor:
458
461
  if prefix in module_to_scheme or prefix in sparse_compression_targets:
459
462
  # in the future, support decompression on same device
460
463
  with align_module_device(module, execution_device="cpu"):
461
- state_dict = module.state_dict(prefix=f"{prefix}.")
464
+ state_dict = {
465
+ f"{prefix}.{name}": param
466
+ for name, param in module.named_parameters(recurse=False)
467
+ }
462
468
 
463
469
  # sparsity first
464
470
  if prefix in sparse_compression_targets:
@@ -483,7 +489,7 @@ class ModelCompressor:
483
489
  # remove any existing parameters
484
490
  exec_device = get_execution_device(module)
485
491
  offload_device = get_offloaded_device(module)
486
- for name, _ in list(module.named_parameters()):
492
+ for name, _ in list(module.named_parameters(recurse=False)):
487
493
  delete_offload_parameter(module, name)
488
494
 
489
495
  # replace with decompressed parameters
@@ -754,8 +760,8 @@ def map_module_to_scheme(model: Module) -> Dict[str, QuantizationScheme]:
754
760
  fix_fsdp_module_name(name): module.quantization_scheme
755
761
  for name, module in model.named_modules()
756
762
  if (
757
- hasattr(module, "quantization_scheme") and
758
- module.quantization_scheme.weights is not None
763
+ hasattr(module, "quantization_scheme")
764
+ and module.quantization_scheme.weights is not None
759
765
  )
760
766
  }
761
767
 
@@ -112,17 +112,21 @@ def dequantize(
112
112
  if scale.shape[1] == 1:
113
113
  args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
114
114
  # Scale height matches input or is 1 -> group quantization across columns
115
- #
115
+ #
116
116
  # Example 1: scale.shape[0] == 1
117
117
  # x_q: (4, 8), scale: (1, 4) -> 2 columns per group
118
118
  #
119
- # Example 2: scale.shape[0] == x_q.shape[0]
119
+ # Example 2: scale.shape[0] == x_q.shape[0]
120
120
  # x_q: (4, 8), scale: (4, 4) -> 2 elements per group (per row)
121
121
  elif (scale.shape[0] == 1) or (scale.shape[0] == x_q.shape[0]):
122
122
  group_size = int(x_q.shape[1] / scale.shape[1])
123
- args = QuantizationArgs(strategy=QuantizationStrategy.GROUP, group_size=group_size)
123
+ args = QuantizationArgs(
124
+ strategy=QuantizationStrategy.GROUP, group_size=group_size
125
+ )
124
126
  else:
125
- args = QuantizationArgs(strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape)
127
+ args = QuantizationArgs(
128
+ strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape
129
+ )
126
130
  else:
127
131
  raise ValueError(
128
132
  f"Could not infer a quantization strategy from scale with {scale.ndim} "
@@ -15,6 +15,7 @@
15
15
 
16
16
  import logging
17
17
  import math
18
+ import warnings
18
19
  from enum import Enum
19
20
  from typing import List, Optional
20
21
 
@@ -172,14 +173,43 @@ def _initialize_scale_zero_point(
172
173
 
173
174
  if base_name == "weight" and weight_shape is not None:
174
175
  if quantization_args.strategy == QuantizationStrategy.CHANNEL:
175
- # (output_channels, 1)
176
+ # (output_channels, 1) - only for weights
176
177
  expected_shape = (weight_shape[0], 1)
177
178
  elif quantization_args.strategy in (
178
179
  QuantizationStrategy.TENSOR_GROUP,
179
180
  QuantizationStrategy.GROUP,
180
181
  ):
182
+ # GROUP/TENSOR_GROUP for both weights and activations
181
183
  num_groups = math.ceil(weight_shape[1] / quantization_args.group_size)
182
184
  expected_shape = (weight_shape[0], max(num_groups, 1))
185
+ elif quantization_args.strategy == QuantizationStrategy.BLOCK:
186
+ # For block quantization, scale shape should match number of blocks - only for weights
187
+ if quantization_args.block_structure is None:
188
+ raise ValueError(
189
+ "Block quantization requires block_structure to be specified"
190
+ )
191
+ block_height, block_width = quantization_args.block_structure
192
+ rows, cols = weight_shape[-2], weight_shape[-1]
193
+ num_rows_blocks = math.ceil(rows / block_height)
194
+ num_cols_blocks = math.ceil(cols / block_width)
195
+
196
+ # Warn if dimensions don't divide evenly
197
+ if rows % block_height != 0 or cols % block_width != 0:
198
+ warnings.warn(
199
+ f"Block quantization: tensor shape {weight_shape} does not divide evenly "
200
+ f"by block structure {quantization_args.block_structure}. "
201
+ f"Some blocks will be incomplete which may affect quantization quality.",
202
+ UserWarning,
203
+ )
204
+
205
+ expected_shape = (num_rows_blocks, num_cols_blocks)
206
+ elif quantization_args.strategy == QuantizationStrategy.BLOCK:
207
+ warnings.warn(
208
+ f"BLOCK quantization not supported for {base_name} activations. "
209
+ f"Falling back to tensor-level quantization.",
210
+ UserWarning,
211
+ )
212
+ expected_shape = 1
183
213
 
184
214
  # 3. Identify quantization scale and zp dtype
185
215
  scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
@@ -189,7 +219,12 @@ def _initialize_scale_zero_point(
189
219
  else:
190
220
  # TODO: consider erroring out in the future as if the dtype if not one of these,
191
221
  # there is likely bug
192
- if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32, torch.float64]:
222
+ if scale_dtype not in [
223
+ torch.float16,
224
+ torch.bfloat16,
225
+ torch.float32,
226
+ torch.float64,
227
+ ]:
193
228
  scale_dtype = torch.float16
194
229
  zp_dtype = quantization_args.pytorch_dtype()
195
230
 
@@ -64,8 +64,9 @@ class QuantizationScheme(BaseModel):
64
64
  raise ValueError("Cannot apply actorder to output activations")
65
65
 
66
66
  if (
67
- inputs and weights
68
- and weights.strategy == QuantizationStrategy.GROUP
67
+ inputs
68
+ and weights
69
+ and weights.strategy == QuantizationStrategy.GROUP
69
70
  and inputs.strategy == QuantizationStrategy.GROUP
70
71
  and weights.group_size != inputs.group_size
71
72
  ):
@@ -75,7 +76,7 @@ class QuantizationScheme(BaseModel):
75
76
  "may complicate fused kernel implementations. Consider using "
76
77
  "TENSOR_GROUP strategy for both or matching group sizes.",
77
78
  UserWarning,
78
- stacklevel=2
79
+ stacklevel=2,
79
80
  )
80
81
 
81
82
  return model
@@ -18,7 +18,6 @@ from typing import Optional
18
18
  import torch
19
19
  import torch.nn.utils.parametrize as P
20
20
  from compressed_tensors import InternalModule
21
- from compressed_tensors.quantization.lifecycle import is_target # TODO: move to utils
22
21
  from compressed_tensors.registry.registry import RegistryMixin, T
23
22
  from compressed_tensors.transform import (
24
23
  TransformArgs,
@@ -29,6 +28,7 @@ from compressed_tensors.utils import (
29
28
  align_module_device,
30
29
  delete_offload_module,
31
30
  has_offloaded_params,
31
+ match_named_modules,
32
32
  patch_attr,
33
33
  register_offload_module,
34
34
  update_offload_parameter,
@@ -87,9 +87,8 @@ class TransformFactory(RegistryMixin, ABC):
87
87
  :param model: module to apply transforms to
88
88
  """
89
89
  for arg in self.scheme.apply:
90
- for name, module in list(model.named_modules()):
91
- if is_target(name, module, arg.targets, arg.ignore):
92
- self._apply_to_module(module, arg)
90
+ for _, module in match_named_modules(model, arg.targets, arg.ignore):
91
+ self._apply_to_module(module, arg)
93
92
 
94
93
  def _apply_to_module(self, module: Module, args: TransformArgs):
95
94
  """
@@ -12,9 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import math
15
16
  from typing import Optional, Union
16
17
 
17
- import math
18
18
  import torch
19
19
  from compressed_tensors.transform import TransformArgs, TransformScheme
20
20
  from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
@@ -103,7 +103,8 @@ class HadamardTransform(TransformBase):
103
103
 
104
104
  if self.args.inverse:
105
105
  weight = weight.T
106
-
107
- return apply_transform_weight(
108
- weight, value, self.args.location, self.module_type
109
- ) / self._scale
106
+
107
+ return (
108
+ apply_transform_weight(weight, value, self.args.location, self.module_type)
109
+ / self._scale
110
+ )
@@ -15,6 +15,7 @@
15
15
 
16
16
  from .helpers import *
17
17
  from .internal import *
18
+ from .match import *
18
19
  from .offload import *
19
20
  from .permutations_24 import *
20
21
  from .permute import *
@@ -0,0 +1,196 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ import re
17
+ from collections.abc import Generator
18
+ from typing import Iterable, Tuple
19
+
20
+ import torch
21
+ from compressed_tensors.utils.internal import InternalModule
22
+
23
+
24
+ _LOGGER: logging.Logger = logging.getLogger(__name__)
25
+
26
+
27
+ __all__ = [
28
+ "match_named_modules",
29
+ "match_named_parameters",
30
+ "match_modules_set",
31
+ "is_match",
32
+ ]
33
+
34
+
35
def match_named_modules(
    model: torch.nn.Module,
    targets: Iterable[str],
    ignore: Iterable[str] = tuple(),
    warn_on_fail: bool = False,
) -> Generator[Tuple[str, torch.nn.Module]]:
    """
    Yields names and modules which match `targets` but do not match `ignore`.
    Each module is yielded at most once, in the order of `model.named_modules()`.

    :param model: model containing submodules to match against
    :param targets: target strings, potentially containing "re:" prefixes
    :param ignore: targets to ignore, potentially containing "re:" prefixes
    :param warn_on_fail: if True, warns if any targets do not match any modules in model
    :return: generator of module names and modules
    """
    unmatched_targets = set(targets)
    for name, module in model.named_modules():
        # Check *every* target (not just the first hit) so that
        # `unmatched_targets` bookkeeping stays accurate, but record a single
        # boolean so a module matching several targets is yielded only once.
        # (Previously the yield was nested in the target loop, producing
        # duplicate yields for multiply-matched modules.)
        matched = False
        for target in targets:
            if is_match(name, module, target):
                unmatched_targets.discard(target)
                matched = True

        if matched and not any(is_match(name, module, ign) for ign in ignore):
            yield name, module

    if warn_on_fail:
        for target in unmatched_targets:
            _LOGGER.warning(
                f"Could not match `{target}` in instance of {model.__class__.__name__}"
            )
65
+
66
+
67
def match_named_parameters(
    model: torch.nn.Module,
    targets: Iterable[str],
    ignore: Iterable[str] = tuple(),
    warn_on_fail: bool = False,
) -> Generator[Tuple[str, torch.nn.Module, torch.nn.Parameter]]:
    """
    Yields parameters which match `targets` but do not match `ignore`.
    Each parameter is yielded at most once, in the order of
    `model.named_modules()`.

    :param model: model containing params to match against
    :param targets: target strings, potentially containing "re:" prefixes
    :param ignore: targets to ignore, potentially containing "re:" prefixes
    :param warn_on_fail: if True, warns if any targets do not match any params in model
    :return: generator of fully-qualified param names, parent modules, and params
    """
    unmatched_targets = set(targets)
    for module_name, module in model.named_modules():
        # params owned by internal (bookkeeping) modules are never matched
        if isinstance(module, InternalModule):
            continue

        for param_name, param in module.named_parameters(recurse=False):
            param_fqn = f"{module_name}.{param_name}"
            # Check every target to keep `unmatched_targets` accurate, but
            # yield the param only once even if several targets match it.
            # (Previously the yield was nested in the target loop, producing
            # duplicate yields for multiply-matched params.)
            matched = False
            for target in targets:
                if _match_name(param_fqn, target):
                    unmatched_targets.discard(target)
                    matched = True

            if matched and not any(_match_name(param_fqn, ign) for ign in ignore):
                yield param_fqn, module, param

    if warn_on_fail:
        for target in unmatched_targets:
            _LOGGER.warning(
                f"Could not match `{target}` in instance of {model.__class__.__name__}"
            )
102
+
103
+
104
def match_modules_set(
    model: torch.nn.Module,
    targets: Iterable[str],
    ignore: Iterable[str] = tuple(),
) -> Generator[Iterable[torch.nn.Module]]:
    """
    Yields modules grouped with the same order and size as `targets`.
    Values are returned in order of `model.named_modules()`

    For example, the following targets would yield module belonging to the following layers:
    ```python3
    match_modules_set(model, ["q_proj", "k_proj", "v_proj"]) == (
        (
            `model.layers.0.self_attn.q_proj`,
            `model.layers.0.self_attn.k_proj`,
            `model.layers.0.self_attn.v_proj`,
        ),
        (
            `model.layers.1.self_attn.q_proj`,
            `model.layers.1.self_attn.k_proj`,
            `model.layers.1.self_attn.v_proj`,
        ),
        ...
        (
            `model.layers.32.self_attn.q_proj`,
            `model.layers.32.self_attn.k_proj`,
            `model.layers.32.self_attn.v_proj`,
        ),
    )
    ```

    This can be used to match layers to their corresponding downstream counterparts.
    For example, matching layer norms to their subsequent linear layers
    ```python3
    for norm, q, k, v in match_modules_set(model, (norm_tgt, q_tgt, k_tgt, v_tgt)):
        fuse_norm_linears(norm, [q, k, v])
    ```

    :param model: model containing modules to match against
    :param targets: target strings, potentially containing "re:" prefixes
    :param ignore: targets to ignore, potentially containing "re:" prefixes
    """
    pending = dict.fromkeys(targets, None)
    for name, module in model.named_modules():
        # fill each target slot; matching the same target twice before the
        # current set completes means the targets cannot be grouped cleanly
        for target in targets:
            is_ignored = any(is_match(name, module, ign) for ign in ignore)
            if is_match(name, module, target) and not is_ignored:
                if pending[target] is not None:
                    raise ValueError(f"Matched a {target} twice before completing set")
                pending[target] = module

        # emit a completed set (ordered like `targets`) and start collecting anew
        if targets and all(pending[target] is not None for target in targets):
            yield [pending[target] for target in targets]
            pending = dict.fromkeys(targets, None)

    # a partially-filled set at the end means some targets could not be grouped
    leftover = [target for target, module in pending.items() if module is not None]
    if len(leftover):
        raise ValueError(f"Unable to match targets into set: {leftover}")
165
+
166
+
167
def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
    """
    Returns true if either module name or module parent classes match against target
    and the module is not an internal module
    """
    if isinstance(module, InternalModule):
        return False
    return _match_name(name, target) or _match_class(module, target)
175
+
176
+
177
+ def _match_name(name: str, target: str) -> bool:
178
+ """
179
+ Returns true if target string begins with "re:" and
180
+ regex matches or if target string exactly matches name
181
+ """
182
+ if target.startswith("re:"):
183
+ return re.match(target.removeprefix("re:"), name) is not None
184
+ else:
185
+ return target == name
186
+
187
+
188
+ def _match_class(module: torch.nn.Module, target: str) -> bool:
189
+ """
190
+ Returns true if any torch parent class names match the target string exactly
191
+ """
192
+ # will never match against a regex pattern since `:` is not allowed in class names
193
+ return any(
194
+ issubclass(cls, torch.nn.Module) and cls.__name__ == target
195
+ for cls in module.__class__.__mro__
196
+ )
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.3.a20250721'
20
+ __version__ = version = '0.10.3.a20250728'
21
21
  __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250721
3
+ Version: 0.10.3a20250728
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -88,6 +88,7 @@ src/compressed_tensors/transform/utils/matrix.py
88
88
  src/compressed_tensors/utils/__init__.py
89
89
  src/compressed_tensors/utils/helpers.py
90
90
  src/compressed_tensors/utils/internal.py
91
+ src/compressed_tensors/utils/match.py
91
92
  src/compressed_tensors/utils/offload.py
92
93
  src/compressed_tensors/utils/permutations_24.py
93
94
  src/compressed_tensors/utils/permute.py
@@ -141,6 +142,7 @@ tests/test_transform/factory/test_memory.py
141
142
  tests/test_transform/utils/test_hadamard.py
142
143
  tests/test_utils/__init__.py
143
144
  tests/test_utils/test_helpers.py
145
+ tests/test_utils/test_match.py
144
146
  tests/test_utils/test_offload.py
145
147
  tests/test_utils/test_safetensors_load.py
146
148
  utils/copyright.py
@@ -174,8 +174,8 @@ def test_initialize_module_for_quantization_offloaded(
174
174
  ),
175
175
  ),
176
176
  (
177
- QuantizationArgs(strategy="block"),
178
- QuantizationArgs(strategy="block"),
177
+ QuantizationArgs(strategy="block", block_structure=[2, 4]),
178
+ None,
179
179
  ),
180
180
  (
181
181
  QuantizationArgs(strategy="token"),
@@ -227,7 +227,17 @@ def test_initialize_quantization_parameters(weights, input_activations):
227
227
  expected_shape = (layer.weight.shape[0], max(num_groups, 1))
228
228
 
229
229
  elif args.strategy == QuantizationStrategy.BLOCK:
230
- expected_shape = (1,)
230
+ # For block quantization, only weights get block-level scales
231
+ # Activations fall back to tensor-level since shape is unknown at init
232
+ if q_type == "weights" and args.block_structure is not None:
233
+ block_height, block_width = args.block_structure
234
+ rows, cols = layer.weight.shape[-2], layer.weight.shape[-1]
235
+ num_rows_blocks = math.ceil(rows / block_height)
236
+ num_cols_blocks = math.ceil(cols / block_width)
237
+ expected_shape = (num_rows_blocks, num_cols_blocks)
238
+ else:
239
+ # For activations or when block_structure is None
240
+ expected_shape = (1,)
231
241
 
232
242
  elif args.strategy == QuantizationStrategy.TOKEN:
233
243
  expected_shape = (1, 1)