compressed-tensors 0.10.3a20250731__tar.gz → 0.10.3a20250805__tar.gz

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (149)
  1. {compressed_tensors-0.10.3a20250731/src/compressed_tensors.egg-info → compressed_tensors-0.10.3a20250805}/PKG-INFO +1 -1
  2. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/lifecycle/forward.py +6 -1
  3. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/match.py +67 -13
  4. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/version.py +1 -1
  5. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  6. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_utils/test_match.py +31 -1
  7. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/.gitkeep +0 -0
  8. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/actions/test/action.yml +0 -0
  9. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/scripts/step-status +0 -0
  10. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/workflows/build-test.yml +0 -0
  11. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/workflows/build.yml +0 -0
  12. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/workflows/report.yml +0 -0
  13. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/workflows/test-check.yaml +0 -0
  14. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/workflows/test.yml +0 -0
  15. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/workflows/trigger-all.yml +0 -0
  16. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.github/workflows/upload.yml +0 -0
  17. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/.gitignore +0 -0
  18. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/LICENSE +0 -0
  19. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/Makefile +0 -0
  20. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/README.md +0 -0
  21. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  22. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/examples/bit_packing/int4_config.json +0 -0
  23. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/examples/bitmask_compression.ipynb +0 -0
  24. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  25. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  26. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/examples/llama_1.1b/example_quant_config.json +0 -0
  27. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  28. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/examples/quantize_and_pack_int4.ipynb +0 -0
  29. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/pyproject.toml +0 -0
  30. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/setup.cfg +0 -0
  31. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/setup.py +0 -0
  32. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/__init__.py +0 -0
  33. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/README.md +0 -0
  34. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/__init__.py +0 -0
  35. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/base.py +0 -0
  36. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/__init__.py +0 -0
  37. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/base.py +0 -0
  38. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/helpers.py +0 -0
  39. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  40. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  41. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  42. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  43. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  44. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
  45. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  46. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  47. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  48. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  49. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  50. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  51. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  52. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  53. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/config/__init__.py +0 -0
  54. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/config/base.py +0 -0
  55. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/config/dense.py +0 -0
  56. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  57. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  58. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/linear/__init__.py +0 -0
  59. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  60. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/__init__.py +0 -0
  61. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  62. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  63. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  64. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  65. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  66. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/quant_args.py +0 -0
  67. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/quant_config.py +0 -0
  68. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
  69. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  70. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  71. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/registry/__init__.py +0 -0
  72. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/registry/registry.py +0 -0
  73. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/__init__.py +0 -0
  74. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/apply.py +0 -0
  75. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/factory/__init__.py +0 -0
  76. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/factory/base.py +0 -0
  77. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
  78. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
  79. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
  80. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/transform_args.py +0 -0
  81. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/transform_config.py +0 -0
  82. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/transform_scheme.py +0 -0
  83. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/utils/__init__.py +0 -0
  84. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
  85. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
  86. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/transform/utils/matrix.py +0 -0
  87. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/__init__.py +0 -0
  88. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/helpers.py +0 -0
  89. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/internal.py +0 -0
  90. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/offload.py +0 -0
  91. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/permutations_24.py +0 -0
  92. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/permute.py +0 -0
  93. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  94. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  95. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
  96. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  97. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors.egg-info/requires.txt +0 -0
  98. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  99. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/__init__.py +0 -0
  100. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/conftest.py +0 -0
  101. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/__init__.py +0 -0
  102. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/model_compressors/__init__.py +0 -0
  103. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  104. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  105. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  106. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  107. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
  108. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  109. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  110. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  111. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  112. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  113. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  114. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_configs/__init__.py +0 -0
  115. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_configs/test_base.py +0 -0
  116. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  117. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_linear/__init__.py +0 -0
  118. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_linear/test_compressed_linear.py +0 -0
  119. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/__init__.py +0 -0
  120. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/__init__.py +0 -0
  121. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/conftest.py +0 -0
  122. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  123. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  124. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  125. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  126. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  127. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  128. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  129. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/test_configs/__init__.py +0 -0
  130. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  131. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  132. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/test_quant_args.py +0 -0
  133. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/test_quant_config.py +0 -0
  134. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/test_quant_scheme.py +0 -0
  135. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  136. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_registry.py +0 -0
  137. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_transform/conftest.py +0 -0
  138. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_transform/factory/test_correctness.py +0 -0
  139. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_transform/factory/test_memory.py +0 -0
  140. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_transform/test_transform_args.py +0 -0
  141. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_transform/test_transform_config.py +0 -0
  142. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_transform/test_transform_scheme.py +0 -0
  143. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_transform/utils/test_hadamard.py +0 -0
  144. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_utils/__init__.py +0 -0
  145. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_utils/test_helpers.py +0 -0
  146. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_utils/test_offload.py +0 -0
  147. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/test_utils/test_safetensors_load.py +0 -0
  148. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/tests/testing_utils.py +0 -0
  149. {compressed_tensors-0.10.3a20250731 → compressed_tensors-0.10.3a20250805}/utils/copyright.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250731
3
+ Version: 0.10.3a20250805
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -124,8 +124,13 @@ def dequantize(
124
124
  strategy=QuantizationStrategy.GROUP, group_size=group_size
125
125
  )
126
126
  else:
127
+ rows, cols = x_q.shape[-2], x_q.shape[-1]
128
+ block_height = rows // scale.shape[0] # Rows per block
129
+ block_width = cols // scale.shape[1] # Columns per block
130
+
127
131
  args = QuantizationArgs(
128
- strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape
132
+ strategy=QuantizationStrategy.BLOCK,
133
+ block_structure=[block_height, block_width],
129
134
  )
130
135
  else:
131
136
  raise ValueError(
@@ -15,7 +15,7 @@
15
15
  import logging
16
16
  import re
17
17
  from collections.abc import Generator
18
- from typing import Iterable, Tuple
18
+ from typing import Iterable, Mapping, Optional, Tuple
19
19
 
20
20
  import torch
21
21
  from compressed_tensors.utils.internal import InternalModule
@@ -32,10 +32,14 @@ __all__ = [
32
32
  ]
33
33
 
34
34
 
35
+ FusedMappping = Mapping[str, Iterable[str]]
36
+
37
+
35
38
  def match_named_modules(
36
39
  model: torch.nn.Module,
37
40
  targets: Iterable[str],
38
41
  ignore: Iterable[str] = tuple(),
42
+ fused: Optional[FusedMappping] = None,
39
43
  warn_on_fail: bool = False,
40
44
  ) -> Generator[Tuple[str, torch.nn.Module]]:
41
45
  """
@@ -45,16 +49,18 @@ def match_named_modules(
45
49
  :param model: model containing submodules to match against
46
50
  :param targets: target strings, potentially containing "re:" prefixes
47
51
  :param ignore: targets to ignore, potentially containing "re:" prefixes
52
+ :fused: optional mapping from suffixes of fused modules to the suffixes of their
53
+ corresponding shards. See `compressed_tensors.utils.match.is_match`
48
54
  :param warn_on_fail: if True, warns if any targets do not match any modules in model
49
55
  :return: generator of module names and modules
50
56
  """
51
57
  unmatched_targets = set(targets)
52
58
  for name, module in model.named_modules():
53
59
  for target in targets:
54
- if is_match(name, module, target):
60
+ if is_match(name, module, target, fused):
55
61
  unmatched_targets -= {target}
56
62
 
57
- if not any(is_match(name, module, ign) for ign in ignore):
63
+ if not any(is_match(name, module, ign, fused) for ign in ignore):
58
64
  yield name, module
59
65
 
60
66
  if warn_on_fail:
@@ -68,6 +74,7 @@ def match_named_parameters(
68
74
  model: torch.nn.Module,
69
75
  targets: Iterable[str],
70
76
  ignore: Iterable[str] = tuple(),
77
+ fused: Optional[FusedMappping] = None,
71
78
  warn_on_fail: bool = False,
72
79
  ) -> Generator[Tuple[str, torch.nn.Module, torch.nn.Parameter]]:
73
80
  """
@@ -77,6 +84,8 @@ def match_named_parameters(
77
84
  :param model: model containing params to match against
78
85
  :param targets: target strings, potentially containing "re:" prefixes
79
86
  :param ignore: targets to ignore, potentially containing "re:" prefixes
87
+ :fused: optional mapping from suffixes of fused modules to the suffixes of their
88
+ corresponding shards. See `compressed_tensors.utils.match.is_match`
80
89
  :param warn_on_fail: if True, warns if any targets do not match any params in model
81
90
  :return: generator of fully-qualified param names, parent modules, and params
82
91
  """
@@ -88,10 +97,10 @@ def match_named_parameters(
88
97
  for param_name, param in module.named_parameters(recurse=False):
89
98
  param_fqn = f"{module_name}.{param_name}"
90
99
  for target in targets:
91
- if _match_name(param_fqn, target):
100
+ if _match_name(param_fqn, target, fused):
92
101
  unmatched_targets -= {target}
93
102
 
94
- if not any(_match_name(param_fqn, ign) for ign in ignore):
103
+ if not any(_match_name(param_fqn, ign, fused) for ign in ignore):
95
104
  yield param_fqn, module, param
96
105
 
97
106
  if warn_on_fail:
@@ -164,21 +173,56 @@ def match_modules_set(
164
173
  raise ValueError(f"Unable to match targets into set: {unmatched_keys}")
165
174
 
166
175
 
167
- def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
176
+ def is_match(
177
+ name: str,
178
+ module: torch.nn.Module,
179
+ target: str,
180
+ fused: Optional[FusedMappping] = None,
181
+ ) -> bool:
168
182
  """
169
183
  Returns true if either module name or module parent classes match against target
170
- and the module is not an internal module
184
+ and the module is not an internal module. The name and module may refer to a fused
185
+ module defined by vLLM. In these cases, a `fused` mapping must be provided.
186
+
187
+ For example, in `vllm/model_executor/models/llama.py`:
188
+ ```python
189
+ packed_modules_mapping = {
190
+ "qkv_proj": ["q_proj", "k_proj", "v_proj"],
191
+ "gate_up_proj": ["gate_proj", "up_proj"]
192
+ }
193
+ ```
194
+
195
+ :param name: name of module
196
+ :param module: module to match
197
+ :param target: target which matches name or module, potentially contains regex
198
+ :fused: optional mapping from suffixes of fused modules to the suffixes of their
199
+ corresponding shards
171
200
  """
172
201
  return not isinstance(module, InternalModule) and (
173
- _match_name(name, target) or _match_class(module, target)
202
+ _match_name(name, target, fused) or _match_class(module, target)
174
203
  )
175
204
 
176
205
 
177
- def _match_name(name: str, target: str) -> bool:
206
+ def _match_name(name: str, target: str, fused: Optional[FusedMappping] = None) -> bool:
178
207
  """
179
- Returns true if target string begins with "re:" and
180
- regex matches or if target string exactly matches name
208
+ Returns true if target string begins with "re:" and regex matches or if target
209
+ string exactly matches name. If the name refers to a fused module defined by vLLM,
210
+ a `fused` mapping must be provided.
211
+
212
+ :param name: name of module
213
+ :param target: target name, potentially contains regex
214
+ :fused: optional mapping from suffixes of fused modules to the suffixes of their
215
+ corresponding shards
181
216
  """
217
+ if fused is not None:
218
+ for fused_suffix in fused:
219
+ if name.endswith(fused_suffix):
220
+ name_stripped = name.removesuffix(fused_suffix)
221
+ return any(
222
+ _match_name(name_stripped + shard_suffix, target)
223
+ for shard_suffix in fused[fused_suffix]
224
+ )
225
+
182
226
  if target.startswith("re:"):
183
227
  return re.match(target.removeprefix("re:"), name) is not None
184
228
  else:
@@ -187,10 +231,20 @@ def _match_name(name: str, target: str) -> bool:
187
231
 
188
232
  def _match_class(module: torch.nn.Module, target: str) -> bool:
189
233
  """
190
- Returns true if any torch parent class names match the target string exactly
234
+ Returns true if any torch parent class names match the target string exactly.
235
+ A special exception is made for vllm's `LinearBase` class which matches `Linear`
236
+
237
+ :param module: module to match
238
+ :param target: target which matches name or module
191
239
  """
192
240
  # will never match against a regex pattern since `:` is not allowed in class names
193
241
  return any(
194
- issubclass(cls, torch.nn.Module) and cls.__name__ == target
242
+ (
243
+ issubclass(cls, torch.nn.Module)
244
+ and (
245
+ cls.__name__ == target
246
+ or (cls.__name__ == "LinearBase" and target == "Linear")
247
+ )
248
+ )
195
249
  for cls in module.__class__.__mro__
196
250
  )
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.10.3.a20250731'
20
+ __version__ = version = '0.10.3.a20250805'
21
21
  __version_tuple__ = version_tuple = (0, 10, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.10.3a20250731
3
+ Version: 0.10.3a20250805
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -16,7 +16,6 @@ from unittest.mock import patch
16
16
 
17
17
  import pytest
18
18
  import torch.nn as nn
19
- from accelerate import init_empty_weights
20
19
 
21
20
  # Assuming the module is named "module_matching" - adjust import as needed
22
21
  from compressed_tensors.utils import (
@@ -33,6 +32,11 @@ class DummyModel(nn.Module):
33
32
  """Test model for unit tests. Weights are initialized on meta device"""
34
33
 
35
34
  def __init__(self):
35
+ try:
36
+ from accelerate import init_empty_weights
37
+ except ImportError:
38
+ pytest.skip("Skipping weight init requires accelerate")
39
+
36
40
  super().__init__()
37
41
  with init_empty_weights():
38
42
  self.layer1 = nn.Linear(10, 20)
@@ -142,6 +146,15 @@ class TestMatchClass:
142
146
  assert _match_class(model, "DummyModel") == True
143
147
  assert _match_class(model, "Module") == True
144
148
 
149
+ def test_linear_base(self):
150
+ """Test matching against vllm's LinearBase class"""
151
+
152
+ class LinearBase(nn.Module):
153
+ pass
154
+
155
+ linear = LinearBase()
156
+ assert _match_class(linear, "Linear") == True
157
+
145
158
 
146
159
  class TestIsMatch:
147
160
  """Test cases for is_match function"""
@@ -180,6 +193,23 @@ class TestIsMatch:
180
193
  linear = InternalLinear(10, 20)
181
194
  assert is_match("layer1", linear, "re:layer.*") == False
182
195
 
196
+ def test_fused_mapping(self):
197
+ """"""
198
+ linear = nn.Linear(10, 20)
199
+ mapping = {
200
+ "qkv_proj": ["q_proj", "k_proj", "v_proj"],
201
+ "gate_up_proj": ["gate_proj", "up_proj"],
202
+ }
203
+
204
+ assert is_match("dummy.qkv_proj", linear, "re:.*q_proj", mapping) == True
205
+ assert is_match("dummy.qkv_proj", linear, "re:.*k_proj", mapping) == True
206
+ assert is_match("dummy.qkv_proj", linear, "re:.*v_proj", mapping) == True
207
+ assert is_match("dummy.qkv_proj", linear, "Linear", mapping) == True
208
+
209
+ assert is_match("dummy.gate_up_proj", linear, "re:.*gate_proj", mapping) == True
210
+ assert is_match("dummy.gate_up_proj", linear, "re:.*up_proj", mapping) == True
211
+ assert is_match("dummy.gate_up_proj", linear, "Linear", mapping) == True
212
+
183
213
 
184
214
  class TestMatchNamedModules:
185
215
  """Test cases for match_named_modules function"""