compressed-tensors 0.13.1a20260130__tar.gz → 0.13.1a20260203__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.13.1a20260130/src/compressed_tensors.egg-info → compressed_tensors-0.13.1a20260203}/PKG-INFO +1 -1
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/base.py +11 -11
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/helpers.py +8 -8
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +28 -28
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/base.py +16 -15
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +14 -16
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +10 -12
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +13 -13
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/base.py +9 -9
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/dense.py +9 -8
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +7 -8
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +4 -6
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +7 -7
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/__init__.py +3 -3
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/base.py +3 -3
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/dispatch.py +4 -4
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/utils.py +2 -2
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/quant_args.py +6 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +19 -10
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/registry/registry.py +14 -18
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/base.py +3 -4
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/hadamard.py +3 -5
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/matrix_multiply.py +1 -3
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/transform_args.py +2 -3
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/transform_config.py +1 -3
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/transform_scheme.py +2 -4
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/utils/hadamard.py +2 -3
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/utils/matrix.py +1 -3
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/cache/helpers.py +3 -3
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_utils/test_mxfp4_utils.py +4 -2
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/.gitkeep +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/mergify.yml +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/workflows/quality-check.yaml +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/workflows/stale.yml +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/.gitignore +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/LICENSE +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/Makefile +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/README.md +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/pyproject.toml +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/setup.cfg +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/setup.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/format.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/logger.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/modeling/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/modeling/attention.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/modeling/kvcache.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/cpu.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/device.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/cache/dist_cpu.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/offload/module.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/apply.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/binary_search.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/internal.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/match.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors/utils/type.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/conftest.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/mock_observer.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_configs/test_infer_quant.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_modeling/test_attention_and_cache.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_modeling/test_deepseekv3_kvcache_quant.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/cache/test_cpu.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/cache/test_dist_cpu.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/conftest.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/test_dispatch.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/test_interface.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_offload/test_module.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_registry.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/conftest.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/factory/test_correctness.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/factory/test_memory.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/factory/test_serialization.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_transform/utils/test_hadamard.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/test_match.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/test_utils/test_type.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.13.1a20260130 → compressed_tensors-0.13.1a20260203}/utils/copyright.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: compressed-tensors
|
|
3
|
-
Version: 0.13.1a20260130
|
|
3
|
+
Version: 0.13.1a20260203
|
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
|
5
5
|
Home-page: https://github.com/vllm-project/compressed-tensors
|
|
6
6
|
Author: Neuralmagic, Inc.
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
|
-
from
|
|
16
|
+
from collections.abc import Generator
|
|
17
17
|
|
|
18
18
|
import torch
|
|
19
19
|
from compressed_tensors.config import SparsityCompressionConfig
|
|
@@ -59,15 +59,15 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
|
59
59
|
"""
|
|
60
60
|
|
|
61
61
|
def __init__(
|
|
62
|
-
self, config:
|
|
62
|
+
self, config: SparsityCompressionConfig | QuantizationConfig | None = None
|
|
63
63
|
):
|
|
64
64
|
self.config = config
|
|
65
65
|
|
|
66
66
|
def compression_param_info(
|
|
67
67
|
self,
|
|
68
68
|
weight_shape: torch.Size,
|
|
69
|
-
quantization_args:
|
|
70
|
-
) ->
|
|
69
|
+
quantization_args: QuantizationArgs | None = None,
|
|
70
|
+
) -> dict[str, tuple[torch.Size, torch.dtype]]:
|
|
71
71
|
"""
|
|
72
72
|
Creates a dictionary of expected shapes and dtypes for each compression
|
|
73
73
|
parameter used by the compressor
|
|
@@ -80,7 +80,7 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
|
80
80
|
|
|
81
81
|
@property
|
|
82
82
|
@abstractmethod
|
|
83
|
-
def compression_param_names(self) ->
|
|
83
|
+
def compression_param_names(self) -> tuple[str, ...]:
|
|
84
84
|
"""
|
|
85
85
|
Returns a tuple of compression parameter names introduced by
|
|
86
86
|
the compressor during compression
|
|
@@ -90,9 +90,9 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
|
90
90
|
@abstractmethod
|
|
91
91
|
def compress(
|
|
92
92
|
self,
|
|
93
|
-
model_state:
|
|
93
|
+
model_state: dict[str, Tensor],
|
|
94
94
|
**kwargs,
|
|
95
|
-
) ->
|
|
95
|
+
) -> dict[str, Tensor]:
|
|
96
96
|
"""
|
|
97
97
|
Compresses a dense state dict
|
|
98
98
|
|
|
@@ -108,7 +108,7 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
|
108
108
|
path_to_model_or_tensors: str,
|
|
109
109
|
device: str = "cpu",
|
|
110
110
|
**kwargs,
|
|
111
|
-
) -> Generator[
|
|
111
|
+
) -> Generator[tuple[str, Tensor], None, None]:
|
|
112
112
|
"""
|
|
113
113
|
Reads a compressed state dict located at path_to_model_or_tensors
|
|
114
114
|
and returns a generator for sequentially decompressing back to a
|
|
@@ -122,7 +122,7 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
|
122
122
|
"""
|
|
123
123
|
raise NotImplementedError()
|
|
124
124
|
|
|
125
|
-
def compress_module(self, module: Module) ->
|
|
125
|
+
def compress_module(self, module: Module) -> dict[str, torch.Tensor] | None:
|
|
126
126
|
"""
|
|
127
127
|
Compresses a single quantized leaf PyTorch module. If the module is not
|
|
128
128
|
quantized, this function has no effect.
|
|
@@ -153,7 +153,7 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
|
153
153
|
self,
|
|
154
154
|
weight: Tensor,
|
|
155
155
|
**kwargs,
|
|
156
|
-
) ->
|
|
156
|
+
) -> dict[str, torch.Tensor]:
|
|
157
157
|
"""
|
|
158
158
|
Compresses a single uncompressed weight
|
|
159
159
|
|
|
@@ -196,7 +196,7 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
|
196
196
|
return decompressed_weight
|
|
197
197
|
|
|
198
198
|
def decompress_weight(
|
|
199
|
-
self, compressed_data:
|
|
199
|
+
self, compressed_data: dict[str, Tensor], **kwargs
|
|
200
200
|
) -> torch.Tensor:
|
|
201
201
|
"""
|
|
202
202
|
Decompresses a single compressed weight
|
|
@@ -12,8 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from collections.abc import Generator
|
|
15
16
|
from pathlib import Path
|
|
16
|
-
from typing import Dict, Generator, Optional, Tuple, Union
|
|
17
17
|
|
|
18
18
|
import torch
|
|
19
19
|
from compressed_tensors.compressors import BaseCompressor
|
|
@@ -32,9 +32,9 @@ __all__ = [
|
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def save_compressed(
|
|
35
|
-
tensors: Dict[str, Tensor],
|
|
36
|
-
save_path: Union[str, Path],
|
|
37
|
-
compression_format: Optional[CompressionFormat] = None,
|
|
35
|
+
tensors: dict[str, Tensor],
|
|
36
|
+
save_path: str | Path,
|
|
37
|
+
compression_format: CompressionFormat | None = None,
|
|
38
38
|
):
|
|
39
39
|
"""
|
|
40
40
|
Save compressed tensors to disk. If tensors are not compressed,
|
|
@@ -68,10 +68,10 @@ def save_compressed(
|
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
def load_compressed(
|
|
71
|
-
compressed_tensors: Union[str, Path],
|
|
71
|
+
compressed_tensors: str | Path,
|
|
72
72
|
compression_config: SparsityCompressionConfig = None,
|
|
73
|
-
device: Optional[str] = "cpu",
|
|
74
|
-
) -> Generator[Tuple[str, Tensor], None, None]:
|
|
73
|
+
device: str | None = "cpu",
|
|
74
|
+
) -> Generator[tuple[str, Tensor], None, None]:
|
|
75
75
|
"""
|
|
76
76
|
Load compressed tensors from disk.
|
|
77
77
|
If tensors are not compressed, load them as is.
|
|
@@ -111,7 +111,7 @@ def load_compressed(
|
|
|
111
111
|
def save_compressed_model(
|
|
112
112
|
model: torch.nn.Module,
|
|
113
113
|
filename: str,
|
|
114
|
-
compression_format: Optional[CompressionFormat] = None,
|
|
114
|
+
compression_format: CompressionFormat | None = None,
|
|
115
115
|
force_contiguous: bool = True,
|
|
116
116
|
):
|
|
117
117
|
"""
|
|
@@ -18,7 +18,7 @@ import operator
|
|
|
18
18
|
import os
|
|
19
19
|
import re
|
|
20
20
|
from copy import deepcopy
|
|
21
|
-
from typing import TYPE_CHECKING, Any,
|
|
21
|
+
from typing import TYPE_CHECKING, Any, TypeVar
|
|
22
22
|
|
|
23
23
|
import compressed_tensors
|
|
24
24
|
import torch
|
|
@@ -109,16 +109,16 @@ class ModelCompressor:
|
|
|
109
109
|
:param quantization_config: config specifying quantization compression parameters
|
|
110
110
|
"""
|
|
111
111
|
|
|
112
|
-
sparsity_config:
|
|
113
|
-
quantization_config:
|
|
114
|
-
transform_config:
|
|
112
|
+
sparsity_config: SparsityCompressionConfig | None = None
|
|
113
|
+
quantization_config: QuantizationConfig | None = None
|
|
114
|
+
transform_config: TransformConfig | None = None
|
|
115
115
|
|
|
116
116
|
@classmethod
|
|
117
117
|
def from_pretrained(
|
|
118
118
|
cls,
|
|
119
119
|
pretrained_model_name_or_path: str,
|
|
120
120
|
**kwargs,
|
|
121
|
-
) ->
|
|
121
|
+
) -> "ModelCompressor | None":
|
|
122
122
|
"""
|
|
123
123
|
Given a path to a model config, extract the sparsity and/or quantization
|
|
124
124
|
configs and load a ModelCompressor
|
|
@@ -133,7 +133,7 @@ class ModelCompressor:
|
|
|
133
133
|
@classmethod
|
|
134
134
|
def from_compression_config(
|
|
135
135
|
cls,
|
|
136
|
-
compression_config:
|
|
136
|
+
compression_config: "dict[str, Any] | CompressedTensorsConfig",
|
|
137
137
|
):
|
|
138
138
|
"""
|
|
139
139
|
:param compression_config:
|
|
@@ -172,10 +172,10 @@ class ModelCompressor:
|
|
|
172
172
|
def from_pretrained_model(
|
|
173
173
|
cls,
|
|
174
174
|
model: Module,
|
|
175
|
-
sparsity_config_or_format:
|
|
176
|
-
quantization_format:
|
|
177
|
-
sparsity_config:
|
|
178
|
-
) ->
|
|
175
|
+
sparsity_config_or_format: SparsityCompressionConfig | str | None = None,
|
|
176
|
+
quantization_format: str | None = None,
|
|
177
|
+
sparsity_config: SparsityCompressionConfig | str | None = None,
|
|
178
|
+
) -> "ModelCompressor | None":
|
|
179
179
|
"""
|
|
180
180
|
Given a pytorch model and optional sparsity and/or quantization configs,
|
|
181
181
|
load the appropriate compressors
|
|
@@ -232,8 +232,8 @@ class ModelCompressor:
|
|
|
232
232
|
|
|
233
233
|
@staticmethod
|
|
234
234
|
def parse_sparsity_config(
|
|
235
|
-
compression_config:
|
|
236
|
-
) ->
|
|
235
|
+
compression_config: "dict[str, Any] | CompressedTensorsConfig",
|
|
236
|
+
) -> dict[str, Any] | None:
|
|
237
237
|
"""
|
|
238
238
|
Parse sparsity config from quantization/compression config. Sparsity
|
|
239
239
|
config is nested inside q/c config
|
|
@@ -253,8 +253,8 @@ class ModelCompressor:
|
|
|
253
253
|
|
|
254
254
|
@staticmethod
|
|
255
255
|
def parse_quantization_config(
|
|
256
|
-
compression_config:
|
|
257
|
-
) ->
|
|
256
|
+
compression_config: "dict[str, Any] | CompressedTensorsConfig",
|
|
257
|
+
) -> dict[str, Any] | None:
|
|
258
258
|
"""
|
|
259
259
|
Parse quantization config from quantization/compression config. The
|
|
260
260
|
quantization are all the fields that are not the sparsity config or
|
|
@@ -289,7 +289,7 @@ class ModelCompressor:
|
|
|
289
289
|
|
|
290
290
|
return quantization_config
|
|
291
291
|
|
|
292
|
-
def _fetch_unique_quantization_formats(self) ->
|
|
292
|
+
def _fetch_unique_quantization_formats(self) -> list[str]:
|
|
293
293
|
"""
|
|
294
294
|
Get all unique compression formats present in a model.
|
|
295
295
|
:return: list of quantization formats
|
|
@@ -309,10 +309,10 @@ class ModelCompressor:
|
|
|
309
309
|
|
|
310
310
|
def __init__(
|
|
311
311
|
self,
|
|
312
|
-
sparsity_config:
|
|
313
|
-
quantization_config:
|
|
314
|
-
transform_config:
|
|
315
|
-
compression_formats:
|
|
312
|
+
sparsity_config: SparsityCompressionConfig | None = None,
|
|
313
|
+
quantization_config: QuantizationConfig | None = None,
|
|
314
|
+
transform_config: TransformConfig | None = None,
|
|
315
|
+
compression_formats: list[str] | None = None,
|
|
316
316
|
):
|
|
317
317
|
self.sparsity_config = sparsity_config
|
|
318
318
|
self.quantization_config = quantization_config
|
|
@@ -320,8 +320,8 @@ class ModelCompressor:
|
|
|
320
320
|
self.compression_formats = compression_formats
|
|
321
321
|
|
|
322
322
|
self.sparsity_compressor = None
|
|
323
|
-
self.quantization_compressor:
|
|
324
|
-
|
|
323
|
+
self.quantization_compressor: dict[
|
|
324
|
+
str, BaseQuantizationCompressor | DenseCompressor
|
|
325
325
|
] = None
|
|
326
326
|
# no transform compressor is required
|
|
327
327
|
|
|
@@ -345,7 +345,7 @@ class ModelCompressor:
|
|
|
345
345
|
format, config=quantization_config
|
|
346
346
|
)
|
|
347
347
|
|
|
348
|
-
def get_missing_module_keys(self, model: Module) ->
|
|
348
|
+
def get_missing_module_keys(self, model: Module) -> list[str]:
|
|
349
349
|
"""
|
|
350
350
|
Identifies the expected missing weight keys in the compressed state_dict.
|
|
351
351
|
|
|
@@ -394,7 +394,7 @@ class ModelCompressor:
|
|
|
394
394
|
|
|
395
395
|
return list(missing_keys)
|
|
396
396
|
|
|
397
|
-
def get_unexpected_file_keys(self, model: Module) ->
|
|
397
|
+
def get_unexpected_file_keys(self, model: Module) -> list[str]:
|
|
398
398
|
"""
|
|
399
399
|
Identifies extra keys introduced by the compression process in the
|
|
400
400
|
compressed state_dict that are not expected by the model graph.
|
|
@@ -625,9 +625,9 @@ class ModelCompressor:
|
|
|
625
625
|
def compress(
|
|
626
626
|
self,
|
|
627
627
|
model: Module,
|
|
628
|
-
state_dict:
|
|
628
|
+
state_dict: dict[str, Tensor] | None = None,
|
|
629
629
|
show_progress: bool = False,
|
|
630
|
-
) ->
|
|
630
|
+
) -> dict[str, Tensor]:
|
|
631
631
|
"""
|
|
632
632
|
Compresses a dense state dict or model with sparsity and/or quantization
|
|
633
633
|
|
|
@@ -656,7 +656,7 @@ class ModelCompressor:
|
|
|
656
656
|
)
|
|
657
657
|
|
|
658
658
|
if self.sparsity_compressor is not None:
|
|
659
|
-
sparse_compression_targets:
|
|
659
|
+
sparse_compression_targets: set[str] = {
|
|
660
660
|
module_name
|
|
661
661
|
for module_name, _module in match_named_modules(
|
|
662
662
|
model=model,
|
|
@@ -732,7 +732,7 @@ class ModelCompressor:
|
|
|
732
732
|
QuantizationStatus.FROZEN,
|
|
733
733
|
):
|
|
734
734
|
apply_quantization_config(model, self.quantization_config)
|
|
735
|
-
names_to_scheme:
|
|
735
|
+
names_to_scheme: dict[str, QuantizationScheme] = {
|
|
736
736
|
name: getattr(module, "quantization_scheme")
|
|
737
737
|
for name, module in model.named_modules()
|
|
738
738
|
if getattr(module, "quantization_scheme", None) is not None
|
|
@@ -897,7 +897,7 @@ class ModelCompressor:
|
|
|
897
897
|
update_parameter_data(module, param_data, param_name)
|
|
898
898
|
|
|
899
899
|
|
|
900
|
-
def map_module_to_scheme(model: Module) ->
|
|
900
|
+
def map_module_to_scheme(model: Module) -> dict[str, QuantizationScheme]:
|
|
901
901
|
"""
|
|
902
902
|
Returns a dictionary which maps quantized module names to their quantization
|
|
903
903
|
schemes. Only includes modules with weight quantization
|
|
@@ -13,8 +13,9 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
|
+
from collections.abc import Generator
|
|
16
17
|
from pathlib import Path
|
|
17
|
-
from typing import Any
|
|
18
|
+
from typing import Any
|
|
18
19
|
|
|
19
20
|
import torch
|
|
20
21
|
from compressed_tensors.compressors.base import BaseCompressor
|
|
@@ -68,12 +69,12 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
|
68
69
|
|
|
69
70
|
def compress(
|
|
70
71
|
self,
|
|
71
|
-
model_state:
|
|
72
|
-
names_to_scheme:
|
|
72
|
+
model_state: dict[str, Tensor],
|
|
73
|
+
names_to_scheme: dict[str, QuantizationScheme],
|
|
73
74
|
show_progress: bool = False,
|
|
74
75
|
compression_device: str = "cpu",
|
|
75
76
|
**kwargs,
|
|
76
|
-
) ->
|
|
77
|
+
) -> dict[str, Tensor]:
|
|
77
78
|
"""
|
|
78
79
|
Compresses a dense state dict
|
|
79
80
|
|
|
@@ -141,7 +142,7 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
|
141
142
|
return isinstance(self, NVFP4PackedCompressor)
|
|
142
143
|
|
|
143
144
|
def _skip_zp(
|
|
144
|
-
self, name: str, names_to_scheme:
|
|
145
|
+
self, name: str, names_to_scheme: dict[str, QuantizationScheme]
|
|
145
146
|
) -> bool:
|
|
146
147
|
from compressed_tensors.compressors import PackedQuantizationCompressor
|
|
147
148
|
|
|
@@ -169,10 +170,10 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
|
169
170
|
|
|
170
171
|
def decompress(
|
|
171
172
|
self,
|
|
172
|
-
path_to_model_or_tensors:
|
|
173
|
-
names_to_scheme:
|
|
173
|
+
path_to_model_or_tensors: str | Path | dict[str, Any],
|
|
174
|
+
names_to_scheme: dict[str, QuantizationScheme],
|
|
174
175
|
device: str = "cpu",
|
|
175
|
-
) -> Generator[
|
|
176
|
+
) -> Generator[tuple[str, Tensor], None, None]:
|
|
176
177
|
"""
|
|
177
178
|
Reads a compressed state dict located at path_to_model_or_tensors
|
|
178
179
|
and returns a generator for sequentially decompressing back to a
|
|
@@ -196,8 +197,8 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
|
196
197
|
|
|
197
198
|
def _decompress_from_path(
|
|
198
199
|
self,
|
|
199
|
-
path_to_model:
|
|
200
|
-
names_to_scheme:
|
|
200
|
+
path_to_model: str | Path | dict[str, Any],
|
|
201
|
+
names_to_scheme: dict[str, QuantizationScheme],
|
|
201
202
|
device: str,
|
|
202
203
|
):
|
|
203
204
|
weight_mappings = get_nested_weight_mappings(
|
|
@@ -219,9 +220,9 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
|
219
220
|
|
|
220
221
|
def decompress_from_state_dict(
|
|
221
222
|
self,
|
|
222
|
-
state_dict:
|
|
223
|
-
names_to_scheme:
|
|
224
|
-
) -> Generator[
|
|
223
|
+
state_dict: dict[str, torch.Tensor],
|
|
224
|
+
names_to_scheme: dict[str, QuantizationScheme],
|
|
225
|
+
) -> Generator[tuple[str, dict[str, torch.Tensor]], None, None]:
|
|
225
226
|
weight_mappings = get_nested_mappings_from_state_dict(
|
|
226
227
|
state_dict, self.compression_param_names
|
|
227
228
|
)
|
|
@@ -239,9 +240,9 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
|
239
240
|
def decompress_module_from_state_dict(
|
|
240
241
|
self,
|
|
241
242
|
prefix: str,
|
|
242
|
-
state_dict:
|
|
243
|
+
state_dict: dict[str, torch.Tensor],
|
|
243
244
|
scheme: QuantizationScheme,
|
|
244
|
-
) ->
|
|
245
|
+
) -> dict[str, torch.Tensor]:
|
|
245
246
|
"""
|
|
246
247
|
Only used by in-memory decompression pathways to decompress the parameters of
|
|
247
248
|
one module
|
|
@@ -13,8 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
from typing import Dict, Optional, Tuple
|
|
17
|
-
|
|
18
16
|
import torch
|
|
19
17
|
from compressed_tensors.compressors.base import BaseCompressor
|
|
20
18
|
from compressed_tensors.compressors.quantized_compressors.base import (
|
|
@@ -48,7 +46,7 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
|
48
46
|
"""
|
|
49
47
|
|
|
50
48
|
@property
|
|
51
|
-
def compression_param_names(self) ->
|
|
49
|
+
def compression_param_names(self) -> tuple[str, ...]:
|
|
52
50
|
"""
|
|
53
51
|
Returns a tuple of compression parameter names introduced by
|
|
54
52
|
the compressor during compression
|
|
@@ -63,8 +61,8 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
|
63
61
|
def compression_param_info(
|
|
64
62
|
self,
|
|
65
63
|
weight_shape: torch.Size,
|
|
66
|
-
quantization_args:
|
|
67
|
-
) ->
|
|
64
|
+
quantization_args: QuantizationArgs | None = None,
|
|
65
|
+
) -> dict[str, tuple[torch.Size, torch.dtype]]:
|
|
68
66
|
"""
|
|
69
67
|
Creates a dictionary of expected shapes and dtypes for each compression
|
|
70
68
|
parameter used by the compressor
|
|
@@ -85,7 +83,7 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
|
85
83
|
self,
|
|
86
84
|
scale: Tensor,
|
|
87
85
|
quantization_args: QuantizationArgs,
|
|
88
|
-
) ->
|
|
86
|
+
) -> dict[str, torch.Tensor]:
|
|
89
87
|
assert quantization_args.scale_dtype is not None
|
|
90
88
|
return scale.to(quantization_args.scale_dtype)
|
|
91
89
|
|
|
@@ -95,10 +93,10 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
|
95
93
|
scale: Tensor,
|
|
96
94
|
global_scale: Tensor,
|
|
97
95
|
quantization_args: QuantizationArgs,
|
|
98
|
-
device:
|
|
99
|
-
zero_point:
|
|
100
|
-
g_idx:
|
|
101
|
-
) ->
|
|
96
|
+
device: torch.device | None = None,
|
|
97
|
+
zero_point: torch.Tensor | None = None,
|
|
98
|
+
g_idx: torch.Tensor | None = None,
|
|
99
|
+
) -> dict[str, torch.Tensor]:
|
|
102
100
|
quantized_weight = quantize(
|
|
103
101
|
x=weight,
|
|
104
102
|
scale=scale,
|
|
@@ -118,8 +116,8 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
|
118
116
|
|
|
119
117
|
def decompress_weight(
|
|
120
118
|
self,
|
|
121
|
-
compressed_data:
|
|
122
|
-
quantization_args:
|
|
119
|
+
compressed_data: dict[str, Tensor],
|
|
120
|
+
quantization_args: QuantizationArgs | None = None,
|
|
123
121
|
) -> torch.Tensor:
|
|
124
122
|
weight = compressed_data["weight_packed"]
|
|
125
123
|
global_scale = compressed_data["weight_global_scale"]
|
|
@@ -149,15 +147,15 @@ class MXFP4PackedCompressor(NVFP4PackedCompressor):
|
|
|
149
147
|
self,
|
|
150
148
|
scale: Tensor,
|
|
151
149
|
quantization_args: QuantizationArgs,
|
|
152
|
-
) ->
|
|
150
|
+
) -> dict[str, torch.Tensor]:
|
|
153
151
|
assert quantization_args.scale_dtype is not None
|
|
154
152
|
scale_exp = 127 + torch.floor(torch.log2(scale)).to(torch.int32)
|
|
155
153
|
return scale_exp.to(quantization_args.scale_dtype)
|
|
156
154
|
|
|
157
155
|
def decompress_weight(
|
|
158
156
|
self,
|
|
159
|
-
compressed_data:
|
|
160
|
-
quantization_args:
|
|
157
|
+
compressed_data: dict[str, Tensor],
|
|
158
|
+
quantization_args: QuantizationArgs | None = None,
|
|
161
159
|
) -> torch.Tensor:
|
|
162
160
|
raise NotImplementedError("MXFP4 Decompression is currently not supported")
|
|
163
161
|
|
|
@@ -216,7 +214,7 @@ kE2M1ToFloat = torch.tensor(
|
|
|
216
214
|
# reference: : https://github.com/vllm-project/vllm/pull/16362
|
|
217
215
|
@torch.compile(fullgraph=True, dynamic=True)
|
|
218
216
|
def unpack_fp4_from_uint8(
|
|
219
|
-
a: torch.Tensor, m: int, n: int, dtype:
|
|
217
|
+
a: torch.Tensor, m: int, n: int, dtype: torch.dtype | None = torch.bfloat16
|
|
220
218
|
) -> torch.Tensor:
|
|
221
219
|
"""
|
|
222
220
|
Unpacks uint8 values into fp4. Each uint8 consists of two fp4 values
|
|
@@ -12,8 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Dict, Optional, Tuple
|
|
16
|
-
|
|
17
15
|
import torch
|
|
18
16
|
from compressed_tensors.compressors.base import BaseCompressor
|
|
19
17
|
from compressed_tensors.compressors.quantized_compressors.base import (
|
|
@@ -42,7 +40,7 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
42
40
|
"""
|
|
43
41
|
|
|
44
42
|
@property
|
|
45
|
-
def compression_param_names(self) ->
|
|
43
|
+
def compression_param_names(self) -> tuple[str, ...]:
|
|
46
44
|
"""
|
|
47
45
|
Returns a tuple of compression parameter names introduced by
|
|
48
46
|
the compressor during compression
|
|
@@ -57,8 +55,8 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
57
55
|
def compression_param_info(
|
|
58
56
|
self,
|
|
59
57
|
weight_shape: torch.Size,
|
|
60
|
-
quantization_args:
|
|
61
|
-
) ->
|
|
58
|
+
quantization_args: QuantizationArgs | None = None,
|
|
59
|
+
) -> dict[str, tuple[torch.Size, torch.dtype]]:
|
|
62
60
|
"""
|
|
63
61
|
Creates a dictionary of expected shapes and dtypes for each compression
|
|
64
62
|
parameter used by the compressor
|
|
@@ -75,11 +73,11 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
75
73
|
weight: Tensor,
|
|
76
74
|
scale: Tensor,
|
|
77
75
|
quantization_args: QuantizationArgs,
|
|
78
|
-
zero_point:
|
|
79
|
-
g_idx:
|
|
80
|
-
device:
|
|
81
|
-
global_scale:
|
|
82
|
-
) ->
|
|
76
|
+
zero_point: Tensor | None = None,
|
|
77
|
+
g_idx: torch.Tensor | None = None,
|
|
78
|
+
device: torch.device | None = None,
|
|
79
|
+
global_scale: torch.Tensor | None = None,
|
|
80
|
+
) -> dict[str, torch.Tensor]:
|
|
83
81
|
"""
|
|
84
82
|
Compresses a single uncompressed weight
|
|
85
83
|
|
|
@@ -115,8 +113,8 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
115
113
|
|
|
116
114
|
def decompress_weight(
|
|
117
115
|
self,
|
|
118
|
-
compressed_data:
|
|
119
|
-
quantization_args:
|
|
116
|
+
compressed_data: dict[str, Tensor],
|
|
117
|
+
quantization_args: QuantizationArgs | None = None,
|
|
120
118
|
) -> torch.Tensor:
|
|
121
119
|
"""
|
|
122
120
|
Decompresses a single compressed weight
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import math
|
|
15
|
-
from typing import
|
|
15
|
+
from typing import Literal
|
|
16
16
|
|
|
17
17
|
import torch
|
|
18
18
|
from compressed_tensors.compressors.base import BaseCompressor
|
|
@@ -36,7 +36,7 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
36
36
|
"""
|
|
37
37
|
|
|
38
38
|
@property
|
|
39
|
-
def compression_param_names(self) ->
|
|
39
|
+
def compression_param_names(self) -> tuple[str, ...]:
|
|
40
40
|
"""
|
|
41
41
|
Returns a tuple of compression parameter names introduced by
|
|
42
42
|
the compressor during compression
|
|
@@ -52,8 +52,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
52
52
|
def compression_param_info(
|
|
53
53
|
self,
|
|
54
54
|
weight_shape: torch.Size,
|
|
55
|
-
quantization_args:
|
|
56
|
-
) ->
|
|
55
|
+
quantization_args: QuantizationArgs | None = None,
|
|
56
|
+
) -> dict[str, tuple[torch.Size, torch.dtype]]:
|
|
57
57
|
"""
|
|
58
58
|
Creates a dictionary of expected shapes and dtypes for each compression
|
|
59
59
|
parameter used by the compressor
|
|
@@ -90,11 +90,11 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
90
90
|
weight: Tensor,
|
|
91
91
|
scale: Tensor,
|
|
92
92
|
quantization_args: QuantizationArgs,
|
|
93
|
-
zero_point:
|
|
94
|
-
g_idx:
|
|
95
|
-
device:
|
|
96
|
-
global_scale:
|
|
97
|
-
) ->
|
|
93
|
+
zero_point: Tensor | None = None,
|
|
94
|
+
g_idx: torch.Tensor | None = None,
|
|
95
|
+
device: torch.device | None = None,
|
|
96
|
+
global_scale: torch.Tensor | None = None,
|
|
97
|
+
) -> dict[str, torch.Tensor]:
|
|
98
98
|
"""
|
|
99
99
|
Compresses a single uncompressed weight
|
|
100
100
|
|
|
@@ -146,8 +146,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
146
146
|
|
|
147
147
|
def decompress_weight(
|
|
148
148
|
self,
|
|
149
|
-
compressed_data:
|
|
150
|
-
quantization_args:
|
|
149
|
+
compressed_data: dict[str, Tensor],
|
|
150
|
+
quantization_args: QuantizationArgs | None = None,
|
|
151
151
|
) -> torch.Tensor:
|
|
152
152
|
"""
|
|
153
153
|
Decompresses a single compressed weight
|
|
@@ -190,7 +190,7 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
190
190
|
def pack_to_int32(
|
|
191
191
|
value: torch.Tensor,
|
|
192
192
|
num_bits: int,
|
|
193
|
-
packed_dim:
|
|
193
|
+
packed_dim: Literal[0, 1] = 1,
|
|
194
194
|
) -> torch.Tensor:
|
|
195
195
|
"""
|
|
196
196
|
Packs a tensor of quantized weights stored in int8 into int32s with padding
|
|
@@ -254,7 +254,7 @@ def unpack_from_int32(
|
|
|
254
254
|
value: torch.Tensor,
|
|
255
255
|
num_bits: int,
|
|
256
256
|
shape: torch.Size,
|
|
257
|
-
packed_dim:
|
|
257
|
+
packed_dim: Literal[0, 1] = 1,
|
|
258
258
|
) -> torch.Tensor:
|
|
259
259
|
"""
|
|
260
260
|
Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
|
-
from
|
|
16
|
+
from collections.abc import Generator
|
|
17
17
|
|
|
18
18
|
from compressed_tensors.compressors.base import BaseCompressor
|
|
19
19
|
from compressed_tensors.utils import (
|
|
@@ -65,10 +65,10 @@ class BaseSparseCompressor(BaseCompressor):
|
|
|
65
65
|
|
|
66
66
|
def compress(
|
|
67
67
|
self,
|
|
68
|
-
model_state:
|
|
69
|
-
compression_targets:
|
|
68
|
+
model_state: dict[str, Tensor],
|
|
69
|
+
compression_targets: set[str] | None = None,
|
|
70
70
|
show_progress: bool = False,
|
|
71
|
-
) ->
|
|
71
|
+
) -> dict[str, Tensor]:
|
|
72
72
|
"""
|
|
73
73
|
Compresses a dense state dict using bitmask compression
|
|
74
74
|
|
|
@@ -110,9 +110,9 @@ class BaseSparseCompressor(BaseCompressor):
|
|
|
110
110
|
self,
|
|
111
111
|
path_to_model_or_tensors: str,
|
|
112
112
|
device: str = "cpu",
|
|
113
|
-
params_to_skip_load:
|
|
113
|
+
params_to_skip_load: tuple | None = None,
|
|
114
114
|
**kwargs,
|
|
115
|
-
) -> Generator[
|
|
115
|
+
) -> Generator[tuple[str, Tensor], None, None]:
|
|
116
116
|
"""
|
|
117
117
|
Reads a bitmask compressed state dict located
|
|
118
118
|
at path_to_model_or_tensors and returns a generator
|
|
@@ -157,8 +157,8 @@ class BaseSparseCompressor(BaseCompressor):
|
|
|
157
157
|
|
|
158
158
|
def decompress_from_state_dict(
|
|
159
159
|
self,
|
|
160
|
-
state_dict:
|
|
161
|
-
) -> Generator[
|
|
160
|
+
state_dict: dict[str, Tensor],
|
|
161
|
+
) -> Generator[tuple[str, dict[str, Tensor]], None, None]:
|
|
162
162
|
"""
|
|
163
163
|
Decompress the state dict of a module (or model)
|
|
164
164
|
|
|
@@ -185,7 +185,7 @@ class BaseSparseCompressor(BaseCompressor):
|
|
|
185
185
|
yield ignored_param_path, ignored_param_value
|
|
186
186
|
|
|
187
187
|
@staticmethod
|
|
188
|
-
def should_compress(name: str, expanded_targets:
|
|
188
|
+
def should_compress(name: str, expanded_targets: set[str] | None = None) -> bool:
|
|
189
189
|
"""
|
|
190
190
|
Check if a parameter should be compressed.
|
|
191
191
|
Currently, this only returns True for weight parameters.
|