compressed-tensors 0.11.1a20250820__tar.gz → 0.11.1a20250821__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors-0.11.1a20250821/.github/workflows/quality-check.yaml +29 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/workflows/test-check.yaml +3 -0
- {compressed_tensors-0.11.1a20250820/src/compressed_tensors.egg-info → compressed_tensors-0.11.1a20250821}/PKG-INFO +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/setup.cfg +1 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +172 -153
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/quantized_compressors/base.py +2 -2
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +4 -5
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +4 -3
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/lifecycle/apply.py +40 -129
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/lifecycle/forward.py +5 -4
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/lifecycle/initialize.py +7 -6
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/quant_args.py +7 -5
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/quant_scheme.py +4 -3
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/utils/helpers.py +0 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/registry/registry.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/transform_config.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/utils/matrix.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/match.py +57 -8
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/offload.py +0 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/safetensors_load.py +0 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors.egg-info/SOURCES.txt +1 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_examples/test_bitmask_compression_ipynb.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/test_apply.py +6 -78
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/test_initialize.py +0 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/test_quant_scheme.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_transform/conftest.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_transform/test_transform_config.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_utils/test_match.py +108 -61
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_utils/test_offload.py +4 -4
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_utils/test_type.py +1 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/utils/copyright.py +4 -1
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/.gitkeep +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/workflows/report.yml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/workflows/test.yml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/workflows/trigger-all.yml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.github/workflows/upload.yml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/.gitignore +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/LICENSE +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/Makefile +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/README.md +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/pyproject.toml +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/setup.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/apply.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/factory/base.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/internal.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors/utils/type.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/conftest.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_registry.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_transform/factory/test_correctness.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_transform/factory/test_memory.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_transform/factory/test_serialization.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_transform/utils/test_hadamard.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.11.1a20250820 → compressed_tensors-0.11.1a20250821}/tests/testing_utils.py +0 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
name: Quality Checks
|
2
|
+
on:
|
3
|
+
push:
|
4
|
+
branches:
|
5
|
+
- main
|
6
|
+
- 'release/*'
|
7
|
+
pull_request:
|
8
|
+
branches:
|
9
|
+
- main
|
10
|
+
- 'release/*'
|
11
|
+
|
12
|
+
jobs:
|
13
|
+
quality-check:
|
14
|
+
runs-on: ubuntu-24.04
|
15
|
+
steps:
|
16
|
+
- uses: actions/setup-python@v5
|
17
|
+
with:
|
18
|
+
python-version: '3.10'
|
19
|
+
- uses: actions/checkout@v4
|
20
|
+
with:
|
21
|
+
fetch-depth: 0
|
22
|
+
fetch-tags: true
|
23
|
+
- name: Set Env
|
24
|
+
run: |
|
25
|
+
pip3 install --upgrade pip && pip3 install --upgrade setuptools
|
26
|
+
- name: "⚙️ Install dependencies"
|
27
|
+
run: pip3 install .[dev]
|
28
|
+
- name: "🧹 Running quality checks"
|
29
|
+
run: make quality
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.11.1a20250820
|
3
|
+
Version: 0.11.1a20250821
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -42,8 +42,6 @@ from compressed_tensors.quantization import (
|
|
42
42
|
apply_quantization_config,
|
43
43
|
load_pretrained_quantization_parameters,
|
44
44
|
)
|
45
|
-
from compressed_tensors.quantization.lifecycle import expand_target_names
|
46
|
-
from compressed_tensors.quantization.utils import is_module_quantized
|
47
45
|
from compressed_tensors.transform import TransformConfig
|
48
46
|
from compressed_tensors.utils import (
|
49
47
|
align_module_device,
|
@@ -60,6 +58,7 @@ from compressed_tensors.utils.helpers import (
|
|
60
58
|
fix_fsdp_module_name,
|
61
59
|
is_compressed_tensors_config,
|
62
60
|
)
|
61
|
+
from compressed_tensors.utils.match import match_named_modules
|
63
62
|
from torch import Tensor
|
64
63
|
from torch.nn import Module
|
65
64
|
from tqdm import tqdm
|
@@ -309,7 +308,7 @@ class ModelCompressor:
|
|
309
308
|
if quantization_config is not None:
|
310
309
|
# If a list of compression_format is not provided, we resolve the
|
311
310
|
# relevant quantization formats using the config groups from the config
|
312
|
-
# and if those are not defined, we fall-back to the global quantization
|
311
|
+
# and if those are not defined, we fall-back to the global quantization fmt
|
313
312
|
if not self.compression_formats:
|
314
313
|
self.compression_formats = self._fetch_unique_quantization_formats()
|
315
314
|
|
@@ -342,13 +341,15 @@ class ModelCompressor:
|
|
342
341
|
self.sparsity_compressor
|
343
342
|
and self.sparsity_config.format != CompressionFormat.dense.value
|
344
343
|
):
|
345
|
-
sparse_targets =
|
344
|
+
sparse_targets = match_named_modules(
|
346
345
|
model=model,
|
347
346
|
targets=self.sparsity_config.targets,
|
348
347
|
ignore=self.sparsity_config.ignore,
|
349
348
|
)
|
349
|
+
|
350
350
|
missing_keys.update(
|
351
|
-
merge_names(
|
351
|
+
merge_names(target_name, "weight")
|
352
|
+
for target_name, _module in sparse_targets
|
352
353
|
)
|
353
354
|
|
354
355
|
# Determine missing keys due to pack quantization
|
@@ -358,13 +359,14 @@ class ModelCompressor:
|
|
358
359
|
== CompressionFormat.pack_quantized.value
|
359
360
|
):
|
360
361
|
for scheme in self.quantization_config.config_groups.values():
|
361
|
-
quant_targets =
|
362
|
+
quant_targets = match_named_modules(
|
362
363
|
model=model,
|
363
364
|
targets=scheme.targets,
|
364
365
|
ignore=self.quantization_config.ignore,
|
365
366
|
)
|
366
367
|
missing_keys.update(
|
367
|
-
merge_names(
|
368
|
+
merge_names(target_name, "weight")
|
369
|
+
for target_name, _module in quant_targets
|
368
370
|
)
|
369
371
|
|
370
372
|
return list(missing_keys)
|
@@ -395,29 +397,29 @@ class ModelCompressor:
|
|
395
397
|
self.sparsity_compressor
|
396
398
|
and self.sparsity_config.format != CompressionFormat.dense.value
|
397
399
|
):
|
398
|
-
sparse_targets
|
400
|
+
sparse_targets = match_named_modules(
|
399
401
|
model=model,
|
400
402
|
targets=self.sparsity_config.targets,
|
401
403
|
ignore=self.sparsity_config.ignore,
|
402
404
|
)
|
403
405
|
unexpected_keys.update(
|
404
|
-
merge_names(
|
405
|
-
for
|
406
|
+
merge_names(target_name, param)
|
407
|
+
for target_name, _module in sparse_targets
|
406
408
|
for param in self.sparsity_compressor.compression_param_names
|
407
409
|
)
|
408
410
|
|
409
411
|
# Identify unexpected keys from quantization compression
|
410
412
|
if self.quantization_compressor:
|
411
413
|
for scheme in self.quantization_config.config_groups.values():
|
412
|
-
quant_targets
|
414
|
+
quant_targets = match_named_modules(
|
413
415
|
model=model,
|
414
416
|
targets=scheme.targets,
|
415
417
|
ignore=self.quantization_config.ignore,
|
416
418
|
)
|
417
419
|
for quant_compressor in self.quantization_compressor.values():
|
418
420
|
unexpected_keys.update(
|
419
|
-
merge_names(
|
420
|
-
for
|
421
|
+
merge_names(target_name, param)
|
422
|
+
for target_name, _module in quant_targets
|
421
423
|
for param in quant_compressor.compression_param_names
|
422
424
|
if param != "weight"
|
423
425
|
)
|
@@ -434,73 +436,79 @@ class ModelCompressor:
|
|
434
436
|
:param model: model containing parameters to compress
|
435
437
|
"""
|
436
438
|
module_to_scheme = map_module_to_scheme(model)
|
437
|
-
sparse_compression_targets
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
)
|
439
|
+
sparse_compression_targets = [
|
440
|
+
module_name
|
441
|
+
for module_name, _module in match_named_modules(
|
442
|
+
model=model,
|
443
|
+
targets=self.sparsity_config.targets if self.sparsity_config else [],
|
444
|
+
ignore=self.sparsity_config.ignore if self.sparsity_config else [],
|
445
|
+
)
|
446
|
+
]
|
447
|
+
for prefix, module in tqdm(
|
448
|
+
match_named_modules(
|
449
|
+
model,
|
450
|
+
[*sparse_compression_targets, *module_to_scheme.keys()],
|
451
|
+
warn_on_fail=True,
|
452
|
+
),
|
453
|
+
desc="Compressing model",
|
454
|
+
):
|
455
|
+
module_device = get_execution_device(module)
|
456
|
+
is_meta = module_device.type == "meta"
|
457
|
+
|
458
|
+
exec_device = "meta" if is_meta else "cpu"
|
459
|
+
onloading_device = "meta" if is_meta else module_device
|
460
|
+
|
461
|
+
# in the future, support compression on same device
|
462
|
+
with align_module_device(module, execution_device=exec_device):
|
463
|
+
state_dict = {
|
464
|
+
f"{prefix}.{name}": param
|
465
|
+
for name, param in module.named_parameters(recurse=False)
|
466
|
+
}
|
467
|
+
|
468
|
+
# quantization first
|
469
|
+
if prefix in module_to_scheme:
|
470
|
+
if (
|
471
|
+
not hasattr(module.quantization_scheme, "format")
|
472
|
+
or module.quantization_scheme.format is None
|
473
|
+
):
|
474
|
+
if len(self.compression_formats) > 1:
|
475
|
+
raise ValueError(
|
476
|
+
"Applying multiple compressors without defining "
|
477
|
+
"per module formats is not supported "
|
478
|
+
)
|
479
|
+
format = self.compression_formats[0]
|
480
|
+
else:
|
481
|
+
format = module.quantization_scheme.format
|
482
|
+
|
483
|
+
quant_compressor = self.quantization_compressor.get(format)
|
484
|
+
state_dict = quant_compressor.compress(
|
485
|
+
state_dict,
|
486
|
+
names_to_scheme=module_to_scheme,
|
487
|
+
show_progress=False,
|
488
|
+
compression_device=exec_device,
|
489
|
+
)
|
489
490
|
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
491
|
+
# sparsity second
|
492
|
+
if prefix in sparse_compression_targets:
|
493
|
+
state_dict = self.sparsity_compressor.compress(
|
494
|
+
state_dict,
|
495
|
+
compression_targets=sparse_compression_targets,
|
496
|
+
show_progress=False,
|
497
|
+
)
|
494
498
|
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
param = torch.nn.Parameter(value, requires_grad=False)
|
500
|
-
register_offload_parameter(module, name, param, offload_device)
|
499
|
+
# remove any existing parameters
|
500
|
+
offload_device = get_offloaded_device(module)
|
501
|
+
for name, _ in list(module.named_parameters(recurse=False)):
|
502
|
+
delete_offload_parameter(module, name)
|
501
503
|
|
502
|
-
|
504
|
+
# replace with compressed parameters
|
505
|
+
for name, value in state_dict.items():
|
506
|
+
name = name.removeprefix(f"{prefix}.")
|
507
|
+
value = value.to(onloading_device)
|
508
|
+
param = torch.nn.Parameter(value, requires_grad=False)
|
509
|
+
register_offload_parameter(module, name, param, offload_device)
|
503
510
|
|
511
|
+
module.quantization_status = QuantizationStatus.COMPRESSED
|
504
512
|
# TODO: consider sparse compression to also be compression
|
505
513
|
if (
|
506
514
|
self.quantization_config is not None
|
@@ -516,67 +524,75 @@ class ModelCompressor:
|
|
516
524
|
:param model: model containing parameters to compress
|
517
525
|
"""
|
518
526
|
module_to_scheme = map_module_to_scheme(model)
|
519
|
-
sparse_compression_targets
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
527
|
+
sparse_compression_targets = [
|
528
|
+
module_name
|
529
|
+
for module_name, _module in match_named_modules(
|
530
|
+
model=model,
|
531
|
+
targets=self.sparsity_config.targets if self.sparsity_config else [],
|
532
|
+
ignore=self.sparsity_config.ignore if self.sparsity_config else [],
|
533
|
+
)
|
534
|
+
]
|
535
|
+
|
536
|
+
for prefix, module in tqdm(
|
537
|
+
match_named_modules(
|
538
|
+
model,
|
539
|
+
[*sparse_compression_targets, *module_to_scheme.keys()],
|
540
|
+
warn_on_fail=True,
|
541
|
+
),
|
542
|
+
desc="Decompressing model",
|
543
|
+
):
|
544
|
+
# in the future, support decompression on same device
|
545
|
+
with align_module_device(module, execution_device="cpu"):
|
546
|
+
state_dict = {
|
547
|
+
f"{prefix}.{name}": param
|
548
|
+
for name, param in module.named_parameters(recurse=False)
|
549
|
+
}
|
550
|
+
|
551
|
+
# sparsity first
|
552
|
+
if prefix in sparse_compression_targets:
|
553
|
+
# sparse_compression_targets are automatically inferred by this fn
|
554
|
+
generator = self.sparsity_compressor.decompress_from_state_dict(
|
555
|
+
state_dict,
|
556
|
+
)
|
557
|
+
# generates (param_path, param_val)
|
558
|
+
# of compressed and unused params
|
559
|
+
state_dict = {key: value for key, value in generator}
|
560
|
+
|
561
|
+
# quantization second
|
562
|
+
if prefix in module_to_scheme:
|
563
|
+
if (
|
564
|
+
not hasattr(module.quantization_scheme, "format")
|
565
|
+
or module.quantization_scheme.format is None
|
566
|
+
):
|
567
|
+
if len(self.compression_formats) > 1:
|
568
|
+
raise ValueError(
|
569
|
+
"Applying multiple compressors without defining "
|
570
|
+
"per module formats is not supported "
|
571
|
+
)
|
572
|
+
format = self.compression_formats[0]
|
573
|
+
else:
|
574
|
+
format = module.quantization_scheme.format
|
575
|
+
quant_compressor = self.quantization_compressor.get(format)
|
576
|
+
state_dict = quant_compressor.decompress_module_from_state_dict(
|
577
|
+
prefix,
|
578
|
+
state_dict,
|
579
|
+
scheme=module_to_scheme[prefix],
|
580
|
+
)
|
565
581
|
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
582
|
+
# remove any existing parameters
|
583
|
+
exec_device = get_execution_device(module)
|
584
|
+
offload_device = get_offloaded_device(module)
|
585
|
+
for name, _ in list(module.named_parameters(recurse=False)):
|
586
|
+
delete_offload_parameter(module, name)
|
571
587
|
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
588
|
+
# replace with decompressed parameters
|
589
|
+
for name, value in state_dict.items():
|
590
|
+
name = name.removeprefix(f"{prefix}.")
|
591
|
+
value = value.to(exec_device)
|
592
|
+
param = torch.nn.Parameter(value, requires_grad=False)
|
593
|
+
register_offload_parameter(module, name, param, offload_device)
|
578
594
|
|
579
|
-
|
595
|
+
module.quantization_status = QuantizationStatus.FROZEN
|
580
596
|
|
581
597
|
# ----- state dict compression pathways ----- #
|
582
598
|
|
@@ -614,11 +630,14 @@ class ModelCompressor:
|
|
614
630
|
)
|
615
631
|
|
616
632
|
if self.sparsity_compressor is not None:
|
617
|
-
sparse_compression_targets: Set[str] =
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
633
|
+
sparse_compression_targets: Set[str] = {
|
634
|
+
module_name
|
635
|
+
for module_name, _module in match_named_modules(
|
636
|
+
model=model,
|
637
|
+
targets=self.sparsity_config.targets,
|
638
|
+
ignore=self.sparsity_config.ignore,
|
639
|
+
)
|
640
|
+
}
|
622
641
|
state_dict = self.sparsity_compressor.compress(
|
623
642
|
state_dict,
|
624
643
|
compression_targets=sparse_compression_targets,
|
@@ -641,11 +660,12 @@ class ModelCompressor:
|
|
641
660
|
:param model_path: path to compressed weights
|
642
661
|
:param model: pytorch model to load decompressed weights into
|
643
662
|
|
644
|
-
Note: decompress makes use of both _replace_sparsity_weights and
|
645
|
-
The variations in these methods are a result of the subtle
|
646
|
-
and quantization compressors. Specifically,
|
647
|
-
|
648
|
-
|
663
|
+
Note: decompress makes use of both _replace_sparsity_weights and
|
664
|
+
_replace_weights. The variations in these methods are a result of the subtle
|
665
|
+
variations between the sparsity and quantization compressors. Specifically,
|
666
|
+
quantization compressors return not just the decompressed weight, but the
|
667
|
+
quantization parameters (e.g scales, zero_point) whereas sparsity compressors
|
668
|
+
only return the decompressed weight.
|
649
669
|
|
650
670
|
"""
|
651
671
|
model_path = get_safetensors_folder(model_path)
|
@@ -683,18 +703,17 @@ class ModelCompressor:
|
|
683
703
|
with override_quantization_status(
|
684
704
|
self.quantization_config, QuantizationStatus.FROZEN
|
685
705
|
):
|
686
|
-
|
687
706
|
names_to_scheme = apply_quantization_config(
|
688
707
|
model, self.quantization_config
|
689
708
|
)
|
690
709
|
# Load activation scales/zp or any other quantization parameters
|
691
|
-
# Conditionally load the weight quantization parameters if we have a
|
692
|
-
#
|
710
|
+
# Conditionally load the weight quantization parameters if we have a
|
711
|
+
# dense compressor or if a sparsity compressor has already been applied
|
693
712
|
load_pretrained_quantization_parameters(
|
694
713
|
model,
|
695
714
|
model_path,
|
696
|
-
# TODO: all weight quantization params will be moved to the
|
697
|
-
# including initialization
|
715
|
+
# TODO: all weight quantization params will be moved to the
|
716
|
+
# compressor in a follow-up including initialization
|
698
717
|
load_weight_quantization=(
|
699
718
|
sparse_decompressed
|
700
719
|
or isinstance(quant_compressor, DenseCompressor)
|
@@ -786,7 +805,6 @@ class ModelCompressor:
|
|
786
805
|
:param model: The model whose weights are to be updated.
|
787
806
|
"""
|
788
807
|
for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
|
789
|
-
|
790
808
|
split_name = name.split(".")
|
791
809
|
prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
|
792
810
|
module = operator.attrgetter(prefix)(model)
|
@@ -822,9 +840,10 @@ class ModelCompressor:
|
|
822
840
|
for param_name, param_data in data.items():
|
823
841
|
if hasattr(module, param_name):
|
824
842
|
# If compressed, will have an incorrect dtype for transformers >4.49
|
825
|
-
# TODO: we can also just skip initialization of scales/zp if in
|
826
|
-
# to be consistent with loading which happens
|
827
|
-
# however, update_data does a good shape check -
|
843
|
+
# TODO: we can also just skip initialization of scales/zp if in
|
844
|
+
# decompression in init to be consistent with loading which happens
|
845
|
+
# later as well however, update_data does a good shape check -
|
846
|
+
# should be moved to the compressor
|
828
847
|
if param_name == "weight":
|
829
848
|
delattr(module, param_name)
|
830
849
|
requires_grad = param_data.dtype in (
|
@@ -24,7 +24,6 @@ from compressed_tensors.utils import (
|
|
24
24
|
get_nested_weight_mappings,
|
25
25
|
merge_names,
|
26
26
|
)
|
27
|
-
from compressed_tensors.utils.safetensors_load import match_param_name
|
28
27
|
from safetensors import safe_open
|
29
28
|
from torch import Tensor
|
30
29
|
from tqdm import tqdm
|
@@ -107,7 +106,8 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
107
106
|
compressed_dict[name] = value.to(compression_device)
|
108
107
|
continue
|
109
108
|
|
110
|
-
# compress values on meta if loading from meta otherwise on cpu (memory
|
109
|
+
# compress values on meta if loading from meta otherwise on cpu (memory
|
110
|
+
# movement too expensive)
|
111
111
|
module_path = prefix[:-1] if prefix.endswith(".") else prefix
|
112
112
|
quant_args = names_to_scheme[module_path].weights
|
113
113
|
compressed_values = self.compress_weight(
|
@@ -15,7 +15,6 @@
|
|
15
15
|
|
16
16
|
from typing import Dict, Optional, Tuple
|
17
17
|
|
18
|
-
import numpy
|
19
18
|
import torch
|
20
19
|
from compressed_tensors.compressors.base import BaseCompressor
|
21
20
|
from compressed_tensors.compressors.quantized_compressors.base import (
|
@@ -92,7 +91,6 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
92
91
|
zero_point: Optional[torch.Tensor] = None,
|
93
92
|
g_idx: Optional[torch.Tensor] = None,
|
94
93
|
) -> Dict[str, torch.Tensor]:
|
95
|
-
|
96
94
|
quantized_weight = quantize(
|
97
95
|
x=weight,
|
98
96
|
scale=scale,
|
@@ -112,7 +110,6 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
112
110
|
compressed_data: Dict[str, Tensor],
|
113
111
|
quantization_args: Optional[QuantizationArgs] = None,
|
114
112
|
) -> torch.Tensor:
|
115
|
-
|
116
113
|
weight = compressed_data["weight_packed"]
|
117
114
|
scale = compressed_data["weight_scale"]
|
118
115
|
global_scale = compressed_data["weight_global_scale"]
|
@@ -175,14 +172,16 @@ kE2M1ToFloat = torch.tensor(
|
|
175
172
|
[0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32
|
176
173
|
)
|
177
174
|
|
175
|
+
|
178
176
|
# reference: : https://github.com/vllm-project/vllm/pull/16362
|
179
177
|
def unpack_fp4_from_uint8(
|
180
178
|
a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
|
181
179
|
) -> torch.Tensor:
|
182
180
|
"""
|
183
181
|
Unpacks uint8 values into fp4. Each uint8 consists of two fp4 values
|
184
|
-
(i.e. first four bits correspond to one fp4 value, last four
|
185
|
-
fp4 value). The bits represent an index, which are mapped to an fp4
|
182
|
+
(i.e. first four bits correspond to one fp4 value, last four correspond to a
|
183
|
+
consecutive fp4 value). The bits represent an index, which are mapped to an fp4
|
184
|
+
value.
|
186
185
|
|
187
186
|
:param a: tensor to unpack
|
188
187
|
:param m: original dim 0 size of the unpacked tensor
|
@@ -14,7 +14,6 @@
|
|
14
14
|
import math
|
15
15
|
from typing import Dict, Literal, Optional, Tuple, Union
|
16
16
|
|
17
|
-
import numpy as np
|
18
17
|
import torch
|
19
18
|
from compressed_tensors.compressors.base import BaseCompressor
|
20
19
|
from compressed_tensors.compressors.quantized_compressors.base import (
|
@@ -135,7 +134,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
135
134
|
compressed_dict["weight_shape"] = weight_shape
|
136
135
|
compressed_dict["weight_packed"] = packed_weight
|
137
136
|
|
138
|
-
# We typically don't compress zp; apart from when using the packed_compressor
|
137
|
+
# We typically don't compress zp; apart from when using the packed_compressor
|
138
|
+
# and when storing group/channel zp
|
139
139
|
if not quantization_args.symmetric and quantization_args.strategy in [
|
140
140
|
QuantizationStrategy.GROUP.value,
|
141
141
|
QuantizationStrategy.CHANNEL.value,
|
@@ -166,7 +166,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
166
166
|
num_bits = quantization_args.num_bits
|
167
167
|
unpacked = unpack_from_int32(weight, num_bits, original_shape)
|
168
168
|
|
169
|
-
# NOTE: this will fail decompression as we don't currently handle packed zp on
|
169
|
+
# NOTE: this will fail decompression as we don't currently handle packed zp on
|
170
|
+
# decompression
|
170
171
|
if not quantization_args.symmetric and quantization_args.strategy in [
|
171
172
|
QuantizationStrategy.GROUP.value,
|
172
173
|
QuantizationStrategy.CHANNEL.value,
|
@@ -13,7 +13,7 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
from dataclasses import dataclass
|
16
|
-
from typing import Dict,
|
16
|
+
from typing import Dict, List, Tuple, Union
|
17
17
|
|
18
18
|
import torch
|
19
19
|
from compressed_tensors.compressors.base import BaseCompressor
|
@@ -48,7 +48,7 @@ class Marlin24Compressor(BaseCompressor):
|
|
48
48
|
|
49
49
|
@staticmethod
|
50
50
|
def validate_quant_compatability(
|
51
|
-
names_to_scheme: Dict[str, QuantizationScheme]
|
51
|
+
names_to_scheme: Dict[str, QuantizationScheme],
|
52
52
|
) -> bool:
|
53
53
|
"""
|
54
54
|
Checks if every quantized module in the model is compatible with Marlin24
|