compressed-tensors 0.13.0__tar.gz → 0.13.1a20260108__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.13.0/src/compressed_tensors.egg-info → compressed_tensors-0.13.1a20260108}/PKG-INFO +1 -1
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/base.py +8 -2
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +4 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +5 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +4 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/version.py +2 -2
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.github/.gitkeep +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.github/workflows/quality-check.yaml +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.gitignore +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/LICENSE +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/Makefile +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/README.md +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/pyproject.toml +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/setup.cfg +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/setup.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/config/format.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/logger.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/modeling/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/modeling/attention.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/modeling/kvcache.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/quant_args.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/apply.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/factory/base.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/transform/utils/matrix.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/internal.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/match.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/utils/type.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/conftest.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/mock_observer.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_configs/test_infer_quant.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_modeling/test_attention_and_cache.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/test_utils/test_mxfp4_utils.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_registry.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/conftest.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/factory/test_correctness.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/factory/test_memory.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/factory/test_serialization.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/utils/test_hadamard.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_match.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_offload.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_type.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/utils/copyright.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: compressed-tensors
|
|
3
|
-
Version: 0.13.0
|
|
3
|
+
Version: 0.13.1a20260108
|
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
|
5
5
|
Home-page: https://github.com/vllm-project/compressed-tensors
|
|
6
6
|
Author: Neuralmagic, Inc.
|
|
@@ -19,7 +19,7 @@ import torch
|
|
|
19
19
|
from compressed_tensors.config import SparsityCompressionConfig
|
|
20
20
|
from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
|
|
21
21
|
from compressed_tensors.registry import RegistryMixin
|
|
22
|
-
from compressed_tensors.utils import has_offloaded_params
|
|
22
|
+
from compressed_tensors.utils import has_offloaded_params, register_offload_parameter
|
|
23
23
|
from torch import Tensor
|
|
24
24
|
from torch.nn import Module
|
|
25
25
|
|
|
@@ -185,10 +185,16 @@ class BaseCompressor(RegistryMixin, ABC):
|
|
|
185
185
|
for name, parameter in module.named_parameters():
|
|
186
186
|
compressed_data[name] = parameter
|
|
187
187
|
|
|
188
|
-
return self.decompress_weight(
|
|
188
|
+
decompressed_weight = self.decompress_weight(
|
|
189
189
|
compressed_data=compressed_data, quantization_args=quantization_args
|
|
190
190
|
).to(device)
|
|
191
191
|
|
|
192
|
+
for name in ("weight_scale", "weight_zero_point"):
|
|
193
|
+
if hasattr(module, name):
|
|
194
|
+
register_offload_parameter(module, name, compressed_data[name])
|
|
195
|
+
|
|
196
|
+
return decompressed_weight
|
|
197
|
+
|
|
192
198
|
def decompress_weight(
|
|
193
199
|
self, compressed_data: Dict[str, Tensor], **kwargs
|
|
194
200
|
) -> torch.Tensor:
|
|
@@ -36,6 +36,7 @@ from compressed_tensors.config import CompressionFormat, SparsityCompressionConf
|
|
|
36
36
|
from compressed_tensors.config.format import (
|
|
37
37
|
infer_and_set_per_module_quantization_format,
|
|
38
38
|
)
|
|
39
|
+
from compressed_tensors.linear.compressed_linear import CompressedLinear
|
|
39
40
|
from compressed_tensors.quantization import (
|
|
40
41
|
DEFAULT_QUANTIZATION_METHOD,
|
|
41
42
|
QuantizationConfig,
|
|
@@ -474,6 +475,9 @@ class ModelCompressor:
|
|
|
474
475
|
),
|
|
475
476
|
desc="Compressing model",
|
|
476
477
|
):
|
|
478
|
+
if isinstance(module, CompressedLinear):
|
|
479
|
+
continue # already compressed
|
|
480
|
+
|
|
477
481
|
module_device = get_execution_device(module)
|
|
478
482
|
is_meta = module_device.type == "meta"
|
|
479
483
|
|
|
@@ -127,6 +127,11 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
|
127
127
|
m, n = weight.shape
|
|
128
128
|
# TODO: use a user provided dequant dtype
|
|
129
129
|
unpacked = unpack_fp4_from_uint8(weight, m, n * 2)
|
|
130
|
+
|
|
131
|
+
# decompress scale
|
|
132
|
+
scale = scale.to(unpacked.dtype)
|
|
133
|
+
compressed_data["weight_scale"] = torch.nn.Parameter(scale, requires_grad=False)
|
|
134
|
+
|
|
130
135
|
decompressed_weight = dequantize(
|
|
131
136
|
x_q=unpacked, scale=scale, global_scale=global_scale, dtype=unpacked.dtype
|
|
132
137
|
)
|
|
@@ -175,6 +175,10 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
|
|
|
175
175
|
zero_point = unpack_from_int32(
|
|
176
176
|
zero_point, num_bits, original_zp_shape, packed_dim=0
|
|
177
177
|
)
|
|
178
|
+
# Update the compressed_data dict with unpacked zero_point
|
|
179
|
+
compressed_data["weight_zero_point"] = torch.nn.Parameter(
|
|
180
|
+
zero_point, requires_grad=False
|
|
181
|
+
)
|
|
178
182
|
|
|
179
183
|
decompressed_weight = dequantize(
|
|
180
184
|
x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/version.py
RENAMED
|
@@ -17,5 +17,5 @@ __version__: str
|
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
|
18
18
|
version_tuple: VERSION_TUPLE
|
|
19
19
|
|
|
20
|
-
__version__ = version = '0.13.0'
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 13, 0)
|
|
20
|
+
__version__ = version = '0.13.1.a20260108'
|
|
21
|
+
__version_tuple__ = version_tuple = (0, 13, 1)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: compressed-tensors
|
|
3
|
-
Version: 0.13.0
|
|
3
|
+
Version: 0.13.1a20260108
|
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
|
5
5
|
Home-page: https://github.com/vllm-project/compressed-tensors
|
|
6
6
|
Author: Neuralmagic, Inc.
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.github/actions/test/action.yml
RENAMED
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.github/scripts/step-status
RENAMED
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/.github/workflows/test-check.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/examples/bitmask_compression.ipynb
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/README.md
RENAMED
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/__init__.py
RENAMED
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/src/compressed_tensors/logger.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_compressors/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_configs/__init__.py
RENAMED
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_configs/test_base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_linear/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_quantization/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_transform/conftest.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/__init__.py
RENAMED
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_helpers.py
RENAMED
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_match.py
RENAMED
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_offload.py
RENAMED
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.13.0 → compressed_tensors-0.13.1a20260108}/tests/test_utils/test_type.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|