compressed-tensors 0.12.3a20251212__tar.gz → 0.12.3a20251214__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.12.3a20251212/src/compressed_tensors.egg-info → compressed_tensors-0.12.3a20251214}/PKG-INFO +1 -1
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/base.py +1 -33
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/quantized_compressors/base.py +39 -24
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +3 -14
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +7 -35
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/.github/.gitkeep +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/.github/workflows/quality-check.yaml +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/.gitignore +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/LICENSE +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/Makefile +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/README.md +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/pyproject.toml +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/setup.cfg +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/setup.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/config/format.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/logger.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/modeling/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/modeling/attention.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/modeling/kvcache.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/quant_args.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/quantization/utils/mxfp4_utils.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/apply.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/factory/base.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/transform/utils/matrix.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/internal.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/match.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/utils/type.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/conftest.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/mock_observer.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/quantized_compressors/test_packed_asym_decompression.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_configs/test_infer_quant.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_modeling/test_attention_and_cache.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_quantization/test_utils/test_mxfp4_utils.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_registry.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_transform/conftest.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_transform/factory/test_correctness.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_transform/factory/test_memory.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_transform/factory/test_serialization.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_transform/utils/test_hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_utils/test_match.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_utils/test_offload.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/test_utils/test_type.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/utils/copyright.py +0 -0
{compressed_tensors-0.12.3a20251212/src/compressed_tensors.egg-info → compressed_tensors-0.12.3a20251214}/PKG-INFO CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.12.3a20251212
+Version: 0.12.3a20251214
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/vllm-project/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/base.py CHANGED

@@ -20,11 +20,6 @@ from compressed_tensors.config import SparsityCompressionConfig
 from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
 from compressed_tensors.registry import RegistryMixin
 from compressed_tensors.utils import has_offloaded_params
-from compressed_tensors.utils.offload import (
-    delete_offload_parameter,
-    get_offloaded_device,
-    register_offload_parameter,
-)
 from torch import Tensor
 from torch.nn import Module

@@ -190,37 +185,10 @@ class BaseCompressor(RegistryMixin, ABC):
         for name, parameter in module.named_parameters():
             compressed_data[name] = parameter
 
-
-        original_scale = compressed_data.get("weight_scale")
-        original_zp = compressed_data.get("weight_zero_point")
-
-        # NOTE: decompress_weight may modify compressed_data dict in-place
-        # This is subtle but allows us to update the module's qparams with
-        # the unpacked values.
-        # TODO: Consider refactoring to return modified qparams explicitly
-        result = self.decompress_weight(
+        return self.decompress_weight(
             compressed_data=compressed_data, quantization_args=quantization_args
         ).to(device)
 
-        # Update module's parameters only if they were modified
-        for param_name, original_param in [
-            ("weight_scale", original_scale),
-            ("weight_zero_point", original_zp),
-        ]:
-            if (
-                param_name in compressed_data
-                and compressed_data[param_name] is not original_param
-            ):
-                # Delete the old parameter and register the updated one
-                delete_offload_parameter(module, param_name)
-                offload_device = get_offloaded_device(module)
-                param = torch.nn.Parameter(
-                    compressed_data[param_name], requires_grad=False
-                )
-                register_offload_parameter(module, param_name, param, offload_device)
-
-        return result
-
     def decompress_weight(
         self, compressed_data: Dict[str, Tensor], **kwargs
     ) -> torch.Tensor:

{compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/quantized_compressors/base.py CHANGED

@@ -18,7 +18,7 @@ from typing import Any, Dict, Generator, Tuple, Union
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
-from compressed_tensors.quantization import QuantizationScheme
+from compressed_tensors.quantization import QuantizationScheme, QuantizationStrategy
 from compressed_tensors.utils import (
     get_nested_mappings_from_state_dict,
     get_nested_weight_mappings,
@@ -85,7 +85,6 @@ class BaseQuantizationCompressor(BaseCompressor):
         """
         uncompressed_names = list(model_state.keys())
         compressed_dict = {}
-        compressed_param_names = set()
 
         # compress values
         desc = "Compressing with quantization"
@@ -120,38 +119,54 @@ class BaseQuantizationCompressor(BaseCompressor):
                     device=compression_device,
                 )
 
-                # update state dict
+                # update state dict
                 for key, value in compressed_values.items():
-
-                    compressed_dict[full_name] = value.to(compression_device)
-                    compressed_param_names.add(full_name)
+                    compressed_dict[prefix + key] = value.to(compression_device)
 
             else:
-                #
-                if name
+                # omit saving zero points for symmetric or packed quantization
+                if name.endswith("zero_point") and self._skip_zp(name, names_to_scheme):
                     continue
 
-
-
-                if name.endswith("weight_zero_point"):
-                    module_path = name.rsplit(".", 1)[0]
-                    if (
-                        module_path in names_to_scheme
-                        and names_to_scheme[module_path].weights.symmetric
-                    ):
-                        continue
-                    # Call compress_zp if available (for PackedQuantizationCompressor)
-                    if module_path in names_to_scheme and hasattr(self, "compress_zp"):
-                        value = self.compress_zp(
-                            value, names_to_scheme[module_path].weights
-                        )
-                        if value is None:
-                            continue
+                if name.endswith("weight_scale") and self._skip_scale():
+                    continue
 
                 compressed_dict[name] = value.to(compression_device)
 
         return compressed_dict
 
+    def _skip_scale(self):
+        from compressed_tensors.compressors import NVFP4PackedCompressor
+
+        return isinstance(self, NVFP4PackedCompressor)
+
+    def _skip_zp(
+        self, name: str, names_to_scheme: Dict[str, QuantizationScheme]
+    ) -> bool:
+        from compressed_tensors.compressors import PackedQuantizationCompressor
+
+        module_name, zp_name = name.rsplit(".", 1) if "." in name else ("", name)
+        scheme = names_to_scheme[module_name]
+
+        if zp_name == "weight_zero_point":
+            args = scheme.weights
+        if zp_name == "input_zero_point":
+            args = scheme.input_activations
+        if zp_name == "output_zero_point":
+            args = scheme.output_activations
+
+        symmetric = args.symmetric
+        packable_strategies = [
+            QuantizationStrategy.GROUP.value,
+            QuantizationStrategy.CHANNEL.value,
+        ]
+        packed = (
+            isinstance(self, PackedQuantizationCompressor)
+            and args.strategy in packable_strategies
+        )
+
+        return symmetric or packed
+
     def decompress(
         self,
         path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],

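A note on the zero-point logic above: _skip_zp rests on the fact that a symmetric quantization grid is centered at zero, so every zero point is exactly 0 and omitting it from the checkpoint loses nothing. Below is a minimal standalone sketch of that property; it is illustrative only, not code from the package, and it uses a single tensor-wide scale for brevity:

    import torch

    def quantize_symmetric(w: torch.Tensor, num_bits: int = 4):
        qmax = 2 ** (num_bits - 1) - 1               # e.g. +7 for int4
        scale = w.abs().amax() / qmax                # symmetric: scale only, no offset
        q = torch.clamp(torch.round(w / scale), -qmax - 1, qmax)
        zero_point = torch.zeros_like(q, dtype=torch.int32)  # identically zero
        return q, scale, zero_point

    q, scale, zp = quantize_symmetric(torch.randn(8, 8))
    assert torch.all(zp == 0)  # nothing is lost by skipping zp at save time

The packed half of the predicate covers the asymmetric case: for GROUP/CHANNEL strategies the PackedQuantizationCompressor appears to serialize zero points in packed int32 form instead (see the pack_quantized.py hunks below), so the unpacked copies are skipped here as well.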
{compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py CHANGED

@@ -56,6 +56,7 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         return (
             "weight_packed",
             "weight_scale",
+            "weight_zero_point",
             "weight_global_scale",
         )
 
@@ -72,12 +73,13 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         :param quantization_args: quantization parameters for the weight
         :return: dictionary mapping compressed parameter names to shape and dtype
         """
-        return {
+        output = {
             "weight_packed": (
                 torch.Size((weight_shape[0], weight_shape[1] // 2)),
                 torch.uint8,
             ),
         }
+        return output
 
     def compress_scale(
         self,
@@ -112,13 +114,6 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         compressed_dict["weight_scale"] = self.compress_scale(
             scale=scale, quantization_args=quantization_args
         )
-
-        if global_scale is None:
-            raise ValueError(
-                "NVFP4 quantization requires global_scale (TENSOR_GROUP strategy). "
-                "Use TENSOR_GROUP strategy instead of GROUP for FP4 quantization."
-            )
-
         return compressed_dict
 
     def decompress_weight(
@@ -132,12 +127,6 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         m, n = weight.shape
         # TODO: use a user provided dequant dtype
         unpacked = unpack_fp4_from_uint8(weight, m, n * 2)
-
-        # cast scale dtype to match unpacked dtype for dequantization
-        if scale.dtype != unpacked.dtype:
-            scale = scale.to(unpacked.dtype)
-        compressed_data["weight_scale"] = scale
-
         decompressed_weight = dequantize(
             x_q=unpacked, scale=scale, global_scale=global_scale, dtype=unpacked.dtype
         )

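For context on the weight_packed entry above (weight_shape[1] // 2 columns of torch.uint8): FP4 stores two 4-bit codes per byte, which is why decompress_weight unpacks back to (m, n * 2). A self-contained sketch of that nibble packing, illustrative only and assuming integer codes in [0, 15] (the package's unpack_fp4_from_uint8 additionally decodes the FP4 values themselves):

    import torch

    def pack_nibbles(codes: torch.Tensor) -> torch.Tensor:
        # codes: (m, n) uint8 values in [0, 15] -> packed: (m, n // 2)
        lo, hi = codes[:, 0::2], codes[:, 1::2]
        return (lo | (hi << 4)).to(torch.uint8)

    def unpack_nibbles(packed: torch.Tensor) -> torch.Tensor:
        lo = packed & 0x0F
        hi = (packed >> 4) & 0x0F
        # interleave low/high nibbles back into the original column order
        return torch.stack((lo, hi), dim=-1).reshape(packed.shape[0], -1)

    codes = torch.randint(0, 16, (4, 8), dtype=torch.uint8)
    assert torch.equal(unpack_nibbles(pack_nibbles(codes)), codes)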
{compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py CHANGED

@@ -64,34 +64,25 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         """
         pack_factor = 32 // quantization_args.num_bits
         packed_size = math.ceil(weight_shape[1] / pack_factor)
+        packed_size_zp = math.ceil(weight_shape[0] / pack_factor)
         output = {
             "weight_packed": (torch.Size((weight_shape[0], packed_size)), torch.int32),
             "weight_shape": (torch.Size((2,)), torch.int32),
         }
-
-        # Add weight_scale - always needed for quantization
-        if quantization_args.strategy in [
+        if not quantization_args.symmetric and quantization_args.strategy in [
             QuantizationStrategy.GROUP.value,
             QuantizationStrategy.CHANNEL.value,
         ]:
-            shape_factor = (
+            zp_factor = (
                 quantization_args.group_size
                 if quantization_args.strategy == QuantizationStrategy.GROUP.value
                 else weight_shape[-1]
             )
-            scale_cols = math.ceil(weight_shape[-1] / shape_factor)
-            output["weight_scale"] = (
-                torch.Size((weight_shape[0], scale_cols)),
-                quantization_args.scale_dtype,
-            )
-
-            # Add weight_zero_point for asymmetric quantization
-            if not quantization_args.symmetric:
-                output["weight_zero_point"] = (
-                    torch.Size((math.ceil(weight_shape[0] / pack_factor), scale_cols)),
-                    torch.int32,
-                )
 
+            output["weight_zero_point"] = (
+                torch.Size((packed_size_zp, weight_shape[-1] // zp_factor)),
+                torch.int32,
+            )
         return output
 
     def compress_weight(
@@ -184,8 +175,6 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         zero_point = unpack_from_int32(
             zero_point, num_bits, original_zp_shape, packed_dim=0
         )
-        # Update the compressed_data dict with the unpacked zero_point
-        compressed_data["weight_zero_point"] = zero_point
 
         decompressed_weight = dequantize(
             x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx
@@ -193,20 +182,6 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
 
         return decompressed_weight
 
-    def compress_zp(
-        self, zero_point: Tensor, quantization_args: Optional[QuantizationArgs] = None
-    ) -> Optional[Tensor]:
-        if zero_point is None or quantization_args.symmetric:
-            return None
-        if zero_point.dtype == torch.int32:
-            return zero_point
-        if quantization_args.strategy in [
-            QuantizationStrategy.GROUP.value,
-            QuantizationStrategy.CHANNEL.value,
-        ]:
-            return pack_to_int32(zero_point, quantization_args.num_bits, packed_dim=0)
-        return zero_point
-
 
 def pack_to_int32(
     value: torch.Tensor,
@@ -251,9 +226,6 @@ def pack_to_int32(
     if packed_dim == 0:
         value = value.transpose(0, 1)
 
-    # Ensure contiguous memory for .view() operation
-    value = value.contiguous()
-
    rows, cols = value.shape
    padded_cols = math.ceil(cols / pack_factor) * pack_factor
    pad_len = padded_cols - cols

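The packed_size_zp added above follows the same arithmetic pack_to_int32 uses for weights: with num_bits-wide values, 32 // num_bits of them fit in one int32 word, so a dimension of length rows packs down to math.ceil(rows / pack_factor) words. A standalone sketch of packing along dim 0, illustrative only and not the package's implementation:

    import math
    import torch

    def pack_rows_to_int32(values: torch.Tensor, num_bits: int = 4) -> torch.Tensor:
        pack_factor = 32 // num_bits                       # 8 nibbles per int32 word
        rows, cols = values.shape
        pad = math.ceil(rows / pack_factor) * pack_factor - rows
        v = torch.nn.functional.pad(values, (0, 0, 0, pad)).to(torch.int32)
        v &= (1 << num_bits) - 1                           # keep the low num_bits bits
        v = v.reshape(-1, pack_factor, cols)               # group rows by pack_factor
        shifts = (torch.arange(pack_factor, dtype=torch.int32) * num_bits).view(1, -1, 1)
        return (v << shifts).sum(dim=1).to(torch.int32)    # one word per row group

    zp = torch.randint(0, 16, (10, 3))
    packed = pack_rows_to_int32(zp)
    print(packed.shape)  # torch.Size([2, 3]): ceil(10 / 8) rows of int32 words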
{compressed_tensors-0.12.3a20251212 → compressed_tensors-0.12.3a20251214/src/compressed_tensors.egg-info}/PKG-INFO CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.12.3a20251212
+Version: 0.12.3a20251214
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/vllm-project/compressed-tensors
 Author: Neuralmagic, Inc.