compressed-tensors 0.12.3a20251023__tar.gz → 0.12.3a20251028__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.12.3a20251023/src/compressed_tensors.egg-info → compressed_tensors-0.12.3a20251028}/PKG-INFO +1 -1
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/quant_args.py +9 -3
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/utils/__init__.py +1 -0
- compressed_tensors-0.12.3a20251028/src/compressed_tensors/quantization/utils/mxfp4_utils.py +97 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
- compressed_tensors-0.12.3a20251028/tests/test_quantization/test_utils/test_mxfp4_utils.py +79 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/.gitkeep +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/post-release-nightly-build.yml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/quality-check.yaml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/test.yml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/trigger-all.yml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.gitignore +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/LICENSE +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/Makefile +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/README.md +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/pyproject.toml +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/setup.cfg +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/setup.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/fp4_quantized.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/format.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/logger.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/modeling/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/modeling/attention.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/modeling/kvcache.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/quant_metadata.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/apply.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/base.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/transform/utils/matrix.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/internal.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/match.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors/utils/type.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/conftest.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/mock_observer.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/test_fp4_quant.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_configs/test_infer_quant.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_modeling/test_attention_and_cache.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/lifecycle/test_static_lifecycle.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_registry.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/conftest.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/factory/test_correctness.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/factory/test_memory.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/factory/test_serialization.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_transform/utils/test_hadamard.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_match.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_offload.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_utils/test_type.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/utils/copyright.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: compressed-tensors
|
|
3
|
-
Version: 0.12.3a20251023
|
|
3
|
+
Version: 0.12.3a20251028
|
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
|
5
5
|
Home-page: https://github.com/vllm-project/compressed-tensors
|
|
6
6
|
Author: Neuralmagic, Inc.
|
|
@@ -25,6 +25,7 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_valida
|
|
|
25
25
|
__all__ = [
|
|
26
26
|
"FP8_E4M3_DATA",
|
|
27
27
|
"FP4_E2M1_DATA",
|
|
28
|
+
"BFLOAT16_DATA",
|
|
28
29
|
"FloatArgs",
|
|
29
30
|
"QuantizationType",
|
|
30
31
|
"QuantizationStrategy",
|
|
@@ -38,9 +39,9 @@ __all__ = [
|
|
|
38
39
|
class FloatArgs:
|
|
39
40
|
exponent: int
|
|
40
41
|
mantissa: int
|
|
41
|
-
bits: int
|
|
42
|
-
max: float
|
|
43
|
-
min: float
|
|
42
|
+
bits: Optional[int] = None
|
|
43
|
+
max: Optional[float] = None
|
|
44
|
+
min: Optional[float] = None
|
|
44
45
|
dtype: Optional[torch.dtype] = None
|
|
45
46
|
|
|
46
47
|
|
|
@@ -76,6 +77,11 @@ class FP8_E4M3_DATA(FloatArgs):
|
|
|
76
77
|
dtype = torch.float8_e4m3fn
|
|
77
78
|
|
|
78
79
|
|
|
80
|
+
class BFLOAT16_DATA(FloatArgs):
|
|
81
|
+
exponent = 8
|
|
82
|
+
mantissa = 7
|
|
83
|
+
|
|
84
|
+
|
|
79
85
|
class QuantizationType(str, Enum):
|
|
80
86
|
"""
|
|
81
87
|
Enum storing quantization type options
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import torch
|
|
16
|
+
from compressed_tensors.quantization.quant_args import BFLOAT16_DATA, FP4_E2M1_DATA
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
__all__ = ["convert_mxfp4_exp_scale", "generate_mxfp4_scales", "round_to_power_2"]
|
|
20
|
+
|
|
21
|
+
# Reference: https://github.com/vllm-project/vllm/blob/main/tests/quantization/reference_mxfp4.py # noqa: E501
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def convert_mxfp4_exp_scale(
|
|
25
|
+
scale: torch.Tensor, dtype: torch.dtype = torch.bfloat16
|
|
26
|
+
) -> torch.Tensor:
|
|
27
|
+
"""
|
|
28
|
+
Converts mxfp4 scales. Scales are powers of 2, with the
|
|
29
|
+
exponents stored in uint8. Converts to dense dtype so that
|
|
30
|
+
they can be applied to the weights and activations during QDQ
|
|
31
|
+
|
|
32
|
+
:param scale: uint8 exponent scale
|
|
33
|
+
:param dtype: dense dtype
|
|
34
|
+
"""
|
|
35
|
+
assert scale.dtype == torch.uint8
|
|
36
|
+
scale_exp = scale.to(torch.int32) - 127
|
|
37
|
+
scale = 2.00 ** (scale_exp.to(torch.float))
|
|
38
|
+
return scale.to(dtype)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def round_to_power_2(x: torch.Tensor) -> torch.Tensor:
|
|
42
|
+
"""
|
|
43
|
+
Round values to the closest power of 2.
|
|
44
|
+
This is done by masking the values with BFLOAT16_SIGN_EXPONENT_MASK
|
|
45
|
+
which essentially removes the mantissa and keeps the exponent.
|
|
46
|
+
i.e the closest power of 2 for the input_value.
|
|
47
|
+
|
|
48
|
+
E.g:
|
|
49
|
+
0.0825 = 1.32 (mantissa) x 2**-4 (exponent)
|
|
50
|
+
0.0825 ==> -4 (exponent) + 127 = 123 = 01111011 (8 bits for bfloat16)
|
|
51
|
+
0.0825 ==> 0.32 (mantissa) = 0101001 (7 bits for bfloat16)
|
|
52
|
+
0.0825 == 0b01111011_0101001 (bfloat16)
|
|
53
|
+
0b01111011_0101001 & 111111111_0000000 == 0b01111011_0000000
|
|
54
|
+
Keep the exponent + sign bit to give you the closest power of 2, 0.0625
|
|
55
|
+
|
|
56
|
+
:param x: tensor to round to closest power of 2
|
|
57
|
+
"""
|
|
58
|
+
assert x.dtype == torch.bfloat16
|
|
59
|
+
x = x.view(torch.uint16).to(torch.int32)
|
|
60
|
+
|
|
61
|
+
# Find closest power of 2
|
|
62
|
+
BFLOAT16_VAL_TO_ADD = 1 << (BFLOAT16_DATA.mantissa - FP4_E2M1_DATA.mantissa - 1)
|
|
63
|
+
# Add value to push the value to the next exponent
|
|
64
|
+
BFLOAT16_SIGN_EXPONENT_MASK = (
|
|
65
|
+
(1 << (BFLOAT16_DATA.exponent + 1)) - 1
|
|
66
|
+
) << BFLOAT16_DATA.mantissa
|
|
67
|
+
# mask to only keep exponent - we conservatively round down
|
|
68
|
+
# to better represent smaller numbers / prevent overflow
|
|
69
|
+
block_max_uint = torch.bitwise_and(
|
|
70
|
+
x + BFLOAT16_VAL_TO_ADD, BFLOAT16_SIGN_EXPONENT_MASK
|
|
71
|
+
)
|
|
72
|
+
return block_max_uint.to(torch.uint16).view(torch.bfloat16)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def generate_mxfp4_scales(x: torch.Tensor) -> torch.Tensor:
|
|
76
|
+
"""
|
|
77
|
+
Generate mxfp4 scales. The scales require the following steps
|
|
78
|
+
1. Round to the closest power of 2
|
|
79
|
+
2. Convert to exponent
|
|
80
|
+
3. Store in uint8
|
|
81
|
+
|
|
82
|
+
Called when calculating qparams using observers.
|
|
83
|
+
|
|
84
|
+
:param x: tensor to round to closest power of 2
|
|
85
|
+
:returns uint8 scales as exponents
|
|
86
|
+
"""
|
|
87
|
+
# Round to closest power of 2
|
|
88
|
+
scale_power_2 = round_to_power_2(x)
|
|
89
|
+
# Convert to exponent
|
|
90
|
+
scale_exp = 127 + torch.floor(torch.log2(scale_power_2)).to(torch.int32) - 2
|
|
91
|
+
# Clamp and store in uint8, as expected by mxfp4
|
|
92
|
+
scale_exp = torch.clamp(
|
|
93
|
+
scale_exp,
|
|
94
|
+
max=torch.iinfo(torch.uint8).max,
|
|
95
|
+
min=torch.iinfo(torch.uint8).min,
|
|
96
|
+
)
|
|
97
|
+
return scale_exp.to(torch.uint8)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: compressed-tensors
|
|
3
|
-
Version: 0.12.3a20251023
|
|
3
|
+
Version: 0.12.3a20251028
|
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
|
5
5
|
Home-page: https://github.com/vllm-project/compressed-tensors
|
|
6
6
|
Author: Neuralmagic, Inc.
|
|
@@ -75,6 +75,7 @@ src/compressed_tensors/quantization/lifecycle/helpers.py
|
|
|
75
75
|
src/compressed_tensors/quantization/lifecycle/initialize.py
|
|
76
76
|
src/compressed_tensors/quantization/utils/__init__.py
|
|
77
77
|
src/compressed_tensors/quantization/utils/helpers.py
|
|
78
|
+
src/compressed_tensors/quantization/utils/mxfp4_utils.py
|
|
78
79
|
src/compressed_tensors/registry/__init__.py
|
|
79
80
|
src/compressed_tensors/registry/registry.py
|
|
80
81
|
src/compressed_tensors/transform/__init__.py
|
|
@@ -142,6 +143,7 @@ tests/test_quantization/test_configs/__init__.py
|
|
|
142
143
|
tests/test_quantization/test_configs/test_bit_depths.py
|
|
143
144
|
tests/test_quantization/test_configs/test_strategies.py
|
|
144
145
|
tests/test_quantization/test_utils/test_helpers.py
|
|
146
|
+
tests/test_quantization/test_utils/test_mxfp4_utils.py
|
|
145
147
|
tests/test_transform/conftest.py
|
|
146
148
|
tests/test_transform/test_transform_args.py
|
|
147
149
|
tests/test_transform/test_transform_config.py
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import torch
|
|
16
|
+
from compressed_tensors.quantization.utils import (
|
|
17
|
+
convert_mxfp4_exp_scale,
|
|
18
|
+
generate_mxfp4_scales,
|
|
19
|
+
round_to_power_2,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_round_power_2_noise():
|
|
24
|
+
powers = torch.Tensor(
|
|
25
|
+
[
|
|
26
|
+
[2**-10, 2**-9, 2**-8, 2**-7, 2**-6],
|
|
27
|
+
[2**-5, 2**-4, 2**-3, 2**-2, 2**-1],
|
|
28
|
+
[2**0, 2**1, 2**-10, 2**-9, 2**-8],
|
|
29
|
+
[2**-7, 2**-6, 2**-5, 2**-4, 2**-3],
|
|
30
|
+
[2**-2, 2**-1, 2**0, 2**1, 2**-10],
|
|
31
|
+
]
|
|
32
|
+
).to(torch.bfloat16)
|
|
33
|
+
|
|
34
|
+
noise = torch.rand_like(powers) * 0.2
|
|
35
|
+
powers_noisy = powers * (1 + noise)
|
|
36
|
+
rounded = round_to_power_2(powers_noisy)
|
|
37
|
+
assert torch.equal(rounded, powers)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_round_power_2():
|
|
41
|
+
x = torch.Tensor(
|
|
42
|
+
(
|
|
43
|
+
[5.687891, -8.291567, -1.540329, -0.315635, 0.965272],
|
|
44
|
+
[-6.944130, 0.073246, -0.451778, 8.571118, -9.856593],
|
|
45
|
+
[-0.040571, -0.708509, 2.485657, -4.003352, -0.995600],
|
|
46
|
+
[0.224199, 5.032586, -1.309816, -0.621958, 7.290238],
|
|
47
|
+
[-9.848001, -0.290731, 1.501562, 0.379829, -5.312081],
|
|
48
|
+
)
|
|
49
|
+
).to(torch.bfloat16)
|
|
50
|
+
x_rounded = torch.Tensor(
|
|
51
|
+
(
|
|
52
|
+
[4.000000, -8.000000, -1.000000, -0.250000, 1.000000],
|
|
53
|
+
[-4.000000, 0.062500, -0.500000, 8.000000, -8.000000],
|
|
54
|
+
[-0.0312, -0.500000, 2.000000, -4.000000, -1.000000],
|
|
55
|
+
[0.250000, 4.000000, -1.000000, -0.500000, 8.000000],
|
|
56
|
+
[-8.000000, -0.250000, 1.000000, 0.250000, -4.000000],
|
|
57
|
+
)
|
|
58
|
+
).to(torch.bfloat16)
|
|
59
|
+
rounded = round_to_power_2(x)
|
|
60
|
+
assert torch.equal(rounded, x_rounded)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_mxfp4_scales_e2e():
    """End-to-end check of MXFP4 scale generation on a mock weight matrix.

    A 2880x2880 normally distributed weight is split into groups of 32 along
    the last dimension. The per-group absolute maximum is fed through
    ``generate_mxfp4_scales`` and ``convert_mxfp4_exp_scale``. The base-2 log
    of the converted scales must equal
    ``floor(log2(round_to_power_2(block_max))) - 2`` exactly.
    """
    # Seed a private generator so the mock weight (and therefore any failure)
    # is reproducible, without touching the global RNG state.
    rng = torch.Generator().manual_seed(0)
    mock_weight = torch.normal(
        mean=0.0002, std=0.0576, size=(2880, 2880), generator=rng
    )

    # Group the last dimension into blocks of 32 elements.
    x = mock_weight.reshape(*mock_weight.shape[:-1], -1, 32).to(torch.bfloat16)
    min_vals = torch.amin(x, dim=-1)
    max_vals = torch.amax(x, dim=-1)

    # Force the [min, max] range of every block to include zero before
    # taking the larger magnitude of the two bounds.
    min_vals = torch.min(min_vals, torch.zeros_like(min_vals))
    max_vals = torch.max(max_vals, torch.zeros_like(max_vals))
    block_max = torch.max(torch.abs(min_vals), torch.abs(max_vals))

    scales_generated = generate_mxfp4_scales(block_max)
    converted_ct = convert_mxfp4_exp_scale(scales_generated)

    scales_exp = torch.log2(converted_ct)
    block_max_exp = torch.floor(torch.log2(round_to_power_2(block_max))) - 2
    assert torch.equal(scales_exp, block_max_exp)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/.github/workflows/test.yml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/mock_observer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/test_registry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/tests/testing_utils.py
RENAMED
|
File without changes
|
{compressed_tensors-0.12.3a20251023 → compressed_tensors-0.12.3a20251028}/utils/copyright.py
RENAMED
|
File without changes
|