compressed-tensors 0.10.3a20250707__tar.gz → 0.10.3a20250709__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/actions/test/action.yml +32 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/test.yml +17 -0
- {compressed_tensors-0.10.3a20250707/src/compressed_tensors.egg-info → compressed_tensors-0.10.3a20250709}/PKG-INFO +1 -1
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +12 -8
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/base.py +6 -7
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +22 -18
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +19 -5
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/apply.py +8 -9
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/initialize.py +1 -1
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/quant_config.py +1 -5
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/utils/helpers.py +12 -36
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/__init__.py +1 -0
- compressed_tensors-0.10.3a20250709/src/compressed_tensors/transform/apply.py +32 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/base.py +8 -5
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/__init__.py +1 -0
- compressed_tensors-0.10.3a20250709/src/compressed_tensors/utils/internal.py +29 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/model_compressors/test_model_compressor.py +60 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_apply.py +1 -2
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/factory/test_correctness.py +14 -22
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/factory/test_memory.py +16 -23
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/.gitkeep +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/report.yml +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/trigger-all.yml +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/upload.yml +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.gitignore +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/LICENSE +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/Makefile +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/README.md +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/pyproject.toml +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/setup.cfg +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/setup.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/quant_args.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/hadamard.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/factory/random_hadamard.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/utils/hadamard.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/transform/utils/utils.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/conftest.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_registry.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/conftest.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_transform/utils/test_hadamard.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_utils/test_offload.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/utils/copyright.py +0 -0
@@ -7,6 +7,10 @@ inputs:
|
|
7
7
|
suitename:
|
8
8
|
description: "test suite name"
|
9
9
|
required: true
|
10
|
+
code_coverage:
|
11
|
+
description: whether to collect code coverage metrics during test run
|
12
|
+
type: boolean
|
13
|
+
default: false
|
10
14
|
outputs:
|
11
15
|
status:
|
12
16
|
description: "final status from test"
|
@@ -44,9 +48,37 @@ runs:
|
|
44
48
|
run: |
|
45
49
|
source ${{ inputs.venv }}/bin/activate
|
46
50
|
rm -rf src
|
51
|
+
|
52
|
+
if [[ "${ENABLE_COVERAGE}" == "true" ]]; then
|
53
|
+
echo "::group::Installing code coverage requirements via pip"
|
54
|
+
pip install bashlex https://github.com/neuralmagic/pytest-nm-releng/archive/v0.4.0.tar.gz
|
55
|
+
pip install coverage pytest-cov
|
56
|
+
|
57
|
+
# Adding Code coverage to the tests
|
58
|
+
nmre-generate-coverage-flags --package "compressed_tensors" --output-file ".coverage_flags.sh"
|
59
|
+
source .coverage_flags.sh
|
60
|
+
echo "::endgroup::"
|
61
|
+
fi
|
62
|
+
|
63
|
+
echo "::group::running tests"
|
64
|
+
echo "PYTEST_ADDOPTS set to: ${PYTEST_ADDOPTS}"
|
65
|
+
|
47
66
|
SUCCESS=0
|
48
67
|
pytest tests --junitxml=test-results/report.xml -o junit_suite_name="${{ inputs.suitename }}" || SUCCESS=$?
|
49
68
|
echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"
|
69
|
+
echo "::endgroup::"
|
70
|
+
|
71
|
+
if [[ "${ENABLE_COVERAGE}" == "true" ]]; then
|
72
|
+
echo "::group::consolidating coverage reports"
|
73
|
+
mkdir -p coverage-results
|
74
|
+
mv .coverage coverage-results/ || echo ".coverage file not found"
|
75
|
+
mv coverage-html coverage-results/ || echo "coverage-html folder not found"
|
76
|
+
mv coverage.json coverage-results/ || echo "coverage.json file not found"
|
77
|
+
echo "::endgroup::"
|
78
|
+
fi
|
79
|
+
|
50
80
|
deactivate
|
51
81
|
exit ${SUCCESS}
|
52
82
|
shell: bash
|
83
|
+
env:
|
84
|
+
ENABLE_COVERAGE: ${{ inputs.code_coverage || false }}
|
{compressed_tensors-0.10.3a20250707 → compressed_tensors-0.10.3a20250709}/.github/workflows/test.yml
RENAMED
@@ -25,6 +25,10 @@ on:
|
|
25
25
|
run_id:
|
26
26
|
description: run id of the BUILD job that generated the assets
|
27
27
|
type: string
|
28
|
+
code_coverage:
|
29
|
+
description: whether to collect code coverage metrics during test run
|
30
|
+
type: boolean
|
31
|
+
default: false
|
28
32
|
|
29
33
|
# makes workflow manually callable
|
30
34
|
workflow_dispatch:
|
@@ -51,6 +55,10 @@ on:
|
|
51
55
|
run_id:
|
52
56
|
description: run id of the BUILD job that generated the assets
|
53
57
|
type: string
|
58
|
+
code_coverage:
|
59
|
+
description: whether to collect code coverage metrics during test run
|
60
|
+
type: boolean
|
61
|
+
default: false
|
54
62
|
|
55
63
|
jobs:
|
56
64
|
|
@@ -124,6 +132,7 @@ jobs:
|
|
124
132
|
with:
|
125
133
|
venv: ${{ steps.create_venv.outputs.penv }}
|
126
134
|
suitename: test-${{ inputs.python }}-${{ inputs.test_label }}
|
135
|
+
code_coverage: ${{ inputs.code_coverage }}
|
127
136
|
|
128
137
|
- name: summary
|
129
138
|
uses: neuralmagic/nm-actions/actions/summary-test@v1.13.0
|
@@ -146,3 +155,11 @@ jobs:
|
|
146
155
|
name: report-${{ inputs.test_label }}.xml
|
147
156
|
path: test-results/report.xml
|
148
157
|
retention-days: 5
|
158
|
+
|
159
|
+
- name: upload coverage report
|
160
|
+
uses: actions/upload-artifact@v4
|
161
|
+
if: (success() || failure()) && inputs.code_coverage
|
162
|
+
with:
|
163
|
+
name: coverage-results
|
164
|
+
path: coverage-results/*
|
165
|
+
retention-days: 5
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.10.
|
3
|
+
Version: 0.10.3a20250709
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -42,10 +42,7 @@ from compressed_tensors.quantization import (
|
|
42
42
|
load_pretrained_quantization_parameters,
|
43
43
|
)
|
44
44
|
from compressed_tensors.quantization.lifecycle import expand_target_names
|
45
|
-
from compressed_tensors.quantization.utils import
|
46
|
-
is_module_quantized,
|
47
|
-
iter_named_leaf_modules,
|
48
|
-
)
|
45
|
+
from compressed_tensors.quantization.utils import is_module_quantized
|
49
46
|
from compressed_tensors.utils import (
|
50
47
|
align_module_device,
|
51
48
|
delete_offload_parameter,
|
@@ -393,9 +390,16 @@ class ModelCompressor:
|
|
393
390
|
)
|
394
391
|
|
395
392
|
for prefix, module in tqdm(model.named_modules(), desc="Compressing model"):
|
393
|
+
|
396
394
|
if prefix in module_to_scheme or prefix in sparse_compression_targets:
|
395
|
+
module_device = get_execution_device(module).type
|
396
|
+
is_meta = (module_device == "meta")
|
397
|
+
|
398
|
+
exec_device = "meta" if is_meta else "cpu"
|
399
|
+
onloading_device = "meta" if is_meta else module_device
|
400
|
+
|
397
401
|
# in the future, support compression on same device
|
398
|
-
with align_module_device(module, execution_device=
|
402
|
+
with align_module_device(module, execution_device=exec_device):
|
399
403
|
state_dict = module.state_dict(prefix=f"{prefix}.")
|
400
404
|
|
401
405
|
# quantization first
|
@@ -404,6 +408,7 @@ class ModelCompressor:
|
|
404
408
|
state_dict,
|
405
409
|
names_to_scheme=module_to_scheme,
|
406
410
|
show_progress=False,
|
411
|
+
compression_device=exec_device,
|
407
412
|
)
|
408
413
|
|
409
414
|
# sparsity second
|
@@ -415,7 +420,6 @@ class ModelCompressor:
|
|
415
420
|
)
|
416
421
|
|
417
422
|
# remove any existing parameters
|
418
|
-
exec_device = get_execution_device(module)
|
419
423
|
offload_device = get_offloaded_device(module)
|
420
424
|
for name, _ in list(module.named_parameters()):
|
421
425
|
delete_offload_parameter(module, name)
|
@@ -423,7 +427,7 @@ class ModelCompressor:
|
|
423
427
|
# replace with compressed parameters
|
424
428
|
for name, value in state_dict.items():
|
425
429
|
name = name.removeprefix(f"{prefix}.")
|
426
|
-
value = value.to(
|
430
|
+
value = value.to(onloading_device)
|
427
431
|
param = torch.nn.Parameter(value, requires_grad=False)
|
428
432
|
register_offload_parameter(module, name, param, offload_device)
|
429
433
|
|
@@ -747,7 +751,7 @@ def map_module_to_scheme(model: Module) -> Dict[str, QuantizationScheme]:
|
|
747
751
|
"""
|
748
752
|
return {
|
749
753
|
fix_fsdp_module_name(name): module.quantization_scheme
|
750
|
-
for name, module in
|
754
|
+
for name, module in model.named_modules()
|
751
755
|
if is_module_quantized(module)
|
752
756
|
}
|
753
757
|
|
@@ -72,6 +72,7 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
72
72
|
model_state: Dict[str, Tensor],
|
73
73
|
names_to_scheme: Dict[str, QuantizationScheme],
|
74
74
|
show_progress: bool = False,
|
75
|
+
compression_device: str = "cpu",
|
75
76
|
**kwargs,
|
76
77
|
) -> Dict[str, Tensor]:
|
77
78
|
"""
|
@@ -85,7 +86,6 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
85
86
|
"""
|
86
87
|
uncompressed_names = list(model_state.keys())
|
87
88
|
compressed_dict = {}
|
88
|
-
save_device = "cpu"
|
89
89
|
|
90
90
|
# compress values
|
91
91
|
desc = "Compressing with quantization"
|
@@ -104,10 +104,10 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
104
104
|
|
105
105
|
# is scale does not exist, then weight cannot be compressed
|
106
106
|
if scale is None:
|
107
|
-
compressed_dict[name] = value.to(
|
107
|
+
compressed_dict[name] = value.to(compression_device)
|
108
108
|
continue
|
109
109
|
|
110
|
-
# compress values on cpu (memory movement too expensive)
|
110
|
+
# compress values on meta if loading from meta otherwise on cpu (memory movement too expensive)
|
111
111
|
module_path = prefix[:-1] if prefix.endswith(".") else prefix
|
112
112
|
quant_args = names_to_scheme[module_path].weights
|
113
113
|
compressed_values = self.compress_weight(
|
@@ -117,12 +117,12 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
117
117
|
global_scale=global_scale,
|
118
118
|
g_idx=g_idx,
|
119
119
|
quantization_args=quant_args,
|
120
|
-
device=
|
120
|
+
device=compression_device,
|
121
121
|
)
|
122
122
|
|
123
123
|
# update state dict
|
124
124
|
for key, value in compressed_values.items():
|
125
|
-
compressed_dict[prefix + key] = value.to(
|
125
|
+
compressed_dict[prefix + key] = value.to(compression_device)
|
126
126
|
|
127
127
|
else:
|
128
128
|
# omit saving zero points for symmetric or packed quantization
|
@@ -133,8 +133,7 @@ class BaseQuantizationCompressor(BaseCompressor):
|
|
133
133
|
# TODO: does this case actually occur?
|
134
134
|
elif name.endswith("g_idx") and torch.any(value <= -1):
|
135
135
|
continue
|
136
|
-
|
137
|
-
compressed_dict[name] = value.to(save_device)
|
136
|
+
compressed_dict[name] = value.to(compression_device)
|
138
137
|
|
139
138
|
return compressed_dict
|
140
139
|
|
@@ -220,30 +220,34 @@ def pack_to_int32(
|
|
220
220
|
if num_bits < 1:
|
221
221
|
raise ValueError(f"num_bits must be at least 1, got {num_bits}")
|
222
222
|
|
223
|
-
#
|
223
|
+
# Convert to unsigned range for packing, matching quantization offset
|
224
224
|
offset = 1 << (num_bits - 1)
|
225
225
|
value = (value + offset).to(torch.uint8)
|
226
|
-
|
226
|
+
device = value.device
|
227
|
+
|
227
228
|
pack_factor = 32 // num_bits
|
228
229
|
|
229
|
-
|
230
|
-
|
231
|
-
padding = packed_size * pack_factor - value.shape[packed_dim]
|
232
|
-
value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
|
230
|
+
if packed_dim == 0:
|
231
|
+
value = value.transpose(0, 1)
|
233
232
|
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
233
|
+
rows, cols = value.shape
|
234
|
+
padded_cols = math.ceil(cols / pack_factor) * pack_factor
|
235
|
+
pad_len = padded_cols - cols
|
236
|
+
|
237
|
+
if pad_len > 0:
|
238
|
+
value = torch.nn.functional.pad(value, (0, pad_len))
|
239
|
+
|
240
|
+
num_groups = padded_cols // pack_factor
|
241
|
+
|
242
|
+
# Use int32 here
|
243
|
+
reshaped = value.view(rows, num_groups, pack_factor).to(torch.int32)
|
244
|
+
bit_shifts = torch.arange(pack_factor, device=device, dtype=torch.int32) * num_bits
|
245
|
+
packed = (reshaped << bit_shifts).sum(dim=2, dtype=torch.int32)
|
246
|
+
|
247
|
+
if packed_dim == 0:
|
248
|
+
packed = packed.transpose(0, 1)
|
243
249
|
|
244
|
-
|
245
|
-
packed = np.ascontiguousarray(packed).view(np.int32)
|
246
|
-
return torch.from_numpy(packed)
|
250
|
+
return packed
|
247
251
|
|
248
252
|
|
249
253
|
def unpack_from_int32(
|
@@ -56,8 +56,10 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
|
|
56
56
|
bitmask_tensor = Sparse24BitMaskTensor.from_dense(
|
57
57
|
value, self.config.sparsity_structure
|
58
58
|
)
|
59
|
-
|
60
|
-
|
59
|
+
return bitmask_tensor.dict(
|
60
|
+
name_prefix=name,
|
61
|
+
device="meta" if value.is_meta else "cpu",
|
62
|
+
)
|
61
63
|
|
62
64
|
def decompress_weight(self, weight_data):
|
63
65
|
data = Sparse24BitMaskTensor.from_compressed_data(**weight_data)
|
@@ -90,9 +92,14 @@ class Sparse24BitMaskTensor:
|
|
90
92
|
:return: instantiated compressed tensor
|
91
93
|
"""
|
92
94
|
shape = list(tensor.shape)
|
93
|
-
|
94
|
-
|
95
|
-
|
95
|
+
if tensor.is_meta:
|
96
|
+
compressed, bitmask = sparse24_bitmask_compress(
|
97
|
+
tensor, sparsity_structure=sparsity_structure
|
98
|
+
)
|
99
|
+
else:
|
100
|
+
compressed, bitmask = sparse24_bitmask_compress(
|
101
|
+
tensor.cpu(), sparsity_structure=sparsity_structure
|
102
|
+
)
|
96
103
|
return Sparse24BitMaskTensor(
|
97
104
|
shape=shape,
|
98
105
|
compressed=compressed,
|
@@ -169,6 +176,13 @@ def sparse24_bitmask_compress(
|
|
169
176
|
SparsityStructure(sparsity_structure) == SparsityStructure.TWO_FOUR
|
170
177
|
), "Only 2:4 sparsity is supported"
|
171
178
|
|
179
|
+
if tensor.is_meta:
|
180
|
+
num_rows, num_cols = tensor.shape
|
181
|
+
compressed_values = torch.empty((num_rows, num_cols // 2), dtype=tensor.dtype, device="meta")
|
182
|
+
packed_cols = (num_cols + 7) // 8
|
183
|
+
bitmasks_packed = torch.empty((num_rows, packed_cols), dtype=torch.uint8, device="meta")
|
184
|
+
return compressed_values, bitmasks_packed
|
185
|
+
|
172
186
|
bytemasks = get_24_bytemasks(tensor=tensor)
|
173
187
|
|
174
188
|
if tensor.dtype == FP8_DTYPE:
|
@@ -38,8 +38,6 @@ from compressed_tensors.quantization.utils import (
|
|
38
38
|
KV_CACHE_TARGETS,
|
39
39
|
infer_quantization_status,
|
40
40
|
is_kv_cache_quant_scheme,
|
41
|
-
iter_named_leaf_modules,
|
42
|
-
iter_named_quantizable_modules,
|
43
41
|
)
|
44
42
|
from compressed_tensors.utils.helpers import fix_fsdp_module_name, replace_module
|
45
43
|
from compressed_tensors.utils.offload import update_parameter_data
|
@@ -87,7 +85,7 @@ def load_pretrained_quantization_parameters(
|
|
87
85
|
model_path = get_safetensors_folder(model_name_or_path)
|
88
86
|
mapping = get_quantization_parameter_to_path_mapping(model_path)
|
89
87
|
|
90
|
-
for name, submodule in iter_named_leaf_modules(model):
|
88
|
+
for name, submodule in model.named_modules():
|
91
89
|
if not is_module_quantized(submodule):
|
92
90
|
continue
|
93
91
|
if submodule.quantization_scheme.input_activations is not None:
|
@@ -152,11 +150,7 @@ def apply_quantization_config(
|
|
152
150
|
# list of submodules to ignore
|
153
151
|
ignored_submodules = defaultdict(list)
|
154
152
|
# mark appropriate layers for quantization by setting their quantization schemes
|
155
|
-
for name, submodule in iter_named_quantizable_modules(
|
156
|
-
model,
|
157
|
-
include_children=True,
|
158
|
-
include_attn=True,
|
159
|
-
): # child modules and attention modules
|
153
|
+
for name, submodule in model.named_modules():
|
160
154
|
# potentially fix module name to remove FSDP wrapper prefix
|
161
155
|
name = fix_fsdp_module_name(name)
|
162
156
|
if matches := find_name_or_class_matches(name, submodule, config.ignore):
|
@@ -287,7 +281,7 @@ def expand_target_names(
|
|
287
281
|
"""
|
288
282
|
return {
|
289
283
|
name
|
290
|
-
for name, module in iter_named_leaf_modules(model)
|
284
|
+
for name, module in model.named_modules()
|
291
285
|
if is_target(name, module, targets, ignore)
|
292
286
|
}
|
293
287
|
|
@@ -328,6 +322,11 @@ def find_name_or_class_matches(
|
|
328
322
|
2. matches on regex patterns
|
329
323
|
3. matches on module names
|
330
324
|
"""
|
325
|
+
from compressed_tensors import InternalModule
|
326
|
+
|
327
|
+
if isinstance(module, InternalModule):
|
328
|
+
return []
|
329
|
+
|
331
330
|
targets = sorted(targets, key=lambda x: ("re:" in x, x))
|
332
331
|
if isinstance(targets, Iterable):
|
333
332
|
matches = _find_matches(name, targets) + _find_matches(
|
@@ -189,7 +189,7 @@ def _initialize_scale_zero_point(
|
|
189
189
|
else:
|
190
190
|
# TODO: consider erroring out in the future, as if the dtype is not one of these,
|
191
191
|
# there is likely a bug
|
192
|
-
if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32]:
|
192
|
+
if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32, torch.float64]:
|
193
193
|
scale_dtype = torch.float16
|
194
194
|
zp_dtype = quantization_args.pytorch_dtype()
|
195
195
|
|
@@ -22,9 +22,7 @@ from compressed_tensors.quantization.quant_scheme import (
|
|
22
22
|
preset_name_to_scheme,
|
23
23
|
)
|
24
24
|
from compressed_tensors.quantization.utils import (
|
25
|
-
calculate_compression_ratio,
|
26
25
|
is_module_quantized,
|
27
|
-
iter_named_quantizable_modules,
|
28
26
|
module_type,
|
29
27
|
parse_out_kv_cache_args,
|
30
28
|
)
|
@@ -177,9 +175,7 @@ class QuantizationConfig(BaseModel):
|
|
177
175
|
quantization_status = None
|
178
176
|
ignore = {}
|
179
177
|
quantization_type_names = set()
|
180
|
-
for name, submodule in iter_named_quantizable_modules(
|
181
|
-
model, include_children=True, include_attn=True
|
182
|
-
):
|
178
|
+
for name, submodule in model.named_modules():
|
183
179
|
layer_type = module_type(submodule)
|
184
180
|
if not is_module_quantized(submodule):
|
185
181
|
if layer_type not in ignore:
|
@@ -26,6 +26,7 @@ from compressed_tensors.quantization.quant_args import (
|
|
26
26
|
QuantizationType,
|
27
27
|
)
|
28
28
|
from compressed_tensors.quantization.quant_scheme import QuantizationScheme
|
29
|
+
from compressed_tensors.utils import deprecated
|
29
30
|
from torch import FloatTensor, IntTensor, Tensor
|
30
31
|
from torch.nn import Module
|
31
32
|
from tqdm import tqdm
|
@@ -36,7 +37,6 @@ __all__ = [
|
|
36
37
|
"is_module_quantized",
|
37
38
|
"is_model_quantized",
|
38
39
|
"module_type",
|
39
|
-
"calculate_compression_ratio",
|
40
40
|
"get_torch_bit_depth",
|
41
41
|
"can_quantize",
|
42
42
|
"parse_out_kv_cache_args",
|
@@ -276,12 +276,7 @@ def is_model_quantized(model: Module) -> bool:
|
|
276
276
|
:param model: pytorch model
|
277
277
|
:return: True if model is quantized, False otherwise
|
278
278
|
"""
|
279
|
-
|
280
|
-
for _, submodule in iter_named_leaf_modules(model):
|
281
|
-
if is_module_quantized(submodule):
|
282
|
-
return True
|
283
|
-
|
284
|
-
return False
|
279
|
+
return any(is_module_quantized(submodule) for submodule in model.modules())
|
285
280
|
|
286
281
|
|
287
282
|
def module_type(module: Module) -> str:
|
@@ -294,6 +289,11 @@ def module_type(module: Module) -> str:
|
|
294
289
|
return type(module).__name__
|
295
290
|
|
296
291
|
|
292
|
+
@deprecated(
|
293
|
+
message="This function will be removed in a future release. "
|
294
|
+
"Please use `model.named_modules()` and filter by "
|
295
|
+
"compressed_tensors.InternalModule if neceessary"
|
296
|
+
)
|
297
297
|
def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None, None]:
|
298
298
|
"""
|
299
299
|
Yields modules that do not have any submodules except observers. The observers
|
@@ -320,6 +320,11 @@ def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None
|
|
320
320
|
yield name, submodule
|
321
321
|
|
322
322
|
|
323
|
+
@deprecated(
|
324
|
+
message="This function will be removed in a future release. "
|
325
|
+
"Please use `model.named_modules()` and filter by "
|
326
|
+
"compressed_tensors.InternalModule if neceessary"
|
327
|
+
)
|
323
328
|
def iter_named_quantizable_modules(
|
324
329
|
model: Module,
|
325
330
|
include_children: bool = True,
|
@@ -330,7 +335,6 @@ def iter_named_quantizable_modules(
|
|
330
335
|
Yield name and submodule of
|
331
336
|
- leaf modules, set by include_children
|
332
337
|
- attention modules, set by include_attn
|
333
|
-
|
334
338
|
:param model: model to get leaf modules of
|
335
339
|
:param include_children: flag to get the leaf modules
|
336
340
|
:param include_attn: flag to get the attention modules
|
@@ -397,34 +401,6 @@ def can_quantize(value: torch.Tensor, quant_args: "QuantizationArgs") -> bool:
|
|
397
401
|
return bit_depth > quant_args.num_bits
|
398
402
|
|
399
403
|
|
400
|
-
def calculate_compression_ratio(model: Module) -> float:
|
401
|
-
"""
|
402
|
-
Calculates the quantization compression ratio of a pytorch model, based on the
|
403
|
-
number of bits needed to represent the total weights in compressed form. Does not
|
404
|
-
take into account activation quantizations.
|
405
|
-
|
406
|
-
:param model: pytorch module to calculate compression ratio for
|
407
|
-
:return: compression ratio of the whole model
|
408
|
-
"""
|
409
|
-
total_compressed = 0.0
|
410
|
-
total_uncompressed = 0.0
|
411
|
-
for name, submodule in tqdm(
|
412
|
-
iter_named_leaf_modules(model),
|
413
|
-
desc="Calculating quantization compression ratio",
|
414
|
-
):
|
415
|
-
for parameter in model.parameters():
|
416
|
-
uncompressed_bits = get_torch_bit_depth(parameter)
|
417
|
-
compressed_bits = uncompressed_bits
|
418
|
-
if is_module_quantized(submodule) and submodule.quantization_scheme.weights:
|
419
|
-
compressed_bits = submodule.quantization_scheme.weights.num_bits
|
420
|
-
|
421
|
-
num_weights = parameter.numel()
|
422
|
-
total_compressed += compressed_bits * num_weights
|
423
|
-
total_uncompressed += uncompressed_bits * num_weights
|
424
|
-
|
425
|
-
return total_uncompressed / total_compressed
|
426
|
-
|
427
|
-
|
428
404
|
def is_kv_cache_quant_scheme(scheme: QuantizationScheme) -> bool:
|
429
405
|
"""
|
430
406
|
Check whether the QuantizationScheme targets the kv cache.
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import torch
|
16
|
+
from compressed_tensors.transform import TransformConfig, TransformFactory
|
17
|
+
|
18
|
+
|
19
|
+
__all__ = ["apply_transform_config"]
|
20
|
+
|
21
|
+
|
22
|
+
def apply_transform_config(model: torch.nn.Module, config: TransformConfig):
|
23
|
+
"""
|
24
|
+
Apply a transform config to a model. Weight transforms are fused into weights, while
|
25
|
+
activation transforms are attached as submodules and trigger via pytorch hooks
|
26
|
+
|
27
|
+
:param model: model to apply config to
|
28
|
+
:param config: transform config to apply
|
29
|
+
"""
|
30
|
+
for name, scheme in config.config_groups.items():
|
31
|
+
factory = TransformFactory.from_scheme(scheme, name=name)
|
32
|
+
factory.apply_to_model(model)
|
@@ -17,6 +17,7 @@ from typing import Optional
|
|
17
17
|
|
18
18
|
import torch
|
19
19
|
import torch.nn.utils.parametrize as P
|
20
|
+
from compressed_tensors import InternalModule
|
20
21
|
from compressed_tensors.quantization.lifecycle import is_target # TODO: move to utils
|
21
22
|
from compressed_tensors.registry.registry import RegistryMixin, T
|
22
23
|
from compressed_tensors.transform import (
|
@@ -26,6 +27,7 @@ from compressed_tensors.transform import (
|
|
26
27
|
)
|
27
28
|
from compressed_tensors.utils import (
|
28
29
|
align_module_device,
|
30
|
+
delete_offload_module,
|
29
31
|
has_offloaded_params,
|
30
32
|
patch_attr,
|
31
33
|
register_offload_module,
|
@@ -99,7 +101,7 @@ class TransformFactory(RegistryMixin, ABC):
|
|
99
101
|
# create transform as submodule
|
100
102
|
transform_name = f"{self.name}_{args.location.value}"
|
101
103
|
transform = self.create_transform(module, args)
|
102
|
-
register_offload_module(module, transform_name, transform)  # (1)
|
104
|
+
register_offload_module(module, transform_name, transform)
|
103
105
|
|
104
106
|
# register input transformation hook
|
105
107
|
if args.location == TransformLocation.INPUT:
|
@@ -118,6 +120,7 @@ class TransformFactory(RegistryMixin, ABC):
|
|
118
120
|
assert isinstance(module, torch.nn.Linear)
|
119
121
|
assert module.bias is None
|
120
122
|
|
123
|
+
# fuse transform into weight
|
121
124
|
with torch.no_grad(), align_module_device(module):
|
122
125
|
update_offload_parameter(module, "weight", transform(module.weight))
|
123
126
|
|
@@ -128,6 +131,9 @@ class TransformFactory(RegistryMixin, ABC):
|
|
128
131
|
raise ValueError("Offloaded training is not supported")
|
129
132
|
P.register_parametrization(module, "weight", transform)
|
130
133
|
|
134
|
+
# transform is no longer needed (unfusing is not supported)
|
135
|
+
delete_offload_module(module, transform_name)
|
136
|
+
|
131
137
|
# register output transformation hook
|
132
138
|
elif args.location == TransformLocation.OUTPUT:
|
133
139
|
|
@@ -140,11 +146,8 @@ class TransformFactory(RegistryMixin, ABC):
|
|
140
146
|
else:
|
141
147
|
raise NotImplementedError()
|
142
148
|
|
143
|
-
# (1) even in the `weight` cases, this submodule attachment is needed in order
|
144
|
-
# to support saving in the frozen state
|
145
|
-
|
146
149
|
|
147
|
-
class TransformBase(
|
150
|
+
class TransformBase(InternalModule, ABC):
|
148
151
|
"""
|
149
152
|
Represents the application of a transform according to TransformArgs
|
150
153
|
"""
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import torch
|
16
|
+
|
17
|
+
|
18
|
+
__all__ = ["InternalModule"]
|
19
|
+
|
20
|
+
|
21
|
+
class InternalModule(torch.nn.Module):
|
22
|
+
"""
|
23
|
+
Abstract base class for modules which are not a part of the model definition.
|
24
|
+
`torch.nn.Module`s which inherit from this class will not be targeted by configs
|
25
|
+
|
26
|
+
This is typically used to skip applying configs to `Observers` and `Transforms`
|
27
|
+
"""
|
28
|
+
|
29
|
+
pass
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.10.3a20250707
|
3
|
+
Version: 0.10.3a20250709
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -72,6 +72,7 @@ src/compressed_tensors/quantization/utils/helpers.py
|
|
72
72
|
src/compressed_tensors/registry/__init__.py
|
73
73
|
src/compressed_tensors/registry/registry.py
|
74
74
|
src/compressed_tensors/transform/__init__.py
|
75
|
+
src/compressed_tensors/transform/apply.py
|
75
76
|
src/compressed_tensors/transform/transform_args.py
|
76
77
|
src/compressed_tensors/transform/transform_config.py
|
77
78
|
src/compressed_tensors/transform/transform_scheme.py
|
@@ -86,6 +87,7 @@ src/compressed_tensors/transform/utils/hadamards.safetensors
|
|
86
87
|
src/compressed_tensors/transform/utils/utils.py
|
87
88
|
src/compressed_tensors/utils/__init__.py
|
88
89
|
src/compressed_tensors/utils/helpers.py
|
90
|
+
src/compressed_tensors/utils/internal.py
|
89
91
|
src/compressed_tensors/utils/offload.py
|
90
92
|
src/compressed_tensors/utils/permutations_24.py
|
91
93
|
src/compressed_tensors/utils/permute.py
|