compressed-tensors 0.10.2a20250612__tar.gz → 0.10.2a20250616__tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {compressed_tensors-0.10.2a20250612/src/compressed_tensors.egg-info → compressed_tensors-0.10.2a20250616}/PKG-INFO +1 -1
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/setup.py +1 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/factory/hadamard.py +1 -1
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/factory/random_hadamard.py +1 -1
- compressed_tensors-0.10.2a20250616/src/compressed_tensors/transform/utils/hadamard.py +160 -0
- compressed_tensors-0.10.2a20250616/src/compressed_tensors/transform/utils/hadamards.safetensors +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/utils/offload.py +62 -12
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors.egg-info/SOURCES.txt +1 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_transform/utils/test_hadamard.py +38 -32
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_utils/test_offload.py +97 -8
- compressed_tensors-0.10.2a20250612/src/compressed_tensors/transform/utils/hadamard.py +0 -161
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/.gitkeep +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/actions/test/action.yml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/scripts/step-status +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/workflows/build-test.yml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/workflows/build.yml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/workflows/report.yml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/workflows/test-check.yaml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/workflows/test.yml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/workflows/trigger-all.yml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.github/workflows/upload.yml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/.gitignore +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/LICENSE +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/Makefile +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/README.md +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/examples/bit_packing/int4_config.json +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/examples/bitmask_compression.ipynb +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/examples/llama_1.1b/ex_config_quantization.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/examples/llama_1.1b/example_quant_config.json +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/examples/quantize_and_pack_int4.ipynb +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/pyproject.toml +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/setup.cfg +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/README.md +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/base.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/config/base.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/quant_args.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/factory/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/factory/base.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/factory/matrix_multiply.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/transform_args.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/transform_config.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/transform_scheme.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/utils/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/transform/utils/utils.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/conftest.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/model_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_configs/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_configs/test_base.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_linear/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_linear/test_compressed_linear.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/conftest.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/test_apply.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/test_forward.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/test_configs/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/test_configs/test_strategies.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/test_quant_args.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/test_quant_config.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/test_quant_scheme.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_quantization/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_registry.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_transform/factory/test_correctness.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_transform/factory/test_memory.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_transform/test_transform_args.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_transform/test_transform_config.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_transform/test_transform_scheme.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_utils/__init__.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_utils/test_helpers.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/test_utils/test_safetensors_load.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/tests/testing_utils.py +0 -0
- {compressed_tensors-0.10.2a20250612 → compressed_tensors-0.10.2a20250616}/utils/copyright.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.2a20250612
+Version: 0.10.2a20250616
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
src/compressed_tensors/transform/factory/hadamard.py
@@ -59,7 +59,7 @@ class HadamardFactory(TransformFactory):
         return HadamardTransform(weight, args)

     def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
-        data = deterministic_hadamard_matrix(size)
+        data = deterministic_hadamard_matrix(size, dtype, device)
         data = data.to(dtype=dtype, device=device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)
src/compressed_tensors/transform/factory/random_hadamard.py
@@ -29,6 +29,6 @@ class RandomHadamardFactory(HadamardFactory):
     """

     def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
-        data = random_hadamard_matrix(size, self.generator)
+        data = random_hadamard_matrix(size, dtype, device, self.generator)
         data = data.to(dtype=dtype, device=device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)
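Both factory changes above thread `dtype` and `device` into the matrix constructor itself instead of building the matrix on CPU in the default dtype and converting afterwards. A minimal sketch of the difference, using `torch.eye` as a stand-in for the Hadamard construction (the names mirror the factory signature; this is an illustration, not the library code):

    import torch

    size, dtype, device = 8, torch.bfloat16, torch.device("cpu")

    # before: construct in the default dtype, then cast and move
    old = torch.eye(size) / size**0.5          # float32 intermediate
    old = old.to(dtype=dtype, device=device)   # extra allocation and cast

    # after: construct directly in the target dtype on the target device
    new = torch.eye(size, dtype=dtype, device=device) / size**0.5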
compressed_tensors-0.10.2a20250616/src/compressed_tensors/transform/utils/hadamard.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from pathlib import Path
+from typing import Optional
+
+import torch
+from safetensors import safe_open
+
+
+REPO_PATH = Path(__file__).parent / "hadamards.safetensors"
+
+
+__all__ = ["random_hadamard_matrix", "deterministic_hadamard_matrix", "is_pow2"]
+
+
+# note that hadamard matrix multiplication can be accelerated using a library such as
+# https://github.com/Dao-AILab/fast-hadamard-transform/tree/master
+
+
+def deterministic_hadamard_matrix(
+    size: int,
+    dtype: torch.dtype = torch.bfloat16,
+    device: torch.device = torch.device("cpu"),
+) -> torch.Tensor:
+    """
+    Construct an n-by-n Hadamard matrix, using Sylvester's construction.
+    `n` must be a power of 2.
+
+    Adapted from https://github.com/scipy/scipy/blob/v1.15.2/scipy/linalg/_special_matrices.py  # noqa: E501
+
+    :param size: order of the matrix, must be a power of 2
+    :param dtype: data type of matrix
+    :param device: device to construct matrix on
+    :return: hadamard matrix of size `size`
+    """
+    if size <= 0:
+        raise ValueError("Cannot construct deterministic hadamard of size <= 0")
+
+    log2 = int(math.log2(size))
+    if size != 2**log2:
+        raise ValueError("Cannot construct deterministic hadamard of size != 2^n")
+
+    H = torch.tensor([[1]], dtype=dtype, device=device)
+
+    # Sylvester's construction
+    for _ in range(log2):
+        H = torch.vstack((torch.hstack((H, H)), torch.hstack((H, -H))))
+
+    return H / math.sqrt(size)
+
+
+def random_hadamard_matrix(
+    size: int,
+    dtype: torch.dtype = torch.bfloat16,
+    device: torch.device = torch.device("cpu"),
+    gen: Optional[torch.Generator] = None,
+) -> torch.Tensor:
+    """
+    Produces a randomly generated Hadamard matrix. Differs from
+    `deterministic_hadamard_matrix` in that this function supports non powers of 2
+    and randomization using a seeded generator
+
+    Adapted from https://github.com/facebookresearch/SpinQuant/blob/main/utils/hadamard_utils.py  # noqa: E501
+    Known matrices were retrieved from N. J. A. Sloane's Library of Hadamard Matrices http://www.neilsloane.com/hadamard/  # noqa: E501
+
+    :param size: the dimension of the hadamard matrix
+    :param dtype: data type of matrix
+    :param device: device to construct matrix on
+    :param gen: optional generator for random values
+    :return: randomly generated hadamard matrix
+    """
+    Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=dtype)  # cpu
+    Q = Q.to(device=device)
+    Q = Q * 2 - 1
+    Q = torch.diag(Q)
+    return _matmul_hadU(Q) / math.sqrt(size)
+
+
+def is_pow2(n: int) -> bool:
+    """
+    Check if a number is a power of 2
+
+    :param n: number to check
+    :return: True iff `n` is a power of 2
+    """
+    return n > 0 and (n & (n - 1) == 0)
+
+
+def _fetch_hadamard_divisor(
+    n: int,
+    dtype: torch.dtype,
+    device: torch.device = torch.device("cpu"),
+    file_path: str = REPO_PATH,
+) -> Optional[torch.Tensor]:
+    """
+    Fetch a known hadamard matrix from the given file path. The returned matrix will
+    be of size `k` such that `n / k` is a power of two. Return None if no such
+    matrix exists.
+
+    Note: This function reopens the safetensors file every time it is called.
+    This is technically inefficient, but a very small runtime cost and simpler
+    than forcing callers to manage the file open context
+
+    :param n: size of known hadamard matrix
+    :return: a known hadamard matrix of size `n` if one exists, else None
+    """
+    with safe_open(file_path, framework="pt", device=str(device)) as file:
+        divisors = sorted((int(key) for key in file.keys()), reverse=True)
+        for divisor in divisors:
+            if n % divisor == 0 and is_pow2(n // divisor):
+                return file.get_tensor(str(divisor)).to(dtype=dtype)
+
+    return None
+
+
+def _matmul_hadU(X: torch.Tensor) -> torch.Tensor:
+    size = X.size(0)
+    dtype = X.dtype
+    device = X.device
+
+    # Check if we have the determined hadamard matrix
+    hadK = _fetch_hadamard_divisor(size, dtype, device=device)
+    if hadK is None:
+        raise ValueError(f"Cannot construct random hadamard matrix of size {size}")
+    K = hadK.size(0)
+
+    # Reshape diag matrix with randomized -1/+1
+    input = X.clone().view(-1, size, 1)
+    output = input.clone()
+    while input.shape[1] > K:
+        input = input.view(input.shape[0], input.shape[1] // 2, 2, input.shape[2])
+        output = output.view(input.shape)
+        output[:, :, 0, :] = input[:, :, 0, :] + input[:, :, 1, :]
+        output[:, :, 1, :] = input[:, :, 0, :] - input[:, :, 1, :]
+        output = output.view(input.shape[0], input.shape[1], -1)
+        (input, output) = (output, input)
+    assert input.shape[1] == K
+    del output
+
+    # Do not explicitly repeat - OOM
+    # input = torch.bmm(
+    #     hadK.repeat(len(input), 1, 1).to(input.device).to(input.dtype), input)
+    # Use bcast instead
+    input = hadK.view(1, K, K).to(input) @ input
+
+    # normalize
+    return input.view(X.shape)
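Both public constructors return H / sqrt(n), so the product with the transpose is approximately the identity; the updated tests later in this diff assert exactly this on GPU in bfloat16. A smaller CPU sketch of the same property (size, dtype, and tolerance here are illustrative, with float32 chosen for numeric headroom):

    import torch
    from compressed_tensors.transform.utils.hadamard import (
        deterministic_hadamard_matrix,
        is_pow2,
        random_hadamard_matrix,
    )

    size = 2048
    assert is_pow2(size)  # required by the deterministic (Sylvester) construction

    H = deterministic_hadamard_matrix(size, dtype=torch.float32)
    assert torch.allclose(H @ H.T, torch.eye(size), atol=1e-3)

    # the randomized variant accepts a seeded generator, as in the tests below
    gen = torch.Generator().manual_seed(42)
    R = random_hadamard_matrix(size, dtype=torch.float32, gen=gen)
    assert torch.allclose(R @ R.T, torch.eye(size), atol=1e-3)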
compressed_tensors-0.10.2a20250616/src/compressed_tensors/transform/utils/hadamards.safetensors
ADDED
Binary file
src/compressed_tensors/utils/offload.py
@@ -31,9 +31,10 @@ import contextlib
 import warnings
 from functools import wraps
 from operator import attrgetter
-from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
+from typing import Any, Callable, Dict, Iterable, Literal, Optional, Tuple, Union

 import torch
+from compressed_tensors.utils import patch_attr


 try:
@@ -83,6 +84,7 @@ __all__ = [
     "register_offload_module",
     "delete_offload_module",
     "offloaded_dispatch",
+    "disable_offloading",
 ]


@@ -204,9 +206,24 @@ def register_offload_parameter(
     has_onload = any(p.device != torch.device("meta") for p in module.parameters())
     module.register_parameter(name, parameter)

+    # do everything AlignDevicesHook.init_hook does
+    # https://github.com/huggingface/accelerate/blob/main/src/accelerate/hooks.py#L281
     if has_offloaded_params(module):
-
-
+        hook: AlignDevicesHook = module._hf_hook
+        assert hook.weights_map is not None
+
+        # append to original_devices
+        hook.original_devices[name] = parameter.device
+
+        # append to weights map
+        offload_to_weights_map(hook.weights_map, name, parameter.data, offload_device)
+
+        # append to tied_params_map
+        offloaded = hook.weights_map[name]
+        if hook.tied_params_map is not None:
+            hook.tied_params_map[offloaded.data_ptr()] = {}  # (1)
+
+    # perform offloading
     if not has_onload:
         set_module_tensor_to_device(module, name, "meta")
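The bookkeeping added above mirrors what accelerate's `AlignDevicesHook.init_hook` does for parameters that exist at dispatch time, so a parameter registered afterwards is tracked the same way; since every data pointer is entered into `tied_params_map` (see note (1) at the end of this file), tied parameters stay tied. A condensed sketch following `test_register_offload_parameter_shared` later in this diff (requires accelerate and a CUDA device):

    import torch
    from compressed_tensors.utils import (
        align_module_device,
        offloaded_dispatch,
        register_offload_parameter,
    )

    module = torch.nn.Linear(1, 2)
    offloaded_dispatch(module, torch.device("cuda:0"))

    param = torch.nn.Parameter(torch.tensor(1.0))
    register_offload_parameter(module, "c", param)
    register_offload_parameter(module, "d", param)  # same underlying tensor

    with align_module_device(module):
        assert module.c is module.d  # sharing survives onloading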
@@ -214,7 +231,7 @@
 def update_offload_parameter(
     module: torch.nn.Module,
     name: str,
-    data:
+    data: torch.Tensor,
     offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
 ):
     """
@@ -227,7 +244,7 @@ def update_offload_parameter(
     :param offload_device: device on which weight will be offloaded to. If None is
         provided, then infer device from parameters on module
     """
-    param = getattr(module, name)
+    param: torch.nn.Parameter = getattr(module, name)
     if param.data.shape != data.shape:
         warnings.warn(
             f"Shape of parameter being updated {param.data.shape} does not match shape "
@@ -235,7 +252,7 @@ def update_offload_parameter(
     )

     # copy data into onloaded parameter if applicable
-    if param.device != torch.device("meta"):
+    if param.device != torch.device("meta") and data is not param.data:
         param.data.copy_(data)

     # update offload dict
@@ -420,7 +437,6 @@ def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.M
     hook: AlignDevicesHook = base._hf_hook
     assert hook.offload
     assert hook.weights_map is not None
-    assert hook.tied_params_map is not None

     # offloading kwargs for submodule
     place_submodules = False
@@ -435,7 +451,8 @@ def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.M
         module, include_buffers=offload_buffers, recurse=place_submodules
     ):
         offloaded = param.to(offload_device)
-        hook.tied_params_map
+        if hook.tied_params_map is not None:
+            hook.tied_params_map[offloaded.data_ptr()] = {}  # (1)
         offload_to_weights_map(hook.weights_map, f"{name}.{param_name}", offloaded)

     # if the parent places submodules, offload here
@@ -463,9 +480,6 @@ def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.M

     base.register_module(name, module)

-# (1): Since we cannot know which pointers are shared when we add parameters in an
-# online way, assume that all pointers are shared. This comes at no runtime cost
-

 def delete_offload_module(base: torch.nn.Module, name: str):
     """
@@ -501,7 +515,9 @@ def offloaded_dispatch(
         raise NotImplementedError("Disk offloading is not currently supported")

     # create weights map
-
+    state_dict = module.state_dict()
+    state_dict = {key: val.to(offload_device) for key, val in state_dict.items()}
+    weights_map = OffloadedWeightsLoader(state_dict=state_dict, device=offload_device)

     # create tied params map
     tied_params = find_tied_parameters(module)
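The weights map is now built from a state dict that is moved to the offload device up front. For context, `offloaded_dispatch` is exercised by `test_offloaded_dispatch` later in this diff; a condensed usage sketch based on those test calls (argument order as in the tests; requires accelerate and a CUDA device):

    import torch
    from compressed_tensors.utils import offloaded_dispatch

    module = torch.nn.Linear(1, 2)
    offloaded_dispatch(module, torch.device("cuda:0"), torch.device("cpu"))

    # parameters are replaced by meta tensors; the real data lives in the weights map
    assert module.weight.device == torch.device("meta")
    assert module._hf_hook.weights_map["weight"].device == torch.device("cpu")

    # a forward pass onloads to cuda:0, runs, then offloads again
    out = module(torch.empty(1, device="cuda:0"))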
@@ -522,6 +538,36 @@
     return module


+@contextlib.contextmanager
+def disable_offloading():
+    """
+    Keep modules onloaded and disable offloading until this context exits.
+    Affects modules which have been hooked with accelerate's `AlignDevicesHook`
+    """
+    original_pre_forward = AlignDevicesHook.pre_forward
+    onloaded_modules: Dict[torch.nn.Module, Tuple[AlignDevicesHook, bool]] = dict()
+
+    # onload once and disable any future onloading/offloading steps
+    def keep_onload_pre_forward(self: AlignDevicesHook, module, *args, **kwargs):
+        ret = original_pre_forward(self, module, *args, **kwargs)
+        if module not in onloaded_modules:
+            onloaded_modules[module] = (self, self.offload)
+            self.offload = False
+        return ret
+
+    # use the patched pre_forward function within the context
+    with patch_attr(AlignDevicesHook, "pre_forward", keep_onload_pre_forward):
+        yield
+
+    # manually offload all modules that were onloaded
+    # update any parameters which may have changed
+    for module, (hook, offload) in onloaded_modules.items():
+        hook.offload = offload
+        for name, param in module.named_parameters():
+            update_offload_parameter(module, name, param.data)
+        hook.post_forward(module, None)
+
+
 """ Upstreamed Functions """


@@ -589,3 +635,7 @@ def align_module_device(

     else:
         yield
+
+
+# (1): Since we cannot know which pointers are shared when we add parameters in an
+# online way, assume that all pointers are shared. This has virtually no runtime cost
src/compressed_tensors.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.2a20250612
+Version: 0.10.2a20250616
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
src/compressed_tensors.egg-info/SOURCES.txt
@@ -82,6 +82,7 @@ src/compressed_tensors/transform/factory/matrix_multiply.py
 src/compressed_tensors/transform/factory/random_hadamard.py
 src/compressed_tensors/transform/utils/__init__.py
 src/compressed_tensors/transform/utils/hadamard.py
+src/compressed_tensors/transform/utils/hadamards.safetensors
 src/compressed_tensors/transform/utils/utils.py
 src/compressed_tensors/utils/__init__.py
 src/compressed_tensors/utils/helpers.py
tests/test_transform/utils/test_hadamard.py
@@ -13,46 +13,48 @@
 # limitations under the License.


-import numpy
 import pytest
 import torch
 from compressed_tensors.transform.utils.hadamard import (
-    _get_had12,
-    _get_had20,
     deterministic_hadamard_matrix,
+    is_pow2,
     random_hadamard_matrix,
 )
+from tests.testing_utils import requires_gpu


-
-
-
-
-
-
-
-
-
-
-#
-
-
+_sizes_to_test = [
+    768,  # gpt2 small
+    1024,  # gpt2 medium
+    1280,  # qwen_2_5_vl vision
+    1600,  # gpt2 xl
+    2048,  # gpt3 small
+    3584,  # qwen_2_5_vl
+    3840,  # qwen_2_5_vl vision qkv
+    4096,  # llama3
+    7168,  # deepseek_v3
+    14336,  # llama3 intermediate
+    18432,  # deepseek_v3 intermediate
+    18944,  # qwen_2_5_vl intermediate
+]
+_atol = 1e-1  # bfloat16 is low precision for large matrices


-@pytest.mark.parametrize(
-    "size",
-    [4096, 2048],
-)
+@requires_gpu
+@pytest.mark.parametrize("size", _sizes_to_test)
 def test_random_hadamard_matrix_compliant(size):
-
-
-
+    # (H / sqrt(n))(H.T / sqrt(n)) == I
+    matrix = random_hadamard_matrix(size, device="cuda")
+    product = matrix @ matrix.T
+    eye = torch.eye(size, dtype=product.dtype, device="cuda")
+    assert torch.allclose(product, eye, atol=_atol)


 def test_random_hadamard_generator():
+    # check that generation is deterministic with a seed
     generator = torch.Generator().manual_seed(42)
-    one = random_hadamard_matrix(2048, generator)
-    two = random_hadamard_matrix(2048, generator)
+    one = random_hadamard_matrix(2048, gen=generator)
+    two = random_hadamard_matrix(2048, gen=generator)

     one_true = torch.tensor(
         [
@@ -73,12 +75,16 @@ def test_random_hadamard_generator():
     assert torch.all(two[:3, :3].sign() == two_true.sign())


-@pytest.mark.parametrize(
-    "size",
-    [1024],
-)
+@requires_gpu
+@pytest.mark.parametrize("size", _sizes_to_test)
 def test_deterministic_hadamard_compliant(size):
-
+    if not is_pow2(size):
+        with pytest.raises(ValueError):
+            matrix = deterministic_hadamard_matrix(size, device="cuda")
+        return
+
     # (H / sqrt(n))(H.T / sqrt(n)) == I
-
-
+    matrix = deterministic_hadamard_matrix(size, device="cuda")
+    product = matrix @ matrix.T
+    eye = torch.eye(size, dtype=product.dtype, device="cuda")
+    assert torch.allclose(product, eye, atol=_atol)
tests/test_utils/test_offload.py
@@ -19,6 +19,7 @@ from compressed_tensors.utils import (
     delete_offload_module,
     delete_offload_parameter,
     disable_hf_hook,
+    disable_offloading,
     get_execution_device,
     has_offloaded_params,
     offloaded_dispatch,
@@ -148,6 +149,47 @@ def test_register_offload_parameter():
     assert module.a.device == module.b.device == module.c.device == torch.device("meta")


+@requires_accelerate()
+@requires_gpu
+def test_register_offload_parameter_hook_replacement():
+    module = ExampleModule()
+    parameter_c = torch.nn.Parameter(torch.tensor(1.0, device="cuda"))
+    parameter_d = torch.nn.Parameter(torch.tensor(1.0, device="cpu"))
+
+    offloaded_dispatch(module, "cuda")
+    register_offload_parameter(module, "c", parameter_c)
+    register_offload_parameter(module, "d", parameter_d)
+
+    with disable_hf_hook(module):
+        assert module.a.device == torch.device("cpu")
+        assert module.b.device == torch.device("cpu")
+        assert module.c.device == torch.device("cuda:0")
+        assert module.d.device == torch.device("cpu")
+
+    assert module.a.device == torch.device("meta")
+    assert module.b.device == torch.device("meta")
+    assert module.c.device == torch.device("meta")
+    assert module.d.device == torch.device("meta")
+    assert module._hf_hook.weights_map["a"].device == torch.device("cpu")
+    assert module._hf_hook.weights_map["b"].device == torch.device("cpu")
+    assert module._hf_hook.weights_map["c"].device == torch.device("cpu")
+    assert module._hf_hook.weights_map["d"].device == torch.device("cpu")
+
+
+@requires_accelerate()
+@requires_gpu
+def test_register_offload_parameter_shared():
+    module = ExampleModule()
+    parameter = torch.nn.Parameter(torch.tensor(1.0))
+
+    offloaded_dispatch(module, "cuda")
+    register_offload_parameter(module, "c", parameter)
+    register_offload_parameter(module, "d", parameter)
+
+    with align_module_device(module):
+        assert module.c is module.d
+
+
 @requires_accelerate()
 def test_update_offload_parameter():
     from accelerate.hooks import attach_align_device_hook
@@ -397,15 +439,23 @@ def test_delete_offload_module(exec_device):

 @requires_gpu
 @requires_accelerate()
-@pytest.mark.parametrize(
-
+@pytest.mark.parametrize(
+    "exec_device,offload_device",
+    [
+        (torch.device("cpu"), torch.device("cpu")),
+        (torch.device("cpu"), torch.device("cuda:0")),
+        (torch.device("cuda:0"), torch.device("cpu")),
+        (torch.device("cuda:0"), torch.device("cuda:0")),
+    ],
+)
+def test_offloaded_dispatch(exec_device, offload_device):
     # single module
-    module = torch.nn.Linear(1, 2)
-    module = offloaded_dispatch(module, exec_device)
+    module = torch.nn.Linear(1, 2, device=offload_device)
+    module = offloaded_dispatch(module, exec_device, offload_device)
     assert has_offloaded_params(module)
     assert module._hf_hook.offload
     assert module.weight.device == torch.device("meta")
-    assert "weight"
+    assert module._hf_hook.weights_map["weight"].device == offload_device
     assert module._hf_hook.tied_params_map is not None

     # can run
@@ -413,13 +463,13 @@ def test_offloaded_dispatch(exec_device):

     # model
     model = ExampleModel()
-    model = offloaded_dispatch(model, exec_device)
+    model = offloaded_dispatch(model, exec_device, offload_device)
     assert not has_offloaded_params(model)

     assert has_offloaded_params(model.linear)
     assert model.linear._hf_hook.offload
     assert model.linear.weight.device == torch.device("meta")
-    assert
+    assert model.linear._hf_hook.weights_map["weight"].device == offload_device
     assert model.linear._hf_hook.tied_params_map is not None

     # can run
@@ -429,4 +479,43 @@ def test_offloaded_dispatch(exec_device):
     parameter = torch.nn.Parameter(torch.tensor(1.0))
     register_offload_parameter(module, "new_param", parameter)
     assert module.new_param.device == torch.device("meta")
-    assert module._hf_hook.weights_map["new_param"].device ==
+    assert module._hf_hook.weights_map["new_param"].device == offload_device
+
+
+@requires_gpu
+@requires_accelerate()
+@pytest.mark.parametrize(
+    "exec_device,offload_device",
+    [
+        (torch.device("cpu"), torch.device("cpu")),
+        (torch.device("cpu"), torch.device("cuda:0")),
+        (torch.device("cuda:0"), torch.device("cpu")),
+        (torch.device("cuda:0"), torch.device("cuda:0")),
+    ],
+)
+def test_disable_offloading(exec_device, offload_device):
+    module = torch.nn.Linear(1, 2, device=exec_device)
+
+    # non-offloaded modules are unaffected
+    with disable_offloading():
+        output = module(torch.empty(1, device=exec_device))
+        assert module.weight.device == exec_device
+        assert output.device == exec_device
+
+    # offloaded modules stay on device until context exit
+    offloaded_dispatch(module, exec_device, offload_device)
+    assert module.weight.device == torch.device("meta")
+    assert module._hf_hook.weights_map["weight"].device == offload_device
+
+    with disable_offloading():
+        assert module.weight.device == torch.device("meta")
+        output = module(torch.empty(1, device=exec_device))
+        assert module.weight.device == exec_device
+        assert output.device == exec_device
+
+        output = module(torch.empty(1, device=exec_device))
+        assert module.weight.device == exec_device
+        assert output.device == exec_device
+
+    assert module.weight.device == torch.device("meta")
+    assert module._hf_hook.weights_map["weight"].device == offload_device