compressed-tensors 0.9.5a20250507__tar.gz → 0.9.5a20250512__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. {compressed_tensors-0.9.5a20250507/src/compressed_tensors.egg-info → compressed_tensors-0.9.5a20250512}/PKG-INFO +1 -1
  2. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +1 -0
  3. compressed_tensors-0.9.5a20250512/src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +190 -0
  4. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/config/base.py +1 -0
  5. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/quant_args.py +54 -2
  6. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/quant_scheme.py +12 -0
  7. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/version.py +1 -1
  8. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512/src/compressed_tensors.egg-info}/PKG-INFO +1 -1
  9. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors.egg-info/SOURCES.txt +2 -0
  10. compressed_tensors-0.9.5a20250512/tests/test_compressors/quantized_compressors/test_nvfp4_quant.py +43 -0
  11. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/.gitkeep +0 -0
  12. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/actions/test/action.yml +0 -0
  13. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/scripts/step-status +0 -0
  14. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/workflows/build-test.yml +0 -0
  15. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/workflows/build.yml +0 -0
  16. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/workflows/report.yml +0 -0
  17. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/workflows/test-check.yaml +0 -0
  18. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/workflows/test.yml +0 -0
  19. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/workflows/trigger-all.yml +0 -0
  20. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.github/workflows/upload.yml +0 -0
  21. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/.gitignore +0 -0
  22. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/LICENSE +0 -0
  23. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/Makefile +0 -0
  24. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/README.md +0 -0
  25. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/examples/bit_packing/ex_quantize_and_pack.py +0 -0
  26. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/examples/bit_packing/int4_config.json +0 -0
  27. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/examples/bitmask_compression.ipynb +0 -0
  28. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/examples/llama_1.1b/ex_config_quantization.py +0 -0
  29. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/examples/llama_1.1b/ex_llmcompressor_quantization.py +0 -0
  30. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/examples/llama_1.1b/example_quant_config.json +0 -0
  31. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/examples/llama_1.1b/example_quant_recipe.yaml +0 -0
  32. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/examples/quantize_and_pack_int4.ipynb +0 -0
  33. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/pyproject.toml +0 -0
  34. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/setup.cfg +0 -0
  35. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/setup.py +0 -0
  36. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/__init__.py +0 -0
  37. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/README.md +0 -0
  38. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/__init__.py +0 -0
  39. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/base.py +0 -0
  40. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/__init__.py +0 -0
  41. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/base.py +0 -0
  42. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/helpers.py +0 -0
  43. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  44. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
  45. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  46. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  47. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  48. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  49. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  50. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  51. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
  52. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  53. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  54. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  55. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/config/__init__.py +0 -0
  56. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/config/dense.py +0 -0
  57. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
  58. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  59. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/linear/__init__.py +0 -0
  60. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/linear/compressed_linear.py +0 -0
  61. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/__init__.py +0 -0
  62. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  63. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
  64. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  65. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  66. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  67. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  68. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/quant_config.py +0 -0
  69. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  70. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  71. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/registry/__init__.py +0 -0
  72. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/registry/registry.py +0 -0
  73. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/utils/__init__.py +0 -0
  74. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/utils/helpers.py +0 -0
  75. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/utils/offload.py +0 -0
  76. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/utils/permutations_24.py +0 -0
  77. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/utils/permute.py +0 -0
  78. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  79. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  80. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  81. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors.egg-info/requires.txt +0 -0
  82. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/src/compressed_tensors.egg-info/top_level.txt +0 -0
  83. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/__init__.py +0 -0
  84. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/conftest.py +0 -0
  85. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/__init__.py +0 -0
  86. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/model_compressors/__init__.py +0 -0
  87. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/model_compressors/test_model_compressor.py +0 -0
  88. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/quantized_compressors/__init__.py +0 -0
  89. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/quantized_compressors/test_fp8_quant.py +0 -0
  90. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/quantized_compressors/test_int_quant.py +0 -0
  91. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/quantized_compressors/test_pack_quant.py +0 -0
  92. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/sparse_compressors/__init__.py +0 -0
  93. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/sparse_compressors/test_bitmask.py +0 -0
  94. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/sparse_compressors/test_sparse_24_bitmask.py +0 -0
  95. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/sparse_quantized_compressors/__init__.py +0 -0
  96. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_compressors/sparse_quantized_compressors/test_marlin_24.py +0 -0
  97. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_configs/__init__.py +0 -0
  98. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_configs/test_base.py +0 -0
  99. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_examples/test_bitmask_compression_ipynb.py +0 -0
  100. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_linear/__init__.py +0 -0
  101. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_linear/test_compressed_linear.py +0 -0
  102. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/__init__.py +0 -0
  103. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/__init__.py +0 -0
  104. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/conftest.py +0 -0
  105. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/test_apply.py +0 -0
  106. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/test_dynamic_lifecycle.py +0 -0
  107. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/test_enabled.py +0 -0
  108. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/test_forward.py +0 -0
  109. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/test_helpers.py +0 -0
  110. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/test_initialize.py +0 -0
  111. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/lifecycle/test_lifecycle.py +0 -0
  112. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/test_configs/__init__.py +0 -0
  113. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/test_configs/test_bit_depths.py +0 -0
  114. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/test_configs/test_strategies.py +0 -0
  115. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/test_quant_args.py +0 -0
  116. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/test_quant_config.py +0 -0
  117. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/test_quant_scheme.py +0 -0
  118. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_quantization/test_utils/test_helpers.py +0 -0
  119. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_registry.py +0 -0
  120. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_utils/__init__.py +0 -0
  121. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_utils/test_helpers.py +0 -0
  122. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_utils/test_offload.py +0 -0
  123. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/test_utils/test_safetensors_load.py +0 -0
  124. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/tests/testing_utils.py +0 -0
  125. {compressed_tensors-0.9.5a20250507 → compressed_tensors-0.9.5a20250512}/utils/copyright.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.9.5a20250507
+ Version: 0.9.5a20250512
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
src/compressed_tensors/compressors/quantized_compressors/__init__.py
@@ -15,4 +15,5 @@

  from .base import *
  from .naive_quantized import *
+ from .nvfp4_quantized import *
  from .pack_quantized import *
src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py
@@ -0,0 +1,190 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ from typing import Dict, Optional, Tuple
+
+ import numpy
+ import torch
+ from compressed_tensors.compressors.base import BaseCompressor
+ from compressed_tensors.compressors.quantized_compressors.base import (
+     BaseQuantizationCompressor,
+ )
+ from compressed_tensors.config import CompressionFormat
+ from compressed_tensors.quantization import QuantizationArgs
+ from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
+ from torch import Tensor
+
+
+ __all__ = ["pack_fp4_to_uint8", "unpack_fp4_from_uint8"]
+
+ FLOAT_TO_E2M1 = [
+     0.0,
+     0.5,
+     1.0,
+     1.5,
+     2.0,
+     3.0,
+     4.0,
+     6.0,
+ ]
+
+
+ @BaseCompressor.register(name=CompressionFormat.nvfp4_pack_quantized.value)
+ class NVFP4PackedCompressor(BaseQuantizationCompressor):
+     """
+     Implements compression of FP4 values. Weights of each quantized layer
+     are packed into uint8. Only supports symmetric weight compression for now.
+     """
+
+     @property
+     def compression_param_names(self) -> Tuple[str]:
+         """
+         Returns a tuple of compression parameter names introduced by
+         the compressor during compression
+         """
+         return (
+             "weight_packed",
+             "weight_scale",
+             "weight_zero_point",
+             "weight_global_scale",
+         )
+
+     def compress_weight(
+         self,
+         weight: Tensor,
+         scale: Tensor,
+         global_scale: Tensor,
+         quantization_args: QuantizationArgs,
+         device: Optional[torch.device] = None,
+         zero_point: Optional[torch.Tensor] = None,
+         g_idx: Optional[torch.Tensor] = None,
+     ) -> Dict[str, torch.Tensor]:
+
+         quantized_weight = quantize(
+             x=weight,
+             scale=scale,
+             global_scale=global_scale,
+             zero_point=zero_point,
+             args=quantization_args,
+         )
+         compressed_dict = {}
+         weight_packed = pack_fp4_to_uint8(quantized_weight)
+         if device is not None:
+             weight_packed = weight_packed.to(device)
+         compressed_dict["weight_packed"] = weight_packed
+         return compressed_dict
+
+     def decompress_weight(
+         self,
+         compressed_data: Dict[str, Tensor],
+         quantization_args: Optional[QuantizationArgs] = None,
+     ) -> torch.Tensor:
+
+         weight = compressed_data["weight_packed"]
+         scale = compressed_data["weight_scale"]
+         global_scale = compressed_data["weight_global_scale"]
+         m, n = weight.shape
+         # TODO: use a user provided dequant dtype
+         unpacked = unpack_fp4_from_uint8(weight, m, n * 2)
+         decompressed_weight = dequantize(
+             x_q=unpacked, scale=scale, global_scale=global_scale, dtype=unpacked.dtype
+         )
+
+         return decompressed_weight
+
+
+ def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
+     """
+     Packs a tensor with values in the fp4 range into uint8.
+     As there are 16 valid fp4 values, two fp4 values can be
+     packed into one uint8. Each fp4 value is mapped to its
+     particular index (e.g. 0.5 is mapped to index 1, 6.0 is mapped
+     to index 7) which is then represented using 4 bits. Consecutive
+     pairs of 4 bits are then packed into an uint8.
+
+     :param x: tensor to pack
+     :returns: a packed tensor in uint8
+     """
+
+     m, n = x.shape
+     device = x.device
+
+     # Create lookup table for FP4 values to indices
+     # Map the absolute values to 0-7 indices
+     kE2M1 = torch.tensor(FLOAT_TO_E2M1, device=device, dtype=x.dtype)
+
+     # Find closest valid FP4 value index for each element
+     abs_x = torch.abs(x)
+     abs_indices = torch.zeros_like(abs_x, dtype=torch.long)
+     for i, val in enumerate(kE2M1):
+         abs_indices = torch.where(torch.isclose(abs_x, val), i, abs_indices)
+
+     # Apply sign bit (bit 3) to get final 4-bit representation
+     indices = abs_indices + (torch.signbit(x) << 3).to(torch.long)
+
+     # Reshape to prepare for packing pairs of values
+     indices = indices.reshape(-1)
+
+     # Handle odd length by padding if necessary
+     if indices.numel() % 2 != 0:
+         indices = torch.cat([indices, torch.zeros(1, dtype=torch.long, device=device)])
+
+     # Reshape to pair consecutive elements
+     indices = indices.reshape(-1, 2)
+
+     # Pack pairs of 4-bit values into 8-bit values
+     packed = (indices[:, 0] | (indices[:, 1] << 4)).to(torch.uint8)
+
+     return packed.reshape(m, n // 2)
+
+
+ kE2M1ToFloat = torch.tensor(
+     [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=torch.float32
+ )
+
+ # reference: https://github.com/vllm-project/vllm/pull/16362
+ def unpack_fp4_from_uint8(
+     a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
+ ) -> torch.Tensor:
+     """
+     Unpacks uint8 values into fp4. Each uint8 consists of two fp4 values
+     (i.e. first four bits correspond to one fp4 value, last four correspond
+     to a consecutive fp4 value). The bits represent an index, which are
+     mapped to an fp4 value.
+
+     :param a: tensor to unpack
+     :param m: original dim 0 size of the unpacked tensor
+     :param n: original dim 1 size of the unpacked tensor
+     :param dtype: dense dtype to cast the unpacked tensor to
+     """
+     assert a.dtype == torch.uint8
+
+     # Vectorized nibble processing
+     a_flat = a.flatten()
+     high = (a_flat & 0xF0) >> 4  # Upper nibbles
+     low = a_flat & 0x0F  # Lower nibbles
+
+     # Combine nibbles for batch processing
+     combined = torch.stack((low, high), dim=1).flatten()
+
+     # Vectorized sign and magnitude extraction
+     signs = (combined & 0x08).to(torch.bool)  # Sign bits
+     abs_vals = (combined & 0x07).to(torch.long)  # Magnitude indices
+
+     # Device-aware lookup and sign application
+     kE2M1 = kE2M1ToFloat.to(device=a.device)
+     values = kE2M1[abs_vals] * torch.where(signs, -1.0, 1.0)
+
+     # Reshape to final form
+     return values.reshape(m, n).to(dtype=dtype)
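
To make the new packing routine concrete, here is a minimal round-trip sketch using the two public helpers added above. It assumes the input already lies on the E2M1 grid, as it would after quantize(); the 0xF1 expectation follows from the index mapping described in the docstring:

    import torch
    from compressed_tensors.compressors.quantized_compressors.nvfp4_quantized import (
        pack_fp4_to_uint8,
        unpack_fp4_from_uint8,
    )

    # 0.5 maps to index 1 (low nibble); -6.0 maps to sign bit + index 7 = 0b1111
    # (high nibble), so the single packed byte is 0b1111_0001 = 0xF1.
    x = torch.tensor([[0.5, -6.0]], dtype=torch.bfloat16)
    packed = pack_fp4_to_uint8(x)  # shape (1, 1), dtype uint8
    assert packed.item() == 0xF1
    restored = unpack_fp4_from_uint8(packed, m=1, n=2)  # defaults to bfloat16
    assert torch.equal(restored, x)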
src/compressed_tensors/config/base.py
@@ -32,6 +32,7 @@ class CompressionFormat(Enum):
      naive_quantized = "naive-quantized"
      pack_quantized = "pack-quantized"
      marlin_24 = "marlin-24"
+     nvfp4_pack_quantized = "nvfp4-pack-quantized"


  @unique
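
A quick sanity check on the new format name; this is the same string the @BaseCompressor.register(...) decorator in nvfp4_quantized.py resolves via CompressionFormat.nvfp4_pack_quantized.value:

    from compressed_tensors.config import CompressionFormat

    # Enum value and string round-trip both ways.
    assert CompressionFormat.nvfp4_pack_quantized.value == "nvfp4-pack-quantized"
    assert CompressionFormat("nvfp4-pack-quantized") is CompressionFormat.nvfp4_pack_quantized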
src/compressed_tensors/quantization/quant_args.py
@@ -24,6 +24,8 @@ from pydantic import BaseModel, Field, field_validator, model_validator

  __all__ = [
      "FP8_DTYPE",
+     "FP8_E4M3_DATA",
+     "FP4_E2M1_DATA",
      "QuantizationType",
      "QuantizationStrategy",
      "QuantizationArgs",
@@ -31,6 +33,48 @@ __all__ = [
      "ActivationOrdering",
  ]

+
+ class FloatArgs:
+     exponent: int
+     mantissa: int
+     bits: int
+     max: float
+     min: float
+     dtype: Optional[torch.dtype] = None
+
+
+ class FP4_E2M1_DATA(FloatArgs):
+     exponent = 2
+     mantissa = 1
+     bits = 4
+     max = 6.0
+     min = -6.0
+
+     @staticmethod
+     def cast_to_fp4(x):
+         sign = torch.sign(x)
+         x = torch.abs(x)
+         x[(x >= 0.0) & (x <= 0.25)] = 0.0
+         x[(x > 0.25) & (x < 0.75)] = 0.5
+         x[(x >= 0.75) & (x <= 1.25)] = 1.0
+         x[(x > 1.25) & (x < 1.75)] = 1.5
+         x[(x >= 1.75) & (x <= 2.5)] = 2.0
+         x[(x > 2.5) & (x < 3.5)] = 3.0
+         x[(x >= 3.5) & (x <= 5.0)] = 4.0
+         x[x > 5.0] = 6.0
+         return x * sign
+
+
+ class FP8_E4M3_DATA(FloatArgs):
+     exponent = 4
+     mantissa = 3
+     bits = 8
+     max = torch.finfo(torch.float8_e4m3fn).max
+     min = torch.finfo(torch.float8_e4m3fn).min
+     dtype = torch.float8_e4m3fn
+
+
+ # TODO: Remove soon in favour of a more descriptive FloatArgs
  FP8_DTYPE = torch.float8_e4m3fn

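The interval boundaries in cast_to_fp4 implement round-to-nearest on the E2M1 grid, with each midpoint (0.25, 0.75, 1.25, 1.75, 2.5, 3.5, 5.0) resolving to the neighbour whose mantissa bit is zero, i.e. round-half-to-even. A quick illustration with hypothetical inputs, one per tie point:

    import torch

    x = torch.tensor([0.25, 0.75, 1.25, 1.75, 2.5, 3.5, 5.0, -2.5])
    # torch.abs() inside cast_to_fp4 allocates a copy, so x itself is not mutated.
    print(FP4_E2M1_DATA.cast_to_fp4(x))
    # tensor([0., 1., 1., 2., 2., 4., 4., -2.])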
src/compressed_tensors/quantization/quant_args.py
@@ -234,7 +278,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):

      def pytorch_dtype(self) -> torch.dtype:
          if self.type == QuantizationType.FLOAT:
-             return FP8_DTYPE
+             if self.num_bits == 8:
+                 return FP8_E4M3_DATA.dtype
+             else:
+                 raise NotImplementedError("Only num_bits in (8) are supported")
          elif self.type == QuantizationType.INT:
              if self.num_bits <= 8:
                  return torch.int8
@@ -263,7 +310,12 @@ def round_to_quantized_type(
      """
      original_dtype = tensor.dtype
      if args.type == QuantizationType.FLOAT:
-         rounded = tensor.to(FP8_DTYPE)
+         if args.num_bits == 8:
+             rounded = tensor.to(FP8_E4M3_DATA.dtype)
+         elif args.num_bits == 4:
+             rounded = FP4_E2M1_DATA.cast_to_fp4(tensor)
+         else:
+             raise NotImplementedError("Only num_bits in (4, 8) are supported")
      elif args.type == QuantizationType.INT:
          rounded = torch.round(tensor)
      else:
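
With this branch in place, round_to_quantized_type can snap a tensor to the FP4 grid as well as FP8. A hedged usage sketch, mirroring the argument combination used by the NVFP4A16 preset below and assuming the function casts back to original_dtype as the line above suggests:

    import torch
    from compressed_tensors.quantization.quant_args import (
        QuantizationArgs,
        QuantizationStrategy,
        QuantizationType,
        round_to_quantized_type,
    )

    fp4_args = QuantizationArgs(
        num_bits=4,
        type=QuantizationType.FLOAT,
        strategy=QuantizationStrategy.GROUP,
        symmetric=True,
        group_size=16,
    )
    t = torch.tensor([0.3, -4.9, 7.0])
    print(round_to_quantized_type(t, fp4_args))
    # expected: tensor([0.5000, -4.0000, 6.0000]), the nearest E2M1 values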
src/compressed_tensors/quantization/quant_scheme.py
@@ -100,6 +100,17 @@ def is_preset_scheme(name: str) -> bool:

  UNQUANTIZED = dict()

+ NVFP4A16 = dict(
+     weights=QuantizationArgs(
+         num_bits=4,
+         type=QuantizationType.FLOAT,
+         strategy=QuantizationStrategy.GROUP,
+         symmetric=True,
+         dynamic=False,
+         group_size=16,
+     )
+ )
+
  # 8 bit integer weights and 8 bit activations quantization
  INT8_W8A8 = dict(
      weights=QuantizationArgs(
@@ -225,4 +236,5 @@ PRESET_SCHEMES = {
      # Float weight and activation schemes
      "FP8": FP8,
      "FP8_DYNAMIC": FP8_DYNAMIC,
+     "NVFP4A16": NVFP4A16,
  }
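
Registering the preset makes it addressable by name. A plausible lookup sketch using preset_name_to_scheme from this same module (the targets list is this API's usual way of scoping a scheme to layer types):

    from compressed_tensors.quantization import preset_name_to_scheme

    # Expand the preset into a full QuantizationScheme for Linear layers.
    scheme = preset_name_to_scheme("NVFP4A16", targets=["Linear"])
    assert scheme.weights.num_bits == 4
    assert scheme.weights.group_size == 16
    assert scheme.input_activations is None  # A16: activations stay in high precision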
src/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.9.5.a20250507'
+ __version__ = version = '0.9.5.a20250512'
  __version_tuple__ = version_tuple = (0, 9, 5)
src/compressed_tensors.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: compressed-tensors
- Version: 0.9.5a20250507
+ Version: 0.9.5a20250512
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
src/compressed_tensors.egg-info/SOURCES.txt
@@ -41,6 +41,7 @@ src/compressed_tensors/compressors/model_compressors/model_compressor.py
  src/compressed_tensors/compressors/quantized_compressors/__init__.py
  src/compressed_tensors/compressors/quantized_compressors/base.py
  src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
+ src/compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py
  src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
  src/compressed_tensors/compressors/sparse_compressors/__init__.py
  src/compressed_tensors/compressors/sparse_compressors/base.py
@@ -87,6 +88,7 @@ tests/test_compressors/model_compressors/test_model_compressor.py
  tests/test_compressors/quantized_compressors/__init__.py
  tests/test_compressors/quantized_compressors/test_fp8_quant.py
  tests/test_compressors/quantized_compressors/test_int_quant.py
+ tests/test_compressors/quantized_compressors/test_nvfp4_quant.py
  tests/test_compressors/quantized_compressors/test_pack_quant.py
  tests/test_compressors/sparse_compressors/__init__.py
  tests/test_compressors/sparse_compressors/test_bitmask.py
tests/test_compressors/quantized_compressors/test_nvfp4_quant.py
@@ -0,0 +1,43 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import torch
+ from compressed_tensors.compressors.quantized_compressors.nvfp4_quantized import (
+     pack_fp4_to_uint8,
+     unpack_fp4_from_uint8,
+ )
+
+
+ def test_pack_unpack():
+     x = torch.Tensor(
+         [
+             [-0.5000, -6.0000, -0.5000, -1.5000, -1.0000, 6.0000, 0.0000, -0.0000],
+             [-1.0000, -6.0000, -0.5000, -0.0000, 0.5000, 0.5000, -0.0000, 0.0000],
+             [-3.0000, -6.0000, -0.5000, -2.0000, -0.5000, -1.5000, -0.0000, -0.0000],
+             [1.5000, 6.0000, -0.0000, -0.5000, 1.0000, 1.0000, -0.0000, 0.0000],
+         ]
+     )
+
+     dense_dtype = torch.bfloat16
+     x = x.to(dense_dtype)
+     m, n = x.shape
+     packed = pack_fp4_to_uint8(x)
+     assert packed.dtype == torch.uint8
+     unpacked = unpack_fp4_from_uint8(packed, m, n, dtype=dense_dtype)
+     assert unpacked.dtype == dense_dtype
+
+     assert torch.equal(unpacked, x)  # misleading as -0 and 0 are considered equal
+     sign_bitx = torch.signbit(x)
+     sign_bitout = torch.signbit(unpacked)
+     assert torch.equal(sign_bitout, sign_bitx)
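
On the comment in that final block: torch.equal compares numeric values, and IEEE 754 defines -0.0 == 0.0, so the first assert alone cannot tell whether sign bits survived packing; the signbit comparison is what actually covers the negative-zero entries. A standalone illustration:

    import torch

    a = torch.tensor([0.0])
    b = torch.tensor([-0.0])
    assert torch.equal(a, b)  # numerically equal despite different sign bits
    assert torch.signbit(b).item() and not torch.signbit(a).item()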