PyPI - compressed-tensors - Versions diffs - 0.9.5a20250509__py3-none-any.whl → 0.9.5a20250512__py3-none-any.whl - Mend

compressed-tensors 0.9.5a20250509py3-none-any.whl → 0.9.5a20250512py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

compressed_tensors/quantization/quant_args.py CHANGED Viewed

@@ -24,6 +24,8 @@ from pydantic import BaseModel, Field, field_validator, model_validator
 __all__ = [
     "FP8_DTYPE",
+    "FP8_E4M3_DATA",
+    "FP4_E2M1_DATA",
     "QuantizationType",
     "QuantizationStrategy",
     "QuantizationArgs",
@@ -31,6 +33,48 @@ __all__ = [
     "ActivationOrdering",
 ]
+class FloatArgs:
+    exponent: int
+    mantissa: int
+    bits: int
+    max: float
+    min: float
+    dtype: Optional[torch.dtype] = None
+class FP4_E2M1_DATA(FloatArgs):
+    exponent = 2
+    mantissa = 1
+    bits = 4
+    max = 6.0
+    min = -6.0
+    @staticmethod
+    def cast_to_fp4(x):
+        sign = torch.sign(x)
+        x = torch.abs(x)
+        x[(x >= 0.0) & (x <= 0.25)] = 0.0
+        x[(x > 0.25) & (x < 0.75)] = 0.5
+        x[(x >= 0.75) & (x <= 1.25)] = 1.0
+        x[(x > 1.25) & (x < 1.75)] = 1.5
+        x[(x >= 1.75) & (x <= 2.5)] = 2.0
+        x[(x > 2.5) & (x < 3.5)] = 3.0
+        x[(x >= 3.5) & (x <= 5.0)] = 4.0
+        x[x > 5.0] = 6.0
+        return x * sign
+class FP8_E4M3_DATA(FloatArgs):
+    exponent = 4
+    mantissa = 3
+    bits = 8
+    max = torch.finfo(torch.float8_e4m3fn).max
+    min = torch.finfo(torch.float8_e4m3fn).min
+    dtype = torch.float8_e4m3fn
+# TODO: Remove soon in favour of a more descriptive FloatArgs
 FP8_DTYPE = torch.float8_e4m3fn
@@ -234,7 +278,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
     def pytorch_dtype(self) -> torch.dtype:
         if self.type == QuantizationType.FLOAT:
-            return FP8_DTYPE
+            if self.num_bits == 8:
+                return FP8_E4M3_DATA.dtype
+            else:
+                raise NotImplementedError("Only num_bits in (8) are supported")
         elif self.type == QuantizationType.INT:
             if self.num_bits <= 8:
                 return torch.int8
@@ -263,7 +310,12 @@ def round_to_quantized_type(
     """
     original_dtype = tensor.dtype
     if args.type == QuantizationType.FLOAT:
-        rounded = tensor.to(FP8_DTYPE)
+        if args.num_bits == 8:
+            rounded = tensor.to(FP8_E4M3_DATA.dtype)
+        elif args.num_bits == 4:
+            rounded = FP4_E2M1_DATA.cast_to_fp4(tensor)
+        else:
+            raise NotImplementedError("Only num_bits in (4, 8) are supported")
     elif args.type == QuantizationType.INT:
         rounded = torch.round(tensor)
     else:

compressed_tensors/quantization/quant_scheme.py CHANGED Viewed

@@ -100,6 +100,17 @@ def is_preset_scheme(name: str) -> bool:
 UNQUANTIZED = dict()
+NVFP4A16 = dict(
+    weights=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.GROUP,
+        symmetric=True,
+        dynamic=False,
+        group_size=16,
+    )
+)
 # 8 bit integer weights and 8 bit activations quantization
 INT8_W8A8 = dict(
     weights=QuantizationArgs(
@@ -225,4 +236,5 @@ PRESET_SCHEMES = {
     # Float weight and activation schemes
     "FP8": FP8,
     "FP8_DYNAMIC": FP8_DYNAMIC,
+    "NVFP4A16": NVFP4A16,
 }

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.9.5.a20250509'
+__version__ = version = '0.9.5.a20250512'
 __version_tuple__ = version_tuple = (0, 9, 5)

{compressed_tensors-0.9.5a20250509.dist-info → compressed_tensors-0.9.5a20250512.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250509
+Version: 0.9.5a20250512
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.9.5a20250509.dist-info → compressed_tensors-0.9.5a20250512.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=MJNvDWAetuDB0OYncH2iIZtFhxnO8XXV5IHnTcj9e6k,521
+compressed_tensors/version.py,sha256=Yd5MPtXGvm9XWCPM_O99KCK_9DwKmcl2OenuJ4MxlUI,521
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -26,9 +26,9 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/linear/compressed_linear.py,sha256=_m6XpNcI53eeSHO8VdiuAM6UBTdpDhn5Ivd8iRMwEKc,3980
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=sKpb8DcNObidjXjNol1Tn_Iih3ZXBycSp-fyz68TGhY,9117
+compressed_tensors/quantization/quant_args.py,sha256=2m4WJWBnNjkU-3rVR_f2a6p_BZMvGfMrPyOui8JUwWk,10487
 compressed_tensors/quantization/quant_config.py,sha256=MxSUcb5dOqMN6LFyD5K2h8X0TvEtcWIAoiUJqD2dHGE,10159
-compressed_tensors/quantization/quant_scheme.py,sha256=yz0oMbbwp7QZXXd2k5KIJu-Q6aTqg2929VdUzZ7vysM,6324
+compressed_tensors/quantization/quant_scheme.py,sha256=0FpN3R7bVn8rQ18Vp0NuDVpoilTZ7X8vk9zp_8AndwY,6578
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=DOoxH4jM8r0270GGGUFOpRrgwaisiJi7TV-Q6E8qM8E,18067
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
@@ -46,8 +46,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=kkkUDmS1H40MFy6FDP-DFGiAYbtqke6bKE7YrAtORtA,11499
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.9.5a20250509.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.9.5a20250509.dist-info/METADATA,sha256=2BVor0VJtcZHud4QBjYS5OhLXob0nF9dmeP08RUSA5k,7004
-compressed_tensors-0.9.5a20250509.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
-compressed_tensors-0.9.5a20250509.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.9.5a20250509.dist-info/RECORD,,
+compressed_tensors-0.9.5a20250512.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.9.5a20250512.dist-info/METADATA,sha256=gArKa7gy0jdBGF5PbuNLVh_ZmnXq4CdEp1-7grxpjnw,7004
+compressed_tensors-0.9.5a20250512.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+compressed_tensors-0.9.5a20250512.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.9.5a20250512.dist-info/RECORD,,

{compressed_tensors-0.9.5a20250509.dist-info → compressed_tensors-0.9.5a20250512.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.9.5a20250509.dist-info → compressed_tensors-0.9.5a20250512.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.9.5a20250509.dist-info → compressed_tensors-0.9.5a20250512.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.9.5a20250509__py3-none-any.whl → 0.9.5a20250512__py3-none-any.whl

compressed-tensors 0.9.5a20250509py3-none-any.whl → 0.9.5a20250512py3-none-any.whl