compressed-tensors-nightly 0.4.0.20240711__py3-none-any.whl → 0.4.0.20240712__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/marlin_24.py +6 -5
- compressed_tensors/quantization/quant_scheme.py +45 -9
- compressed_tensors/utils/__init__.py +3 -0
- compressed_tensors/utils/helpers.py +31 -1
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/RECORD +11 -13
- compressed_tensors/compressors/utils/__init__.py +0 -19
- compressed_tensors/compressors/utils/helpers.py +0 -43
- /compressed_tensors/{compressors/utils → utils}/permutations_24.py +0 -0
- /compressed_tensors/{compressors/utils → utils}/semi_structured_conversions.py +0 -0
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/marlin_24.py

@@ -18,15 +18,16 @@ from typing import Dict, Generator, Tuple
 import numpy as np
 import torch
 from compressed_tensors.compressors import Compressor
-from compressed_tensors.compressors.utils import (
+from compressed_tensors.config import CompressionFormat
+from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
+from compressed_tensors.quantization.lifecycle.forward import quantize
+from compressed_tensors.utils import (
     get_permutations_24,
+    is_quantization_param,
+    merge_names,
     sparse_semi_structured_from_dense_cutlass,
     tensor_follows_mask_structure,
 )
-from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
-from compressed_tensors.quantization.lifecycle.forward import quantize
-from compressed_tensors.utils import is_quantization_param, merge_names
 from torch import Tensor
 from tqdm import tqdm
 
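The practical upshot of this hunk is the package reorganization shown in the file list above: the 2:4 sparsity helpers now live under compressed_tensors.utils instead of compressed_tensors.compressors.utils. A minimal sketch of the change downstream code would need, assuming it imported these helpers directly from the old location:

    # Old location (0.4.0.20240711 and earlier), removed in this release:
    # from compressed_tensors.compressors.utils import get_permutations_24

    # New location (0.4.0.20240712 onward):
    from compressed_tensors.utils import (
        get_permutations_24,
        sparse_semi_structured_from_dense_cutlass,
        tensor_follows_mask_structure,
    )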
compressed_tensors/quantization/quant_scheme.py

@@ -111,55 +111,91 @@ def is_preset_scheme(name: str) -> bool:
     return name.upper() in PRESET_SCHEMES
 
 
+# 8 bit integer weights and 8 bit activations quantization
 W8A8 = dict(
     weights=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.CHANNEL,
+        symmetric=True,
+        dynamic=False,
     ),
     input_activations=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.TOKEN,
+        symmetric=True,
         dynamic=True,
     ),
 )
 
+# 8 bit integer weights only quantization
 W8A16 = dict(
     weights=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.CHANNEL,
-    )
+        symmetric=True,
+        dynamic=False,
+    ),
 )
 
+# 4 bit integer weights only quantization
 W4A16 = dict(
     weights=QuantizationArgs(
         num_bits=4,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.GROUP,
         group_size=128,
-    )
+        symmetric=True,
+        dynamic=False,
+    ),
 )
 
-FP8 = dict(
+# 4 bit integer weights and 8 bit activations quantization
+W4A8 = dict(
     weights=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.INT,
+        group_size=128,
+        strategy=QuantizationStrategy.GROUP,
+        symmetric=True,
+        dynamic=False,
+    ),
+    input_activations=QuantizationArgs(
         num_bits=8,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.TENSOR,
         symmetric=True,
+        dynamic=True,
+    ),
+)
+
+# FP8 weights and FP8 activations quantization
+FP8 = dict(
+    weights=QuantizationArgs(
+        num_bits=8,
         type=QuantizationType.FLOAT,
         strategy=QuantizationStrategy.TENSOR,
+        symmetric=True,
+        dynamic=False,
     ),
     input_activations=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.FLOAT,
         strategy=QuantizationStrategy.TENSOR,
+        symmetric=True,
         dynamic=False,
     ),
 )
 
-PRESET_SCHEMES = {"W8A8": W8A8, "W8A16": W8A16, "W4A16": W4A16, "FP8": FP8}
+PRESET_SCHEMES = {
+    # Integer weight only schemes
+    "W8A16": W8A16,
+    "W4A16": W4A16,
+    # Integer weight and activation schemes
+    "W8A8": W8A8,
+    "W4A8": W4A8,
+    # Float weight and activation schemes
+    "FP8": FP8,
+}
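Each preset is a plain dict mapping a target ("weights", "input_activations") to a QuantizationArgs instance, and is_preset_scheme upper-cases the name before checking membership in PRESET_SCHEMES. A minimal lookup sketch using only the names visible in this hunk (the import path follows the file list above):

    from compressed_tensors.quantization.quant_scheme import (
        PRESET_SCHEMES,
        is_preset_scheme,
    )

    name = "w4a8"  # scheme added in this release
    if is_preset_scheme(name):  # case-insensitive: upper-cases before lookup
        preset = PRESET_SCHEMES[name.upper()]
        weights = preset["weights"]
        print(weights.num_bits, weights.group_size)   # 4 128
        print(preset["input_activations"].dynamic)    # True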
compressed_tensors/utils/helpers.py

@@ -14,10 +14,15 @@
 
 from typing import Optional
 
+import torch
 from transformers import AutoConfig
 
 
-__all__ = ["infer_compressor_from_model_config", "fix_fsdp_module_name"]
+__all__ = [
+    "infer_compressor_from_model_config",
+    "fix_fsdp_module_name",
+    "tensor_follows_mask_structure",
+]
 
 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
 

@@ -60,3 +65,28 @@ def fix_fsdp_module_name(name: str) -> str:
     return name.replace(FSDP_WRAPPER_NAME + ".", "").replace(
         "." + FSDP_WRAPPER_NAME, ""
     )
+
+
+def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
+    """
+    :param tensor: tensor to check
+    :param mask: mask structure to check for, in the format "n:m"
+    :return: True if the tensor follows the mask structure, False otherwise.
+        Note, some weights can incidentally be zero, so we check for
+        atleast n zeros in each chunk of size m
+    """
+
+    n, m = tuple(map(int, mask.split(":")))
+    # Reshape the tensor into chunks of size m
+    tensor = tensor.view(-1, m)
+
+    # Count the number of zeros in each chunk
+    zero_counts = (tensor == 0).sum(dim=1)
+
+    # Check if the number of zeros in each chunk atleast n
+    # Greater than sign is needed as some weights can incidentally
+    # be zero
+    if not torch.all(zero_counts >= n).item():
+        raise ValueError()
+
+    return True
{compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240711
+Version: 0.4.0.20240712
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/RECORD

@@ -5,15 +5,11 @@ compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpc
 compressed_tensors/compressors/base.py,sha256=-rqT2h9G2iwDkwrVj0d0jxxn9h0dccJA1mqOzVEkwGM,2144
 compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/marlin_24.py,sha256=
+compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
 compressed_tensors/compressors/model_compressor.py,sha256=9dyM2mvAgO7QeFTBWXBzT29JtmRMKQWWU7xh8StaFyI,13446
 compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
 compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
 compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
-compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
-compressed_tensors/compressors/utils/helpers.py,sha256=4fq7KclSIK__jemCG9pwYlgWLrQjsaAMxhIrhjdw0BQ,1506
-compressed_tensors/compressors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
-compressed_tensors/compressors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
 compressed_tensors/config/base.py,sha256=caSZ7xZ_kgcHRMXZ5hM1i6TKbgY__CkiSjZ93imHZQ0,1562
 compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317

@@ -21,7 +17,7 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
 compressed_tensors/quantization/quant_config.py,sha256=PU3BchHm09ks6_yAderrHoIZI07zBlU9ejC87v3A-54,9568
-compressed_tensors/quantization/quant_scheme.py,sha256=
+compressed_tensors/quantization/quant_scheme.py,sha256=IKTtMfusSe7x31t7hipBfptTbCwGd9eGMtrWC1sPM9o,5522
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
 compressed_tensors/quantization/lifecycle/apply.py,sha256=fyv5ujZC0__oG1ESOTmMyMsKK7DGAxG7uQI7_sxT7Mw,13308
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776

@@ -38,11 +34,13 @@ compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5
 compressed_tensors/quantization/utils/helpers.py,sha256=YjXABJQUnelof-z7qcwck6fnrFLh4uMSrOmPiqNp_RY,8591
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
-compressed_tensors/utils/__init__.py,sha256=
-compressed_tensors/utils/helpers.py,sha256=
+compressed_tensors/utils/__init__.py,sha256=dvAatm3p0He4J7u5ZmZYVa8Iwpwq3ZSm6S9ZJleta5M,762
+compressed_tensors/utils/helpers.py,sha256=d3yP9ViQ8R3GzMHfohxNlaokzyrRuj2PyjxWAJZmSws,3156
+compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.20240711.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.4.0.20240711.dist-info/METADATA,sha256=
-compressed_tensors_nightly-0.4.0.20240711.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors_nightly-0.4.0.20240711.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.4.0.20240711.dist-info/RECORD,,
+compressed_tensors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
+compressed_tensors_nightly-0.4.0.20240712.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240712.dist-info/METADATA,sha256=K39YHRwW4YcpN7VjAeCt5wE28KM8oHuRFc4-YgoNQAI,5668
+compressed_tensors_nightly-0.4.0.20240712.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240712.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240712.dist-info/RECORD,,
compressed_tensors/compressors/utils/__init__.py (deleted)

@@ -1,19 +0,0 @@
-# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# flake8: noqa
-
-from .helpers import *
-from .permutations_24 import *
-from .semi_structured_conversions import *
compressed_tensors/compressors/utils/helpers.py (deleted)

@@ -1,43 +0,0 @@
-# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-
-
-__all__ = ["tensor_follows_mask_structure"]
-
-
-def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
-    """
-    :param tensor: tensor to check
-    :param mask: mask structure to check for, in the format "n:m"
-    :return: True if the tensor follows the mask structure, False otherwise.
-        Note, some weights can incidentally be zero, so we check for
-        atleast n zeros in each chunk of size m
-    """
-
-    n, m = tuple(map(int, mask.split(":")))
-    # Reshape the tensor into chunks of size m
-    tensor = tensor.view(-1, m)
-
-    # Count the number of zeros in each chunk
-    zero_counts = (tensor == 0).sum(dim=1)
-
-    # Check if the number of zeros in each chunk atleast n
-    # Greater than sign is needed as some weights can incidentally
-    # be zero
-    if not torch.all(zero_counts >= n).item():
-        raise ValueError()
-
-    return True