PyPI - compressed-tensors-nightly - Versions diffs - 0.4.0.20240701__py3-none-any.whl → 0.4.0.20240703__py3-none-any.whl - Mend

compressed-tensors-nightly 0.4.0.20240701__py3-none-any.whl → 0.4.0.20240703__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

compressed_tensors/compressors/model_compressor.py CHANGED Viewed

@@ -81,6 +81,7 @@ class ModelCompressor:
     def from_pretrained(
         cls,
         pretrained_model_name_or_path: str,
+        **kwargs,
     ) -> Optional["ModelCompressor"]:
         """
         Given a path to a model config, extract the sparsity and/or quantization
@@ -89,7 +90,7 @@ class ModelCompressor:
         :param pretrained_model_name_or_path: path to model config on disk or HF hub
         :return: compressor for the extracted configs
         """
-        config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
+        config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
         compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
         return cls.from_compression_config(compression_config)

compressed_tensors/quantization/quant_scheme.py CHANGED Viewed

@@ -17,6 +17,7 @@ from typing import List, Optional
 from compressed_tensors.quantization.quant_args import (
     QuantizationArgs,
+    QuantizationStrategy,
     QuantizationType,
 )
 from pydantic import BaseModel
@@ -110,15 +111,55 @@ def is_preset_scheme(name: str) -> bool:
     return name.upper() in PRESET_SCHEMES
-W8A8 = dict(weights=QuantizationArgs(), input_activations=QuantizationArgs())
+W8A8 = dict(
+    weights=QuantizationArgs(
+        num_bits=8,
+        symmetric=True,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.CHANNEL,
+    ),
+    input_activations=QuantizationArgs(
+        num_bits=8,
+        symmetric=True,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.TOKEN,
+        dynamic=True,
+    ),
+)
-W4A16 = dict(weights=QuantizationArgs(num_bits=4, group_size=128))
+W8A16 = dict(
+    weights=QuantizationArgs(
+        num_bits=8,
+        symmetric=True,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.CHANNEL,
+    )
+)
-FP8 = dict(
-    weights=QuantizationArgs(type=QuantizationType.FLOAT),
-    input_activations=QuantizationArgs(type=QuantizationType.FLOAT),
+W4A16 = dict(
+    weights=QuantizationArgs(
+        num_bits=4,
+        symmetric=True,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.GROUP,
+        group_size=128,
+    )
 )
-PRESET_SCHEMES = {"W8A8": W8A8, "W4A16": W4A16, "FP8": FP8}
+FP8 = dict(
+    weights=QuantizationArgs(
+        num_bits=8,
+        symmetric=True,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.TENSOR,
+    ),
+    input_activations=QuantizationArgs(
+        num_bits=8,
+        symmetric=True,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.TENSOR,
+        dynamic=False,
+    ),
+)
-PRESET_SCHEMES = {"W8A8": W8A8, "W4A16": W4A16, "FP8": FP8}
+PRESET_SCHEMES = {"W8A8": W8A8, "W8A16": W8A16, "W4A16": W4A16, "FP8": FP8}

{compressed_tensors_nightly-0.4.0.20240701.dist-info → compressed_tensors_nightly-0.4.0.20240703.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240701
+Version: 0.4.0.20240703
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors_nightly-0.4.0.20240701.dist-info → compressed_tensors_nightly-0.4.0.20240703.dist-info}/RECORD RENAMED Viewed

@@ -6,7 +6,7 @@ compressed_tensors/compressors/base.py,sha256=-rqT2h9G2iwDkwrVj0d0jxxn9h0dccJA1m
 compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
 compressed_tensors/compressors/marlin_24.py,sha256=PULMP1fp1sNWz-xOxvM0JXhOrUbq6sPwOTscYSifgDw,9450
-compressed_tensors/compressors/model_compressor.py,sha256=t4dH7Yh637JV53VPyys-gkoMPJHGf_tlWWufLRyIdUM,13418
+compressed_tensors/compressors/model_compressor.py,sha256=9dyM2mvAgO7QeFTBWXBzT29JtmRMKQWWU7xh8StaFyI,13446
 compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
 compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
 compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
@@ -21,7 +21,7 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
 compressed_tensors/quantization/quant_config.py,sha256=PU3BchHm09ks6_yAderrHoIZI07zBlU9ejC87v3A-54,9568
-compressed_tensors/quantization/quant_scheme.py,sha256=TU9W3bOWCY2l5Vrha0ufRtW1ac4gew1uwW8N3JGbZvg,3785
+compressed_tensors/quantization/quant_scheme.py,sha256=urZz0YOvxjC2l9waSD5iLDTg9Pqu7N1IAeXldCXDNk0,4604
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
 compressed_tensors/quantization/lifecycle/apply.py,sha256=fyv5ujZC0__oG1ESOTmMyMsKK7DGAxG7uQI7_sxT7Mw,13308
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.20240701.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.4.0.20240701.dist-info/METADATA,sha256=01PuMUcrvra_BAJaUwOExROXU3KAyNCzOSZqPov7kEI,5668
-compressed_tensors_nightly-0.4.0.20240701.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors_nightly-0.4.0.20240701.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.4.0.20240701.dist-info/RECORD,,
+compressed_tensors_nightly-0.4.0.20240703.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240703.dist-info/METADATA,sha256=ibxIuIYMl5tsbzEKptWNeCQVLLaHw4pB15OX5ZV7pZs,5668
+compressed_tensors_nightly-0.4.0.20240703.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240703.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240703.dist-info/RECORD,,

{compressed_tensors_nightly-0.4.0.20240701.dist-info → compressed_tensors_nightly-0.4.0.20240703.dist-info}/LICENSE RENAMED Viewed

File without changes

{compressed_tensors_nightly-0.4.0.20240701.dist-info → compressed_tensors_nightly-0.4.0.20240703.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors_nightly-0.4.0.20240701.dist-info → compressed_tensors_nightly-0.4.0.20240703.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors-nightly 0.4.0.20240701__py3-none-any.whl → 0.4.0.20240703__py3-none-any.whl

compressed-tensors-nightly 0.4.0.20240701__py3-none-any.whl → 0.4.0.20240703__py3-none-any.whl