compressed-tensors-nightly 0.4.0.20240701__py3-none-any.whl → 0.4.0.20240703__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,6 +81,7 @@ class ModelCompressor:
81
81
  def from_pretrained(
82
82
  cls,
83
83
  pretrained_model_name_or_path: str,
84
+ **kwargs,
84
85
  ) -> Optional["ModelCompressor"]:
85
86
  """
86
87
  Given a path to a model config, extract the sparsity and/or quantization
@@ -89,7 +90,7 @@ class ModelCompressor:
89
90
  :param pretrained_model_name_or_path: path to model config on disk or HF hub
90
91
  :return: compressor for the extracted configs
91
92
  """
92
- config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
93
+ config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
93
94
  compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
94
95
  return cls.from_compression_config(compression_config)
95
96
 
@@ -17,6 +17,7 @@ from typing import List, Optional
17
17
 
18
18
  from compressed_tensors.quantization.quant_args import (
19
19
  QuantizationArgs,
20
+ QuantizationStrategy,
20
21
  QuantizationType,
21
22
  )
22
23
  from pydantic import BaseModel
@@ -110,15 +111,55 @@ def is_preset_scheme(name: str) -> bool:
110
111
  return name.upper() in PRESET_SCHEMES
111
112
 
112
113
 
113
- W8A8 = dict(weights=QuantizationArgs(), input_activations=QuantizationArgs())
114
+ W8A8 = dict(
115
+ weights=QuantizationArgs(
116
+ num_bits=8,
117
+ symmetric=True,
118
+ type=QuantizationType.INT,
119
+ strategy=QuantizationStrategy.CHANNEL,
120
+ ),
121
+ input_activations=QuantizationArgs(
122
+ num_bits=8,
123
+ symmetric=True,
124
+ type=QuantizationType.INT,
125
+ strategy=QuantizationStrategy.TOKEN,
126
+ dynamic=True,
127
+ ),
128
+ )
114
129
 
115
- W4A16 = dict(weights=QuantizationArgs(num_bits=4, group_size=128))
130
+ W8A16 = dict(
131
+ weights=QuantizationArgs(
132
+ num_bits=8,
133
+ symmetric=True,
134
+ type=QuantizationType.INT,
135
+ strategy=QuantizationStrategy.CHANNEL,
136
+ )
137
+ )
116
138
 
117
- FP8 = dict(
118
- weights=QuantizationArgs(type=QuantizationType.FLOAT),
119
- input_activations=QuantizationArgs(type=QuantizationType.FLOAT),
139
+ W4A16 = dict(
140
+ weights=QuantizationArgs(
141
+ num_bits=4,
142
+ symmetric=True,
143
+ type=QuantizationType.INT,
144
+ strategy=QuantizationStrategy.GROUP,
145
+ group_size=128,
146
+ )
120
147
  )
121
148
 
122
- PRESET_SCHEMES = {"W8A8": W8A8, "W4A16": W4A16, "FP8": FP8}
149
+ FP8 = dict(
150
+ weights=QuantizationArgs(
151
+ num_bits=8,
152
+ symmetric=True,
153
+ type=QuantizationType.FLOAT,
154
+ strategy=QuantizationStrategy.TENSOR,
155
+ ),
156
+ input_activations=QuantizationArgs(
157
+ num_bits=8,
158
+ symmetric=True,
159
+ type=QuantizationType.FLOAT,
160
+ strategy=QuantizationStrategy.TENSOR,
161
+ dynamic=False,
162
+ ),
163
+ )
123
164
 
124
- PRESET_SCHEMES = {"W8A8": W8A8, "W4A16": W4A16, "FP8": FP8}
165
+ PRESET_SCHEMES = {"W8A8": W8A8, "W8A16": W8A16, "W4A16": W4A16, "FP8": FP8}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.4.0.20240701
3
+ Version: 0.4.0.20240703
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -6,7 +6,7 @@ compressed_tensors/compressors/base.py,sha256=-rqT2h9G2iwDkwrVj0d0jxxn9h0dccJA1m
6
6
  compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
7
7
  compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
8
8
  compressed_tensors/compressors/marlin_24.py,sha256=PULMP1fp1sNWz-xOxvM0JXhOrUbq6sPwOTscYSifgDw,9450
9
- compressed_tensors/compressors/model_compressor.py,sha256=t4dH7Yh637JV53VPyys-gkoMPJHGf_tlWWufLRyIdUM,13418
9
+ compressed_tensors/compressors/model_compressor.py,sha256=9dyM2mvAgO7QeFTBWXBzT29JtmRMKQWWU7xh8StaFyI,13446
10
10
  compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
11
11
  compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
12
12
  compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
@@ -21,7 +21,7 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
21
21
  compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
22
22
  compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
23
23
  compressed_tensors/quantization/quant_config.py,sha256=PU3BchHm09ks6_yAderrHoIZI07zBlU9ejC87v3A-54,9568
24
- compressed_tensors/quantization/quant_scheme.py,sha256=TU9W3bOWCY2l5Vrha0ufRtW1ac4gew1uwW8N3JGbZvg,3785
24
+ compressed_tensors/quantization/quant_scheme.py,sha256=urZz0YOvxjC2l9waSD5iLDTg9Pqu7N1IAeXldCXDNk0,4604
25
25
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
26
26
  compressed_tensors/quantization/lifecycle/apply.py,sha256=fyv5ujZC0__oG1ESOTmMyMsKK7DGAxG7uQI7_sxT7Mw,13308
27
27
  compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
41
41
  compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
42
42
  compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
43
43
  compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
44
- compressed_tensors_nightly-0.4.0.20240701.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
- compressed_tensors_nightly-0.4.0.20240701.dist-info/METADATA,sha256=01PuMUcrvra_BAJaUwOExROXU3KAyNCzOSZqPov7kEI,5668
46
- compressed_tensors_nightly-0.4.0.20240701.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
47
- compressed_tensors_nightly-0.4.0.20240701.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
48
- compressed_tensors_nightly-0.4.0.20240701.dist-info/RECORD,,
44
+ compressed_tensors_nightly-0.4.0.20240703.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
+ compressed_tensors_nightly-0.4.0.20240703.dist-info/METADATA,sha256=ibxIuIYMl5tsbzEKptWNeCQVLLaHw4pB15OX5ZV7pZs,5668
46
+ compressed_tensors_nightly-0.4.0.20240703.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
47
+ compressed_tensors_nightly-0.4.0.20240703.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
48
+ compressed_tensors_nightly-0.4.0.20240703.dist-info/RECORD,,