compressed-tensors 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/__init__.py +1 -0
- compressed_tensors/base.py +2 -0
- compressed_tensors/compressors/__init__.py +6 -12
- compressed_tensors/compressors/base.py +38 -102
- compressed_tensors/compressors/helpers.py +6 -6
- compressed_tensors/compressors/model_compressors/__init__.py +17 -0
- compressed_tensors/compressors/{model_compressor.py → model_compressors/model_compressor.py} +95 -106
- compressed_tensors/compressors/quantized_compressors/__init__.py +18 -0
- compressed_tensors/compressors/quantized_compressors/base.py +146 -0
- compressed_tensors/compressors/{naive_quantized.py → quantized_compressors/naive_quantized.py} +11 -11
- compressed_tensors/compressors/{pack_quantized.py → quantized_compressors/pack_quantized.py} +6 -3
- compressed_tensors/compressors/sparse_compressors/__init__.py +18 -0
- compressed_tensors/compressors/sparse_compressors/base.py +110 -0
- compressed_tensors/compressors/{dense.py → sparse_compressors/dense.py} +3 -3
- compressed_tensors/compressors/{sparse_bitmask.py → sparse_compressors/sparse_bitmask.py} +14 -59
- compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +16 -0
- compressed_tensors/compressors/{marlin_24.py → sparse_quantized_compressors/marlin_24.py} +3 -3
- compressed_tensors/linear/compressed_linear.py +2 -2
- compressed_tensors/quantization/__init__.py +1 -0
- compressed_tensors/quantization/cache.py +201 -0
- compressed_tensors/quantization/lifecycle/apply.py +19 -3
- compressed_tensors/quantization/lifecycle/calibration.py +2 -3
- compressed_tensors/quantization/lifecycle/forward.py +52 -3
- compressed_tensors/quantization/lifecycle/frozen.py +6 -1
- compressed_tensors/quantization/lifecycle/helpers.py +0 -47
- compressed_tensors/quantization/lifecycle/initialize.py +110 -62
- compressed_tensors/quantization/quant_args.py +6 -0
- compressed_tensors/quantization/quant_config.py +14 -2
- compressed_tensors/quantization/quant_scheme.py +5 -4
- compressed_tensors/quantization/utils/helpers.py +43 -18
- compressed_tensors/utils/helpers.py +17 -1
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/METADATA +1 -1
- compressed_tensors-0.7.0.dist-info/RECORD +59 -0
- compressed_tensors-0.6.0.dist-info/RECORD +0 -52
- {compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/LICENSE +0 -0
- {compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/top_level.txt +0 -0
compressed_tensors/quantization/quant_config.py CHANGED
@@ -24,7 +24,7 @@ from compressed_tensors.quantization.quant_scheme import (
 from compressed_tensors.quantization.utils import (
     calculate_compression_ratio,
     is_module_quantized,
-    iter_named_leaf_modules,
+    iter_named_quantizable_modules,
     module_type,
     parse_out_kv_cache_args,
 )
@@ -177,7 +177,9 @@ class QuantizationConfig(BaseModel):
         quantization_status = None
         ignore = {}
         quantization_type_names = set()
-        for name, submodule in iter_named_leaf_modules(model):
+        for name, submodule in iter_named_quantizable_modules(
+            model, include_children=True, include_attn=True
+        ):
             layer_type = module_type(submodule)
             if not is_module_quantized(submodule):
                 if layer_type not in ignore:
@@ -199,6 +201,13 @@ class QuantizationConfig(BaseModel):
         if len(quant_scheme_to_layers) == 0:  # No quantized layers
             return None

+        # kv-cache only, no weight/activation quantization
+        if (
+            len(quantization_type_names) == 1
+            and "attention" in list(quantization_type_names)[0].lower()
+        ):
+            quantization_type_names.add("Linear")
+
         # clean up ignore list, we can leave out layers types if none of the
         # instances are quantized
         consolidated_ignore = []
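The new block handles the kv-cache-only case: when the only collected type name is an attention class, "Linear" is added so the ignore-list cleanup below stays meaningful for the unquantized Linear layers. A worked run of the condition, with an invented class name:

```python
# kv-cache-only model: the inference loop collected a single attention class
# ("LlamaSdpaAttention" is an invented example, not from the diff)
quantization_type_names = {"LlamaSdpaAttention"}

if (
    len(quantization_type_names) == 1
    and "attention" in list(quantization_type_names)[0].lower()
):
    quantization_type_names.add("Linear")

assert quantization_type_names == {"LlamaSdpaAttention", "Linear"}
```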
@@ -241,6 +250,9 @@ class QuantizationConfig(BaseModel):
         )

     def requires_calibration_data(self):
+        if self.kv_cache_scheme is not None:
+            return True
+
         for _, scheme in self.config_groups.items():
             if scheme.input_activations is not None:
                 if not scheme.input_activations.dynamic:
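Any config carrying a kv_cache_scheme now reports that it needs calibration data, since k/v scales must be observed at runtime; previously only non-dynamic activation schemes triggered this. A minimal sketch, assuming pydantic defaults fill in the remaining QuantizationConfig fields:

```python
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
)

# Weight-only group: on its own this never required calibration data
config = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=["Linear"],
            weights=QuantizationArgs(num_bits=8),
        )
    },
    kv_cache_scheme=QuantizationArgs(num_bits=8),
)

# 0.7.0 short-circuits to True whenever kv_cache_scheme is set
assert config.requires_calibration_data()
```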
compressed_tensors/quantization/quant_scheme.py CHANGED
@@ -108,7 +108,7 @@ def is_preset_scheme(name: str) -> bool:
 UNQUANTIZED = dict()

 # 8 bit integer weights and 8 bit activations quantization
-W8A8 = dict(
+INT8_W8A8 = dict(
     weights=QuantizationArgs(
         num_bits=8,
         type=QuantizationType.INT,
@@ -149,7 +149,7 @@ W4A16 = dict(
 )

 # 4 bit integer weights and 8 bit activations quantization
-W4A8 = dict(
+INT8_W4A8 = dict(
     weights=QuantizationArgs(
         num_bits=4,
         type=QuantizationType.INT,
@@ -210,8 +210,9 @@ PRESET_SCHEMES = {
     "W8A16": W8A16,
     "W4A16": W4A16,
     # Integer weight and activation schemes
-    "W8A8": W8A8,
-    "W4A8": W4A8,
+    "W8A8": INT8_W8A8,
+    "INT8": INT8_W8A8,  # alias for W8A8
+    "W4A8": INT8_W4A8,
     # Float weight and activation schemes
     "FP8": FP8,
     "FP8_DYNAMIC": FP8_DYNAMIC,
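A quick sketch of the new alias: "INT8" and "W8A8" resolve to the same preset dict, so is_preset_scheme (the function named in the hunk header above) accepts either name. The assertions are illustrative:

```python
from compressed_tensors.quantization.quant_scheme import (
    PRESET_SCHEMES,
    is_preset_scheme,
)

assert is_preset_scheme("INT8") and is_preset_scheme("W8A8")
# Both names point at the same INT8_W8A8 definition
assert PRESET_SCHEMES["INT8"] is PRESET_SCHEMES["W8A8"]
```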
compressed_tensors/quantization/utils/helpers.py CHANGED
@@ -13,8 +13,7 @@
 # limitations under the License.

 import logging
-import re
-from typing import List, Optional, Tuple
+from typing import Generator, List, Optional, Tuple

 import torch
 from compressed_tensors.quantization.observers.base import Observer
@@ -28,7 +27,6 @@ __all__ = [
     "infer_quantization_status",
     "is_module_quantized",
     "is_model_quantized",
-    "iter_named_leaf_modules",
     "module_type",
     "calculate_compression_ratio",
     "get_torch_bit_depth",
@@ -36,9 +34,14 @@ __all__ = [
     "parse_out_kv_cache_args",
     "KV_CACHE_TARGETS",
     "is_kv_cache_quant_scheme",
+    "iter_named_leaf_modules",
+    "iter_named_quantizable_modules",
 ]

-KV_CACHE_TARGETS = ["re:.*k_proj", "re:.*v_proj"]
+# target the self_attn layer
+# QuantizedKVParameterCache is responsible for obtaining the k_scale and v_scale
+KV_CACHE_TARGETS = ["re:.*self_attn$"]
+
 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -106,11 +109,10 @@ def module_type(module: Module) -> str:
     return type(module).__name__


-def iter_named_leaf_modules(model: Module) -> Tuple[str, Module]:
+def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None, None]:
     """
     Yields modules that do not have any submodules except observers. The observers
     themselves are not yielded
-
     :param model: model to get leaf modules of
     :returns: generator tuple of (name, leaf_submodule)
     """
@@ -128,6 +130,37 @@ def iter_named_leaf_modules(model: Module) -> Tuple[str, Module]:
             yield name, submodule


+def iter_named_quantizable_modules(
+    model: Module, include_children: bool = True, include_attn: bool = False
+) -> Generator[Tuple[str, Module], None, None]:
+    """
+    Yield name and submodule of
+    - leaf modules, set by include_children
+    - attention modyles, set by include_attn
+
+    :param model: model to get leaf modules of
+    :param include_children: flag to get the leaf modules
+    :param inlcude_attn: flag to get the attention modules
+    :returns: generator tuple of (name, submodule)
+    """
+    for name, submodule in model.named_modules():
+        if include_children:
+            children = list(submodule.children())
+            if len(children) == 0 and not isinstance(submodule, Observer):
+                yield name, submodule
+            else:
+                has_non_observer_children = False
+                for child in children:
+                    if not isinstance(child, Observer):
+                        has_non_observer_children = True
+
+                if not has_non_observer_children:
+                    yield name, submodule
+        if include_attn:
+            if name.endswith("self_attn"):
+                yield name, submodule
+
+
 def get_torch_bit_depth(value: torch.Tensor) -> int:
     """
     Determine the number of bits used to represent the dtype of a tensor
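A small sketch of what the new iter_named_quantizable_modules yields on a toy model: with include_attn=True, a module whose name ends in self_attn is emitted as a container in addition to the usual leaves. The toy classes are invented for illustration, and the import path assumes the utils package re-exports the helper, as the __all__ change above suggests:

```python
import torch.nn as nn

from compressed_tensors.quantization.utils import iter_named_quantizable_modules

class ToyAttention(nn.Module):
    def __init__(self):
        super().__init__()
        self.k_proj = nn.Linear(8, 8)
        self.v_proj = nn.Linear(8, 8)

class ToyBlock(nn.Module):
    def __init__(self):
        super().__init__()
        self.self_attn = ToyAttention()
        self.mlp = nn.Linear(8, 8)

names = [
    name
    for name, _ in iter_named_quantizable_modules(
        ToyBlock(), include_children=True, include_attn=True
    )
]
# Attention container plus the leaf Linear layers:
# ['self_attn', 'self_attn.k_proj', 'self_attn.v_proj', 'mlp']
```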
@@ -204,19 +237,11 @@ def is_kv_cache_quant_scheme(scheme: QuantizationScheme) -> bool:
     :param scheme: The QuantizationScheme to investigate
     :return: boolean flag
     """
-    is_match_output_activations = scheme.output_activations is not None
-    if len(scheme.targets) == 1:
-        # match on the KV_CACHE_TARGETS regex
-        is_match_targets = any(
-            [re.match(pattern[3:], scheme.targets[0]) for pattern in KV_CACHE_TARGETS]
-        )
-    else:
-        # match on the exact KV_CACHE_TARGETS
-        # if there are multiple targets
-        is_match_targets = set(KV_CACHE_TARGETS) == set(scheme.targets)
+    for target in scheme.targets:
+        if target in KV_CACHE_TARGETS:
+            return True

-
-    return is_match_targets and is_match_output_activations
+    return False


 def parse_out_kv_cache_args(
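is_kv_cache_quant_scheme is now a literal membership test against KV_CACHE_TARGETS: the regex and output-activation checks are gone, so only a scheme whose targets contain the exact string "re:.*self_attn$" counts as kv-cache quantization. A short sketch (the scheme construction here is illustrative):

```python
from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme
from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme

kv_scheme = QuantizationScheme(
    targets=["re:.*self_attn$"],
    output_activations=QuantizationArgs(num_bits=8),
)
linear_scheme = QuantizationScheme(targets=["Linear"])

assert is_kv_cache_quant_scheme(kv_scheme)
assert not is_kv_cache_quant_scheme(linear_scheme)
```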
compressed_tensors/utils/helpers.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Optional
+from typing import Any, Optional

 import torch
 from transformers import AutoConfig
@@ -23,6 +23,7 @@ __all__ = [
     "fix_fsdp_module_name",
     "tensor_follows_mask_structure",
     "replace_module",
+    "is_compressed_tensors_config",
 ]

 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -103,3 +104,18 @@ def replace_module(model: torch.nn.Module, name: str, new_module: torch.nn.Modul
         parent = model
         child_name = name
     setattr(parent, child_name, new_module)
+
+
+def is_compressed_tensors_config(compression_config: Any) -> bool:
+    """
+    Returns True if CompressedTensorsConfig is available from transformers and
+    compression_config is an instance of CompressedTensorsConfig
+
+    See: https://github.com/huggingface/transformers/pull/31704
+    """
+    try:
+        from transformers.utils.quantization_config import CompressedTensorsConfig
+
+        return isinstance(compression_config, CompressedTensorsConfig)
+    except ImportError:
+        return False
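The new is_compressed_tensors_config helper lets callers branch on the transformers CompressedTensorsConfig wrapper without a hard dependency on a transformers version that ships it. A hypothetical usage sketch; extract_inner_config is invented here, and the attribute access assumes the wrapper's quantization_config field:

```python
from compressed_tensors.utils import is_compressed_tensors_config

def extract_inner_config(compression_config):
    # Hypothetical helper: unwrap the transformers CompressedTensorsConfig
    # wrapper when present, otherwise pass the raw config through unchanged
    if is_compressed_tensors_config(compression_config):
        return compression_config.quantization_config
    return compression_config
```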
compressed_tensors/version.py CHANGED
compressed_tensors-0.7.0.dist-info/RECORD ADDED
@@ -0,0 +1,59 @@
+compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
+compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
+compressed_tensors/version.py,sha256=RTYptXdV8f4QbYCRQ13eGeEsq4grNJs6EXgejoZl9EE,1585
+compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
+compressed_tensors/compressors/base.py,sha256=D9TNwQcjanDiAHODPbg8JUqc66e3j50rctY7A708NEs,6743
+compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
+compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=XJgPsq8KiDfiR4e8bSI38lmoOd2ApqRk1aPcXS2obqY,15600
+compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=K1KOnS6Y8nUA1-HN7VhyfsDc01nilW0WfXMUhuD-l8w,5954
+compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=Mmfr-hap-4zw7CzE1mXi0UirknqGidNxw38GGWVgTqM,4916
+compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=9H8UrG5v1GRtslLjOEiUM2dnyxJnR-HJmlsFezQs_r0,7706
+compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=i2TESH27l7KXeOhJ6hShIoI904XX96l-cRQiMR6MAaU,704
+compressed_tensors/compressors/sparse_compressors/base.py,sha256=Ua4rUSGyucEs-YJI5z3oIUF-zqQLrFsQ9f-qKasEdUM,4410
+compressed_tensors/compressors/sparse_compressors/dense.py,sha256=lSKNWRx6H7aUqaJj1j4qbXk8Gkm1UohbnvW1Rvq6Ra4,1284
+compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=4fKwCG7ZM8mUtSnjPvubzEHl-mTnxMzwjmcs7L43WLY,6622
+compressed_tensors/compressors/sparse_quantized_compressors/__init__.py,sha256=4f_cwcKXB1nVVMoiKgTFAc8jAPjPLElo-Df_EDm1_xw,675
+compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=akqE7eW8CLTslpWRxERaZ8R0TSm1lS7D1bgZXKL0xi8,9427
+compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
+compressed_tensors/config/base.py,sha256=BNTFKy12isY7qblwxdi_R1f00EzgrNOXLrfxqLCPT8w,1903
+compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
+compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
+compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
+compressed_tensors/linear/compressed_linear.py,sha256=0jTTf6XxOAjAYs3tvFtgiNMAO4W10sSeR-pdH2M413g,3218
+compressed_tensors/quantization/__init__.py,sha256=nWP_fsl6Nn0ksEgZPzerGiETdvF-ZfNwPnwGlRiR5pY,805
+compressed_tensors/quantization/cache.py,sha256=vnBB5zasO_XpHomZvzUPVVbzyCz2VgebsHePm0kANzY,6831
+compressed_tensors/quantization/quant_args.py,sha256=73KevZXHyrkMCT_3CxbYHz70fI3i-wcF8NvN0wsBPK4,8271
+compressed_tensors/quantization/quant_config.py,sha256=NCiMvUMnnz5kTyAkDylxjtEGQnjgsIYIeNR2zyHEdTQ,10371
+compressed_tensors/quantization/quant_scheme.py,sha256=uFgp6ECU6ZkHWkeKlAVAzZTLDbrTrzPSPrY23eJluaw,5931
+compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
+compressed_tensors/quantization/lifecycle/apply.py,sha256=czaayvpeUYyWRJhO_klffw6esptOgA9sBKL5TWQcRdw,15805
+compressed_tensors/quantization/lifecycle/calibration.py,sha256=IuLeRkVQPrMxkMcIjr4OMFlIUMHkqjH4qAxC2KiUBGw,2673
+compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
+compressed_tensors/quantization/lifecycle/forward.py,sha256=eLup6QDRUUp_Ozcas7RDRLIXBWjFbxn5gWbcAIJEGlw,15715
+compressed_tensors/quantization/lifecycle/frozen.py,sha256=NiJw7NP7pcT6idWFa8vksgiLoT8oQ975e57S4QfD2QQ,1874
+compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=4_YG7jKl7d2-Cy58pOkMtInFRhvYahxYchesWMPxPVM,8862
+compressed_tensors/quantization/observers/__init__.py,sha256=4Sa7rqi5RB_S5bPO8KmncETiqDsoMBhwP37arlQym8s,764
+compressed_tensors/quantization/observers/base.py,sha256=5ovQicWPYHjIxr6-EkQ4lgOX0PpI9g23iSzKpxjM1Zg,8420
+compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
+compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
+compressed_tensors/quantization/observers/min_max.py,sha256=sQXqU3z-voxIDfR_9mQzwQUflZj2sASm_G8CYaXntFw,3865
+compressed_tensors/quantization/observers/mse.py,sha256=Aeh-253Vbab1F8cYuBiGNn4OXWJ67wXQ_JVfl3mu2a8,6034
+compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
+compressed_tensors/quantization/utils/helpers.py,sha256=y4LEyC2oUd876ZMdALWKGH3Ct5EgBJZV4id_NUjTGH8,9531
+compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
+compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
+compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
+compressed_tensors/utils/helpers.py,sha256=hWGIR0W7ENHwdC7wW2SQJJiCF9-xOu_u3fY2RzLyYg4,4101
+compressed_tensors/utils/offload.py,sha256=d9q8LNe8HyF8tOjgjA7QGLD3HRysmNp0d8eBbdqBgIM,4089
+compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
+compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
+compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
+compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
+compressed_tensors-0.7.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.7.0.dist-info/METADATA,sha256=Lgcl4rU8ifo0PY-FrurFApAkuTD9HBeJohuULjVqebs,6782
+compressed_tensors-0.7.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+compressed_tensors-0.7.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.7.0.dist-info/RECORD,,
compressed_tensors-0.6.0.dist-info/RECORD DELETED
@@ -1,52 +0,0 @@
-compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
-compressed_tensors/base.py,sha256=Mq4mfVQcJhNpha-BXzpOfpmFIdl01o09BJE7D2oQ_00,796
-compressed_tensors/version.py,sha256=qWZbaQsbr0_5Y1qtWGstNPljw-ggMLBGZBvSKfzB9pw,1585
-compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
-compressed_tensors/compressors/base.py,sha256=NfVkhq6PRiq2cvAXaUXLoqC_nVYWdSrkE12c9AXYSMo,9956
-compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
-compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
-compressed_tensors/compressors/model_compressor.py,sha256=gI6KKtH3eeWi2540Ayx-4bg9o8qjrvxlF4Gd_sqltGA,16678
-compressed_tensors/compressors/naive_quantized.py,sha256=z3h3ca5xKCN69mahutxcbzdv-OysiaxaM8P-Qum6zUQ,4823
-compressed_tensors/compressors/pack_quantized.py,sha256=27RVmJ2wg2dvCoawj407HSmKT3VPGJ6ujAMHlT26WlI,7571
-compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
-compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
-compressed_tensors/config/base.py,sha256=BNTFKy12isY7qblwxdi_R1f00EzgrNOXLrfxqLCPT8w,1903
-compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
-compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
-compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/linear/compressed_linear.py,sha256=G0gEFfxLAUsgRcnfSV-PKz1ZBNTVokOauOoup7SE1mw,3210
-compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=CmyVtjJeHlqCW-7R5Z7tIw6lXUrzCX6Y9bwgmMxEudY,8069
-compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
-compressed_tensors/quantization/quant_scheme.py,sha256=HmR1DcFZcjuqX7KHUYI0NFXsCIzJ8sxFGH6zhYuHmEs,5870
-compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
-compressed_tensors/quantization/lifecycle/apply.py,sha256=uftWFunr_CpCZM_qWfo2O1USXKB2qSYD1pBJsO8BuCU,15285
-compressed_tensors/quantization/lifecycle/calibration.py,sha256=PlS_EqCOPqJD3QKuLPXO9AOtDzXtQWvEBTynFv-FFVw,2698
-compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=PljD9pzATILEOiC3ZdHUTsfSbZdAa6iSIxWmvAHLG9I,13688
-compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
-compressed_tensors/quantization/lifecycle/helpers.py,sha256=TmLY_G5VP_Fg2Ywio_dxoHRTxOKZdT7_aG5S9WtD4zI,2424
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=S5Kwy16Da8WUIIpa1xVKc72MijJ5C_rqM6JjanZ7MGk,7133
-compressed_tensors/quantization/observers/__init__.py,sha256=4Sa7rqi5RB_S5bPO8KmncETiqDsoMBhwP37arlQym8s,764
-compressed_tensors/quantization/observers/base.py,sha256=5ovQicWPYHjIxr6-EkQ4lgOX0PpI9g23iSzKpxjM1Zg,8420
-compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
-compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
-compressed_tensors/quantization/observers/min_max.py,sha256=sQXqU3z-voxIDfR_9mQzwQUflZj2sASm_G8CYaXntFw,3865
-compressed_tensors/quantization/observers/mse.py,sha256=Aeh-253Vbab1F8cYuBiGNn4OXWJ67wXQ_JVfl3mu2a8,6034
-compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=pwvU613XRvMDtI5b39II5jukBl5OUCqoX0ofVRpOFRY,8633
-compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
-compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
-compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
-compressed_tensors/utils/helpers.py,sha256=bh4G8mj_YCRf8Bo2FQ9FkIIZXY8xqqPjckNnVYB0gBA,3557
-compressed_tensors/utils/offload.py,sha256=d9q8LNe8HyF8tOjgjA7QGLD3HRysmNp0d8eBbdqBgIM,4089
-compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
-compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
-compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
-compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.6.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.6.0.dist-info/METADATA,sha256=opV5oXtNn7EmogPQaMiv9ddOLHv-XObt_NuUSwR5Bz4,6782
-compressed_tensors-0.6.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-compressed_tensors-0.6.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.6.0.dist-info/RECORD,,