compressed-tensors 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. compressed_tensors/__init__.py +1 -0
  2. compressed_tensors/base.py +2 -0
  3. compressed_tensors/compressors/__init__.py +6 -12
  4. compressed_tensors/compressors/base.py +38 -102
  5. compressed_tensors/compressors/helpers.py +6 -6
  6. compressed_tensors/compressors/model_compressors/__init__.py +17 -0
  7. compressed_tensors/compressors/{model_compressor.py → model_compressors/model_compressor.py} +95 -106
  8. compressed_tensors/compressors/quantized_compressors/__init__.py +18 -0
  9. compressed_tensors/compressors/quantized_compressors/base.py +146 -0
  10. compressed_tensors/compressors/{naive_quantized.py → quantized_compressors/naive_quantized.py} +11 -11
  11. compressed_tensors/compressors/{pack_quantized.py → quantized_compressors/pack_quantized.py} +6 -3
  12. compressed_tensors/compressors/sparse_compressors/__init__.py +18 -0
  13. compressed_tensors/compressors/sparse_compressors/base.py +110 -0
  14. compressed_tensors/compressors/{dense.py → sparse_compressors/dense.py} +3 -3
  15. compressed_tensors/compressors/{sparse_bitmask.py → sparse_compressors/sparse_bitmask.py} +14 -59
  16. compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +16 -0
  17. compressed_tensors/compressors/{marlin_24.py → sparse_quantized_compressors/marlin_24.py} +3 -3
  18. compressed_tensors/linear/compressed_linear.py +2 -2
  19. compressed_tensors/quantization/__init__.py +1 -0
  20. compressed_tensors/quantization/cache.py +201 -0
  21. compressed_tensors/quantization/lifecycle/apply.py +19 -3
  22. compressed_tensors/quantization/lifecycle/calibration.py +2 -3
  23. compressed_tensors/quantization/lifecycle/forward.py +52 -3
  24. compressed_tensors/quantization/lifecycle/frozen.py +6 -1
  25. compressed_tensors/quantization/lifecycle/helpers.py +0 -47
  26. compressed_tensors/quantization/lifecycle/initialize.py +110 -62
  27. compressed_tensors/quantization/quant_args.py +6 -0
  28. compressed_tensors/quantization/quant_config.py +14 -2
  29. compressed_tensors/quantization/quant_scheme.py +5 -4
  30. compressed_tensors/quantization/utils/helpers.py +43 -18
  31. compressed_tensors/utils/helpers.py +17 -1
  32. compressed_tensors/version.py +1 -1
  33. {compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/METADATA +1 -1
  34. compressed_tensors-0.7.0.dist-info/RECORD +59 -0
  35. compressed_tensors-0.6.0.dist-info/RECORD +0 -52
  36. {compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/LICENSE +0 -0
  37. {compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/WHEEL +0 -0
  38. {compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/top_level.txt +0 -0

compressed_tensors/quantization/quant_config.py
@@ -24,7 +24,7 @@ from compressed_tensors.quantization.quant_scheme import (
 from compressed_tensors.quantization.utils import (
     calculate_compression_ratio,
     is_module_quantized,
-    iter_named_leaf_modules,
+    iter_named_quantizable_modules,
     module_type,
     parse_out_kv_cache_args,
 )
@@ -177,7 +177,9 @@ class QuantizationConfig(BaseModel):
         quantization_status = None
         ignore = {}
         quantization_type_names = set()
-        for name, submodule in iter_named_leaf_modules(model):
+        for name, submodule in iter_named_quantizable_modules(
+            model, include_children=True, include_attn=True
+        ):
             layer_type = module_type(submodule)
             if not is_module_quantized(submodule):
                 if layer_type not in ignore:
@@ -199,6 +201,13 @@ class QuantizationConfig(BaseModel):
         if len(quant_scheme_to_layers) == 0:  # No quantized layers
             return None

+        # kv-cache only, no weight/activation quantization
+        if (
+            len(quantization_type_names) == 1
+            and "attention" in list(quantization_type_names)[0].lower()
+        ):
+            quantization_type_names.add("Linear")
+
         # clean up ignore list, we can leave out layers types if none of the
         # instances are quantized
         consolidated_ignore = []
@@ -241,6 +250,9 @@ class QuantizationConfig(BaseModel):
         )

     def requires_calibration_data(self):
+        if self.kv_cache_scheme is not None:
+            return True
+
         for _, scheme in self.config_groups.items():
             if scheme.input_activations is not None:
                 if not scheme.input_activations.dynamic:
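
Taken together, the quant_config.py changes mean a config whose only quantization target is the kv cache is still treated as calibration-dependent. A minimal sketch of the new behavior; the constructor arguments below (empty `config_groups`, the `QuantizationArgs(num_bits=8)` kv-cache scheme) are assumptions, not taken from the diff:

```python
# Sketch: a kv-cache-only config now reports that it needs calibration data,
# via the new early return in requires_calibration_data().
from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig

config = QuantizationConfig(
    config_groups={},                              # no weight/activation groups (assumed valid)
    kv_cache_scheme=QuantizationArgs(num_bits=8),  # hypothetical kv-cache scheme
)
assert config.requires_calibration_data()  # True before the config_groups loop runs
```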

compressed_tensors/quantization/quant_scheme.py
@@ -108,7 +108,7 @@ def is_preset_scheme(name: str) -> bool:
 UNQUANTIZED = dict()

 # 8 bit integer weights and 8 bit activations quantization
-W8A8 = dict(
+INT8_W8A8 = dict(
     weights=QuantizationArgs(
         num_bits=8,
         type=QuantizationType.INT,
@@ -149,7 +149,7 @@ W4A16 = dict(
 )

 # 4 bit integer weights and 8 bit activations quantization
-W4A8 = dict(
+INT8_W4A8 = dict(
     weights=QuantizationArgs(
         num_bits=4,
         type=QuantizationType.INT,
@@ -210,8 +210,9 @@ PRESET_SCHEMES = {
     "W8A16": W8A16,
     "W4A16": W4A16,
     # Integer weight and activation schemes
-    "W8A8": W8A8,
-    "W4A8": W4A8,
+    "W8A8": INT8_W8A8,
+    "INT8": INT8_W8A8,  # alias for W8A8
+    "W4A8": INT8_W4A8,
     # Float weight and activation schemes
     "FP8": FP8,
     "FP8_DYNAMIC": FP8_DYNAMIC,

compressed_tensors/quantization/utils/helpers.py
@@ -13,8 +13,7 @@
 # limitations under the License.

 import logging
-import re
-from typing import List, Optional, Tuple
+from typing import Generator, List, Optional, Tuple

 import torch
 from compressed_tensors.quantization.observers.base import Observer
@@ -28,7 +27,6 @@ __all__ = [
     "infer_quantization_status",
     "is_module_quantized",
     "is_model_quantized",
-    "iter_named_leaf_modules",
     "module_type",
     "calculate_compression_ratio",
     "get_torch_bit_depth",
@@ -36,9 +34,14 @@ __all__ = [
     "parse_out_kv_cache_args",
     "KV_CACHE_TARGETS",
     "is_kv_cache_quant_scheme",
+    "iter_named_leaf_modules",
+    "iter_named_quantizable_modules",
 ]

-KV_CACHE_TARGETS = ["re:.*k_proj", "re:.*v_proj"]
+# target the self_attn layer
+# QuantizedKVParameterCache is responsible for obtaining the k_scale and v_scale
+KV_CACHE_TARGETS = ["re:.*self_attn$"]
+
 _LOGGER: logging.Logger = logging.getLogger(__name__)


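The new target matches the attention module itself rather than its k_proj/v_proj children. A standalone sanity check of the pattern; this library marks regex targets with a `re:` prefix, so matching strips the first three characters:

```python
import re

# "re:" prefix marks a regex target; strip it before matching.
pattern = "re:.*self_attn$"[3:]  # -> ".*self_attn$"

assert re.match(pattern, "model.layers.0.self_attn")             # attention module matches
assert not re.match(pattern, "model.layers.0.self_attn.k_proj")  # children no longer targeted
```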
@@ -106,11 +109,10 @@ def module_type(module: Module) -> str:
     return type(module).__name__


-def iter_named_leaf_modules(model: Module) -> Tuple[str, Module]:
+def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None, None]:
     """
     Yields modules that do not have any submodules except observers. The observers
     themselves are not yielded
-
     :param model: model to get leaf modules of
     :returns: generator tuple of (name, leaf_submodule)
     """
@@ -128,6 +130,37 @@ def iter_named_leaf_modules(model: Module) -> Tuple[str, Module]:
                 yield name, submodule


+def iter_named_quantizable_modules(
+    model: Module, include_children: bool = True, include_attn: bool = False
+) -> Generator[Tuple[str, Module], None, None]:
+    """
+    Yield name and submodule of
+    - leaf modules, set by include_children
+    - attention modyles, set by include_attn
+
+    :param model: model to get leaf modules of
+    :param include_children: flag to get the leaf modules
+    :param inlcude_attn: flag to get the attention modules
+    :returns: generator tuple of (name, submodule)
+    """
+    for name, submodule in model.named_modules():
+        if include_children:
+            children = list(submodule.children())
+            if len(children) == 0 and not isinstance(submodule, Observer):
+                yield name, submodule
+            else:
+                has_non_observer_children = False
+                for child in children:
+                    if not isinstance(child, Observer):
+                        has_non_observer_children = True
+
+                if not has_non_observer_children:
+                    yield name, submodule
+        if include_attn:
+            if name.endswith("self_attn"):
+                yield name, submodule
+
+
 def get_torch_bit_depth(value: torch.Tensor) -> int:
     """
     Determine the number of bits used to represent the dtype of a tensor
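
A toy usage sketch for the new iterator. The module names are chosen so the `self_attn` suffix check fires; nothing beyond torch and the function above is assumed:

```python
import torch.nn as nn
from compressed_tensors.quantization.utils import iter_named_quantizable_modules

# Minimal stand-in for a decoder layer: an attention container plus an MLP.
layer = nn.Module()
layer.self_attn = nn.Sequential(nn.Linear(8, 8))
layer.mlp = nn.Linear(8, 8)

for name, _ in iter_named_quantizable_modules(layer, include_children=True, include_attn=True):
    print(name)
# -> "self_attn" (via include_attn), then "self_attn.0" and "mlp" (leaf modules)
```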
@@ -204,19 +237,11 @@ def is_kv_cache_quant_scheme(scheme: QuantizationScheme) -> bool:
     :param scheme: The QuantizationScheme to investigate
     :return: boolean flag
     """
-    if len(scheme.targets) == 1:
-        # match on the KV_CACHE_TARGETS regex pattern
-        # if there is only one target
-        is_match_targets = any(
-            [re.match(pattern[3:], scheme.targets[0]) for pattern in KV_CACHE_TARGETS]
-        )
-    else:
-        # match on the exact KV_CACHE_TARGETS
-        # if there are multiple targets
-        is_match_targets = set(KV_CACHE_TARGETS) == set(scheme.targets)
+    for target in scheme.targets:
+        if target in KV_CACHE_TARGETS:
+            return True

-    is_match_output_activations = scheme.output_activations is not None
-    return is_match_targets and is_match_output_activations
+    return False


 def parse_out_kv_cache_args(
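
Detection is now exact string membership in `KV_CACHE_TARGETS`: the regex path (and with it the `import re`) is gone, and `output_activations` no longer factors in. A sketch of the new behavior; constructing `QuantizationScheme` with only `targets` is an assumption:

```python
from compressed_tensors.quantization import QuantizationScheme
from compressed_tensors.quantization.utils import is_kv_cache_quant_scheme

# The target string must equal "re:.*self_attn$" exactly; regexes are not expanded.
assert is_kv_cache_quant_scheme(QuantizationScheme(targets=["re:.*self_attn$"]))
assert not is_kv_cache_quant_scheme(QuantizationScheme(targets=["Linear"]))
```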

compressed_tensors/utils/helpers.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Optional
+from typing import Any, Optional

 import torch
 from transformers import AutoConfig
@@ -23,6 +23,7 @@ __all__ = [
     "fix_fsdp_module_name",
     "tensor_follows_mask_structure",
     "replace_module",
+    "is_compressed_tensors_config",
 ]

 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -103,3 +104,18 @@ def replace_module(model: torch.nn.Module, name: str, new_module: torch.nn.Modul
         parent = model
         child_name = name
     setattr(parent, child_name, new_module)
+
+
+def is_compressed_tensors_config(compression_config: Any) -> bool:
+    """
+    Returns True if CompressedTensorsConfig is available from transformers and
+    compression_config is an instance of CompressedTensorsConfig
+
+    See: https://github.com/huggingface/transformers/pull/31704
+    """
+    try:
+        from transformers.utils.quantization_config import CompressedTensorsConfig
+
+        return isinstance(compression_config, CompressedTensorsConfig)
+    except ImportError:
+        return False
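
The helper lets callers branch on the installed transformers version without a hard dependency. A guarded usage sketch; zero-argument construction of `CompressedTensorsConfig` is an assumption for illustration:

```python
from compressed_tensors.utils.helpers import is_compressed_tensors_config

try:
    # CompressedTensorsConfig was added in transformers PR #31704; assumed
    # constructible with defaults for this sketch.
    from transformers.utils.quantization_config import CompressedTensorsConfig
    cfg = CompressedTensorsConfig()
except ImportError:
    cfg = None  # older transformers: the helper simply returns False

print(is_compressed_tensors_config(cfg))
```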

compressed_tensors/version.py
@@ -17,7 +17,7 @@ Functionality for storing and setting the version info for SparseML
 """


-version_base = "0.6.0"
+version_base = "0.7.0"
 is_release = True  # change to True to set the generated version as a release version



{compressed_tensors-0.6.0.dist-info → compressed_tensors-0.7.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors
-Version: 0.6.0
+Version: 0.7.0
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

compressed_tensors-0.7.0.dist-info/RECORD
@@ -0,0 +1,59 @@
+compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
+compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
+compressed_tensors/version.py,sha256=RTYptXdV8f4QbYCRQ13eGeEsq4grNJs6EXgejoZl9EE,1585
+compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
+compressed_tensors/compressors/base.py,sha256=D9TNwQcjanDiAHODPbg8JUqc66e3j50rctY7A708NEs,6743
+compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
+compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=XJgPsq8KiDfiR4e8bSI38lmoOd2ApqRk1aPcXS2obqY,15600
+compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=K1KOnS6Y8nUA1-HN7VhyfsDc01nilW0WfXMUhuD-l8w,5954
+compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=Mmfr-hap-4zw7CzE1mXi0UirknqGidNxw38GGWVgTqM,4916
+compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=9H8UrG5v1GRtslLjOEiUM2dnyxJnR-HJmlsFezQs_r0,7706
+compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=i2TESH27l7KXeOhJ6hShIoI904XX96l-cRQiMR6MAaU,704
+compressed_tensors/compressors/sparse_compressors/base.py,sha256=Ua4rUSGyucEs-YJI5z3oIUF-zqQLrFsQ9f-qKasEdUM,4410
+compressed_tensors/compressors/sparse_compressors/dense.py,sha256=lSKNWRx6H7aUqaJj1j4qbXk8Gkm1UohbnvW1Rvq6Ra4,1284
+compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=4fKwCG7ZM8mUtSnjPvubzEHl-mTnxMzwjmcs7L43WLY,6622
+compressed_tensors/compressors/sparse_quantized_compressors/__init__.py,sha256=4f_cwcKXB1nVVMoiKgTFAc8jAPjPLElo-Df_EDm1_xw,675
+compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=akqE7eW8CLTslpWRxERaZ8R0TSm1lS7D1bgZXKL0xi8,9427
+compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
+compressed_tensors/config/base.py,sha256=BNTFKy12isY7qblwxdi_R1f00EzgrNOXLrfxqLCPT8w,1903
+compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
+compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
+compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
+compressed_tensors/linear/compressed_linear.py,sha256=0jTTf6XxOAjAYs3tvFtgiNMAO4W10sSeR-pdH2M413g,3218
+compressed_tensors/quantization/__init__.py,sha256=nWP_fsl6Nn0ksEgZPzerGiETdvF-ZfNwPnwGlRiR5pY,805
+compressed_tensors/quantization/cache.py,sha256=vnBB5zasO_XpHomZvzUPVVbzyCz2VgebsHePm0kANzY,6831
+compressed_tensors/quantization/quant_args.py,sha256=73KevZXHyrkMCT_3CxbYHz70fI3i-wcF8NvN0wsBPK4,8271
+compressed_tensors/quantization/quant_config.py,sha256=NCiMvUMnnz5kTyAkDylxjtEGQnjgsIYIeNR2zyHEdTQ,10371
+compressed_tensors/quantization/quant_scheme.py,sha256=uFgp6ECU6ZkHWkeKlAVAzZTLDbrTrzPSPrY23eJluaw,5931
+compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
+compressed_tensors/quantization/lifecycle/apply.py,sha256=czaayvpeUYyWRJhO_klffw6esptOgA9sBKL5TWQcRdw,15805
+compressed_tensors/quantization/lifecycle/calibration.py,sha256=IuLeRkVQPrMxkMcIjr4OMFlIUMHkqjH4qAxC2KiUBGw,2673
+compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
+compressed_tensors/quantization/lifecycle/forward.py,sha256=eLup6QDRUUp_Ozcas7RDRLIXBWjFbxn5gWbcAIJEGlw,15715
+compressed_tensors/quantization/lifecycle/frozen.py,sha256=NiJw7NP7pcT6idWFa8vksgiLoT8oQ975e57S4QfD2QQ,1874
+compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=4_YG7jKl7d2-Cy58pOkMtInFRhvYahxYchesWMPxPVM,8862
+compressed_tensors/quantization/observers/__init__.py,sha256=4Sa7rqi5RB_S5bPO8KmncETiqDsoMBhwP37arlQym8s,764
+compressed_tensors/quantization/observers/base.py,sha256=5ovQicWPYHjIxr6-EkQ4lgOX0PpI9g23iSzKpxjM1Zg,8420
+compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
+compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
+compressed_tensors/quantization/observers/min_max.py,sha256=sQXqU3z-voxIDfR_9mQzwQUflZj2sASm_G8CYaXntFw,3865
+compressed_tensors/quantization/observers/mse.py,sha256=Aeh-253Vbab1F8cYuBiGNn4OXWJ67wXQ_JVfl3mu2a8,6034
+compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
+compressed_tensors/quantization/utils/helpers.py,sha256=y4LEyC2oUd876ZMdALWKGH3Ct5EgBJZV4id_NUjTGH8,9531
+compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
+compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
+compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
+compressed_tensors/utils/helpers.py,sha256=hWGIR0W7ENHwdC7wW2SQJJiCF9-xOu_u3fY2RzLyYg4,4101
+compressed_tensors/utils/offload.py,sha256=d9q8LNe8HyF8tOjgjA7QGLD3HRysmNp0d8eBbdqBgIM,4089
+compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
+compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
+compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
+compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
+compressed_tensors-0.7.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.7.0.dist-info/METADATA,sha256=Lgcl4rU8ifo0PY-FrurFApAkuTD9HBeJohuULjVqebs,6782
+compressed_tensors-0.7.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+compressed_tensors-0.7.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.7.0.dist-info/RECORD,,

compressed_tensors-0.6.0.dist-info/RECORD
@@ -1,52 +0,0 @@
-compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
-compressed_tensors/base.py,sha256=Mq4mfVQcJhNpha-BXzpOfpmFIdl01o09BJE7D2oQ_00,796
-compressed_tensors/version.py,sha256=qWZbaQsbr0_5Y1qtWGstNPljw-ggMLBGZBvSKfzB9pw,1585
-compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
-compressed_tensors/compressors/base.py,sha256=NfVkhq6PRiq2cvAXaUXLoqC_nVYWdSrkE12c9AXYSMo,9956
-compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
-compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
-compressed_tensors/compressors/model_compressor.py,sha256=gI6KKtH3eeWi2540Ayx-4bg9o8qjrvxlF4Gd_sqltGA,16678
-compressed_tensors/compressors/naive_quantized.py,sha256=z3h3ca5xKCN69mahutxcbzdv-OysiaxaM8P-Qum6zUQ,4823
-compressed_tensors/compressors/pack_quantized.py,sha256=27RVmJ2wg2dvCoawj407HSmKT3VPGJ6ujAMHlT26WlI,7571
-compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
-compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
-compressed_tensors/config/base.py,sha256=BNTFKy12isY7qblwxdi_R1f00EzgrNOXLrfxqLCPT8w,1903
-compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
-compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
-compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/linear/compressed_linear.py,sha256=G0gEFfxLAUsgRcnfSV-PKz1ZBNTVokOauOoup7SE1mw,3210
-compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=CmyVtjJeHlqCW-7R5Z7tIw6lXUrzCX6Y9bwgmMxEudY,8069
-compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
-compressed_tensors/quantization/quant_scheme.py,sha256=HmR1DcFZcjuqX7KHUYI0NFXsCIzJ8sxFGH6zhYuHmEs,5870
-compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
-compressed_tensors/quantization/lifecycle/apply.py,sha256=uftWFunr_CpCZM_qWfo2O1USXKB2qSYD1pBJsO8BuCU,15285
-compressed_tensors/quantization/lifecycle/calibration.py,sha256=PlS_EqCOPqJD3QKuLPXO9AOtDzXtQWvEBTynFv-FFVw,2698
-compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=PljD9pzATILEOiC3ZdHUTsfSbZdAa6iSIxWmvAHLG9I,13688
-compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
-compressed_tensors/quantization/lifecycle/helpers.py,sha256=TmLY_G5VP_Fg2Ywio_dxoHRTxOKZdT7_aG5S9WtD4zI,2424
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=S5Kwy16Da8WUIIpa1xVKc72MijJ5C_rqM6JjanZ7MGk,7133
-compressed_tensors/quantization/observers/__init__.py,sha256=4Sa7rqi5RB_S5bPO8KmncETiqDsoMBhwP37arlQym8s,764
-compressed_tensors/quantization/observers/base.py,sha256=5ovQicWPYHjIxr6-EkQ4lgOX0PpI9g23iSzKpxjM1Zg,8420
-compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
-compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
-compressed_tensors/quantization/observers/min_max.py,sha256=sQXqU3z-voxIDfR_9mQzwQUflZj2sASm_G8CYaXntFw,3865
-compressed_tensors/quantization/observers/mse.py,sha256=Aeh-253Vbab1F8cYuBiGNn4OXWJ67wXQ_JVfl3mu2a8,6034
-compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=pwvU613XRvMDtI5b39II5jukBl5OUCqoX0ofVRpOFRY,8633
-compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
-compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
-compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
-compressed_tensors/utils/helpers.py,sha256=bh4G8mj_YCRf8Bo2FQ9FkIIZXY8xqqPjckNnVYB0gBA,3557
-compressed_tensors/utils/offload.py,sha256=d9q8LNe8HyF8tOjgjA7QGLD3HRysmNp0d8eBbdqBgIM,4089
-compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
-compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
-compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
-compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.6.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.6.0.dist-info/METADATA,sha256=opV5oXtNn7EmogPQaMiv9ddOLHv-XObt_NuUSwR5Bz4,6782
-compressed_tensors-0.6.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-compressed_tensors-0.6.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.6.0.dist-info/RECORD,,