compressed-tensors-nightly 0.5.0.20240814__py3-none-any.whl → 0.5.0.20240830__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. compressed_tensors/compressors/base.py +198 -8
  2. compressed_tensors/compressors/model_compressor.py +65 -1
  3. compressed_tensors/compressors/naive_quantized.py +71 -75
  4. compressed_tensors/compressors/pack_quantized.py +83 -94
  5. compressed_tensors/linear/__init__.py +13 -0
  6. compressed_tensors/linear/compressed_linear.py +87 -0
  7. compressed_tensors/quantization/lifecycle/apply.py +36 -4
  8. compressed_tensors/quantization/lifecycle/calibration.py +3 -2
  9. compressed_tensors/quantization/lifecycle/compressed.py +1 -1
  10. compressed_tensors/quantization/lifecycle/forward.py +67 -43
  11. compressed_tensors/quantization/lifecycle/helpers.py +29 -2
  12. compressed_tensors/quantization/lifecycle/initialize.py +50 -16
  13. compressed_tensors/quantization/observers/__init__.py +1 -0
  14. compressed_tensors/quantization/observers/base.py +54 -14
  15. compressed_tensors/quantization/observers/min_max.py +8 -0
  16. compressed_tensors/quantization/observers/mse.py +162 -0
  17. compressed_tensors/quantization/quant_args.py +48 -20
  18. compressed_tensors/utils/__init__.py +1 -0
  19. compressed_tensors/utils/helpers.py +13 -0
  20. compressed_tensors/utils/offload.py +7 -1
  21. compressed_tensors/utils/permute.py +70 -0
  22. compressed_tensors/utils/safetensors_load.py +2 -0
  23. compressed_tensors/utils/semi_structured_conversions.py +1 -0
  24. {compressed_tensors_nightly-0.5.0.20240814.dist-info → compressed_tensors_nightly-0.5.0.20240830.dist-info}/METADATA +3 -2
  25. compressed_tensors_nightly-0.5.0.20240830.dist-info/RECORD +52 -0
  26. compressed_tensors_nightly-0.5.0.20240814.dist-info/RECORD +0 -48
  27. {compressed_tensors_nightly-0.5.0.20240814.dist-info → compressed_tensors_nightly-0.5.0.20240830.dist-info}/LICENSE +0 -0
  28. {compressed_tensors_nightly-0.5.0.20240814.dist-info → compressed_tensors_nightly-0.5.0.20240830.dist-info}/WHEEL +0 -0
  29. {compressed_tensors_nightly-0.5.0.20240814.dist-info → compressed_tensors_nightly-0.5.0.20240830.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,162 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Any, Optional, Tuple
+
+ import torch
+ from compressed_tensors.quantization.observers.base import Observer
+ from compressed_tensors.quantization.observers.helpers import calculate_qparams
+ from compressed_tensors.quantization.quant_args import QuantizationArgs
+ from torch import FloatTensor, IntTensor, Tensor
+
+
+ __all__ = ["MovingAverageMSEObserver"]
+
+
+ @Observer.register("mse")
+ class MovingAverageMSEObserver(Observer):
+     """
+     Implements a dynamic quantization observer that sets the scale and
+     zero point based on a moving average of the mse-clipped min and max observed values
+     """
+
+     def __init__(
+         self,
+         quantization_args: QuantizationArgs,
+         averaging_constant: float = 0.01,
+         grid: float = 100.0,
+         maxshrink: float = 0.80,
+         norm: float = 2.4,
+     ):
+         super().__init__(quantization_args=quantization_args)
+
+         self.min_val = {}
+         self.max_val = {}
+         self.averaging_constant = averaging_constant
+         self.grid = grid
+         self.maxshrink = maxshrink
+         self.norm = norm
+
+     def calculate_mse_min_max(
+         self,
+         observed: Tensor,
+         reduce_dims: Optional[Tuple[int]] = None,
+     ):
+         """
+         Computes the mse-clipped min and max values of the observed tensor by
+         optimizing for quantization error
+
+         :param observed: observed tensor to calculate quantization parameters for
+         :param reduce_dims: optional tuple of dimensions to reduce along,
+             returned values will be shaped (1,) along the reduced dimensions
+         :return: tuple of min and max values derived from the observed tensor
+         """
+         from compressed_tensors.quantization.lifecycle import fake_quantize
+
+         if not reduce_dims:
+             absolute_min_val, absolute_max_val = torch.aminmax(observed)
+         else:
+             absolute_min_val = torch.amin(observed, dim=reduce_dims, keepdims=True)
+             absolute_max_val = torch.amax(observed, dim=reduce_dims, keepdims=True)
+
+         best = torch.full(absolute_min_val.shape, float("inf"))
+         min_val = torch.ones(absolute_min_val.shape)
+         max_val = torch.zeros(absolute_max_val.shape)
+         for i in range(int(self.maxshrink * self.grid)):
+             p = 1 - i / self.grid
+             shrinked_min_val = p * absolute_min_val
+             shrinked_max_val = p * absolute_max_val
+
+             candidate_scales, candidate_zero_points = calculate_qparams(
+                 shrinked_min_val, shrinked_max_val, self.quantization_args
+             )
+             q = fake_quantize(
+                 observed,
+                 candidate_scales,
+                 candidate_zero_points,
+                 self.quantization_args,
+             )
+
+             q -= observed
+             q.abs_()
+             q.pow_(self.norm)
+             if not reduce_dims:
+                 err = torch.sum(q)
+             else:
+                 err = torch.sum(q, reduce_dims, keepdims=True)
+
+             tmp = err < best
+             if torch.any(tmp):
+                 best[tmp] = err[tmp]
+                 min_val[tmp] = shrinked_min_val[tmp]
+                 max_val[tmp] = shrinked_max_val[tmp]
+         return min_val, max_val
+
+     def calculate_qparams(
+         self,
+         observed: Tensor,
+         reduce_dims: Optional[Tuple[int]] = None,
+         tensor_id: Optional[Any] = None,
+     ) -> Tuple[FloatTensor, IntTensor]:
+         """
+         Updates the mse-clipped min and max values of the observed tensor using
+         a moving average smoothed by the averaging_constant
+
+         :param observed: observed tensor to calculate quantization parameters for
+         :param reduce_dims: optional tuple of dimensions to reduce along,
+             returned scale and zero point will be shaped (1,) along the
+             reduced dimensions
+         :param tensor_id: Optional id if different ranges of observed tensors are
+             passed, useful for sharding tensors by group_size
+         :return: tuple of scale and zero point derived from the observed tensor
+         """
+         min_val, max_val = self.calculate_mse_min_max(observed, reduce_dims)
+
+         running_min_val = self.min_val.get(tensor_id, None)
+         running_max_val = self.max_val.get(tensor_id, None)
+
+         if running_min_val is None or running_max_val is None:
+             updated_min_val = min_val
+             updated_max_val = max_val
+         else:
+             updated_min_val = running_min_val + self.averaging_constant * (
+                 min_val - running_min_val
+             )
+             updated_max_val = running_max_val + self.averaging_constant * (
+                 max_val - running_max_val
+             )
+
+         tensor_id = tensor_id or "default"
+         self.min_val[tensor_id] = updated_min_val
+         self.max_val[tensor_id] = updated_max_val
+
+         return calculate_qparams(
+             updated_min_val, updated_max_val, self.quantization_args
+         )
+
+     def get_qparams_along_dim(
+         self, observed, dim: int, tensor_id: Optional[Any] = None
+     ):
+         reduce_dims = tuple(idx for idx in range(observed.ndim) if idx != dim)
+         return self.calculate_qparams(
+             observed, reduce_dims=reduce_dims, tensor_id=tensor_id
+         )
+
+     def reset(self):
+         """
+         Reset the state of the observer, including min and maximum values
+         """
+         super().reset()
+         self.min_val = {}
+         self.max_val = {}
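For orientation, the new "mse" observer slots into the existing registry flow: get_observer() resolves it by name via Observer.load_from_registry, the same path the minmax and memoryless observers take. A minimal usage sketch under that assumption (the tensor shape and settings below are illustrative, not taken from the package):

    import torch

    from compressed_tensors.quantization.observers.base import Observer
    from compressed_tensors.quantization.quant_args import QuantizationArgs

    # resolve the observer by its registered name, mirroring get_observer()
    args = QuantizationArgs(num_bits=8, symmetric=True, observer="mse")
    observer = Observer.load_from_registry(args.observer, quantization_args=args)

    # each batch re-runs the shrink search over p = 1 - i/grid, then the moving
    # average blends the winning min/max into the running clip range
    sample = torch.randn(128, 512)
    scale, zero_point = observer.calculate_qparams(sample)

Because the running min/max are smoothed with averaging_constant, repeated calibration batches gradually stabilize the clip range rather than tracking each batch's extremes.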
@@ -16,7 +16,7 @@ from enum import Enum
  from typing import Any, Dict, Optional

  import torch
- from pydantic import BaseModel, Field, validator
+ from pydantic import BaseModel, Field, field_validator, model_validator


  __all__ = [
@@ -68,6 +68,8 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
          ranges will be observed with every sample. Defaults to False for static
          quantization. Note that enabling dynamic quantization will change the default
          observer to a memoryless one
+     :param actorder: whether to apply group quantization in decreasing order of
+         activation. Defaults to False for arbitrary ordering
      """

      num_bits: int = 8
@@ -77,6 +79,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
      strategy: Optional[QuantizationStrategy] = None
      block_structure: Optional[str] = None
      dynamic: bool = False
+     actorder: bool = False
      observer: str = Field(
          default="minmax",
          description=(
@@ -98,40 +101,65 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
          """
          from compressed_tensors.quantization.observers.base import Observer

-         if self.observer == "minmax" and self.dynamic:
+         if self.dynamic:
              # override defualt observer for dynamic, you never want minmax which
              # keeps state across samples for dynamic
              self.observer = "memoryless"

          return Observer.load_from_registry(self.observer, quantization_args=self)

-     @validator("strategy", pre=True, always=True)
-     def validate_strategy(cls, value, values):
-         group_size = values.get("group_size")
+     @field_validator("group_size", mode="before")
+     def validate_group(cls, value) -> int:
+         if value is None:
+             return value

-         # use group_size to determinine strategy if not given explicity
-         if group_size is not None and value is None:
-             if group_size > 0:
-                 return QuantizationStrategy.GROUP
+         if value < -1:
+             raise ValueError(
+                 f"Invalid group size {value}. Use group_size > 0 for "
+                 "strategy='group' and group_size = -1 for 'channel'"
+             )

-             elif group_size == -1:
-                 return QuantizationStrategy.CHANNEL
+         return value
+
+     @model_validator(mode="before")
+     def validate_strategy(values) -> Dict[str, Any]:
+         model_fields = QuantizationArgs.model_fields
+         strategy = values.get("strategy", model_fields["strategy"].default)
+         group_size = values.get("group_size", model_fields["group_size"].default)
+         actorder = values.get("actorder", model_fields["actorder"].default)

+         if strategy is not None:
+             strategy = QuantizationStrategy(strategy.lower())
+
+         else:
+             # use group_size to determinine strategy if not given explicity
+             if group_size is None:
+                 strategy = QuantizationStrategy.TENSOR
+             elif group_size > 0:
+                 strategy = QuantizationStrategy.GROUP
+             elif group_size == -1:
+                 strategy = QuantizationStrategy.CHANNEL
              else:
                  raise ValueError(
-                     f"group_size={group_size} with strategy {value} is invald. "
-                     "group_size > 0 for strategy='group' and "
-                     "group_size = -1 for 'channel'"
+                     f"Invalid group size {group_size}. Use group_size > 0 for "
+                     "strategy='group' and group_size = -1 for 'channel'"
                  )

-         if value == QuantizationStrategy.GROUP:
-             if group_size is None:
-                 raise ValueError(f"strategy {value} requires group_size to be set.")
+         if strategy == QuantizationStrategy.GROUP:
+             if group_size is None or group_size <= 0:
+                 raise ValueError(
+                     f"strategy {strategy} requires group_size to be "
+                     "set to a positive value"
+                 )

-         if value is None:
-             return QuantizationStrategy.TENSOR
+         if actorder and strategy != QuantizationStrategy.GROUP:
+             raise ValueError(
+                 "Group quantization must be specified in order to apply "
+                 "activation ordering"
+             )

-         return value
+         values["strategy"] = strategy
+         return values

      def pytorch_dtype(self) -> torch.dtype:
          if self.type == QuantizationType.FLOAT:
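Taken together, the reworked validators keep the old group_size-to-strategy inference while adding range checks and the new actorder constraint. A short behavioral sketch (illustrative, not from the package's test suite; note that pydantic surfaces validator failures as ValidationError, a ValueError subclass):

    from compressed_tensors.quantization.quant_args import (
        QuantizationArgs,
        QuantizationStrategy,
    )

    # strategy is still inferred from group_size when not given explicitly
    assert QuantizationArgs().strategy == QuantizationStrategy.TENSOR
    assert QuantizationArgs(group_size=128).strategy == QuantizationStrategy.GROUP
    assert QuantizationArgs(group_size=-1).strategy == QuantizationStrategy.CHANNEL

    # activation ordering is only accepted alongside group quantization
    QuantizationArgs(group_size=128, actorder=True)  # valid
    try:
        QuantizationArgs(actorder=True)  # tensor strategy -> rejected
    except ValueError as err:
        print(err)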
@@ -16,5 +16,6 @@
  from .helpers import *
  from .offload import *
  from .permutations_24 import *
+ from .permute import *
  from .safetensors_load import *
  from .semi_structured_conversions import *
@@ -22,6 +22,7 @@ __all__ = [
      "infer_compressor_from_model_config",
      "fix_fsdp_module_name",
      "tensor_follows_mask_structure",
+     "replace_module",
  ]

  FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -90,3 +91,15 @@ def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
          raise ValueError()

      return True
+
+
+ def replace_module(model: torch.nn.Module, name: str, new_module: torch.nn.Module):
+     if "." in name:
+         parent_name = name.rsplit(".", 1)[0]
+         child_name = name[len(parent_name) + 1 :]
+         parent = model.get_submodule(parent_name)
+     else:
+         parent_name = ""
+         parent = model
+         child_name = name
+     setattr(parent, child_name, new_module)
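replace_module resolves a dotted submodule name, as produced by model.named_modules(), to its parent and swaps the child in place with setattr. A small sketch with a toy model (the module names here are illustrative):

    import torch

    from compressed_tensors.utils import replace_module

    class Block(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.proj = torch.nn.Linear(16, 16)

    model = torch.nn.Sequential(Block())

    # swap the nested linear for another module, addressed by its dotted name
    replace_module(model, "0.proj", torch.nn.Identity())
    assert isinstance(model[0].proj, torch.nn.Identity)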
@@ -89,8 +89,11 @@ def update_parameter_data(

      :param module: layer containing the parameter to update
      :param new_param_data: tensor to update parameter with
-     :param param_name:
+     :param param_name: name of layer parameter to update
      """
+     if not hasattr(module, param_name):
+         return
+
      device = next(module.parameters()).device

      offloaded = False
@@ -99,6 +102,9 @@ def update_parameter_data(
          offloaded = True

      parameter = getattr(module, param_name, None)
+     if parameter is None:
+         raise ValueError("Attempted to update uninitialized parameter")
+
      dtype = parameter.dtype
      parameter.data = new_param_data.to(device).to(dtype)

@@ -0,0 +1,70 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Set, Tuple
+
+ import torch
+
+
+ __all__ = ["safe_permute"]
+
+
+ # these datatypes are missing implementations required for standard permutation
+ _EXPERIMENTAL_DTYPES: Set[Tuple[torch.dtype, torch.device]] = set()
+
+
+ def safe_permute(value: torch.Tensor, perm: torch.Tensor, dim: int = 0) -> torch.Tensor:
+     """
+     Perform out-of-place permutation without using torch.Tensor.index_put_,
+     whose implementation is missing for datatypes such as `torch.float8_e4m3fn`
+
+     :param value: tensor to permute
+     :param perm: permutation map
+     :param dim: dimension along which to apply permutation
+     :return: permuted value
+     """
+     dtype_tuple = (value.dtype, value.device)
+
+     if dtype_tuple in _EXPERIMENTAL_DTYPES:
+         return _fallback_permute(value, perm, dim)
+
+     try:
+         return value[tuple([slice(None)] * dim + [perm])]
+     except RuntimeError:
+         # Mark dtype as experimental if advanced indexing fails
+         _EXPERIMENTAL_DTYPES.add(dtype_tuple)
+         return _fallback_permute(value, perm, dim)
+
+
+ def _fallback_permute(
+     value: torch.Tensor, perm: torch.Tensor, dim: int
+ ) -> torch.Tensor:
+     """
+     Fallback permutation method for experimental dtypes.
+
+     :param value: tensor to permute
+     :param perm: permutation map
+     :param dim: dimension along which to apply permutation
+     :return: permuted value
+     """
+     value_ret = value.clone()  # cannot use zeros_like b/c of missing impl.
+     orig_slices = [slice(None)] * (dim + 1)
+     perm_slices = [slice(None)] * (dim + 1)
+
+     for index, perm_index in enumerate(perm):
+         orig_slices[dim] = index
+         perm_slices[dim] = perm_index
+         value_ret[tuple(orig_slices)] = value[tuple(perm_slices)]
+
+     return value_ret
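For ordinary dtypes safe_permute reduces to plain advanced indexing; only when that indexing raises does the (dtype, device) pair get memoized into _EXPERIMENTAL_DTYPES and routed through the slice-by-slice copy loop. A brief sketch (values illustrative):

    import torch

    from compressed_tensors.utils import safe_permute

    value = torch.tensor([[1, 2], [3, 4], [5, 6]])
    perm = torch.tensor([2, 0, 1])

    # reorders rows (dim=0) according to perm; equivalent to value[perm] here
    print(safe_permute(value, perm, dim=0))
    # tensor([[5, 6],
    #         [1, 2],
    #         [3, 4]])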
@@ -234,5 +234,7 @@ def is_quantization_param(name: str) -> bool:
          return True
      if name.endswith("zero_point"):
          return True
+     if name.endswith("g_idx"):
+         return True

      return False
@@ -28,6 +28,7 @@ __all__ = [
      "mask_creator",
  ]

+
  # This is PyTorch implementation of main part of reorder_meta()
  # function, from tools/util/include/cutlass/util/host_reorder.h file
  # of CUTLASS source tree. Furthermore, CUTLASS template for sparse
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: compressed-tensors-nightly
- Version: 0.5.0.20240814
+ Version: 0.5.0.20240830
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
@@ -10,8 +10,9 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: torch>=1.7.0
  Requires-Dist: transformers
- Requires-Dist: accelerate
  Requires-Dist: pydantic>=2.0
+ Provides-Extra: accelerate
+ Requires-Dist: accelerate; extra == "accelerate"
  Provides-Extra: dev
  Requires-Dist: black==22.12.0; extra == "dev"
  Requires-Dist: isort==5.8.0; extra == "dev"
@@ -0,0 +1,52 @@
+ compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
+ compressed_tensors/base.py,sha256=Mq4mfVQcJhNpha-BXzpOfpmFIdl01o09BJE7D2oQ_00,796
+ compressed_tensors/version.py,sha256=DdMT4o5D6_t26gTuvhF1Q9HPeXY6vV5g7XMprWuHLdI,1586
+ compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
+ compressed_tensors/compressors/base.py,sha256=4BO07h28Epbl2ED43lORnPGmBZ3pMdaoLYym_LJTpPQ,9846
+ compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
+ compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
+ compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
+ compressed_tensors/compressors/model_compressor.py,sha256=Yv2V8Ey6AFDg2Tmvwc7-E_AnMFkeIy_HVu62ct650AI,16507
+ compressed_tensors/compressors/naive_quantized.py,sha256=z3h3ca5xKCN69mahutxcbzdv-OysiaxaM8P-Qum6zUQ,4823
+ compressed_tensors/compressors/pack_quantized.py,sha256=27RVmJ2wg2dvCoawj407HSmKT3VPGJ6ujAMHlT26WlI,7571
+ compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
+ compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
+ compressed_tensors/config/base.py,sha256=caSZ7xZ_kgcHRMXZ5hM1i6TKbgY__CkiSjZ93imHZQ0,1562
+ compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
+ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
+ compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
+ compressed_tensors/linear/compressed_linear.py,sha256=G0gEFfxLAUsgRcnfSV-PKz1ZBNTVokOauOoup7SE1mw,3210
+ compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
+ compressed_tensors/quantization/quant_args.py,sha256=wSC2ve1P-XRwZUpqEaqvQpj1Xe0EGgmmPEjPk9YEnyg,6797
+ compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
+ compressed_tensors/quantization/quant_scheme.py,sha256=_RKOFJI0T5xJVBLX63UeYkSY4EFAecsBnqzUIVBjeU0,6014
+ compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
+ compressed_tensors/quantization/lifecycle/apply.py,sha256=uftWFunr_CpCZM_qWfo2O1USXKB2qSYD1pBJsO8BuCU,15285
+ compressed_tensors/quantization/lifecycle/calibration.py,sha256=PlS_EqCOPqJD3QKuLPXO9AOtDzXtQWvEBTynFv-FFVw,2698
+ compressed_tensors/quantization/lifecycle/compressed.py,sha256=laNDwvhk4S925qWTPHCufo4uDdMo24NDV1qhsAkf5Iw,2225
+ compressed_tensors/quantization/lifecycle/forward.py,sha256=fZMSrUXX2NnkQiappEpT5SO-6JxbX5wiw9hyjfKNIZo,13538
+ compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
+ compressed_tensors/quantization/lifecycle/helpers.py,sha256=TmLY_G5VP_Fg2Ywio_dxoHRTxOKZdT7_aG5S9WtD4zI,2424
+ compressed_tensors/quantization/lifecycle/initialize.py,sha256=r8GNYIUYVHJ-539mHKnhhGysCluaOG6VieH6CQD4eeo,7112
+ compressed_tensors/quantization/observers/__init__.py,sha256=4Sa7rqi5RB_S5bPO8KmncETiqDsoMBhwP37arlQym8s,764
+ compressed_tensors/quantization/observers/base.py,sha256=5ovQicWPYHjIxr6-EkQ4lgOX0PpI9g23iSzKpxjM1Zg,8420
+ compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
+ compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
+ compressed_tensors/quantization/observers/min_max.py,sha256=sQXqU3z-voxIDfR_9mQzwQUflZj2sASm_G8CYaXntFw,3865
+ compressed_tensors/quantization/observers/mse.py,sha256=Aeh-253Vbab1F8cYuBiGNn4OXWJ67wXQ_JVfl3mu2a8,6034
+ compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
+ compressed_tensors/quantization/utils/helpers.py,sha256=YjXABJQUnelof-z7qcwck6fnrFLh4uMSrOmPiqNp_RY,8591
+ compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
+ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
+ compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
+ compressed_tensors/utils/helpers.py,sha256=bh4G8mj_YCRf8Bo2FQ9FkIIZXY8xqqPjckNnVYB0gBA,3557
+ compressed_tensors/utils/offload.py,sha256=d9q8LNe8HyF8tOjgjA7QGLD3HRysmNp0d8eBbdqBgIM,4089
+ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
+ compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
+ compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
+ compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
+ compressed_tensors_nightly-0.5.0.20240830.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ compressed_tensors_nightly-0.5.0.20240830.dist-info/METADATA,sha256=t2iwpUePnptstcRQh5B6A7OkdXVIJKUACJ-U6830_VM,6799
+ compressed_tensors_nightly-0.5.0.20240830.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+ compressed_tensors_nightly-0.5.0.20240830.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+ compressed_tensors_nightly-0.5.0.20240830.dist-info/RECORD,,
@@ -1,48 +0,0 @@
- compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
- compressed_tensors/base.py,sha256=Mq4mfVQcJhNpha-BXzpOfpmFIdl01o09BJE7D2oQ_00,796
- compressed_tensors/version.py,sha256=DdMT4o5D6_t26gTuvhF1Q9HPeXY6vV5g7XMprWuHLdI,1586
- compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
- compressed_tensors/compressors/base.py,sha256=-rqT2h9G2iwDkwrVj0d0jxxn9h0dccJA1mqOzVEkwGM,2144
- compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
- compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
- compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
- compressed_tensors/compressors/model_compressor.py,sha256=b7jPE4czwP9uulIZML5qUQAvQaQzElwzUGwat7jlpgI,13352
- compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
- compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
- compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
- compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
- compressed_tensors/config/base.py,sha256=caSZ7xZ_kgcHRMXZ5hM1i6TKbgY__CkiSjZ93imHZQ0,1562
- compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
- compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
- compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
- compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
- compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
- compressed_tensors/quantization/quant_scheme.py,sha256=_RKOFJI0T5xJVBLX63UeYkSY4EFAecsBnqzUIVBjeU0,6014
- compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
- compressed_tensors/quantization/lifecycle/apply.py,sha256=UGnccJ4QPXU14lRqJkre95A_Nn5jYMAE0mWnsMi26_s,13900
- compressed_tensors/quantization/lifecycle/calibration.py,sha256=zE5jtW-e5j8vrLO7FqhX3oUSNhjrg4FsRFiG6vDu7ME,2637
- compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
- compressed_tensors/quantization/lifecycle/forward.py,sha256=6PSXYcf-R1dOY8zsuIWnBaoyARNymYc3-qvV6-L7SlI,12397
- compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
- compressed_tensors/quantization/lifecycle/helpers.py,sha256=xDkM3yVpGVnwAdg2aUOmrlDPaOksi-bavSQ5mMeOQlk,1651
- compressed_tensors/quantization/lifecycle/initialize.py,sha256=oCD8pgmHT3lW5J7zdsSN3YzEQIhTfE7M01R5Wb0wpck,5801
- compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
- compressed_tensors/quantization/observers/base.py,sha256=2WO7N2eyXf1r1gxVidos1bUS5o7pcrpug4gQgHIazrQ,6794
- compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
- compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
- compressed_tensors/quantization/observers/min_max.py,sha256=UK7zCMzxv9GGn6BflBxdajV20RiWaCY2RHcvZodCP1w,3669
- compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
- compressed_tensors/quantization/utils/helpers.py,sha256=YjXABJQUnelof-z7qcwck6fnrFLh4uMSrOmPiqNp_RY,8591
- compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
- compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
- compressed_tensors/utils/__init__.py,sha256=rvbIJlvdKYn4iX7r3KP6peCbU5uyMzgxwhsQstLoMxQ,785
- compressed_tensors/utils/helpers.py,sha256=d3yP9ViQ8R3GzMHfohxNlaokzyrRuj2PyjxWAJZmSws,3156
- compressed_tensors/utils/offload.py,sha256=qAMwoFT3WEQ9nB_SegE12ob8ghDugddQseE6z4vpZEE,3900
- compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
- compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
- compressed_tensors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
- compressed_tensors_nightly-0.5.0.20240814.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- compressed_tensors_nightly-0.5.0.20240814.dist-info/METADATA,sha256=BSOm6u2fWJxLBox3AwpDt9mo-vUBqToHwv3qtSEKPDQ,6749
- compressed_tensors_nightly-0.5.0.20240814.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
- compressed_tensors_nightly-0.5.0.20240814.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
- compressed_tensors_nightly-0.5.0.20240814.dist-info/RECORD,,