compressed-tensors-nightly 0.8.0.20241121__tar.gz → 0.8.0.20241123__tar.gz

Files changed (56)
  1. {compressed-tensors-nightly-0.8.0.20241121/src/compressed_tensors_nightly.egg-info → compressed-tensors-nightly-0.8.0.20241123}/PKG-INFO +1 -1
  2. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +17 -5
  3. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/linear/compressed_linear.py +3 -1
  4. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/lifecycle/apply.py +2 -1
  5. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/quant_config.py +2 -2
  6. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/quant_scheme.py +1 -23
  7. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123/src/compressed_tensors_nightly.egg-info}/PKG-INFO +1 -1
  8. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/LICENSE +0 -0
  9. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/README.md +0 -0
  10. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/pyproject.toml +0 -0
  11. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/setup.cfg +0 -0
  12. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/setup.py +0 -0
  13. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/__init__.py +0 -0
  14. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/base.py +0 -0
  15. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/__init__.py +0 -0
  16. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/base.py +0 -0
  17. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/helpers.py +0 -0
  18. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
  19. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
  20. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
  21. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
  22. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
  23. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
  24. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
  25. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
  26. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
  27. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
  28. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
  29. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/config/__init__.py +0 -0
  30. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/config/base.py +0 -0
  31. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/config/dense.py +0 -0
  32. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  33. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/linear/__init__.py +0 -0
  34. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/__init__.py +0 -0
  35. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  36. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
  37. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
  38. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
  39. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
  40. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/quant_args.py +0 -0
  41. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  42. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
  43. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/registry/__init__.py +0 -0
  44. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/registry/registry.py +0 -0
  45. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/utils/__init__.py +0 -0
  46. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/utils/helpers.py +0 -0
  47. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/utils/offload.py +0 -0
  48. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/utils/permutations_24.py +0 -0
  49. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/utils/permute.py +0 -0
  50. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/utils/safetensors_load.py +0 -0
  51. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
  52. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors/version.py +0 -0
  53. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors_nightly.egg-info/SOURCES.txt +0 -0
  54. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors_nightly.egg-info/dependency_links.txt +0 -0
  55. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors_nightly.egg-info/requires.txt +0 -0
  56. {compressed-tensors-nightly-0.8.0.20241121 → compressed-tensors-nightly-0.8.0.20241123}/src/compressed_tensors_nightly.egg-info/top_level.txt +0 -0

PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.8.0.20241121
+Version: 0.8.0.20241123
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

src/compressed_tensors/compressors/model_compressors/model_compressor.py

@@ -24,7 +24,6 @@ import compressed_tensors
 import torch
 import transformers
 from compressed_tensors.base import (
-    COMPRESSION_CONFIG_NAME,
     COMPRESSION_VERSION_NAME,
     QUANTIZATION_CONFIG_NAME,
     QUANTIZATION_METHOD_NAME,
@@ -39,6 +38,7 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization,
 )
+from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
@@ -103,12 +103,14 @@ class ModelCompressor:
         :return: compressor for the configs, or None if model is not compressed
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
+        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
+
         return cls.from_compression_config(compression_config)

     @classmethod
     def from_compression_config(
-        cls, compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
+        cls,
+        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
     ):
         """
         :param compression_config:
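
For context, a minimal usage sketch of the lookup above, which now reads the compression config from the QUANTIZATION_CONFIG_NAME attribute of the Hugging Face config. The checkpoint id is a placeholder and the final attribute access is an assumption, not something confirmed by this diff:

    # Hedged usage sketch; "org/some-compressed-model" is a placeholder checkpoint id.
    from compressed_tensors.compressors import ModelCompressor

    # from_pretrained returns None when the checkpoint carries no compression config
    compressor = ModelCompressor.from_pretrained("org/some-compressed-model")
    if compressor is not None:
        print(compressor.quantization_config)  # assumed attribute holding the parsed config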

src/compressed_tensors/compressors/model_compressors/model_compressor.py (continued)

@@ -265,7 +267,11 @@ ModelCompressor:
         state_dict = model.state_dict()

         compressed_state_dict = state_dict
-        quantized_modules_to_args = map_modules_to_quant_args(model)
+
+        quantized_modules_to_args: Dict[
+            str, QuantizationArgs
+        ] = map_modules_to_quant_args(model)
+
         if self.quantization_compressor is not None:
             compressed_state_dict = self.quantization_compressor.compress(
                 state_dict, names_to_scheme=quantized_modules_to_args
@@ -369,7 +375,13 @@ ModelCompressor:
             update_parameter_data(module, data, param_name)


-def map_modules_to_quant_args(model: Module) -> Dict:
+def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
+    """
+    Given a pytorch model, map out the submodule name (usually linear layers)
+    to the QuantizationArgs
+
+    :param model: pytorch model
+    """
     quantized_modules_to_args = {}
     for name, submodule in iter_named_leaf_modules(model):
         if is_module_quantized(submodule):
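
For orientation, a sketch of the mapping that the new return annotation and docstring describe, built only from the helpers visible in this hunk; the line inside the `if` branch is an assumption about how the weight args are read off a quantized submodule, so the shipped implementation may differ:

    # Sketch of the mapping documented above; the attribute access inside the `if`
    # is an assumption, not confirmed by this diff.
    from typing import Dict

    from torch.nn import Module

    from compressed_tensors.quantization.quant_args import QuantizationArgs
    from compressed_tensors.quantization.utils import (
        is_module_quantized,
        iter_named_leaf_modules,
    )


    def map_modules_to_quant_args_sketch(model: Module) -> Dict[str, QuantizationArgs]:
        """Map submodule names (usually Linear layers) to their weight QuantizationArgs."""
        quantized_modules_to_args: Dict[str, QuantizationArgs] = {}
        for name, submodule in iter_named_leaf_modules(model):
            if is_module_quantized(submodule):
                # assumes quantized submodules carry a `quantization_scheme` attribute
                quantized_modules_to_args[name] = submodule.quantization_scheme.weights
        return quantized_modules_to_args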

src/compressed_tensors/linear/compressed_linear.py

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Dict, Tuple
+
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.quantization import (
@@ -53,7 +55,7 @@ class CompressedLinear(Linear):
         )

         # get the shape and dtype of compressed parameters
-        compression_params = module.compressor.compression_param_info(
+        compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
             module.weight.shape, quantization_scheme.weights
         )

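
A hedged illustration of what the new `Dict[str, Tuple]` annotation implies: each entry is assumed to pair a compressed parameter name with its (shape, dtype), which a layer could register as placeholder parameters until the compressed data is loaded. The helper below is illustrative only, not the library's code:

    # Illustrative consumer of compression_param_info output; the (shape, dtype)
    # tuple layout is an assumption based on the comment in the hunk above.
    from typing import Dict, Tuple

    import torch
    from torch.nn import Module, Parameter


    def register_compressed_params(module: Module, compression_params: Dict[str, Tuple]) -> None:
        for name, (shape, dtype) in compression_params.items():
            # placeholder storage; real compressed weights are filled in later
            param = Parameter(torch.empty(shape, dtype=dtype), requires_grad=False)
            module.register_parameter(name, param)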

src/compressed_tensors/quantization/lifecycle/apply.py

@@ -106,7 +106,8 @@ def apply_quantization_config(
     model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
 ) -> OrderedDict:
     """
-    Initializes the model for quantization in-place based on the given config
+    Initializes the model for quantization in-place based on the given config.
+    Optionally coverts quantizable modules to compressed_linear modules

     :param model: model to apply quantization config to
     :param config: quantization config
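
A small, hedged usage sketch of the documented behavior: a config is applied to a model in place, and run_compressed=True is the switch that additionally converts quantizable modules to CompressedLinear. The scheme and config values are illustrative assumptions, not taken from this diff:

    # Hedged sketch; scheme/config values are illustrative assumptions.
    import torch

    from compressed_tensors.quantization import apply_quantization_config
    from compressed_tensors.quantization.quant_args import QuantizationArgs
    from compressed_tensors.quantization.quant_config import QuantizationConfig
    from compressed_tensors.quantization.quant_scheme import QuantizationScheme

    model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.Linear(64, 16))

    scheme = QuantizationScheme(targets=["Linear"], weights=QuantizationArgs())
    config = QuantizationConfig(config_groups={"group_0": scheme})

    # Initializes quantization parameters in place; passing run_compressed=True would
    # additionally convert matching modules to CompressedLinear (typically when
    # loading an already-compressed checkpoint).
    apply_quantization_config(model, config)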

src/compressed_tensors/quantization/quant_config.py

@@ -132,9 +132,9 @@ class QuantizationConfig(BaseModel):
         `k_proj` and `v_proj` in their names. If this is not the case
         and kv_cache_scheme != None, the quantization of kv cache will fail
     :global_compression_ratio: optional informational config to report the model
-        compression ratio acheived by the quantization config
+        compression ratio acheived by the quantization config
     :ignore: optional list of layers to ignore from config_groups. Layers in this list
-        are not quantized even if they match up with a target in config_groups
+        are not quantized even if they match up with a target in config_groups
     """

     config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
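
A hedged example of the `ignore` field documented above: layers listed there stay unquantized even when they match a config_groups target. The field values are illustrative:

    # Illustrative config; "lm_head" is a hypothetical layer name.
    from compressed_tensors.quantization.quant_args import QuantizationArgs
    from compressed_tensors.quantization.quant_config import QuantizationConfig
    from compressed_tensors.quantization.quant_scheme import QuantizationScheme

    config = QuantizationConfig(
        config_groups={
            "group_0": QuantizationScheme(targets=["Linear"], weights=QuantizationArgs()),
        },
        ignore=["lm_head"],  # matches the Linear target, but is left unquantized
    )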

src/compressed_tensors/quantization/quant_scheme.py

@@ -36,7 +36,7 @@ class QuantizationScheme(BaseModel):
     of modules should be quantized

     :param targets: list of modules to apply the QuantizationArgs to, can be layer
-        names, layer types or a regular expression
+        names, layer types or a regular expression, typically ["Linear"]
     :param weights: quantization config for layer weights
     :param input_activations: quantization config for layer inputs
     :param output_activations: quantization config for layer outputs
@@ -47,28 +47,6 @@ class QuantizationScheme(BaseModel):
     input_activations: Optional[QuantizationArgs] = None
     output_activations: Optional[QuantizationArgs] = None

-    @classmethod
-    def default_scheme(
-        cls,
-        targets: Optional[List[str]] = None,
-    ):
-
-        if targets is None:
-            # default to quantizing all Linear layers
-            targets = ["Linear"]
-
-        # by default, activations and weights are left unquantized
-        weights = None
-        input_activations = None
-        output_activations = None
-
-        return cls(
-            targets=targets,
-            weights=weights,
-            input_activations=input_activations,
-            output_activations=output_activations,
-        )
-

 """
 Pre-Set Quantization Scheme Args
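
Since the default_scheme() helper is removed in this release, here is a hedged sketch of the explicit equivalent, reproducing the defaults of the removed classmethod (target all Linear layers, leave weights and activations unquantized):

    # Explicit replacement for the removed default_scheme() classmethod.
    from compressed_tensors.quantization.quant_scheme import QuantizationScheme

    scheme = QuantizationScheme(
        targets=["Linear"],        # the removed helper's default target
        weights=None,              # weights and activations were left unquantized
        input_activations=None,
        output_activations=None,
    )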

src/compressed_tensors_nightly.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.8.0.20241121
+Version: 0.8.0.20241123
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.