compressed-tensors 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. compressed_tensors/__init__.py +21 -0
  2. compressed_tensors/base.py +16 -0
  3. compressed_tensors/compressors/__init__.py +25 -0
  4. compressed_tensors/compressors/base.py +79 -0
  5. compressed_tensors/compressors/dense.py +34 -0
  6. compressed_tensors/compressors/helpers.py +161 -0
  7. compressed_tensors/compressors/sparse_bitmask.py +238 -0
  8. compressed_tensors/config/__init__.py +18 -0
  9. compressed_tensors/config/base.py +42 -0
  10. compressed_tensors/config/dense.py +36 -0
  11. compressed_tensors/config/sparse_bitmask.py +36 -0
  12. compressed_tensors/quantization/__init__.py +21 -0
  13. compressed_tensors/quantization/lifecycle/__init__.py +22 -0
  14. compressed_tensors/quantization/lifecycle/apply.py +173 -0
  15. compressed_tensors/quantization/lifecycle/calibration.py +51 -0
  16. compressed_tensors/quantization/lifecycle/forward.py +136 -0
  17. compressed_tensors/quantization/lifecycle/frozen.py +46 -0
  18. compressed_tensors/quantization/lifecycle/initialize.py +96 -0
  19. compressed_tensors/quantization/observers/__init__.py +21 -0
  20. compressed_tensors/quantization/observers/base.py +69 -0
  21. compressed_tensors/quantization/observers/helpers.py +53 -0
  22. compressed_tensors/quantization/observers/memoryless.py +48 -0
  23. compressed_tensors/quantization/observers/min_max.py +65 -0
  24. compressed_tensors/quantization/quant_args.py +85 -0
  25. compressed_tensors/quantization/quant_config.py +171 -0
  26. compressed_tensors/quantization/quant_scheme.py +39 -0
  27. compressed_tensors/quantization/utils/__init__.py +16 -0
  28. compressed_tensors/quantization/utils/helpers.py +115 -0
  29. compressed_tensors/registry/__init__.py +17 -0
  30. compressed_tensors/registry/registry.py +360 -0
  31. compressed_tensors/utils/__init__.py +16 -0
  32. compressed_tensors/utils/helpers.py +151 -0
  33. compressed_tensors/utils/safetensors_load.py +237 -0
  34. compressed_tensors-0.3.0.dist-info/METADATA +22 -0
  35. compressed_tensors-0.3.0.dist-info/RECORD +37 -0
  36. compressed_tensors-0.3.0.dist-info/WHEEL +5 -0
  37. compressed_tensors-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,237 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import os
17
+ import re
18
+ import struct
19
+ from typing import Dict, List, Optional
20
+
21
+ from safetensors import safe_open
22
+ from torch import Tensor
23
+ from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, cached_file
24
+
25
+
26
+ __all__ = [
27
+ "get_safetensors_folder",
28
+ "get_safetensors_header",
29
+ "match_param_name",
30
+ "merge_names",
31
+ "get_weight_mappings",
32
+ "get_nested_weight_mappings",
33
+ "get_quantization_state_dict",
34
+ ]
35
+
36
+
37
def get_safetensors_folder(
    pretrained_model_name_or_path: str, cache_dir: Optional[str] = None
) -> str:
    """
    Resolve a Hugging Face stub or a local path to the folder that holds the
    safetensors weight files.

    :param pretrained_model_name_or_path: local path to model or HF stub
    :param cache_dir: optional cache dir to search through, if none is specified the
        model will be searched for in the default TRANSFORMERS_CACHE
    :return: local folder containing model data
    """
    # a path that exists on disk wins outright
    if os.path.exists(pretrained_model_name_or_path):
        return os.path.abspath(pretrained_model_name_or_path)

    # otherwise probe the HF cache for a single weights file and an index file
    # (both lookups are performed, matching the original resolution behavior)
    resolved_files = [
        cached_file(
            pretrained_model_name_or_path,
            weights_file_name,
            cache_dir=cache_dir,
            _raise_exceptions_for_missing_entries=False,
        )
        for weights_file_name in (SAFE_WEIGHTS_NAME, SAFE_WEIGHTS_INDEX_NAME)
    ]

    # prefer the single safetensors file, then the index file
    for resolved in resolved_files:
        if resolved is not None:
            return os.path.split(resolved)[0]

    # model weights could not be found locally or cached from HF Hub
    raise ValueError(
        "Could not locate safetensors weight or index file from "
        f"{pretrained_model_name_or_path}."
    )
77
+
78
+
79
def get_safetensors_header(safetensors_path: str) -> Dict[str, str]:
    """
    Read the JSON metadata header from a safetensors file.

    :param safetensors_path: path to a safetensors file
    :return: dictionary of metadata extracted from the safetensors file
    """
    with open(safetensors_path, "rb") as file_handle:
        # the first 8 bytes encode the header size as a little-endian uint64
        (header_size,) = struct.unpack("<Q", file_handle.read(8))
        # the header itself is JSON of exactly that many bytes
        return json.loads(file_handle.read(header_size))
92
+
93
+
94
def match_param_name(full_name: str, param_name: str) -> Optional[str]:
    """
    Helper function extracting the uncompressed parameterized layer name from a
    compressed name. Assumes the compressed name was merged using merge_names.

    :param full_name: full name of parameter in compressed model
    :param param_name: compression parameter name
    :return: uncompressed name of the parameterized layer, or None when
        full_name does not end with ".param_name"
    """
    # escape param_name so regex metacharacters in it are matched literally
    pattern = r"^(.*)\." + re.escape(param_name) + r"$"
    match = re.match(pattern, full_name)
    if match is None:
        return None
    return match.group(1)
108
+
109
+
110
def merge_names(parent_name: str, child_name: str) -> str:
    """
    Join an uncompressed parameterized layer name with a compression parameter
    name. Names merged with this function can then be parsed by
    match_param_name.

    :param parent_name: uncompressed parameterized layer name
    :param child_name: compression parameter name
    :return: merged compressed name
    """
    return f"{parent_name}.{child_name}"
121
+
122
+
123
def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
    """
    Takes a path to a state dict saved in safetensors format and returns a mapping
    from parameterized layer name to file location.

    {
        layer.weight.bitmask: file_location,
        layer.weight.row_offsets: file_location,
        layer.weight.shape: file_location,
        layer.weight.compressed: file_location
    }

    This generalizes to cases where the model is split into multiple safetensors files

    :param path_to_model_or_tensors: path to directory that contains
        safetensors (must contain either a single file or multiple files with an
        index), or a path to a single safetensors file
    :return: mapping of parameterized layer name to file location
    """
    if os.path.isfile(path_to_model_or_tensors):
        # single safetensors file: the argument itself is already the complete
        # file location; return directly rather than joining it onto itself,
        # which would corrupt relative paths
        header = get_safetensors_header(path_to_model_or_tensors)
        header.pop("__metadata__", None)
        return {key: path_to_model_or_tensors for key in header}

    # directory: may contain a single weights file or an index over shards
    safetensors_path = os.path.join(path_to_model_or_tensors, SAFE_WEIGHTS_NAME)
    index_path = os.path.join(path_to_model_or_tensors, SAFE_WEIGHTS_INDEX_NAME)
    if os.path.exists(safetensors_path):
        # single safetensors file inside the directory
        header = get_safetensors_header(safetensors_path)
        header.pop("__metadata__", None)
        header = {key: SAFE_WEIGHTS_NAME for key in header}
    elif os.path.exists(index_path):
        # multiple safetensors files, read the weight -> file map from the index
        with open(index_path, "r", encoding="utf-8") as f:
            header = json.load(f)["weight_map"]
    else:
        raise ValueError(
            "Could not find a safetensors weight "
            f"or index file at {path_to_model_or_tensors}"
        )

    # convert per-file names to full paths rooted at the model directory
    return {
        key: os.path.join(path_to_model_or_tensors, value)
        for key, value in header.items()
    }
175
+
176
+
177
def get_nested_weight_mappings(
    model_path: str, params_to_nest: List[str]
) -> Dict[str, Dict[str, str]]:
    """
    Takes a path to a state dict saved in safetensors format and returns a nested
    mapping from uncompressed parameterized layer names to the file locations of
    each of the layers compression parameters.

    layer.weight: {
        bitmask: file_location,
        row_offsets: file_location,
        shape: file_location,
        compressed: file_location
    }

    This generalizes to cases where the model is split into multiple safetensors files

    :param model_path: path to safetensors state dict, must contain either a single
        safetensors file or multiple files with an index
    :return: nested mapping of parameterized layer name to file location
    """
    flat_mappings = get_weight_mappings(model_path)

    nested_weight_mappings: Dict[str, Dict[str, str]] = {}
    for full_name, file_location in flat_mappings.items():
        for param_name in params_to_nest:
            dense_name = match_param_name(full_name, param_name)
            if dense_name is None:
                continue
            nested_weight_mappings.setdefault(dense_name, {})[param_name] = (
                file_location
            )

    return nested_weight_mappings
211
+
212
+
213
def get_quantization_state_dict(model_path: str) -> Dict[str, Tensor]:
    """
    Load only the quantization parameters (names ending in "_scale" or
    "zero_point", per _is_quantization_weight) from a safetensors model.

    :param model_path: path to model directory or single safetensors file
    :return: mapping of quantization parameter name to its loaded tensor
    """
    state_dict = {}
    for weight_name, safe_path in get_weight_mappings(model_path).items():
        if _is_quantization_weight(weight_name):
            with safe_open(safe_path, framework="pt", device="cpu") as file:
                state_dict[weight_name] = file.get_tensor(weight_name)

    return state_dict
223
+
224
+
225
+ def _is_quantization_weight(name: str) -> bool:
226
+ """
227
+ Checks is a parameter name is associated with a quantization parameter
228
+
229
+ :param name: parameter name to check
230
+ :return: True if parameter name is a quantization parameter, else False
231
+ """
232
+ if name.endswith("_scale"):
233
+ return True
234
+ if name.endswith("zero_point"):
235
+ return True
236
+
237
+ return False
@@ -0,0 +1,22 @@
1
+ Metadata-Version: 2.1
2
+ Name: compressed-tensors
3
+ Version: 0.3.0
4
+ Summary: Library for utilization of compressed safetensors of neural network models
5
+ Home-page: UNKNOWN
6
+ Author: Neuralmagic, Inc.
7
+ Author-email: support@neuralmagic.com
8
+ License: UNKNOWN
9
+ Platform: UNKNOWN
10
+ Requires-Dist: pydantic <2.7
11
+ Requires-Dist: torch >=1.7.0
12
+ Requires-Dist: transformers <=4.40
13
+ Provides-Extra: dev
14
+ Requires-Dist: black ==22.12.0 ; extra == 'dev'
15
+ Requires-Dist: flake8 >=3.8.3 ; extra == 'dev'
16
+ Requires-Dist: isort ==5.8.0 ; extra == 'dev'
17
+ Requires-Dist: nbconvert >=7.16.3 ; extra == 'dev'
18
+ Requires-Dist: pytest >=6.0.0 ; extra == 'dev'
19
+ Requires-Dist: wheel >=0.36.2 ; extra == 'dev'
20
+
21
+ UNKNOWN
22
+
@@ -0,0 +1,37 @@
1
+ compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
2
+ compressed_tensors/base.py,sha256=8zbgK87LpHkKoSknM55svXCT4E4dLLjPijwF9HfzmsQ,717
3
+ compressed_tensors/compressors/__init__.py,sha256=3ZHKWSIWTjMx8XXgLtoP9JaVaCTvRecguLZTxLAAkKk,898
4
+ compressed_tensors/compressors/base.py,sha256=F1smyJ6x2Sfq43tuP0QE9wZuhVqnewq-XUFPMtdU9yQ,2936
5
+ compressed_tensors/compressors/dense.py,sha256=_VTusI3XjaY-zOdB_d7z4zOgPTJi9TJZZHF13g9ulS4,1263
6
+ compressed_tensors/compressors/helpers.py,sha256=kSseqbwnu3JHZUKH8u4kQo5bmd87FvCcmWe0u2ikysA,6421
7
+ compressed_tensors/compressors/sparse_bitmask.py,sha256=PYAK_Hcy2T57zlbpwl1FYkslluIr2x-d0Rh048YAtpI,8639
8
+ compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
9
+ compressed_tensors/config/base.py,sha256=IP-3Y416w-811WozDzKHycIBXjdlG4Ddy7vpbwhOPD8,1373
10
+ compressed_tensors/config/dense.py,sha256=xtkri7DkP7USu44FnSoTgTSqdGegCBtjRf3DfblSEL0,1311
11
+ compressed_tensors/config/sparse_bitmask.py,sha256=y8fmQaOoGjIiI4FR6BJjfIqisAcqNQ_zjKyjT75bXwY,1284
12
+ compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
13
+ compressed_tensors/quantization/quant_args.py,sha256=dxSrq0_88ORQXcyIMYqoMZJvYEjnqdYl37f7lgZQqhw,2742
14
+ compressed_tensors/quantization/quant_config.py,sha256=DWx8ae3gDlw99zAn3MUN9I4qeksbbmITmOXHRynqPB8,6650
15
+ compressed_tensors/quantization/quant_scheme.py,sha256=X3oqmZPiIKtX5tEKKUj-0N6hB68NeiU2b1GcQEQPadQ,1480
16
+ compressed_tensors/quantization/lifecycle/__init__.py,sha256=fM9XBtPgJX6z54PTm3Sd0SpK5od95ibwaSf2FFR8DqE,772
17
+ compressed_tensors/quantization/lifecycle/apply.py,sha256=WXUL3q1g0s244k0wuqGYZPXTXiscdyrp7RScN2j_KGA,6651
18
+ compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
19
+ compressed_tensors/quantization/lifecycle/forward.py,sha256=hnjk7pocZLDhLdMx237FKayYdvsdKbYSjTmSN5xbQO8,4599
20
+ compressed_tensors/quantization/lifecycle/frozen.py,sha256=NHNmlDIaxurifqeI_qZC8xa4BstQsBNdOCXJjRzAfNU,1596
21
+ compressed_tensors/quantization/lifecycle/initialize.py,sha256=8pifqZQSgVqWYI_Qtv6QfBICPbCTFHy48OWPeQsxEHQ,3578
22
+ compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
23
+ compressed_tensors/quantization/observers/base.py,sha256=O76dAxkin7bB602e9kjmxc84p71-PxBtjIq5L69xplI,2786
24
+ compressed_tensors/quantization/observers/helpers.py,sha256=SxvOf9zwZ9NDRC3E4Xm7z3RqHcbcPtCABLKX9GnGGHM,2109
25
+ compressed_tensors/quantization/observers/memoryless.py,sha256=3f6bUlcf5mzOHPkTRhoQ7Zd8xu_pUmj8e3Y85fGysSU,1848
26
+ compressed_tensors/quantization/observers/min_max.py,sha256=uAcZd5aY6WKM-KumTb2ybX28s8iKGVy6Nrje5Sddqew,2439
27
+ compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
28
+ compressed_tensors/quantization/utils/helpers.py,sha256=N_wYfrPcFr__Q1mn6mHoNUTclwpTW8P5PDHkR7GvXWo,3694
29
+ compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
30
+ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
31
+ compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
32
+ compressed_tensors/utils/helpers.py,sha256=wLgiPrk7Vn29AijOGQGk3UnXItRd1jpROS6FxHoC4VQ,5530
33
+ compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
34
+ compressed_tensors-0.3.0.dist-info/METADATA,sha256=REuv9Fm1mYXww7d8VOWwPWYkAhImzn_PEhiPn_V5Hdo,673
35
+ compressed_tensors-0.3.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
36
+ compressed_tensors-0.3.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
37
+ compressed_tensors-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.43.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ compressed_tensors