compressed-tensors 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/__init__.py +21 -0
- compressed_tensors/base.py +16 -0
- compressed_tensors/compressors/__init__.py +25 -0
- compressed_tensors/compressors/base.py +79 -0
- compressed_tensors/compressors/dense.py +34 -0
- compressed_tensors/compressors/helpers.py +161 -0
- compressed_tensors/compressors/sparse_bitmask.py +238 -0
- compressed_tensors/config/__init__.py +18 -0
- compressed_tensors/config/base.py +42 -0
- compressed_tensors/config/dense.py +36 -0
- compressed_tensors/config/sparse_bitmask.py +36 -0
- compressed_tensors/quantization/__init__.py +21 -0
- compressed_tensors/quantization/lifecycle/__init__.py +22 -0
- compressed_tensors/quantization/lifecycle/apply.py +173 -0
- compressed_tensors/quantization/lifecycle/calibration.py +51 -0
- compressed_tensors/quantization/lifecycle/forward.py +136 -0
- compressed_tensors/quantization/lifecycle/frozen.py +46 -0
- compressed_tensors/quantization/lifecycle/initialize.py +96 -0
- compressed_tensors/quantization/observers/__init__.py +21 -0
- compressed_tensors/quantization/observers/base.py +69 -0
- compressed_tensors/quantization/observers/helpers.py +53 -0
- compressed_tensors/quantization/observers/memoryless.py +48 -0
- compressed_tensors/quantization/observers/min_max.py +65 -0
- compressed_tensors/quantization/quant_args.py +85 -0
- compressed_tensors/quantization/quant_config.py +171 -0
- compressed_tensors/quantization/quant_scheme.py +39 -0
- compressed_tensors/quantization/utils/__init__.py +16 -0
- compressed_tensors/quantization/utils/helpers.py +115 -0
- compressed_tensors/registry/__init__.py +17 -0
- compressed_tensors/registry/registry.py +360 -0
- compressed_tensors/utils/__init__.py +16 -0
- compressed_tensors/utils/helpers.py +151 -0
- compressed_tensors/utils/safetensors_load.py +237 -0
- compressed_tensors-0.3.0.dist-info/METADATA +22 -0
- compressed_tensors-0.3.0.dist-info/RECORD +37 -0
- compressed_tensors-0.3.0.dist-info/WHEEL +5 -0
- compressed_tensors-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,237 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import json
|
16
|
+
import os
|
17
|
+
import re
|
18
|
+
import struct
|
19
|
+
from typing import Dict, List, Optional
|
20
|
+
|
21
|
+
from safetensors import safe_open
|
22
|
+
from torch import Tensor
|
23
|
+
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, cached_file
|
24
|
+
|
25
|
+
|
26
|
+
__all__ = [
|
27
|
+
"get_safetensors_folder",
|
28
|
+
"get_safetensors_header",
|
29
|
+
"match_param_name",
|
30
|
+
"merge_names",
|
31
|
+
"get_weight_mappings",
|
32
|
+
"get_nested_weight_mappings",
|
33
|
+
"get_quantization_state_dict",
|
34
|
+
]
|
35
|
+
|
36
|
+
|
37
|
+
def get_safetensors_folder(
    pretrained_model_name_or_path: str, cache_dir: Optional[str] = None
) -> str:
    """
    Resolve the local folder that holds a model's safetensors weight files,
    given either a local path or a Hugging Face stub.

    :param pretrained_model_name_or_path: local path to model or HF stub
    :param cache_dir: optional cache dir to search through; when omitted the
        default TRANSFORMERS_CACHE is used
    :return: local folder containing model data
    """
    # A path that already exists on disk wins outright.
    if os.path.exists(pretrained_model_name_or_path):
        return os.path.abspath(pretrained_model_name_or_path)

    # Probe the HF cache for a single weight file and for a sharded index;
    # both lookups are attempted so the cache state matches either layout.
    safetensors_path = cached_file(
        pretrained_model_name_or_path,
        SAFE_WEIGHTS_NAME,
        cache_dir=cache_dir,
        _raise_exceptions_for_missing_entries=False,
    )
    index_path = cached_file(
        pretrained_model_name_or_path,
        SAFE_WEIGHTS_INDEX_NAME,
        cache_dir=cache_dir,
        _raise_exceptions_for_missing_entries=False,
    )

    # Prefer the single-file hit, then the index; either way the containing
    # directory is what callers need.
    for cached_path in (safetensors_path, index_path):
        if cached_path is not None:
            return os.path.dirname(cached_path)

    # Neither a weight file nor an index could be found locally or in cache.
    raise ValueError(
        "Could not locate safetensors weight or index file from "
        f"{pretrained_model_name_or_path}."
    )
|
77
|
+
|
78
|
+
|
79
|
+
def get_safetensors_header(safetensors_path: str) -> Dict[str, str]:
    """
    Read the JSON metadata header out of a safetensors file.

    :param safetensors_path: path to a safetensors file
    :return: dictionary of metadata extracted from the safetensors file
    """
    with open(safetensors_path, "rb") as file_handle:
        # The safetensors format starts with a little-endian u64 giving the
        # byte length of the JSON header that immediately follows it.
        (header_size,) = struct.unpack("<Q", file_handle.read(8))
        raw_header = file_handle.read(header_size)

    return json.loads(raw_header)
|
92
|
+
|
93
|
+
|
94
|
+
def match_param_name(full_name: str, param_name: str) -> Optional[str]:
    """
    Helper function extracting the uncompressed parameterized layer name from a
    compressed name. Assumes the compressed name was merged using merge_names.

    :param full_name: full name of parameter in compressed model
    :param param_name: compression parameter name
    :return: uncompressed name of the uncompressed parameterized layer, or
        None if ``full_name`` does not end with ``.<param_name>``
    """
    # Escape the parameter name so regex metacharacters in it (e.g. ".") are
    # matched literally instead of corrupting the pattern.
    match = re.fullmatch(r"(.*)\." + re.escape(param_name), full_name)
    if match is None:
        return None
    return match.group(1)
|
108
|
+
|
109
|
+
|
110
|
+
def merge_names(parent_name: str, child_name: str) -> str:
    """
    Join an uncompressed parameterized layer name with a compression
    parameter name. Names produced here can be split back apart with
    match_param_name.

    :param parent_name: uncompressed parameterized layer name
    :param child_name: compression parameter name
    :return: merged compressed name
    """
    return f"{parent_name}.{child_name}"
|
121
|
+
|
122
|
+
|
123
|
+
def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
    """
    Takes a path to a state dict saved in safetensors format and returns a
    mapping from parameterized layer name to file location.

    {
        layer.weight.bitmask: file_location,
        layer.weight.row_offsets: file_location,
        layer.weight.shape: file_location,
        layer.weight.compressed: file_location
    }

    This generalizes to cases where the model is split into multiple
    safetensors files.

    :param path_to_model_or_tensors: path to directory that contains
        safetensors (must contain either a single file or multiple files with
        an index), or a path to a single safetensors file
    :return: mapping of parameterized layer name to (absolute) file location
    """
    if os.path.isfile(path_to_model_or_tensors):
        # single safetensors file: every tensor lives in this one file.
        # Use the absolute path directly; joining the file path with itself
        # (as a generic "make absolute" step would) breaks relative paths.
        header = get_safetensors_header(path_to_model_or_tensors)
        header.pop("__metadata__", None)
        file_path = os.path.abspath(path_to_model_or_tensors)
        return {key: file_path for key in header}

    # we have a directory which may hold one file or a sharded index
    safetensors_path = os.path.join(path_to_model_or_tensors, SAFE_WEIGHTS_NAME)
    index_path = os.path.join(path_to_model_or_tensors, SAFE_WEIGHTS_INDEX_NAME)
    if os.path.exists(safetensors_path):
        # single safetensors file inside the directory
        header = get_safetensors_header(safetensors_path)
        header.pop("__metadata__", None)
        for key in header:
            header[key] = SAFE_WEIGHTS_NAME
    elif os.path.exists(index_path):
        # multiple safetensors files; the index maps tensor name -> file name
        with open(index_path, "r", encoding="utf-8") as f:
            index = json.load(f)
        header = index["weight_map"]
    else:
        raise ValueError(
            "Could not find a safetensors weight "
            f"or index file at {path_to_model_or_tensors}"
        )

    # convert per-file weight locations to full paths under the directory
    return {
        key: os.path.join(path_to_model_or_tensors, value)
        for key, value in header.items()
    }
|
175
|
+
|
176
|
+
|
177
|
+
def get_nested_weight_mappings(
    model_path: str, params_to_nest: List[str]
) -> Dict[str, Dict[str, str]]:
    """
    Takes a path to a state dict saved in safetensors format and returns a
    nested mapping from uncompressed parameterized layer names to the file
    locations of each of the layer's compression parameters.

    layer.weight: {
        bitmask: file_location,
        row_offsets: file_location,
        shape: file_location,
        compressed: file_location
    }

    This generalizes to cases where the model is split into multiple
    safetensors files.

    :param model_path: path to safetensors state dict, must contain either a
        single safetensors file or multiple files with an index
    :param params_to_nest: compression parameter names to group under each
        layer name
    :return: nested mapping of parameterized layer name to file location
    """
    flat_mappings = get_weight_mappings(model_path)

    nested_mappings: Dict[str, Dict[str, str]] = {}
    for full_name, file_location in flat_mappings.items():
        for param_name in params_to_nest:
            layer_name = match_param_name(full_name, param_name)
            if layer_name is None:
                continue
            # group this compression parameter under its dense layer name
            nested_mappings.setdefault(layer_name, {})[param_name] = file_location

    return nested_mappings
|
211
|
+
|
212
|
+
|
213
|
+
def get_quantization_state_dict(model_path: str) -> Dict[str, Tensor]:
    """
    Load only the quantization parameters (scales and zero points) from a
    safetensors model checkpoint into a CPU state dict.

    :param model_path: path to safetensors state dict, must contain either a
        single safetensors file or multiple files with an index
    :return: mapping of quantization parameter name to its loaded tensor
    """
    quantization_state_dict = {}
    for weight_name, safe_path in get_weight_mappings(model_path).items():
        # skip everything that is not a quantization parameter
        if not _is_quantization_weight(weight_name):
            continue
        with safe_open(safe_path, framework="pt", device="cpu") as f:
            quantization_state_dict[weight_name] = f.get_tensor(weight_name)

    return quantization_state_dict
|
223
|
+
|
224
|
+
|
225
|
+
def _is_quantization_weight(name: str) -> bool:
|
226
|
+
"""
|
227
|
+
Checks is a parameter name is associated with a quantization parameter
|
228
|
+
|
229
|
+
:param name: parameter name to check
|
230
|
+
:return: True if parameter name is a quantization parameter, else False
|
231
|
+
"""
|
232
|
+
if name.endswith("_scale"):
|
233
|
+
return True
|
234
|
+
if name.endswith("zero_point"):
|
235
|
+
return True
|
236
|
+
|
237
|
+
return False
|
@@ -0,0 +1,22 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: compressed-tensors
|
3
|
+
Version: 0.3.0
|
4
|
+
Summary: Library for utilization of compressed safetensors of neural network models
|
5
|
+
Home-page: UNKNOWN
|
6
|
+
Author: Neuralmagic, Inc.
|
7
|
+
Author-email: support@neuralmagic.com
|
8
|
+
License: UNKNOWN
|
9
|
+
Platform: UNKNOWN
|
10
|
+
Requires-Dist: pydantic <2.7
|
11
|
+
Requires-Dist: torch >=1.7.0
|
12
|
+
Requires-Dist: transformers <=4.40
|
13
|
+
Provides-Extra: dev
|
14
|
+
Requires-Dist: black ==22.12.0 ; extra == 'dev'
|
15
|
+
Requires-Dist: flake8 >=3.8.3 ; extra == 'dev'
|
16
|
+
Requires-Dist: isort ==5.8.0 ; extra == 'dev'
|
17
|
+
Requires-Dist: nbconvert >=7.16.3 ; extra == 'dev'
|
18
|
+
Requires-Dist: pytest >=6.0.0 ; extra == 'dev'
|
19
|
+
Requires-Dist: wheel >=0.36.2 ; extra == 'dev'
|
20
|
+
|
21
|
+
UNKNOWN
|
22
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
|
2
|
+
compressed_tensors/base.py,sha256=8zbgK87LpHkKoSknM55svXCT4E4dLLjPijwF9HfzmsQ,717
|
3
|
+
compressed_tensors/compressors/__init__.py,sha256=3ZHKWSIWTjMx8XXgLtoP9JaVaCTvRecguLZTxLAAkKk,898
|
4
|
+
compressed_tensors/compressors/base.py,sha256=F1smyJ6x2Sfq43tuP0QE9wZuhVqnewq-XUFPMtdU9yQ,2936
|
5
|
+
compressed_tensors/compressors/dense.py,sha256=_VTusI3XjaY-zOdB_d7z4zOgPTJi9TJZZHF13g9ulS4,1263
|
6
|
+
compressed_tensors/compressors/helpers.py,sha256=kSseqbwnu3JHZUKH8u4kQo5bmd87FvCcmWe0u2ikysA,6421
|
7
|
+
compressed_tensors/compressors/sparse_bitmask.py,sha256=PYAK_Hcy2T57zlbpwl1FYkslluIr2x-d0Rh048YAtpI,8639
|
8
|
+
compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
|
9
|
+
compressed_tensors/config/base.py,sha256=IP-3Y416w-811WozDzKHycIBXjdlG4Ddy7vpbwhOPD8,1373
|
10
|
+
compressed_tensors/config/dense.py,sha256=xtkri7DkP7USu44FnSoTgTSqdGegCBtjRf3DfblSEL0,1311
|
11
|
+
compressed_tensors/config/sparse_bitmask.py,sha256=y8fmQaOoGjIiI4FR6BJjfIqisAcqNQ_zjKyjT75bXwY,1284
|
12
|
+
compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
|
13
|
+
compressed_tensors/quantization/quant_args.py,sha256=dxSrq0_88ORQXcyIMYqoMZJvYEjnqdYl37f7lgZQqhw,2742
|
14
|
+
compressed_tensors/quantization/quant_config.py,sha256=DWx8ae3gDlw99zAn3MUN9I4qeksbbmITmOXHRynqPB8,6650
|
15
|
+
compressed_tensors/quantization/quant_scheme.py,sha256=X3oqmZPiIKtX5tEKKUj-0N6hB68NeiU2b1GcQEQPadQ,1480
|
16
|
+
compressed_tensors/quantization/lifecycle/__init__.py,sha256=fM9XBtPgJX6z54PTm3Sd0SpK5od95ibwaSf2FFR8DqE,772
|
17
|
+
compressed_tensors/quantization/lifecycle/apply.py,sha256=WXUL3q1g0s244k0wuqGYZPXTXiscdyrp7RScN2j_KGA,6651
|
18
|
+
compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
|
19
|
+
compressed_tensors/quantization/lifecycle/forward.py,sha256=hnjk7pocZLDhLdMx237FKayYdvsdKbYSjTmSN5xbQO8,4599
|
20
|
+
compressed_tensors/quantization/lifecycle/frozen.py,sha256=NHNmlDIaxurifqeI_qZC8xa4BstQsBNdOCXJjRzAfNU,1596
|
21
|
+
compressed_tensors/quantization/lifecycle/initialize.py,sha256=8pifqZQSgVqWYI_Qtv6QfBICPbCTFHy48OWPeQsxEHQ,3578
|
22
|
+
compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
|
23
|
+
compressed_tensors/quantization/observers/base.py,sha256=O76dAxkin7bB602e9kjmxc84p71-PxBtjIq5L69xplI,2786
|
24
|
+
compressed_tensors/quantization/observers/helpers.py,sha256=SxvOf9zwZ9NDRC3E4Xm7z3RqHcbcPtCABLKX9GnGGHM,2109
|
25
|
+
compressed_tensors/quantization/observers/memoryless.py,sha256=3f6bUlcf5mzOHPkTRhoQ7Zd8xu_pUmj8e3Y85fGysSU,1848
|
26
|
+
compressed_tensors/quantization/observers/min_max.py,sha256=uAcZd5aY6WKM-KumTb2ybX28s8iKGVy6Nrje5Sddqew,2439
|
27
|
+
compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
|
28
|
+
compressed_tensors/quantization/utils/helpers.py,sha256=N_wYfrPcFr__Q1mn6mHoNUTclwpTW8P5PDHkR7GvXWo,3694
|
29
|
+
compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
|
30
|
+
compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
|
31
|
+
compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
|
32
|
+
compressed_tensors/utils/helpers.py,sha256=wLgiPrk7Vn29AijOGQGk3UnXItRd1jpROS6FxHoC4VQ,5530
|
33
|
+
compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
|
34
|
+
compressed_tensors-0.3.0.dist-info/METADATA,sha256=REuv9Fm1mYXww7d8VOWwPWYkAhImzn_PEhiPn_V5Hdo,673
|
35
|
+
compressed_tensors-0.3.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
36
|
+
compressed_tensors-0.3.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
37
|
+
compressed_tensors-0.3.0.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
compressed_tensors
|