mediapipe-nightly 0.10.10.post20240216__cp311-cp311-macosx_11_0_universal2.whl → 0.10.10.post20240220__cp311-cp311-macosx_11_0_universal2.whl
- mediapipe/__init__.py +1 -1
- mediapipe/python/_framework_bindings.cpython-311-darwin.so +0 -0
- mediapipe/tasks/python/__init__.py +1 -0
- mediapipe/tasks/python/genai/__init__.py +14 -0
- mediapipe/tasks/python/genai/converter/__init__.py +24 -0
- mediapipe/tasks/python/genai/converter/converter_base.py +172 -0
- mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
- mediapipe/tasks/python/genai/converter/llm_converter.py +213 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter.py +315 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
- mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
- mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter.py +521 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer.py +111 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +62 -0
- mediapipe/version.txt +1 -1
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/METADATA +1 -1
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/RECORD +21 -8
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/LICENSE +0 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/WHEEL +0 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/top_level.txt +0 -0
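
The headline change in this nightly is the new `mediapipe.tasks.python.genai.converter` package: an LLM checkpoint converter with a shared base API (`converter_base.py`), PyTorch and safetensors loaders, quantization utilities, and a weight-bin writer. As a minimal sketch of how the top-level entry point in `llm_converter.py` is meant to be driven: the `ConversionConfig` field names below follow MediaPipe's converter documentation and should be treated as assumptions for this particular nightly, and all paths are hypothetical.

```python
from mediapipe.tasks.python.genai import converter

# Hypothetical paths; the ckpt_format/model_type values mirror the PyTorch
# Falcon-RW-1B support added in this diff.
config = converter.ConversionConfig(
    input_ckpt='/tmp/falcon_rw_1b/pytorch_model.bin',
    ckpt_format='pytorch',
    model_type='FALCON_RW_1B',
    backend='cpu',
    output_dir='/tmp/falcon_rw_1b/intermediate',
    combine_file_only=False,
    vocab_model_file='/tmp/falcon_rw_1b/',
    output_tflite_file='/tmp/falcon_rw_1b/falcon_rw_1b_cpu.bin',
)
converter.convert_checkpoint(config)
```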
mediapipe/tasks/python/genai/converter/pytorch_converter.py
@@ -0,0 +1,315 @@
+# Copyright 2024 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""CkptLoader implementation for loading the Pytorch file."""
+
+import enum
+import os
+from typing import List, Optional
+
+import numpy as np
+import torch
+
+from mediapipe.tasks.python.genai.converter import converter_base
+
+
+class _PytorchReader:
+  """Pytorch reader."""
+
+  def __init__(self, model_path: str):
+    if not os.path.exists(model_path):
+      raise ValueError(f"{model_path} does not exist.")
+    self._model = torch.load(model_path, map_location=torch.device("cpu"))
+
+  def read_tensor_as_numpy(self, tensor_name) -> np.ndarray:
+    tensor = (
+        self._model[tensor_name]
+        .to(torch.float32)
+        .t()
+        .contiguous()
+        .detach()
+        .cpu()
+        .numpy()
+    )
+    return tensor
+
+  def get_tensor_names(self) -> List[str]:
+    names = list(self._model.keys())
+    return names
+
+
+class LayerType(enum.Enum):
+  """Enum for layer type."""
+
+  NONE = 0
+  ATTENTION = 1  # Layer is part of the attention module.
+  FEEDFORWARD = 2  # Layer is part of the feedforward module in the Transformer.
+  EMBEDDING = 3  # Layer is the embedding lookup or final projection layer.
+  LAYER_NORM = (
+      4  # Layer is layer normalization before and after attention layer.
+  )
+
+  @classmethod
+  def get_layer_type(cls, layer_name: str):
+    """Gets the layer type of the given layer name."""
+    ffn_layers = [
+        "mlp",
+    ]
+    attn_layers = [
+        "self_attention",
+    ]
+    emb_layers = [
+        "word_embeddings",
+        "lm_head",
+    ]
+    layer_norms = [
+        "input_layernorm",
+        "post_attention_layernorm",
+        "ln_f",
+    ]
+    if any(sub_name in layer_name for sub_name in attn_layers):
+      return LayerType.ATTENTION
+    if any(sub_name in layer_name for sub_name in ffn_layers):
+      return LayerType.FEEDFORWARD
+    if any(sub_name in layer_name for sub_name in emb_layers):
+      return LayerType.EMBEDDING
+    if any(sub_name in layer_name for sub_name in layer_norms):
+      return LayerType.LAYER_NORM
+    else:
+      return LayerType.NONE
+
+
+class FalconMapper(converter_base.LayerActionMapperBase):
+  """LayerActionMapper for handling the Falcon-rw-1b model."""
+
+  def __init__(
+      self,
+      is_symmetric: bool,
+      attention_quant_bits: int,
+      feedforward_quant_bits: int,
+      embedding_quant_bits: int,
+      backend: str,
+      reader: _PytorchReader,
+  ):
+    super().__init__(
+        is_symmetric=is_symmetric,
+        attention_quant_bits=attention_quant_bits,
+        feedforward_quant_bits=feedforward_quant_bits,
+        embedding_quant_bits=embedding_quant_bits,
+        backend=backend,
+    )
+    self._reader = reader
+
+  def map_to_actions(
+      self, layer_name: str
+  ) -> Optional[List[converter_base.QuantizationAction]]:
+    """Map the given layer name to actions."""
+    actions = []
+    tensor_value = self._reader.read_tensor_as_numpy(layer_name)
+    if "query_key_value" in layer_name:
+      qkv_tensors = self._decompose_falcon_qkv(tensor_value)
+      for tensor, name in zip(qkv_tensors, ["q", "k", "v"]):
+        decomposed_name = layer_name.replace("query_key_value", name)
+        action = self._map_to_action_helper(tensor, decomposed_name)
+        actions.append(action)
+    else:
+      actions.append(self._map_to_action_helper(tensor_value, layer_name))
+    return actions
+
+  def _map_to_action_helper(
+      self, tensor_value: np.ndarray, layer_name: str
+  ) -> converter_base.QuantizationAction:
+    quantize_axis = None
+    quantize_bits = None
+    layer_type = LayerType.get_layer_type(layer_name)
+
+    if layer_type != LayerType.LAYER_NORM and layer_name.endswith(".weight"):
+      layer_type = LayerType.get_layer_type(layer_name)
+      quantize_axis = [0]
+      if layer_type == LayerType.FEEDFORWARD:
+        quantize_bits = self._feedforward_quant_bits
+      elif layer_type == LayerType.ATTENTION:
+        quantize_bits = self._attention_quant_bits
+        if self._backend == "cpu" and ".dense." in layer_name:
+          tensor_value = np.transpose(tensor_value)
+          quantize_axis = [1]
+      elif layer_type == LayerType.EMBEDDING:
+        quantize_bits = self._embedding_quant_bits
+        if self._backend == "cpu" and "word_embeddings" in layer_name:
+          tensor_value = np.transpose(tensor_value)
+          quantize_axis = [1]
+    target_name = self.update_target_name(layer_name)
+
+    return converter_base.QuantizationAction(
+        tensor_name=layer_name,
+        tensor_value=tensor_value,
+        target_name=target_name,
+        quantize_axis=quantize_axis,
+        quantize_bits=quantize_bits,
+        pack_dim=0,
+    )
+
+  def _decompose_falcon_qkv(self, tensor_value: np.ndarray) -> List[np.ndarray]:
+    """Decomposes combined qkv tensor used in falcon model into separate q, k and v tensors."""
+    chunk_size = 64
+    hidden_size = 2048
+
+    tensor_value = tensor_value.transpose()
+
+    q_tensor = np.zeros(
+        (hidden_size,)
+        + ((hidden_size,) if len(tensor_value.shape) == 2 else ()),
+        dtype=tensor_value.dtype,
+    )
+    k_tensor = np.zeros_like(q_tensor, dtype=tensor_value.dtype)
+    v_tensor = np.zeros_like(k_tensor, dtype=tensor_value.dtype)
+
+    j = 0
+    for i in range(0 * chunk_size, hidden_size * 3, chunk_size * 3):
+      q_tensor[j : j + chunk_size] = tensor_value[i : i + chunk_size]
+      j += chunk_size
+
+    j = 0
+    for i in range(1 * chunk_size, hidden_size * 3, chunk_size * 3):
+      k_tensor[j : j + chunk_size] = tensor_value[i : i + chunk_size]
+      j += chunk_size
+
+    j = 0
+    for i in range(2 * chunk_size, hidden_size * 3, chunk_size * 3):
+      v_tensor[j : j + chunk_size] = tensor_value[i : i + chunk_size]
+      j += chunk_size
+
+    return [
+        np.ascontiguousarray(q_tensor.transpose()),
+        np.ascontiguousarray(k_tensor.transpose()),
+        np.ascontiguousarray(v_tensor.transpose()),
+    ]
+
+  def update_target_name(self, target_name: str) -> str:
+    """Updates the target name to match the tensor name convention."""
+    layer_type = LayerType.get_layer_type(target_name)
+
+    target_name = target_name.replace(
+        "transformer.h.", "params.lm.transformer.x_layers_"
+    )
+
+    if layer_type == LayerType.FEEDFORWARD:
+      target_name = target_name.replace(".weight", ".linear.w")
+      target_name = target_name.replace(".bias", ".bias.b")
+      target_name = target_name.replace(
+          "mlp.dense_h_to_4h", "ff_layer.ffn_layer1"
+      )
+      target_name = target_name.replace(
+          "mlp.dense_4h_to_h", "ff_layer.ffn_layer2"
+      )
+    elif layer_type == LayerType.ATTENTION:
+      target_name = target_name.replace("dense", "post")
+      target_name = target_name.replace(".weight", ".linear.w")
+      target_name = target_name.replace(".bias", ".bias.b")
+    elif layer_type == LayerType.EMBEDDING:
+      target_name = target_name.replace(
+          "transformer.word_embeddings", "params.lm.token_embedding"
+      )
+      target_name = target_name.replace(
+          "lm_head", "params.lm.softmax.logits_ffn"
+      )
+      target_name = target_name.replace(".weight", ".w")
+    elif layer_type == LayerType.LAYER_NORM:
+      target_name = target_name.replace("input_layernorm", "pre_layer_norm")
+      target_name = target_name.replace(
+          "pre_layer_norm.weight", "pre_layer_norm.scale"
+      )
+      if self._backend == "cpu":
+        target_name = target_name.replace(
+            "post_attention_layernorm", "ff_layer.pre_layer_norm"
+        )
+        target_name = target_name.replace(
+            "ff_layer.pre_layer_norm.weight", "ff_layer.pre_layer_norm.scale"
+        )
+      else:
+        target_name = target_name.replace(
+            "post_attention_layernorm", "post_layer_norm"
+        )
+        target_name = target_name.replace(
+            "post_layer_norm.weight", "post_layer_norm.scale"
+        )
+      target_name = target_name.replace(
+          "transformer.ln_f.weight", "params.lm.final_ln.scale"
+      )
+      target_name = target_name.replace(
+          "transformer.ln_f.bias", "params.lm.final_ln.bias"
+      )
+
+    return target_name
+
+
+class PytorchCkptLoader(converter_base.CkptLoaderBase):
+  """CkptLoader implementation for loading the Pytorch model."""
+
+  def __init__(
+      self,
+      ckpt_path: str,
+      is_symmetric: bool,
+      attention_quant_bits: int,
+      feedforward_quant_bits: int,
+      embedding_quant_bits: int,
+      special_model: str,
+      backend: str,
+  ):
+    """Initializes the loader.
+
+    Args:
+      ckpt_path: The filepath to the PyTorch checkpoint file.
+      is_symmetric: Whether to apply symmetric or asymmetric quantization.
+      attention_quant_bits: An integer that specifies the target quantization
+        bits (support 8 or 4) for the attention layers.
+      feedforward_quant_bits: An integer that specifies the target quantization
+        bits (support 8 or 4) for the feedforward layers in each Transformer
+        block.
+      embedding_quant_bits: An integer that specifies the target quantization
+        bits (support 8 or 4) for the embedding (and the final projection)
+        layers.
+      special_model: A string that indicates which input model this is and
+        whether any special treatment is needed.
+      backend: A string indicating the backend used when converting this model.
+        Valid options are "cpu" and "gpu".
+    """
+    super().__init__(
+        ckpt_path,
+        is_symmetric,
+        attention_quant_bits,
+        feedforward_quant_bits,
+        embedding_quant_bits,
+    )
+
+    self._special_model = special_model
+    self._reader = _PytorchReader(ckpt_path)
+    if special_model in ["FALCON_RW_1B"]:
+      self.mapper = FalconMapper(
+          is_symmetric,
+          attention_quant_bits,
+          feedforward_quant_bits,
+          embedding_quant_bits,
+          backend,
+          self._reader,
+      )
+    else:
+      raise ValueError(f"Unknown special model: {special_model}")
+
+  def load_to_actions(self):
+    tensor_names = self._reader.get_tensor_names()
+    actions = []
+    for tensor_name in tensor_names:
+      tensor_actions = self.mapper.map_to_actions(tensor_name)
+      actions.extend(tensor_actions)
+    return actions
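
A note on `_decompose_falcon_qkv` above: Falcon-RW-1B's fused `query_key_value` weight interleaves the q, k, and v projections in repeating 64-row chunks along the fused dimension, and the three copy loops each pick out every third chunk. A toy reproduction of that slicing with shrunken dimensions (`chunk_size=2`, `hidden_size=4` standing in for the real 64 and 2048):

```python
import numpy as np

chunk_size, hidden_size = 2, 4  # stand-ins for the real 64 / 2048
# Fused rows arrive as [q-chunk, k-chunk, v-chunk, q-chunk, k-chunk, v-chunk].
fused = np.arange(3 * hidden_size, dtype=np.float32)


def take(phase: int) -> np.ndarray:
  # Same loop structure as _decompose_falcon_qkv: start phase * chunk_size
  # rows in, then jump chunk_size * 3 rows to skip the other two projections.
  return np.concatenate([
      fused[i : i + chunk_size]
      for i in range(phase * chunk_size, hidden_size * 3, chunk_size * 3)
  ])


q, k, v = take(0), take(1), take(2)
assert q.tolist() == [0, 1, 6, 7]  # rows 0-1 and 6-7 of the fused tensor
assert k.tolist() == [2, 3, 8, 9]
assert v.tolist() == [4, 5, 10, 11]
```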
mediapipe/tasks/python/genai/converter/pytorch_converter_test.py
@@ -0,0 +1,86 @@
+# Copyright 2024 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for pytorch_converter."""
+
+import os
+
+from absl.testing import absltest
+from absl.testing import parameterized
+
+from mediapipe.tasks.python.genai.converter import pytorch_converter
+from mediapipe.tasks.python.test import test_utils
+
+_TEST_DATA_DIR = 'mediapipe/tasks/testdata/text'
+_PYTORCH_FILE = test_utils.get_test_data_path(
+    os.path.join(_TEST_DATA_DIR, 'falcon_rw_1b_test_weight.pt')
+)
+
+
+class PytorchConverterTest(parameterized.TestCase):
+  VARIABLE_NAMES = [
+      'transformer.word_embeddings.weight',
+      'transformer.h.0.input_layernorm.weight',
+      'transformer.h.0.input_layernorm.bias',
+      'transformer.h.0.self_attention.query_key_value.weight',
+      'transformer.h.0.self_attention.query_key_value.bias',
+      'transformer.h.0.self_attention.dense.weight',
+      'transformer.h.0.self_attention.dense.bias',
+      'transformer.h.0.post_attention_layernorm.weight',
+      'transformer.h.0.post_attention_layernorm.bias',
+      'transformer.h.0.mlp.dense_h_to_4h.weight',
+      'transformer.h.0.mlp.dense_h_to_4h.bias',
+      'transformer.h.0.mlp.dense_4h_to_h.weight',
+      'transformer.h.0.mlp.dense_4h_to_h.bias',
+      'transformer.ln_f.weight',
+      'transformer.ln_f.bias',
+      'lm_head.weight',
+  ]
+
+  def test_init(self):
+    loader = pytorch_converter.PytorchCkptLoader(
+        ckpt_path=_PYTORCH_FILE,
+        is_symmetric=True,
+        attention_quant_bits=8,
+        feedforward_quant_bits=8,
+        embedding_quant_bits=8,
+        special_model='FALCON_RW_1B',
+        backend='cpu',
+    )
+    self.assertEqual(loader._ckpt_path, _PYTORCH_FILE)
+    self.assertEqual(loader._is_symmetric, True)
+    self.assertEqual(loader._attention_quant_bits, 8)
+    self.assertEqual(loader._feedforward_quant_bits, 8)
+
+  @parameterized.product(
+      quant_bits=(4, 8),
+  )
+  def test_load_to_actions(self, quant_bits):
+    loader = pytorch_converter.PytorchCkptLoader(
+        ckpt_path=_PYTORCH_FILE,
+        is_symmetric=True,
+        attention_quant_bits=8,
+        feedforward_quant_bits=quant_bits,
+        embedding_quant_bits=8,
+        special_model='FALCON_RW_1B',
+        backend='cpu',
+    )
+    actions = loader.load_to_actions()
+    # There are 16 layers in the model, but qkv weight and bias would be
+    # decomposed to q, k, v tensors, so there would be 20 quantization actions.
+    self.assertLen(actions, 20)
+
+
+if __name__ == '__main__':
+  absltest.main()
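
On the expected count in `test_load_to_actions`: the test checkpoint holds 16 tensors (the `VARIABLE_NAMES` list), and the fused `query_key_value` weight and bias each expand into separate q, k, and v actions, so the total is 16 - 2 + 2 * 3 = 20.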