mediapipe-nightly 0.10.10.post20240216__cp311-cp311-macosx_11_0_universal2.whl → 0.10.10.post20240220__cp311-cp311-macosx_11_0_universal2.whl

Files changed (22)
  1. mediapipe/__init__.py +1 -1
  2. mediapipe/python/_framework_bindings.cpython-311-darwin.so +0 -0
  3. mediapipe/tasks/python/__init__.py +1 -0
  4. mediapipe/tasks/python/genai/__init__.py +14 -0
  5. mediapipe/tasks/python/genai/converter/__init__.py +24 -0
  6. mediapipe/tasks/python/genai/converter/converter_base.py +172 -0
  7. mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
  8. mediapipe/tasks/python/genai/converter/llm_converter.py +213 -0
  9. mediapipe/tasks/python/genai/converter/pytorch_converter.py +315 -0
  10. mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
  11. mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
  12. mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
  13. mediapipe/tasks/python/genai/converter/safetensors_converter.py +521 -0
  14. mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
  15. mediapipe/tasks/python/genai/converter/weight_bins_writer.py +111 -0
  16. mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +62 -0
  17. mediapipe/version.txt +1 -1
  18. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/METADATA +1 -1
  19. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/RECORD +21 -8
  20. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/LICENSE +0 -0
  21. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/WHEEL +0 -0
  22. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/top_level.txt +0 -0
mediapipe/tasks/python/genai/converter/pytorch_converter.py
@@ -0,0 +1,315 @@
+ # Copyright 2024 The MediaPipe Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """CkptLoader implementation for loading the PyTorch checkpoint file."""
+
+ import enum
+ import os
+ from typing import List, Optional
+
+ import numpy as np
+ import torch
+
+ from mediapipe.tasks.python.genai.converter import converter_base
+
+
+ class _PytorchReader:
+   """PyTorch checkpoint reader."""
+
+   def __init__(self, model_path: str):
+     if not os.path.exists(model_path):
+       raise ValueError(f"{model_path} does not exist.")
+     # Load the checkpoint on CPU so conversion does not require a GPU.
+     self._model = torch.load(model_path, map_location=torch.device("cpu"))
+
+   def read_tensor_as_numpy(self, tensor_name: str) -> np.ndarray:
+     # Upcast to float32, transpose, and materialize the tensor as a
+     # contiguous numpy array on the host.
+     tensor = (
+         self._model[tensor_name]
+         .to(torch.float32)
+         .t()
+         .contiguous()
+         .detach()
+         .cpu()
+         .numpy()
+     )
+     return tensor
+
+   def get_tensor_names(self) -> List[str]:
+     names = list(self._model.keys())
+     return names
+
+
+ class LayerType(enum.Enum):
+   """Enum for layer type."""
+
+   NONE = 0
+   ATTENTION = 1  # Layer is part of the attention module.
+   FEEDFORWARD = 2  # Layer is part of the feedforward module in the Transformer.
+   EMBEDDING = 3  # Layer is the embedding lookup or final projection layer.
+   LAYER_NORM = 4  # Layer is a layer normalization before or after the attention layer.
+
+   @classmethod
+   def get_layer_type(cls, layer_name: str):
+     """Gets the layer type of the given layer name."""
+     ffn_layers = [
+         "mlp",
+     ]
+     attn_layers = [
+         "self_attention",
+     ]
+     emb_layers = [
+         "word_embeddings",
+         "lm_head",
+     ]
+     layer_norms = [
+         "input_layernorm",
+         "post_attention_layernorm",
+         "ln_f",
+     ]
+     if any(sub_name in layer_name for sub_name in attn_layers):
+       return LayerType.ATTENTION
+     if any(sub_name in layer_name for sub_name in ffn_layers):
+       return LayerType.FEEDFORWARD
+     if any(sub_name in layer_name for sub_name in emb_layers):
+       return LayerType.EMBEDDING
+     if any(sub_name in layer_name for sub_name in layer_norms):
+       return LayerType.LAYER_NORM
+     return LayerType.NONE
+
+
+ class FalconMapper(converter_base.LayerActionMapperBase):
+   """LayerActionMapper for handling the Falcon-rw-1b model."""
+
+   def __init__(
+       self,
+       is_symmetric: bool,
+       attention_quant_bits: int,
+       feedforward_quant_bits: int,
+       embedding_quant_bits: int,
+       backend: str,
+       reader: _PytorchReader,
+   ):
+     super().__init__(
+         is_symmetric=is_symmetric,
+         attention_quant_bits=attention_quant_bits,
+         feedforward_quant_bits=feedforward_quant_bits,
+         embedding_quant_bits=embedding_quant_bits,
+         backend=backend,
+     )
+     self._reader = reader
+
+   def map_to_actions(
+       self, layer_name: str
+   ) -> Optional[List[converter_base.QuantizationAction]]:
+     """Maps the given layer name to quantization actions."""
+     actions = []
+     tensor_value = self._reader.read_tensor_as_numpy(layer_name)
+     if "query_key_value" in layer_name:
+       # The fused qkv tensor is split into separate q, k and v tensors.
+       qkv_tensors = self._decompose_falcon_qkv(tensor_value)
+       for tensor, name in zip(qkv_tensors, ["q", "k", "v"]):
+         decomposed_name = layer_name.replace("query_key_value", name)
+         action = self._map_to_action_helper(tensor, decomposed_name)
+         actions.append(action)
+     else:
+       actions.append(self._map_to_action_helper(tensor_value, layer_name))
+     return actions
+
+   def _map_to_action_helper(
+       self, tensor_value: np.ndarray, layer_name: str
+   ) -> converter_base.QuantizationAction:
+     quantize_axis = None
+     quantize_bits = None
+     layer_type = LayerType.get_layer_type(layer_name)
+
+     # Only weights (not biases) of non-layer-norm layers are quantized.
+     if layer_type != LayerType.LAYER_NORM and layer_name.endswith(".weight"):
+       quantize_axis = [0]
+       if layer_type == LayerType.FEEDFORWARD:
+         quantize_bits = self._feedforward_quant_bits
+       elif layer_type == LayerType.ATTENTION:
+         quantize_bits = self._attention_quant_bits
+         if self._backend == "cpu" and ".dense." in layer_name:
+           tensor_value = np.transpose(tensor_value)
+           quantize_axis = [1]
+       elif layer_type == LayerType.EMBEDDING:
+         quantize_bits = self._embedding_quant_bits
+         if self._backend == "cpu" and "word_embeddings" in layer_name:
+           tensor_value = np.transpose(tensor_value)
+           quantize_axis = [1]
+     target_name = self.update_target_name(layer_name)
+
+     return converter_base.QuantizationAction(
+         tensor_name=layer_name,
+         tensor_value=tensor_value,
+         target_name=target_name,
+         quantize_axis=quantize_axis,
+         quantize_bits=quantize_bits,
+         pack_dim=0,
+     )
+
+   def _decompose_falcon_qkv(self, tensor_value: np.ndarray) -> List[np.ndarray]:
+     """Decomposes the fused qkv tensor used in the Falcon model into separate q, k and v tensors."""
+     # Hard-coded for falcon-rw-1b: 32 heads of size 64 and a hidden size of
+     # 2048. The fused tensor interleaves the heads as repeating
+     # [q-chunk | k-chunk | v-chunk] blocks of `chunk_size` rows each.
+     chunk_size = 64
+     hidden_size = 2048
+
+     tensor_value = tensor_value.transpose()
+
+     q_tensor = np.zeros(
+         (hidden_size,)
+         + ((hidden_size,) if len(tensor_value.shape) == 2 else ()),
+         dtype=tensor_value.dtype,
+     )
+     k_tensor = np.zeros_like(q_tensor, dtype=tensor_value.dtype)
+     v_tensor = np.zeros_like(k_tensor, dtype=tensor_value.dtype)
+
+     j = 0
+     for i in range(0 * chunk_size, hidden_size * 3, chunk_size * 3):
+       q_tensor[j : j + chunk_size] = tensor_value[i : i + chunk_size]
+       j += chunk_size
+
+     j = 0
+     for i in range(1 * chunk_size, hidden_size * 3, chunk_size * 3):
+       k_tensor[j : j + chunk_size] = tensor_value[i : i + chunk_size]
+       j += chunk_size
+
+     j = 0
+     for i in range(2 * chunk_size, hidden_size * 3, chunk_size * 3):
+       v_tensor[j : j + chunk_size] = tensor_value[i : i + chunk_size]
+       j += chunk_size
+
+     return [
+         np.ascontiguousarray(q_tensor.transpose()),
+         np.ascontiguousarray(k_tensor.transpose()),
+         np.ascontiguousarray(v_tensor.transpose()),
+     ]
+
+   def update_target_name(self, target_name: str) -> str:
+     """Updates the target name to match the tensor name convention."""
+     layer_type = LayerType.get_layer_type(target_name)
+
+     target_name = target_name.replace(
+         "transformer.h.", "params.lm.transformer.x_layers_"
+     )
+
+     if layer_type == LayerType.FEEDFORWARD:
+       target_name = target_name.replace(".weight", ".linear.w")
+       target_name = target_name.replace(".bias", ".bias.b")
+       target_name = target_name.replace(
+           "mlp.dense_h_to_4h", "ff_layer.ffn_layer1"
+       )
+       target_name = target_name.replace(
+           "mlp.dense_4h_to_h", "ff_layer.ffn_layer2"
+       )
+     elif layer_type == LayerType.ATTENTION:
+       target_name = target_name.replace("dense", "post")
+       target_name = target_name.replace(".weight", ".linear.w")
+       target_name = target_name.replace(".bias", ".bias.b")
+     elif layer_type == LayerType.EMBEDDING:
+       target_name = target_name.replace(
+           "transformer.word_embeddings", "params.lm.token_embedding"
+       )
+       target_name = target_name.replace(
+           "lm_head", "params.lm.softmax.logits_ffn"
+       )
+       target_name = target_name.replace(".weight", ".w")
+     elif layer_type == LayerType.LAYER_NORM:
+       target_name = target_name.replace("input_layernorm", "pre_layer_norm")
+       target_name = target_name.replace(
+           "pre_layer_norm.weight", "pre_layer_norm.scale"
+       )
+       if self._backend == "cpu":
+         target_name = target_name.replace(
+             "post_attention_layernorm", "ff_layer.pre_layer_norm"
+         )
+         target_name = target_name.replace(
+             "ff_layer.pre_layer_norm.weight", "ff_layer.pre_layer_norm.scale"
+         )
+       else:
+         target_name = target_name.replace(
+             "post_attention_layernorm", "post_layer_norm"
+         )
+         target_name = target_name.replace(
+             "post_layer_norm.weight", "post_layer_norm.scale"
+         )
+       target_name = target_name.replace(
+           "transformer.ln_f.weight", "params.lm.final_ln.scale"
+       )
+       target_name = target_name.replace(
+           "transformer.ln_f.bias", "params.lm.final_ln.bias"
+       )
+
+     return target_name
+
+
+ class PytorchCkptLoader(converter_base.CkptLoaderBase):
+   """CkptLoader implementation for loading the PyTorch model."""
+
+   def __init__(
+       self,
+       ckpt_path: str,
+       is_symmetric: bool,
+       attention_quant_bits: int,
+       feedforward_quant_bits: int,
+       embedding_quant_bits: int,
+       special_model: str,
+       backend: str,
+   ):
+     """Initializes the loader.
+
+     Args:
+       ckpt_path: The filepath to the PyTorch checkpoint file.
+       is_symmetric: Whether to apply symmetric or asymmetric quantization.
+       attention_quant_bits: An integer that specifies the target quantization
+         bits (8 or 4 are supported) for the attention layers.
+       feedforward_quant_bits: An integer that specifies the target
+         quantization bits (8 or 4 are supported) for the feedforward layers
+         in each Transformer block.
+       embedding_quant_bits: An integer that specifies the target quantization
+         bits (8 or 4 are supported) for the embedding (and the final
+         projection) layers.
+       special_model: A string that indicates which model the input is and
+         whether any special treatment is needed.
+       backend: A string indicating the backend used when converting this
+         model. Valid options are "cpu" and "gpu".
+     """
+     super().__init__(
+         ckpt_path,
+         is_symmetric,
+         attention_quant_bits,
+         feedforward_quant_bits,
+         embedding_quant_bits,
+     )
+
+     self._special_model = special_model
+     self._reader = _PytorchReader(ckpt_path)
+     if special_model in ["FALCON_RW_1B"]:
+       self.mapper = FalconMapper(
+           is_symmetric,
+           attention_quant_bits,
+           feedforward_quant_bits,
+           embedding_quant_bits,
+           backend,
+           self._reader,
+       )
+     else:
+       raise ValueError(f"Unknown special model: {special_model}")
+
+   def load_to_actions(self):
+     tensor_names = self._reader.get_tensor_names()
+     actions = []
+     for tensor_name in tensor_names:
+       tensor_actions = self.mapper.map_to_actions(tensor_name)
+       actions.extend(tensor_actions)
+     return actions
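
The trickiest part of the new converter is _decompose_falcon_qkv: falcon-rw-1b stores the fused query_key_value projection with its attention heads interleaved in repeating [q-chunk | k-chunk | v-chunk] blocks of 64 rows each, and the method unpicks that layout. Below is a minimal standalone numpy sketch of the same de-interleaving on toy sizes (the converter itself hard-codes chunk_size=64 and hidden_size=2048); it is an illustration, not part of the diff:

import numpy as np

# Toy stand-ins for the hard-coded chunk_size=64 and hidden_size=2048.
chunk, hidden = 2, 6
fused = np.arange(3 * hidden)  # rows of the fused qkv tensor, [q|k|v] repeating
q = np.concatenate([fused[i : i + chunk] for i in range(0, 3 * hidden, 3 * chunk)])
k = np.concatenate([fused[i : i + chunk] for i in range(chunk, 3 * hidden, 3 * chunk)])
v = np.concatenate([fused[i : i + chunk] for i in range(2 * chunk, 3 * hidden, 3 * chunk)])
print(q)  # [ 0  1  6  7 12 13]
print(k)  # [ 2  3  8  9 14 15]
print(v)  # [ 4  5 10 11 16 17]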
mediapipe/tasks/python/genai/converter/pytorch_converter_test.py
@@ -0,0 +1,86 @@
+ # Copyright 2024 The MediaPipe Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Unit tests for pytorch_converter."""
+
+ import os
+
+ from absl.testing import absltest
+ from absl.testing import parameterized
+
+ from mediapipe.tasks.python.genai.converter import pytorch_converter
+ from mediapipe.tasks.python.test import test_utils
+
+ _TEST_DATA_DIR = 'mediapipe/tasks/testdata/text'
+ _PYTORCH_FILE = test_utils.get_test_data_path(
+     os.path.join(_TEST_DATA_DIR, 'falcon_rw_1b_test_weight.pt')
+ )
+
+
+ class PytorchConverterTest(parameterized.TestCase):
+   VARIABLE_NAMES = [
+       'transformer.word_embeddings.weight',
+       'transformer.h.0.input_layernorm.weight',
+       'transformer.h.0.input_layernorm.bias',
+       'transformer.h.0.self_attention.query_key_value.weight',
+       'transformer.h.0.self_attention.query_key_value.bias',
+       'transformer.h.0.self_attention.dense.weight',
+       'transformer.h.0.self_attention.dense.bias',
+       'transformer.h.0.post_attention_layernorm.weight',
+       'transformer.h.0.post_attention_layernorm.bias',
+       'transformer.h.0.mlp.dense_h_to_4h.weight',
+       'transformer.h.0.mlp.dense_h_to_4h.bias',
+       'transformer.h.0.mlp.dense_4h_to_h.weight',
+       'transformer.h.0.mlp.dense_4h_to_h.bias',
+       'transformer.ln_f.weight',
+       'transformer.ln_f.bias',
+       'lm_head.weight',
+   ]
+
+   def test_init(self):
+     loader = pytorch_converter.PytorchCkptLoader(
+         ckpt_path=_PYTORCH_FILE,
+         is_symmetric=True,
+         attention_quant_bits=8,
+         feedforward_quant_bits=8,
+         embedding_quant_bits=8,
+         special_model='FALCON_RW_1B',
+         backend='cpu',
+     )
+     self.assertEqual(loader._ckpt_path, _PYTORCH_FILE)
+     self.assertTrue(loader._is_symmetric)
+     self.assertEqual(loader._attention_quant_bits, 8)
+     self.assertEqual(loader._feedforward_quant_bits, 8)
+
+   @parameterized.product(
+       quant_bits=(4, 8),
+   )
+   def test_load_to_actions(self, quant_bits):
+     loader = pytorch_converter.PytorchCkptLoader(
+         ckpt_path=_PYTORCH_FILE,
+         is_symmetric=True,
+         attention_quant_bits=8,
+         feedforward_quant_bits=quant_bits,
+         embedding_quant_bits=8,
+         special_model='FALCON_RW_1B',
+         backend='cpu',
+     )
+     actions = loader.load_to_actions()
+     # The checkpoint holds 16 tensors, but the fused qkv weight and bias are
+     # each decomposed into separate q, k and v tensors, so 20 quantization
+     # actions are produced in total.
+     self.assertLen(actions, 20)
+
+
+ if __name__ == '__main__':
+   absltest.main()
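
As a spot-check of the renaming convention, FalconMapper.update_target_name can also be exercised on its own. A minimal sketch follows; reader=None is passed purely for illustration since update_target_name never touches the reader, and the expected strings were derived by hand from the replace rules in the diff above:

from mediapipe.tasks.python.genai.converter import pytorch_converter

mapper = pytorch_converter.FalconMapper(
    is_symmetric=True,
    attention_quant_bits=8,
    feedforward_quant_bits=8,
    embedding_quant_bits=8,
    backend='cpu',
    reader=None,  # unused by update_target_name; None only for this sketch
)
assert mapper.update_target_name(
    'transformer.word_embeddings.weight') == 'params.lm.token_embedding.w'
assert mapper.update_target_name(
    'transformer.h.0.mlp.dense_h_to_4h.weight'
) == 'params.lm.transformer.x_layers_0.ff_layer.ffn_layer1.linear.w'
assert mapper.update_target_name(
    'transformer.ln_f.weight') == 'params.lm.final_ln.scale'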