mediapipe-nightly 0.10.10.post20240216__cp310-cp310-macosx_11_0_x86_64.whl → 0.10.10.post20240220__cp310-cp310-macosx_11_0_x86_64.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (21) hide show
  1. mediapipe/__init__.py +1 -1
  2. mediapipe/python/_framework_bindings.cpython-310-darwin.so +0 -0
  3. mediapipe/tasks/python/__init__.py +1 -0
  4. mediapipe/tasks/python/genai/__init__.py +14 -0
  5. mediapipe/tasks/python/genai/converter/__init__.py +24 -0
  6. mediapipe/tasks/python/genai/converter/converter_base.py +172 -0
  7. mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
  8. mediapipe/tasks/python/genai/converter/llm_converter.py +213 -0
  9. mediapipe/tasks/python/genai/converter/pytorch_converter.py +315 -0
  10. mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
  11. mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
  12. mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
  13. mediapipe/tasks/python/genai/converter/safetensors_converter.py +521 -0
  14. mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
  15. mediapipe/tasks/python/genai/converter/weight_bins_writer.py +111 -0
  16. mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +62 -0
  17. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/METADATA +1 -1
  18. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/RECORD +21 -8
  19. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/LICENSE +0 -0
  20. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/WHEEL +0 -0
  21. {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/top_level.txt +0 -0
mediapipe/__init__.py CHANGED
@@ -23,4 +23,4 @@ del modules
23
23
  del python
24
24
  del mediapipe
25
25
  del util
26
- __version__ = '0.10.10-20240216'
26
+ __version__ = '0.10.10-20240220'
@@ -17,6 +17,7 @@
17
17
  from . import audio
18
18
  from . import components
19
19
  from . import core
20
+ from . import genai
20
21
  from . import text
21
22
  from . import vision
22
23
 
@@ -0,0 +1,14 @@
1
+ # Copyright 2024 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """MediaPipe GenAI Python API."""
@@ -0,0 +1,24 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """MediaPipe Tasks GenAI Converter API."""
16
+
17
+ import mediapipe.tasks.python.genai.converter.llm_converter
18
+
19
+ ConversionConfig = llm_converter.ConversionConfig
20
+ convert_checkpoint = llm_converter.convert_checkpoint
21
+
22
+ # Remove unnecessary modules to avoid duplication in API docs.
23
+ del mediapipe
24
+ del llm_converter
@@ -0,0 +1,172 @@
1
+ # Copyright 2024 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Defines a couple base classes for the conversion/quantization process."""
16
+
17
+ import os
18
+ from typing import Dict, List, Optional, Tuple
19
+ import numpy as np
20
+
21
+
22
class QuantizationAction:
  """Bundles one tensor with the settings used to quantize it.

  An instance holds all of the information required to execute weight-only
  quantization on a single tensor.

  Attributes:
    tensor_name: A string that represents the input tensor name.
    tensor_value: A numpy array that contains the unquantized tensor values.
    target_name: A string that represents the updated tensor name.
    quantize_axis: A list of integers naming the dimensions to quantize along.
      For example, with an input of shape [128, 256] and quantize_axis == [0],
      quantization happens along the 0-th dimension and produces [256] scaling
      factors.
    quantize_bits: An integer giving the target quantization bits. Only 8 or 4
      bits are currently supported.
    pack_dim: An integer naming the dimension along which the quantized bits
      are packed. Only applicable when quantize_bits == 4.
  """

  # Attributes rendered by __str__, in output order.
  _PRINTED_FIELDS = (
      "tensor_name",
      "target_name",
      "quantize_axis",
      "quantize_bits",
      "pack_dim",
  )

  def __init__(
      self,
      tensor_name: str,
      tensor_value: Optional[np.ndarray] = None,
      target_name: Optional[str] = None,
      quantize_axis: Optional[List[int]] = None,
      quantize_bits: Optional[int] = None,
      pack_dim: Optional[int] = 0,
  ):
    """Stores the tensor and its quantization settings on the instance."""
    self.tensor_name = tensor_name
    self.tensor_value = tensor_value
    self.target_name = target_name
    self.quantize_axis = quantize_axis
    self.quantize_bits = quantize_bits
    self.pack_dim = pack_dim

  def __str__(self) -> str:
    """Returns a multi-line summary; the tensor is shown by shape only."""
    rows = ["QuantizationAction("]
    rows.extend(
        f" {field}: {getattr(self, field)}" for field in self._PRINTED_FIELDS
    )
    # The shape row is only present when a tensor value has been attached.
    if self.tensor_value is not None:
      rows.append(f" tensor_value: {self.tensor_value.shape}")
    rows.append(")")
    return "\n".join(rows) + "\n"
70
+
71
+
72
class CkptLoaderBase:
  """Base class for checkpoint loaders.

  A loader reads the checkpoint files into layer weight tensors (as numpy
  arrays) together with the quantization settings (8/4 bits). The data it
  returns should be a list of QuantizationAction describing how each layer's
  weights are to be quantized.
  """

  def __init__(
      self,
      ckpt_path: str,
      is_symmetric: bool,
      attention_quant_bits: int,
      feedforward_quant_bits: int,
      embedding_quant_bits: int,
  ):
    """Records the checkpoint location and the quantization settings.

    Args:
      ckpt_path: The filepath to the checkpoint.
      is_symmetric: Whether to apply symmetric or asymmetric quantization.
      attention_quant_bits: Target quantization bits (8 or 4) for the
        attention layers.
      feedforward_quant_bits: Target quantization bits (8 or 4) for the
        feedforward layers in each Transformer block.
      embedding_quant_bits: Target quantization bits (8 or 4) for the
        embedding (and the final projection) layers.
    """
    self._ckpt_path, self._is_symmetric = ckpt_path, is_symmetric
    # Per-layer-family bit widths.
    (
        self._attention_quant_bits,
        self._feedforward_quant_bits,
        self._embedding_quant_bits,
    ) = (attention_quant_bits, feedforward_quant_bits, embedding_quant_bits)

  def load_to_actions(self) -> List[Optional[QuantizationAction]]:
    """Loads the checkpoint and returns the quantization actions.

    Raises:
      NotImplementedError: Always; the base class provides no implementation.
    """
    raise NotImplementedError("The load_to_actions method is not implemented.")
111
+
112
+
113
class LayerActionMapperBase:
  """Base class for turning layer weights into quantization actions.

  A mapper translates each layer into its quantization information (e.g. the
  target quantization bits or the updated tensor name).
  """

  def __init__(
      self,
      is_symmetric: bool,
      attention_quant_bits: int,
      feedforward_quant_bits: int,
      embedding_quant_bits: int,
      backend: str,
  ):
    """Stores the quantization settings and target backend on the instance."""
    self._backend = backend
    self._is_symmetric = is_symmetric
    # Per-layer-family bit widths.
    (
        self._attention_quant_bits,
        self._feedforward_quant_bits,
        self._embedding_quant_bits,
    ) = (attention_quant_bits, feedforward_quant_bits, embedding_quant_bits)

  def map_to_actions(
      self, layer_name: str
  ) -> Optional[List[QuantizationAction]]:
    """Maps the layer weights to quantization actions.

    Child classes are expected to implement this function.

    Args:
      layer_name: A string naming the layer weight. The name is expected to
        carry enough layer information to determine the target quantization
        settings.

    Raises:
      NotImplementedError: Always; the base class provides no implementation.
    """
    raise NotImplementedError("The map_to_actions method is not implemented.")
147
+
148
+
149
class ModelWriterBase:
  """Base class for writing the quantized model.

  This class is responsible for taking a dictionary of the quantized
  tensors/names and writing them into the format that can be loaded by the
  on-device inference engine.
  """

  def __init__(self, output_dir: str, backend: str):
    """Initializes the class and creates the output directory if missing.

    Args:
      output_dir: A string that represents the output directory to write the
        resulting file(s). Missing intermediate directories are created too.
      backend: A string that represents the target backend to run the output
        file(s).
    """
    self._output_dir = output_dir
    if not os.path.exists(self._output_dir):
      # makedirs (unlike mkdir) also creates missing parents, and exist_ok
      # tolerates another process creating the directory between the check
      # above and this call.
      os.makedirs(self._output_dir, exist_ok=True)
    self._backend = backend

  def write_variables(self, variables: Dict[str, Tuple[np.ndarray, bool]]):
    """Writes the given tensors; must be overridden by subclasses.

    Args:
      variables: Maps a tensor name to a (numpy array, bool) tuple.

    Raises:
      NotImplementedError: Always; the base class provides no implementation.
    """
    raise NotImplementedError("The write_variables method is not implemented.")
@@ -0,0 +1,79 @@
1
+ # Copyright 2024 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Utility library that helps create the converter instances."""
16
+ from mediapipe.tasks.python.genai.converter import converter_base
17
+ from mediapipe.tasks.python.genai.converter import pytorch_converter
18
+ from mediapipe.tasks.python.genai.converter import safetensors_converter
19
+ from mediapipe.tasks.python.genai.converter import weight_bins_writer
20
+
21
+
22
def create_ckpt_loader(
    ckpt_format: str, *args, **kwargs
) -> converter_base.CkptLoaderBase:
  """Builds the checkpoint loader that matches the given format.

  Args:
    ckpt_format: Name of the input checkpoint format. "pytorch" and
      "safetensors" are recognized.
    *args: Accepted but discarded; loaders are configured by keyword only.
    **kwargs: Loader settings. The keys ckpt_path, is_symmetric,
      attention_quant_bits, feedforward_quant_bits, embedding_quant_bits,
      special_model and backend are all required.

  Returns:
    A created CkptLoader instance.

  Raises:
    ValueError: If ckpt_format is not a recognized format.
    KeyError: If one of the required keyword settings is missing.
  """
  del args
  if ckpt_format == "pytorch":
    loader_cls = pytorch_converter.PytorchCkptLoader
  elif ckpt_format == "safetensors":
    loader_cls = safetensors_converter.SafetensorsCkptLoader
  else:
    raise ValueError(f"Unknown checkpoint format: {ckpt_format}")

  # Both loaders take the same keyword settings; look them up in a fixed order
  # so a missing setting is reported the same way for either format.
  required_settings = (
      "ckpt_path",
      "is_symmetric",
      "attention_quant_bits",
      "feedforward_quant_bits",
      "embedding_quant_bits",
      "special_model",
      "backend",
  )
  return loader_cls(**{name: kwargs[name] for name in required_settings})
58
+
59
+
60
def create_writer(
    writer_type: str, *args, **kwargs
) -> converter_base.ModelWriterBase:
  """Builds the model writer of the requested type.

  Args:
    writer_type: A string that indicates which model writer to create. Only
      "weight_bins" is recognized.
    *args: Accepted but discarded; the writer is configured by keyword only.
    **kwargs: Writer settings. The keys output_dir and backend are required.

  Returns:
    A created ModelWriter instance.

  Raises:
    ValueError: If writer_type is not a recognized writer type.
    KeyError: If output_dir or backend is missing from kwargs.
  """
  del args
  if writer_type != "weight_bins":
    raise ValueError(f"Unknown writer type: {writer_type}")

  writer_cls = weight_bins_writer.WeightBinsWriter
  return writer_cls(output_dir=kwargs["output_dir"], backend=kwargs["backend"])
@@ -0,0 +1,213 @@
1
+ """Functions to perform the checkpoint conversion."""
2
+
3
+ import os
4
+ from typing import List, Optional
5
+
6
+ from absl import logging
7
+
8
+ from mediapipe.tasks.python.genai.converter import converter_base
9
+ from mediapipe.tasks.python.genai.converter import converter_factory
10
+ from mediapipe.tasks.python.genai.converter import model_ckpt_util
11
+ from mediapipe.tasks.python.genai.converter import quantization_util
12
+
13
+
14
class ConversionConfig(object):
  """Config for checkpoint conversion.

  Attributes:
    input_ckpt: Directory or path for the input checkpoint.
    ckpt_format: Checkpoint format, e.g. 'safetensors', 'pytorch'.
    model_type: Name of the model, e.g. GEMMA_2B.
    backend: Target backend to run the model. Can be either "cpu" or "gpu".
    output_dir: Where the output file(s) to be stored.
    is_symmetric: Whether to quantize symmetrically.
    attention_quant_bits: Target quantization bits for the attention layers.
    feedforward_quant_bits: Target quantization bits for the feedforward layers.
    embedding_quant_bits: Target quantization bits for the embedding layers.
    combine_file_only: Whether to combine the weight files only (assuming the
      weight files already exist).
    vocab_model_file: The file path to the 1) SentencePiece vocab model; 2)
      Hugging Face BPE tokenizer files; 1) is applicable for the Gemma model and
      2) is applicable for other models. When 2) is used, the provided path is
      expected to point to a directory that contains both tokenizer.json and
      tokenizer_config.json files.
    output_tflite_file: (optional) the output tflite filename. If not provided,
      the output will be `model.tflite` stored in the output_dir.
  """

  def __init__(
      self,
      input_ckpt: str,
      ckpt_format: str,
      model_type: str,
      backend: str,
      output_dir: str,
      is_symmetric: bool = True,
      attention_quant_bits: int = 8,
      feedforward_quant_bits: int = 8,
      embedding_quant_bits: int = 8,
      combine_file_only: bool = False,
      vocab_model_file: str = '',
      output_tflite_file: Optional[str] = None,
  ):
    """Stores the settings and creates the output locations when missing.

    Raises:
      ValueError: If output_dir points to an existing file.
    """
    self.input_ckpt = input_ckpt
    self.ckpt_format = ckpt_format
    self.model_type = model_type
    self.backend = backend
    if os.path.isfile(output_dir):
      raise ValueError('Output directory must not point to an existing file.')
    if not os.path.isdir(output_dir):
      logging.info('Creating output directory: %s', output_dir)
      os.makedirs(output_dir, exist_ok=True)
    self.output_dir = output_dir
    self.is_symmetric = is_symmetric
    self.attention_quant_bits = attention_quant_bits
    self.feedforward_quant_bits = feedforward_quant_bits
    self.embedding_quant_bits = embedding_quant_bits
    self.combine_file_only = combine_file_only
    self.vocab_model_file = vocab_model_file
    if output_tflite_file:
      parent_dir = os.path.dirname(output_tflite_file)
      # A bare filename (e.g. 'model.tflite') has an empty dirname, and
      # os.makedirs('') raises FileNotFoundError. Only create a parent
      # directory when the path actually names one.
      if parent_dir and not os.path.isdir(parent_dir):
        logging.info('Creating tflite parent directory: %s', parent_dir)
        os.makedirs(parent_dir, exist_ok=True)
      self.output_tflite_file = output_tflite_file
    else:
      self.output_tflite_file = os.path.join(output_dir, 'model.tflite')
77
+
78
+
79
def quantize_by_actions(
    actions: List[converter_base.QuantizationAction],
    backend: str,
    is_symmetric: bool,
):
  """Quantizes the weights as described by the given actions.

  Actions without a quantize_axis are passed through unquantized.

  Args:
    actions: A list of QuantizationAction that contains the information and
      tensor values to be quantized.
    backend: Target backend to run the model. Can be either "cpu" or "gpu".
    is_symmetric: Whether to quantize symmetrically.

  Returns:
    A dictionary that maps from the updated tensor names to a tuple of the
    tensor values and a boolean indicating whether the values need to be
    packed (only set for 4-bit quantized weights). Quantized tensors also get
    a '<name>_quantized_scale' entry and, for asymmetric quantization, a
    '<name>_quantized_zp' entry.
  """
  results = {}
  for action in actions:
    if not action.quantize_axis:
      # Nothing to quantize along: keep the tensor as is, never packed.
      results[action.target_name] = (action.tensor_value, False)
      continue

    is_4bit = action.quantize_bits == 4
    quantized = quantization_util.quantize_tensor(
        var=action.tensor_value,
        axis=action.quantize_axis,
        sym=is_symmetric,
        number_bits=action.quantize_bits,
    )
    if is_symmetric:
      target_var, scale = quantized
    else:
      # The asymmetric path additionally carries a zero point.
      target_var, scale, zp = quantized
      if is_4bit and backend == 'cpu':
        target_var, scale, zp = quantization_util.update_to_uint4(
            target_var, scale, zp
        )

    results[action.target_name] = (target_var, is_4bit)
    results[action.target_name + '_quantized_scale'] = (scale, False)
    if not is_symmetric:
      results[action.target_name + '_quantized_zp'] = (zp, False)
  return results
127
+
128
+
129
def combined_weight_bins_to_tflite(
    model_type: str,
    backend: str,
    weight_path: str,
    output_tflite_file: str,
    vocab_model_file: str,
):
  """Combines weight files into a tflite file for the given backend.

  Args:
    model_type: Name of the model, forwarded to the generator.
    backend: Either 'cpu' or 'gpu'; selects which generator is invoked.
    weight_path: Location of the weight files, forwarded to the generator.
    output_tflite_file: Path of the tflite file to produce.
    vocab_model_file: Path of the vocab model, forwarded to the generator.

  Raises:
    ValueError: If backend is neither 'cpu' nor 'gpu'.
  """
  # TODO: Figure out whether to clean up the weight files after this.
  if backend == 'cpu':
    generate_tflite = model_ckpt_util.GenerateCpuTfLite
  elif backend == 'gpu':
    generate_tflite = model_ckpt_util.GenerateGpuTfLite
  else:
    raise ValueError('Unsupported backend: %s' % backend)

  # Both generators take the same positional arguments. The fourth is always
  # passed as True; its meaning is defined by model_ckpt_util.
  generate_tflite(
      model_type,
      weight_path,
      vocab_model_file,
      True,
      output_tflite_file,
  )
156
+
157
+
158
def convert_bpe_vocab(vocab_model_file: str, output_dir: str) -> str:
  """Converts Hugging Face BPE tokenizer files into an `spm.model` file.

  Args:
    vocab_model_file: Path to a directory that is expected to contain both
      tokenizer.json and tokenizer_config.json.
    output_dir: Directory in which the converted `spm.model` is written.

  Returns:
    The path of the converted vocab file, `<output_dir>/spm.model`.

  Raises:
    ValueError: If vocab_model_file is not an existing directory.
  """
  if not os.path.isdir(vocab_model_file):
    raise ValueError(
        'The input BPE vocab model file path is expected to be a directory that'
        ' contains both tokenizer.json and tokenizer_config.json files.'
    )
  output_vocab_file = os.path.join(output_dir, 'spm.model')
  model_ckpt_util.ConvertHfTokenizer(vocab_model_file, output_vocab_file)
  return output_vocab_file
167
+
168
+
169
def convert_checkpoint(config: ConversionConfig) -> None:
  """Converts the checkpoint described by `config` into a tflite file.

  Unless config.combine_file_only is set, the checkpoint is first loaded,
  quantized and written out as weight files into config.output_dir. The weight
  files are then combined into the final tflite file.

  Args:
    config: The conversion settings.
  """
  logging.info('input folder: %s', config.input_ckpt)

  # GEMMA_2B uses the provided vocab file directly; every other model type
  # has its BPE tokenizer files converted first.
  is_gemma_2b = config.model_type == 'GEMMA_2B'
  vocab_model_path = (
      config.vocab_model_file
      if is_gemma_2b
      else convert_bpe_vocab(config.vocab_model_file, config.output_dir)
  )

  if not config.combine_file_only:
    # Step 1: load the layer weights and prepare the quantization settings.
    ckpt_loader = converter_factory.create_ckpt_loader(
        config.ckpt_format,
        ckpt_path=config.input_ckpt,
        is_symmetric=config.is_symmetric,
        backend=config.backend,
        attention_quant_bits=config.attention_quant_bits,
        feedforward_quant_bits=config.feedforward_quant_bits,
        embedding_quant_bits=config.embedding_quant_bits,
        special_model=config.model_type,
    )
    pending_actions = ckpt_loader.load_to_actions()

    # Step 2: quantize the weights.
    tensors_to_write = quantize_by_actions(
        pending_actions, config.backend, config.is_symmetric
    )

    # Step 3: write the quantized tensors into file(s).
    bins_writer = converter_factory.create_writer(
        writer_type='weight_bins',
        output_dir=config.output_dir,
        backend=config.backend,
    )
    bins_writer.write_variables(tensors_to_write)

  # Final step (always run): merge the weight files into the tflite output.
  combined_weight_bins_to_tflite(
      config.model_type,
      config.backend,
      weight_path=config.output_dir,
      output_tflite_file=config.output_tflite_file,
      vocab_model_file=vocab_model_path,
  )