mediapipe-nightly 0.10.10.post20240216__cp310-cp310-macosx_11_0_x86_64.whl → 0.10.10.post20240220__cp310-cp310-macosx_11_0_x86_64.whl
- mediapipe/__init__.py +1 -1
- mediapipe/python/_framework_bindings.cpython-310-darwin.so +0 -0
- mediapipe/tasks/python/__init__.py +1 -0
- mediapipe/tasks/python/genai/__init__.py +14 -0
- mediapipe/tasks/python/genai/converter/__init__.py +24 -0
- mediapipe/tasks/python/genai/converter/converter_base.py +172 -0
- mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
- mediapipe/tasks/python/genai/converter/llm_converter.py +213 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter.py +315 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
- mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
- mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter.py +521 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer.py +111 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +62 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/METADATA +1 -1
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/RECORD +21 -8
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/LICENSE +0 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/WHEEL +0 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/top_level.txt +0 -0
mediapipe/__init__.py CHANGED
mediapipe/python/_framework_bindings.cpython-310-darwin.so CHANGED
Binary file
mediapipe/tasks/python/genai/__init__.py ADDED
@@ -0,0 +1,14 @@
# Copyright 2024 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""MediaPipe GenAI Python API."""
mediapipe/tasks/python/genai/converter/__init__.py ADDED
@@ -0,0 +1,24 @@
# Copyright 2022 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""MediaPipe Tasks GenAI Converter API."""

import mediapipe.tasks.python.genai.converter.llm_converter

ConversionConfig = llm_converter.ConversionConfig
convert_checkpoint = llm_converter.convert_checkpoint

# Remove unnecessary modules to avoid duplication in API docs.
del mediapipe
del llm_converter
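
The two re-exports above define the public surface of the converter package. A minimal usage sketch, assuming the mediapipe-nightly wheel above is installed:

    # Public import path exposed by the package __init__ shown above.
    from mediapipe.tasks.python.genai import converter

    # Only ConversionConfig and convert_checkpoint are kept public; the
    # llm_converter module itself is deleted from the namespace so it does
    # not show up twice in the generated API docs.
    print(converter.ConversionConfig)
    print(converter.convert_checkpoint)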
mediapipe/tasks/python/genai/converter/converter_base.py ADDED
@@ -0,0 +1,172 @@
# Copyright 2024 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Defines a couple of base classes for the conversion/quantization process."""

import os
from typing import Dict, List, Optional, Tuple
import numpy as np


class QuantizationAction:
  """Container of the tensor values and their corresponding quantization settings.

  The container is responsible for hosting all of the information that is
  required to execute the weight-only quantization.

  Attributes:
    tensor_name: A string that represents the input tensor name.
    tensor_value: A numpy array that contains the unquantized tensor values.
    target_name: A string that represents the updated tensor name.
    quantize_axis: A list of integers representing the dimensions to be
      quantized along. For example, if an input tensor has shape [128, 256] and
      quantize_axis == [0], the quantization happens along the 0-th dimension,
      resulting in [256] scaling factors.
    quantize_bits: An integer that specifies the target quantization bits. It
      currently only supports either 8 or 4 bits.
    pack_dim: An integer specifying which dimension to pack the quantized bits.
      This is only applicable when quantize_bits == 4.
  """

  def __init__(
      self,
      tensor_name: str,
      tensor_value: Optional[np.ndarray] = None,
      target_name: Optional[str] = None,
      quantize_axis: Optional[List[int]] = None,
      quantize_bits: Optional[int] = None,
      pack_dim: Optional[int] = 0,
  ):
    """Initializes the model attributes."""
    self.tensor_name = tensor_name
    self.tensor_value = tensor_value
    self.target_name = target_name
    self.quantize_axis = quantize_axis
    self.quantize_bits = quantize_bits
    self.pack_dim = pack_dim

  def __str__(self) -> str:
    output_string = "QuantizationAction(\n"
    output_string += f"  tensor_name: {self.tensor_name}\n"
    output_string += f"  target_name: {self.target_name}\n"
    output_string += f"  quantize_axis: {self.quantize_axis}\n"
    output_string += f"  quantize_bits: {self.quantize_bits}\n"
    output_string += f"  pack_dim: {self.pack_dim}\n"
    if self.tensor_value is not None:
      output_string += f"  tensor_value: {self.tensor_value.shape}\n"
    output_string += ")\n"
    return output_string


class CkptLoaderBase:
  """Base class for loading the checkpoint.

  This class is responsible for loading the checkpoint files into the layer
  weight tensors (as numpy arrays) plus the quantization setting information
  (8/4 bits). The returned data should be a list of QuantizationAction that
  describes how to quantize each layer's weights.
  """

  def __init__(
      self,
      ckpt_path: str,
      is_symmetric: bool,
      attention_quant_bits: int,
      feedforward_quant_bits: int,
      embedding_quant_bits: int,
  ):
    """Initializes the loader.

    Args:
      ckpt_path: The filepath to the checkpoint.
      is_symmetric: Whether to apply symmetric or asymmetric quantization.
      attention_quant_bits: An integer that specifies the target quantization
        bits (supports 8 or 4) for the attention layers.
      feedforward_quant_bits: An integer that specifies the target quantization
        bits (supports 8 or 4) for the feedforward layers in each Transformer
        block.
      embedding_quant_bits: An integer that specifies the target quantization
        bits (supports 8 or 4) for the embedding (and the final projection)
        layers.
    """
    self._ckpt_path = ckpt_path
    self._is_symmetric = is_symmetric
    self._attention_quant_bits = attention_quant_bits
    self._feedforward_quant_bits = feedforward_quant_bits
    self._embedding_quant_bits = embedding_quant_bits

  def load_to_actions(self) -> List[Optional[QuantizationAction]]:
    """Loads the checkpoint and returns the quantization actions."""
    raise NotImplementedError("The load_to_actions method is not implemented.")


class LayerActionMapperBase:
  """Base class for mapping the layer weights to quantization actions.

  This class is responsible for mapping from each layer to its corresponding
  quantization information (e.g. target quantization bits / updated tensor
  name...).
  """

  def __init__(
      self,
      is_symmetric: bool,
      attention_quant_bits: int,
      feedforward_quant_bits: int,
      embedding_quant_bits: int,
      backend: str,
  ):
    self._is_symmetric = is_symmetric
    self._attention_quant_bits = attention_quant_bits
    self._feedforward_quant_bits = feedforward_quant_bits
    self._embedding_quant_bits = embedding_quant_bits
    self._backend = backend

  def map_to_actions(
      self, layer_name: str
  ) -> Optional[List[QuantizationAction]]:
    """Maps the layer weights to quantization actions.

    Args:
      layer_name: A string representing the name of the layer weight. Note that
        the layer information is expected to be contained in the name, which is
        enough to determine the target quantization information. Any child
        class is expected to implement this function.
    """
    raise NotImplementedError("The map_to_actions method is not implemented.")


class ModelWriterBase:
  """Base class for writing the quantized model.

  This class is responsible for taking a dictionary of the quantized
  tensors/names and writing them into the format that can be loaded by the
  on-device inference engine.
  """

  def __init__(self, output_dir: str, backend: str):
    """Initializes the class.

    Args:
      output_dir: A string that represents the output directory to write the
        resulting file(s).
      backend: A string that represents the target backend to run the output
        file(s).
    """
    self._output_dir = output_dir
    if not os.path.exists(self._output_dir):
      os.mkdir(self._output_dir)
    self._backend = backend

  def write_variables(self, variables: Dict[str, Tuple[np.ndarray, bool]]):
    raise NotImplementedError("The write_variables method is not implemented.")
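
To show how these base classes are meant to be extended, here is a minimal sketch of a hypothetical loader built on CkptLoaderBase. The tensor, its names, and the checkpoint path are made up for illustration and are not part of MediaPipe:

    import numpy as np

    from mediapipe.tasks.python.genai.converter import converter_base


    class ToyCkptLoader(converter_base.CkptLoaderBase):
      """Hypothetical loader that emits one 8-bit action for a fake weight."""

      def load_to_actions(self):
        fake_weight = np.ones((128, 256), dtype=np.float32)  # placeholder tensor
        return [
            converter_base.QuantizationAction(
                tensor_name="toy.attention.w",   # made-up source tensor name
                tensor_value=fake_weight,
                target_name="attention.w",       # made-up target tensor name
                quantize_axis=[0],               # per-column scales: shape [256]
                quantize_bits=self._attention_quant_bits,
            )
        ]


    loader = ToyCkptLoader(
        ckpt_path="/tmp/toy_ckpt",  # hypothetical path; the base class just stores it
        is_symmetric=True,
        attention_quant_bits=8,
        feedforward_quant_bits=8,
        embedding_quant_bits=8,
    )
    print(loader.load_to_actions()[0])  # uses QuantizationAction.__str__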
mediapipe/tasks/python/genai/converter/converter_factory.py ADDED
@@ -0,0 +1,79 @@
# Copyright 2024 The MediaPipe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility library that helps create the converter instances."""
from mediapipe.tasks.python.genai.converter import converter_base
from mediapipe.tasks.python.genai.converter import pytorch_converter
from mediapipe.tasks.python.genai.converter import safetensors_converter
from mediapipe.tasks.python.genai.converter import weight_bins_writer


def create_ckpt_loader(
    ckpt_format: str, *args, **kwargs
) -> converter_base.CkptLoaderBase:
  """Creates the checkpoint loader.

  Args:
    ckpt_format: A string that indicates the input checkpoint format.
    *args: Additional arguments to be passed into the loader.
    **kwargs: Additional arguments to be passed into the loader.

  Returns:
    A created CkptLoader instance.
  """
  del args
  if ckpt_format == "pytorch":
    return pytorch_converter.PytorchCkptLoader(
        ckpt_path=kwargs["ckpt_path"],
        is_symmetric=kwargs["is_symmetric"],
        attention_quant_bits=kwargs["attention_quant_bits"],
        feedforward_quant_bits=kwargs["feedforward_quant_bits"],
        embedding_quant_bits=kwargs["embedding_quant_bits"],
        special_model=kwargs["special_model"],
        backend=kwargs["backend"],
    )
  elif ckpt_format == "safetensors":
    return safetensors_converter.SafetensorsCkptLoader(
        ckpt_path=kwargs["ckpt_path"],
        is_symmetric=kwargs["is_symmetric"],
        attention_quant_bits=kwargs["attention_quant_bits"],
        feedforward_quant_bits=kwargs["feedforward_quant_bits"],
        embedding_quant_bits=kwargs["embedding_quant_bits"],
        special_model=kwargs["special_model"],
        backend=kwargs["backend"],
    )
  else:
    raise ValueError(f"Unknown checkpoint format: {ckpt_format}")


def create_writer(
    writer_type: str, *args, **kwargs
) -> converter_base.ModelWriterBase:
  """Creates the model writer.

  Args:
    writer_type: A string that indicates which model writer to create.
    *args: Additional arguments to be passed into the writer.
    **kwargs: Additional arguments to be passed into the writer.

  Returns:
    A created ModelWriter instance.
  """
  del args
  if writer_type == "weight_bins":
    return weight_bins_writer.WeightBinsWriter(
        output_dir=kwargs["output_dir"], backend=kwargs["backend"]
    )
  else:
    raise ValueError(f"Unknown writer type: {writer_type}")
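
Because both factory functions read their settings out of **kwargs, every key shown below is required when requesting a loader. A hedged sketch with placeholder paths, not a definitive invocation:

    from mediapipe.tasks.python.genai.converter import converter_factory

    # Every keyword below is looked up via kwargs[...] inside create_ckpt_loader;
    # the checkpoint and output paths are placeholders.
    loader = converter_factory.create_ckpt_loader(
        "safetensors",
        ckpt_path="/tmp/gemma-2b-it",
        is_symmetric=True,
        attention_quant_bits=8,
        feedforward_quant_bits=8,
        embedding_quant_bits=8,
        special_model="GEMMA_2B",
        backend="gpu",
    )
    writer = converter_factory.create_writer(
        "weight_bins",
        output_dir="/tmp/gemma-2b-out",
        backend="gpu",
    )

A missing key simply raises a KeyError rather than a descriptive error, so the higher-level convert_checkpoint entry point below is usually the easier way in.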
mediapipe/tasks/python/genai/converter/llm_converter.py ADDED
@@ -0,0 +1,213 @@
"""Functions to perform the checkpoint conversion."""

import os
from typing import List, Optional

from absl import logging

from mediapipe.tasks.python.genai.converter import converter_base
from mediapipe.tasks.python.genai.converter import converter_factory
from mediapipe.tasks.python.genai.converter import model_ckpt_util
from mediapipe.tasks.python.genai.converter import quantization_util


class ConversionConfig(object):
  """Config for checkpoint conversion.

  Attributes:
    input_ckpt: Directory or path for the input checkpoint.
    ckpt_format: Checkpoint format, e.g. 'safetensors', 'pytorch'.
    model_type: Name of the model, e.g. GEMMA_2B.
    backend: Target backend to run the model. Can be either "cpu" or "gpu".
    output_dir: Where the output file(s) are to be stored.
    is_symmetric: Whether to quantize symmetrically.
    attention_quant_bits: Target quantization bits for the attention layers.
    feedforward_quant_bits: Target quantization bits for the feedforward layers.
    embedding_quant_bits: Target quantization bits for the embedding layers.
    combine_file_only: Whether to combine the weight files only (assuming the
      weight files already exist).
    vocab_model_file: The file path to either 1) the SentencePiece vocab model
      or 2) the Hugging Face BPE tokenizer files; 1) is applicable for the
      Gemma model and 2) is applicable for other models. When 2) is used, the
      provided path is expected to point to a directory that contains both
      tokenizer.json and tokenizer_config.json files.
    output_tflite_file: (optional) the output tflite filename. If not provided,
      the output will be `model.tflite` stored in the output_dir.
  """

  def __init__(
      self,
      input_ckpt: str,
      ckpt_format: str,
      model_type: str,
      backend: str,
      output_dir: str,
      is_symmetric: bool = True,
      attention_quant_bits: int = 8,
      feedforward_quant_bits: int = 8,
      embedding_quant_bits: int = 8,
      combine_file_only: bool = False,
      vocab_model_file: str = '',
      output_tflite_file: Optional[str] = None,
  ):
    self.input_ckpt = input_ckpt
    self.ckpt_format = ckpt_format
    self.model_type = model_type
    self.backend = backend
    if os.path.isfile(output_dir):
      raise ValueError('Output directory must not point to an existing file.')
    if not os.path.isdir(output_dir):
      logging.info('Creating output directory: %s', output_dir)
      os.makedirs(output_dir, exist_ok=True)
    self.output_dir = output_dir
    self.is_symmetric = is_symmetric
    self.attention_quant_bits = attention_quant_bits
    self.feedforward_quant_bits = feedforward_quant_bits
    self.embedding_quant_bits = embedding_quant_bits
    self.combine_file_only = combine_file_only
    self.vocab_model_file = vocab_model_file
    if output_tflite_file:
      parent_dir = os.path.dirname(output_tflite_file)
      if not os.path.isdir(parent_dir):
        logging.info('Creating tflite parent directory: %s', parent_dir)
        os.makedirs(parent_dir, exist_ok=True)
      self.output_tflite_file = output_tflite_file
    else:
      self.output_tflite_file = os.path.join(output_dir, 'model.tflite')


def quantize_by_actions(
    actions: List[converter_base.QuantizationAction],
    backend: str,
    is_symmetric: bool,
):
  """Quantizes the weights by actions.

  Args:
    actions: A list of QuantizationAction that contains the information and
      tensor values to be quantized.
    backend: Target backend to run the model. Can be either "cpu" or "gpu".
    is_symmetric: Whether to quantize symmetrically.

  Returns:
    A dictionary that maps from the updated tensor names to the quantized
    tensor values plus a boolean that indicates whether the tensor values need
    to be packed (only applicable for the 4-bit quantized weights).
  """
  output_tensors = {}
  for action in actions:
    if action.quantize_axis:
      pack = action.quantize_bits == 4
      if is_symmetric:
        target_var, scale = quantization_util.quantize_tensor(
            var=action.tensor_value,
            axis=action.quantize_axis,
            sym=is_symmetric,
            number_bits=action.quantize_bits,
        )
        output_tensors[action.target_name] = (target_var, pack)
        output_tensors[action.target_name + '_quantized_scale'] = (scale, False)
      else:
        target_var, scale, zp = quantization_util.quantize_tensor(
            var=action.tensor_value,
            axis=action.quantize_axis,
            sym=is_symmetric,
            number_bits=action.quantize_bits,
        )
        if backend == 'cpu' and (action.quantize_bits == 4):
          target_var, scale, zp = quantization_util.update_to_uint4(
              target_var, scale, zp
          )
        output_tensors[action.target_name] = (target_var, pack)
        output_tensors[action.target_name + '_quantized_scale'] = (scale, False)
        output_tensors[action.target_name + '_quantized_zp'] = (zp, False)
    else:
      output_tensors[action.target_name] = (action.tensor_value, False)
  return output_tensors


def combined_weight_bins_to_tflite(
    model_type: str,
    backend: str,
    weight_path: str,
    output_tflite_file: str,
    vocab_model_file: str,
):
  """Combines the weight files into a tflite file."""
  # TODO: Figure out whether to clean up the weight files after this.
  if backend == 'cpu':
    model_ckpt_util.GenerateCpuTfLite(
        model_type,
        weight_path,
        vocab_model_file,
        True,
        output_tflite_file,
    )
  elif backend == 'gpu':
    model_ckpt_util.GenerateGpuTfLite(
        model_type,
        weight_path,
        vocab_model_file,
        True,
        output_tflite_file,
    )
  else:
    raise ValueError('Unsupported backend: %s' % backend)


def convert_bpe_vocab(vocab_model_file: str, output_dir: str) -> str:
  if not os.path.isdir(vocab_model_file):
    raise ValueError(
        'The input BPE vocab model file path is expected to be a directory that'
        ' contains both tokenizer.json and tokenizer_config.json files.'
    )
  output_vocab_file = os.path.join(output_dir, 'spm.model')
  model_ckpt_util.ConvertHfTokenizer(vocab_model_file, output_vocab_file)
  return output_vocab_file


def convert_checkpoint(config: ConversionConfig) -> None:
  """Converts the checkpoint to a tflite file."""
  logging.info('input folder: %s', config.input_ckpt)

  if config.model_type == 'GEMMA_2B':
    vocab_model_path = config.vocab_model_file
  else:
    vocab_model_path = convert_bpe_vocab(
        config.vocab_model_file, config.output_dir
    )

  if not config.combine_file_only:
    # Load the layer weights and prepare the quantization configurations.
    loader = converter_factory.create_ckpt_loader(
        config.ckpt_format,
        ckpt_path=config.input_ckpt,
        is_symmetric=config.is_symmetric,
        backend=config.backend,
        attention_quant_bits=config.attention_quant_bits,
        feedforward_quant_bits=config.feedforward_quant_bits,
        embedding_quant_bits=config.embedding_quant_bits,
        special_model=config.model_type,
    )
    actions = loader.load_to_actions()

    # Quantize the weights.
    quantized_tensors = quantize_by_actions(
        actions, config.backend, config.is_symmetric
    )

    # Write the quantized tensors into file(s).
    writer = converter_factory.create_writer(
        writer_type='weight_bins',
        output_dir=config.output_dir,
        backend=config.backend,
    )
    writer.write_variables(quantized_tensors)

  combined_weight_bins_to_tflite(
      config.model_type,
      config.backend,
      weight_path=config.output_dir,
      output_tflite_file=config.output_tflite_file,
      vocab_model_file=vocab_model_path,
  )
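
Putting the pieces together, a hedged end-to-end sketch of the flow exposed by convert_checkpoint; all paths below are placeholders and the GEMMA_2B/safetensors combination is only one plausible configuration:

    from mediapipe.tasks.python.genai import converter

    # Placeholder paths for illustration only.
    config = converter.ConversionConfig(
        input_ckpt="/tmp/gemma-2b-it/",               # directory with safetensors shards
        ckpt_format="safetensors",
        model_type="GEMMA_2B",
        backend="gpu",
        output_dir="/tmp/gemma-2b-it/intermediate/",  # quantized weight bins land here
        vocab_model_file="/tmp/gemma-2b-it/tokenizer.model",  # SentencePiece model (Gemma case)
        output_tflite_file="/tmp/gemma_gpu.tflite",
    )

    # Loads the checkpoint, quantizes the weights per layer, writes the weight
    # bins, and finally bundles everything with the vocab into the tflite file.
    converter.convert_checkpoint(config)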