mediapipe-nightly 0.10.10.post20240216__cp310-cp310-macosx_11_0_universal2.whl → 0.10.10.post20240220__cp310-cp310-macosx_11_0_universal2.whl
Sign up to get free protection for your applications and to get access to all the features.
- mediapipe/__init__.py +1 -1
- mediapipe/python/_framework_bindings.cpython-310-darwin.so +0 -0
- mediapipe/tasks/python/__init__.py +1 -0
- mediapipe/tasks/python/genai/__init__.py +14 -0
- mediapipe/tasks/python/genai/converter/__init__.py +24 -0
- mediapipe/tasks/python/genai/converter/converter_base.py +172 -0
- mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
- mediapipe/tasks/python/genai/converter/llm_converter.py +213 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter.py +315 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
- mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
- mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter.py +521 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer.py +111 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +62 -0
- mediapipe/version.txt +1 -1
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/METADATA +1 -1
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/RECORD +21 -8
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/LICENSE +0 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/WHEEL +0 -0
- {mediapipe_nightly-0.10.10.post20240216.dist-info → mediapipe_nightly-0.10.10.post20240220.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,259 @@
|
|
1
|
+
# Copyright 2024 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""Tests for quantization_util."""
|
16
|
+
|
17
|
+
from absl.testing import absltest
|
18
|
+
import jax
|
19
|
+
from jax import numpy as jnp
|
20
|
+
import numpy as np
|
21
|
+
|
22
|
+
from mediapipe.tasks.python.genai.converter import quantization_util
|
23
|
+
|
24
|
+
|
25
|
+
_dtype = lambda x: getattr(x, 'dtype', None) or np.asarray(x).dtype
|
26
|
+
|
27
|
+
|
28
|
+
class TestCase(absltest.TestCase):
  """absltest base class with array-comparison helpers used by this module."""

  def assertAllClose(
      self, x, y, check_dtypes=True, rtol=1e-5, atol=1e-5, **kwargs
  ):
    """Assert that `x` and `y` are element-wise close.

    Args:
      x: Actual value; converted with `np.asarray`.
      y: Expected value; converted with `np.asarray`.
      check_dtypes: When true, also require the dtypes to match via
        `assertDtypesMatch`.
      rtol: Relative tolerance forwarded to `np.testing.assert_allclose`.
      atol: Absolute tolerance forwarded to `np.testing.assert_allclose`.
      **kwargs: Extra keyword arguments forwarded to
        `np.testing.assert_allclose`.
    """
    actual, expected = np.asarray(x), np.asarray(y)
    if check_dtypes:
      self.assertDtypesMatch(actual, expected)

    # bfloat16 operands are widened to float32 before the numeric comparison
    # (presumably because NumPy's comparison does not handle bfloat16 itself).
    if actual.dtype == jnp.bfloat16:
      actual = actual.astype(np.float32)
    if expected.dtype == jnp.bfloat16:
      expected = expected.astype(np.float32)

    np.testing.assert_allclose(
        actual, expected, rtol=rtol, atol=atol, **kwargs
    )

  def assertDtypesMatch(self, x, y):
    """Assert that `x` and `y` have equal dtypes after JAX canonicalization."""
    canonical_x = jax.dtypes.canonicalize_dtype(_dtype(x))
    canonical_y = jax.dtypes.canonicalize_dtype(_dtype(y))
    self.assertEqual(canonical_x, canonical_y)
|
47
|
+
|
48
|
+
|
49
|
+
class Quantize8BTest(TestCase):
  """Tests quantize_tensor called without number_bits or use_fp overrides."""

  def test_quantize_symmetric(self):
    """Quantizing a (2, 4) input along axis 1 gives int8 values and scales."""
    weights = np.array([[1.2, 3.1, 5.5, 2.9], [0.2, -1.5, 3.3, 4.0]])
    expected_values = np.array(
        [[28, 72, 127, 67], [6, -48, 105, 127]], dtype=np.int8
    )
    expected_scale = np.array([0.04330709, 0.03149606], dtype=np.float32)

    quantized, scale = quantization_util.quantize_tensor(weights, axis=[1])

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)

  def test_quantize_symmetric_with_dimension_size_one_unquantized(self):
    """Same data as above with a size-one middle dimension left in place."""
    # Input shape is (2, 1, 4); quantization runs along axis 2.
    weights = np.array([[[1.2, 3.1, 5.5, 2.9]], [[0.2, -1.5, 3.3, 4.0]]])
    expected_values = np.array(
        [[[28, 72, 127, 67]], [[6, -48, 105, 127]]], dtype=np.int8
    )
    # The expected scale has shape (2, 1).
    expected_scale = np.array([[0.04330709], [0.03149606]], dtype=np.float32)

    quantized, scale = quantization_util.quantize_tensor(weights, axis=[2])

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)

  def test_quantize_asymmetric(self):
    """With sym=False a zero point is returned alongside values and scale."""
    weights = np.array([[1.2, 3.1, 5.5, 2.9], [0.2, -1.5, 3.3, 4.0]])
    expected_values = np.array(
        [[-128, -15, 127, -27], [-49, -128, 95, 127]], dtype=np.int8
    )
    expected_scale = np.array([0.016863, 0.021569], dtype=np.float32)
    expected_zero_point = np.array([-3.358431, -1.260784], dtype=np.float32)

    quantized, scale, zero_point = quantization_util.quantize_tensor(
        weights, axis=[1], sym=False
    )

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)
    self.assertAllClose(zero_point, expected_zero_point)
|
87
|
+
|
88
|
+
|
89
|
+
class Quantize8BFPTest(TestCase):
  """Tests quantize_tensor with use_fp=True and no number_bits override."""

  def test_quantize_symmetric(self):
    """Quantizing a (2, 4) input along axis 1 with use_fp=True."""
    weights = np.array([[1.0, 2.0, 5.5, 2.9], [0.02, -0.01, 3.3, 4.0]])
    expected_values = np.array(
        [[106, 114, 126, 119], [65, -71, 124, 126]], dtype=np.int8
    )
    expected_scale = np.array([0.01227679, 0.00892857], dtype=np.float32)

    quantized, scale = quantization_util.quantize_tensor(
        weights, axis=[1], use_fp=True
    )

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)

  def test_quantize_symmetric_with_dimension_size_one_unquantized(self):
    """Same data as above with a size-one middle dimension left in place."""
    # Input shape is (2, 1, 4); quantization runs along axis 2.
    weights = np.array([[[1.0, 2.0, 5.5, 2.9]], [[0.02, -0.01, 3.3, 4.0]]])
    expected_values = np.array(
        [[[106, 114, 126, 119]], [[65, -71, 124, 126]]], dtype=np.int8
    )
    # The expected scale has shape (2, 1).
    expected_scale = np.array([[0.01227679], [0.00892857]], dtype=np.float32)

    quantized, scale = quantization_util.quantize_tensor(
        weights, axis=[2], use_fp=True
    )

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)

  def test_quantize_asymmetric(self):
    """With sym=False a zero point is returned alongside values and scale."""
    weights = np.array([[-1.0, -2.0, -2.01, 2.9], [0.02, -0.15, 3.3, 4.0]])
    expected_values = np.array(
        [[-8, -2, -2, 126], [-3, -2, 121, 126]], dtype=np.int8
    )
    expected_scale = np.array([0.00547991, 0.0046317], dtype=np.float32)
    expected_zero_point = np.array([-0.4449999, -1.9250002], dtype=np.float32)

    quantized, scale, zero_point = quantization_util.quantize_tensor(
        weights, axis=[1], sym=False, use_fp=True
    )

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)
    self.assertAllClose(zero_point, expected_zero_point)

  def test_quantize_add_scale_eps(self):
    """Rows of identical values: scale is float32 eps with the flag, else 1."""
    rows = np.array([[0.0, 0.0, 0.0, 0.0], [-4.0, -4.0, -4.0, -4.0]])
    eps = np.finfo(np.float32).eps
    # (add_scale_eps flag, expected scale), checked in this order.
    cases = (
        (True, np.array([eps, eps])),
        (False, np.array([1.0, 1.0])),
    )
    for add_eps, expected_scale in cases:
      _, scale, _ = quantization_util.quantize_tensor(
          rows, axis=[1], sym=False, use_fp=True, add_scale_eps=add_eps
      )
      self.assertAllClose(scale, expected_scale)
|
148
|
+
|
149
|
+
|
150
|
+
class Quantize4BTest(TestCase):
  """Tests quantize_tensor with number_bits=4."""

  def test_quantize_symmetric(self):
    """Quantizing a (2, 4) input along axis 1 with number_bits=4."""
    weights = np.array([[1.2, 3.1, 5.5, 2.9], [0.2, -1.5, 3.3, 4.0]])
    expected_values = np.array([[2, 4, 7, 4], [0, -3, 6, 7]], dtype=np.int8)
    expected_scale = np.array([0.78571427, 0.5714286], dtype=np.float32)

    quantized, scale = quantization_util.quantize_tensor(
        weights, axis=[1], number_bits=4
    )

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)

  def test_quantize_symmetric_with_dimension_size_one_unquantized(self):
    """Same data as above with a size-one middle dimension left in place."""
    # Input shape is (2, 1, 4); quantization runs along axis 2.
    weights = np.array([[[1.2, 3.1, 5.5, 2.9]], [[0.2, -1.5, 3.3, 4.0]]])
    expected_values = np.array(
        [[[2, 4, 7, 4]], [[0, -3, 6, 7]]], dtype=np.int8
    )
    # The expected scale has shape (2, 1).
    expected_scale = np.array([[0.78571427], [0.5714286]], dtype=np.float32)

    quantized, scale = quantization_util.quantize_tensor(
        weights, axis=[2], number_bits=4
    )

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)

  def test_quantize_asymmetric(self):
    """With sym=False a zero point is returned alongside values and scale."""
    weights = np.array([[1.2, 3.1, 5.5, 2.9], [0.2, -1.5, 3.3, 4.0]])
    expected_values = np.array(
        [[-8, -1, 7, -2], [-3, -8, 5, 7]], dtype=np.int8
    )
    expected_scale = np.array([0.2866667, 0.36666667], dtype=np.float32)
    expected_zero_point = np.array([-3.4933336, -1.4333334], dtype=np.float32)

    quantized, scale, zero_point = quantization_util.quantize_tensor(
        weights, axis=[1], sym=False, number_bits=4
    )

    self.assertAllClose(quantized, expected_values)
    self.assertAllClose(scale, expected_scale)
    self.assertAllClose(zero_point, expected_zero_point)
|
195
|
+
|
196
|
+
|
197
|
+
class QuantizationUtilTest(TestCase):
  """Tests update_to_uint4 on the output of 4-bit quantize_tensor calls."""

  def test_update_to_uint4_sym(self):
    """Symmetric, two rows: both representations dequantize identically."""
    weights = np.array([[1.2, 3.1, -5.5, 2.9], [0.2, -1.5, 3.3, 4.0]])
    int4_values, int4_scale = quantization_util.quantize_tensor(
        weights, axis=[1], sym=True, number_bits=4
    )
    # Dequantize the int4 form before handing the arrays to update_to_uint4.
    restored_from_int4 = int4_values * np.expand_dims(int4_scale, -1)

    uint4_values, uint4_scale, uint4_zero_point = (
        quantization_util.update_to_uint4(int4_values, int4_scale)
    )

    self.assertEmpty(uint4_zero_point.shape)  # A scalar numpy array.
    restored_from_uint4 = np.expand_dims(uint4_scale, -1) * (
        uint4_values - uint4_zero_point
    )
    np.testing.assert_allclose(restored_from_int4, restored_from_uint4)

  def test_update_to_uint4_sym_combined(self):
    """Symmetric, three rows: the zero point has leading dimension 3."""
    weights = np.array(
        [[-1.2, 3.5, -6.2, 1.7], [1.2, 3.1, -5.5, 2.9], [0.2, -1.5, 3.3, 4.0]]
    )
    int4_values, int4_scale = quantization_util.quantize_tensor(
        weights, axis=[1], sym=True, number_bits=4
    )
    # Dequantize the int4 form before handing the arrays to update_to_uint4.
    restored_from_int4 = int4_values * np.expand_dims(int4_scale, -1)

    uint4_values, uint4_scale, uint4_zero_point = (
        quantization_util.update_to_uint4(int4_values, int4_scale)
    )

    self.assertEqual(uint4_zero_point.shape[0], 3)
    restored_from_uint4 = np.expand_dims(uint4_scale, -1) * (
        uint4_values - np.expand_dims(uint4_zero_point, -1)
    )
    np.testing.assert_allclose(restored_from_int4, restored_from_uint4)

  def test_update_to_uint4_asym(self):
    """Asymmetric, two rows: uint4 output dequantizes to the pinned values."""
    weights = np.array([[1.0, 8.0, -3.0, 2.0], [-3.0, 2.0, 1.0, 8.0]])
    expected_restored = np.array([
        [0.0, 7.333333, -3.666667, 1.466667],
        [-3.666667, 1.466667, 0.0, 7.333333],
    ])

    int4_values, int4_scale, int4_zero_point = (
        quantization_util.quantize_tensor(
            weights, axis=[1], sym=False, number_bits=4
        )
    )
    uint4_values, uint4_scale, uint4_zero_point = (
        quantization_util.update_to_uint4(
            int4_values, int4_scale, int4_zero_point
        )
    )

    restored_from_uint4 = np.expand_dims(uint4_scale, -1) * (
        uint4_values - uint4_zero_point
    )
    np.testing.assert_allclose(
        restored_from_uint4, expected_restored, rtol=1e-05
    )

  def test_update_to_uint4_asym_combined(self):
    """Asymmetric, three rows: the zero point has leading dimension 3."""
    weights = np.array(
        [[1.0, 8.0, -3.0, 2.0], [-3.0, 2.0, 1.0, 8.0], [2.0, 1.0, 8.0, -3.0]]
    )
    expected_restored = np.array([
        [0.0, 7.333333, -3.666667, 1.466667],
        [-3.666667, 1.466667, 0.0, 7.333333],
        [1.466667, 0.0, 7.333333, -3.666667],
    ])

    int4_values, int4_scale, int4_zero_point = (
        quantization_util.quantize_tensor(
            weights, axis=[1], sym=False, number_bits=4
        )
    )
    uint4_values, uint4_scale, uint4_zero_point = (
        quantization_util.update_to_uint4(
            int4_values, int4_scale, int4_zero_point
        )
    )

    self.assertEqual(uint4_zero_point.shape[0], 3)
    restored_from_uint4 = np.expand_dims(uint4_scale, -1) * (
        uint4_values - np.expand_dims(uint4_zero_point, -1)
    )
    np.testing.assert_allclose(
        restored_from_uint4, expected_restored, rtol=1e-05
    )
|
256
|
+
|
257
|
+
|
258
|
+
# Hand control to absltest's runner when this module is executed directly.
if __name__ == '__main__':
  absltest.main()
|