ai-edge-quantizer-nightly 0.0.1.dev20250317__py3-none-any.whl → 0.1.0.dev20250318__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,7 @@
17
17
 
18
18
  import enum
19
19
  import functools
20
+ from immutabledict import immutabledict
20
21
  from ai_edge_quantizer import algorithm_manager_api
21
22
  from ai_edge_quantizer import default_policy
22
23
  from ai_edge_quantizer import qtyping
@@ -24,6 +25,7 @@ from ai_edge_quantizer.algorithms.nonlinear_quantize import float_casting
24
25
  from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
25
26
  from ai_edge_quantizer.algorithms.uniform_quantize import dequantized_weight_recovery
26
27
  from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
28
+ from ai_edge_quantizer.algorithms.uniform_quantize import octav
27
29
 
28
30
  # TODO: b/399775701 - Clean up this file.
29
31
 
@@ -55,6 +57,7 @@ class AlgorithmName(str, enum.Enum):
55
57
  MIN_MAX_UNIFORM_QUANT = naive_min_max_quantize.ALGORITHM_KEY
56
58
  FLOAT_CASTING = float_casting.ALGORITHM_KEY
57
59
  DEQUANTIZED_WEIGHT_RECOVERY = dequantized_weight_recovery.ALGORITHM_KEY
60
+ OCTAV = octav.ALGORITHM_KEY
58
61
 
59
62
  ### MIN/MAX_UNIFORM_QUANT ###
60
63
 
@@ -188,3 +191,61 @@ for (
188
191
  dequantized_weight_recovery.get_tensor_quant_params,
189
192
  ),
190
193
  )
194
+
195
+
196
# Register OCTAV algorithm. Op-level config validation reuses the shared
# check from common_quantize.
register_op_quant_config_validation_func(
    AlgorithmName.OCTAV,
    common_quantize.check_op_quantization_config,
)

# Register a config check policy for OCTAV algorithm.
register_config_check_policy_func(
    AlgorithmName.OCTAV,
    default_policy.DEFAULT_CONFIG_CHECK_POLICY,
)

# Maps every op registered for OCTAV to the common_quantize materialize
# function used for it. The mapping is immutable.
_OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT = immutabledict({
    _TFLOpName.INPUT: common_quantize.materialize_input,
    _TFLOpName.OUTPUT: common_quantize.materialize_output,
    _TFLOpName.FULLY_CONNECTED: common_quantize.materialize_fc_conv,
    _TFLOpName.BATCH_MATMUL: common_quantize.materialize_batch_matmul,
    _TFLOpName.CONV_2D: common_quantize.materialize_fc_conv,
    _TFLOpName.DEPTHWISE_CONV_2D: common_quantize.materialize_fc_conv,
    _TFLOpName.CONV_2D_TRANSPOSE: common_quantize.materialize_conv2d_transpose,
    _TFLOpName.RESHAPE: common_quantize.materialize_reshape,
    _TFLOpName.AVERAGE_POOL_2D: common_quantize.materialize_average_pool_2d,
    _TFLOpName.EMBEDDING_LOOKUP: common_quantize.materialize_embedding_lookup,
    _TFLOpName.SOFTMAX: common_quantize.materialize_softmax_and_logistic,
    _TFLOpName.TANH: common_quantize.materialize_tanh,
    _TFLOpName.TRANSPOSE: common_quantize.materialize_transpose,
    _TFLOpName.GELU: common_quantize.materialize_gelu,
    _TFLOpName.ADD: common_quantize.materialize_add,
    _TFLOpName.SUB: common_quantize.materialize_sub,
    _TFLOpName.MUL: common_quantize.materialize_mul,
    _TFLOpName.MEAN: common_quantize.materialize_mean,
    _TFLOpName.RSQRT: common_quantize.materialize_rsqrt,
    _TFLOpName.CONCATENATION: common_quantize.materialize_concatenation,
    _TFLOpName.STRIDED_SLICE: common_quantize.materialize_strided_slice,
    _TFLOpName.SPLIT: common_quantize.materialize_split,
    _TFLOpName.LOGISTIC: common_quantize.materialize_softmax_and_logistic,
    _TFLOpName.SLICE: common_quantize.materialize_slice,
    _TFLOpName.SUM: common_quantize.materialize_sum,
    _TFLOpName.SELECT_V2: common_quantize.materialize_select_v2,
    _TFLOpName.DYNAMIC_UPDATE_SLICE: (
        common_quantize.materialize_dynamic_update_slice
    ),
    _TFLOpName.STABLEHLO_COMPOSITE: common_quantize.materialize_composite,
})

# QSV initialization and calibration are shared with the naive min/max
# algorithm; only the tensor quant-params function bound as the first
# positional argument of each materialize function is OCTAV-specific.
for op_name, materialize_func in _OCTAV_OP_NAME_MATERIALIZE_FUNC_DICT.items():
  register_quantized_op(
      AlgorithmName.OCTAV,
      op_name,
      naive_min_max_quantize.init_qsvs,
      calibration_func=naive_min_max_quantize.min_max_calibrate,
      materialize_func=functools.partial(
          materialize_func,
          octav.get_tensor_quant_params,
      ),
  )
@@ -23,7 +23,7 @@ to implement the get_tensor_quant_params_fn with the
23
23
  qtyping.GetTensorQuantParamsFuncSignature signature.
24
24
  """
25
25
 
26
- from typing import Any
26
+ from typing import Any, Optional, Sequence
27
27
  import numpy as np
28
28
  from ai_edge_quantizer import qtyping
29
29
  from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
@@ -669,3 +669,153 @@ def materialize_split(
669
669
  constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
670
670
  inputs_to_ignore=[0], # Split dimension does not need to be quantized.
671
671
  )
672
+
673
+
674
+ def _get_tensor_shape_for_blockwise(
675
+ tensor_shape: Sequence[int], quantized_dim: int, block_size: int
676
+ ) -> list[int]:
677
+ """Get the tensor shape for blockwise quantization.
678
+
679
+ This function splits the quantize dimension of the tensor into blocks and the
680
+ dim/blocks. Hence, min/max of the tensor can be calculated for each block
681
+ using existing functions.
682
+
683
+ Args:
684
+ tensor_shape: The original shape of the tensor.
685
+ quantized_dim: The dimension to be quantized blockwise.
686
+ block_size: The size of the block.
687
+
688
+ Returns:
689
+ The new tensor shape for calculating scale and zp for blockwise
690
+ quantization.
691
+ """
692
+ new_shape = []
693
+ for index, val in enumerate(tensor_shape):
694
+ if index == quantized_dim:
695
+ new_shape.append(int(val / block_size))
696
+ new_shape.append(block_size)
697
+ else:
698
+ new_shape.append(val)
699
+ return new_shape
700
+
701
+
702
def _reshape_data_for_blockwise(
    tensor_data: np.ndarray, quantized_dim: int, block_size: int
) -> tuple[np.ndarray, int]:
  """Splits the quantized dimension of `tensor_data` into blocks.

  Args:
    tensor_data: The original tensor data.
    quantized_dim: The dimension to be quantized blockwise.
    block_size: The size of the block.

  Returns:
    A tuple `(reshaped, reduce_dim)`: the tensor data reshaped to the blockwise
    shape, and the index of the inserted per-block axis
    (`quantized_dim + 1`) to reduce over.
  """
  blockwise_shape = _get_tensor_shape_for_blockwise(
      tensor_data.shape, quantized_dim, block_size
  )
  # The block axis sits right after the (now block-count) quantized axis.
  block_axis = quantized_dim + 1
  return tensor_data.reshape(blockwise_shape), block_axis
720
+
721
+
722
def broadcast_scale_zp_for_blockwise(
    tensor_content: np.ndarray,
    quant_params: qtyping.UniformQuantParams,
) -> qtyping.UniformQuantParams:
  """Expands per-block scale and zero point to the full tensor shape.

  Args:
    tensor_content: The original tensor data; only its shape is used.
    quant_params: The quantization parameters.
      `quant_params.quantized_dimension` must be specified.
      `quant_params.block_size` must be specified and positive.

  Returns:
    New quantization parameters whose scale and zero point have the same shape
    as `tensor_content`, with each block's value repeated across the block.
    The remaining fields are copied from `quant_params`.

  Raises:
    ValueError: If the quantized dimension is missing, or the block size is
      missing or not positive.
  """
  quantized_dim = quant_params.quantized_dimension
  block_size = quant_params.block_size
  if quantized_dim is None:
    raise ValueError("Quantized dimension must be specified.")
  if block_size is None or block_size <= 0:
    raise ValueError("Block size must be specified and positive.")

  full_shape = tensor_content.shape
  blockwise_shape = _get_tensor_shape_for_blockwise(
      full_shape, quantized_dim, block_size
  )

  def _repeat_over_block(per_block_values):
    # Insert a block axis, repeat each value across it, then fold the block
    # axis back into the quantized dimension.
    with_block_axis = np.expand_dims(per_block_values, quantized_dim + 1)
    repeated = np.broadcast_to(with_block_axis, blockwise_shape)
    return np.reshape(repeated, full_shape)

  return qtyping.UniformQuantParams(
      scale=_repeat_over_block(quant_params.scale),
      zero_point=_repeat_over_block(quant_params.zero_point),
      num_bits=quant_params.num_bits,
      symmetric=quant_params.symmetric,
      quantized_dimension=quantized_dim,
      block_size=block_size,
  )
768
+
769
+
770
def init_tensor_min_max(
    tensor_data: Optional[np.ndarray],
    op_info: qtyping.OpInfo,
) -> qtyping.QSV:
  """Computes the initial min/max statistics of a tensor.

  The reduction depends on the granularity of the op's weight tensor config:
  blockwise reduces over each block of the quantized dimension, channelwise
  keeps the quantized dimension, and anything else (including a missing weight
  config) reduces with no quantized dimension.

  Args:
    tensor_data: The tensor data.
    op_info: Aggregated information about the op (e.g., quantization config).

  Returns:
    A dictionary with "min" and "max" entries for the tensor, or an empty
    dictionary if the tensor data is None.
  """
  if tensor_data is None:
    return {}

  weight_config = op_info.op_quant_config.weight_tensor_config
  granularity = None if weight_config is None else weight_config.granularity
  is_blockwise = granularity == qtyping.QuantGranularity.BLOCKWISE
  is_channelwise = granularity == qtyping.QuantGranularity.CHANNELWISE

  quantized_dim = None
  if is_channelwise or is_blockwise:
    quantized_dim = common_utils.get_weight_quantized_dim(op_info, tensor_data)

  if is_blockwise:
    # Split the quantized dimension into blocks and reduce over the block
    # axis; the block axis is dropped from the result.
    blocked_data, block_axis = _reshape_data_for_blockwise(
        tensor_data,
        quantized_dim,
        weight_config.block_size,
    )
    return {
        "min": np.min(blocked_data, axis=block_axis, keepdims=False),
        "max": np.max(blocked_data, axis=block_axis, keepdims=False),
    }

  # Reduced axes are kept as size 1 in this branch.
  reduce_axes = common_utils.get_reduce_dims(quantized_dim, tensor_data.shape)
  return {
      "min": np.min(tensor_data, axis=reduce_axes, keepdims=True),
      "max": np.max(tensor_data, axis=reduce_axes, keepdims=True),
  }
@@ -15,10 +15,10 @@
15
15
 
16
16
  """Performs naive min/max uniform quantization."""
17
17
 
18
- from collections.abc import Sequence
19
18
  from typing import Any, Optional
20
19
  import numpy as np
21
20
  from ai_edge_quantizer import qtyping
21
+ from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
22
22
  from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
23
23
  from ai_edge_quantizer.algorithms.utils import common_utils
24
24
  from ai_edge_quantizer.utils import tfl_flatbuffer_utils
@@ -29,143 +29,6 @@ _QuantTransformation = qtyping.QuantTransformation
29
29
  _IntType = uniform_quantize_tensor.IntType
30
30
 
31
31
 
32
- def _init_tensor_min_max(
33
- tensor_data: Optional[np.ndarray],
34
- op_info: qtyping.OpInfo,
35
- ) -> qtyping.QSV:
36
- """Initialize the min/max for a tensor."""
37
- if tensor_data is None:
38
- return {}
39
- else:
40
- weight_tensor_config = op_info.op_quant_config.weight_tensor_config
41
- quantized_dim = None
42
- if weight_tensor_config is not None and (
43
- weight_tensor_config.granularity == qtyping.QuantGranularity.CHANNELWISE
44
- or weight_tensor_config.granularity
45
- == qtyping.QuantGranularity.BLOCKWISE
46
- ):
47
- quantized_dim = common_utils.get_weight_quantized_dim(
48
- op_info, tensor_data
49
- )
50
- if (
51
- weight_tensor_config is not None
52
- and weight_tensor_config.granularity
53
- == qtyping.QuantGranularity.BLOCKWISE
54
- ):
55
- reshaped_data, reduce_dims = _reshape_data_for_blockwise(
56
- tensor_data,
57
- quantized_dim,
58
- weight_tensor_config.block_size,
59
- )
60
- return {
61
- "min": np.min(reshaped_data, axis=reduce_dims, keepdims=False),
62
- "max": np.max(reshaped_data, axis=reduce_dims, keepdims=False),
63
- }
64
-
65
- else:
66
- reduce_dims = common_utils.get_reduce_dims(
67
- quantized_dim, tensor_data.shape
68
- )
69
- return {
70
- "min": np.min(tensor_data, axis=reduce_dims, keepdims=True),
71
- "max": np.max(tensor_data, axis=reduce_dims, keepdims=True),
72
- }
73
-
74
-
75
- def _get_tensor_shape_for_blockwise(
76
- tensor_shape: Sequence[int], quantized_dim: int, block_size: int
77
- ) -> list[int]:
78
- """Get the tensor shape for blockwise quantization.
79
-
80
- This function splits the quantize dimension of the tensor into blocks and the
81
- dim/blocks. Hence, min/max of the tensor can be calculated for each block
82
- using existing functions.
83
-
84
- Args:
85
- tensor_shape: The original shape of the tensor.
86
- quantized_dim: The dimension to be quantized blockwise.
87
- block_size: The size of the block.
88
-
89
- Returns:
90
- The new tensor shape for calculating scale and zp for blockwise
91
- quantization.
92
- """
93
- new_shape = []
94
- for index, val in enumerate(tensor_shape):
95
- if index == quantized_dim:
96
- new_shape.append(int(val / block_size))
97
- new_shape.append(block_size)
98
- else:
99
- new_shape.append(val)
100
- return new_shape
101
-
102
-
103
- def _reshape_data_for_blockwise(
104
- tensor_data: np.ndarray, quantized_dim: int, block_size: int
105
- ) -> tuple[np.ndarray, int]:
106
- """Reshapes data for blockwise quantization.
107
-
108
- Args:
109
- tensor_data: The original tensor data.
110
- quantized_dim: The dimension to be quantized blockwise.
111
- block_size: The size of the block.
112
-
113
- Returns:
114
- A tuple containing the reshaped tensor data and the new reduce dimension.
115
- """
116
- new_shape = _get_tensor_shape_for_blockwise(
117
- tensor_data.shape, quantized_dim, block_size
118
- )
119
- reshaped_data = tensor_data.reshape(new_shape)
120
- return reshaped_data, quantized_dim + 1
121
-
122
-
123
- def _broadcast_scale_zp_for_blockwise(
124
- tensor_content: np.ndarray,
125
- quant_params: qtyping.UniformQuantParams,
126
- ) -> qtyping.UniformQuantParams:
127
- """Broadcasts scale and zp for blockwise quantization.
128
-
129
- Args:
130
- tensor_content: The original tensor data.
131
- quant_params: The quantization parameters.
132
-
133
- Returns:
134
- The updated quantization parameters with broadcasted scale and zp for
135
- correct constant quantization.
136
- """
137
- if quant_params.quantized_dimension is None:
138
- raise ValueError("Quantized dimension must be specified.")
139
- if quant_params.block_size is None or quant_params.block_size <= 0:
140
- raise ValueError("Block size must be specified and positive.")
141
- quantized_dim = quant_params.quantized_dimension
142
- expanded_tensor_shape = _get_tensor_shape_for_blockwise(
143
- tensor_content.shape, quantized_dim, quant_params.block_size
144
- )
145
- expanded_scale = np.reshape(
146
- np.broadcast_to(
147
- np.expand_dims(quant_params.scale, quantized_dim + 1),
148
- expanded_tensor_shape,
149
- ),
150
- tensor_content.shape,
151
- )
152
- expanded_zp = np.reshape(
153
- np.broadcast_to(
154
- np.expand_dims(quant_params.zero_point, quantized_dim + 1),
155
- expanded_tensor_shape,
156
- ),
157
- tensor_content.shape,
158
- )
159
- return qtyping.UniformQuantParams(
160
- scale=expanded_scale,
161
- zero_point=expanded_zp,
162
- num_bits=quant_params.num_bits,
163
- symmetric=quant_params.symmetric,
164
- quantized_dimension=quantized_dim,
165
- block_size=quant_params.block_size,
166
- )
167
-
168
-
169
32
  def get_tensor_quant_params(
170
33
  op_info: qtyping.OpInfo,
171
34
  tensor_quant_config: qtyping.TensorQuantizationConfig,
@@ -191,7 +54,7 @@ def get_tensor_quant_params(
191
54
  # weight-only and DRQ do not require calibration, thus it is
192
55
  # possible that this information is missing here. In that case we
193
56
  # collect min/max on the spot.
194
- tensor_min_max = _init_tensor_min_max(
57
+ tensor_min_max = common_quantize.init_tensor_min_max(
195
58
  tensor_content,
196
59
  op_info,
197
60
  )
@@ -238,7 +101,7 @@ def get_tensor_quant_params(
238
101
  # The reshaping for blockwise quantization is unique hence we do this here
239
102
  # to avoid unexpected broadcast behavior downstream.
240
103
  if tensor_quant_config.granularity == qtyping.QuantGranularity.BLOCKWISE:
241
- quant_params = _broadcast_scale_zp_for_blockwise(
104
+ quant_params = common_quantize.broadcast_scale_zp_for_blockwise(
242
105
  tensor_content, quant_params
243
106
  )
244
107
 
@@ -286,7 +149,7 @@ def init_qsvs(
286
149
  tensor_data = tfl_flatbuffer_utils.get_tensor_data(
287
150
  tensor, graph_info.buffers
288
151
  )
289
- op_qsvs[tensor_name] = _init_tensor_min_max(
152
+ op_qsvs[tensor_name] = common_quantize.init_tensor_min_max(
290
153
  tensor_data,
291
154
  op_info,
292
155
  )
@@ -297,7 +160,7 @@ def init_qsvs(
297
160
  tensor_data = tfl_flatbuffer_utils.get_tensor_data(
298
161
  tensor, graph_info.buffers
299
162
  )
300
- op_qsvs[tensor_name] = _init_tensor_min_max(
163
+ op_qsvs[tensor_name] = common_quantize.init_tensor_min_max(
301
164
  tensor_data,
302
165
  op_info,
303
166
  )
@@ -0,0 +1,174 @@
1
+ # Copyright 2024 The AI Edge Quantizer Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Implements the OCTAV quantization."""
17
+
18
+ import dataclasses
19
+ from typing import Any, Optional, Sequence, Union
20
+ import numpy as np
21
+ from ai_edge_quantizer import qtyping
22
+ from ai_edge_quantizer.algorithms.uniform_quantize import common_quantize
23
+ from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
24
+ from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
25
+ from ai_edge_quantizer.algorithms.utils import common_utils
26
+
27
+ ALGORITHM_KEY = "OCTAV"
28
+
29
+
30
+ def _guess_clipping_with_octav(
31
+ x: np.ndarray,
32
+ bits: int,
33
+ axis: Union[int, Sequence[int]],
34
+ max_iterations: int,
35
+ exponent_divisor: float,
36
+ early_stop: bool = True,
37
+ ) -> np.ndarray:
38
+ """Returns a tensor of absolute clipping constants for a tensor using OCTAV.
39
+
40
+ This method implements equation (6) from the OCTAV paper:
41
+ https://arxiv.org/abs/2206.06501
42
+
43
+ Args:
44
+ x: Tensor data to return guesses for.
45
+ bits: Number of bits used during quantization.
46
+ axis: Axis to reduce the tensor along to get the guesses.
47
+ max_iterations: Number of Newton-Raphson iterations to use.
48
+ exponent_divisor: What factor to divide the 4^-bits term by. In the paper,
49
+ 3.0 is optimal for signed ints and 12.0 for unsigned ints.
50
+ early_stop: If True, stop the iteration if the guess doesn't change.
51
+
52
+ Returns:
53
+ A tensor of shape [num_channels] with clipping constant guesses.
54
+ """
55
+ magnitude = np.abs(x)
56
+ x_reduced = np.mean(x, axis=axis, keepdims=True)
57
+ old_guess = np.zeros(x_reduced.shape)
58
+ guess = np.ones(x_reduced.shape)
59
+ for _ in range(max_iterations):
60
+ if early_stop and np.allclose(guess, old_guess):
61
+ break
62
+ guess_broadcasted = np.broadcast_to(guess, magnitude.shape)
63
+ guess_mask = np.asarray(magnitude < guess_broadcasted, dtype=x.dtype)
64
+ numerator = np.sum(
65
+ magnitude * np.asarray(1.0 - guess_mask), axis=axis, keepdims=True
66
+ )
67
+ denominator1 = (4.0 ** (-bits) / exponent_divisor) * np.sum(
68
+ guess_mask, axis=axis, keepdims=True
69
+ )
70
+ denominator2 = np.sum(1.0 - guess_mask, axis=axis, keepdims=True)
71
+ old_guess = guess
72
+ guess = numerator / (denominator1 + denominator2)
73
+
74
+ return guess
75
+
76
+
77
def get_tensor_quant_params(
    op_info: qtyping.OpInfo,
    tensor_quant_config: qtyping.TensorQuantizationConfig,
    tensor_content: Optional[np.ndarray] = None,
    tensor_qsv: Optional[dict[str, Any]] = None,
) -> qtyping.UniformQuantParams:
  """Returns the quantization parameters for a tensor.

  Args:
    op_info: Aggregated information about the op (e.g., quantization config).
    tensor_quant_config: The quantization config for the tensor.
    tensor_content: The content of the tensor. When None, it means the tensor is
      not a weight tensor (e.g. static quantization) so we fallback to using
      naive_min_max_quantize.
    tensor_qsv: A dictionary containing the min/max of the tensor.

  Returns:
    The quantization parameters of the tensor. When `tensor_content` is
    provided, the returned parameters also carry the quantized data.

  Raises:
    ValueError: If the blockwise quantization is requested.
    ValueError: If the asymmetric quantization is requested.
    ValueError: `tensor_qsv` must contain min/max values, or `tensor_content`
      must be provided so that they can be inferred.
  """
  # Fallback to naive_min_max_quantize.py for non-weight tensors.
  if tensor_content is None:
    return naive_min_max_quantize.get_tensor_quant_params(
        op_info, tensor_quant_config, tensor_content, tensor_qsv
    )

  if (
      tensor_quant_config.granularity != qtyping.QuantGranularity.CHANNELWISE
      and tensor_quant_config.granularity != qtyping.QuantGranularity.TENSORWISE
  ):
    raise ValueError(
        f"Unsupported granularity: {tensor_quant_config.granularity}."
    )

  if not tensor_quant_config.symmetric:
    raise ValueError(
        f"Unsupported symmetry: {tensor_quant_config.symmetric}. OCTAV"
        " supports symmetric quantization only for now."
    )

  if tensor_qsv is None:
    # We need min/max to calculate quantization parameters, which
    # should be collected during the calibration process. However,
    # weight-only and DRQ do not require calibration, thus it is
    # possible that this information is missing here. In that case we
    # collect min/max on the spot.
    tensor_min_max = common_quantize.init_tensor_min_max(
        tensor_content,
        op_info,
    )
  else:
    tensor_min_max = tensor_qsv

  if "min" not in tensor_min_max or "max" not in tensor_min_max:
    raise ValueError(
        "min and max must be provided to produce tensor quantization"
        " parameters. Check if the correct calibration results are passed into"
        " the ParamsGenerator."
    )

  quantized_dim = None
  if tensor_quant_config.granularity == qtyping.QuantGranularity.CHANNELWISE:
    quantized_dim = common_utils.get_weight_quantized_dim(
        op_info, tensor_content
    )

  clipping_constants = _guess_clipping_with_octav(
      tensor_content,
      tensor_quant_config.num_bits,
      common_utils.get_reduce_dims(quantized_dim, tensor_content.shape),
      max_iterations=10,
      # Only symmetric quantization reaches this point (checked above), so the
      # signed-int divisor from the paper always applies.
      exponent_divisor=3.0,
  )
  # An all-zero tensor/channel makes OCTAV converge to a clipping constant of
  # 0. Clipping the bound to [-0, 0] would yield a zero scale and a division
  # by zero when quantizing, so disable clipping wherever the constant is not
  # strictly positive (this also covers NaN) and let the min/max bound apply.
  clipping_constants = np.where(
      clipping_constants > 0, clipping_constants, np.inf
  )

  zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
      tensor_min_max["min"],
      tensor_min_max["max"],
      tensor_quant_config.num_bits,
      tensor_quant_config.symmetric,
      clipping_constants,
  )

  quant_params = qtyping.UniformQuantParams(
      scale=scale,
      zero_point=zp,
      num_bits=tensor_quant_config.num_bits,
      symmetric=tensor_quant_config.symmetric,
      quantized_dimension=quantized_dim,
      block_size=tensor_quant_config.block_size,
  )

  quantized_vars = uniform_quantize_tensor.uniform_quantize(
      tensor_content, quant_params
  )

  return dataclasses.replace(quant_params, quantized_data=quantized_vars)
@@ -0,0 +1,186 @@
1
+ # Copyright 2024 The AI Edge Quantizer Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import os
17
+ from typing import cast
18
+
19
+ from absl.testing import parameterized
20
+ import numpy as np
21
+
22
+ from tensorflow.python.platform import googletest
23
+ from ai_edge_quantizer import qtyping
24
+ from ai_edge_quantizer.algorithms.uniform_quantize import octav
25
+ from ai_edge_quantizer.utils import test_utils
26
+ from ai_edge_quantizer.utils import tfl_flatbuffer_utils
27
+
28
+
29
class OctavQuantizeTest(parameterized.TestCase):
  """Tests for general functions for OCTAV."""

  def setUp(self):
    super().setUp()
    # Fixed seed so any random data is reproducible across runs.
    np.random.seed(666)
    self._test_model_path = os.path.join(
        test_utils.get_path_to_datafile("../../tests/models"),
        "conv_fc_mnist.tflite",
    )
    self._test_model = tfl_flatbuffer_utils.read_model(self._test_model_path)
    # The test model has one subgraph for now.
    self._graph_info = qtyping.GraphInfo(
        subgraph_tensors=self._test_model.subgraphs[0].tensors,
        buffers=self._test_model.buffers,
    )
    self._tensor_name_to_qsv = {}
    subgraph0 = self._test_model.subgraphs[0]
    # Operator 3 of the subgraph is used as the fully-connected op under test.
    subgraph_op_index = 3
    fc_op = subgraph0.operators[subgraph_op_index]
    # No weight tensor config is attached to the op; each test supplies its
    # own TensorQuantizationConfig directly.
    self._fc_op_info = qtyping.OpInfo(
        op=fc_op,
        op_name=qtyping.TFLOperationName.FULLY_CONNECTED,
        subgraph_op_index=subgraph_op_index,
        op_quant_config=qtyping.OpQuantizationConfig(
            weight_tensor_config=None,
        ),
    )

  def test_get_tensor_quant_params_unsupported_granularity_assert(self):
    # Blockwise granularity must be rejected for weight tensors.
    err_msg = "Unsupported granularity"
    test_data = np.array([[-7, 7], [4, -4], [4, -4], [7, 7]])
    with self.assertRaisesWithPredicateMatch(
        ValueError, lambda err: err_msg in str(err)
    ):
      _ = octav.get_tensor_quant_params(
          op_info=self._fc_op_info,
          tensor_quant_config=qtyping.TensorQuantizationConfig(
              num_bits=4,
              symmetric=True,
              granularity=qtyping.QuantGranularity.BLOCKWISE,
          ),
          tensor_content=test_data,
      )

  def test_get_tensor_quant_params_unsupported_symmetry(self):
    # Asymmetric quantization must be rejected for weight tensors.
    err_msg = "Unsupported symmetry"
    test_data = np.array([[-7, 7], [4, -4], [4, -4], [7, 7]])
    with self.assertRaisesWithPredicateMatch(
        ValueError, lambda err: err_msg in str(err)
    ):
      _ = octav.get_tensor_quant_params(
          op_info=self._fc_op_info,
          tensor_quant_config=qtyping.TensorQuantizationConfig(
              num_bits=4,
              symmetric=False,
              granularity=qtyping.QuantGranularity.CHANNELWISE,
          ),
          tensor_content=test_data,
      )

  def test_get_tensor_quant_params_success_with_qsv(self):
    # Fall back to naive_min_max_quantize.py for non-weight tensors.
    # No tensor_content is passed here, only the min/max QSV.
    # NOTE(review): `symmetric` is left at the config default; the assertions
    # below expect symmetric behavior - confirm the default.
    tensor_quant_params = octav.get_tensor_quant_params(
        op_info=self._fc_op_info,
        tensor_quant_config=qtyping.TensorQuantizationConfig(
            num_bits=8,
            granularity=qtyping.QuantGranularity.TENSORWISE,
        ),
        tensor_qsv={
            "min": np.array([-1]),
            "max": np.array([1]),
        },
    )

    self.assertIsNone(tensor_quant_params.quantized_dimension)
    scale = tensor_quant_params.scale
    self.assertEqual(scale.shape, (1,))
    self.assertSequenceAlmostEqual(scale.flatten(), [1 / 127])

    # Zero point should be zero for symmetric quantization.
    zp = tensor_quant_params.zero_point
    self.assertEqual(np.sum(zp), 0)
    self.assertEqual(zp.shape, (1,))

  def test_get_tensor_quant_params_sanity_tensorwise(self):
    # The +/-1e5 entries are outliers relative to the other values, so the
    # dequantized maximum is expected to be clipped below the real maximum.
    test_data = np.array([
        [-1e5, 25, -50, 75, -100, 125],
        [25, -30, 50, -75, 1e5, -125],
        [50, -60, 70, -80, 90, -100],
    ])
    quant_params = octav.get_tensor_quant_params(
        op_info=self._fc_op_info,
        tensor_quant_config=qtyping.TensorQuantizationConfig(
            num_bits=4,
            symmetric=True,
            granularity=qtyping.QuantGranularity.TENSORWISE,
        ),
        tensor_content=test_data,
    )
    # Dequantize (zero point is asserted to be 0 below).
    adjusted_test_data = quant_params.quantized_data * quant_params.scale
    real_max = np.max(np.abs(test_data))
    adjusted_max = np.max(np.abs(adjusted_test_data))

    # Check that some clipping occurred.
    with self.subTest(name="SanityCheckClipping"):
      self.assertLess(adjusted_max, real_max)

    with self.subTest(name="SanityCheckQuantParamsShapes"):
      self.assertEqual(quant_params.zero_point.shape, (1, 1))
      self.assertEqual(quant_params.scale.shape, (1, 1))
      self.assertIsNone(quant_params.quantized_dimension)
      self.assertIsNotNone(quant_params.quantized_data)
      self.assertTupleEqual(
          cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
      )

    with self.subTest(name="SanityCheckQuantParamsValues"):
      self.assertTrue(np.all(quant_params.zero_point == 0))

  def test_get_tensor_quant_params_sanity_channelwise(self):
    # Same data as the tensorwise test, quantized per row (dimension 0).
    test_data = np.array([
        [-1e5, 25, -50, 75, -100, 125],
        [25, -30, 50, -75, 1e5, -125],
        [50, -60, 70, -80, 90, -100],
    ])
    quant_params = octav.get_tensor_quant_params(
        op_info=self._fc_op_info,
        tensor_quant_config=qtyping.TensorQuantizationConfig(
            num_bits=4,
            symmetric=True,
            granularity=qtyping.QuantGranularity.CHANNELWISE,
        ),
        tensor_content=test_data,
    )
    # Dequantize (zero point is asserted to be 0 below).
    adjusted_test_data = quant_params.quantized_data * quant_params.scale
    for i, row in enumerate(test_data):
      real_max = np.max(np.abs(row))
      adjusted_max = np.max(np.abs(adjusted_test_data[i]))
      # Check that some clipping occurred.
      with self.subTest(name="SanityCheckClipping"):
        self.assertLess(adjusted_max, real_max)

    with self.subTest(name="SanityCheckQuantParamsShapes"):
      self.assertEqual(quant_params.zero_point.shape, (test_data.shape[0], 1))
      self.assertEqual(quant_params.scale.shape, (test_data.shape[0], 1))
      self.assertIsNotNone(quant_params.quantized_data)
      self.assertTupleEqual(
          cast(np.ndarray, quant_params.quantized_data).shape, test_data.shape
      )

    with self.subTest(name="SanityCheckQuantParamsValues"):
      self.assertTrue(np.all(quant_params.zero_point == 0))
      self.assertEqual(quant_params.quantized_dimension, 0)
183
+
184
+
185
# Run the test suite when this module is executed as a script.
if __name__ == "__main__":
  googletest.main()
@@ -16,6 +16,7 @@
16
16
  """Uniform quantize in tensor level."""
17
17
 
18
18
  import dataclasses
19
+ from typing import Optional
19
20
  import numpy as np
20
21
  from ai_edge_quantizer import qtyping
21
22
 
@@ -237,7 +238,11 @@ def symmetric_quantize_bias_tensor(
237
238
 
238
239
 
239
240
  def tensor_zp_scale_from_min_max(
240
- min_value, max_value, num_bits: int, symmetric: bool
241
+ min_value,
242
+ max_value,
243
+ num_bits: int,
244
+ symmetric: bool,
245
+ clipping_values: Optional[np.ndarray] = None,
241
246
  ):
242
247
  """Get zero point and scale from min and max value.
243
248
 
@@ -246,6 +251,10 @@ def tensor_zp_scale_from_min_max(
246
251
  max_value: The maximum value of the tensor (channel-wise supported).
247
252
  num_bits: The number of bits of the tensor.
248
253
  symmetric: Whether the tensor is symmetric.
254
+ clipping_values: Absolute clipping values to apply to the tensor. This will
255
+ clip the tensors to the range [-clipping_values, clipping_values]. This
256
+ should be the same shape as min_value and max_value. If None, no clipping
257
+ will be applied.
249
258
 
250
259
  Returns:
251
260
  The zero point and scale of the tensor.
@@ -261,6 +270,8 @@ def tensor_zp_scale_from_min_max(
261
270
  if symmetric:
262
271
  bound = np.maximum(np.abs(min_value), np.abs(max_value))
263
272
  bound = np.maximum(bound, min_bound)
273
+ if clipping_values is not None:
274
+ bound = np.clip(bound, -clipping_values, clipping_values)
264
275
  if not qtype.signed:
265
276
  half_q = (qmax - 1) / 2
266
277
  scale = bound / half_q
@@ -268,7 +279,6 @@ def tensor_zp_scale_from_min_max(
268
279
  else:
269
280
  scale = bound / qmax
270
281
  zp = np.zeros_like(scale, dtype=np.int32)
271
-
272
282
  else:
273
283
  # Include 0 to the range to support zero-padding.
274
284
  # See: https://arxiv.org/pdf/1712.05877.pdf
@@ -276,6 +286,8 @@ def tensor_zp_scale_from_min_max(
276
286
  bound_max = np.maximum(max_value, np.zeros_like(max_value))
277
287
  bound_min = np.minimum(min_value, np.zeros_like(min_value))
278
288
  bound = np.maximum(bound_max - bound_min, min_bound)
289
+ if clipping_values is not None:
290
+ bound = np.clip(bound, -clipping_values, clipping_values)
279
291
  scale = bound / (qmax - qmin)
280
292
  zp = qmin - bound_min / scale
281
293
  zp = np.rint(zp)
@@ -352,6 +352,33 @@ class TensorUtilsTest(parameterized.TestCase):
352
352
  # Range has to be extended to include zero.
353
353
  self.assertEqual(calculated_min, 0)
354
354
 
355
+ @parameterized.parameters(
356
+ # number of bits, is_symmetric, max bound of the quantized range.
357
+ (4, True, 7),
358
+ (8, False, 255),
359
+ )
360
+ def test_tensor_zp_scale_from_min_max_with_clipping(
361
+ self, num_bits, symmetric, quantized_bound
362
+ ):
363
+ min_val = np.array([[1.0]])
364
+ max_val = np.array([[5.0]])
365
+ clipping_values = np.array([4.0])
366
+ zp, scale = uniform_quantize_tensor.tensor_zp_scale_from_min_max(
367
+ min_val, max_val, num_bits, symmetric, clipping_values
368
+ )
369
+ expected_scale = clipping_values / quantized_bound
370
+
371
+ with self.subTest(name="CheckShapes"):
372
+ self.assertEqual(zp.shape, scale.shape)
373
+ self.assertEqual(zp.shape, (1, 1))
374
+
375
+ if symmetric:
376
+ with self.subTest(name="CheckSymmetricZpValue"):
377
+ self.assertEqual(zp[0], 0)
378
+
379
+ with self.subTest(name="CheckScaleValue"):
380
+ self.assertEqual(scale[0], expected_scale)
381
+
355
382
 
356
383
  if __name__ == "__main__":
357
384
  googletest.main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ai-edge-quantizer-nightly
3
- Version: 0.0.1.dev20250317
3
+ Version: 0.1.0.dev20250318
4
4
  Summary: A quantizer for advanced developers to quantize converted AI Edge models.
5
5
  Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
6
6
  Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
@@ -27,7 +27,7 @@ License-File: LICENSE
27
27
  Requires-Dist: immutabledict
28
28
  Requires-Dist: numpy
29
29
  Requires-Dist: tf-nightly>=2.17.0.dev20240509
30
- Requires-Dist: ai-edge-litert-nightly
30
+ Requires-Dist: ai-edge-litert>=1.2.0
31
31
 
32
32
  It aims to facilitate advanced users to strive for optimal performance on
33
33
  resource demanding models (e.g., GenAI models).
@@ -1,5 +1,5 @@
1
1
  ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
2
- ai_edge_quantizer/algorithm_manager.py,sha256=VZx4HvGEgt6XAS-b0breFPioLfKkAFNG71VLSG4aKS8,7712
2
+ ai_edge_quantizer/algorithm_manager.py,sha256=sOZ1T8n0YYi_ijDDuzryNJi2HUPggeo9uWNJri3elv0,10431
3
3
  ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
4
4
  ai_edge_quantizer/algorithm_manager_api_test.py,sha256=tL_ozYFTsOPX8qGcti0KTz37nVsCxf0SSG5C45SyT-g,7319
5
5
  ai_edge_quantizer/calibrator.py,sha256=n7AD9j7UScR-CieoI6DQRMeiG_fhLBfSLRiM4460xaM,11895
@@ -28,14 +28,16 @@ ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCP
28
28
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
29
29
  ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64eDDH9bmRWy6Bl1peHnhGewLnFJjvnhYOdjo1zYOA,22625
30
30
  ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
31
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=osvXIwVVEi5DRiT_MpJpAXGZCVMEoR0tcc6EwuAtcp0,22330
31
+ ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=LnItMEsR47qe8T5pg9UI5NGfhi4cOxt0vAU35IkWnaY,27163
32
32
  ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=qMmKbWqxrCoVKbLKHn9WuCrGKPfHkEyU0Nmhokh8Qeo,2597
33
33
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=OTXjEZ3Ctq3ffYzisX-6HwgK_DuA7uos_aap5PiIUPE,8686
34
34
  ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=y7BK11fkF63Ex_Jzg3fbIdy0D_Ca6HuvChVZR7Uwggc,8073
35
- ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=fBqSidFVKZmdO-xIFfwZPdIN1eLJjOik8mUZxZj2ljk,12149
35
+ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize.py,sha256=aWHU4rneBv7ErufEWKQGAWTK-pgfn-rG9mAkC0d9V6Q,7871
36
36
  ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha256=Hok09dloSyBfD0oDM5VABdSZjM9JWSQhm_hDHNbFujA,7640
37
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=Q_vx7YN7KMpjubsngxRdJ4bfdSIV-gmXjtVuxIkZuX4,11078
38
- ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=WZ4_bvbG999nOtCIqn7mrMnpRdoJOdiyzxhsL_QiPHA,11395
37
+ ai_edge_quantizer/algorithms/uniform_quantize/octav.py,sha256=e5wYtki-vl739gSVAZHAKcs2hA87GvFUjVoSUPlnkyM,6433
38
+ ai_edge_quantizer/algorithms/uniform_quantize/octav_test.py,sha256=IcTOaJ1pxtqsitqxOEP9LROVEP_19VFutHalqNied4I,6940
39
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=WmZzKQlzfu9gFr9SbUDoPY3rFqTl363om8-0rTLwotw,11629
40
+ ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=G2PFpHhF-6OOuAwQ1lei63QEIm7uzIZJ62qpgA02qTM,12288
39
41
  ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
40
42
  ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4qSlVNx3-91kJufnnJV1RdVRXBPapylZkrAp2nywoao,34581
41
43
  ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
@@ -62,8 +64,8 @@ ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBE
62
64
  ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
63
65
  ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
64
66
  ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
65
- ai_edge_quantizer_nightly-0.0.1.dev20250317.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
66
- ai_edge_quantizer_nightly-0.0.1.dev20250317.dist-info/METADATA,sha256=Y7H57IqB-YuZ_cSLtY16TQmBsudtOQPoAmFU0MfbyvU,1528
67
- ai_edge_quantizer_nightly-0.0.1.dev20250317.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
68
- ai_edge_quantizer_nightly-0.0.1.dev20250317.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
69
- ai_edge_quantizer_nightly-0.0.1.dev20250317.dist-info/RECORD,,
67
+ ai_edge_quantizer_nightly-0.1.0.dev20250318.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
68
+ ai_edge_quantizer_nightly-0.1.0.dev20250318.dist-info/METADATA,sha256=_g6GbgGj2wuFphtUCwZAcBXPBHhBbVFIOOUDPquuIHs,1527
69
+ ai_edge_quantizer_nightly-0.1.0.dev20250318.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
70
+ ai_edge_quantizer_nightly-0.1.0.dev20250318.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
71
+ ai_edge_quantizer_nightly-0.1.0.dev20250318.dist-info/RECORD,,