ai-edge-quantizer-nightly 0.0.1.dev20250310__py3-none-any.whl → 0.0.1.dev20250312__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_quantizer/algorithm_manager.py +1 -0
- ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py +15 -0
- ai_edge_quantizer/algorithms/utils/common_utils.py +0 -1
- ai_edge_quantizer/calibrator.py +51 -38
- ai_edge_quantizer/default_policy.py +36 -3
- ai_edge_quantizer/params_generator.py +29 -7
- ai_edge_quantizer/qtyping.py +3 -0
- ai_edge_quantizer/transformation_instruction_generator.py +23 -25
- ai_edge_quantizer/transformation_performer.py +5 -0
- ai_edge_quantizer/transformations/duplicate_buffer.py +45 -0
- ai_edge_quantizer/transformations/duplicate_buffer_test.py +106 -0
- ai_edge_quantizer/transformations/transformation_utils.py +25 -7
- ai_edge_quantizer/transformations/transformation_utils_test.py +32 -0
- ai_edge_quantizer/utils/tfl_flatbuffer_utils.py +47 -36
- ai_edge_quantizer/utils/tfl_interpreter_utils.py +4 -2
- {ai_edge_quantizer_nightly-0.0.1.dev20250310.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info}/METADATA +1 -1
- {ai_edge_quantizer_nightly-0.0.1.dev20250310.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info}/RECORD +20 -18
- {ai_edge_quantizer_nightly-0.0.1.dev20250310.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info}/LICENSE +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250310.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info}/WHEEL +0 -0
- {ai_edge_quantizer_nightly-0.0.1.dev20250310.dist-info → ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info}/top_level.txt +0 -0
@@ -100,6 +100,7 @@ MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT = {
|
|
100
100
|
_TFLOpName.DYNAMIC_UPDATE_SLICE: (
|
101
101
|
common_quantize.materialize_dynamic_update_slice
|
102
102
|
),
|
103
|
+
_TFLOpName.STABLEHLO_COMPOSITE: common_quantize.materialize_composite,
|
103
104
|
}
|
104
105
|
for op_name, materialize_func in MIN_MAX_OP_NAME_MATERIALIZE_FUNC_DICT.items():
|
105
106
|
register_quantized_op(
|
@@ -110,6 +110,21 @@ def materialize_output(
|
|
110
110
|
)
|
111
111
|
|
112
112
|
|
113
|
+
def materialize_composite(
|
114
|
+
get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
|
115
|
+
op_info: qtyping.OpInfo,
|
116
|
+
graph_info: qtyping.GraphInfo,
|
117
|
+
tensor_name_to_qsv: dict[str, Any],
|
118
|
+
) -> list[qtyping.TensorTransformationParams]:
|
119
|
+
"""Materialize tensors in the virtual output op."""
|
120
|
+
return common_utils.materialize_standard_op(
|
121
|
+
op_info,
|
122
|
+
graph_info,
|
123
|
+
tensor_name_to_qsv,
|
124
|
+
get_tensor_quant_params_fn,
|
125
|
+
)
|
126
|
+
|
127
|
+
|
113
128
|
def materialize_add(
|
114
129
|
get_tensor_quant_params_fn: qtyping.GetTensorQuantParamsFuncSignature,
|
115
130
|
op_info: qtyping.OpInfo,
|
ai_edge_quantizer/calibrator.py
CHANGED
@@ -23,6 +23,7 @@ from absl import logging
|
|
23
23
|
import numpy as np
|
24
24
|
|
25
25
|
from ai_edge_quantizer import algorithm_manager
|
26
|
+
from ai_edge_quantizer import default_policy as policy
|
26
27
|
from ai_edge_quantizer import qtyping
|
27
28
|
from ai_edge_quantizer import recipe_manager
|
28
29
|
from ai_edge_quantizer.utils import calibration_utils
|
@@ -124,50 +125,62 @@ class Calibrator:
|
|
124
125
|
)
|
125
126
|
if cache_output:
|
126
127
|
self._cached_output.append(signature_output)
|
127
|
-
self._tensor_content_map.update(
|
128
|
-
tfl_interpreter_utils.get_tensor_name_to_content_map(
|
129
|
-
self._tfl_interpreter, subgraph_idx
|
130
|
-
)
|
131
|
-
)
|
132
128
|
|
133
129
|
# Step2: go through each op in subgraph to update quantization
|
134
130
|
# statistic values.
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
)
|
143
|
-
for op in subgraph.operators:
|
144
|
-
if isinstance(op, qtyping.IOOperator):
|
145
|
-
op_key = op.op_key
|
146
|
-
else:
|
147
|
-
op_code = op_codes[op.opcodeIndex].builtinCode
|
148
|
-
if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
|
149
|
-
continue
|
150
|
-
op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
|
151
|
-
# Step2.1: query the quantization_recipe to get op quantization
|
152
|
-
# settings.
|
153
|
-
op_scope = self._get_op_scope(op, subgraph.tensors)
|
154
|
-
algorithm_name, _ = model_recipe_manager.get_quantization_configs(
|
155
|
-
op_key, op_scope
|
131
|
+
subgraphs_inds = [subgraph_idx]
|
132
|
+
while subgraphs_inds:
|
133
|
+
subgraph_ind = subgraphs_inds.pop()
|
134
|
+
self._tensor_content_map.update(
|
135
|
+
tfl_interpreter_utils.get_tensor_name_to_content_map(
|
136
|
+
self._tfl_interpreter, subgraph_ind
|
137
|
+
)
|
156
138
|
)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
# function.
|
161
|
-
calibrate_func = algorithm_manager.get_quantization_func(
|
162
|
-
algorithm_name, op_key, qtyping.QuantizeMode.CALIBRATE
|
139
|
+
subgraph = self._flatbuffer_model.subgraphs[subgraph_ind]
|
140
|
+
graph_info = qtyping.GraphInfo(
|
141
|
+
subgraph.tensors, self._flatbuffer_model.buffers
|
163
142
|
)
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
op_updated_tensor_name = self._update_qsvs(
|
168
|
-
op_qsvs, updated_tensor_names, qsv_update_func
|
143
|
+
# Add input/output operators to the subgraph.
|
144
|
+
subgraph.operators += (
|
145
|
+
tfl_flatbuffer_utils.get_subgraph_input_output_operators(subgraph)
|
169
146
|
)
|
170
|
-
|
147
|
+
for op in subgraph.operators:
|
148
|
+
if isinstance(op, qtyping.IOOperator):
|
149
|
+
op_key = op.op_key
|
150
|
+
else:
|
151
|
+
op_code = op_codes[op.opcodeIndex].builtinCode
|
152
|
+
if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
|
153
|
+
continue
|
154
|
+
op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
|
155
|
+
# Step2.1: query the quantization_recipe to get op quantization
|
156
|
+
# settings.
|
157
|
+
op_scope = self._get_op_scope(op, subgraph.tensors)
|
158
|
+
algorithm_name, _ = model_recipe_manager.get_quantization_configs(
|
159
|
+
op_key, op_scope
|
160
|
+
)
|
161
|
+
if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
|
162
|
+
continue
|
163
|
+
if policy.is_conditionally_unquantized(op):
|
164
|
+
continue
|
165
|
+
|
166
|
+
# Step2.2: query algorithm_manager to get/call the related
|
167
|
+
# calibration function.
|
168
|
+
calibrate_func = algorithm_manager.get_quantization_func(
|
169
|
+
algorithm_name, op_key, qtyping.QuantizeMode.CALIBRATE
|
170
|
+
)
|
171
|
+
op_qsvs = calibrate_func(op, graph_info, self._tensor_content_map)
|
172
|
+
# Step3: Update tensor qsvs with the new values. Ignore the tensor
|
173
|
+
# names that are already updated in this round of calibration.
|
174
|
+
op_updated_tensor_name = self._update_qsvs(
|
175
|
+
op_qsvs, updated_tensor_names, qsv_update_func
|
176
|
+
)
|
177
|
+
updated_tensor_names.update(op_updated_tensor_name)
|
178
|
+
|
179
|
+
# Step4: Invoke any subgraphs invoked as a side effect of the op.
|
180
|
+
subgraphs_inds.extend(
|
181
|
+
tfl_flatbuffer_utils.get_op_side_effect_subgraphs(op)
|
182
|
+
)
|
183
|
+
|
171
184
|
# Reset interpreter after one round of calibration.
|
172
185
|
self._tfl_interpreter.reset_all_variables()
|
173
186
|
|
@@ -18,8 +18,10 @@
|
|
18
18
|
import collections
|
19
19
|
import copy
|
20
20
|
import json
|
21
|
-
from typing import Any
|
21
|
+
from typing import Any, Union
|
22
22
|
from ai_edge_quantizer import qtyping
|
23
|
+
from ai_edge_litert import schema_py_generated as schema # pylint:disable=g-direct-tensorflow-import
|
24
|
+
from tensorflow.lite.tools import flatbuffer_utils # pylint: disable=g-direct-tensorflow-import
|
23
25
|
|
24
26
|
_TFLOpName = qtyping.TFLOperationName
|
25
27
|
_OpQuantizationConfig = qtyping.OpQuantizationConfig
|
@@ -168,7 +170,9 @@ DEFAULT_JSON_POLICY = """
|
|
168
170
|
"EMBEDDING_LOOKUP",
|
169
171
|
"SUM",
|
170
172
|
"SELECT_V2",
|
171
|
-
"DYNAMIC_UPDATE_SLICE"
|
173
|
+
"DYNAMIC_UPDATE_SLICE",
|
174
|
+
"SELECT_V2",
|
175
|
+
"STABLEHLO_COMPOSITE"
|
172
176
|
],
|
173
177
|
"static_wi8_ai8": [
|
174
178
|
"ADD",
|
@@ -197,7 +201,9 @@ DEFAULT_JSON_POLICY = """
|
|
197
201
|
"EMBEDDING_LOOKUP",
|
198
202
|
"SUM",
|
199
203
|
"SELECT_V2",
|
200
|
-
"DYNAMIC_UPDATE_SLICE"
|
204
|
+
"DYNAMIC_UPDATE_SLICE",
|
205
|
+
"SELECT_V2",
|
206
|
+
"STABLEHLO_COMPOSITE"
|
201
207
|
],
|
202
208
|
"static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT", "EMBEDDING_LOOKUP"],
|
203
209
|
"static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT", "EMBEDDING_LOOKUP"],
|
@@ -288,6 +294,33 @@ def _unroll_json_config(
|
|
288
294
|
return quant_configs
|
289
295
|
|
290
296
|
|
297
|
+
# TODO: b/401024954 - Have a better way to specify recipes based on op options.
|
298
|
+
def is_conditionally_unquantized(
|
299
|
+
op: Union[schema.Operator, schema.OperatorT],
|
300
|
+
) -> bool:
|
301
|
+
"""Checks if the operator is conditionally unquantized.
|
302
|
+
|
303
|
+
We may want to quantize an op only when its has certain options.
|
304
|
+
Policies/recipes
|
305
|
+
are not aware of op options, so it is checked here.
|
306
|
+
|
307
|
+
Args:
|
308
|
+
op: The operator to check.
|
309
|
+
|
310
|
+
Returns:
|
311
|
+
True if the operator is conditionally unquantized, False otherwise.
|
312
|
+
"""
|
313
|
+
if opts := flatbuffer_utils.get_options_as(
|
314
|
+
op, schema.StableHLOCompositeOptionsT
|
315
|
+
):
|
316
|
+
name: bytes = opts.name
|
317
|
+
# Non npu_call composites may have a kernel and as such will not be
|
318
|
+
# quantized.
|
319
|
+
return ("od" + "ml.npu_call") not in name.decode("utf-8")
|
320
|
+
|
321
|
+
return False
|
322
|
+
|
323
|
+
|
291
324
|
def update_default_config_policy(raw_json_policy: str):
|
292
325
|
"""Updates the default config check policy."""
|
293
326
|
json_policy_content = json.loads(raw_json_policy)
|
@@ -19,6 +19,7 @@ import copy
|
|
19
19
|
from typing import Any, Optional, Union
|
20
20
|
|
21
21
|
from ai_edge_quantizer import algorithm_manager
|
22
|
+
from ai_edge_quantizer import default_policy as policy
|
22
23
|
from ai_edge_quantizer import qtyping
|
23
24
|
from ai_edge_quantizer import recipe_manager
|
24
25
|
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
@@ -73,8 +74,12 @@ class ParamsGenerator:
|
|
73
74
|
if model_qsvs is None:
|
74
75
|
model_qsvs = {}
|
75
76
|
|
77
|
+
skip_subgraphs = set()
|
76
78
|
op_codes = self.flatbuffer_model.operatorCodes
|
77
|
-
for subgraph in self.flatbuffer_model.subgraphs:
|
79
|
+
for sg_ind, subgraph in enumerate(self.flatbuffer_model.subgraphs):
|
80
|
+
if sg_ind in skip_subgraphs:
|
81
|
+
continue
|
82
|
+
|
78
83
|
graph_info = qtyping.GraphInfo(
|
79
84
|
subgraph.tensors, self.flatbuffer_model.buffers
|
80
85
|
)
|
@@ -103,10 +108,19 @@ class ParamsGenerator:
|
|
103
108
|
algorithm_name, op_quant_config = (
|
104
109
|
model_recipe_manager.get_quantization_configs(op_key, op_scope)
|
105
110
|
)
|
111
|
+
if policy.is_conditionally_unquantized(op):
|
112
|
+
algorithm_name = algorithm_manager.AlgorithmName.NO_QUANTIZE
|
113
|
+
|
106
114
|
if algorithm_name == algorithm_manager.AlgorithmName.NO_QUANTIZE:
|
115
|
+
side_effect_subgraphs = (
|
116
|
+
tfl_flatbuffer_utils.get_op_side_effect_subgraphs(op)
|
117
|
+
)
|
118
|
+
skip_subgraphs.update(side_effect_subgraphs)
|
119
|
+
|
107
120
|
op_quant_results = self._get_params_for_no_quant_op(
|
108
121
|
subgraph_op_id, op, subgraph.tensors
|
109
122
|
)
|
123
|
+
|
110
124
|
else:
|
111
125
|
op_info = qtyping.OpInfo(op, op_key, subgraph_op_id, op_quant_config)
|
112
126
|
# Step2: query algorithm_manager to get/call the related function.
|
@@ -259,17 +273,25 @@ class ParamsGenerator:
|
|
259
273
|
RuntimeError: If the tensors sharing the same buffer have different
|
260
274
|
quantization settings.
|
261
275
|
"""
|
276
|
+
def get_result(tensor: Any):
|
277
|
+
return self.model_quant_results.get(
|
278
|
+
tfl_flatbuffer_utils.get_tensor_name(tensor), None
|
279
|
+
)
|
280
|
+
|
262
281
|
for tensors in self.buffer_to_tensors.values():
|
263
282
|
if len(tensors) <= 1:
|
264
283
|
continue
|
284
|
+
|
265
285
|
first_tensor = tensors[0]
|
266
|
-
first_tensor_params =
|
267
|
-
|
268
|
-
|
286
|
+
first_tensor_params = get_result(first_tensor)
|
287
|
+
if first_tensor_params is None:
|
288
|
+
continue
|
289
|
+
|
269
290
|
for tensor in tensors[1:]:
|
270
|
-
tensor_params =
|
271
|
-
|
272
|
-
|
291
|
+
tensor_params = get_result(tensor)
|
292
|
+
if tensor_params is None:
|
293
|
+
continue
|
294
|
+
|
273
295
|
if not _compatible_tensor_transformation_params(
|
274
296
|
first_tensor_params, tensor_params
|
275
297
|
):
|
ai_edge_quantizer/qtyping.py
CHANGED
@@ -61,6 +61,7 @@ class TFLOperationName(str, enum.Enum):
|
|
61
61
|
SUM = 'SUM'
|
62
62
|
SELECT_V2 = 'SELECT_V2'
|
63
63
|
DYNAMIC_UPDATE_SLICE = 'DYNAMIC_UPDATE_SLICE'
|
64
|
+
STABLEHLO_COMPOSITE = 'STABLEHLO_COMPOSITE'
|
64
65
|
|
65
66
|
|
66
67
|
class QuantizeMode(enum.Enum):
|
@@ -108,6 +109,8 @@ class QuantTransformation(enum.Enum):
|
|
108
109
|
# Create pattern for emulated subchannel quantization, only support fully
|
109
110
|
# connected op.
|
110
111
|
EMULATED_SUBCHANNEL = 4
|
112
|
+
# Duplicate the buffer.
|
113
|
+
DUPLICATE_BUFFER = 5
|
111
114
|
|
112
115
|
|
113
116
|
@dataclasses.dataclass(frozen=True)
|
@@ -458,49 +458,47 @@ class TransformationInstructionsGenerator:
|
|
458
458
|
self,
|
459
459
|
param: qtyping.TensorTransformationParams,
|
460
460
|
) -> qtyping.TensorTransformationInsts:
|
461
|
-
"""
|
461
|
+
"""Convert single tensor quant params to transformation instructions.
|
462
462
|
|
463
463
|
Args:
|
464
|
-
param:
|
464
|
+
param: Quantization parameters of a tensor in the graph.
|
465
465
|
|
466
466
|
Returns:
|
467
|
-
|
467
|
+
Transformations to be applied to the given tensor.
|
468
468
|
"""
|
469
|
-
#
|
469
|
+
# Setup the structure.
|
470
470
|
tensor_info = self._tensor_name_to_graph_info[param.tensor_name]
|
471
471
|
tensor_trans_insts = qtyping.TensorTransformationInsts(
|
472
472
|
param.tensor_name, tensor_info.subgraph_id, []
|
473
473
|
)
|
474
474
|
|
475
|
-
#
|
476
|
-
consumer_group = self._group_consumer_transformations(param)
|
477
|
-
# at this point, starting from index 1 of consumer_group, we're having sets
|
478
|
-
# that represents transformations that can be grouped together
|
479
|
-
transformations_available_for_vertical_optimization = (
|
480
|
-
self._produce_transformation_for_vertical_opt(consumer_group, param)
|
481
|
-
)
|
482
|
-
other_consumer_transformations = (
|
483
|
-
self._produce_consumer_transformations_unavailable_for_vertical_opt(
|
484
|
-
consumer_group, param
|
485
|
-
)
|
486
|
-
)
|
487
|
-
|
475
|
+
# Add all producer rules.
|
488
476
|
transformations = []
|
489
|
-
|
490
|
-
|
491
|
-
if producer_params:
|
492
|
-
for transformation in producer_params.transformations:
|
477
|
+
if param.producer:
|
478
|
+
for transformation in param.producer.transformations:
|
493
479
|
transformations.append(
|
494
480
|
qtyping.TransformationInst(
|
495
481
|
transformation,
|
496
482
|
tensor_info.tensor_id,
|
497
483
|
tensor_info.producer,
|
498
484
|
tensor_info.consumers,
|
499
|
-
|
485
|
+
param.producer.parameters,
|
500
486
|
)
|
501
487
|
)
|
502
488
|
|
503
|
-
#
|
489
|
+
# Horizontal optimization.
|
490
|
+
consumer_group = self._group_consumer_transformations(param)
|
491
|
+
# At this point, starting from index 1 of consumer_group, we're having sets
|
492
|
+
# that represent transformations that can be grouped together.
|
493
|
+
transformations_available_for_vertical_optimization = (
|
494
|
+
self._produce_transformation_for_vertical_opt(consumer_group, param)
|
495
|
+
)
|
496
|
+
other_consumer_transformations = (
|
497
|
+
self._produce_consumer_transformations_unavailable_for_vertical_opt(
|
498
|
+
consumer_group, param
|
499
|
+
)
|
500
|
+
)
|
501
|
+
# Apply vertical optimization.
|
504
502
|
last_producer_rule_idx = len(transformations) - 1
|
505
503
|
if last_producer_rule_idx >= 0:
|
506
504
|
transformations += self._apply_vertical_optimization(
|
@@ -509,11 +507,11 @@ class TransformationInstructionsGenerator:
|
|
509
507
|
)
|
510
508
|
else:
|
511
509
|
transformations += transformations_available_for_vertical_optimization
|
512
|
-
# Adding other consumers rules
|
510
|
+
# Adding other consumers rules.
|
513
511
|
transformations += other_consumer_transformations
|
514
512
|
tensor_trans_insts.instructions = transformations
|
515
513
|
# Check the generated transformation instructions are valid, the function
|
516
|
-
# will raise an error if the instructions are not valid
|
514
|
+
# will raise an error if the instructions are not valid.
|
517
515
|
self._check_tensor_transformation_instructions_valid(tensor_trans_insts)
|
518
516
|
|
519
517
|
return tensor_trans_insts
|
@@ -18,6 +18,7 @@
|
|
18
18
|
import numpy as np
|
19
19
|
from ai_edge_quantizer import qtyping
|
20
20
|
from ai_edge_quantizer.transformations import dequant_insert
|
21
|
+
from ai_edge_quantizer.transformations import duplicate_buffer
|
21
22
|
from ai_edge_quantizer.transformations import emulated_subchannel
|
22
23
|
from ai_edge_quantizer.transformations import quant_insert
|
23
24
|
from ai_edge_quantizer.transformations import quantize_tensor
|
@@ -68,6 +69,9 @@ class TransformationPerformer:
|
|
68
69
|
emulated_subchannel.emulated_subchannel
|
69
70
|
),
|
70
71
|
qtyping.QuantTransformation.ADD_QUANTIZE: quant_insert.insert_quant,
|
72
|
+
qtyping.QuantTransformation.DUPLICATE_BUFFER: (
|
73
|
+
duplicate_buffer.duplicate_buffer
|
74
|
+
),
|
71
75
|
}
|
72
76
|
# transformations are seprated in two categories:
|
73
77
|
# op_insertion_transformations are transformations that only insert ops
|
@@ -77,6 +81,7 @@ class TransformationPerformer:
|
|
77
81
|
qtyping.QuantTransformation.ADD_DEQUANTIZE,
|
78
82
|
qtyping.QuantTransformation.QUANTIZE_TENSOR,
|
79
83
|
qtyping.QuantTransformation.ADD_QUANTIZE,
|
84
|
+
qtyping.QuantTransformation.DUPLICATE_BUFFER,
|
80
85
|
])
|
81
86
|
self._op_replacement_transformations = set(
|
82
87
|
[qtyping.QuantTransformation.EMULATED_SUBCHANNEL]
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# Copyright 2024 The AI Edge Quantizer Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# ==============================================================================
|
15
|
+
|
16
|
+
"""Duplicate buffer transformation."""
|
17
|
+
|
18
|
+
from ai_edge_quantizer import qtyping
|
19
|
+
from ai_edge_quantizer.transformations import transformation_utils
|
20
|
+
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
21
|
+
|
22
|
+
|
23
|
+
def duplicate_buffer(
|
24
|
+
transformation_input: transformation_utils.TransformationInput,
|
25
|
+
) -> qtyping.TransformationInfo:
|
26
|
+
"""Duplicates the buffer of the tensor."""
|
27
|
+
tensor_id = transformation_input.tensor_id
|
28
|
+
tensor = transformation_input.subgraph.tensors[tensor_id]
|
29
|
+
buffer_data = transformation_input.buffers[tensor.buffer].data
|
30
|
+
if buffer_data is None:
|
31
|
+
tensor_name = tfl_flatbuffer_utils.get_tensor_name(tensor)
|
32
|
+
raise ValueError(
|
33
|
+
'Duplicate Buffer transformation supports only constant tensors.'
|
34
|
+
f' Tensor {tensor_name} is not constant.'
|
35
|
+
)
|
36
|
+
|
37
|
+
duplicated_buffer_id = transformation_utils.add_new_constant_buffer(
|
38
|
+
data=buffer_data,
|
39
|
+
buffers=transformation_input.buffers,
|
40
|
+
)
|
41
|
+
tensor.buffer = duplicated_buffer_id
|
42
|
+
|
43
|
+
return qtyping.TransformationInfo(
|
44
|
+
op_id=0, num_ops_added=0, output_tensor_id=tensor_id
|
45
|
+
)
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# Copyright 2024 The AI Edge Quantizer Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# ==============================================================================
|
15
|
+
|
16
|
+
import os
|
17
|
+
import numpy as np
|
18
|
+
from tensorflow.python.platform import googletest
|
19
|
+
from ai_edge_quantizer import qtyping
|
20
|
+
from ai_edge_quantizer.transformations import duplicate_buffer
|
21
|
+
from ai_edge_quantizer.transformations import transformation_utils
|
22
|
+
from ai_edge_quantizer.utils import test_utils
|
23
|
+
from ai_edge_quantizer.utils import tfl_flatbuffer_utils
|
24
|
+
|
25
|
+
TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile('..')
|
26
|
+
|
27
|
+
|
28
|
+
class DuplicateBufferTest(googletest.TestCase):
|
29
|
+
|
30
|
+
def setUp(self):
|
31
|
+
super().setUp()
|
32
|
+
model_path = os.path.join(
|
33
|
+
TEST_DATA_PREFIX_PATH, 'tests/models/weight_sharing_fcs.tflite'
|
34
|
+
)
|
35
|
+
self.model = tfl_flatbuffer_utils.read_model(model_path)
|
36
|
+
|
37
|
+
def _get_transformation_input(
|
38
|
+
self, subgraph_idx: int, tensor_idx: int
|
39
|
+
) -> transformation_utils.TransformationInput:
|
40
|
+
return transformation_utils.TransformationInput(
|
41
|
+
tensor_id=tensor_idx,
|
42
|
+
buffers=self.model.buffers,
|
43
|
+
# Dummy params below.
|
44
|
+
op_codes=self.model.operatorCodes,
|
45
|
+
subgraph=self.model.subgraphs[subgraph_idx],
|
46
|
+
producer=-1,
|
47
|
+
consumers=[],
|
48
|
+
quant_params=qtyping.UniformQuantParams(
|
49
|
+
num_bits=8,
|
50
|
+
quantized_dimension=None,
|
51
|
+
scale=np.ones(1),
|
52
|
+
zero_point=np.zeros(1),
|
53
|
+
),
|
54
|
+
)
|
55
|
+
|
56
|
+
def test_constant_buffer_is_correctly_duplicated(self):
|
57
|
+
# Duplicate the FC weight tensor in the second subgraph.
|
58
|
+
subgraph_idx = 1
|
59
|
+
subgraph = self.model.subgraphs[subgraph_idx]
|
60
|
+
weight_tensor_idx = 1
|
61
|
+
prev_buffer_id = subgraph.tensors[weight_tensor_idx].buffer
|
62
|
+
prev_num_buffers = len(self.model.buffers)
|
63
|
+
transformation_input = self._get_transformation_input(
|
64
|
+
subgraph_idx, weight_tensor_idx
|
65
|
+
)
|
66
|
+
transformation_info = duplicate_buffer.duplicate_buffer(
|
67
|
+
transformation_input
|
68
|
+
)
|
69
|
+
self.assertEqual(transformation_info.op_id, 0)
|
70
|
+
self.assertEqual(transformation_info.num_ops_added, 0)
|
71
|
+
self.assertEqual(transformation_info.output_tensor_id, 1)
|
72
|
+
# Check that a new buffer was added.
|
73
|
+
self.assertLen(self.model.buffers, prev_num_buffers + 1)
|
74
|
+
# Check that the new buffer is used by the weight tensor.
|
75
|
+
new_buffer_id = len(self.model.buffers) - 1
|
76
|
+
self.assertEqual(subgraph.tensors[weight_tensor_idx].buffer, new_buffer_id)
|
77
|
+
# Check that the new buffer has the same data as the original one.
|
78
|
+
self.assertTrue(
|
79
|
+
np.all(
|
80
|
+
np.frombuffer(
|
81
|
+
self.model.buffers[new_buffer_id].data,
|
82
|
+
dtype=np.float32,
|
83
|
+
)
|
84
|
+
== np.frombuffer(
|
85
|
+
self.model.buffers[prev_buffer_id].data,
|
86
|
+
dtype=np.float32,
|
87
|
+
)
|
88
|
+
)
|
89
|
+
)
|
90
|
+
|
91
|
+
def test_duplicate_buffer_raises_error_when_tensor_is_not_constant(self):
|
92
|
+
# Duplicate the FC input tensor in the second subgraph.
|
93
|
+
subgraph_idx = 1
|
94
|
+
weight_tensor_idx = 0
|
95
|
+
transformation_input = self._get_transformation_input(
|
96
|
+
subgraph_idx, weight_tensor_idx
|
97
|
+
)
|
98
|
+
with self.assertRaisesRegex(
|
99
|
+
ValueError,
|
100
|
+
'Duplicate Buffer transformation supports only constant tensors.',
|
101
|
+
):
|
102
|
+
duplicate_buffer.duplicate_buffer(transformation_input)
|
103
|
+
|
104
|
+
|
105
|
+
if __name__ == '__main__':
|
106
|
+
googletest.main()
|
@@ -69,6 +69,29 @@ def add_op_code(
|
|
69
69
|
return len(model_op_codes) - 1
|
70
70
|
|
71
71
|
|
72
|
+
def add_new_constant_buffer(
|
73
|
+
data: np.ndarray,
|
74
|
+
buffers: list[schema_py_generated.BufferT],
|
75
|
+
) -> int:
|
76
|
+
"""Add a new constant buffer to the model.
|
77
|
+
|
78
|
+
Args:
|
79
|
+
data: The data of the new tensor.
|
80
|
+
buffers: The buffers of the model.
|
81
|
+
|
82
|
+
Returns:
|
83
|
+
The index of the new buffer in the model.
|
84
|
+
"""
|
85
|
+
new_buffer = schema_py_generated.BufferT()
|
86
|
+
new_buffer.data = np.frombuffer(data.tobytes(), dtype=np.uint8).flatten()
|
87
|
+
new_buffer.offset = 0
|
88
|
+
new_buffer.size = 0
|
89
|
+
new_buffer_id = len(buffers)
|
90
|
+
buffers.append(new_buffer)
|
91
|
+
|
92
|
+
return new_buffer_id
|
93
|
+
|
94
|
+
|
72
95
|
def add_new_constant_tensor(
|
73
96
|
tensor_name: str,
|
74
97
|
data: np.ndarray,
|
@@ -88,16 +111,11 @@ def add_new_constant_tensor(
|
|
88
111
|
Returns:
|
89
112
|
The index of the new tensor in the subgraph.
|
90
113
|
"""
|
91
|
-
|
92
|
-
tensor_buffer.data = np.frombuffer(data.tobytes(), dtype=np.uint8).flatten()
|
93
|
-
tensor_buffer.offset = 0
|
94
|
-
tensor_buffer.size = 0
|
95
|
-
tensor_buffer_id = len(buffers)
|
96
|
-
buffers.append(tensor_buffer)
|
114
|
+
new_buffer_id = add_new_constant_buffer(data, buffers)
|
97
115
|
|
98
116
|
new_tensor = schema_py_generated.TensorT()
|
99
117
|
new_tensor.shape = data.shape
|
100
|
-
new_tensor.buffer =
|
118
|
+
new_tensor.buffer = new_buffer_id
|
101
119
|
new_tensor.type = tensor_type
|
102
120
|
new_tensor.name = tensor_name
|
103
121
|
new_tensor_id = len(subgraph.tensors)
|
@@ -55,6 +55,38 @@ class TransformationUtilsTest(parameterized.TestCase):
|
|
55
55
|
)
|
56
56
|
self.assertEqual(expected, got)
|
57
57
|
|
58
|
+
@parameterized.named_parameters(
|
59
|
+
dict(
|
60
|
+
testcase_name="float32",
|
61
|
+
data=np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32),
|
62
|
+
),
|
63
|
+
dict(
|
64
|
+
testcase_name="int8",
|
65
|
+
data=np.array([[1, 2], [3, 4]], dtype=np.int8),
|
66
|
+
),
|
67
|
+
)
|
68
|
+
def test_add_new_constant_buffer(self, data):
|
69
|
+
"""Tests if the constant buffer is added to the model."""
|
70
|
+
prev_num_buffers = len(self.model.buffers) - 1
|
71
|
+
new_buffer_idx = transformation_utils.add_new_constant_buffer(
|
72
|
+
data=data,
|
73
|
+
buffers=self.model.buffers,
|
74
|
+
)
|
75
|
+
self.assertEqual(new_buffer_idx, prev_num_buffers + 1)
|
76
|
+
|
77
|
+
expected_buffer_data = (
|
78
|
+
np.frombuffer(
|
79
|
+
data.tobytes(),
|
80
|
+
dtype=np.uint8,
|
81
|
+
)
|
82
|
+
.flatten()
|
83
|
+
.tolist()
|
84
|
+
)
|
85
|
+
self.assertEqual(
|
86
|
+
self.model.buffers[new_buffer_idx].data.tolist(),
|
87
|
+
expected_buffer_data,
|
88
|
+
)
|
89
|
+
|
58
90
|
@parameterized.named_parameters(
|
59
91
|
dict(
|
60
92
|
testcase_name="float32",
|
@@ -21,49 +21,40 @@ import immutabledict
|
|
21
21
|
import numpy as np
|
22
22
|
|
23
23
|
from ai_edge_quantizer import qtyping
|
24
|
-
from ai_edge_litert import schema_py_generated # pylint:disable=g-direct-tensorflow-import
|
24
|
+
from ai_edge_litert import schema_py_generated as schema # pylint:disable=g-direct-tensorflow-import
|
25
25
|
from tensorflow.lite.tools import flatbuffer_utils # pylint: disable=g-direct-tensorflow-import
|
26
26
|
from tensorflow.python.platform import gfile # pylint: disable=g-direct-tensorflow-import
|
27
27
|
|
28
28
|
_TFLOpName = qtyping.TFLOperationName
|
29
29
|
|
30
30
|
TFL_OP_NAME_TO_CODE = immutabledict.immutabledict({
|
31
|
-
_TFLOpName.FULLY_CONNECTED:
|
32
|
-
|
33
|
-
|
34
|
-
_TFLOpName.
|
35
|
-
_TFLOpName.
|
36
|
-
_TFLOpName.
|
37
|
-
|
38
|
-
|
39
|
-
_TFLOpName.
|
40
|
-
|
41
|
-
|
42
|
-
_TFLOpName.
|
43
|
-
|
44
|
-
|
45
|
-
_TFLOpName.
|
46
|
-
_TFLOpName.
|
47
|
-
|
48
|
-
|
49
|
-
_TFLOpName.
|
50
|
-
_TFLOpName.
|
51
|
-
_TFLOpName.
|
52
|
-
_TFLOpName.
|
53
|
-
_TFLOpName.
|
54
|
-
_TFLOpName.
|
55
|
-
_TFLOpName.
|
56
|
-
_TFLOpName.MEAN: schema_py_generated.BuiltinOperator.MEAN,
|
57
|
-
_TFLOpName.RSQRT: schema_py_generated.BuiltinOperator.RSQRT,
|
58
|
-
_TFLOpName.CONCATENATION: schema_py_generated.BuiltinOperator.CONCATENATION,
|
59
|
-
_TFLOpName.STRIDED_SLICE: schema_py_generated.BuiltinOperator.STRIDED_SLICE,
|
60
|
-
_TFLOpName.SPLIT: schema_py_generated.BuiltinOperator.SPLIT,
|
61
|
-
_TFLOpName.LOGISTIC: schema_py_generated.BuiltinOperator.LOGISTIC,
|
62
|
-
_TFLOpName.SLICE: schema_py_generated.BuiltinOperator.SLICE,
|
63
|
-
_TFLOpName.SUM: schema_py_generated.BuiltinOperator.SUM,
|
64
|
-
_TFLOpName.SELECT_V2: schema_py_generated.BuiltinOperator.SELECT_V2,
|
31
|
+
_TFLOpName.FULLY_CONNECTED: schema.BuiltinOperator.FULLY_CONNECTED,
|
32
|
+
_TFLOpName.BATCH_MATMUL: schema.BuiltinOperator.BATCH_MATMUL,
|
33
|
+
_TFLOpName.CONV_2D: schema.BuiltinOperator.CONV_2D,
|
34
|
+
_TFLOpName.DEPTHWISE_CONV_2D: schema.BuiltinOperator.DEPTHWISE_CONV_2D,
|
35
|
+
_TFLOpName.CONV_2D_TRANSPOSE: schema.BuiltinOperator.TRANSPOSE_CONV,
|
36
|
+
_TFLOpName.EMBEDDING_LOOKUP: schema.BuiltinOperator.EMBEDDING_LOOKUP,
|
37
|
+
_TFLOpName.SOFTMAX: schema.BuiltinOperator.SOFTMAX,
|
38
|
+
_TFLOpName.AVERAGE_POOL_2D: schema.BuiltinOperator.AVERAGE_POOL_2D,
|
39
|
+
_TFLOpName.RESHAPE: schema.BuiltinOperator.RESHAPE,
|
40
|
+
_TFLOpName.TANH: schema.BuiltinOperator.TANH,
|
41
|
+
_TFLOpName.TRANSPOSE: schema.BuiltinOperator.TRANSPOSE,
|
42
|
+
_TFLOpName.GELU: schema.BuiltinOperator.GELU,
|
43
|
+
_TFLOpName.ADD: schema.BuiltinOperator.ADD,
|
44
|
+
_TFLOpName.SUB: schema.BuiltinOperator.SUB,
|
45
|
+
_TFLOpName.MUL: schema.BuiltinOperator.MUL,
|
46
|
+
_TFLOpName.MEAN: schema.BuiltinOperator.MEAN,
|
47
|
+
_TFLOpName.RSQRT: schema.BuiltinOperator.RSQRT,
|
48
|
+
_TFLOpName.CONCATENATION: schema.BuiltinOperator.CONCATENATION,
|
49
|
+
_TFLOpName.STRIDED_SLICE: schema.BuiltinOperator.STRIDED_SLICE,
|
50
|
+
_TFLOpName.SPLIT: schema.BuiltinOperator.SPLIT,
|
51
|
+
_TFLOpName.LOGISTIC: schema.BuiltinOperator.LOGISTIC,
|
52
|
+
_TFLOpName.SLICE: schema.BuiltinOperator.SLICE,
|
53
|
+
_TFLOpName.SUM: schema.BuiltinOperator.SUM,
|
54
|
+
_TFLOpName.SELECT_V2: schema.BuiltinOperator.SELECT_V2,
|
55
|
+
_TFLOpName.STABLEHLO_COMPOSITE: schema.BuiltinOperator.STABLEHLO_COMPOSITE,
|
65
56
|
_TFLOpName.DYNAMIC_UPDATE_SLICE: (
|
66
|
-
|
57
|
+
schema.BuiltinOperator.DYNAMIC_UPDATE_SLICE
|
67
58
|
),
|
68
59
|
})
|
69
60
|
|
@@ -318,3 +309,23 @@ def get_subgraph_input_output_operators(
|
|
318
309
|
op_key=qtyping.TFLOperationName.OUTPUT,
|
319
310
|
)
|
320
311
|
return [input_op, output_op]
|
312
|
+
|
313
|
+
|
314
|
+
def get_op_side_effect_subgraphs(
|
315
|
+
op: Union[schema.Operator, schema.OperatorT],
|
316
|
+
) -> list[int]:
|
317
|
+
"""Get indices of any subgraphs invoked as a side effect of the operator.
|
318
|
+
|
319
|
+
Args:
|
320
|
+
op: The operator object.
|
321
|
+
|
322
|
+
Returns:
|
323
|
+
A list of subgraph indices invoked by the operator. Empty if the operator
|
324
|
+
does not invoke any subgraphs.
|
325
|
+
"""
|
326
|
+
if opts := flatbuffer_utils.get_options_as(
|
327
|
+
op, schema.StableHLOCompositeOptionsT
|
328
|
+
):
|
329
|
+
return [opts.decompositionSubgraphIndex]
|
330
|
+
# Can add other nested ops here (control flow ops, etc).
|
331
|
+
return []
|
@@ -216,8 +216,10 @@ def get_tensor_name_to_details_map(
|
|
216
216
|
"""
|
217
217
|
tensor_name_to_detail = {}
|
218
218
|
for tensor_detail in tflite_interpreter.get_tensor_details(subgraph_index):
|
219
|
-
# Don't return temporary, unnamed tensors
|
220
|
-
|
219
|
+
# Don't return temporary, unnamed tensors or scratch tensors.
|
220
|
+
# tensor_detail doesn't include the allocation size (bytes) or an
|
221
|
+
# indicator of scratch tensors, so use the name to filter them out.
|
222
|
+
if not tensor_detail["name"] or "scratch" in tensor_detail["name"]:
|
221
223
|
continue
|
222
224
|
tensor_name_to_detail[tensor_detail["name"]] = tensor_detail
|
223
225
|
return tensor_name_to_detail
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ai-edge-quantizer-nightly
|
3
|
-
Version: 0.0.1.
|
3
|
+
Version: 0.0.1.dev20250312
|
4
4
|
Summary: A quantizer for advanced developers to quantize converted AI Edge models.
|
5
5
|
Home-page: https://github.com/google-ai-edge/ai-edge-quantizer
|
6
6
|
Keywords: On-Device ML,AI,Google,TFLite,Quantization,LLMs,GenAI
|
@@ -1,34 +1,34 @@
|
|
1
1
|
ai_edge_quantizer/__init__.py,sha256=4pFSkukSwahYyzwqia0yPRyz8TnFQfGRthVJhYpMWas,793
|
2
|
-
ai_edge_quantizer/algorithm_manager.py,sha256=
|
2
|
+
ai_edge_quantizer/algorithm_manager.py,sha256=VZx4HvGEgt6XAS-b0breFPioLfKkAFNG71VLSG4aKS8,7712
|
3
3
|
ai_edge_quantizer/algorithm_manager_api.py,sha256=u903TG0s1uIDhJqfeJne3CFl8A93phZrwgV2-hwdcXU,9247
|
4
4
|
ai_edge_quantizer/algorithm_manager_api_test.py,sha256=tL_ozYFTsOPX8qGcti0KTz37nVsCxf0SSG5C45SyT-g,7319
|
5
|
-
ai_edge_quantizer/calibrator.py,sha256=
|
5
|
+
ai_edge_quantizer/calibrator.py,sha256=n7AD9j7UScR-CieoI6DQRMeiG_fhLBfSLRiM4460xaM,11895
|
6
6
|
ai_edge_quantizer/calibrator_test.py,sha256=C_oWOaRugPKYX74jF-eRFH-k6nGOdA8I9_uPiocaOuE,11900
|
7
7
|
ai_edge_quantizer/conftest.py,sha256=SxCz-5LlRD_lQm4hQc4c6IGG7DS8d7IyEWY9gnscPN0,794
|
8
|
-
ai_edge_quantizer/default_policy.py,sha256=
|
8
|
+
ai_edge_quantizer/default_policy.py,sha256=2y9p7iZIESB4ozPwjiodgPTLlnmHxQKkwKcYSfc80JI,10277
|
9
9
|
ai_edge_quantizer/model_modifier.py,sha256=Z8EYtrz4zhCFpzd1zVwl2AetVE3BGBf5OvB2DbVQuds,5850
|
10
10
|
ai_edge_quantizer/model_modifier_test.py,sha256=cJd04SLOG-fQZZNZPcisoBLx3cLtWEwGqUBbLb-pif4,4751
|
11
11
|
ai_edge_quantizer/model_validator.py,sha256=fRNz0jO54cthPTibsCuViUXUuFRHl_fbvEiCukIVy20,13030
|
12
12
|
ai_edge_quantizer/model_validator_test.py,sha256=EeqOP_mrZsnZ3rug756s0ryDDqd2KgIDld5Lm_gDuWY,13020
|
13
|
-
ai_edge_quantizer/params_generator.py,sha256=
|
13
|
+
ai_edge_quantizer/params_generator.py,sha256=wT_TyW9jIaOfvLr9a1JR_kxwZtPXCvDFF0n_2QcrDZg,14087
|
14
14
|
ai_edge_quantizer/params_generator_test.py,sha256=d9JwR-yxNJgg1SW-m8sFFPkIRdhgsDwMpVKsBQFL0gg,37658
|
15
|
-
ai_edge_quantizer/qtyping.py,sha256=
|
15
|
+
ai_edge_quantizer/qtyping.py,sha256=UBZ3HgO8IDLY6VJmO05rGtFv_idMD3Os3WWsnriA0NA,15235
|
16
16
|
ai_edge_quantizer/quantizer.py,sha256=g3DMqFMrMpt9jQttCE0WcdNbMtk0JZnmN5MmCHrNdyM,13202
|
17
17
|
ai_edge_quantizer/quantizer_test.py,sha256=38oTMJwMmxwPDeqT3eaVbazjtuIUIzMQ3mJNKh_eNQY,20493
|
18
18
|
ai_edge_quantizer/recipe.py,sha256=r5tJiUs-ihZFzeK_jP2sUIUgTqZsL5SWvbUokuIUPDo,2251
|
19
19
|
ai_edge_quantizer/recipe_manager.py,sha256=qcGUD7e7BISKdsY9WH2rdaRR3acmzSA5qMezGNbzlpo,8931
|
20
20
|
ai_edge_quantizer/recipe_manager_test.py,sha256=LulVxsYp6TBGFI2PLCUCd4VsFq8ELpC7kMNkUjsLgbo,32230
|
21
21
|
ai_edge_quantizer/recipe_test.py,sha256=Fg_sfxovI2fRjk5qdu18ghOvXdUvhDR1TxbE0GHDczc,3381
|
22
|
-
ai_edge_quantizer/transformation_instruction_generator.py,sha256=
|
22
|
+
ai_edge_quantizer/transformation_instruction_generator.py,sha256=WkECCO85lLs4cEnjZF5eVGbtuul4P8N77gUxUCK9ESY,21605
|
23
23
|
ai_edge_quantizer/transformation_instruction_generator_test.py,sha256=23MfOBiXv5Wq9FKJen7DrJ66T58qxf4ECriIY7V013k,39113
|
24
|
-
ai_edge_quantizer/transformation_performer.py,sha256=
|
24
|
+
ai_edge_quantizer/transformation_performer.py,sha256=4GCCOw5WBhO0UUfFoLlbfUh3RfnffF9qI_yEpdWlyps,11181
|
25
25
|
ai_edge_quantizer/transformation_performer_test.py,sha256=m3V6nd6jsjd6jVId5wTBNuyDB2h2p4tHlMWhlnomlJo,13341
|
26
26
|
ai_edge_quantizer/algorithms/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
27
27
|
ai_edge_quantizer/algorithms/nonlinear_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
28
28
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting.py,sha256=Bs9CK7wZAw6jNaZ8xEtbwO2vM34VYXNZSMVWvxJo9nw,9297
|
29
29
|
ai_edge_quantizer/algorithms/nonlinear_quantize/float_casting_test.py,sha256=s64eDDH9bmRWy6Bl1peHnhGewLnFJjvnhYOdjo1zYOA,22625
|
30
30
|
ai_edge_quantizer/algorithms/uniform_quantize/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
31
|
-
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=
|
31
|
+
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize.py,sha256=osvXIwVVEi5DRiT_MpJpAXGZCVMEoR0tcc6EwuAtcp0,22330
|
32
32
|
ai_edge_quantizer/algorithms/uniform_quantize/common_quantize_test.py,sha256=qMmKbWqxrCoVKbLKHn9WuCrGKPfHkEyU0Nmhokh8Qeo,2597
|
33
33
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery.py,sha256=OTXjEZ3Ctq3ffYzisX-6HwgK_DuA7uos_aap5PiIUPE,8686
|
34
34
|
ai_edge_quantizer/algorithms/uniform_quantize/dequantized_weight_recovery_test.py,sha256=y7BK11fkF63Ex_Jzg3fbIdy0D_Ca6HuvChVZR7Uwggc,8073
|
@@ -37,31 +37,33 @@ ai_edge_quantizer/algorithms/uniform_quantize/naive_min_max_quantize_test.py,sha
|
|
37
37
|
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor.py,sha256=Q_vx7YN7KMpjubsngxRdJ4bfdSIV-gmXjtVuxIkZuX4,11078
|
38
38
|
ai_edge_quantizer/algorithms/uniform_quantize/uniform_quantize_tensor_test.py,sha256=WZ4_bvbG999nOtCIqn7mrMnpRdoJOdiyzxhsL_QiPHA,11395
|
39
39
|
ai_edge_quantizer/algorithms/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
40
|
-
ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=
|
40
|
+
ai_edge_quantizer/algorithms/utils/common_utils.py,sha256=4qSlVNx3-91kJufnnJV1RdVRXBPapylZkrAp2nywoao,34581
|
41
41
|
ai_edge_quantizer/algorithms/utils/common_utils_test.py,sha256=zqapGEfYhjQWe9cNGPLmdbwtEUUYQRhlO_kNe0cXX6E,18104
|
42
42
|
ai_edge_quantizer/transformations/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
43
43
|
ai_edge_quantizer/transformations/dequant_insert.py,sha256=sL1LHFVzBDSd9jgrzlHz38LWU0bwmVX7iBkaNcui0ts,3566
|
44
44
|
ai_edge_quantizer/transformations/dequant_insert_test.py,sha256=NJ18PnG71_AvUPz3Cr_TmG6URMeBfa7IiDDyddfTkKQ,10830
|
45
|
+
ai_edge_quantizer/transformations/duplicate_buffer.py,sha256=sEod0EtmcHX0VDqBCI4BYCX9CSRyDtx2vmjtOentFiY,1743
|
46
|
+
ai_edge_quantizer/transformations/duplicate_buffer_test.py,sha256=YYWl3Q5WF60s8T8pLzzA8TCSxz-i7dqc03dJt1LtMw4,3880
|
45
47
|
ai_edge_quantizer/transformations/emulated_subchannel.py,sha256=HVaRxoC8PCAvy3xeMv3OIymukUy_yW1zK0xN8Ann6I4,13602
|
46
48
|
ai_edge_quantizer/transformations/emulated_subchannel_test.py,sha256=gZP6u9NdPXl7s19qB_Un8evou9ZZV6I9Gy0E1rdobHM,7722
|
47
49
|
ai_edge_quantizer/transformations/quant_insert.py,sha256=jn6HsJaV-sqBiFPY-Aqbd64t8zgcYVkEkZI375x_FWY,3958
|
48
50
|
ai_edge_quantizer/transformations/quant_insert_test.py,sha256=X9ptPDvJCFkR5tejKnD1SlHFGPazQTW-wNNMV9MEAuw,10107
|
49
51
|
ai_edge_quantizer/transformations/quantize_tensor.py,sha256=6CyUFR7fGmzbS-mSuDlSSCJJGxY9X_WnCmEuKqL4LzQ,7864
|
50
52
|
ai_edge_quantizer/transformations/quantize_tensor_test.py,sha256=QnJmQ_-XN5X0oR57FoY9bWGTp7migf11psbdO9R2pLg,9050
|
51
|
-
ai_edge_quantizer/transformations/transformation_utils.py,sha256=
|
52
|
-
ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=
|
53
|
+
ai_edge_quantizer/transformations/transformation_utils.py,sha256=R42OIbzwQ7JYJ-Qt46jsqwb6u4MfDGiIPCRZCUGLVCw,4664
|
54
|
+
ai_edge_quantizer/transformations/transformation_utils_test.py,sha256=xH64SF3UHDh84vYbt-WvmXNjM-Jg-mefES1ACO1tkqw,6269
|
53
55
|
ai_edge_quantizer/utils/__init__.py,sha256=lpq1g2ayg3lCPLy79t2VicYcnGKw64FfYIj1V7J-4m8,676
|
54
56
|
ai_edge_quantizer/utils/calibration_utils.py,sha256=1Fj9MIO6aLZIRgyd4axvZN4S_O64nB_-Miu1WP664js,2536
|
55
57
|
ai_edge_quantizer/utils/calibration_utils_test.py,sha256=Z-AcdTieesWFKyKBb08ZXm4Mgu6cvJ4bg2-MJ7hLD10,2856
|
56
58
|
ai_edge_quantizer/utils/test_utils.py,sha256=HwZCIpO9fJRAhuN6t6voXKOYQtcioFtt_tpkAlDsAYk,6205
|
57
|
-
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=
|
59
|
+
ai_edge_quantizer/utils/tfl_flatbuffer_utils.py,sha256=_A-h_MqwElzjgkLDmXTZ1iAIWtTRcLjSFGfjNT8fuHU,10480
|
58
60
|
ai_edge_quantizer/utils/tfl_flatbuffer_utils_test.py,sha256=AbyDxoM62k4ojD8gPdkWo--xe5hlX3t0kobQSA80kuk,7740
|
59
|
-
ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=
|
61
|
+
ai_edge_quantizer/utils/tfl_interpreter_utils.py,sha256=x2xA2CFPpe_2trcV8v5xGaBETvVCfwAcJuq6yieGJ0Y,12687
|
60
62
|
ai_edge_quantizer/utils/tfl_interpreter_utils_test.py,sha256=Op3JxtOqlrjzmYF18jnnstL1k9xiY9kKJ8S2vklKGkc,11327
|
61
63
|
ai_edge_quantizer/utils/validation_utils.py,sha256=oYw33Sg547AqtGw-choPUJmp9SAKkV46J_ddqSsum2Q,3950
|
62
64
|
ai_edge_quantizer/utils/validation_utils_test.py,sha256=V_qNDikPD4OPB-siOLQCWNVWTAu87h2IgNYt7teFd-o,2934
|
63
|
-
ai_edge_quantizer_nightly-0.0.1.
|
64
|
-
ai_edge_quantizer_nightly-0.0.1.
|
65
|
-
ai_edge_quantizer_nightly-0.0.1.
|
66
|
-
ai_edge_quantizer_nightly-0.0.1.
|
67
|
-
ai_edge_quantizer_nightly-0.0.1.
|
65
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
66
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info/METADATA,sha256=ZRyM3EF8KAmJpo0QjIpL6Lv1qPQ_2tGLP4o_ARspijs,1528
|
67
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
68
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info/top_level.txt,sha256=8QTfPnFXNVUhScFLaa-NWZMFWMn72M50DVPubpwWB1g,18
|
69
|
+
ai_edge_quantizer_nightly-0.0.1.dev20250312.dist-info/RECORD,,
|
File without changes
|
File without changes
|