JSTprove 1.1.0__py3-none-macosx_11_0_arm64.whl → 1.3.0__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/METADATA +3 -3
- {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/RECORD +40 -26
- python/core/binaries/onnx_generic_circuit_1-3-0 +0 -0
- python/core/circuits/base.py +29 -12
- python/core/circuits/errors.py +1 -2
- python/core/model_processing/converters/base.py +3 -3
- python/core/model_processing/converters/onnx_converter.py +28 -27
- python/core/model_processing/onnx_custom_ops/__init__.py +5 -4
- python/core/model_processing/onnx_custom_ops/batchnorm.py +64 -0
- python/core/model_processing/onnx_custom_ops/mul.py +66 -0
- python/core/model_processing/onnx_quantizer/exceptions.py +2 -2
- python/core/model_processing/onnx_quantizer/layers/base.py +101 -0
- python/core/model_processing/onnx_quantizer/layers/batchnorm.py +224 -0
- python/core/model_processing/onnx_quantizer/layers/clip.py +92 -0
- python/core/model_processing/onnx_quantizer/layers/max.py +49 -0
- python/core/model_processing/onnx_quantizer/layers/min.py +54 -0
- python/core/model_processing/onnx_quantizer/layers/mul.py +53 -0
- python/core/model_processing/onnx_quantizer/layers/sub.py +54 -0
- python/core/model_processing/onnx_quantizer/onnx_op_quantizer.py +43 -0
- python/core/model_templates/circuit_template.py +48 -38
- python/core/utils/errors.py +1 -1
- python/core/utils/scratch_tests.py +29 -23
- python/scripts/gen_and_bench.py +2 -2
- python/tests/circuit_e2e_tests/circuit_model_developer_test.py +18 -14
- python/tests/circuit_e2e_tests/helper_fns_for_tests.py +11 -13
- python/tests/circuit_parent_classes/test_ort_custom_layers.py +35 -53
- python/tests/onnx_quantizer_tests/layers/base.py +1 -3
- python/tests/onnx_quantizer_tests/layers/batchnorm_config.py +190 -0
- python/tests/onnx_quantizer_tests/layers/clip_config.py +127 -0
- python/tests/onnx_quantizer_tests/layers/max_config.py +100 -0
- python/tests/onnx_quantizer_tests/layers/min_config.py +94 -0
- python/tests/onnx_quantizer_tests/layers/mul_config.py +102 -0
- python/tests/onnx_quantizer_tests/layers/sub_config.py +102 -0
- python/tests/onnx_quantizer_tests/layers_tests/test_integration.py +6 -5
- python/tests/onnx_quantizer_tests/layers_tests/test_quantize.py +8 -1
- python/tests/onnx_quantizer_tests/test_registered_quantizers.py +17 -8
- python/core/binaries/onnx_generic_circuit_1-1-0 +0 -0
- {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/WHEEL +0 -0
- {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/entry_points.txt +0 -0
- {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/top_level.txt +0 -0
python/core/model_processing/onnx_quantizer/layers/base.py
@@ -418,6 +418,40 @@ class BaseOpQuantizer:
 
 
 class QuantizerBase:
+    """
+    Shared mixin implementing the generic INT64 quantization pipeline.
+
+    IMPORTANT:
+        QuantizerBase is *not* a standalone quantizer. It must always be
+        combined with BaseOpQuantizer via multiple inheritance:
+
+            class FooQuantizer(BaseOpQuantizer, QuantizeFoo):
+                ...
+
+    BaseOpQuantizer supplies required methods and attributes that
+    QuantizerBase relies on:
+        - add_scaled_initializer_inputs
+        - insert_scale_node
+        - get_scaling
+        - new_initializers (initializer buffer shared with converter)
+
+    If a subclass inherits QuantizerBase without BaseOpQuantizer,
+    QuantizerBase.quantize() will raise attribute errors at runtime.
+
+    This mixin centralizes:
+        - attribute extraction/merging
+        - optional initializer scaling (USE_WB + SCALE_PLAN)
+        - optional rescaling of outputs (USE_SCALING)
+        - creation of the final quantized NodeProto
+
+    The Quantize<Op> mixins should define:
+        - OP_TYPE
+        - DOMAIN
+        - USE_WB (bool)
+        - USE_SCALING (bool)
+        - SCALE_PLAN (dict[int, int]) if initializer scaling is enabled
+    """
+
     OP_TYPE = None
     DOMAIN = "ai.onnx.contrib"
     DEFAULT_ATTRS: ClassVar = {}
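The composition rule spelled out in the docstring above can be sketched concretely. The sketch below is illustrative only and not part of the package: QuantizeFoo, FooQuantizer, and the "Int64Foo" op type are hypothetical names, while BaseOpQuantizer, QuantizerBase, and the class attributes are exactly the ones the docstring documents.

    # Minimal sketch of the mixin pattern described above (hypothetical op).
    from typing import ClassVar

    from python.core.model_processing.onnx_quantizer.layers.base import (
        BaseOpQuantizer,
        QuantizerBase,
    )


    class QuantizeFoo(QuantizerBase):
        # Traits consumed by the shared QuantizerBase pipeline.
        OP_TYPE = "Int64Foo"           # hypothetical custom op type
        USE_WB = True                  # scale weight/bias-style initializer inputs
        USE_SCALING = False            # no extra rescale input is added
        SCALE_PLAN: ClassVar = {1: 1}  # scale input index 1 once by the global factor


    class FooQuantizer(BaseOpQuantizer, QuantizeFoo):
        # BaseOpQuantizer supplies insert_scale_node, get_scaling and
        # new_initializers, which the shared quantize() pipeline relies on.
        def quantize(self, node, graph, scale_config, initializer_map):
            return QuantizeFoo.quantize(self, node, graph, scale_config, initializer_map)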
@@ -479,6 +513,73 @@ class QuantizerBase:
         nodes.append(quantized_node)
         return nodes
 
+    def pre_analysis_transform(
+        self: QuantizerBase,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        initializer_map: dict[str, onnx.TensorProto],
+        scale_base: int,
+        scale_exponent: int,
+    ) -> None:
+        """
+        pre_analysis_transform transforms the given layer along the same
+        lines as it would be transformed for the quantized model, but for
+        the weights-and-biases file that is sent to the backend instead.
+
+        Default pre-analysis behavior:
+
+        - If the subclass uses weights/bias (`USE_WB=True`), apply the SAME
+          scaling rules as quantization, but directly mutate the initializers.
+
+        - Subclasses can override this to implement more complex rewrites
+          (e.g., BatchNorm → Mul/Add).
+
+        Args:
+            node (onnx.NodeProto): Node to transform.
+            graph (onnx.GraphProto): Rest of the ONNX graph, for initializers.
+            initializer_map (dict[str, onnx.TensorProto]): The initializer map.
+
+            scale_base (int): Scaling base.
+            scale_exponent (int): Scaling exponent.
+
+        NOTE:
+        - The resulting model will not make accurate predictions and should be
+          used solely for analysis and for keeping track of w_and_b.
+        """
+        # If the subclass does not want auto-scaling, do nothing
+        if not getattr(self, "USE_WB", False):
+            return
+
+        # Each quantizer defines which inputs to scale (Weight: 1x, Bias: 2x, etc.)
+        scale_plan = getattr(self, "SCALE_PLAN", {})
+
+        # Perform the same scaling as quantization, but directly modify initializers
+        for input_idx, scale_mult in scale_plan.items():
+            if input_idx >= len(node.input):
+                continue
+
+            name = node.input[input_idx]
+            if name not in initializer_map:
+                continue  # optional input missing
+
+            tensor = initializer_map[name]
+            arr = numpy_helper.to_array(tensor).astype(np.float64)
+
+            scale = scale_base ** (scale_exponent * scale_mult)
+            new_arr = arr * scale
+
+            # Replace the initializer directly
+            new_tensor = numpy_helper.from_array(new_arr, name=tensor.name)
+
+            # Modify the graph initializer in place
+            for j in range(len(graph.initializer)):
+                if graph.initializer[j].name == tensor.name:
+                    del graph.initializer[j]
+                    break
+            graph.initializer.append(new_tensor)
+
+            initializer_map[tensor.name] = new_tensor
+
 
 class PassthroughQuantizer(BaseOpQuantizer):
     """
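The scaling rule in pre_analysis_transform is scale = scale_base ** (scale_exponent * scale_mult), so an input whose SCALE_PLAN multiplier is 2 (a bias, say) is scaled by the square of the global factor. A minimal numeric sketch, assuming a base-2 fixed point with exponent 16; the real base and exponent come from the converter's scale configuration, and the plan values here are illustrative:

    import numpy as np

    scale_base, scale_exponent = 2, 16        # global factor alpha = 2**16
    scale_plan = {1: 1, 2: 2}                 # e.g. weight scaled once, bias twice

    weights = np.array([0.5, -1.25])
    bias = np.array([0.75])

    scaled = {
        idx: arr * scale_base ** (scale_exponent * mult)
        for (idx, mult), arr in zip(scale_plan.items(), (weights, bias))
    }
    print(scaled[1])  # weights * 2**16 -> [ 32768. -81920.]
    print(scaled[2])  # bias    * 2**32 -> [3.22122547e+09]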
python/core/model_processing/onnx_quantizer/layers/batchnorm.py (new file)
@@ -0,0 +1,224 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar
+
+from python.core.circuits.errors import CircuitConfigurationError
+
+if TYPE_CHECKING:
+    import onnx
+
+import numpy as np
+from onnx import helper, numpy_helper
+
+from python.core.model_processing.onnx_custom_ops.onnx_helpers import extract_attributes
+from python.core.model_processing.onnx_quantizer.exceptions import InvalidParamError
+from python.core.model_processing.onnx_quantizer.layers.base import (
+    BaseOpQuantizer,
+    QuantizerBase,
+    ScaleConfig,
+)
+
+
+class QuantizeBatchnorm(QuantizerBase):
+    OP_TYPE = "Int64BatchNorm"
+    USE_WB = True
+    USE_SCALING = False
+    SCALE_PLAN: ClassVar = {}
+
+
+class BatchnormQuantizer(BaseOpQuantizer, QuantizeBatchnorm):
+    """
+    Quantizer for ONNX Batchnorm layers.
+
+    - Uses standard ONNX Batchnorm layer in standard domain, and
+      makes relevant additional changes to the graph.
+    """
+
+    def __init__(
+        self: BatchnormQuantizer,
+        new_initializers: list[onnx.TensorProto] | None = None,
+    ) -> None:
+        super().__init__()
+        # Only replace if caller provided something
+        if new_initializers is not None:
+            self.new_initializers = new_initializers
+
+    def _compute_mul_add(
+        self: BatchnormQuantizer,
+        initializer_map: dict[str, onnx.TensorProto],
+        node: onnx.NodeProto,
+        scale_base: int,
+        scale_exponent: int,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Compute the 'mul' and 'add' tensors for BatchNorm folding.
+        """
+        self._validate_inputs(node=node)
+        # ONNX BatchNorm inputs: [X, scale, bias, mean, var]
+        scale_factor = scale_base**scale_exponent
+        scale = numpy_helper.to_array(initializer_map[node.input[1]]).astype(np.float32)
+        bias = numpy_helper.to_array(initializer_map[node.input[2]]).astype(np.float32)
+        mean = numpy_helper.to_array(initializer_map[node.input[3]]).astype(np.float32)
+        var = numpy_helper.to_array(initializer_map[node.input[4]]).astype(np.float32)
+
+        # Find epsilon attribute
+        epsilon_attr = next((a for a in node.attribute if a.name == "epsilon"), None)
+        epsilon = float(epsilon_attr.f) if epsilon_attr else 1e-5
+
+        mul = scale / np.sqrt(var + epsilon)
+        add = bias - mean * mul
+        scaled_add = add * (scale_factor**2)
+        scaled_mul = scale_factor * mul
+        return scaled_mul, scaled_add
+
+    def pre_analysis_transform(
+        self: BatchnormQuantizer,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        initializer_map: dict[str, onnx.TensorProto],
+        scale_base: int,
+        scale_exponent: int,
+    ) -> None:
+        # Compute linearized BN tensors
+        mul, add = self._compute_mul_add(
+            initializer_map,
+            node,
+            scale_base=scale_base,
+            scale_exponent=scale_exponent,
+        )
+
+        # Name base
+        node_name = node.name if node.name else node.input[0]
+        mul_name = f"{node_name}_mul"
+        add_name = f"{node_name}_add"
+
+        # Create ONNX tensors
+        mul_tensor = numpy_helper.from_array(mul.astype(np.int64), name=mul_name)
+        add_tensor = numpy_helper.from_array(add.astype(np.int64), name=add_name)
+
+        # Insert them into the graph
+        graph.initializer.extend([mul_tensor, add_tensor])
+        initializer_map[mul_name] = mul_tensor
+        initializer_map[add_name] = add_tensor
+        self.new_initializers.extend([mul_tensor, add_tensor])
+
+        node.input[:] = [node.input[0], mul_name, add_name]
+
+        del node.attribute[:]
+
+    def quantize(
+        self,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        _ = graph
+
+        nodes: list[onnx.NodeProto] = []
+
+        # 1. Compute unscaled float mul/add coefficients
+        mul, add = self._compute_mul_add(
+            initializer_map,
+            node,
+            scale_base=1,
+            scale_exponent=1,
+        )
+
+        node_name = node.name if node.name else node.input[0]
+        mul_name = f"{node_name}_mul"
+        add_name = f"{node_name}_add"
+
+        # 2. Store unscaled mul and add initializers (as floats)
+        scale_value = self.get_scaling(scale_config.base, scale_config.exponent)
+        scale_name = f"{node.name}_int_scaler"
+        scale_tensor = numpy_helper.from_array(
+            np.array([scale_value], dtype=np.int64),
+            name=scale_name,
+        )
+        self.new_initializers.append(scale_tensor)
+
+        mul_tensor = numpy_helper.from_array(mul.astype(np.float32), name=mul_name)
+        add_tensor = numpy_helper.from_array(add.astype(np.float32), name=add_name)
+
+        initializer_map[mul_name] = mul_tensor
+        initializer_map[add_name] = add_tensor
+
+        # 3. Insert scale and cast for mul_tensor
+        scaled_mul_name, mul_scale_node, mul_cast_node = self.insert_scale_node(
+            tensor=mul_tensor,
+            scale_base=scale_config.base,
+            scale_exponent=scale_config.exponent,
+        )
+
+        # 4. Insert scale and cast for add_tensor
+        scaled_add_name, add_scale_node, add_cast_node = self.insert_scale_node(
+            tensor=add_tensor,
+            scale_base=scale_config.base,
+            scale_exponent=scale_config.exponent * 2,
+        )
+        # Note, order is important here
+        nodes.extend(
+            [
+                mul_scale_node,
+                mul_cast_node,
+                add_scale_node,
+                add_cast_node,
+            ],
+        )
+
+        # 5. Build final Int64BatchNorm node
+        attrs = extract_attributes(node)
+        for k, v in getattr(self, "DEFAULT_ATTRS", {}).items():
+            attrs.setdefault(k, v)
+        attrs["rescale"] = 1
+
+        quant_node = helper.make_node(
+            self.OP_TYPE,  # Should be "Int64BatchNorm"
+            inputs=[
+                node.input[0],    # original X
+                scaled_mul_name,  # scaled mul
+                scaled_add_name,  # scaled add
+                scale_name,       # scaling factor
+            ],
+            outputs=node.output,
+            name=node.name,
+            domain=self.DOMAIN,
+            **attrs,
+        )
+
+        nodes.append(quant_node)
+        return nodes
+
+    def check_supported(
+        self: BatchnormQuantizer,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        """
+        For our current implementation, all batchnorm inputs
+        (scale, variance, mean, etc.)
+        must be initializers to the circuit and not inputs from earlier in the graph.
+        """
+
+        if initializer_map is None:
+            msg = "initializer_map is required for BatchNorm support check"
+            raise CircuitConfigurationError(node.name, node.op_type, msg)
+
+        self._validate_inputs(node=node)
+
+        # First, check to make sure that each of the batchnorm inputs are initializers
+        initializer_inputs = node.input[1:]
+        if not all(i in initializer_map for i in initializer_inputs):
+            msg = "Unsupported BatchNorm with normalization inputs not in initializers"
+            raise InvalidParamError(node.name, node.op_type, msg)
+
+    def _validate_inputs(self, node: onnx.NodeProto) -> None:
+        """Validate BatchNorm has required inputs in initializer_map."""
+        num_inputs = 5
+        if len(node.input) < num_inputs:
+            raise InvalidParamError(
+                node.name,
+                node.op_type,
+                f"BatchNorm requires 5 inputs, got {len(node.input)}",
+            )
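_compute_mul_add folds the four BatchNorm parameters into one multiplier and one offset: mul = scale / sqrt(var + epsilon) and add = bias - mean * mul, so that mul * x + add reproduces the original normalization. A small self-check of that identity, with illustrative values and no fixed-point factor applied:

    import numpy as np

    x = np.array([0.2, -1.0, 3.5], dtype=np.float32)
    scale, bias, mean, var = 1.5, 0.1, 0.4, 2.0
    epsilon = 1e-5

    mul = scale / np.sqrt(var + epsilon)   # folded multiplier
    add = bias - mean * mul                # folded offset

    reference = scale * (x - mean) / np.sqrt(var + epsilon) + bias
    assert np.allclose(mul * x + add, reference)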
python/core/model_processing/onnx_quantizer/layers/clip.py (new file)
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    import onnx
+
+from python.core.model_processing.onnx_quantizer.layers.base import (
+    BaseOpQuantizer,
+    QuantizerBase,
+    ScaleConfig,
+)
+
+
+class QuantizeClip(QuantizerBase):
+    """
+    Quantization traits for ONNX Clip.
+
+    Semantics:
+    - X is already scaled/cast to INT64 at the graph boundary by the converter.
+    - Clip is elementwise + broadcasting.
+    - The bound inputs (min, max) should live in the *same* fixed-point scale
+      as X so that Clip(alpha*x; alpha*a, alpha*b) matches the original Clip(x; a, b).
+
+    Implementation:
+    - Treat inputs 1 and 2 (min, max) like "WB-style" slots: we let the
+      QuantizerBase machinery rescale / cast those inputs using the same
+      global scale factor.
+    - No extra internal scaling input is added (USE_SCALING = False).
+    """
+
+    OP_TYPE = "Clip"
+    DOMAIN = ""  # standard ONNX domain
+
+    # We DO want WB-style handling so that min/max initializers get quantized:
+    USE_WB = True
+
+    # Clip does not introduce its own scale input; it just runs in the
+    # existing fixed-point scale.
+    USE_SCALING = False
+
+    # Scale-plan for WB-style slots:
+    #   - Input index 1: min
+    #   - Input index 2: max
+    # Each should be scaled once by the global alpha (same as activations).
+    SCALE_PLAN: ClassVar = {1: 1, 2: 1}
+
+
+class ClipQuantizer(BaseOpQuantizer, QuantizeClip):
+    """
+    Quantizer for ONNX Clip.
+
+    - Keeps the node op_type as "Clip".
+    - Ensures that any bound inputs (min, max), whether they are dynamic
+      inputs or initializers, are converted to the same INT64 fixed-point
+      representation as A.
+    """
+
+    def __init__(
+        self,
+        new_initializers: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        # Match Max/Min/Add: we simply share the new_initializers dict
+        # with the converter so any constants we add are collected.
+        self.new_initializers = new_initializers
+
+    def quantize(
+        self,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        # Delegate to the shared QuantizerBase logic, which will:
+        #   - keep X as-is (already scaled/cast by the converter),
+        #   - rescale / cast min/max according to SCALE_PLAN,
+        #   - update initializers as needed.
+        return QuantizeClip.quantize(self, node, graph, scale_config, initializer_map)
+
+    def check_supported(
+        self,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        """
+        Minimal support check for Clip:
+
+        - Clip is variadic elementwise with optional min/max as inputs or attrs.
+        - We accept both forms; if attrs are present, ORT enforces semantics.
+        - Broadcasting is ONNX-standard; we don't restrict further here.
+        """
+        _ = node, initializer_map
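The reason the Clip bounds must carry the same fixed-point scale as X is that, for a positive scale factor alpha, clipping commutes with scaling: Clip(alpha*x; alpha*a, alpha*b) = alpha*Clip(x; a, b). A one-line check of that identity, with an arbitrarily chosen alpha:

    import numpy as np

    alpha = 2 ** 16                          # illustrative fixed-point factor
    x = np.array([-3.0, -0.4, 0.2, 5.0])
    lo, hi = -1.0, 1.0

    assert np.array_equal(np.clip(alpha * x, alpha * lo, alpha * hi),
                          alpha * np.clip(x, lo, hi))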
python/core/model_processing/onnx_quantizer/layers/max.py (new file)
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    import onnx
+
+from python.core.model_processing.onnx_quantizer.layers.base import (
+    BaseOpQuantizer,
+    QuantizerBase,
+    ScaleConfig,
+)
+
+
+class QuantizeMax(QuantizerBase):
+    OP_TYPE = "Max"
+    DOMAIN = ""
+    USE_WB = True
+    USE_SCALING = False
+    SCALE_PLAN: ClassVar = {1: 1}
+
+
+class MaxQuantizer(BaseOpQuantizer, QuantizeMax):
+    def __init__(
+        self,
+        new_initializers: list[onnx.TensorProto] | None = None,
+    ) -> None:
+        super().__init__()
+        if new_initializers is not None:
+            # Share the caller-provided buffer instead of the default list.
+            self.new_initializers = new_initializers
+
+    def quantize(
+        self,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        # Delegate to the shared QuantizerBase logic
+        return QuantizeMax.quantize(self, node, graph, scale_config, initializer_map)
+
+    def check_supported(
+        self,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        # If later we want to enforce/relax broadcasting, add it here.
+        pass
python/core/model_processing/onnx_quantizer/layers/min.py (new file)
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    import onnx
+
+from python.core.model_processing.onnx_quantizer.layers.base import (
+    BaseOpQuantizer,
+    QuantizerBase,
+    ScaleConfig,
+)
+
+
+class QuantizeMin(QuantizerBase):
+    OP_TYPE = "Min"
+    DOMAIN = ""  # standard ONNX domain
+    USE_WB = True  # let framework wire inputs/outputs normally
+    USE_SCALING = False  # passthrough: no internal scaling
+    SCALE_PLAN: ClassVar = {1: 1}  # elementwise arity plan
+
+
+class MinQuantizer(BaseOpQuantizer, QuantizeMin):
+    """
+    Passthrough quantizer for elementwise Min.
+    We rely on the converter to quantize graph inputs; no extra scaling here.
+    """
+
+    def __init__(
+        self: MinQuantizer,
+        new_initializers: list[onnx.TensorProto] | None = None,
+    ) -> None:
+        super().__init__()
+        if new_initializers is not None:
+            self.new_initializers = new_initializers
+
+    def quantize(
+        self: MinQuantizer,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        # Delegate to QuantizerBase's generic passthrough implementation.
+        return QuantizeMin.quantize(self, node, graph, scale_config, initializer_map)
+
+    def check_supported(
+        self: MinQuantizer,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        # Min has no attributes; elementwise, variadic ≥ 1 input per ONNX spec.
+        # We mirror Add/Max broadcasting behavior; no extra checks here.
+        _ = node, initializer_map
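Max and Min can stay in the standard ONNX domain with USE_SCALING = False for the same reason as Clip: elementwise max and min commute with a positive scale factor, so operands that already share the fixed-point scale need no rescaling afterwards. A quick check with illustrative values:

    import numpy as np

    alpha = 2 ** 16
    a = np.array([1.0, -2.0, 0.5])
    b = np.array([0.0, 3.0, 0.5])

    assert np.array_equal(np.maximum(alpha * a, alpha * b), alpha * np.maximum(a, b))
    assert np.array_equal(np.minimum(alpha * a, alpha * b), alpha * np.minimum(a, b))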
python/core/model_processing/onnx_quantizer/layers/mul.py (new file)
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    import onnx
+
+from python.core.model_processing.onnx_quantizer.layers.base import (
+    BaseOpQuantizer,
+    QuantizerBase,
+    ScaleConfig,
+)
+
+
+class QuantizeMul(QuantizerBase):
+    OP_TYPE = "Int64Mul"
+    USE_WB = True
+    USE_SCALING = True
+    SCALE_PLAN: ClassVar = {0: 1, 1: 1}
+
+
+class MulQuantizer(BaseOpQuantizer, QuantizeMul):
+    """
+    Quantizer for ONNX Mul layers.
+
+    - Uses custom Mul layer to incorporate rescaling, and
+      makes relevant additional changes to the graph.
+    """
+
+    def __init__(
+        self: MulQuantizer,
+        new_initializers: list[onnx.TensorProto] | None = None,
+    ) -> None:
+        super().__init__()
+        # Only replace if caller provided something
+        if new_initializers is not None:
+            self.new_initializers = new_initializers
+
+    def quantize(
+        self: MulQuantizer,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        return QuantizeMul.quantize(self, node, graph, scale_config, initializer_map)
+
+    def check_supported(
+        self: MulQuantizer,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        pass
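Mul is the one elementwise op here that sets USE_SCALING = True and maps to the custom Int64Mul op: when both operands carry the fixed-point factor alpha, their product carries alpha squared and must be divided back down by alpha once. A minimal integer sketch of that rescale, with illustrative values:

    alpha = 2 ** 8
    a, b = 1.5, -0.25

    qa = int(round(a * alpha))     # 384
    qb = int(round(b * alpha))     # -64
    product = (qa * qb) // alpha   # rescale from alpha**2 back to alpha

    assert product / alpha == a * b   # -0.375 recovered exactly for these values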
python/core/model_processing/onnx_quantizer/layers/sub.py (new file)
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, ClassVar
+
+if TYPE_CHECKING:
+    import onnx
+
+from python.core.model_processing.onnx_quantizer.layers.base import (
+    BaseOpQuantizer,
+    QuantizerBase,
+    ScaleConfig,
+)
+
+
+class QuantizeSub(QuantizerBase):
+    OP_TYPE = "Sub"
+    DOMAIN = ""
+    USE_WB = True
+    USE_SCALING = False
+    SCALE_PLAN: ClassVar = {0: 1, 1: 1}
+
+
+class SubQuantizer(BaseOpQuantizer, QuantizeSub):
+    """
+    Quantizer for ONNX Sub layers.
+
+    - Uses standard ONNX Sub layer in standard domain, and
+      makes relevant additional changes to the graph.
+    """
+
+    def __init__(
+        self: SubQuantizer,
+        new_initializers: list[onnx.TensorProto] | None = None,
+    ) -> None:
+        super().__init__()
+        # Only replace if caller provided something
+        if new_initializers is not None:
+            self.new_initializers = new_initializers
+
+    def quantize(
+        self: SubQuantizer,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        scale_config: ScaleConfig,
+        initializer_map: dict[str, onnx.TensorProto],
+    ) -> list[onnx.NodeProto]:
+        return QuantizeSub.quantize(self, node, graph, scale_config, initializer_map)
+
+    def check_supported(
+        self: SubQuantizer,
+        node: onnx.NodeProto,
+        initializer_map: dict[str, onnx.TensorProto] | None = None,
+    ) -> None:
+        pass