JSTprove 1.1.0-py3-none-macosx_11_0_arm64.whl → 1.3.0-py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of JSTprove might be problematic.

Files changed (41)
  1. {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/METADATA +3 -3
  2. {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/RECORD +40 -26
  3. python/core/binaries/onnx_generic_circuit_1-3-0 +0 -0
  4. python/core/circuits/base.py +29 -12
  5. python/core/circuits/errors.py +1 -2
  6. python/core/model_processing/converters/base.py +3 -3
  7. python/core/model_processing/converters/onnx_converter.py +28 -27
  8. python/core/model_processing/onnx_custom_ops/__init__.py +5 -4
  9. python/core/model_processing/onnx_custom_ops/batchnorm.py +64 -0
  10. python/core/model_processing/onnx_custom_ops/mul.py +66 -0
  11. python/core/model_processing/onnx_quantizer/exceptions.py +2 -2
  12. python/core/model_processing/onnx_quantizer/layers/base.py +101 -0
  13. python/core/model_processing/onnx_quantizer/layers/batchnorm.py +224 -0
  14. python/core/model_processing/onnx_quantizer/layers/clip.py +92 -0
  15. python/core/model_processing/onnx_quantizer/layers/max.py +49 -0
  16. python/core/model_processing/onnx_quantizer/layers/min.py +54 -0
  17. python/core/model_processing/onnx_quantizer/layers/mul.py +53 -0
  18. python/core/model_processing/onnx_quantizer/layers/sub.py +54 -0
  19. python/core/model_processing/onnx_quantizer/onnx_op_quantizer.py +43 -0
  20. python/core/model_templates/circuit_template.py +48 -38
  21. python/core/utils/errors.py +1 -1
  22. python/core/utils/scratch_tests.py +29 -23
  23. python/scripts/gen_and_bench.py +2 -2
  24. python/tests/circuit_e2e_tests/circuit_model_developer_test.py +18 -14
  25. python/tests/circuit_e2e_tests/helper_fns_for_tests.py +11 -13
  26. python/tests/circuit_parent_classes/test_ort_custom_layers.py +35 -53
  27. python/tests/onnx_quantizer_tests/layers/base.py +1 -3
  28. python/tests/onnx_quantizer_tests/layers/batchnorm_config.py +190 -0
  29. python/tests/onnx_quantizer_tests/layers/clip_config.py +127 -0
  30. python/tests/onnx_quantizer_tests/layers/max_config.py +100 -0
  31. python/tests/onnx_quantizer_tests/layers/min_config.py +94 -0
  32. python/tests/onnx_quantizer_tests/layers/mul_config.py +102 -0
  33. python/tests/onnx_quantizer_tests/layers/sub_config.py +102 -0
  34. python/tests/onnx_quantizer_tests/layers_tests/test_integration.py +6 -5
  35. python/tests/onnx_quantizer_tests/layers_tests/test_quantize.py +8 -1
  36. python/tests/onnx_quantizer_tests/test_registered_quantizers.py +17 -8
  37. python/core/binaries/onnx_generic_circuit_1-1-0 +0 -0
  38. {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/WHEEL +0 -0
  39. {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/entry_points.txt +0 -0
  40. {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/licenses/LICENSE +0 -0
  41. {jstprove-1.1.0.dist-info → jstprove-1.3.0.dist-info}/top_level.txt +0 -0
@@ -418,6 +418,40 @@ class BaseOpQuantizer:
 
 
 class QuantizerBase:
+    """
+    Shared mixin implementing the generic INT64 quantization pipeline.
+
+    IMPORTANT:
+        QuantizerBase is *not* a standalone quantizer. It must always be
+        combined with BaseOpQuantizer via multiple inheritance:
+
+            class FooQuantizer(BaseOpQuantizer, QuantizeFoo):
+                ...
+
+    BaseOpQuantizer supplies required methods and attributes that
+    QuantizerBase relies on:
+        - add_scaled_initializer_inputs
+        - insert_scale_node
+        - get_scaling
+        - new_initializers (initializer buffer shared with converter)
+
+    If a subclass inherits QuantizerBase without BaseOpQuantizer,
+    QuantizerBase.quantize() will raise attribute errors at runtime.
+
+    This mixin centralizes:
+        - attribute extraction/merging
+        - optional initializer scaling (USE_WB + SCALE_PLAN)
+        - optional rescaling of outputs (USE_SCALING)
+        - creation of the final quantized NodeProto
+
+    The Quantize<Op> mixins should define:
+        - OP_TYPE
+        - DOMAIN
+        - USE_WB (bool)
+        - USE_SCALING (bool)
+        - SCALE_PLAN (dict[int, int]) if initializer scaling is enabled
+    """
+
     OP_TYPE = None
     DOMAIN = "ai.onnx.contrib"
     DEFAULT_ATTRS: ClassVar = {}
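For readers new to this pattern, the hunk above defines the contract that the per-op files added later in this diff follow. A minimal sketch of that pairing, assuming a hypothetical Int64Foo op (QuantizeFoo and FooQuantizer are illustrative names, not part of JSTprove; only the import path and the two base classes come from this package):

    from typing import ClassVar

    from python.core.model_processing.onnx_quantizer.layers.base import (
        BaseOpQuantizer,
        QuantizerBase,
    )


    class QuantizeFoo(QuantizerBase):        # traits only: class-level knobs
        OP_TYPE = "Int64Foo"                 # op emitted by the generic quantize()
        DOMAIN = "ai.onnx.contrib"
        USE_WB = True                        # scale weight/bias-style initializers
        USE_SCALING = True                   # attach a rescale input to the node
        SCALE_PLAN: ClassVar = {1: 1, 2: 2}  # input 1 scaled once, input 2 twice


    class FooQuantizer(BaseOpQuantizer, QuantizeFoo):
        """BaseOpQuantizer comes first so its helper methods are available."""

The concrete quantizers below (BatchNorm, Clip, Max, Min, Mul, Sub) all follow this shape.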
@@ -479,6 +513,73 @@ class QuantizerBase:
         nodes.append(quantized_node)
         return nodes
 
+    def pre_analysis_transform(
+        self: QuantizerBase,
+        node: onnx.NodeProto,
+        graph: onnx.GraphProto,
+        initializer_map: dict[str, onnx.TensorProto],
+        scale_base: int,
+        scale_exponent: int,
+    ) -> None:
+        """
+        Transform the given layer along the same lines as it would be
+        transformed for the quantized model, but applied to the weights and
+        biases file that is sent to the backend.
+
+        Default pre-analysis behavior:
+
+        - If the subclass uses weights/bias (`USE_WB=True`), apply the SAME
+          scaling rules as quantization, but directly mutate the initializers.
+        - Subclasses can override this to implement more complex rewrites
+          (e.g., BatchNorm → Mul/Add).
+
+        Args:
+            node (onnx.NodeProto): Node to transform.
+            graph (onnx.GraphProto): Rest of the ONNX graph, for initializers.
+            initializer_map (dict[str, onnx.TensorProto]): The initializer map.
+            scale_base (int): Scaling base.
+            scale_exponent (int): Scaling exponent.
+
+        NOTE:
+            The resulting model will not make accurate predictions and should
+            be used solely for analysis and for tracking w_and_b.
+        """
+        # If the subclass does not want auto-scaling, do nothing
+        if not getattr(self, "USE_WB", False):
+            return
+
+        # Each quantizer defines which inputs to scale (weight: 1x, bias: 2x, etc.)
+        scale_plan = getattr(self, "SCALE_PLAN", {})
+
+        # Perform the same scaling as quantization, but directly modify initializers
+        for input_idx, scale_mult in scale_plan.items():
+            if input_idx >= len(node.input):
+                continue
+
+            name = node.input[input_idx]
+            if name not in initializer_map:
+                continue  # optional input missing
+
+            tensor = initializer_map[name]
+            arr = numpy_helper.to_array(tensor).astype(np.float64)
+
+            scale = scale_base ** (scale_exponent * scale_mult)
+            new_arr = arr * scale
+
+            # Replace the initializer directly
+            new_tensor = numpy_helper.from_array(new_arr, name=tensor.name)
+
+            # Modify the graph initializer in place
+            for j in range(len(graph.initializer)):
+                if graph.initializer[j].name == tensor.name:
+                    del graph.initializer[j]
+                    break
+            graph.initializer.append(new_tensor)
+
+            initializer_map[tensor.name] = new_tensor
+
 
 class PassthroughQuantizer(BaseOpQuantizer):
     """
@@ -0,0 +1,224 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, ClassVar
+
+ from python.core.circuits.errors import CircuitConfigurationError
+
+ if TYPE_CHECKING:
+     import onnx
+
+ import numpy as np
+ from onnx import helper, numpy_helper
+
+ from python.core.model_processing.onnx_custom_ops.onnx_helpers import extract_attributes
+ from python.core.model_processing.onnx_quantizer.exceptions import InvalidParamError
+ from python.core.model_processing.onnx_quantizer.layers.base import (
+     BaseOpQuantizer,
+     QuantizerBase,
+     ScaleConfig,
+ )
+
+
+ class QuantizeBatchnorm(QuantizerBase):
+     OP_TYPE = "Int64BatchNorm"
+     USE_WB = True
+     USE_SCALING = False
+     SCALE_PLAN: ClassVar = {}
+
+
+ class BatchnormQuantizer(BaseOpQuantizer, QuantizeBatchnorm):
+     """
+     Quantizer for ONNX BatchNormalization layers.
+
+     - Folds the BatchNorm parameters into per-channel mul/add tensors and
+       emits the custom Int64BatchNorm op, making the relevant additional
+       changes to the graph.
+     """
+
+     def __init__(
+         self: BatchnormQuantizer,
+         new_initializers: list[onnx.TensorProto] | None = None,
+     ) -> None:
+         super().__init__()
+         # Only replace if the caller provided something
+         if new_initializers is not None:
+             self.new_initializers = new_initializers
+
+     def _compute_mul_add(
+         self: BatchnormQuantizer,
+         initializer_map: dict[str, onnx.TensorProto],
+         node: onnx.NodeProto,
+         scale_base: int,
+         scale_exponent: int,
+     ) -> tuple[np.ndarray, np.ndarray]:
+         """
+         Compute the 'mul' and 'add' tensors for BatchNorm folding.
+         """
+         self._validate_inputs(node=node)
+         # ONNX BatchNorm inputs: [X, scale, bias, mean, var]
+         scale_factor = scale_base**scale_exponent
+         scale = numpy_helper.to_array(initializer_map[node.input[1]]).astype(np.float32)
+         bias = numpy_helper.to_array(initializer_map[node.input[2]]).astype(np.float32)
+         mean = numpy_helper.to_array(initializer_map[node.input[3]]).astype(np.float32)
+         var = numpy_helper.to_array(initializer_map[node.input[4]]).astype(np.float32)
+
+         # Find the epsilon attribute
+         epsilon_attr = next((a for a in node.attribute if a.name == "epsilon"), None)
+         epsilon = float(epsilon_attr.f) if epsilon_attr else 1e-5
+
+         mul = scale / np.sqrt(var + epsilon)
+         add = bias - mean * mul
+         scaled_add = add * (scale_factor**2)
+         scaled_mul = scale_factor * mul
+         return scaled_mul, scaled_add
+
+     def pre_analysis_transform(
+         self: BatchnormQuantizer,
+         node: onnx.NodeProto,
+         graph: onnx.GraphProto,
+         initializer_map: dict[str, onnx.TensorProto],
+         scale_base: int,
+         scale_exponent: int,
+     ) -> None:
+         # Compute linearized BN tensors
+         mul, add = self._compute_mul_add(
+             initializer_map,
+             node,
+             scale_base=scale_base,
+             scale_exponent=scale_exponent,
+         )
+
+         # Name base
+         node_name = node.name if node.name else node.input[0]
+         mul_name = f"{node_name}_mul"
+         add_name = f"{node_name}_add"
+
+         # Create ONNX tensors
+         mul_tensor = numpy_helper.from_array(mul.astype(np.int64), name=mul_name)
+         add_tensor = numpy_helper.from_array(add.astype(np.int64), name=add_name)
+
+         # Insert them into the graph
+         graph.initializer.extend([mul_tensor, add_tensor])
+         initializer_map[mul_name] = mul_tensor
+         initializer_map[add_name] = add_tensor
+         self.new_initializers.extend([mul_tensor, add_tensor])
+
+         node.input[:] = [node.input[0], mul_name, add_name]
+
+         del node.attribute[:]
+
+     def quantize(
+         self,
+         node: onnx.NodeProto,
+         graph: onnx.GraphProto,
+         scale_config: ScaleConfig,
+         initializer_map: dict[str, onnx.TensorProto],
+     ) -> list[onnx.NodeProto]:
+         _ = graph
+
+         nodes: list[onnx.NodeProto] = []
+
+         # 1. Compute unscaled float mul/add coefficients
+         mul, add = self._compute_mul_add(
+             initializer_map,
+             node,
+             scale_base=1,
+             scale_exponent=1,
+         )
+
+         node_name = node.name if node.name else node.input[0]
+         mul_name = f"{node_name}_mul"
+         add_name = f"{node_name}_add"
+
+         # 2. Create the integer scaler initializer and store the unscaled
+         #    mul and add initializers (as floats)
+         scale_value = self.get_scaling(scale_config.base, scale_config.exponent)
+         scale_name = f"{node.name}_int_scaler"
+         scale_tensor = numpy_helper.from_array(
+             np.array([scale_value], dtype=np.int64),
+             name=scale_name,
+         )
+         self.new_initializers.append(scale_tensor)
+
+         mul_tensor = numpy_helper.from_array(mul.astype(np.float32), name=mul_name)
+         add_tensor = numpy_helper.from_array(add.astype(np.float32), name=add_name)
+
+         initializer_map[mul_name] = mul_tensor
+         initializer_map[add_name] = add_tensor
+
+         # 3. Insert scale and cast for mul_tensor
+         scaled_mul_name, mul_scale_node, mul_cast_node = self.insert_scale_node(
+             tensor=mul_tensor,
+             scale_base=scale_config.base,
+             scale_exponent=scale_config.exponent,
+         )
+
+         # 4. Insert scale and cast for add_tensor
+         scaled_add_name, add_scale_node, add_cast_node = self.insert_scale_node(
+             tensor=add_tensor,
+             scale_base=scale_config.base,
+             scale_exponent=scale_config.exponent * 2,
+         )
+         # Note: order is important here
+         nodes.extend(
+             [
+                 mul_scale_node,
+                 mul_cast_node,
+                 add_scale_node,
+                 add_cast_node,
+             ],
+         )
+
+         # 5. Build the final Int64BatchNorm node
+         attrs = extract_attributes(node)
+         for k, v in getattr(self, "DEFAULT_ATTRS", {}).items():
+             attrs.setdefault(k, v)
+         attrs["rescale"] = 1
+
+         quant_node = helper.make_node(
+             self.OP_TYPE,  # Should be "Int64BatchNorm"
+             inputs=[
+                 node.input[0],    # original X
+                 scaled_mul_name,  # scaled mul
+                 scaled_add_name,  # scaled add
+                 scale_name,       # scaling factor
+             ],
+             outputs=node.output,
+             name=node.name,
+             domain=self.DOMAIN,
+             **attrs,
+         )
+
+         nodes.append(quant_node)
+         return nodes
+
+     def check_supported(
+         self: BatchnormQuantizer,
+         node: onnx.NodeProto,
+         initializer_map: dict[str, onnx.TensorProto] | None = None,
+     ) -> None:
+         """
+         For our current implementation, all BatchNorm inputs
+         (scale, variance, mean, etc.) must be initializers to the circuit
+         and not inputs from earlier in the graph.
+         """
+         if initializer_map is None:
+             msg = "initializer_map is required for BatchNorm support check"
+             raise CircuitConfigurationError(node.name, node.op_type, msg)
+
+         self._validate_inputs(node=node)
+
+         # Check that each of the normalization inputs is an initializer
+         initializer_inputs = node.input[1:]
+         if not all(i in initializer_map for i in initializer_inputs):
+             msg = "Unsupported BatchNorm with normalization inputs not in initializers"
+             raise InvalidParamError(node.name, node.op_type, msg)
+
+     def _validate_inputs(self, node: onnx.NodeProto) -> None:
+         """Validate that BatchNorm has the required number of inputs."""
+         num_inputs = 5
+         if len(node.input) < num_inputs:
+             raise InvalidParamError(
+                 node.name,
+                 node.op_type,
+                 f"BatchNorm requires 5 inputs, got {len(node.input)}",
+             )
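The folding performed by _compute_mul_add is the standard BatchNorm linearization: y = scale * (x - mean) / sqrt(var + eps) + bias can be rewritten as mul * x + add with mul = scale / sqrt(var + eps) and add = bias - mean * mul; the quantizer then scales mul by alpha and add by alpha**2. A quick numeric check of the float identity (values below are made up, with per-channel tensors collapsed to scalars):

    import numpy as np

    x = np.array([0.5, -1.0, 2.0], dtype=np.float32)
    scale, bias = np.float32(1.2), np.float32(0.3)
    mean, var, eps = np.float32(0.1), np.float32(0.8), np.float32(1e-5)

    bn = scale * (x - mean) / np.sqrt(var + eps) + bias   # reference BatchNorm

    mul = scale / np.sqrt(var + eps)                      # same formula as _compute_mul_add
    add = bias - mean * mul
    assert np.allclose(bn, mul * x + add)                 # folding is exact in float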
@@ -0,0 +1,92 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, ClassVar
+
+ if TYPE_CHECKING:
+     import onnx
+
+ from python.core.model_processing.onnx_quantizer.layers.base import (
+     BaseOpQuantizer,
+     QuantizerBase,
+     ScaleConfig,
+ )
+
+
+ class QuantizeClip(QuantizerBase):
+     """
+     Quantization traits for ONNX Clip.
+
+     Semantics:
+     - X is already scaled/cast to INT64 at the graph boundary by the converter.
+     - Clip is elementwise + broadcasting.
+     - The bound inputs (min, max) should live in the *same* fixed-point scale
+       as X, so that Clip(alpha*x; alpha*a, alpha*b) equals alpha * Clip(x; a, b),
+       i.e. the scaled clip matches the original one.
+
+     Implementation:
+     - Treat inputs 1 and 2 (min, max) like "WB-style" slots: we let the
+       QuantizerBase machinery rescale / cast those inputs using the same
+       global scale factor.
+     - No extra internal scaling input is added (USE_SCALING = False).
+     """
+
+     OP_TYPE = "Clip"
+     DOMAIN = ""  # standard ONNX domain
+
+     # We DO want WB-style handling so that min/max initializers get quantized:
+     USE_WB = True
+
+     # Clip does not introduce its own scale input; it just runs in the
+     # existing fixed-point scale.
+     USE_SCALING = False
+
+     # Scale-plan for WB-style slots:
+     #   - Input index 1: min
+     #   - Input index 2: max
+     # Each should be scaled once by the global alpha (same as activations).
+     SCALE_PLAN: ClassVar = {1: 1, 2: 1}
+
+
+ class ClipQuantizer(BaseOpQuantizer, QuantizeClip):
+     """
+     Quantizer for ONNX Clip.
+
+     - Keeps the node op_type as "Clip".
+     - Ensures that any bound inputs (min, max), whether they are dynamic
+       inputs or initializers, are converted to the same INT64 fixed-point
+       representation as X.
+     """
+
+     def __init__(
+         self,
+         new_initializers: dict[str, onnx.TensorProto] | None = None,
+     ) -> None:
+         # Match Max/Min/Add: we simply share the new_initializers dict
+         # with the converter so any constants we add are collected.
+         self.new_initializers = new_initializers
+
+     def quantize(
+         self,
+         node: onnx.NodeProto,
+         graph: onnx.GraphProto,
+         scale_config: ScaleConfig,
+         initializer_map: dict[str, onnx.TensorProto],
+     ) -> list[onnx.NodeProto]:
+         # Delegate to the shared QuantizerBase logic, which will:
+         #   - keep X as-is (already scaled/cast by the converter),
+         #   - rescale / cast min/max according to SCALE_PLAN,
+         #   - update initializers as needed.
+         return QuantizeClip.quantize(self, node, graph, scale_config, initializer_map)
+
+     def check_supported(
+         self,
+         node: onnx.NodeProto,
+         initializer_map: dict[str, onnx.TensorProto] | None = None,
+     ) -> None:
+         """
+         Minimal support check for Clip:
+
+         - Clip is variadic elementwise with optional min/max as inputs or attrs.
+         - We accept both forms; if attrs are present, ORT enforces semantics.
+         - Broadcasting is ONNX-standard; we don't restrict further here.
+         """
+         _ = node, initializer_map
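The invariant the Clip docstring relies on is that clipping commutes with a positive scale factor: Clip(alpha*x; alpha*a, alpha*b) = alpha * Clip(x; a, b) for alpha > 0, which is why the bounds only need the same one-time scaling as the activations. A small check with illustrative values:

    import numpy as np

    alpha = 2 ** 8                                   # illustrative fixed-point scale
    x = np.array([-3.0, 0.2, 5.0])
    a, b = -1.0, 1.0                                 # original min / max bounds

    lhs = np.clip(alpha * x, alpha * a, alpha * b)   # clip in the scaled domain
    rhs = alpha * np.clip(x, a, b)                   # scale the original clip
    assert np.allclose(lhs, rhs)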
@@ -0,0 +1,49 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, ClassVar
+
+ if TYPE_CHECKING:
+     import onnx
+
+ from python.core.model_processing.onnx_quantizer.layers.base import (
+     BaseOpQuantizer,
+     QuantizerBase,
+     ScaleConfig,
+ )
+
+
+ class QuantizeMax(QuantizerBase):
+     OP_TYPE = "Max"
+     DOMAIN = ""
+     USE_WB = True
+     USE_SCALING = False
+     SCALE_PLAN: ClassVar = {1: 1}
+
+
+ class MaxQuantizer(BaseOpQuantizer, QuantizeMax):
+     def __init__(
+         self,
+         new_initializers: list[onnx.TensorProto] | None = None,
+     ) -> None:
+         super().__init__()
+         if new_initializers is not None:
+             # Share the caller-provided buffer instead of the default list.
+             self.new_initializers = new_initializers
+
+     def quantize(
+         self,
+         node: onnx.NodeProto,
+         graph: onnx.GraphProto,
+         scale_config: ScaleConfig,
+         initializer_map: dict[str, onnx.TensorProto],
+     ) -> list[onnx.NodeProto]:
+         # Delegate to the shared QuantizerBase logic
+         return QuantizeMax.quantize(self, node, graph, scale_config, initializer_map)
+
+     def check_supported(
+         self,
+         node: onnx.NodeProto,
+         initializer_map: dict[str, onnx.TensorProto] | None = None,
+     ) -> None:
+         # If later we want to enforce/relax broadcasting, add it here.
+         pass
@@ -0,0 +1,54 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, ClassVar
+
+ if TYPE_CHECKING:
+     import onnx
+
+ from python.core.model_processing.onnx_quantizer.layers.base import (
+     BaseOpQuantizer,
+     QuantizerBase,
+     ScaleConfig,
+ )
+
+
+ class QuantizeMin(QuantizerBase):
+     OP_TYPE = "Min"
+     DOMAIN = ""  # standard ONNX domain
+     USE_WB = True  # let the framework wire inputs/outputs normally
+     USE_SCALING = False  # passthrough: no internal scaling
+     SCALE_PLAN: ClassVar = {1: 1}  # elementwise arity plan
+
+
+ class MinQuantizer(BaseOpQuantizer, QuantizeMin):
+     """
+     Passthrough quantizer for elementwise Min.
+     We rely on the converter to quantize graph inputs; no extra scaling here.
+     """
+
+     def __init__(
+         self: MinQuantizer,
+         new_initializers: list[onnx.TensorProto] | None = None,
+     ) -> None:
+         super().__init__()
+         if new_initializers is not None:
+             self.new_initializers = new_initializers
+
+     def quantize(
+         self: MinQuantizer,
+         node: onnx.NodeProto,
+         graph: onnx.GraphProto,
+         scale_config: ScaleConfig,
+         initializer_map: dict[str, onnx.TensorProto],
+     ) -> list[onnx.NodeProto]:
+         # Delegate to QuantizerBase's generic passthrough implementation.
+         return QuantizeMin.quantize(self, node, graph, scale_config, initializer_map)
+
+     def check_supported(
+         self: MinQuantizer,
+         node: onnx.NodeProto,
+         initializer_map: dict[str, onnx.TensorProto] | None = None,
+     ) -> None:
+         # Min has no attributes; elementwise, variadic ≥ 1 input per ONNX spec.
+         # We mirror Add/Max broadcasting behavior; no extra checks here.
+         _ = node, initializer_map
@@ -0,0 +1,53 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, ClassVar
+
+ if TYPE_CHECKING:
+     import onnx
+
+ from python.core.model_processing.onnx_quantizer.layers.base import (
+     BaseOpQuantizer,
+     QuantizerBase,
+     ScaleConfig,
+ )
+
+
+ class QuantizeMul(QuantizerBase):
+     OP_TYPE = "Int64Mul"
+     USE_WB = True
+     USE_SCALING = True
+     SCALE_PLAN: ClassVar = {0: 1, 1: 1}
+
+
+ class MulQuantizer(BaseOpQuantizer, QuantizeMul):
+     """
+     Quantizer for ONNX Mul layers.
+
+     - Uses the custom Int64Mul layer to incorporate rescaling, and
+       makes the relevant additional changes to the graph.
+     """
+
+     def __init__(
+         self: MulQuantizer,
+         new_initializers: list[onnx.TensorProto] | None = None,
+     ) -> None:
+         super().__init__()
+         # Only replace if the caller provided something
+         if new_initializers is not None:
+             self.new_initializers = new_initializers
+
+     def quantize(
+         self: MulQuantizer,
+         node: onnx.NodeProto,
+         graph: onnx.GraphProto,
+         scale_config: ScaleConfig,
+         initializer_map: dict[str, onnx.TensorProto],
+     ) -> list[onnx.NodeProto]:
+         return QuantizeMul.quantize(self, node, graph, scale_config, initializer_map)
+
+     def check_supported(
+         self: MulQuantizer,
+         node: onnx.NodeProto,
+         initializer_map: dict[str, onnx.TensorProto] | None = None,
+     ) -> None:
+         pass
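Mul is the one elementwise op here that sets USE_SCALING = True: multiplying two values that both carry the fixed-point scale alpha yields a product at alpha**2, so the custom Int64Mul op must divide by alpha once to return to the shared scale. A scale-bookkeeping sketch (values are illustrative; the real custom op's rounding policy may differ):

    alpha = 2 ** 8
    x, w = 1.5, -0.25

    xq, wq = round(x * alpha), round(w * alpha)   # both operands at scale alpha
    prod = xq * wq                                # product sits at scale alpha**2
    rescaled = prod // alpha                      # one division brings it back to alpha

    assert abs(rescaled / alpha - x * w) < 1 / alpha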
@@ -0,0 +1,54 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, ClassVar
+
+ if TYPE_CHECKING:
+     import onnx
+
+ from python.core.model_processing.onnx_quantizer.layers.base import (
+     BaseOpQuantizer,
+     QuantizerBase,
+     ScaleConfig,
+ )
+
+
+ class QuantizeSub(QuantizerBase):
+     OP_TYPE = "Sub"
+     DOMAIN = ""
+     USE_WB = True
+     USE_SCALING = False
+     SCALE_PLAN: ClassVar = {0: 1, 1: 1}
+
+
+ class SubQuantizer(BaseOpQuantizer, QuantizeSub):
+     """
+     Quantizer for ONNX Sub layers.
+
+     - Uses the standard ONNX Sub layer in the standard domain, and
+       makes relevant additional changes to the graph.
+     """
+
+     def __init__(
+         self: SubQuantizer,
+         new_initializers: list[onnx.TensorProto] | None = None,
+     ) -> None:
+         super().__init__()
+         # Only replace if the caller provided something
+         if new_initializers is not None:
+             self.new_initializers = new_initializers
+
+     def quantize(
+         self: SubQuantizer,
+         node: onnx.NodeProto,
+         graph: onnx.GraphProto,
+         scale_config: ScaleConfig,
+         initializer_map: dict[str, onnx.TensorProto],
+     ) -> list[onnx.NodeProto]:
+         return QuantizeSub.quantize(self, node, graph, scale_config, initializer_map)
+
+     def check_supported(
+         self: SubQuantizer,
+         node: onnx.NodeProto,
+         initializer_map: dict[str, onnx.TensorProto] | None = None,
+     ) -> None:
+         pass
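By contrast with Mul, Sub (like Min and Max above) keeps USE_SCALING = False and stays in the standard domain: when both operands share the same positive scale alpha, subtraction and min/max commute with the scaling, so no rescale input is needed. A small check with values chosen so the scaling is exact and no rounding error interferes:

    import numpy as np

    alpha = 2 ** 8
    a = np.array([1.0, -2.5, 0.75])
    b = np.array([0.5, -3.0, 2.0])
    aq, bq = a * alpha, b * alpha                 # exact at this alpha

    assert np.array_equal(aq - bq, (a - b) * alpha)                      # Sub
    assert np.array_equal(np.minimum(aq, bq), np.minimum(a, b) * alpha)  # Min
    assert np.array_equal(np.maximum(aq, bq), np.maximum(a, b) * alpha)  # Max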