onnx2tf 1.29.14__py3-none-any.whl → 1.29.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
onnx2tf/__init__.py CHANGED
@@ -1,3 +1,3 @@
 from onnx2tf.onnx2tf import convert, main
 
-__version__ = '1.29.14'
+__version__ = '1.29.16'
onnx2tf/onnx2tf.py CHANGED
@@ -62,6 +62,146 @@ from onnx2tf.utils.enums import (
 from onnx2tf.utils.logging import *
 from sng4onnx import generate as op_name_auto_generate
 
+def fuse_expanded_qdq_to_qdq(
+    *,
+    graph: gs.Graph,
+):
+    def _get_const_value(tensor):
+        if isinstance(tensor, gs.Constant):
+            return tensor.values
+        if isinstance(tensor, gs.Variable) and len(tensor.inputs) == 1:
+            producer = tensor.inputs[0]
+            if producer.op == 'Constant' and 'value' in producer.attrs:
+                return producer.attrs['value'].values
+        return None
+
+    def _split_const_and_var(inputs):
+        if len(inputs) != 2:
+            return None, None
+        const_val = _get_const_value(inputs[0])
+        if const_val is not None:
+            return const_val, inputs[1]
+        const_val = _get_const_value(inputs[1])
+        if const_val is not None:
+            return const_val, inputs[0]
+        return None, None
+
+    nodes_to_remove = []
+    nodes_to_add = []
+
+    for round_node in list(graph.nodes):
+        if round_node.op != 'Round' or len(round_node.inputs) < 1:
+            continue
+
+        round_in = round_node.inputs[0]
+        if len(round_in.inputs) != 1:
+            continue
+        mul1_node = round_in.inputs[0]
+        if mul1_node.op != 'Mul':
+            continue
+        if len(mul1_node.outputs) != 1 or len(mul1_node.outputs[0].outputs) != 1:
+            continue
+
+        inv_scale, x = _split_const_and_var(mul1_node.inputs)
+        if inv_scale is None or x is None:
+            continue
+
+        relu_node = round_node.outputs[0].outputs[0] if round_node.outputs else None
+        if relu_node is None:
+            continue
+        if relu_node.op == 'Relu':
+            relu_out = relu_node.outputs[0]
+        elif relu_node.op in ['Max', 'Maximum']:
+            max_const, max_var = _split_const_and_var(relu_node.inputs)
+            if max_const is None or max_var != round_node.outputs[0]:
+                continue
+            if np.asarray(max_const).size != 1 or float(np.asarray(max_const).item()) != 0.0:
+                continue
+            relu_out = relu_node.outputs[0]
+        else:
+            continue
+
+        if len(relu_out.outputs) != 1:
+            continue
+        min_node = relu_out.outputs[0]
+        if min_node.op not in ['Min', 'Minimum']:
+            continue
+
+        qmax, min_var = _split_const_and_var(min_node.inputs)
+        if qmax is None or min_var != relu_out:
+            continue
+        if np.asarray(qmax).size != 1:
+            continue
+
+        if len(min_node.outputs) != 1 or len(min_node.outputs[0].outputs) != 1:
+            continue
+        mul2_node = min_node.outputs[0].outputs[0]
+        if mul2_node.op != 'Mul':
+            continue
+
+        scale, min_out = _split_const_and_var(mul2_node.inputs)
+        if scale is None or min_out != min_node.outputs[0]:
+            continue
+        if np.asarray(scale).size != 1:
+            continue
+
+        scale_val = float(np.asarray(scale).item())
+        inv_scale_val = float(np.asarray(inv_scale).item())
+        if scale_val == 0.0 or not np.isfinite(scale_val) or not np.isfinite(inv_scale_val):
+            continue
+        if not np.isclose(scale_val * inv_scale_val, 1.0, rtol=1e-3, atol=1e-6):
+            continue
+
+        if len(mul2_node.outputs) != 1:
+            continue
+        output_var = mul2_node.outputs[0]
+
+        # Require linear chain
+        chain_nodes = [mul1_node, round_node, relu_node, min_node, mul2_node]
+        if any(len(n.outputs) == 0 for n in chain_nodes):
+            continue
+        if len(round_node.outputs[0].outputs) != 1 or len(relu_out.outputs) != 1 or len(min_node.outputs[0].outputs) != 1:
+            continue
+
+        # Build QDQ
+        scale_const = gs.Constant(
+            name=f"{mul2_node.name}_scale",
+            values=np.asarray(scale_val, dtype=np.float32),
+        )
+        zero_const = gs.Constant(
+            name=f"{mul2_node.name}_zero_point",
+            values=np.asarray(0, dtype=np.uint8),
+        )
+        quant_out = gs.Variable(
+            name=f"{output_var.name}_quant",
+            dtype=np.uint8,
+            shape=output_var.shape,
+        )
+        q_node = gs.Node(
+            op="QuantizeLinear",
+            name=f"{mul2_node.name}_QuantizeLinear",
+            inputs=[x, scale_const, zero_const],
+            outputs=[quant_out],
+        )
+        dq_node = gs.Node(
+            op="DequantizeLinear",
+            name=f"{mul2_node.name}_DequantizeLinear",
+            inputs=[quant_out, scale_const, zero_const],
+            outputs=[output_var],
+        )
+        output_var.inputs = [dq_node]
+
+        nodes_to_add.extend([q_node, dq_node])
+        nodes_to_remove.extend(chain_nodes)
+
+    if nodes_to_add:
+        graph.nodes.extend(nodes_to_add)
+    if nodes_to_remove:
+        for n in nodes_to_remove:
+            if n in graph.nodes:
+                graph.nodes.remove(n)
+        graph.cleanup().toposort()
+
 def apply_nonzero_passthrough(
     *,
     graph: gs.Graph,
@@ -848,6 +988,7 @@ def convert(
     if hasattr(onnx_graph, 'metadata_props'):
         metadata_props = onnx_graph.metadata_props
     graph = gs.import_onnx(onnx_graph)
+    fuse_expanded_qdq_to_qdq(graph=graph)
 
     # Cut the ONNX graph when an input name is specified that interrupts the conversion
     if not input_names_to_interrupt_model_conversion:
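Note: the pattern matched by fuse_expanded_qdq_to_qdq above, Mul(1/s) -> Round -> Relu/Max(0) -> Min(qmax) -> Mul(s), is the algebraically expanded form of a uint8 QuantizeLinear/DequantizeLinear pair with zero point 0. A minimal NumPy sketch of that equivalence (the values of s, qmax, and x are illustrative, not taken from the package):

    import numpy as np

    s, qmax = 0.05, 255.0
    x = np.array([-1.0, 0.0, 0.6037, 20.0], dtype=np.float32)

    # Expanded form: Mul(1/s) -> Round -> Relu -> Min(qmax) -> Mul(s)
    expanded = np.minimum(np.maximum(np.round(x * (1.0 / s)), 0.0), qmax) * s

    # Fused form: uint8 QuantizeLinear followed by DequantizeLinear, zero_point = 0
    q = np.clip(np.round(x / s), 0.0, 255.0).astype(np.uint8)
    fused = q.astype(np.float32) * s

    assert np.allclose(expanded, fused)

This is why the pass requires the two Mul constants to be reciprocals (np.isclose(scale_val * inv_scale_val, 1.0)) and the chain to be strictly linear before replacing it with QDQ nodes.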
onnx2tf/ops/Concat.py CHANGED
@@ -234,6 +234,31 @@ def make_node(
             and len(value.shape) > 0 else tf.reshape(value, [1]) for value in values
     ]
 
+    def _infer_concat_axis_runtime(values, fallback_axis):
+        if not values:
+            return fallback_axis
+        shapes = [tf.shape(v) for v in values]
+        shapes = tf.stack(shapes)
+        equal_mask = tf.reduce_all(tf.equal(shapes, shapes[0]), axis=0)
+        diff_mask = tf.cast(tf.logical_not(equal_mask), tf.int32)
+        candidate_count = tf.reduce_sum(diff_mask)
+        axis_from_diff = tf.argmax(diff_mask, axis=0, output_type=tf.int32)
+        fallback_axis_tensor = tf.cast(fallback_axis, tf.int32)
+        is_single = tf.cast(tf.equal(candidate_count, 1), tf.int32)
+        return axis_from_diff * is_single + fallback_axis_tensor * (1 - is_single)
+
+    axis_is_dynamic = False
+    if len(values) > 0:
+        all_none = True
+        for value in values:
+            if value.shape is not None and value.shape != tf.TensorShape(None):
+                if not all([s is None for s in value.shape]):
+                    all_none = False
+                    break
+        if all_none:
+            axis_is_dynamic = True
+    axis_for_concat = _infer_concat_axis_runtime(values, axis) if axis_is_dynamic else axis
+
     # Generation of TF OP
     tf_type = None
     if simple_resize:
@@ -271,7 +296,7 @@ def make_node(
         tf_layers_dict[graph_node_output.name]['tf_node'] = \
             tf.concat(
                 values=values,
-                axis=axis,
+                axis=axis_for_concat,
                 name=graph_node.name,
             )
     except:
@@ -311,51 +336,52 @@ def make_node(
     # This workaround is useful when automatic axis correction is practically difficult,
     # such as when all tensors to be combined originate from Transpose or Reshape.
     # https://github.com/PINTO0309/onnx2tf/issues/473
-    output_tensor_shape = tf_layers_dict[graph_node_output.name]['tf_node'].shape
-    if output_tensor_shape != tf.TensorShape(None):
-        output_tensor_rank = len(output_tensor_shape)
-        if graph_node.outputs[0].shape is not None \
-            and axis != 0 \
-            and output_tensor_rank >= 2 \
-            and before_axis == axis:
-
-            # Search for valid Concat patterns
-            if not shape_is_equal_ignore_order(list(graph_node.outputs[0].shape), list(output_tensor_shape)):
-                matched_axes = []
-                for dummy_axis in range(1, output_tensor_rank):
-                    try:
-                        dummy_concat_tensor = \
-                            tf.concat(
-                                values=values,
-                                axis=dummy_axis,
-                                name=graph_node.name,
-                            )
-                        dummy_output_shape = dummy_concat_tensor.shape
-                        if shape_is_equal_ignore_order(list(graph_node.outputs[0].shape), list(dummy_output_shape)):
-                            matched_axes.append(dummy_axis)
-                    except:
-                        pass
-                # Review Concat axes only if there is one valid join pattern
-                if len(matched_axes) == 1:
-                    tf_layers_dict[graph_node_output.name]['tf_node'] = \
-                        tf.concat(
-                            values=values,
-                            axis=matched_axes[0],
-                            name=graph_node.name,
-                        )
-                    axis = matched_axes[0]
-                elif not nhwc_judge:
-                    onnx_axis = int(graph_node.attrs.get('axis', 0))
-                    onnx_axis = output_tensor_rank - 1 if onnx_axis == -1 else onnx_axis
-                    if onnx_axis == output_tensor_rank - 1 \
-                        and onnx_axis in matched_axes:
-                        tf_layers_dict[graph_node_output.name]['tf_node'] = \
-                            tf.concat(
-                                values=values,
-                                axis=onnx_axis,
-                                name=graph_node.name,
-                            )
-                        axis = onnx_axis
+    if not axis_is_dynamic:
+        output_tensor_shape = tf_layers_dict[graph_node_output.name]['tf_node'].shape
+        if output_tensor_shape != tf.TensorShape(None):
+            output_tensor_rank = len(output_tensor_shape)
+            if graph_node.outputs[0].shape is not None \
+                and axis != 0 \
+                and output_tensor_rank >= 2 \
+                and before_axis == axis:
+
+                # Search for valid Concat patterns
+                if not shape_is_equal_ignore_order(list(graph_node.outputs[0].shape), list(output_tensor_shape)):
+                    matched_axes = []
+                    for dummy_axis in range(1, output_tensor_rank):
+                        try:
+                            dummy_concat_tensor = \
+                                tf.concat(
+                                    values=values,
+                                    axis=dummy_axis,
+                                    name=graph_node.name,
+                                )
+                            dummy_output_shape = dummy_concat_tensor.shape
+                            if shape_is_equal_ignore_order(list(graph_node.outputs[0].shape), list(dummy_output_shape)):
+                                matched_axes.append(dummy_axis)
+                        except:
+                            pass
+                    # Review Concat axes only if there is one valid join pattern
+                    if len(matched_axes) == 1:
+                        tf_layers_dict[graph_node_output.name]['tf_node'] = \
+                            tf.concat(
+                                values=values,
+                                axis=matched_axes[0],
+                                name=graph_node.name,
+                            )
+                        axis = matched_axes[0]
+                    elif not nhwc_judge:
+                        onnx_axis = int(graph_node.attrs.get('axis', 0))
+                        onnx_axis = output_tensor_rank - 1 if onnx_axis == -1 else onnx_axis
+                        if onnx_axis == output_tensor_rank - 1 \
+                            and onnx_axis in matched_axes:
+                            tf_layers_dict[graph_node_output.name]['tf_node'] = \
+                                tf.concat(
+                                    values=values,
+                                    axis=onnx_axis,
+                                    name=graph_node.name,
+                                )
+                            axis = onnx_axis
 
     # Workaround for post-concat accuracy degradation issues
     # Process only in the case of a Concat of two tensors because the process is too redundant.
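Note: _infer_concat_axis_runtime, introduced in the first hunk above, only runs when every input shape is fully unknown at conversion time (axis_is_dynamic). It stacks the runtime shapes and picks the single dimension in which they disagree; if zero or several dimensions disagree, the statically derived axis is kept. A standalone eager-mode sketch (the example shapes are illustrative):

    import tensorflow as tf

    a = tf.zeros([2, 3, 4])
    b = tf.zeros([2, 5, 4])

    shapes = tf.stack([tf.shape(a), tf.shape(b)])                    # [[2 3 4] [2 5 4]]
    equal_mask = tf.reduce_all(tf.equal(shapes, shapes[0]), axis=0)  # [True False True]
    diff_mask = tf.cast(tf.logical_not(equal_mask), tf.int32)        # [0 1 0]
    candidate_count = tf.reduce_sum(diff_mask)                       # 1 -> unambiguous
    axis_from_diff = tf.argmax(diff_mask, axis=0, output_type=tf.int32)
    is_single = tf.cast(tf.equal(candidate_count, 1), tf.int32)
    axis = axis_from_diff * is_single + 0 * (1 - is_single)          # fallback axis 0 here
    print(int(axis))  # 1

The arithmetic select (multiplying by is_single) instead of tf.cond keeps the axis computation inside a single traced graph.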
onnx2tf/ops/DequantizeLinear.py CHANGED
@@ -15,6 +15,43 @@ from onnx2tf.utils.common_functions import (
     post_process_transpose,
 )
 
+def _expand_scale_or_zero_point(
+    *,
+    value,
+    input_tensor,
+    axis: int,
+    block_size: int,
+):
+    value_rank = len(value.shape)
+    input_rank = len(input_tensor.shape)
+
+    if value_rank == 0:
+        return value
+
+    if input_rank <= 0:
+        return value
+
+    if axis < 0 or axis >= input_rank:
+        axis = 0
+
+    # Blocked quantization: expand along axis then slice to input shape
+    if block_size > 0 and value_rank == input_rank:
+        if value.shape[axis] is None \
+            or input_tensor.shape[axis] is None \
+            or value.shape[axis] != input_tensor.shape[axis]:
+            expanded = tf.repeat(value, repeats=block_size, axis=axis)
+            expanded = tf.slice(expanded, [0] * input_rank, tf.shape(input_tensor))
+            return expanded
+        return value
+
+    # Per-axis quantization: reshape 1-D to broadcast
+    if value_rank == 1 and input_rank is not None:
+        shape = [1] * input_rank
+        shape[axis] = -1
+        return tf.reshape(value, shape)
+
+    return value
+
 
 @print_node_info
 @inverted_operation_enable_disable
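Note: _expand_scale_or_zero_point covers the blocked-quantization case (the block_size attribute of DequantizeLinear), where the scale has the same rank as the input but a smaller extent along axis. It repeats each scale value block_size times and trims the overshoot when the last block is partial. A NumPy sketch of the same repeat-then-trim idea (shapes are illustrative):

    import numpy as np

    x = np.arange(10, dtype=np.float32).reshape(1, 10)     # input, shape (1, 10)
    scale = np.array([[0.1, 0.2, 0.3]], dtype=np.float32)  # blocked scale, shape (1, 3)
    axis, block_size = 1, 4                                # 3 blocks of 4 cover 10 elements

    expanded = np.repeat(scale, block_size, axis=axis)     # shape (1, 12)
    expanded = expanded[:, :x.shape[axis]]                 # trim to (1, 10), like tf.slice
    # -> [[0.1 0.1 0.1 0.1 0.2 0.2 0.2 0.2 0.3 0.3]]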
@@ -63,6 +100,11 @@ def make_node(
 
     input_tensor = tf_layers_dict[graph_node_input_1.name]['tf_node'] \
         if isinstance(graph_node_input_1, gs.Variable) else graph_node_input_1
+    input_is_dequantized = False
+    input_nhwc = False
+    if isinstance(graph_node_input_1, gs.Variable):
+        input_is_dequantized = tf_layers_dict.get(graph_node_input_1.name, {}).get('is_dequantized', False)
+        input_nhwc = tf_layers_dict.get(graph_node_input_1.name, {}).get('nhwc', False)
 
     # Pre-process transpose
     input_tensor = pre_process_transpose(
@@ -72,12 +114,10 @@ def make_node(
         **kwargs,
     )
 
-    input_tensor_shape = input_tensor.shape
-    input_tensor_rank = len(input_tensor_shape)
+    input_tensor_rank = len(input_tensor.shape)
+    input_tensor_dtype = input_tensor.dtype
     x_scale = tf_layers_dict[graph_node_input_2.name]['tf_node'] \
         if isinstance(graph_node_input_2, gs.Variable) else graph_node_input_2
-    x_scale_shape = x_scale.shape
-    x_scale_rank = len(x_scale_shape)
     x_zero_point = tf_layers_dict[graph_node_input_3.name]['tf_node'] \
         if isinstance(graph_node_input_3, gs.Variable) else graph_node_input_3
 
@@ -87,48 +127,50 @@ def make_node(
         tensor_rank=input_tensor_rank,
         before_op_output_shape_trans=before_op_output_shape_trans,
     )
+    if input_tensor_rank == 1:
+        axis = 0
 
     # Preserving Graph Structure (Dict)
     tf_layers_dict[graph_node_output.name] = {
         'optype': graph_node.op,
         'shape': shape,
         'dtype': dtype,
+        'is_dequantized': True,
+        'nhwc': input_nhwc,
     }
 
     # Generation of TF OP
 
     input_tensor = tf.cast(input_tensor, tf.float32)
+    x_scale = tf.cast(x_scale, tf.float32)
 
-    # Reshape process is needed for per-axis dequantization
-    # when scale is a 1-D tensor
-    if x_scale_rank == 1 and x_scale_shape[0] != 1:
-        shape_broadcast = list([1 for _ in range(axis)] + [input_tensor_shape[axis]] + [1 for _ in range(axis + 1, input_tensor_rank)])
-        x_scale = tf.reshape(
-            tensor=x_scale,
-            shape=shape_broadcast,
-        )
-    elif x_scale_rank == 1 and x_scale_shape[0] == 1:
-        shape_broadcast = [1 for i in range(input_tensor_rank)]
-
-    subed_tensor = input_tensor
-    if len(graph_node.inputs) >= 3 and input_tensor.dtype != tf.int32:
-        x_zero_point = tf.cast(
-            x=x_zero_point,
-            dtype=tf.float32,
-        )
-        x_zero_point = tf.reshape(
-            tensor=x_zero_point,
-            shape=shape_broadcast,
-        ) if x_scale_rank == 1 else x_zero_point
-        subed_tensor = tf.subtract(
-            x=input_tensor,
-            y=x_zero_point,
-        )
-    tf_layers_dict[graph_node_output.name]['tf_node'] = \
-        tf.multiply(
-            x=subed_tensor,
-            y=x_scale,
-        )
+    block_size = int(graph_node.attrs.get('block_size', 0))
+    x_scale = _expand_scale_or_zero_point(
+        value=x_scale,
+        input_tensor=input_tensor,
+        axis=axis,
+        block_size=block_size,
+    )
+
+    if input_is_dequantized:
+        tf_layers_dict[graph_node_output.name]['tf_node'] = input_tensor
+    else:
+        if x_zero_point is None or input_tensor_dtype == tf.int32:
+            x_zero_point = tf.zeros_like(x_scale)
+        else:
+            x_zero_point = tf.cast(x_zero_point, tf.float32)
+            x_zero_point = _expand_scale_or_zero_point(
+                value=x_zero_point,
+                input_tensor=input_tensor,
+                axis=axis,
+                block_size=block_size,
+            )
+
+        tf_layers_dict[graph_node_output.name]['tf_node'] = \
+            tf.multiply(
+                x=tf.subtract(input_tensor, x_zero_point),
+                y=x_scale,
+            )
 
     if hasattr(tf_layers_dict[graph_node_output.name]['tf_node'], 'numpy'):
         tf_layers_dict[graph_node_output.name]['tf_node'] = \
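Note: in the per-axis case the rewritten op relies on _expand_scale_or_zero_point reshaping the 1-D scale and zero point to [1, ..., -1, ..., 1] so that (input - zero_point) * scale broadcasts along axis. A NumPy sketch of that broadcast (the NCHW example with axis=1 is illustrative):

    import numpy as np

    x = np.random.randint(0, 256, size=(1, 3, 2, 2)).astype(np.float32)
    scale = np.array([0.1, 0.2, 0.4], dtype=np.float32)  # one scale per channel
    zero_point = np.full(3, 128.0, dtype=np.float32)     # one zero point per channel
    shape = [1, -1, 1, 1]                                # rank 4, axis = 1

    y = (x - zero_point.reshape(shape)) * scale.reshape(shape)

    # Per-channel reference loop
    ref = np.stack([(x[:, c] - 128.0) * s for c, s in enumerate(scale)], axis=1)
    assert np.allclose(y, ref)

The is_dequantized flag stored on the output lets a downstream DequantizeLinear skip this arithmetic entirely when its input is already a dequantized float tensor.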
onnx2tf/ops/DynamicQuantizeLinear.py CHANGED
@@ -43,6 +43,9 @@ def make_node(
         graph_node.inputs[0],
         before_op_output_shape_trans,
     )
+    input_nhwc = False
+    if isinstance(graph_node_input_1, gs.Variable):
+        input_nhwc = tf_layers_dict.get(graph_node_input_1.name, {}).get('nhwc', False)
     graph_node_output_1: gs.Variable = graph_node.outputs[0]
     o1_shape = graph_node_output_1.shape
     o1_dtype = graph_node_output_1.dtype
@@ -58,6 +61,8 @@ def make_node(
         'optype': graph_node.op,
         'shape': o1_shape,
         'dtype': o1_dtype,
+        'is_dequantized': True,
+        'nhwc': input_nhwc,
     }
     tf_layers_dict[graph_node_output_2.name] = {
         'optype': graph_node.op,
@@ -82,35 +87,31 @@ def make_node(
     )
 
     # Generation of TF OP
-    dtype = tf.uint8
-    qmin = dtype.min
-    qmax = dtype.max
-    min_x = tf.math.minimum(0., tf.math.reduce_min(input_tensor_1))
-    max_x = tf.math.maximum(0., tf.math.reduce_max(input_tensor_1))
+    qmin = 0.0
+    qmax = 255.0
+    min_x = tf.math.minimum(0.0, tf.math.reduce_min(input_tensor_1))
+    max_x = tf.math.maximum(0.0, tf.math.reduce_max(input_tensor_1))
     y_scale = (max_x - min_x) / (qmax - qmin)
     intermediate_zero_point = qmin - (min_x / y_scale)
-    y_zero_point = tf.clip_by_value(
-        tf.round(
-            x=intermediate_zero_point
-        ),
+    clipped_zero_point = tf.clip_by_value(
+        intermediate_zero_point,
         clip_value_min=qmin,
         clip_value_max=qmax,
     )
-    y = tf.cast(
-        tf.clip_by_value(
-            (tf.round(input_tensor_1 / y_scale) + y_zero_point),
-            clip_value_min=qmin,
-            clip_value_max=qmax,
-        ),
-        dtype=dtype,
+    y_zero_point = tf.round(clipped_zero_point)
+    y_quant = tf.clip_by_value(
+        tf.round(input_tensor_1 / y_scale) + y_zero_point,
+        clip_value_min=qmin,
+        clip_value_max=qmax,
     )
+    y = (y_quant - y_zero_point) * y_scale
 
     tf_layers_dict[graph_node_output_1.name]['tf_node'] = y
     tf_layers_dict[graph_node_output_2.name]['tf_node'] = y_scale
     tf_layers_dict[graph_node_output_3.name]['tf_node'] = \
         tf.cast(
             x=y_zero_point,
-            dtype=dtype,
+            dtype=tf.uint8,
         )
 
     # Post-process transpose
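Note: the change above stops casting the first output to uint8; y is now the fake-quantized float (y_quant - y_zero_point) * y_scale, which downstream float TF ops can consume directly, while only the zero-point output keeps the uint8 cast. A NumPy sketch of the new computation (the input values are illustrative):

    import numpy as np

    x = np.array([-0.3, 0.0, 0.7, 1.9], dtype=np.float32)
    qmin, qmax = 0.0, 255.0

    min_x = min(0.0, float(x.min()))
    max_x = max(0.0, float(x.max()))
    y_scale = (max_x - min_x) / (qmax - qmin)
    y_zero_point = np.round(np.clip(qmin - min_x / y_scale, qmin, qmax))

    y_quant = np.clip(np.round(x / y_scale) + y_zero_point, qmin, qmax)
    y = (y_quant - y_zero_point) * y_scale  # float32 output, no uint8 cast

The zero point is also now clipped before rounding rather than after, so a saturated intermediate zero point rounds exactly to qmin or qmax.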