emx-onnx-cgen 0.3.8__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. emx_onnx_cgen/_build_info.py +1 -1
  2. emx_onnx_cgen/_version.py +2 -2
  3. emx_onnx_cgen/cli.py +1025 -162
  4. emx_onnx_cgen/codegen/__init__.py +2 -0
  5. emx_onnx_cgen/codegen/c_emitter.py +2081 -458
  6. emx_onnx_cgen/compiler.py +157 -75
  7. emx_onnx_cgen/determinism.py +39 -0
  8. emx_onnx_cgen/ir/context.py +25 -15
  9. emx_onnx_cgen/ir/model.py +1 -0
  10. emx_onnx_cgen/ir/op_base.py +32 -7
  11. emx_onnx_cgen/ir/ops/__init__.py +20 -0
  12. emx_onnx_cgen/ir/ops/elementwise.py +138 -22
  13. emx_onnx_cgen/ir/ops/misc.py +95 -0
  14. emx_onnx_cgen/ir/ops/nn.py +361 -38
  15. emx_onnx_cgen/ir/ops/reduce.py +1 -16
  16. emx_onnx_cgen/lowering/__init__.py +9 -0
  17. emx_onnx_cgen/lowering/arg_reduce.py +0 -4
  18. emx_onnx_cgen/lowering/average_pool.py +157 -27
  19. emx_onnx_cgen/lowering/bernoulli.py +73 -0
  20. emx_onnx_cgen/lowering/common.py +48 -0
  21. emx_onnx_cgen/lowering/concat.py +41 -7
  22. emx_onnx_cgen/lowering/conv.py +19 -8
  23. emx_onnx_cgen/lowering/conv_integer.py +103 -0
  24. emx_onnx_cgen/lowering/dequantize_linear.py +128 -0
  25. emx_onnx_cgen/lowering/elementwise.py +140 -43
  26. emx_onnx_cgen/lowering/gather.py +11 -2
  27. emx_onnx_cgen/lowering/gemm.py +7 -124
  28. emx_onnx_cgen/lowering/global_max_pool.py +0 -5
  29. emx_onnx_cgen/lowering/gru.py +323 -0
  30. emx_onnx_cgen/lowering/hamming_window.py +104 -0
  31. emx_onnx_cgen/lowering/hardmax.py +1 -37
  32. emx_onnx_cgen/lowering/identity.py +7 -6
  33. emx_onnx_cgen/lowering/logsoftmax.py +1 -35
  34. emx_onnx_cgen/lowering/lp_pool.py +15 -4
  35. emx_onnx_cgen/lowering/matmul.py +3 -105
  36. emx_onnx_cgen/lowering/optional_has_element.py +28 -0
  37. emx_onnx_cgen/lowering/qlinear_mul.py +116 -0
  38. emx_onnx_cgen/lowering/reduce.py +0 -5
  39. emx_onnx_cgen/lowering/reshape.py +7 -16
  40. emx_onnx_cgen/lowering/shape.py +14 -8
  41. emx_onnx_cgen/lowering/slice.py +14 -4
  42. emx_onnx_cgen/lowering/softmax.py +1 -35
  43. emx_onnx_cgen/lowering/split.py +37 -3
  44. emx_onnx_cgen/lowering/tfidf_vectorizer.py +199 -0
  45. emx_onnx_cgen/lowering/tile.py +38 -1
  46. emx_onnx_cgen/lowering/topk.py +1 -5
  47. emx_onnx_cgen/lowering/transpose.py +9 -3
  48. emx_onnx_cgen/lowering/unsqueeze.py +11 -16
  49. emx_onnx_cgen/lowering/upsample.py +151 -0
  50. emx_onnx_cgen/lowering/variadic.py +1 -1
  51. emx_onnx_cgen/lowering/where.py +0 -5
  52. emx_onnx_cgen/onnx_import.py +578 -14
  53. emx_onnx_cgen/ops.py +3 -0
  54. emx_onnx_cgen/templates/adagrad_op.c.j2 +16 -0
  55. emx_onnx_cgen/templates/arg_reduce_op.c.j2 +18 -0
  56. emx_onnx_cgen/templates/attention_op.c.j2 +189 -0
  57. emx_onnx_cgen/templates/average_pool_op.c.j2 +126 -0
  58. emx_onnx_cgen/templates/batch_norm_op.c.j2 +11 -0
  59. emx_onnx_cgen/templates/bernoulli_op.c.j2 +34 -0
  60. emx_onnx_cgen/templates/binary_op.c.j2 +9 -0
  61. emx_onnx_cgen/templates/cast_op.c.j2 +9 -0
  62. emx_onnx_cgen/templates/clip_op.c.j2 +14 -0
  63. emx_onnx_cgen/templates/concat_op.c.j2 +28 -0
  64. emx_onnx_cgen/templates/constant_of_shape_op.c.j2 +10 -0
  65. emx_onnx_cgen/templates/conv_integer_op.c.j2 +34 -0
  66. emx_onnx_cgen/templates/conv_op.c.j2 +32 -0
  67. emx_onnx_cgen/templates/conv_transpose_op.c.j2 +43 -0
  68. emx_onnx_cgen/templates/cumsum_op.c.j2 +51 -0
  69. emx_onnx_cgen/templates/depth_to_space_op.c.j2 +26 -0
  70. emx_onnx_cgen/templates/dequantize_linear_op.c.j2 +10 -0
  71. emx_onnx_cgen/templates/einsum_op.c.j2 +55 -0
  72. emx_onnx_cgen/templates/expand_op.c.j2 +14 -0
  73. emx_onnx_cgen/templates/eye_like_op.c.j2 +27 -0
  74. emx_onnx_cgen/templates/gather_elements_op.c.j2 +13 -0
  75. emx_onnx_cgen/templates/gather_nd_op.c.j2 +29 -0
  76. emx_onnx_cgen/templates/gather_op.c.j2 +13 -0
  77. emx_onnx_cgen/templates/gemm_op.c.j2 +35 -0
  78. emx_onnx_cgen/templates/grid_sample_op.c.j2 +184 -0
  79. emx_onnx_cgen/templates/group_normalization_op.c.j2 +46 -0
  80. emx_onnx_cgen/templates/gru_op.c.j2 +152 -0
  81. emx_onnx_cgen/templates/hamming_window_op.c.j2 +12 -0
  82. emx_onnx_cgen/templates/hardmax_op.c.j2 +24 -0
  83. emx_onnx_cgen/templates/identity_op.c.j2 +9 -0
  84. emx_onnx_cgen/templates/instance_normalization_op.c.j2 +35 -0
  85. emx_onnx_cgen/templates/layer_normalization_op.c.j2 +65 -0
  86. emx_onnx_cgen/templates/logsoftmax_op.c.j2 +27 -0
  87. emx_onnx_cgen/templates/lp_normalization_op.c.j2 +27 -0
  88. emx_onnx_cgen/templates/lp_pool_op.c.j2 +24 -0
  89. emx_onnx_cgen/templates/lrn_op.c.j2 +20 -0
  90. emx_onnx_cgen/templates/lstm_op.c.j2 +175 -0
  91. emx_onnx_cgen/templates/matmul_op.c.j2 +13 -0
  92. emx_onnx_cgen/templates/maxpool_op.c.j2 +118 -0
  93. emx_onnx_cgen/templates/mean_variance_normalization_op.c.j2 +34 -0
  94. emx_onnx_cgen/templates/multi_input_op.c.j2 +15 -0
  95. emx_onnx_cgen/templates/negative_log_likelihood_loss_op.c.j2 +54 -0
  96. emx_onnx_cgen/templates/nonmax_suppression_op.c.j2 +179 -0
  97. emx_onnx_cgen/templates/nonzero_op.c.j2 +15 -0
  98. emx_onnx_cgen/templates/one_hot_op.c.j2 +25 -0
  99. emx_onnx_cgen/templates/optional_has_element_op.c.j2 +4 -0
  100. emx_onnx_cgen/templates/pad_op.c.j2 +80 -0
  101. emx_onnx_cgen/templates/qlinear_matmul_op.c.j2 +33 -0
  102. emx_onnx_cgen/templates/qlinear_mul_op.c.j2 +18 -0
  103. emx_onnx_cgen/templates/quantize_linear_op.c.j2 +13 -0
  104. emx_onnx_cgen/templates/range_op.c.j2 +8 -0
  105. emx_onnx_cgen/templates/reduce_op.c.j2 +28 -0
  106. emx_onnx_cgen/templates/reduce_op_dynamic.c.j2 +77 -0
  107. emx_onnx_cgen/templates/reshape_op.c.j2 +18 -0
  108. emx_onnx_cgen/templates/resize_op.c.j2 +277 -0
  109. emx_onnx_cgen/templates/rms_normalization_op.c.j2 +28 -0
  110. emx_onnx_cgen/templates/rotary_embedding_op.c.j2 +66 -0
  111. emx_onnx_cgen/templates/scatter_nd_op.c.j2 +52 -0
  112. emx_onnx_cgen/templates/shape_op.c.j2 +6 -0
  113. emx_onnx_cgen/templates/size_op.c.j2 +4 -0
  114. emx_onnx_cgen/templates/slice_op.c.j2 +9 -0
  115. emx_onnx_cgen/templates/slice_op_dynamic.c.j2 +70 -0
  116. emx_onnx_cgen/templates/softmax_cross_entropy_loss_op.c.j2 +105 -0
  117. emx_onnx_cgen/templates/softmax_op.c.j2 +26 -0
  118. emx_onnx_cgen/templates/space_to_depth_op.c.j2 +22 -0
  119. emx_onnx_cgen/templates/split_op.c.j2 +18 -0
  120. emx_onnx_cgen/templates/tensor_scatter_op.c.j2 +44 -0
  121. emx_onnx_cgen/templates/testbench.c.j2 +161 -0
  122. emx_onnx_cgen/templates/tfidf_vectorizer_op.c.j2 +144 -0
  123. emx_onnx_cgen/templates/tile_op.c.j2 +14 -0
  124. emx_onnx_cgen/templates/topk_op.c.j2 +50 -0
  125. emx_onnx_cgen/templates/transpose_op.c.j2 +9 -0
  126. emx_onnx_cgen/templates/trilu_op.c.j2 +33 -0
  127. emx_onnx_cgen/templates/unary_op.c.j2 +23 -0
  128. emx_onnx_cgen/templates/where_op.c.j2 +9 -0
  129. emx_onnx_cgen/verification.py +45 -5
  130. {emx_onnx_cgen-0.3.8.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/METADATA +33 -15
  131. emx_onnx_cgen-0.4.1.dev0.dist-info/RECORD +190 -0
  132. {emx_onnx_cgen-0.3.8.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/WHEEL +1 -1
  133. emx_onnx_cgen/runtime/__init__.py +0 -1
  134. emx_onnx_cgen/runtime/evaluator.py +0 -2955
  135. emx_onnx_cgen-0.3.8.dist-info/RECORD +0 -107
  136. {emx_onnx_cgen-0.3.8.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/entry_points.txt +0 -0
  137. {emx_onnx_cgen-0.3.8.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/top_level.txt +0 -0
@@ -41,21 +41,19 @@ def _unsupported_value_type(value_info: onnx.ValueInfoProto) -> UnsupportedOpErr
41
41
  )
42
42
 
43
43
 
44
- def _tensor_type(
45
- value_info: onnx.ValueInfoProto,
44
+ def _tensor_type_from_proto(
45
+ tensor_type: onnx.TypeProto.Tensor,
46
+ name: str,
46
47
  *,
47
48
  dim_param_override: tuple[str | None, ...] | None = None,
48
49
  ) -> TensorType:
49
- if value_info.type.WhichOneof("value") != "tensor_type":
50
- raise _unsupported_value_type(value_info)
51
- tensor_type = value_info.type.tensor_type
52
50
  if not tensor_type.HasField("elem_type"):
53
- raise ShapeInferenceError(f"Missing elem_type for tensor '{value_info.name}'")
51
+ raise ShapeInferenceError(f"Missing elem_type for tensor '{name}'")
54
52
  dtype = scalar_type_from_onnx(tensor_type.elem_type)
55
53
  if dtype is None:
56
54
  raise UnsupportedOpError(
57
55
  "Unsupported elem_type "
58
- f"{_format_elem_type(tensor_type.elem_type)} for tensor '{value_info.name}'."
56
+ f"{_format_elem_type(tensor_type.elem_type)} for tensor '{name}'."
59
57
  )
60
58
  shape = []
61
59
  dim_params = []
@@ -72,7 +70,7 @@ def _tensor_type(
72
70
  if dim_param:
73
71
  shape.append(1)
74
72
  continue
75
- raise ShapeInferenceError(f"Dynamic dim for tensor '{value_info.name}'")
73
+ raise ShapeInferenceError(f"Dynamic dim for tensor '{name}'")
76
74
  shape.append(dim.dim_value)
77
75
  return TensorType(
78
76
  dtype=dtype,
@@ -81,6 +79,40 @@ def _tensor_type(
81
79
  )
82
80
 
83
81
 
82
+ def _value_type(
83
+ value_info: onnx.ValueInfoProto,
84
+ *,
85
+ dim_param_override: tuple[str | None, ...] | None = None,
86
+ ) -> TensorType:
87
+ value_kind = value_info.type.WhichOneof("value")
88
+ if value_kind == "tensor_type":
89
+ return _tensor_type_from_proto(
90
+ value_info.type.tensor_type,
91
+ value_info.name,
92
+ dim_param_override=dim_param_override,
93
+ )
94
+ if value_kind == "optional_type":
95
+ elem_type = value_info.type.optional_type.elem_type
96
+ elem_kind = elem_type.WhichOneof("value")
97
+ if elem_kind != "tensor_type":
98
+ raise UnsupportedOpError(
99
+ f"Unsupported optional element type '{elem_kind}' for '{value_info.name}'. "
100
+ "Hint: export the model with optional tensor inputs/outputs."
101
+ )
102
+ tensor_type = _tensor_type_from_proto(
103
+ elem_type.tensor_type,
104
+ value_info.name,
105
+ dim_param_override=dim_param_override,
106
+ )
107
+ return TensorType(
108
+ dtype=tensor_type.dtype,
109
+ shape=tensor_type.shape,
110
+ dim_params=tensor_type.dim_params,
111
+ is_optional=True,
112
+ )
113
+ raise _unsupported_value_type(value_info)
114
+
115
+
84
116
  def _values(
85
117
  value_infos: Iterable[onnx.ValueInfoProto],
86
118
  *,
@@ -90,7 +122,7 @@ def _values(
90
122
  return tuple(
91
123
  Value(
92
124
  name=vi.name,
93
- type=_tensor_type(
125
+ type=_value_type(
94
126
  vi, dim_param_override=dim_param_by_name.get(vi.name)
95
127
  ),
96
128
  )
@@ -103,8 +135,18 @@ def _collect_dim_params(
103
135
  ) -> dict[str, tuple[str | None, ...]]:
104
136
  dim_params: dict[str, tuple[str | None, ...]] = {}
105
137
  for value_info in value_infos:
138
+ value_kind = value_info.type.WhichOneof("value")
139
+ if value_kind == "tensor_type":
140
+ tensor_type = value_info.type.tensor_type
141
+ elif value_kind == "optional_type":
142
+ elem_type = value_info.type.optional_type.elem_type
143
+ if elem_type.WhichOneof("value") != "tensor_type":
144
+ continue
145
+ tensor_type = elem_type.tensor_type
146
+ else:
147
+ continue
106
148
  dims = []
107
- for dim in value_info.type.tensor_type.shape.dim:
149
+ for dim in tensor_type.shape.dim:
108
150
  dim_param = dim.dim_param if dim.HasField("dim_param") else ""
109
151
  dims.append(dim_param or None)
110
152
  if any(dims):
@@ -112,6 +154,61 @@ def _collect_dim_params(
112
154
  return dim_params
113
155
 
114
156
 
157
+ def _value_info_complete(value_info: onnx.ValueInfoProto) -> bool:
158
+ value_kind = value_info.type.WhichOneof("value")
159
+ if value_kind == "tensor_type":
160
+ tensor_type = value_info.type.tensor_type
161
+ elif value_kind == "optional_type":
162
+ elem_type = value_info.type.optional_type.elem_type
163
+ if elem_type.WhichOneof("value") != "tensor_type":
164
+ return False
165
+ tensor_type = elem_type.tensor_type
166
+ else:
167
+ return False
168
+ if not tensor_type.HasField("elem_type"):
169
+ return False
170
+ if not tensor_type.HasField("shape"):
171
+ return False
172
+ for dim in tensor_type.shape.dim:
173
+ if dim.HasField("dim_value"):
174
+ continue
175
+ if dim.HasField("dim_param"):
176
+ continue
177
+ return False
178
+ return True
179
+
180
+
181
+ def _needs_shape_inference(model: onnx.ModelProto) -> bool:
182
+ graph = model.graph
183
+ value_info_by_name = {
184
+ value_info.name: value_info for value_info in graph.value_info
185
+ }
186
+ output_names = {value_info.name for value_info in graph.output}
187
+ initializer_names = {initializer.name for initializer in graph.initializer}
188
+ initializer_names.update(
189
+ sparse_init.name for sparse_init in graph.sparse_initializer
190
+ )
191
+ for node in graph.node:
192
+ for output in node.output:
193
+ if not output:
194
+ continue
195
+ if output in output_names or output in value_info_by_name:
196
+ continue
197
+ return True
198
+ for value_info in graph.value_info:
199
+ if not _value_info_complete(value_info):
200
+ return True
201
+ for value_info in graph.output:
202
+ if not _value_info_complete(value_info):
203
+ return True
204
+ for value_info in graph.input:
205
+ if value_info.name in initializer_names:
206
+ continue
207
+ if not _value_info_complete(value_info):
208
+ return True
209
+ return False
210
+
211
+
115
212
  def _initializer(value: onnx.TensorProto) -> Initializer:
116
213
  dtype = scalar_type_from_onnx(value.data_type)
117
214
  if dtype is None:
@@ -136,6 +233,471 @@ def _node_attrs(node: onnx.NodeProto) -> dict[str, object]:
136
233
  return {attr.name: helper.get_attribute_value(attr) for attr in node.attribute}
137
234
 
138
235
 
236
+ def _find_value_info(
237
+ graph: onnx.GraphProto, name: str
238
+ ) -> onnx.ValueInfoProto | None:
239
+ for value_info in graph.input:
240
+ if value_info.name == name:
241
+ return value_info
242
+ for value_info in graph.value_info:
243
+ if value_info.name == name:
244
+ return value_info
245
+ for value_info in graph.output:
246
+ if value_info.name == name:
247
+ return value_info
248
+ return None
249
+
250
+
251
+ def _tensor_shape_from_value_info(
252
+ graph: onnx.GraphProto, name: str
253
+ ) -> tuple[int, ...]:
254
+ value_info = _find_value_info(graph, name)
255
+ if value_info is None:
256
+ for initializer in graph.initializer:
257
+ if initializer.name == name:
258
+ return tuple(int(dim) for dim in initializer.dims)
259
+ raise ShapeInferenceError(
260
+ f"Missing shape for '{name}' in Scan expansion. "
261
+ "Hint: run ONNX shape inference or export with static shapes."
262
+ )
263
+ tensor_type = value_info.type.tensor_type
264
+ if not tensor_type.HasField("shape"):
265
+ raise ShapeInferenceError(
266
+ f"Missing shape for '{name}' in Scan expansion. "
267
+ "Hint: run ONNX shape inference or export with static shapes."
268
+ )
269
+ dims: list[int] = []
270
+ for dim in tensor_type.shape.dim:
271
+ if not dim.HasField("dim_value"):
272
+ raise ShapeInferenceError(
273
+ f"Dynamic dim for '{name}' in Scan expansion. "
274
+ "Hint: export with static shapes."
275
+ )
276
+ dims.append(int(dim.dim_value))
277
+ return tuple(dims)
278
+
279
+
280
+ def _scan_attr_ints(
281
+ attrs: dict[str, object],
282
+ key: str,
283
+ *,
284
+ default: tuple[int, ...],
285
+ ) -> tuple[int, ...]:
286
+ value = attrs.get(key)
287
+ if value is None:
288
+ return default
289
+ return tuple(int(item) for item in value)
290
+
291
+
292
+ def _onnx_opset_version(model: onnx.ModelProto) -> int | None:
293
+ for opset in model.opset_import:
294
+ if opset.domain in {"", "ai.onnx"}:
295
+ return int(opset.version)
296
+ return None
297
+
298
+
299
+ def _scan_expected_axis(is_opset8: bool) -> int:
300
+ return 1 if is_opset8 else 0
301
+
302
+
303
+ def _scan_axes_and_directions(
304
+ attrs: dict[str, object],
305
+ *,
306
+ num_scan_inputs: int,
307
+ scan_output_count: int,
308
+ is_opset8: bool,
309
+ ) -> None:
310
+ default_axis = _scan_expected_axis(is_opset8)
311
+ scan_input_axes = _scan_attr_ints(
312
+ attrs,
313
+ "scan_input_axes",
314
+ default=(default_axis,) * num_scan_inputs,
315
+ )
316
+ scan_output_axes = _scan_attr_ints(
317
+ attrs,
318
+ "scan_output_axes",
319
+ default=(default_axis,) * scan_output_count,
320
+ )
321
+ scan_input_directions = _scan_attr_ints(
322
+ attrs,
323
+ "scan_input_directions",
324
+ default=(0,) * num_scan_inputs,
325
+ )
326
+ scan_output_directions = _scan_attr_ints(
327
+ attrs,
328
+ "scan_output_directions",
329
+ default=(0,) * scan_output_count,
330
+ )
331
+ if any(axis != default_axis for axis in scan_input_axes):
332
+ raise UnsupportedOpError(
333
+ f"Scan only supports scan_input_axes={default_axis}"
334
+ )
335
+ if any(axis != default_axis for axis in scan_output_axes):
336
+ raise UnsupportedOpError(
337
+ f"Scan only supports scan_output_axes={default_axis}"
338
+ )
339
+ if any(direction != 0 for direction in scan_input_directions):
340
+ raise UnsupportedOpError(
341
+ "Scan only supports scan_input_directions=0"
342
+ )
343
+ if any(direction != 0 for direction in scan_output_directions):
344
+ raise UnsupportedOpError(
345
+ "Scan only supports scan_output_directions=0"
346
+ )
347
+
348
+
349
+ def _scan_sequence_length(
350
+ graph: onnx.GraphProto,
351
+ scan_input_names: list[str],
352
+ *,
353
+ is_opset8: bool,
354
+ ) -> tuple[int, int | None]:
355
+ scan_input_shapes = [
356
+ _tensor_shape_from_value_info(graph, name)
357
+ for name in scan_input_names
358
+ ]
359
+ if not scan_input_shapes:
360
+ raise UnsupportedOpError("Scan requires scan inputs")
361
+ if is_opset8:
362
+ if any(len(shape) < 2 for shape in scan_input_shapes):
363
+ raise UnsupportedOpError(
364
+ "Scan opset 8 inputs must include batch and sequence dims"
365
+ )
366
+ batch_size = scan_input_shapes[0][0]
367
+ sequence_len = scan_input_shapes[0][1]
368
+ if batch_size != 1:
369
+ raise UnsupportedOpError(
370
+ "Scan opset 8 currently supports batch size 1 only"
371
+ )
372
+ if sequence_len <= 0:
373
+ raise UnsupportedOpError("Scan requires positive sequence length")
374
+ if any(
375
+ shape[0] != batch_size or shape[1] != sequence_len
376
+ for shape in scan_input_shapes
377
+ ):
378
+ raise UnsupportedOpError(
379
+ "Scan inputs must share the same batch and sequence length"
380
+ )
381
+ return sequence_len, batch_size
382
+ sequence_len = scan_input_shapes[0][0]
383
+ if sequence_len <= 0:
384
+ raise UnsupportedOpError("Scan requires positive sequence length")
385
+ if any(shape[0] != sequence_len for shape in scan_input_shapes):
386
+ raise UnsupportedOpError(
387
+ "Scan inputs must share the same sequence length"
388
+ )
389
+ return sequence_len, None
390
+
391
+
392
+ def _scan_body_initializers(
393
+ body: onnx.GraphProto,
394
+ *,
395
+ prefix: str,
396
+ new_initializers: list[onnx.TensorProto],
397
+ ) -> dict[str, str]:
398
+ initializer_map: dict[str, str] = {}
399
+ for initializer in body.initializer:
400
+ new_name = f"{prefix}_init_{initializer.name}"
401
+ initializer_map[initializer.name] = new_name
402
+ array = numpy_helper.to_array(initializer)
403
+ new_initializers.append(numpy_helper.from_array(array, name=new_name))
404
+ return initializer_map
405
+
406
+
407
+ def _scan_state_inputs(
408
+ graph: onnx.GraphProto,
409
+ *,
410
+ prefix: str,
411
+ state_input_names: list[str],
412
+ new_nodes: list[onnx.NodeProto],
413
+ is_opset8: bool,
414
+ batch_size: int | None,
415
+ ) -> list[str]:
416
+ state_names = list(state_input_names)
417
+ if is_opset8 and state_input_names:
418
+ for state_index, state_name in enumerate(state_input_names):
419
+ state_shape = _tensor_shape_from_value_info(graph, state_name)
420
+ if not state_shape:
421
+ raise UnsupportedOpError(
422
+ "Scan opset 8 state inputs must be tensors"
423
+ )
424
+ if batch_size is not None and state_shape[0] != batch_size:
425
+ raise UnsupportedOpError(
426
+ "Scan opset 8 state inputs must match batch size"
427
+ )
428
+ squeezed_name = f"{prefix}_state{state_index}_squeezed"
429
+ new_nodes.append(
430
+ helper.make_node(
431
+ "Squeeze",
432
+ inputs=[state_name],
433
+ outputs=[squeezed_name],
434
+ name=f"{squeezed_name}_node",
435
+ axes=[0],
436
+ )
437
+ )
438
+ state_names[state_index] = squeezed_name
439
+ return state_names
440
+
441
+
442
+ def _scan_iteration_inputs(
443
+ *,
444
+ prefix: str,
445
+ iter_index: int,
446
+ scan_input_names: list[str],
447
+ new_nodes: list[onnx.NodeProto],
448
+ is_opset8: bool,
449
+ ) -> list[str]:
450
+ scan_iter_inputs: list[str] = []
451
+ slice_axis = _scan_expected_axis(is_opset8)
452
+ squeeze_axes = [0, 1] if is_opset8 else [0]
453
+ for scan_index, scan_name in enumerate(scan_input_names):
454
+ slice_out = f"{prefix}_iter{iter_index}_scan{scan_index}_slice"
455
+ squeeze_out = f"{prefix}_iter{iter_index}_scan{scan_index}_value"
456
+ new_nodes.append(
457
+ helper.make_node(
458
+ "Slice",
459
+ inputs=[scan_name],
460
+ outputs=[slice_out],
461
+ name=f"{slice_out}_node",
462
+ starts=[iter_index],
463
+ ends=[iter_index + 1],
464
+ axes=[slice_axis],
465
+ )
466
+ )
467
+ new_nodes.append(
468
+ helper.make_node(
469
+ "Squeeze",
470
+ inputs=[slice_out],
471
+ outputs=[squeeze_out],
472
+ name=f"{squeeze_out}_node",
473
+ axes=squeeze_axes,
474
+ )
475
+ )
476
+ scan_iter_inputs.append(squeeze_out)
477
+ return scan_iter_inputs
478
+
479
+
480
+ def _expand_scan_nodes(model: onnx.ModelProto) -> tuple[onnx.ModelProto, bool]:
481
+ graph = model.graph
482
+ opset_version = _onnx_opset_version(model)
483
+ if opset_version is None:
484
+ return model, False
485
+
486
+ new_nodes: list[onnx.NodeProto] = []
487
+ new_initializers: list[onnx.TensorProto] = []
488
+ scan_index = 0
489
+ expanded = False
490
+ is_opset8 = opset_version <= 8
491
+
492
+ for node in graph.node:
493
+ if node.op_type != "Scan":
494
+ new_nodes.append(node)
495
+ continue
496
+
497
+ expanded = True
498
+ scan_index += 1
499
+ attrs = _node_attrs(node)
500
+ body = attrs.get("body")
501
+ if not isinstance(body, onnx.GraphProto):
502
+ raise UnsupportedOpError("Scan requires a body graph")
503
+ num_scan_inputs = int(attrs.get("num_scan_inputs", 0))
504
+ if num_scan_inputs <= 0:
505
+ raise UnsupportedOpError("Scan requires num_scan_inputs")
506
+ input_names = list(node.input)
507
+ if is_opset8:
508
+ if not input_names:
509
+ raise UnsupportedOpError("Scan in opset 8 requires inputs")
510
+ sequence_lens = input_names.pop(0)
511
+ if sequence_lens:
512
+ raise UnsupportedOpError(
513
+ "Scan sequence_lens input is not supported"
514
+ )
515
+ num_state_inputs = len(input_names) - num_scan_inputs
516
+ if num_state_inputs < 0:
517
+ raise UnsupportedOpError("Scan input count is invalid")
518
+ if len(body.input) != num_state_inputs + num_scan_inputs:
519
+ raise UnsupportedOpError(
520
+ "Scan body input count must match state and scan inputs"
521
+ )
522
+ if len(body.output) != len(node.output):
523
+ raise UnsupportedOpError(
524
+ "Scan body output count must match Scan outputs"
525
+ )
526
+ scan_output_count = len(node.output) - num_state_inputs
527
+ _scan_axes_and_directions(
528
+ attrs,
529
+ num_scan_inputs=num_scan_inputs,
530
+ scan_output_count=scan_output_count,
531
+ is_opset8=is_opset8,
532
+ )
533
+
534
+ state_input_names = input_names[:num_state_inputs]
535
+ scan_input_names = input_names[num_state_inputs:]
536
+ sequence_len, batch_size = _scan_sequence_length(
537
+ graph,
538
+ scan_input_names,
539
+ is_opset8=is_opset8,
540
+ )
541
+
542
+ prefix = node.name or f"scan_{scan_index}"
543
+ initializer_map = _scan_body_initializers(
544
+ body,
545
+ prefix=prefix,
546
+ new_initializers=new_initializers,
547
+ )
548
+
549
+ state_names = _scan_state_inputs(
550
+ graph,
551
+ prefix=prefix,
552
+ state_input_names=state_input_names,
553
+ new_nodes=new_nodes,
554
+ is_opset8=is_opset8,
555
+ batch_size=batch_size,
556
+ )
557
+ scan_output_buffers: list[list[str]] = [
558
+ [] for _ in range(scan_output_count)
559
+ ]
560
+
561
+ for iter_index in range(sequence_len):
562
+ scan_iter_inputs = _scan_iteration_inputs(
563
+ prefix=prefix,
564
+ iter_index=iter_index,
565
+ scan_input_names=scan_input_names,
566
+ new_nodes=new_nodes,
567
+ is_opset8=is_opset8,
568
+ )
569
+ name_map: dict[str, str] = {}
570
+ for index, value in enumerate(body.input[:num_state_inputs]):
571
+ name_map[value.name] = state_names[index]
572
+ for index, value in enumerate(
573
+ body.input[num_state_inputs : num_state_inputs + num_scan_inputs]
574
+ ):
575
+ name_map[value.name] = scan_iter_inputs[index]
576
+ for original, mapped in initializer_map.items():
577
+ name_map[original] = mapped
578
+
579
+ for body_node in body.node:
580
+ body_attrs = _node_attrs(body_node)
581
+ mapped_inputs = [
582
+ name_map.get(input_name, input_name)
583
+ for input_name in body_node.input
584
+ ]
585
+ mapped_outputs: list[str] = []
586
+ for output_name in body_node.output:
587
+ if not output_name:
588
+ mapped_outputs.append("")
589
+ continue
590
+ mapped_name = (
591
+ f"{prefix}_iter{iter_index}_{output_name}"
592
+ )
593
+ name_map[output_name] = mapped_name
594
+ mapped_outputs.append(mapped_name)
595
+ new_nodes.append(
596
+ helper.make_node(
597
+ body_node.op_type,
598
+ inputs=mapped_inputs,
599
+ outputs=mapped_outputs,
600
+ name=(
601
+ f"{prefix}_iter{iter_index}_{body_node.name}"
602
+ if body_node.name
603
+ else ""
604
+ ),
605
+ domain=body_node.domain,
606
+ **body_attrs,
607
+ )
608
+ )
609
+
610
+ for index, output in enumerate(body.output[:num_state_inputs]):
611
+ mapped_output = name_map.get(output.name)
612
+ if mapped_output is None:
613
+ raise UnsupportedOpError(
614
+ "Scan body did not produce a required state output"
615
+ )
616
+ state_names[index] = mapped_output
617
+
618
+ for output_index, output in enumerate(
619
+ body.output[
620
+ num_state_inputs : num_state_inputs + scan_output_count
621
+ ]
622
+ ):
623
+ mapped_output = name_map.get(output.name)
624
+ if mapped_output is None:
625
+ raise UnsupportedOpError(
626
+ "Scan body did not produce a required scan output"
627
+ )
628
+ unsqueeze_out = (
629
+ f"{prefix}_iter{iter_index}_scanout{output_index}"
630
+ )
631
+ unsqueeze_axes = [0, 1] if is_opset8 else [0]
632
+ new_nodes.append(
633
+ helper.make_node(
634
+ "Unsqueeze",
635
+ inputs=[mapped_output],
636
+ outputs=[unsqueeze_out],
637
+ name=f"{unsqueeze_out}_node",
638
+ axes=unsqueeze_axes,
639
+ )
640
+ )
641
+ scan_output_buffers[output_index].append(unsqueeze_out)
642
+
643
+ for index, output_name in enumerate(node.output[:num_state_inputs]):
644
+ state_value = state_names[index]
645
+ if is_opset8:
646
+ expanded_state = f"{prefix}_state_output_{index}_expanded"
647
+ new_nodes.append(
648
+ helper.make_node(
649
+ "Unsqueeze",
650
+ inputs=[state_value],
651
+ outputs=[expanded_state],
652
+ name=f"{expanded_state}_node",
653
+ axes=[0],
654
+ )
655
+ )
656
+ state_value = expanded_state
657
+ if state_value == output_name:
658
+ continue
659
+ new_nodes.append(
660
+ helper.make_node(
661
+ "Identity",
662
+ inputs=[state_value],
663
+ outputs=[output_name],
664
+ name=f"{prefix}_state_output_{index}",
665
+ )
666
+ )
667
+
668
+ for output_index, output_name in enumerate(
669
+ node.output[num_state_inputs : num_state_inputs + scan_output_count]
670
+ ):
671
+ buffer = scan_output_buffers[output_index]
672
+ concat_axis = _scan_expected_axis(is_opset8)
673
+ if len(buffer) == 1:
674
+ new_nodes.append(
675
+ helper.make_node(
676
+ "Identity",
677
+ inputs=buffer,
678
+ outputs=[output_name],
679
+ name=f"{prefix}_scan_output_{output_index}",
680
+ )
681
+ )
682
+ else:
683
+ new_nodes.append(
684
+ helper.make_node(
685
+ "Concat",
686
+ inputs=buffer,
687
+ outputs=[output_name],
688
+ name=f"{prefix}_scan_output_{output_index}",
689
+ axis=concat_axis,
690
+ )
691
+ )
692
+
693
+ if expanded:
694
+ del graph.node[:]
695
+ graph.node.extend(new_nodes)
696
+ if new_initializers:
697
+ graph.initializer.extend(new_initializers)
698
+ return model, expanded
699
+
700
+
139
701
  def _constant_initializer(node: onnx.NodeProto) -> Initializer:
140
702
  if len(node.output) != 1:
141
703
  raise UnsupportedOpError("Constant must have exactly one output")
@@ -209,16 +771,18 @@ def _constant_initializer(node: onnx.NodeProto) -> Initializer:
209
771
 
210
772
 
211
773
  def import_onnx(model: onnx.ModelProto) -> Graph:
774
+ model, _ = _expand_scan_nodes(model)
212
775
  dim_param_by_name = _collect_dim_params(
213
776
  tuple(model.graph.input) + tuple(model.graph.output)
214
777
  )
215
778
  opset_imports = tuple(
216
779
  (opset.domain, opset.version) for opset in model.opset_import
217
780
  )
218
- try:
219
- model = shape_inference.infer_shapes(model, data_prop=True)
220
- except Exception as exc: # pragma: no cover - onnx inference errors
221
- raise ShapeInferenceError("ONNX shape inference failed") from exc
781
+ if _needs_shape_inference(model):
782
+ try:
783
+ model = shape_inference.infer_shapes(model, data_prop=True)
784
+ except Exception as exc: # pragma: no cover - onnx inference errors
785
+ raise ShapeInferenceError("ONNX shape inference failed") from exc
222
786
  graph = model.graph
223
787
  base_initializers = [_initializer(value) for value in graph.initializer]
224
788
  constant_initializers: list[Initializer] = []
emx_onnx_cgen/ops.py CHANGED
@@ -554,6 +554,9 @@ def unary_op_symbol(function: ScalarFunction, *, dtype: ScalarType) -> str | Non
554
554
  def apply_binary_op(
555
555
  op_spec: BinaryOpSpec, left: np.ndarray, right: np.ndarray
556
556
  ) -> np.ndarray:
557
+ if op_spec.apply is np.power:
558
+ with np.errstate(invalid="ignore", divide="ignore", over="ignore"):
559
+ return op_spec.apply(left, right)
557
560
  return op_spec.apply(left, right)
558
561
 
559
562
 
@@ -0,0 +1,16 @@
1
+ static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
2
+ const {{ c_type }} r = {{ rate }}[0] / ({{ one_literal }} + ({{ c_type }}){{ timestep }}[0] * {{ decay_factor_literal }});
3
+ {% for tensor in tensors %}
4
+ {% for dim in tensor.shape %}
5
+ for (idx_t {{ tensor.loop_vars[loop.index0] }} = 0; {{ tensor.loop_vars[loop.index0] }} < {{ dim }}; ++{{ tensor.loop_vars[loop.index0] }}) {
6
+ {% endfor %}
7
+ {{ c_type }} g_regularized = {{ norm_coefficient_literal }} * {{ tensor.input_expr }} + {{ tensor.grad_expr }};
8
+ {{ c_type }} h_new = {{ tensor.acc_expr }} + g_regularized * g_regularized;
9
+ {{ tensor.acc_output_expr }} = h_new;
10
+ {{ c_type }} h_adaptive = {{ sqrt_fn }}(h_new) + {{ epsilon_literal }};
11
+ {{ tensor.output_expr }} = {{ tensor.input_expr }} - r * g_regularized / h_adaptive;
12
+ {% for _ in tensor.shape %}
13
+ }
14
+ {% endfor %}
15
+ {% endfor %}
16
+ }