emx-onnx-cgen 0.3.7__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emx_onnx_cgen/_build_info.py +1 -1
- emx_onnx_cgen/_version.py +2 -2
- emx_onnx_cgen/cli.py +1025 -162
- emx_onnx_cgen/codegen/__init__.py +2 -0
- emx_onnx_cgen/codegen/c_emitter.py +2081 -458
- emx_onnx_cgen/compiler.py +157 -75
- emx_onnx_cgen/determinism.py +39 -0
- emx_onnx_cgen/ir/context.py +25 -15
- emx_onnx_cgen/ir/model.py +1 -0
- emx_onnx_cgen/ir/op_base.py +32 -7
- emx_onnx_cgen/ir/ops/__init__.py +20 -0
- emx_onnx_cgen/ir/ops/elementwise.py +138 -22
- emx_onnx_cgen/ir/ops/misc.py +95 -0
- emx_onnx_cgen/ir/ops/nn.py +361 -38
- emx_onnx_cgen/ir/ops/reduce.py +1 -16
- emx_onnx_cgen/lowering/__init__.py +9 -0
- emx_onnx_cgen/lowering/arg_reduce.py +0 -4
- emx_onnx_cgen/lowering/average_pool.py +157 -27
- emx_onnx_cgen/lowering/bernoulli.py +73 -0
- emx_onnx_cgen/lowering/common.py +48 -0
- emx_onnx_cgen/lowering/concat.py +41 -7
- emx_onnx_cgen/lowering/conv.py +19 -8
- emx_onnx_cgen/lowering/conv_integer.py +103 -0
- emx_onnx_cgen/lowering/dequantize_linear.py +128 -0
- emx_onnx_cgen/lowering/elementwise.py +140 -43
- emx_onnx_cgen/lowering/gather.py +11 -2
- emx_onnx_cgen/lowering/gemm.py +7 -124
- emx_onnx_cgen/lowering/global_max_pool.py +0 -5
- emx_onnx_cgen/lowering/gru.py +323 -0
- emx_onnx_cgen/lowering/hamming_window.py +104 -0
- emx_onnx_cgen/lowering/hardmax.py +1 -37
- emx_onnx_cgen/lowering/identity.py +7 -6
- emx_onnx_cgen/lowering/logsoftmax.py +1 -35
- emx_onnx_cgen/lowering/lp_pool.py +15 -4
- emx_onnx_cgen/lowering/matmul.py +3 -105
- emx_onnx_cgen/lowering/optional_has_element.py +28 -0
- emx_onnx_cgen/lowering/qlinear_mul.py +116 -0
- emx_onnx_cgen/lowering/reduce.py +0 -5
- emx_onnx_cgen/lowering/reshape.py +7 -16
- emx_onnx_cgen/lowering/shape.py +14 -8
- emx_onnx_cgen/lowering/slice.py +14 -4
- emx_onnx_cgen/lowering/softmax.py +1 -35
- emx_onnx_cgen/lowering/split.py +37 -3
- emx_onnx_cgen/lowering/tfidf_vectorizer.py +199 -0
- emx_onnx_cgen/lowering/tile.py +38 -1
- emx_onnx_cgen/lowering/topk.py +1 -5
- emx_onnx_cgen/lowering/transpose.py +9 -3
- emx_onnx_cgen/lowering/unsqueeze.py +11 -16
- emx_onnx_cgen/lowering/upsample.py +151 -0
- emx_onnx_cgen/lowering/variadic.py +1 -1
- emx_onnx_cgen/lowering/where.py +0 -5
- emx_onnx_cgen/onnx_import.py +578 -14
- emx_onnx_cgen/ops.py +3 -0
- emx_onnx_cgen/templates/adagrad_op.c.j2 +16 -0
- emx_onnx_cgen/templates/arg_reduce_op.c.j2 +18 -0
- emx_onnx_cgen/templates/attention_op.c.j2 +189 -0
- emx_onnx_cgen/templates/average_pool_op.c.j2 +126 -0
- emx_onnx_cgen/templates/batch_norm_op.c.j2 +11 -0
- emx_onnx_cgen/templates/bernoulli_op.c.j2 +34 -0
- emx_onnx_cgen/templates/binary_op.c.j2 +9 -0
- emx_onnx_cgen/templates/cast_op.c.j2 +9 -0
- emx_onnx_cgen/templates/clip_op.c.j2 +14 -0
- emx_onnx_cgen/templates/concat_op.c.j2 +28 -0
- emx_onnx_cgen/templates/constant_of_shape_op.c.j2 +10 -0
- emx_onnx_cgen/templates/conv_integer_op.c.j2 +34 -0
- emx_onnx_cgen/templates/conv_op.c.j2 +32 -0
- emx_onnx_cgen/templates/conv_transpose_op.c.j2 +43 -0
- emx_onnx_cgen/templates/cumsum_op.c.j2 +51 -0
- emx_onnx_cgen/templates/depth_to_space_op.c.j2 +26 -0
- emx_onnx_cgen/templates/dequantize_linear_op.c.j2 +10 -0
- emx_onnx_cgen/templates/einsum_op.c.j2 +55 -0
- emx_onnx_cgen/templates/expand_op.c.j2 +14 -0
- emx_onnx_cgen/templates/eye_like_op.c.j2 +27 -0
- emx_onnx_cgen/templates/gather_elements_op.c.j2 +13 -0
- emx_onnx_cgen/templates/gather_nd_op.c.j2 +29 -0
- emx_onnx_cgen/templates/gather_op.c.j2 +13 -0
- emx_onnx_cgen/templates/gemm_op.c.j2 +35 -0
- emx_onnx_cgen/templates/grid_sample_op.c.j2 +184 -0
- emx_onnx_cgen/templates/group_normalization_op.c.j2 +46 -0
- emx_onnx_cgen/templates/gru_op.c.j2 +152 -0
- emx_onnx_cgen/templates/hamming_window_op.c.j2 +12 -0
- emx_onnx_cgen/templates/hardmax_op.c.j2 +24 -0
- emx_onnx_cgen/templates/identity_op.c.j2 +9 -0
- emx_onnx_cgen/templates/instance_normalization_op.c.j2 +35 -0
- emx_onnx_cgen/templates/layer_normalization_op.c.j2 +65 -0
- emx_onnx_cgen/templates/logsoftmax_op.c.j2 +27 -0
- emx_onnx_cgen/templates/lp_normalization_op.c.j2 +27 -0
- emx_onnx_cgen/templates/lp_pool_op.c.j2 +24 -0
- emx_onnx_cgen/templates/lrn_op.c.j2 +20 -0
- emx_onnx_cgen/templates/lstm_op.c.j2 +175 -0
- emx_onnx_cgen/templates/matmul_op.c.j2 +13 -0
- emx_onnx_cgen/templates/maxpool_op.c.j2 +118 -0
- emx_onnx_cgen/templates/mean_variance_normalization_op.c.j2 +34 -0
- emx_onnx_cgen/templates/multi_input_op.c.j2 +15 -0
- emx_onnx_cgen/templates/negative_log_likelihood_loss_op.c.j2 +54 -0
- emx_onnx_cgen/templates/nonmax_suppression_op.c.j2 +179 -0
- emx_onnx_cgen/templates/nonzero_op.c.j2 +15 -0
- emx_onnx_cgen/templates/one_hot_op.c.j2 +25 -0
- emx_onnx_cgen/templates/optional_has_element_op.c.j2 +4 -0
- emx_onnx_cgen/templates/pad_op.c.j2 +80 -0
- emx_onnx_cgen/templates/qlinear_matmul_op.c.j2 +33 -0
- emx_onnx_cgen/templates/qlinear_mul_op.c.j2 +18 -0
- emx_onnx_cgen/templates/quantize_linear_op.c.j2 +13 -0
- emx_onnx_cgen/templates/range_op.c.j2 +8 -0
- emx_onnx_cgen/templates/reduce_op.c.j2 +28 -0
- emx_onnx_cgen/templates/reduce_op_dynamic.c.j2 +77 -0
- emx_onnx_cgen/templates/reshape_op.c.j2 +18 -0
- emx_onnx_cgen/templates/resize_op.c.j2 +277 -0
- emx_onnx_cgen/templates/rms_normalization_op.c.j2 +28 -0
- emx_onnx_cgen/templates/rotary_embedding_op.c.j2 +66 -0
- emx_onnx_cgen/templates/scatter_nd_op.c.j2 +52 -0
- emx_onnx_cgen/templates/shape_op.c.j2 +6 -0
- emx_onnx_cgen/templates/size_op.c.j2 +4 -0
- emx_onnx_cgen/templates/slice_op.c.j2 +9 -0
- emx_onnx_cgen/templates/slice_op_dynamic.c.j2 +70 -0
- emx_onnx_cgen/templates/softmax_cross_entropy_loss_op.c.j2 +105 -0
- emx_onnx_cgen/templates/softmax_op.c.j2 +26 -0
- emx_onnx_cgen/templates/space_to_depth_op.c.j2 +22 -0
- emx_onnx_cgen/templates/split_op.c.j2 +18 -0
- emx_onnx_cgen/templates/tensor_scatter_op.c.j2 +44 -0
- emx_onnx_cgen/templates/testbench.c.j2 +161 -0
- emx_onnx_cgen/templates/tfidf_vectorizer_op.c.j2 +144 -0
- emx_onnx_cgen/templates/tile_op.c.j2 +14 -0
- emx_onnx_cgen/templates/topk_op.c.j2 +50 -0
- emx_onnx_cgen/templates/transpose_op.c.j2 +9 -0
- emx_onnx_cgen/templates/trilu_op.c.j2 +33 -0
- emx_onnx_cgen/templates/unary_op.c.j2 +23 -0
- emx_onnx_cgen/templates/where_op.c.j2 +9 -0
- emx_onnx_cgen/verification.py +45 -5
- {emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/METADATA +33 -15
- emx_onnx_cgen-0.4.1.dev0.dist-info/RECORD +190 -0
- {emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/WHEEL +1 -1
- emx_onnx_cgen/runtime/__init__.py +0 -1
- emx_onnx_cgen/runtime/evaluator.py +0 -2955
- emx_onnx_cgen-0.3.7.dist-info/RECORD +0 -107
- {emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/entry_points.txt +0 -0
- {emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/top_level.txt +0 -0

emx_onnx_cgen/templates/instance_normalization_op.c.j2
@@ -0,0 +1,35 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in shape[:2] %}
+for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ c_type }} sum = {{ zero_literal }};
+{% for dim in shape[2:] %}
+for (idx_t {{ loop_vars[loop.index0 + 2] }} = 0; {{ loop_vars[loop.index0 + 2] }} < {{ dim }}; ++{{ loop_vars[loop.index0 + 2] }}) {
+{% endfor %}
+sum += {{ input0 }}[{{ loop_vars[0] }}][{{ loop_vars[1] }}]{% for var in loop_vars[2:] %}[{{ var }}]{% endfor %};
+{% for _ in shape[2:] %}
+}
+{% endfor %}
+{{ c_type }} mean = sum / {{ spatial_size }};
+{{ c_type }} var = {{ zero_literal }};
+{% for dim in shape[2:] %}
+for (idx_t {{ loop_vars[loop.index0 + 2] }} = 0; {{ loop_vars[loop.index0 + 2] }} < {{ dim }}; ++{{ loop_vars[loop.index0 + 2] }}) {
+{% endfor %}
+{{ c_type }} diff = {{ input0 }}[{{ loop_vars[0] }}][{{ loop_vars[1] }}]{% for var in loop_vars[2:] %}[{{ var }}]{% endfor %} - mean;
+var += diff * diff;
+{% for _ in shape[2:] %}
+}
+{% endfor %}
+{{ c_type }} denom = {{ sqrt_fn }}(var / {{ spatial_size }} + {{ epsilon_literal }});
+{% for dim in shape[2:] %}
+for (idx_t {{ loop_vars[loop.index0 + 2] }} = 0; {{ loop_vars[loop.index0 + 2] }} < {{ dim }}; ++{{ loop_vars[loop.index0 + 2] }}) {
+{% endfor %}
+{{ output }}[{{ loop_vars[0] }}][{{ loop_vars[1] }}]{% for var in loop_vars[2:] %}[{{ var }}]{% endfor %} =
+({{ input0 }}[{{ loop_vars[0] }}][{{ loop_vars[1] }}]{% for var in loop_vars[2:] %}[{{ var }}]{% endfor %} - mean) / denom * {{ scale }}[{{ loop_vars[1] }}] + {{ bias }}[{{ loop_vars[1] }}];
+{% for _ in shape[2:] %}
+}
+{% endfor %}
+{% for _ in shape[:2] %}
+}
+{% endfor %}
+}
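
For orientation, here is a sketch of what this template might render to for one concrete node. All bindings are hypothetical (op_name node0_instancenorm, c_type float, shape [1, 2, 4, 4], loop_vars i0..i3, spatial_size 16, sqrt_fn sqrtf, epsilon_literal 1e-05f, simple array parameters); idx_t is assumed to be an integer typedef emitted elsewhere in the generated header, and indentation is added here for readability (the template emits flush-left lines).

  /* Hypothetical render of the instance-normalization template. */
  static inline void node0_instancenorm(const float x[1][2][4][4],
                                        const float scale[2],
                                        const float bias[2],
                                        float out[1][2][4][4]) {
    for (idx_t i0 = 0; i0 < 1; ++i0) {
      for (idx_t i1 = 0; i1 < 2; ++i1) {
        float sum = 0.0f;                  /* mean over the 4x4 spatial slice */
        for (idx_t i2 = 0; i2 < 4; ++i2) {
          for (idx_t i3 = 0; i3 < 4; ++i3) {
            sum += x[i0][i1][i2][i3];
          }
        }
        float mean = sum / 16;
        float var = 0.0f;                  /* biased variance of the slice */
        for (idx_t i2 = 0; i2 < 4; ++i2) {
          for (idx_t i3 = 0; i3 < 4; ++i3) {
            float diff = x[i0][i1][i2][i3] - mean;
            var += diff * diff;
          }
        }
        float denom = sqrtf(var / 16 + 1e-05f);
        /* Normalize, then the per-channel affine transform. */
        for (idx_t i2 = 0; i2 < 4; ++i2) {
          for (idx_t i3 = 0; i3 < 4; ++i3) {
            out[i0][i1][i2][i3] =
                (x[i0][i1][i2][i3] - mean) / denom * scale[i1] + bias[i1];
          }
        }
      }
    }
  }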

emx_onnx_cgen/templates/layer_normalization_op.c.j2
@@ -0,0 +1,65 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in prefix_shape %}
+for (idx_t {{ prefix_loop_vars[loop.index0] }} = 0; {{ prefix_loop_vars[loop.index0] }} < {{ dim }}; ++{{ prefix_loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ acc_type }} sum = {{ acc_zero_literal }};
+{% if use_kahan %}
+{{ acc_type }} sum_comp = {{ acc_zero_literal }};
+{% endif %}
+{% for dim in norm_shape %}
+for (idx_t {{ norm_loop_vars[loop.index0] }} = 0; {{ norm_loop_vars[loop.index0] }} < {{ dim }}; ++{{ norm_loop_vars[loop.index0] }}) {
+{% endfor %}
+{% if use_kahan %}
+{{ acc_type }} kahan_value = ({{ acc_type }}){{ input0 }}{% for var in prefix_loop_vars %}[{{ var }}]{% endfor %}{% for var in norm_loop_vars %}[{{ var }}]{% endfor %};
+{{ acc_type }} kahan_y = kahan_value - sum_comp;
+{{ acc_type }} kahan_t = sum + kahan_y;
+sum_comp = (kahan_t - sum) - kahan_y;
+sum = kahan_t;
+{% else %}
+sum += ({{ acc_type }}){{ input0 }}{% for var in prefix_loop_vars %}[{{ var }}]{% endfor %}{% for var in norm_loop_vars %}[{{ var }}]{% endfor %};
+{% endif %}
+{% for _ in norm_shape %}
+}
+{% endfor %}
+{{ acc_type }} mean = sum / {{ inner }};
+{{ acc_type }} var = {{ acc_zero_literal }};
+{% if use_kahan %}
+{{ acc_type }} var_comp = {{ acc_zero_literal }};
+{% endif %}
+{% for dim in norm_shape %}
+for (idx_t {{ norm_loop_vars[loop.index0] }} = 0; {{ norm_loop_vars[loop.index0] }} < {{ dim }}; ++{{ norm_loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ acc_type }} diff = ({{ acc_type }}){{ input0 }}{% for var in prefix_loop_vars %}[{{ var }}]{% endfor %}{% for var in norm_loop_vars %}[{{ var }}]{% endfor %} - mean;
+{% if use_kahan %}
+{{ acc_type }} kahan_value = diff * diff;
+{{ acc_type }} kahan_y = kahan_value - var_comp;
+{{ acc_type }} kahan_t = var + kahan_y;
+var_comp = (kahan_t - var) - kahan_y;
+var = kahan_t;
+{% else %}
+var += diff * diff;
+{% endif %}
+{% for _ in norm_shape %}
+}
+{% endfor %}
+var = var / {{ inner }};
+{{ acc_type }} inv_std = {{ acc_one_literal }} / {{ acc_sqrt_fn }}(var + {{ acc_epsilon_literal }});
+{% if mean_output %}
+{{ mean_output }}{% for var in mean_index_vars %}[{{ var }}]{% endfor %} = mean;
+{% endif %}
+{% if invstd_output %}
+{{ invstd_output }}{% for var in mean_index_vars %}[{{ var }}]{% endfor %} = inv_std;
+{% endif %}
+{% for dim in norm_shape %}
+for (idx_t {{ norm_loop_vars[loop.index0] }} = 0; {{ norm_loop_vars[loop.index0] }} < {{ dim }}; ++{{ norm_loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ acc_type }} value = (({{ acc_type }}){{ input0 }}{% for var in prefix_loop_vars %}[{{ var }}]{% endfor %}{% for var in norm_loop_vars %}[{{ var }}]{% endfor %} - mean) * inv_std;
+value = value * {{ scale }}{% for var in scale_index_vars %}[{{ var }}]{% endfor %}{% if bias %} + {{ bias }}{% for var in bias_index_vars %}[{{ var }}]{% endfor %}{% endif %};
+{{ output }}{% for var in prefix_loop_vars %}[{{ var }}]{% endfor %}{% for var in norm_loop_vars %}[{{ var }}]{% endfor %} = value;
+{% for _ in norm_shape %}
+}
+{% endfor %}
+{% for _ in prefix_shape %}
+}
+{% endfor %}
+}
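
The use_kahan branch is plain Kahan compensated summation: a second variable carries the rounding error lost by each addition, so long reductions do not drift. A self-contained sketch of the same scheme, reusing the template's variable names (the driver and its constants are hypothetical, not part of the package):

  #include <stdio.h>

  int main(void) {
    float naive = 0.0f;
    float sum = 0.0f;
    float sum_comp = 0.0f;                    /* same role as sum_comp above */
    for (int i = 0; i < 20000000; ++i) {
      float kahan_value = 1.0f;
      naive += kahan_value;                   /* stalls at 2^24 = 16777216 */
      float kahan_y = kahan_value - sum_comp; /* re-inject last step's lost bits */
      float kahan_t = sum + kahan_y;          /* low-order bits of kahan_y drop */
      sum_comp = (kahan_t - sum) - kahan_y;   /* algebraically 0; captures the drop */
      sum = kahan_t;
    }
    /* On a typical IEEE-754 float build: naive 16777216.0, kahan 20000000.0. */
    printf("naive %.1f  kahan %.1f\n", naive, sum);
    return 0;
  }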

emx_onnx_cgen/templates/logsoftmax_op.c.j2
@@ -0,0 +1,27 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+const {{ c_type }} *input_flat = (const {{ c_type }} *){{ input0 }};
+{{ c_type }} *output_flat = ({{ c_type }} *){{ output }};
+const idx_t outer = {{ outer }};
+const idx_t axis_size = {{ axis_size }};
+const idx_t inner = {{ inner }};
+for (idx_t outer_idx = 0; outer_idx < outer; ++outer_idx) {
+for (idx_t inner_idx = 0; inner_idx < inner; ++inner_idx) {
+idx_t base = (outer_idx * axis_size * inner) + inner_idx;
+{{ c_type }} max_value = input_flat[base];
+for (idx_t axis_idx = 1; axis_idx < axis_size; ++axis_idx) {
+{{ c_type }} value = input_flat[base + axis_idx * inner];
+max_value = {{ max_fn }}(max_value, value);
+}
+{{ c_type }} sum = 0;
+for (idx_t axis_idx = 0; axis_idx < axis_size; ++axis_idx) {
+{{ c_type }} value = {{ exp_fn }}(input_flat[base + axis_idx * inner] - max_value);
+sum += value;
+}
+{{ c_type }} logsum = {{ log_fn }}(sum);
+for (idx_t axis_idx = 0; axis_idx < axis_size; ++axis_idx) {
+{{ c_type }} value = input_flat[base + axis_idx * inner] - max_value;
+output_flat[base + axis_idx * inner] = value - logsum;
+}
+}
+}
+}
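
In math terms, the three passes above (max, exp-sum, subtract) compute the standard numerically stable form of log-softmax along the chosen axis:

  \mathrm{logsoftmax}(x)_i = (x_i - m) - \log \sum_j e^{x_j - m}, \qquad m = \max_j x_j

Subtracting m keeps every exponent at or below zero, so the exp call cannot overflow, while the identity leaves the result unchanged.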

emx_onnx_cgen/templates/lp_normalization_op.c.j2
@@ -0,0 +1,27 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+const {{ c_type }} *input_flat = (const {{ c_type }} *){{ input0 }};
+{{ c_type }} *output_flat = ({{ c_type }} *){{ output }};
+const idx_t outer = {{ outer }};
+const idx_t axis_size = {{ axis_size }};
+const idx_t inner = {{ inner }};
+for (idx_t outer_idx = 0; outer_idx < outer; ++outer_idx) {
+for (idx_t inner_idx = 0; inner_idx < inner; ++inner_idx) {
+idx_t base = (outer_idx * axis_size * inner) + inner_idx;
+{{ c_type }} acc = {{ zero_literal }};
+for (idx_t axis_idx = 0; axis_idx < axis_size; ++axis_idx) {
+{{ c_type }} value = input_flat[base + axis_idx * inner];
+{% if p == 1 %}
+acc += {{ abs_fn }}(value);
+{% else %}
+acc += value * value;
+{% endif %}
+}
+{% if p == 2 %}
+acc = {{ sqrt_fn }}(acc);
+{% endif %}
+for (idx_t axis_idx = 0; axis_idx < axis_size; ++axis_idx) {
+output_flat[base + axis_idx * inner] = input_flat[base + axis_idx * inner] / acc;
+}
+}
+}
+}
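
The p == 1 / p == 2 branches cover the two norms ONNX LpNormalization permits; along the chosen axis the template computes:

  y_i = \frac{x_i}{\lVert x \rVert_p}, \qquad
  \lVert x \rVert_1 = \sum_j \lvert x_j \rvert, \quad
  \lVert x \rVert_2 = \sqrt{\textstyle\sum_j x_j^2}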

emx_onnx_cgen/templates/lp_pool_op.c.j2
@@ -0,0 +1,24 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+for (idx_t n = 0; n < {{ batch }}; ++n) {
+for (idx_t c = 0; c < {{ channels }}; ++c) {
+for (idx_t oh = 0; oh < {{ out_h }}; ++oh) {
+for (idx_t ow = 0; ow < {{ out_w }}; ++ow) {
+{{ c_type }} acc = {{ zero_literal }};
+const idx_t h_start = oh * {{ stride_h }} - {{ pad_top }};
+const idx_t w_start = ow * {{ stride_w }} - {{ pad_left }};
+for (idx_t kh = 0; kh < {{ kernel_h }}; ++kh) {
+for (idx_t kw = 0; kw < {{ kernel_w }}; ++kw) {
+const idx_t in_h = h_start + kh * {{ dilation_h }};
+const idx_t in_w = w_start + kw * {{ dilation_w }};
+if (in_h >= 0 && in_h < {{ in_h }} && in_w >= 0 && in_w < {{ in_w }}) {
+{{ c_type }} value = {{ input0 }}[n][c][in_h][in_w];
+acc += {{ pow_fn }}({{ abs_fn }}(value), ({{ c_type }}){{ p }});
+}
+}
+}
+{{ output }}[n][c][oh][ow] = {{ pow_fn }}(acc, ({{ c_type }})1.0 / ({{ c_type }}){{ p }});
+}
+}
+}
+}
+}
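
Per output position, the LpPool loop accumulates |x|^p over the in-bounds part of the kernel window and then applies the 1/p root:

  y[n,c,o_h,o_w] = \Big( \sum_{(i_h,\, i_w) \in \text{window}} \lvert x[n,c,i_h,i_w] \rvert^{p} \Big)^{1/p}

Padded (out-of-bounds) positions simply contribute nothing to the sum.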

emx_onnx_cgen/templates/lrn_op.c.j2
@@ -0,0 +1,20 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in shape %}
+for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+idx_t channel_start = {{ loop_vars[1] }} > {{ half }} ? {{ loop_vars[1] }} - {{ half }} : 0;
+idx_t channel_end = {{ loop_vars[1] }} + {{ half }};
+if (channel_end >= {{ channels }}) {
+channel_end = {{ channels }} - 1;
+}
+{{ c_type }} sum = {{ zero_literal }};
+for (idx_t c = channel_start; c <= channel_end; ++c) {
+{{ c_type }} val = {{ input0 }}[{{ loop_vars[0] }}][c]{% for var in loop_vars[2:] %}[{{ var }}]{% endfor %};
+sum += val * val;
+}
+{{ c_type }} scale = {{ bias_literal }} + {{ alpha_div_size_literal }} * sum;
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = {{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %} / {{ pow_fn }}(scale, {{ beta_literal }});
+{% for _ in shape %}
+}
+{% endfor %}
+}
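
This is local response normalization with the alpha/size division apparently folded into a precomputed constant (hence the alpha_div_size_literal name). For each element, the clamped channel window [max(0, c - half), min(C - 1, c + half)] feeds:

  y[n,c,\dots] = \frac{x[n,c,\dots]}{\big(\mathrm{bias} + \tfrac{\alpha}{\mathrm{size}} \sum_{c'} x[n,c',\dots]^2\big)^{\beta}}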

emx_onnx_cgen/templates/lstm_op.c.j2
@@ -0,0 +1,175 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dir in range(num_directions) %}
+{
+const int dir = {{ dir }};
+const int reverse = {% if direction == "reverse" %}1{% elif direction == "bidirectional" and dir == 1 %}1{% else %}0{% endif %};
+{% set act_f = activation_functions[dir * 3] %}
+{% set act_g = activation_functions[dir * 3 + 1] %}
+{% set act_h = activation_functions[dir * 3 + 2] %}
+{{ c_type }} H_prev[{{ batch_size }}][{{ hidden_size }}];
+{{ c_type }} C_prev[{{ batch_size }}][{{ hidden_size }}];
+for (int b = 0; b < {{ batch_size }}; ++b) {
+for (int h = 0; h < {{ hidden_size }}; ++h) {
+{% if input_initial_h %}
+{% if layout == 0 %}
+H_prev[b][h] = {{ input_initial_h }}[dir][b][h];
+{% else %}
+H_prev[b][h] = {{ input_initial_h }}[b][dir][h];
+{% endif %}
+{% else %}
+H_prev[b][h] = {{ zero_literal }};
+{% endif %}
+{% if input_initial_c %}
+{% if layout == 0 %}
+C_prev[b][h] = {{ input_initial_c }}[dir][b][h];
+{% else %}
+C_prev[b][h] = {{ input_initial_c }}[b][dir][h];
+{% endif %}
+{% else %}
+C_prev[b][h] = {{ zero_literal }};
+{% endif %}
+}
+}
+for (int b = 0; b < {{ batch_size }}; ++b) {
+int seq_limit = {{ seq_length }};
+{% if input_sequence_lens %}
+seq_limit = (int){{ input_sequence_lens }}[b];
+if (seq_limit < 0) {
+seq_limit = 0;
+}
+if (seq_limit > {{ seq_length }}) {
+seq_limit = {{ seq_length }};
+}
+{% endif %}
+for (int step = 0; step < seq_limit; ++step) {
+int t = reverse ? (seq_limit - 1 - step) : step;
+{{ c_type }} H_next[{{ hidden_size }}];
+{{ c_type }} C_next[{{ hidden_size }}];
+for (int h = 0; h < {{ hidden_size }}; ++h) {
+{{ c_type }} gate_i = {{ zero_literal }};
+{{ c_type }} gate_o = {{ zero_literal }};
+{{ c_type }} gate_f = {{ zero_literal }};
+{{ c_type }} gate_c = {{ zero_literal }};
+for (int i = 0; i < {{ input_size }}; ++i) {
+{% if layout == 0 %}
+{{ c_type }} x_val = {{ input_x }}[t][b][i];
+{% else %}
+{{ c_type }} x_val = {{ input_x }}[b][t][i];
+{% endif %}
+gate_i += x_val * {{ input_w }}[dir][h][i];
+gate_o += x_val * {{ input_w }}[dir][{{ hidden_size }} + h][i];
+gate_f += x_val * {{ input_w }}[dir][{{ hidden_size }} * 2 + h][i];
+gate_c += x_val * {{ input_w }}[dir][{{ hidden_size }} * 3 + h][i];
+}
+for (int i = 0; i < {{ hidden_size }}; ++i) {
+{{ c_type }} h_val = H_prev[b][i];
+gate_i += h_val * {{ input_r }}[dir][h][i];
+gate_o += h_val * {{ input_r }}[dir][{{ hidden_size }} + h][i];
+gate_f += h_val * {{ input_r }}[dir][{{ hidden_size }} * 2 + h][i];
+gate_c += h_val * {{ input_r }}[dir][{{ hidden_size }} * 3 + h][i];
+}
+{% if input_b %}
+gate_i += {{ input_b }}[dir][h] + {{ input_b }}[dir][{{ hidden_size }} * 4 + h];
+gate_o += {{ input_b }}[dir][{{ hidden_size }} + h] + {{ input_b }}[dir][{{ hidden_size }} * 5 + h];
+gate_f += {{ input_b }}[dir][{{ hidden_size }} * 2 + h] + {{ input_b }}[dir][{{ hidden_size }} * 6 + h];
+gate_c += {{ input_b }}[dir][{{ hidden_size }} * 3 + h] + {{ input_b }}[dir][{{ hidden_size }} * 7 + h];
+{% endif %}
+{% if use_clip %}
+if (gate_i > {{ clip_literal }}) {
+gate_i = {{ clip_literal }};
+} else if (gate_i < -{{ clip_literal }}) {
+gate_i = -{{ clip_literal }};
+}
+if (gate_o > {{ clip_literal }}) {
+gate_o = {{ clip_literal }};
+} else if (gate_o < -{{ clip_literal }}) {
+gate_o = -{{ clip_literal }};
+}
+if (gate_f > {{ clip_literal }}) {
+gate_f = {{ clip_literal }};
+} else if (gate_f < -{{ clip_literal }}) {
+gate_f = -{{ clip_literal }};
+}
+if (gate_c > {{ clip_literal }}) {
+gate_c = {{ clip_literal }};
+} else if (gate_c < -{{ clip_literal }}) {
+gate_c = -{{ clip_literal }};
+}
+{% endif %}
+{% if input_p %}
+{{ c_type }} i_gate = {{ act_f }}(
+gate_i + {{ input_p }}[dir][h] * C_prev[b][h]);
+{% else %}
+{{ c_type }} i_gate = {{ act_f }}(gate_i);
+{% endif %}
+{% if input_forget %}
+{{ c_type }} f_gate = ({{ c_type }}){{ one_literal }} - i_gate;
+{% else %}
+{% if input_p %}
+{{ c_type }} f_gate = {{ act_f }}(
+gate_f + {{ input_p }}[dir][{{ hidden_size }} * 2 + h] * C_prev[b][h]);
+{% else %}
+{{ c_type }} f_gate = {{ act_f }}(gate_f);
+{% endif %}
+{% endif %}
+{{ c_type }} c_gate = {{ act_g }}(gate_c);
+{{ c_type }} c_new = f_gate * C_prev[b][h] + i_gate * c_gate;
+{% if input_p %}
+{{ c_type }} o_gate = {{ act_f }}(
+gate_o + {{ input_p }}[dir][{{ hidden_size }} + h] * c_new);
+{% else %}
+{{ c_type }} o_gate = {{ act_f }}(gate_o);
+{% endif %}
+{{ c_type }} h_new = o_gate * {{ act_h }}(c_new);
+C_next[h] = c_new;
+H_next[h] = h_new;
+{% if output_y %}
+{% if layout == 0 %}
+{{ output_y }}[step][dir][b][h] = h_new;
+{% else %}
+{{ output_y }}[b][step][dir][h] = h_new;
+{% endif %}
+{% endif %}
+}
+for (int h = 0; h < {{ hidden_size }}; ++h) {
+C_prev[b][h] = C_next[h];
+H_prev[b][h] = H_next[h];
+}
+}
+{% if output_y %}
+for (int step = seq_limit; step < {{ seq_length }}; ++step) {
+for (int h = 0; h < {{ hidden_size }}; ++h) {
+{% if layout == 0 %}
+{{ output_y }}[step][dir][b][h] = {{ zero_literal }};
+{% else %}
+{{ output_y }}[b][step][dir][h] = {{ zero_literal }};
+{% endif %}
+}
+}
+{% endif %}
+}
+{% if output_y_h %}
+for (int b = 0; b < {{ batch_size }}; ++b) {
+for (int h = 0; h < {{ hidden_size }}; ++h) {
+{% if layout == 0 %}
+{{ output_y_h }}[dir][b][h] = H_prev[b][h];
+{% else %}
+{{ output_y_h }}[b][dir][h] = H_prev[b][h];
+{% endif %}
+}
+}
+{% endif %}
+{% if output_y_c %}
+for (int b = 0; b < {{ batch_size }}; ++b) {
+for (int h = 0; h < {{ hidden_size }}; ++h) {
+{% if layout == 0 %}
+{{ output_y_c }}[dir][b][h] = C_prev[b][h];
+{% else %}
+{{ output_y_c }}[b][dir][h] = C_prev[b][h];
+{% endif %}
+}
+}
+{% endif %}
+}
+{% endfor %}
+}
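
The gate offsets above (h, H + h, 2H + h, 3H + h into W, R, and B, with the recurrence biases starting at 4H) follow the ONNX iofc weight layout. Written out, one step of the recurrence the loops unroll is, with f = act_f, g = act_g, h = act_h and optional peepholes P (the input_p branches):

  i_t = f(W_i x_t + R_i h_{t-1} + P_i \odot c_{t-1} + Wb_i + Rb_i)
  f_t = f(W_f x_t + R_f h_{t-1} + P_f \odot c_{t-1} + Wb_f + Rb_f)
  \tilde{c}_t = g(W_c x_t + R_c h_{t-1} + Wb_c + Rb_c)
  c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t
  o_t = f(W_o x_t + R_o h_{t-1} + P_o \odot c_t + Wb_o + Rb_o)
  h_t = o_t \odot h(c_t)

When input_forget is set, the forget gate is coupled to the input gate as f_t = 1 - i_t, matching the template's one_literal branch.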

emx_onnx_cgen/templates/matmul_op.c.j2
@@ -0,0 +1,13 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for idx in range(output_loop_vars | length) %}
+{% for indent in range(loop.index0) %}  {% endfor %}for (idx_t {{ output_loop_vars[idx] }} = 0; {{ output_loop_vars[idx] }} < {{ output_loop_bounds[idx] }}; ++{{ output_loop_vars[idx] }}) {
+{% endfor %}
+{% for indent in range(output_loop_vars | length) %}  {% endfor %}{{ acc_type }} acc = {{ zero_literal }};
+{% for indent in range(output_loop_vars | length) %}  {% endfor %}for (idx_t k = 0; k < {{ k }}; ++k) {
+{% for indent in range(output_loop_vars | length + 1) %}  {% endfor %}acc += {{ input0_index_expr }} * {{ input1_index_expr }};
+{% for indent in range(output_loop_vars | length) %}  {% endfor %}}
+{% for indent in range(output_loop_vars | length) %}  {% endfor %}{{ output_index_expr }} = acc;
+{% for idx in range(output_loop_vars | length) | reverse %}
+{% for indent in range(loop.index0) %}  {% endfor %}}
+{% endfor %}
+}
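
Here the {% for indent %} loops exist only to emit nesting whitespace. A sketch of what this might render to for a plain 2-D case, with hypothetical bindings (op_name node0_matmul, acc_type float, k 3, index expressions a[i0][k], b[k][i1], y[i0][i1]):

  /* Hypothetical render: y = a (2x3) times b (3x4). */
  static inline void node0_matmul(const float a[2][3], const float b[3][4],
                                  float y[2][4]) {
    for (idx_t i0 = 0; i0 < 2; ++i0) {
      for (idx_t i1 = 0; i1 < 4; ++i1) {
        float acc = 0.0f;
        for (idx_t k = 0; k < 3; ++k) {
          acc += a[i0][k] * b[k][i1];   /* dot product over the shared dim */
        }
        y[i0][i1] = acc;
      }
    }
  }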

emx_onnx_cgen/templates/maxpool_op.c.j2
@@ -0,0 +1,118 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+for (idx_t n = 0; n < {{ batch }}; ++n) {
+for (idx_t c = 0; c < {{ channels }}; ++c) {
+{% if spatial_rank == 1 %}
+for (idx_t ox = 0; ox < {{ out_spatial[0] }}; ++ox) {
+{{ c_type }} max_value = {{ min_literal }};
+{% if indices %}
+{{ indices_c_type }} max_index = 0;
+{% endif %}
+for (idx_t kx = 0; kx < {{ kernel_shape[0] }}; ++kx) {
+const idx_t ix = ox * {{ strides[0] }} + kx * {{ dilations[0] }} - {{ pads[0] }};
+if (ix >= 0 && ix < {{ in_spatial[0] }}) {
+{{ c_type }} val = {{ input0 }}[n][c][ix];
+{% if indices %}
+const {{ c_type }} prev_max = max_value;
+max_value = {{ max_fn }}(max_value, val);
+max_index = (val > prev_max)
+? ({{ indices_c_type }})(((({{ indices_c_type }})n * {{ channels }} + ({{ indices_c_type }})c) * {{ in_spatial[0] }}) + ({{ indices_c_type }})ix)
+: max_index;
+{% else %}
+max_value = {{ max_fn }}(max_value, val);
+{% endif %}
+}
+}
+{{ output }}[n][c][ox] = max_value;
+{% if indices %}
+{{ indices }}[n][c][ox] = max_index;
+{% endif %}
+}
+{% elif spatial_rank == 2 %}
+for (idx_t oh = 0; oh < {{ out_spatial[0] }}; ++oh) {
+for (idx_t ow = 0; ow < {{ out_spatial[1] }}; ++ow) {
+{{ c_type }} max_value = {{ min_literal }};
+{% if indices %}
+{{ indices_c_type }} max_index = 0;
+{% endif %}
+for (idx_t kh = 0; kh < {{ kernel_shape[0] }}; ++kh) {
+const idx_t ih = oh * {{ strides[0] }} + kh * {{ dilations[0] }} - {{ pads[0] }};
+if (ih >= 0 && ih < {{ in_spatial[0] }}) {
+for (idx_t kw = 0; kw < {{ kernel_shape[1] }}; ++kw) {
+const idx_t iw = ow * {{ strides[1] }} + kw * {{ dilations[1] }} - {{ pads[1] }};
+if (iw >= 0 && iw < {{ in_spatial[1] }}) {
+{{ c_type }} val = {{ input0 }}[n][c][ih][iw];
+{% if indices %}
+const {{ c_type }} prev_max = max_value;
+max_value = {{ max_fn }}(max_value, val);
+max_index = (val > prev_max)
+? (
+{% if storage_order == 0 %}
+({{ indices_c_type }})((((({{ indices_c_type }})n * {{ channels }} + ({{ indices_c_type }})c) * {{ in_spatial[0] }} + ({{ indices_c_type }})ih) * {{ in_spatial[1] }}) + ({{ indices_c_type }})iw)
+{% else %}
+({{ indices_c_type }})(((({{ indices_c_type }})n * {{ channels }} + ({{ indices_c_type }})c) * {{ in_spatial[0] }} * {{ in_spatial[1] }}) + ({{ indices_c_type }})ih + ({{ indices_c_type }})iw * {{ in_spatial[0] }})
+{% endif %}
+)
+: max_index;
+{% else %}
+max_value = {{ max_fn }}(max_value, val);
+{% endif %}
+}
+}
+}
+}
+{{ output }}[n][c][oh][ow] = max_value;
+{% if indices %}
+{{ indices }}[n][c][oh][ow] = max_index;
+{% endif %}
+}
+}
+{% elif spatial_rank == 3 %}
+for (idx_t od = 0; od < {{ out_spatial[0] }}; ++od) {
+for (idx_t oh = 0; oh < {{ out_spatial[1] }}; ++oh) {
+for (idx_t ow = 0; ow < {{ out_spatial[2] }}; ++ow) {
+{{ c_type }} max_value = {{ min_literal }};
+{% if indices %}
+{{ indices_c_type }} max_index = 0;
+{% endif %}
+for (idx_t kd = 0; kd < {{ kernel_shape[0] }}; ++kd) {
+const idx_t id = od * {{ strides[0] }} + kd * {{ dilations[0] }} - {{ pads[0] }};
+if (id >= 0 && id < {{ in_spatial[0] }}) {
+for (idx_t kh = 0; kh < {{ kernel_shape[1] }}; ++kh) {
+const idx_t ih = oh * {{ strides[1] }} + kh * {{ dilations[1] }} - {{ pads[1] }};
+if (ih >= 0 && ih < {{ in_spatial[1] }}) {
+for (idx_t kw = 0; kw < {{ kernel_shape[2] }}; ++kw) {
+const idx_t iw = ow * {{ strides[2] }} + kw * {{ dilations[2] }} - {{ pads[2] }};
+if (iw >= 0 && iw < {{ in_spatial[2] }}) {
+{{ c_type }} val = {{ input0 }}[n][c][id][ih][iw];
+{% if indices %}
+const {{ c_type }} prev_max = max_value;
+max_value = {{ max_fn }}(max_value, val);
+max_index = (val > prev_max)
+? (
+{% if storage_order == 0 %}
+({{ indices_c_type }})(((((({{ indices_c_type }})n * {{ channels }} + ({{ indices_c_type }})c) * {{ in_spatial[0] }} + ({{ indices_c_type }})id) * {{ in_spatial[1] }} + ({{ indices_c_type }})ih) * {{ in_spatial[2] }}) + ({{ indices_c_type }})iw)
+{% else %}
+({{ indices_c_type }})(((({{ indices_c_type }})n * {{ channels }} + ({{ indices_c_type }})c) * {{ in_spatial[0] }} * {{ in_spatial[1] }} * {{ in_spatial[2] }}) + ({{ indices_c_type }})id + ({{ indices_c_type }})ih * {{ in_spatial[0] }} + ({{ indices_c_type }})iw * {{ in_spatial[0] }} * {{ in_spatial[1] }})
+{% endif %}
+)
+: max_index;
+{% else %}
+max_value = {{ max_fn }}(max_value, val);
+{% endif %}
+}
+}
+}
+}
+}
+}
+{{ output }}[n][c][od][oh][ow] = max_value;
+{% if indices %}
+{{ indices }}[n][c][od][oh][ow] = max_index;
+{% endif %}
+}
+}
+}
+{% endif %}
+}
+}
+}
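
The long casted expressions compute the flat position of the winning element in the input tensor, which is what the optional Indices output of MaxPool stores. For the 2-D case the two storage_order branches correspond to:

  \text{storage\_order} = 0\ \text{(row-major)}: \quad idx = ((n \cdot C + c) \cdot H_{in} + i_h) \cdot W_{in} + i_w

  \text{storage\_order} = 1\ \text{(column-major spatial)}: \quad idx = (n \cdot C + c) \cdot H_{in} W_{in} + i_h + i_w \cdot H_{in}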

emx_onnx_cgen/templates/mean_variance_normalization_op.c.j2
@@ -0,0 +1,34 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for axis in non_axes %}
+for (idx_t {{ loop_vars[axis] }} = 0; {{ loop_vars[axis] }} < {{ shape[axis] }}; ++{{ loop_vars[axis] }}) {
+{% endfor %}
+{{ c_type }} sum = {{ zero_literal }};
+{% for axis in axes %}
+for (idx_t {{ loop_vars[axis] }} = 0; {{ loop_vars[axis] }} < {{ shape[axis] }}; ++{{ loop_vars[axis] }}) {
+{% endfor %}
+sum += {{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %};
+{% for _ in axes %}
+}
+{% endfor %}
+{{ c_type }} mean = sum / {{ reduce_count }};
+{{ c_type }} var = {{ zero_literal }};
+{% for axis in axes %}
+for (idx_t {{ loop_vars[axis] }} = 0; {{ loop_vars[axis] }} < {{ shape[axis] }}; ++{{ loop_vars[axis] }}) {
+{% endfor %}
+{{ c_type }} diff = {{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %} - mean;
+var += diff * diff;
+{% for _ in axes %}
+}
+{% endfor %}
+{{ c_type }} denom = {{ sqrt_fn }}(var / {{ reduce_count }} + {{ epsilon_literal }});
+{% for axis in axes %}
+for (idx_t {{ loop_vars[axis] }} = 0; {{ loop_vars[axis] }} < {{ shape[axis] }}; ++{{ loop_vars[axis] }}) {
+{% endfor %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = ({{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %} - mean) / denom;
+{% for _ in axes %}
+}
+{% endfor %}
+{% for _ in non_axes %}
+}
+{% endfor %}
+}
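
This is textbook mean-variance normalization over the reduced axes, with reduce_count the product of the reduced dimensions:

  y = \frac{x - \mu}{\sqrt{\sigma^2 + \epsilon}}, \qquad
  \mu = \frac{1}{N}\sum_{\text{axes}} x, \quad
  \sigma^2 = \frac{1}{N}\sum_{\text{axes}} (x - \mu)^2, \quad N = \texttt{reduce\_count}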

emx_onnx_cgen/templates/multi_input_op.c.j2
@@ -0,0 +1,15 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in shape %}
+for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ output_expr }} = {{ input_exprs[0] }};
+{% for expr in input_exprs[1:] %}
+{{ output_expr }} = {% if operator_kind == "func" %}{{ operator }}({{ output_expr }}, {{ expr }}){% elif operator_kind == "expr" %}{{ operator_expr }}{% else %}{{ output_expr }} {{ operator }} {{ expr }}{% endif %};
+{% endfor %}
+{% if is_mean %}
+{{ output_expr }} = {{ output_expr }} / {{ mean_scale }};
+{% endif %}
+{% for _ in shape %}
+}
+{% endfor %}
+}
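
This single template covers the variadic elementwise ops by folding the inputs left to right: seed the output with the first input, then combine each remaining input using either an infix operator, a function call, or an arbitrary expression. A hypothetical render for a three-input Sum over shape [2, 3] (names invented, whitespace tidied):

  static inline void node0_sum(const float a[2][3], const float b[2][3],
                               const float c[2][3], float y[2][3]) {
    for (idx_t i0 = 0; i0 < 2; ++i0) {
      for (idx_t i1 = 0; i1 < 3; ++i1) {
        y[i0][i1] = a[i0][i1];             /* seed with the first input */
        y[i0][i1] = y[i0][i1] + b[i0][i1]; /* fold the rest pairwise */
        y[i0][i1] = y[i0][i1] + c[i0][i1];
      }
    }
  }

For a Mean node, is_mean appends a final division by mean_scale (the input count); ops like Max or Min would presumably take the operator_kind == "func" path and fold with a generated max/min helper instead.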

emx_onnx_cgen/templates/negative_log_likelihood_loss_op.c.j2
@@ -0,0 +1,54 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+const {{ c_type }} *input_flat = (const {{ c_type }} *){{ input0 }};
+const {{ target_c_type }} *target_flat = (const {{ target_c_type }} *){{ target }};
+{{ c_type }} *output_flat = ({{ c_type }} *){{ output }};
+const idx_t n = {{ n }};
+const idx_t c = {{ c }};
+const idx_t d = {{ d }};
+{% if reduction != "none" %}
+{{ acc_type }} loss_sum = {{ acc_zero_literal }};
+{% if reduction == "mean" %}
+{{ acc_type }} weight_sum = {{ acc_zero_literal }};
+{% endif %}
+{% endif %}
+for (idx_t n_idx = 0; n_idx < n; ++n_idx) {
+for (idx_t d_idx = 0; d_idx < d; ++d_idx) {
+idx_t target_index = n_idx * d + d_idx;
+{{ target_c_type }} target_value = target_flat[target_index];
+if ((int64_t)target_value == {{ ignore_index }}) {
+{% if reduction == "none" %}
+output_flat[target_index] = {{ zero_literal }};
+{% endif %}
+} else {
+idx_t class_index = (idx_t)target_value;
+idx_t input_index = (n_idx * c + class_index) * d + d_idx;
+{{ acc_type }} value = -({{ acc_type }})input_flat[input_index];
+{% if weight %}
+{{ acc_type }} sample_weight = {{ weight }}[class_index];
+value *= sample_weight;
+{% endif %}
+{% if reduction == "none" %}
+output_flat[target_index] = value;
+{% else %}
+loss_sum += value;
+{% if reduction == "mean" %}
+{% if weight %}
+weight_sum += sample_weight;
+{% else %}
+weight_sum += {{ acc_one_literal }};
+{% endif %}
+{% endif %}
+{% endif %}
+}
+}
+}
+{% if reduction == "mean" %}
+if (weight_sum == {{ acc_zero_literal }}) {
+output_flat[0] = {{ zero_literal }};
+} else {
+output_flat[0] = loss_sum / weight_sum;
+}
+{% elif reduction == "sum" %}
+output_flat[0] = loss_sum;
+{% endif %}
+}
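
In formula form, with t = target[n, d] and optional per-class weights w, the loop computes:

  loss[n,d] = \begin{cases} 0 & t = \texttt{ignore\_index} \\ -\,w_t \cdot x[n, t, d] & \text{otherwise} \end{cases}

The "sum" reduction emits \sum loss; "mean" emits \sum loss / \sum w_t, where the weight sum runs only over non-ignored targets, and the weight_sum == 0 branch guards the division when every target is ignored.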