PyPI - emx-onnx-cgen - Versions diffs - 0.3.7__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl - Mend

emx-onnx-cgen 0.3.7__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

emx_onnx_cgen/_build_info.py +1 -1
emx_onnx_cgen/_version.py +2 -2
emx_onnx_cgen/cli.py +1025 -162
emx_onnx_cgen/codegen/__init__.py +2 -0
emx_onnx_cgen/codegen/c_emitter.py +2081 -458
emx_onnx_cgen/compiler.py +157 -75
emx_onnx_cgen/determinism.py +39 -0
emx_onnx_cgen/ir/context.py +25 -15
emx_onnx_cgen/ir/model.py +1 -0
emx_onnx_cgen/ir/op_base.py +32 -7
emx_onnx_cgen/ir/ops/__init__.py +20 -0
emx_onnx_cgen/ir/ops/elementwise.py +138 -22
emx_onnx_cgen/ir/ops/misc.py +95 -0
emx_onnx_cgen/ir/ops/nn.py +361 -38
emx_onnx_cgen/ir/ops/reduce.py +1 -16
emx_onnx_cgen/lowering/__init__.py +9 -0
emx_onnx_cgen/lowering/arg_reduce.py +0 -4
emx_onnx_cgen/lowering/average_pool.py +157 -27
emx_onnx_cgen/lowering/bernoulli.py +73 -0
emx_onnx_cgen/lowering/common.py +48 -0
emx_onnx_cgen/lowering/concat.py +41 -7
emx_onnx_cgen/lowering/conv.py +19 -8
emx_onnx_cgen/lowering/conv_integer.py +103 -0
emx_onnx_cgen/lowering/dequantize_linear.py +128 -0
emx_onnx_cgen/lowering/elementwise.py +140 -43
emx_onnx_cgen/lowering/gather.py +11 -2
emx_onnx_cgen/lowering/gemm.py +7 -124
emx_onnx_cgen/lowering/global_max_pool.py +0 -5
emx_onnx_cgen/lowering/gru.py +323 -0
emx_onnx_cgen/lowering/hamming_window.py +104 -0
emx_onnx_cgen/lowering/hardmax.py +1 -37
emx_onnx_cgen/lowering/identity.py +7 -6
emx_onnx_cgen/lowering/logsoftmax.py +1 -35
emx_onnx_cgen/lowering/lp_pool.py +15 -4
emx_onnx_cgen/lowering/matmul.py +3 -105
emx_onnx_cgen/lowering/optional_has_element.py +28 -0
emx_onnx_cgen/lowering/qlinear_mul.py +116 -0
emx_onnx_cgen/lowering/reduce.py +0 -5
emx_onnx_cgen/lowering/reshape.py +7 -16
emx_onnx_cgen/lowering/shape.py +14 -8
emx_onnx_cgen/lowering/slice.py +14 -4
emx_onnx_cgen/lowering/softmax.py +1 -35
emx_onnx_cgen/lowering/split.py +37 -3
emx_onnx_cgen/lowering/tfidf_vectorizer.py +199 -0
emx_onnx_cgen/lowering/tile.py +38 -1
emx_onnx_cgen/lowering/topk.py +1 -5
emx_onnx_cgen/lowering/transpose.py +9 -3
emx_onnx_cgen/lowering/unsqueeze.py +11 -16
emx_onnx_cgen/lowering/upsample.py +151 -0
emx_onnx_cgen/lowering/variadic.py +1 -1
emx_onnx_cgen/lowering/where.py +0 -5
emx_onnx_cgen/onnx_import.py +578 -14
emx_onnx_cgen/ops.py +3 -0
emx_onnx_cgen/templates/adagrad_op.c.j2 +16 -0
emx_onnx_cgen/templates/arg_reduce_op.c.j2 +18 -0
emx_onnx_cgen/templates/attention_op.c.j2 +189 -0
emx_onnx_cgen/templates/average_pool_op.c.j2 +126 -0
emx_onnx_cgen/templates/batch_norm_op.c.j2 +11 -0
emx_onnx_cgen/templates/bernoulli_op.c.j2 +34 -0
emx_onnx_cgen/templates/binary_op.c.j2 +9 -0
emx_onnx_cgen/templates/cast_op.c.j2 +9 -0
emx_onnx_cgen/templates/clip_op.c.j2 +14 -0
emx_onnx_cgen/templates/concat_op.c.j2 +28 -0
emx_onnx_cgen/templates/constant_of_shape_op.c.j2 +10 -0
emx_onnx_cgen/templates/conv_integer_op.c.j2 +34 -0
emx_onnx_cgen/templates/conv_op.c.j2 +32 -0
emx_onnx_cgen/templates/conv_transpose_op.c.j2 +43 -0
emx_onnx_cgen/templates/cumsum_op.c.j2 +51 -0
emx_onnx_cgen/templates/depth_to_space_op.c.j2 +26 -0
emx_onnx_cgen/templates/dequantize_linear_op.c.j2 +10 -0
emx_onnx_cgen/templates/einsum_op.c.j2 +55 -0
emx_onnx_cgen/templates/expand_op.c.j2 +14 -0
emx_onnx_cgen/templates/eye_like_op.c.j2 +27 -0
emx_onnx_cgen/templates/gather_elements_op.c.j2 +13 -0
emx_onnx_cgen/templates/gather_nd_op.c.j2 +29 -0
emx_onnx_cgen/templates/gather_op.c.j2 +13 -0
emx_onnx_cgen/templates/gemm_op.c.j2 +35 -0
emx_onnx_cgen/templates/grid_sample_op.c.j2 +184 -0
emx_onnx_cgen/templates/group_normalization_op.c.j2 +46 -0
emx_onnx_cgen/templates/gru_op.c.j2 +152 -0
emx_onnx_cgen/templates/hamming_window_op.c.j2 +12 -0
emx_onnx_cgen/templates/hardmax_op.c.j2 +24 -0
emx_onnx_cgen/templates/identity_op.c.j2 +9 -0
emx_onnx_cgen/templates/instance_normalization_op.c.j2 +35 -0
emx_onnx_cgen/templates/layer_normalization_op.c.j2 +65 -0
emx_onnx_cgen/templates/logsoftmax_op.c.j2 +27 -0
emx_onnx_cgen/templates/lp_normalization_op.c.j2 +27 -0
emx_onnx_cgen/templates/lp_pool_op.c.j2 +24 -0
emx_onnx_cgen/templates/lrn_op.c.j2 +20 -0
emx_onnx_cgen/templates/lstm_op.c.j2 +175 -0
emx_onnx_cgen/templates/matmul_op.c.j2 +13 -0
emx_onnx_cgen/templates/maxpool_op.c.j2 +118 -0
emx_onnx_cgen/templates/mean_variance_normalization_op.c.j2 +34 -0
emx_onnx_cgen/templates/multi_input_op.c.j2 +15 -0
emx_onnx_cgen/templates/negative_log_likelihood_loss_op.c.j2 +54 -0
emx_onnx_cgen/templates/nonmax_suppression_op.c.j2 +179 -0
emx_onnx_cgen/templates/nonzero_op.c.j2 +15 -0
emx_onnx_cgen/templates/one_hot_op.c.j2 +25 -0
emx_onnx_cgen/templates/optional_has_element_op.c.j2 +4 -0
emx_onnx_cgen/templates/pad_op.c.j2 +80 -0
emx_onnx_cgen/templates/qlinear_matmul_op.c.j2 +33 -0
emx_onnx_cgen/templates/qlinear_mul_op.c.j2 +18 -0
emx_onnx_cgen/templates/quantize_linear_op.c.j2 +13 -0
emx_onnx_cgen/templates/range_op.c.j2 +8 -0
emx_onnx_cgen/templates/reduce_op.c.j2 +28 -0
emx_onnx_cgen/templates/reduce_op_dynamic.c.j2 +77 -0
emx_onnx_cgen/templates/reshape_op.c.j2 +18 -0
emx_onnx_cgen/templates/resize_op.c.j2 +277 -0
emx_onnx_cgen/templates/rms_normalization_op.c.j2 +28 -0
emx_onnx_cgen/templates/rotary_embedding_op.c.j2 +66 -0
emx_onnx_cgen/templates/scatter_nd_op.c.j2 +52 -0
emx_onnx_cgen/templates/shape_op.c.j2 +6 -0
emx_onnx_cgen/templates/size_op.c.j2 +4 -0
emx_onnx_cgen/templates/slice_op.c.j2 +9 -0
emx_onnx_cgen/templates/slice_op_dynamic.c.j2 +70 -0
emx_onnx_cgen/templates/softmax_cross_entropy_loss_op.c.j2 +105 -0
emx_onnx_cgen/templates/softmax_op.c.j2 +26 -0
emx_onnx_cgen/templates/space_to_depth_op.c.j2 +22 -0
emx_onnx_cgen/templates/split_op.c.j2 +18 -0
emx_onnx_cgen/templates/tensor_scatter_op.c.j2 +44 -0
emx_onnx_cgen/templates/testbench.c.j2 +161 -0
emx_onnx_cgen/templates/tfidf_vectorizer_op.c.j2 +144 -0
emx_onnx_cgen/templates/tile_op.c.j2 +14 -0
emx_onnx_cgen/templates/topk_op.c.j2 +50 -0
emx_onnx_cgen/templates/transpose_op.c.j2 +9 -0
emx_onnx_cgen/templates/trilu_op.c.j2 +33 -0
emx_onnx_cgen/templates/unary_op.c.j2 +23 -0
emx_onnx_cgen/templates/where_op.c.j2 +9 -0
emx_onnx_cgen/verification.py +45 -5
{emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/METADATA +33 -15
emx_onnx_cgen-0.4.1.dev0.dist-info/RECORD +190 -0
{emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/WHEEL +1 -1
emx_onnx_cgen/runtime/__init__.py +0 -1
emx_onnx_cgen/runtime/evaluator.py +0 -2955
emx_onnx_cgen-0.3.7.dist-info/RECORD +0 -107
{emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/entry_points.txt +0 -0
{emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/top_level.txt +0 -0

emx_onnx_cgen/templates/nonmax_suppression_op.c.j2 ADDED Viewed

@@ -0,0 +1,179 @@
+static inline int {{ op_name }}_suppress_by_iou( /* Returns 1 when the IoU of the two indexed boxes is strictly above iou_threshold, otherwise 0. */
+    const {{ input_c_type }} boxes[][4],
+    idx_t box_index1,
+    idx_t box_index2,
+    int center_point_box,
+    {{ compute_type }} iou_threshold
+) {
+    {{ compute_type }} box1_0 = ({{ compute_type }})boxes[box_index1][0]; /* All eight coordinates are converted to the compute type up front. */
+    {{ compute_type }} box1_1 = ({{ compute_type }})boxes[box_index1][1];
+    {{ compute_type }} box1_2 = ({{ compute_type }})boxes[box_index1][2];
+    {{ compute_type }} box1_3 = ({{ compute_type }})boxes[box_index1][3];
+    {{ compute_type }} box2_0 = ({{ compute_type }})boxes[box_index2][0];
+    {{ compute_type }} box2_1 = ({{ compute_type }})boxes[box_index2][1];
+    {{ compute_type }} box2_2 = ({{ compute_type }})boxes[box_index2][2];
+    {{ compute_type }} box2_3 = ({{ compute_type }})boxes[box_index2][3];
+    {{ compute_type }} x1_min;
+    {{ compute_type }} x1_max;
+    {{ compute_type }} x2_min;
+    {{ compute_type }} x2_max;
+    {{ compute_type }} y1_min;
+    {{ compute_type }} y1_max;
+    {{ compute_type }} y2_min;
+    {{ compute_type }} y2_max;
+    if (center_point_box == 0) { /* Corner format: entries 1 and 3 give the x extent, entries 0 and 2 the y extent; min/max makes the corner order irrelevant. */
+        x1_min = {{ min_fn }}(box1_1, box1_3);
+        x1_max = {{ max_fn }}(box1_1, box1_3);
+        x2_min = {{ min_fn }}(box2_1, box2_3);
+        x2_max = {{ max_fn }}(box2_1, box2_3);
+        y1_min = {{ min_fn }}(box1_0, box1_2);
+        y1_max = {{ max_fn }}(box1_0, box1_2);
+        y2_min = {{ min_fn }}(box2_0, box2_2);
+        y2_max = {{ max_fn }}(box2_0, box2_2);
+    } else { /* Center format: entries 0 and 1 are the center, entries 2 and 3 the full width and height. */
+        {{ compute_type }} box1_width_half = box1_2 / ({{ compute_type }})2;
+        {{ compute_type }} box1_height_half = box1_3 / ({{ compute_type }})2;
+        {{ compute_type }} box2_width_half = box2_2 / ({{ compute_type }})2;
+        {{ compute_type }} box2_height_half = box2_3 / ({{ compute_type }})2;
+        x1_min = box1_0 - box1_width_half;
+        x1_max = box1_0 + box1_width_half;
+        x2_min = box2_0 - box2_width_half;
+        x2_max = box2_0 + box2_width_half;
+        y1_min = box1_1 - box1_height_half;
+        y1_max = box1_1 + box1_height_half;
+        y2_min = box2_1 - box2_height_half;
+        y2_max = box2_1 + box2_height_half;
+    }
+    {{ compute_type }} intersection_x_min = {{ max_fn }}(x1_min, x2_min);
+    {{ compute_type }} intersection_x_max = {{ min_fn }}(x1_max, x2_max);
+    if (intersection_x_max <= intersection_x_min) { /* No overlap along x (boxes that only touch count as no overlap). */
+        return 0;
+    }
+    {{ compute_type }} intersection_y_min = {{ max_fn }}(y1_min, y2_min);
+    {{ compute_type }} intersection_y_max = {{ min_fn }}(y1_max, y2_max);
+    if (intersection_y_max <= intersection_y_min) { /* No overlap along y. */
+        return 0;
+    }
+    {{ compute_type }} intersection_area =
+        (intersection_x_max - intersection_x_min)
+        * (intersection_y_max - intersection_y_min);
+    if (intersection_area <= ({{ compute_type }})0) {
+        return 0;
+    }
+    {{ compute_type }} area1 = (x1_max - x1_min) * (y1_max - y1_min);
+    {{ compute_type }} area2 = (x2_max - x2_min) * (y2_max - y2_min);
+    {{ compute_type }} union_area = area1 + area2 - intersection_area;
+    if (area1 <= ({{ compute_type }})0
+        || area2 <= ({{ compute_type }})0
+        || union_area <= ({{ compute_type }})0) { /* Zero-area boxes never suppress; this also guards the division below. */
+        return 0;
+    }
+    {{ compute_type }} intersection_over_union = intersection_area / union_area;
+    return intersection_over_union > iou_threshold; /* Strict comparison: an IoU equal to the threshold does not suppress. */
+}
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Greedy non-maximum suppression per (batch, class); writes (batch, class, box) index triples into the output rows. */
+    idx_t output_index = 0; /* Next output row to write. */
+    const idx_t output_capacity = {{ output_capacity }};
+    int64_t max_output_boxes_per_class_value = 0; /* Stays 0 (select nothing) unless the optional input is rendered in below. */
+    {{ compute_type }} iou_threshold_value = {{ iou_threshold_default }};
+    {{ compute_type }} score_threshold_value = {{ score_threshold_default }};
+{% if max_output_boxes_per_class %}
+    max_output_boxes_per_class_value =
+        (int64_t){{ max_output_boxes_per_class }}[0];
+    if (max_output_boxes_per_class_value < 0) { /* A negative limit is treated as 0. */
+        max_output_boxes_per_class_value = 0;
+    }
+{% endif %}
+{% if iou_threshold %}
+    iou_threshold_value = ({{ compute_type }}){{ iou_threshold }}[0];
+{% endif %}
+{% if score_threshold %}
+    score_threshold_value = ({{ compute_type }}){{ score_threshold }}[0];
+{% endif %}
+    if (output_capacity == 0 || max_output_boxes_per_class_value == 0) { /* Nothing can be selected: zero every output row and stop. */
+        for (idx_t idx = 0; idx < output_capacity; ++idx) {
+            {{ output }}[idx][0] = 0;
+            {{ output }}[idx][1] = 0;
+            {{ output }}[idx][2] = 0;
+        }
+        return;
+    }
+    for (idx_t batch_index = 0; batch_index < {{ num_batches }}; ++batch_index) {
+        for (idx_t class_index = 0; class_index < {{ num_classes }}; ++class_index) {
+            {{ compute_type }} candidate_scores[{{ num_boxes }}]; /* Scratch arrays, rebuilt for every (batch, class) pair. */
+            idx_t candidate_indices[{{ num_boxes }}];
+            idx_t selected_indices[{{ num_boxes }}];
+            idx_t candidate_count = 0;
+            idx_t selected_count = 0;
+            for (idx_t box_index = 0; box_index < {{ num_boxes }}; ++box_index) {
+                {{ compute_type }} score = ({{ compute_type }})
+                    {{ scores }}[batch_index][class_index][box_index];
+{% if score_threshold_enabled %}
+                if (score <= score_threshold_value) { /* Scores at or below the threshold are not candidates. */
+                    continue;
+                }
+{% endif %}
+                candidate_scores[candidate_count] = score;
+                candidate_indices[candidate_count] = box_index;
+                ++candidate_count;
+            }
+            for (idx_t i = 1; i < candidate_count; ++i) { /* Insertion sort: ascending score; equal scores are ordered by descending box index. */
+                {{ compute_type }} candidate_score = candidate_scores[i];
+                idx_t candidate_index = candidate_indices[i];
+                idx_t j = i;
+                while (j > 0
+                    && (candidate_scores[j - 1] > candidate_score
+                        || (candidate_scores[j - 1] == candidate_score
+                            && candidate_indices[j - 1] < candidate_index))) {
+                    candidate_scores[j] = candidate_scores[j - 1];
+                    candidate_indices[j] = candidate_indices[j - 1];
+                    --j;
+                }
+                candidate_scores[j] = candidate_score;
+                candidate_indices[j] = candidate_index;
+            }
+            for (idx_t candidate_pos = candidate_count; candidate_pos > 0; --candidate_pos) { /* Walk from the back: highest score first, lowest box index first among ties. */
+                if ((int64_t)selected_count >= max_output_boxes_per_class_value) { /* Compared in 64 bits so a limit too large for idx_t is not truncated by a narrowing cast. */
+                    break;
+                }
+                idx_t candidate_index = candidate_indices[candidate_pos - 1];
+                int selected = 1;
+                for (idx_t selected_idx = 0; selected_idx < selected_count; ++selected_idx) { /* Reject the candidate if any already-selected box of this class suppresses it. */
+                    if ({{ op_name }}_suppress_by_iou(
+                        {{ boxes }}[batch_index],
+                        candidate_index,
+                        selected_indices[selected_idx],
+                        {{ center_point_box }},
+                        iou_threshold_value)) {
+                        selected = 0;
+                        break;
+                    }
+                }
+                if (selected) {
+                    selected_indices[selected_count] = candidate_index;
+                    if (output_index >= output_capacity) { /* Output is full; rows already written are kept. */
+                        return;
+                    }
+                    {{ output }}[output_index][0] = ({{ output_c_type }})batch_index;
+                    {{ output }}[output_index][1] = ({{ output_c_type }})class_index;
+                    {{ output }}[output_index][2] = ({{ output_c_type }})candidate_index;
+                    ++output_index;
+                    ++selected_count;
+                }
+            }
+        }
+    }
+} /* NOTE(review): rows from output_index up to output_capacity are not written on this path - confirm the caller initializes them. */

emx_onnx_cgen/templates/nonzero_op.c.j2 ADDED Viewed

@@ -0,0 +1,15 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Visits every input element in loop-nest order and records the coordinates of each one that differs from zero_literal. */
+    idx_t out_index = 0; /* Column for the next hit; advanced once per recorded element. */
+{% for dim in input_shape %}
+    for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+        if ({{ input_expr }} != {{ zero_literal }}) {
+{% for var in loop_vars %}
+            {{ output }}[{{ loop.index0 }}][out_index] = ({{ output_c_type }}){{ var }}; /* Output row = input axis, output column = hit number. */
+{% endfor %}
+            ++out_index;
+        }
+{% for _ in input_shape %}
+    }
+{% endfor %}
+}

emx_onnx_cgen/templates/one_hot_op.c.j2 ADDED Viewed

@@ -0,0 +1,25 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Fills the output with off_value and writes on_value where axis_index equals the (wrapped) index. */
+    (void){{ depth }}; /* The depth argument is not read; depth_dim below supplies the depth. */
+    const {{ c_type }} off_value = {{ values }}[0]; /* values[0] is the off value, values[1] the on value. */
+    const {{ c_type }} on_value = {{ values }}[1];
+    const int64_t depth_value = (int64_t){{ depth_dim }};
+{% for dim in output_shape %}
+    for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+    int64_t index_value = (int64_t){{ indices }}{% for idx in indices_indices %}[{{ idx }}]{% endfor %};
+    int64_t adjusted = index_value;
+    if (index_value < 0) { /* Negative indices count back from depth. */
+        adjusted = index_value + depth_value;
+    }
+    {{ output }}{% for idx in loop_vars %}[{{ idx }}]{% endfor %} = off_value; /* Default; overwritten below on a match. */
+    if (
+        index_value >= -depth_value /* Indices outside [-depth, depth - 1] never match, leaving off_value. */
+        && index_value < depth_value
+        && (int64_t){{ axis_index }} == adjusted
+    ) {
+        {{ output }}{% for idx in loop_vars %}[{{ idx }}]{% endfor %} = on_value;
+    }
+{% for _ in output_shape %}
+    }
+{% endfor %}
+}

emx_onnx_cgen/templates/optional_has_element_op.c.j2 ADDED Viewed

@@ -0,0 +1,4 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Stores the render-time input_present value in output[0]. */
+    (void){{ input0 }}; /* The input argument itself is never read. */
+    {{ output }}[0] = {{ input_present }};
+}

emx_onnx_cgen/templates/pad_op.c.j2 ADDED Viewed

@@ -0,0 +1,80 @@
+static inline void {{ op_name }}({{ dim_args }}const {{ c_type }} {{ input0 }}{{ input_suffix }}{% if pads_input %}, const {{ pads_c_type }} {{ pads_input }}{{ pads_suffix }}{% endif %}{% if axes_input %}, const {{ axes_c_type }} {{ axes_input }}{{ axes_suffix }}{% endif %}{% if value_input %}, const {{ c_type }} {{ value_input }}{{ value_suffix }}{% endif %}, {{ c_type }} {{ output }}{{ output_suffix }}) { /* Pads in two passes: fill the output with pad_value_expr, then overwrite every element that maps to an input element. */
+const {{ c_type }} *{{ input0_flat }} = (const {{ c_type }} *){{ input0 }}; /* Flat view of the input, indexed with input_strides below. */
+{% if axes_input %}
+{% if pads_values %}
+const {{ pads_c_type }} pad_values[] = { {% for value in pads_values %}{{ value }}{{ ", " if not loop.last else "" }}{% endfor %} };
+{% endif %}
+idx_t pad_begin[{{ output_shape|length }}]; /* Leading pad per axis; stays 0 for axes not listed in axes_input. */
+for (idx_t pad_index = 0; pad_index < {{ output_shape|length }}; ++pad_index) {
+    pad_begin[pad_index] = 0;
+}
+for (idx_t axis_index = 0; axis_index < {{ axes_length }}; ++axis_index) {
+    idx_t axis = (idx_t){{ axes_input }}[axis_index];
+    if (axis < 0) { /* Negative axes count back from the rank. */
+        axis += {{ output_shape|length }};
+    }
+    if (axis >= 0 && axis < {{ output_shape|length }}) { /* Axes still out of range are skipped. */
+        pad_begin[axis] = {% if pads_input %}{{ pads_input }}[axis_index]{% else %}pad_values[axis_index]{% endif %};
+    }
+}
+{% endif %}
+{% for dim in output_shape %}
+for (idx_t {{ out_loop_vars[loop.index0] }} = 0; {{ out_loop_vars[loop.index0] }} < {{ dim }}; ++{{ out_loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ output }}{% for var in out_loop_vars %}[{{ var }}]{% endfor %} = {{ pad_value_expr }}; /* First pass: every output element gets the fill value. */
+{% for _ in output_shape %}
+}
+{% endfor %}
+{% for dim in output_shape %}
+for (idx_t {{ out_loop_vars[loop.index0] }} = 0; {{ out_loop_vars[loop.index0] }} < {{ dim }}; ++{{ out_loop_vars[loop.index0] }}) {
+{% endfor %}
+idx_t {{ base_index }} = 0; /* Flat input offset, accumulated axis by axis. */
+int pad_in_bounds = 1; /* Cleared as soon as any axis has no input element to read. */
+{% for index in range(output_shape|length) %}
+    idx_t {{ idx_vars[index] }} = {{ out_loop_vars[index] }} - (idx_t)({{ pad_begin_exprs[index] }}); /* Output coordinate shifted back by the leading pad. */
+if ({{ input_shape[index] }} == 0) { /* An empty input axis leaves nothing to read. */
+    pad_in_bounds = 0;
+}
+{% if mode == "constant" %}
+if (pad_in_bounds && ({{ idx_vars[index] }} < 0 || {{ idx_vars[index] }} >= {{ input_shape[index] }})) { /* Constant mode: coordinates outside the input keep the fill value. */
+    pad_in_bounds = 0;
+}
+{% elif mode == "edge" %}
+if (pad_in_bounds && {{ idx_vars[index] }} < 0) { /* Edge mode: clamp to the first or last valid coordinate. */
+    {{ idx_vars[index] }} = 0;
+} else if (pad_in_bounds && {{ idx_vars[index] }} >= {{ input_shape[index] }}) {
+    {{ idx_vars[index] }} = {{ input_shape[index] }} - 1;
+}
+{% elif mode == "wrap" %}
+if (pad_in_bounds) {
+    {{ idx_vars[index] }} %= {{ input_shape[index] }}; /* Wrap mode: reduce modulo the axis length; a negative remainder is shifted up below. */
+    if ({{ idx_vars[index] }} < 0) {
+        {{ idx_vars[index] }} += {{ input_shape[index] }};
+    }
+}
+{% elif mode == "reflect" %}
+if (pad_in_bounds && {{ input_shape[index] }} == 1) { /* Reflect mode: a length-1 axis can only map to 0 (also avoids a modulo by zero below). */
+    {{ idx_vars[index] }} = 0;
+} else if (pad_in_bounds) {
+    idx_t {{ reflect_vars[index] }} = {{ input_shape[index] }} - 1;
+    {{ idx_vars[index] }} %= (2 * {{ reflect_vars[index] }}); /* Fold into one period of length 2 * (n - 1), then mirror the upper part. */
+    if ({{ idx_vars[index] }} < 0) {
+        {{ idx_vars[index] }} += 2 * {{ reflect_vars[index] }};
+    }
+    if ({{ idx_vars[index] }} > {{ reflect_vars[index] }}) {
+        {{ idx_vars[index] }} = 2 * {{ reflect_vars[index] }} - {{ idx_vars[index] }};
+    }
+}
+{% endif %}
+if (pad_in_bounds) {
+    {{ base_index }} += {{ idx_vars[index] }} * {{ input_strides[index] }};
+}
+{% endfor %}
+if (!pad_in_bounds) { /* Keep the fill value written by the first pass. */
+} else {
+    {{ output }}{% for var in out_loop_vars %}[{{ var }}]{% endfor %} = {{ input0_flat }}[{{ base_index }}];
+}
+{% for _ in output_shape %}
+}
+{% endfor %}
+}

emx_onnx_cgen/templates/qlinear_matmul_op.c.j2 ADDED Viewed

@@ -0,0 +1,33 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Integer matmul on zero-point-corrected inputs, rescaled by input0_scale * input1_scale / output_scale and offset by the output zero point. */
+{% if scale_is_float16 %}
+    const {{ scale_type }} scale_product = ({{ scale_type }})((( {{ scale_type }}){{ input0_scale_expr }}) * (({{ scale_type }}){{ input1_scale_expr }})); /* The product is cast back to scale_type before the division. */
+    const {{ scale_type }} scale = ({{ scale_type }})(scale_product / ({{ scale_type }}){{ output_scale_expr }});
+{% else %}
+    const {{ scale_type }} scale = (({{ scale_type }}){{ input0_scale_expr }}) * (({{ scale_type }}){{ input1_scale_expr }}) / (({{ scale_type }}){{ output_scale_expr }});
+{% endif %}
+    const int32_t input0_zero = (int32_t){{ input0_zero_expr }};
+    const int32_t input1_zero = (int32_t){{ input1_zero_expr }};
+    const {{ compute_type }} output_zero = ({{ compute_type }}){{ output_zero_expr }};
+{% for idx in range(output_loop_vars | length) %}
+    {% for indent in range(loop.index0) %}    {% endfor %}for (idx_t {{ output_loop_vars[idx] }} = 0; {{ output_loop_vars[idx] }} < {{ output_loop_bounds[idx] }}; ++{{ output_loop_vars[idx] }}) {
+{% endfor %}
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}int32_t acc = 0; /* Dot product accumulated in 32-bit integers. */
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}for (idx_t k = 0; k < {{ k }}; ++k) {
+            {% for indent in range(output_loop_vars | length + 1) %}    {% endfor %}acc += ((int32_t){{ input0_index_expr }} - input0_zero) * ((int32_t){{ input1_index_expr }} - input1_zero);
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}}
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}{{ compute_type }} scaled = (({{ compute_type }})acc) * scale + output_zero;
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}{{ compute_type }} rounded = {{ round_fn }}(scaled);
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}{{ compute_type }} wrapped = {{ mod_fn }}(rounded, ({{ compute_type }})256.0); /* NOTE(review): out-of-range results are reduced modulo 256 via mod_fn rather than clamped - confirm this is intended. */
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}if (wrapped < ({{ compute_type }})0.0) { /* Bring a negative remainder into [0, 256). */
+            {% for indent in range(output_loop_vars | length + 1) %}    {% endfor %}wrapped += ({{ compute_type }})256.0;
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}}
+{% if output_is_signed %}
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}if (wrapped >= ({{ compute_type }})128.0) { /* Signed output: values 128 and above map to the negative range. */
+            {% for indent in range(output_loop_vars | length + 1) %}    {% endfor %}wrapped -= ({{ compute_type }})256.0;
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}}
+{% endif %}
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}{{ output_index_expr }} = ({{ output_c_type }})wrapped;
+{% for idx in range(output_loop_vars | length) | reverse %}
+    {% for indent in range(loop.index0) %}    {% endfor %}}
+{% endfor %}
+}

emx_onnx_cgen/templates/qlinear_mul_op.c.j2 ADDED Viewed

@@ -0,0 +1,18 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Elementwise product of zero-point-corrected inputs, rescaled, rounded and clamped before the output cast. */
+    const {{ compute_type }} scale = (({{ compute_type }}){{ input0_scale_expr }}) * (({{ compute_type }}){{ input1_scale_expr }}) / (({{ compute_type }}){{ output_scale_expr }}); /* Combined factor: input0_scale * input1_scale / output_scale. */
+    const int32_t input0_zero = (int32_t){{ input0_zero_expr }};
+    const int32_t input1_zero = (int32_t){{ input1_zero_expr }};
+    const {{ compute_type }} output_zero = ({{ compute_type }}){{ output_zero_expr }};
+{% for idx in range(output_loop_vars | length) %}
+    {% for indent in range(loop.index0) %}    {% endfor %}for (idx_t {{ output_loop_vars[idx] }} = 0; {{ output_loop_vars[idx] }} < {{ output_loop_bounds[idx] }}; ++{{ output_loop_vars[idx] }}) {
+{% endfor %}
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}int32_t acc = ((int32_t){{ input0_index_expr }} - input0_zero) * ((int32_t){{ input1_index_expr }} - input1_zero);
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}{{ compute_type }} scaled = (({{ compute_type }})acc) * scale + output_zero; /* The output zero point is added before rounding. */
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}{{ compute_type }} rounded = {{ round_fn }}(scaled);
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}rounded = {{ max_fn }}(rounded, ({{ compute_type }}){{ min_literal }}); /* Clamp to [min_literal, max_literal]. */
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}rounded = {{ min_fn }}(rounded, ({{ compute_type }}){{ max_literal }});
+        {% for indent in range(output_loop_vars | length) %}    {% endfor %}{{ output_index_expr }} = ({{ output_c_type }})rounded;
+{% for idx in range(output_loop_vars | length) | reverse %}
+    {% for indent in range(loop.index0) %}    {% endfor %}}
+{% endfor %}
+}

emx_onnx_cgen/templates/quantize_linear_op.c.j2 ADDED Viewed

@@ -0,0 +1,13 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Per element: divide by the scale, round, add the zero point, clamp, then cast to the output type. */
+{% for dim in shape %}
+for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+    {{ compute_type }} scaled = (({{ compute_type }}){{ input_expr }} / ({{ compute_type }}){{ scale_expr }});
+    {{ compute_type }} rounded = {{ round_fn }}(scaled) + ({{ compute_type }}){{ zero_expr }}; /* The zero point is added after rounding. */
+    rounded = {{ max_fn }}(rounded, ({{ compute_type }}){{ min_literal }}); /* Clamp to [min_literal, max_literal] before the narrowing cast. */
+    rounded = {{ min_fn }}(rounded, ({{ compute_type }}){{ max_literal }});
+    {{ output_expr }} = ({{ output_c_type }})rounded;
+{% for _ in shape %}
+}
+{% endfor %}
+}

emx_onnx_cgen/templates/range_op.c.j2 ADDED Viewed

@@ -0,0 +1,8 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Writes start + idx * delta for idx in [0, length). */
+    (void){{ limit }}; /* The limit argument is not read; the element count is the render-time length. */
+    const {{ c_type }} start_value = {{ start }}[0];
+    const {{ c_type }} delta_value = {{ delta }}[0];
+    for (idx_t idx = 0; idx < {{ length }}; ++idx) {
+        {{ output }}[idx] = start_value + (({{ c_type }})idx * delta_value); /* Computed from idx each time rather than by repeated addition. */
+    }
+}

emx_onnx_cgen/templates/reduce_op.c.j2 ADDED Viewed

@@ -0,0 +1,28 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* For each output element, folds the reduce_dims loop nest into acc and stores final_expr. */
+{% for dim in output_shape %}
+for (idx_t {{ output_loop_vars[loop.index0] }} = 0; {{ output_loop_vars[loop.index0] }} < {{ dim }}; ++{{ output_loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ c_type }} acc = {{ init_literal }}; /* Accumulator, reset for every output element. */
+{% if use_kahan %}
+{{ c_type }} acc_comp = {{ zero_literal }}; /* Running compensation term of the Kahan sum. */
+{% endif %}
+{% for dim in reduce_dims %}
+for (idx_t {{ reduce_loop_vars[loop.index0] }} = 0; {{ reduce_loop_vars[loop.index0] }} < {{ dim }}; ++{{ reduce_loop_vars[loop.index0] }}) {
+{% endfor %}
+{% if use_kahan %}
+{{ c_type }} kahan_value = {{ kahan_value_expr }};
+{{ c_type }} kahan_y = kahan_value - acc_comp; /* Apply the error carried over from earlier additions. */
+{{ c_type }} kahan_t = acc + kahan_y;
+acc_comp = (kahan_t - acc) - kahan_y; /* Recover what the addition above rounded away. */
+acc = kahan_t;
+{% else %}
+{{ update_expr }}
+{% endif %}
+{% for _ in reduce_dims %}
+}
+{% endfor %}
+{{ output }}{{ output_index_expr }} = {{ final_expr }};
+{% for _ in output_shape %}
+}
+{% endfor %}
+}

emx_onnx_cgen/templates/reduce_op_dynamic.c.j2 ADDED Viewed

@@ -0,0 +1,77 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Reduction whose axes are read from axes_input at run time; reduce_mask marks the axes being reduced. */
+    idx_t axis_count = {{ axes_count }};
+    bool reduce_mask[{{ input_shape | length }}];
+    for (idx_t i = 0; i < {{ input_shape | length }}; ++i) {
+        reduce_mask[i] = false;
+    }
+    if (axis_count == 0) { /* No axes given: either copy the input through and return, or reduce over every axis (chosen at render time). */
+{% if noop_with_empty_axes %}
+{% for dim in input_shape %}
+        for (idx_t {{ input_loop_vars[loop.index0] }} = 0; {{ input_loop_vars[loop.index0] }} < {{ dim }}; ++{{ input_loop_vars[loop.index0] }}) {
+{% endfor %}
+        {{ output }}{{ input_index_expr }} = {{ input0 }}{{ input_index_expr }};
+{% for _ in input_shape %}
+        }
+{% endfor %}
+        return;
+{% else %}
+        for (idx_t i = 0; i < {{ input_shape | length }}; ++i) {
+            reduce_mask[i] = true;
+        }
+{% endif %}
+    } else {
+        for (idx_t i = 0; i < axis_count; ++i) {
+            int axis = (int){{ axes_input }}[i];
+            if (axis < 0) { /* Negative axes count back from the input rank. */
+                axis += {{ input_shape | length }};
+            }
+            if (axis >= 0 && axis < {{ input_shape | length }}) { /* Axes still out of range are skipped. */
+                reduce_mask[axis] = true;
+            }
+        }
+    }
+    idx_t reduce_count = 1; /* Product of the sizes of all reduced axes; not used directly here, presumably read by update_expr or post_expr. */
+{% for dim in input_shape %}
+    if (reduce_mask[{{ loop.index0 }}]) {
+        reduce_count *= {{ dim }};
+    }
+{% endfor %}
+{% for dim in output_shape %}
+    for (idx_t {{ output_loop_vars[loop.index0] }} = 0; {{ output_loop_vars[loop.index0] }} < {{ dim }}; ++{{ output_loop_vars[loop.index0] }}) {
+{% endfor %}
+        {{ output }}{{ output_loop_index_expr }} = {{ init_literal }}; /* Seed every output element before accumulating. */
+{% for _ in output_shape %}
+    }
+{% endfor %}
+{% for dim in input_shape %}
+    for (idx_t {{ input_loop_vars[loop.index0] }} = 0; {{ input_loop_vars[loop.index0] }} < {{ dim }}; ++{{ input_loop_vars[loop.index0] }}) {
+{% endfor %}
+        idx_t out_indices[{{ output_shape | length }}]; /* Output coordinate that this input element contributes to. */
+{% if keepdims %}
+{% for axis in range(input_shape | length) %}
+        out_indices[{{ axis }}] = reduce_mask[{{ axis }}] ? 0 : {{ input_loop_vars[axis] }}; /* Reduced axes collapse to index 0. */
+{% endfor %}
+{% else %}
+        idx_t out_pos = 0;
+{% for axis in range(input_shape | length) %}
+        if (!reduce_mask[{{ axis }}]) { /* Kept axes are packed in their original order. */
+            out_indices[out_pos++] = {{ input_loop_vars[axis] }};
+        }
+{% endfor %}
+{% endif %}
+        {{ c_type }} *out_ptr = &{{ output }}{{ output_index_expr }}; /* Target element; presumably written through by update_expr. */
+        {{ update_expr }}
+{% for _ in input_shape %}
+    }
+{% endfor %}
+{% if post_expr %}
+{% for dim in output_shape %}
+    for (idx_t {{ output_loop_vars[loop.index0] }} = 0; {{ output_loop_vars[loop.index0] }} < {{ dim }}; ++{{ output_loop_vars[loop.index0] }}) {
+{% endfor %}
+        {{ c_type }} *out_ptr = &{{ output }}{{ output_loop_index_expr }}; /* Final pass over every output element, run after accumulation finishes. */
+        {{ post_expr }}
+{% for _ in output_shape %}
+    }
+{% endfor %}
+{% endif %}
+}

emx_onnx_cgen/templates/reshape_op.c.j2 ADDED Viewed

@@ -0,0 +1,18 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) { /* Copies the input into the output element by element, reading the input through a flat pointer. */
+    const {{ c_type }} *input0_data = (const {{ c_type }} *){{ input0 }}; /* Flat view of the input so it can be read by linear offset. */
+{% for dim in output_shape %}
+    for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+{% if loop_vars %}
+{% set ns = namespace(expr=loop_vars[0]) %}
+{% for dim in output_shape[1:] %}
+{% set ns.expr = "(" ~ ns.expr ~ " * " ~ dim ~ " + " ~ loop_vars[loop.index] ~ ")" %}
+{% endfor %}
+    {{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = input0_data[{{ ns.expr }}]; /* The index is the row-major linear offset of the output coordinate. */
+{% else %}
+    {{ output }} = input0_data[0]; /* No loop variables: a single element is copied. */
+{% endif %}
+{% for _ in output_shape %}
+    }
+{% endfor %}
+}

emx-onnx-cgen 0.3.7__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl

emx-onnx-cgen 0.4.1.dev0__py3-none-any.whl ← 0.3.7__py3-none-any.whl