PyPI - emx-onnx-cgen - Versions diffs - 0.3.7__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl - Mend

emx-onnx-cgen 0.3.7__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137)

emx_onnx_cgen/_build_info.py +1 -1
emx_onnx_cgen/_version.py +2 -2
emx_onnx_cgen/cli.py +1025 -162
emx_onnx_cgen/codegen/__init__.py +2 -0
emx_onnx_cgen/codegen/c_emitter.py +2081 -458
emx_onnx_cgen/compiler.py +157 -75
emx_onnx_cgen/determinism.py +39 -0
emx_onnx_cgen/ir/context.py +25 -15
emx_onnx_cgen/ir/model.py +1 -0
emx_onnx_cgen/ir/op_base.py +32 -7
emx_onnx_cgen/ir/ops/__init__.py +20 -0
emx_onnx_cgen/ir/ops/elementwise.py +138 -22
emx_onnx_cgen/ir/ops/misc.py +95 -0
emx_onnx_cgen/ir/ops/nn.py +361 -38
emx_onnx_cgen/ir/ops/reduce.py +1 -16
emx_onnx_cgen/lowering/__init__.py +9 -0
emx_onnx_cgen/lowering/arg_reduce.py +0 -4
emx_onnx_cgen/lowering/average_pool.py +157 -27
emx_onnx_cgen/lowering/bernoulli.py +73 -0
emx_onnx_cgen/lowering/common.py +48 -0
emx_onnx_cgen/lowering/concat.py +41 -7
emx_onnx_cgen/lowering/conv.py +19 -8
emx_onnx_cgen/lowering/conv_integer.py +103 -0
emx_onnx_cgen/lowering/dequantize_linear.py +128 -0
emx_onnx_cgen/lowering/elementwise.py +140 -43
emx_onnx_cgen/lowering/gather.py +11 -2
emx_onnx_cgen/lowering/gemm.py +7 -124
emx_onnx_cgen/lowering/global_max_pool.py +0 -5
emx_onnx_cgen/lowering/gru.py +323 -0
emx_onnx_cgen/lowering/hamming_window.py +104 -0
emx_onnx_cgen/lowering/hardmax.py +1 -37
emx_onnx_cgen/lowering/identity.py +7 -6
emx_onnx_cgen/lowering/logsoftmax.py +1 -35
emx_onnx_cgen/lowering/lp_pool.py +15 -4
emx_onnx_cgen/lowering/matmul.py +3 -105
emx_onnx_cgen/lowering/optional_has_element.py +28 -0
emx_onnx_cgen/lowering/qlinear_mul.py +116 -0
emx_onnx_cgen/lowering/reduce.py +0 -5
emx_onnx_cgen/lowering/reshape.py +7 -16
emx_onnx_cgen/lowering/shape.py +14 -8
emx_onnx_cgen/lowering/slice.py +14 -4
emx_onnx_cgen/lowering/softmax.py +1 -35
emx_onnx_cgen/lowering/split.py +37 -3
emx_onnx_cgen/lowering/tfidf_vectorizer.py +199 -0
emx_onnx_cgen/lowering/tile.py +38 -1
emx_onnx_cgen/lowering/topk.py +1 -5
emx_onnx_cgen/lowering/transpose.py +9 -3
emx_onnx_cgen/lowering/unsqueeze.py +11 -16
emx_onnx_cgen/lowering/upsample.py +151 -0
emx_onnx_cgen/lowering/variadic.py +1 -1
emx_onnx_cgen/lowering/where.py +0 -5
emx_onnx_cgen/onnx_import.py +578 -14
emx_onnx_cgen/ops.py +3 -0
emx_onnx_cgen/templates/adagrad_op.c.j2 +16 -0
emx_onnx_cgen/templates/arg_reduce_op.c.j2 +18 -0
emx_onnx_cgen/templates/attention_op.c.j2 +189 -0
emx_onnx_cgen/templates/average_pool_op.c.j2 +126 -0
emx_onnx_cgen/templates/batch_norm_op.c.j2 +11 -0
emx_onnx_cgen/templates/bernoulli_op.c.j2 +34 -0
emx_onnx_cgen/templates/binary_op.c.j2 +9 -0
emx_onnx_cgen/templates/cast_op.c.j2 +9 -0
emx_onnx_cgen/templates/clip_op.c.j2 +14 -0
emx_onnx_cgen/templates/concat_op.c.j2 +28 -0
emx_onnx_cgen/templates/constant_of_shape_op.c.j2 +10 -0
emx_onnx_cgen/templates/conv_integer_op.c.j2 +34 -0
emx_onnx_cgen/templates/conv_op.c.j2 +32 -0
emx_onnx_cgen/templates/conv_transpose_op.c.j2 +43 -0
emx_onnx_cgen/templates/cumsum_op.c.j2 +51 -0
emx_onnx_cgen/templates/depth_to_space_op.c.j2 +26 -0
emx_onnx_cgen/templates/dequantize_linear_op.c.j2 +10 -0
emx_onnx_cgen/templates/einsum_op.c.j2 +55 -0
emx_onnx_cgen/templates/expand_op.c.j2 +14 -0
emx_onnx_cgen/templates/eye_like_op.c.j2 +27 -0
emx_onnx_cgen/templates/gather_elements_op.c.j2 +13 -0
emx_onnx_cgen/templates/gather_nd_op.c.j2 +29 -0
emx_onnx_cgen/templates/gather_op.c.j2 +13 -0
emx_onnx_cgen/templates/gemm_op.c.j2 +35 -0
emx_onnx_cgen/templates/grid_sample_op.c.j2 +184 -0
emx_onnx_cgen/templates/group_normalization_op.c.j2 +46 -0
emx_onnx_cgen/templates/gru_op.c.j2 +152 -0
emx_onnx_cgen/templates/hamming_window_op.c.j2 +12 -0
emx_onnx_cgen/templates/hardmax_op.c.j2 +24 -0
emx_onnx_cgen/templates/identity_op.c.j2 +9 -0
emx_onnx_cgen/templates/instance_normalization_op.c.j2 +35 -0
emx_onnx_cgen/templates/layer_normalization_op.c.j2 +65 -0
emx_onnx_cgen/templates/logsoftmax_op.c.j2 +27 -0
emx_onnx_cgen/templates/lp_normalization_op.c.j2 +27 -0
emx_onnx_cgen/templates/lp_pool_op.c.j2 +24 -0
emx_onnx_cgen/templates/lrn_op.c.j2 +20 -0
emx_onnx_cgen/templates/lstm_op.c.j2 +175 -0
emx_onnx_cgen/templates/matmul_op.c.j2 +13 -0
emx_onnx_cgen/templates/maxpool_op.c.j2 +118 -0
emx_onnx_cgen/templates/mean_variance_normalization_op.c.j2 +34 -0
emx_onnx_cgen/templates/multi_input_op.c.j2 +15 -0
emx_onnx_cgen/templates/negative_log_likelihood_loss_op.c.j2 +54 -0
emx_onnx_cgen/templates/nonmax_suppression_op.c.j2 +179 -0
emx_onnx_cgen/templates/nonzero_op.c.j2 +15 -0
emx_onnx_cgen/templates/one_hot_op.c.j2 +25 -0
emx_onnx_cgen/templates/optional_has_element_op.c.j2 +4 -0
emx_onnx_cgen/templates/pad_op.c.j2 +80 -0
emx_onnx_cgen/templates/qlinear_matmul_op.c.j2 +33 -0
emx_onnx_cgen/templates/qlinear_mul_op.c.j2 +18 -0
emx_onnx_cgen/templates/quantize_linear_op.c.j2 +13 -0
emx_onnx_cgen/templates/range_op.c.j2 +8 -0
emx_onnx_cgen/templates/reduce_op.c.j2 +28 -0
emx_onnx_cgen/templates/reduce_op_dynamic.c.j2 +77 -0
emx_onnx_cgen/templates/reshape_op.c.j2 +18 -0
emx_onnx_cgen/templates/resize_op.c.j2 +277 -0
emx_onnx_cgen/templates/rms_normalization_op.c.j2 +28 -0
emx_onnx_cgen/templates/rotary_embedding_op.c.j2 +66 -0
emx_onnx_cgen/templates/scatter_nd_op.c.j2 +52 -0
emx_onnx_cgen/templates/shape_op.c.j2 +6 -0
emx_onnx_cgen/templates/size_op.c.j2 +4 -0
emx_onnx_cgen/templates/slice_op.c.j2 +9 -0
emx_onnx_cgen/templates/slice_op_dynamic.c.j2 +70 -0
emx_onnx_cgen/templates/softmax_cross_entropy_loss_op.c.j2 +105 -0
emx_onnx_cgen/templates/softmax_op.c.j2 +26 -0
emx_onnx_cgen/templates/space_to_depth_op.c.j2 +22 -0
emx_onnx_cgen/templates/split_op.c.j2 +18 -0
emx_onnx_cgen/templates/tensor_scatter_op.c.j2 +44 -0
emx_onnx_cgen/templates/testbench.c.j2 +161 -0
emx_onnx_cgen/templates/tfidf_vectorizer_op.c.j2 +144 -0
emx_onnx_cgen/templates/tile_op.c.j2 +14 -0
emx_onnx_cgen/templates/topk_op.c.j2 +50 -0
emx_onnx_cgen/templates/transpose_op.c.j2 +9 -0
emx_onnx_cgen/templates/trilu_op.c.j2 +33 -0
emx_onnx_cgen/templates/unary_op.c.j2 +23 -0
emx_onnx_cgen/templates/where_op.c.j2 +9 -0
emx_onnx_cgen/verification.py +45 -5
{emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/METADATA +33 -15
emx_onnx_cgen-0.4.1.dev0.dist-info/RECORD +190 -0
{emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/WHEEL +1 -1
emx_onnx_cgen/runtime/__init__.py +0 -1
emx_onnx_cgen/runtime/evaluator.py +0 -2955
emx_onnx_cgen-0.3.7.dist-info/RECORD +0 -107
{emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/entry_points.txt +0 -0
{emx_onnx_cgen-0.3.7.dist-info → emx_onnx_cgen-0.4.1.dev0.dist-info}/top_level.txt +0 -0

emx_onnx_cgen/templates/split_op.c.j2 ADDED Viewed

@@ -0,0 +1,18 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+    const {{ c_type }} *input_data = (const {{ c_type }} *){{ input0 }};
+    {{ c_type }} *output_ptrs[] = { {% for output in outputs %}({{ c_type }} *){{ output }}{% if not loop.last %}, {% endif %}{% endfor %} };
+    const idx_t axis_sizes[] = { {% for axis in axis_sizes %}{{ axis }}{% if not loop.last %}, {% endif %}{% endfor %} };
+    for (idx_t outer_idx = 0; outer_idx < {{ outer }}; ++outer_idx) {
+        idx_t input_base = outer_idx * {{ axis_total }} * {{ inner }};
+        idx_t axis_offset = 0;
+        for (idx_t output_idx = 0; output_idx < {{ output_count }}; ++output_idx) {
+            idx_t copy_elems = axis_sizes[output_idx] * {{ inner }};
+            memcpy(
+                output_ptrs[output_idx] + outer_idx * copy_elems,
+                input_data + input_base + axis_offset,
+                copy_elems * sizeof({{ c_type }})
+            );
+            axis_offset += copy_elems;
+        }
+    }
+}

emx_onnx_cgen/templates/tensor_scatter_op.c.j2 ADDED Viewed

@@ -0,0 +1,44 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in output_shape %}
+for (idx_t {{ output_loop_vars[loop.index0] }} = 0; {{ output_loop_vars[loop.index0] }} < {{ dim }}; ++{{ output_loop_vars[loop.index0] }}) {
+{% endfor %}
+    {{ output }}{% for var in output_loop_vars %}[{{ var }}]{% endfor %} = {{ past_cache }}{% for var in output_loop_vars %}[{{ var }}]{% endfor %};
+{% for _ in output_shape %}
+}
+{% endfor %}
+{% if prefix_shape %}
+{% for dim in prefix_shape %}
+for (idx_t {{ prefix_loop_vars[loop.index0] }} = 0; {{ prefix_loop_vars[loop.index0] }} < {{ dim }}; ++{{ prefix_loop_vars[loop.index0] }}) {
+{% endfor %}
+{% endif %}
+    idx_t {{ write_index_var }} = 0;
+{% if write_indices_present %}
+    {{ write_index_var }} = (idx_t){{ write_indices }}[{{ batch_index_var }}];
+{% endif %}
+    for (idx_t {{ sequence_loop_var }} = 0; {{ sequence_loop_var }} < {{ sequence_dim }}; ++{{ sequence_loop_var }}) {
+        idx_t {{ cache_index_var }} = {{ write_index_var }} + {{ sequence_loop_var }};
+{% if circular %}
+        {{ cache_index_var }} = {{ cache_index_var }} % {{ max_sequence_length }};
+        if ({{ cache_index_var }} < 0) {
+            {{ cache_index_var }} += {{ max_sequence_length }};
+        }
+{% endif %}
+{% if tail_shape %}
+{% for dim in tail_shape %}
+        for (idx_t {{ tail_loop_vars[loop.index0] }} = 0; {{ tail_loop_vars[loop.index0] }} < {{ dim }}; ++{{ tail_loop_vars[loop.index0] }}) {
+{% endfor %}
+{% endif %}
+        {{ output_index_expr }} = {{ update_index_expr }};
+{% if tail_shape %}
+{% for _ in tail_shape %}
+        }
+{% endfor %}
+{% endif %}
+    }
+{% if prefix_shape %}
+{% for _ in prefix_shape %}
+}
+{% endfor %}
+{% endif %}
+}

emx_onnx_cgen/templates/testbench.c.j2 ADDED Viewed

@@ -0,0 +1,161 @@
+{% if rng_requires_u64 %}
+static uint64_t rng_state = 0x243f6a8885a308d3ull;
+static uint64_t rng_next_u64(void) {
+    uint64_t x = rng_state;
+    x ^= x >> 12;
+    x ^= x << 25;
+    x ^= x >> 27;
+    rng_state = x;
+    return x * 0x2545f4914f6cdd1dull;
+}
+{% endif %}
+{% if rng_requires_float %}
+static float rng_next_float(void) {
+    return (float)((double)rng_next_u64() * (1.0 / 18446744073709551616.0));
+}
+{% endif %}
+{% if rng_requires_double %}
+static double rng_next_double(void) {
+    return (double)rng_next_u64() * (1.0 / 18446744073709551616.0);
+}
+{% endif %}
+{% if rng_requires_i64 %}
+static int64_t rng_next_i64(void) {
+    return (int64_t)rng_next_u64();
+}
+{% endif %}
+{% for input in inputs %}
+{% if input.constant_lines %}
+static const {{ input.c_type }} {{ input.constant_name }}[] = {
+{% for line in input.constant_lines %}
+    {{ line }}{% if not loop.last %},{% endif %}
+{% endfor %}
+};
+{% endif %}
+{% endfor %}
+int main(int argc, char **argv) {
+    FILE *input_file = NULL;
+    if (argc > 1) {
+        input_file = fopen(argv[1], "rb");
+        if (!input_file) {
+            fprintf(stderr, "Failed to open input file: %s\n", argv[1]);
+            return 1;
+        }
+    }
+{% for dim in dim_args %}
+    int {{ dim.name }} = {{ dim.value }};
+{% endfor %}
+{% for input in inputs %}
+{% if input.optional_flag_name %}
+    _Bool {{ input.optional_flag_name }} = {{ "true" if input.optional_present else "false" }};
+{% endif %}
+    {{ input.c_type }} {{ input.name }}{{ input.array_suffix }};
+{% if input.constant_name %}
+{% if input.rank == 0 %}
+    {{ input.name }} = {{ input.constant_name }}[0];
+{% else %}
+{% for depth in range(input.rank) %}
+    for (idx_t {{ input.loop_vars[depth] }} = 0; {{ input.loop_vars[depth] }} < {{ input.shape[depth] }}; ++{{ input.loop_vars[depth] }}) {
+{% endfor %}
+        {{ input.name }}{{ input.array_index_expr }} = {{ input.constant_name }}[{{ input.index_expr }}];
+{% for depth in range(input.rank - 1, -1, -1) %}
+    }
+{% endfor %}
+{% endif %}
+{% else %}
+    if (input_file) {
+{% for depth in range(input.rank) %}
+    for (idx_t {{ input.loop_vars[depth] }} = 0; {{ input.loop_vars[depth] }} < {{ input.shape[depth] }}; ++{{ input.loop_vars[depth] }}) {
+{% endfor %}
+        if (fread(&{{ input.name }}{{ input.array_index_expr }}, sizeof({{ input.c_type }}), 1, input_file) != 1) {
+            fprintf(stderr, "Failed to read input {{ input.json_name }}\n");
+            return 1;
+        }
+{% for depth in range(input.rank - 1, -1, -1) %}
+    }
+{% endfor %}
+    } else {
+{% for depth in range(input.rank) %}
+    for (idx_t {{ input.loop_vars[depth] }} = 0; {{ input.loop_vars[depth] }} < {{ input.shape[depth] }}; ++{{ input.loop_vars[depth] }}) {
+{% endfor %}
+        {{ input.name }}{{ input.array_index_expr }} = {{ input.random_expr }};
+{% for depth in range(input.rank - 1, -1, -1) %}
+    }
+{% endfor %}
+    }
+{% endif %}
+{% endfor %}
+    if (input_file) {
+        fclose(input_file);
+    }
+{% for output in outputs %}
+    {{ output.c_type }} {{ output.name }}{{ output.array_suffix }};
+{% endfor %}
+    if (!{{ model_name }}_load("{{ weight_data_filename }}")) {
+        return 1;
+    }
+    {{ model_name }}({% for dim in dim_args %}{{ dim.name }}, {% endfor %}{% for input in inputs %}{{ input.name }}{% if input.optional_flag_name %}, {{ input.optional_flag_name }}{% endif %}, {% endfor %}{% for output in outputs %}{{ output.name }}{% if not loop.last %}, {% endif %}{% endfor %});
+    printf("{\"inputs\":{");
+{% for input in inputs %}
+{% if not loop.first %}
+    printf(",");
+{% endif %}
+    printf("\"{{ input.json_name }}\":{\"shape\":[{{ input.shape_literal }}],\"data\":");
+    printf("[");
+{% for depth in range(input.rank) %}
+for (idx_t {{ input.loop_vars[depth] }} = 0; {{ input.loop_vars[depth] }} < {{ input.shape[depth] }}; ++{{ input.loop_vars[depth] }}) {
+    if ({{ input.loop_vars[depth] }}) {
+        printf(",");
+    }
+{% if depth < input.rank - 1 %}
+    printf("[");
+{% endif %}
+{% endfor %}
+printf("{{ input.print_format }}", {{ input.print_cast }}{{ input.name }}{{ input.array_index_expr }});
+{% for depth in range(input.rank - 1, -1, -1) %}
+{% if depth < input.rank - 1 %}
+    printf("]");
+{% endif %}
+}
+{% endfor %}
+    printf("]}");
+{% endfor %}
+    printf("},\"outputs\":{");
+{% for output in outputs %}
+{% if not loop.first %}
+    printf(",");
+{% endif %}
+    printf("\"{{ output.json_name }}\":{\"shape\":[{{ output.shape_literal }}],\"data\":");
+    printf("[");
+{% for depth in range(output.rank) %}
+for (idx_t {{ output.loop_vars[depth] }} = 0; {{ output.loop_vars[depth] }} < {{ output.shape[depth] }}; ++{{ output.loop_vars[depth] }}) {
+    if ({{ output.loop_vars[depth] }}) {
+        printf(",");
+    }
+{% if depth < output.rank - 1 %}
+    printf("[");
+{% endif %}
+{% endfor %}
+printf("{{ output.print_format }}", {{ output.print_cast }}{{ output.name }}{{ output.array_index_expr }});
+{% for depth in range(output.rank - 1, -1, -1) %}
+{% if depth < output.rank - 1 %}
+    printf("]");
+{% endif %}
+}
+{% endfor %}
+    printf("]}");
+{% endfor %}
+    printf("}}\n");
+    return 0;
+}

emx_onnx_cgen/templates/tfidf_vectorizer_op.c.j2 ADDED Viewed

@@ -0,0 +1,144 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+    const int64_t pool[{{ pool_size if pool_size > 0 else 1 }}] = { {% if pool_values %}{{ pool_values | join(', ') }}{% else %}0{% endif %} };
+    const int64_t ngram_counts[{{ ngram_counts_len if ngram_counts_len > 0 else 1 }}] = { {% if ngram_counts_values %}{{ ngram_counts_values | join(', ') }}{% else %}0{% endif %} };
+    const int64_t ngram_indexes[{{ ngram_index_len if ngram_index_len > 0 else 1 }}] = { {% if ngram_indexes_values %}{{ ngram_indexes_values | join(', ') }}{% else %}0{% endif %} };
+    {% if weights_values %}
+    const {{ c_type }} weights[{{ weights_values | length if weights_values | length > 0 else 1 }}] = { {% if weights_values %}{{ weights_values | join(', ') }}{% else %}{{ one_literal }}{% endif %} };
+    {% endif %}
+    const idx_t output_dim = {{ output_dim }};
+    const idx_t pool_size = {{ pool_size }};
+    const idx_t ngram_counts_len = {{ ngram_counts_len }};
+    const idx_t max_skip = {{ max_skip_count }};
+    const idx_t min_gram = {{ min_gram_length }};
+    const idx_t max_gram = {{ max_gram_length }};
+{% if input_rank == 1 %}
+    const idx_t seq_len = {{ input_shape[0] }};
+    for (idx_t o = 0; o < output_dim; ++o) {
+        {{ output }}[o] = {{ zero_literal }};
+    }
+    idx_t ngram_index_offset = 0;
+    for (idx_t gram_len = 1; gram_len < min_gram; ++gram_len) {
+        const idx_t count_start = (idx_t)ngram_counts[gram_len - 1];
+        const idx_t count_end =
+            gram_len < ngram_counts_len ? (idx_t)ngram_counts[gram_len] : pool_size;
+        const idx_t num_ngrams = (count_end - count_start) / gram_len;
+        ngram_index_offset += num_ngrams;
+    }
+    for (idx_t gram_len = min_gram; gram_len <= max_gram; ++gram_len) {
+        const idx_t count_start = (idx_t)ngram_counts[gram_len - 1];
+        const idx_t count_end =
+            gram_len < ngram_counts_len ? (idx_t)ngram_counts[gram_len] : pool_size;
+        const idx_t num_ngrams = (count_end - count_start) / gram_len;
+        if (num_ngrams == 0) {
+            continue;
+        }
+        const idx_t skip_limit = gram_len == 1 ? 0 : max_skip;
+        for (idx_t skip = 0; skip <= skip_limit; ++skip) {
+            const idx_t stride = skip + 1;
+            if (seq_len < (gram_len - 1) * stride + 1) {
+                continue;
+            }
+            const idx_t max_start = seq_len - (gram_len - 1) * stride;
+            for (idx_t start = 0; start < max_start; ++start) {
+                for (idx_t ngram_idx = 0; ngram_idx < num_ngrams; ++ngram_idx) {
+                    const idx_t pool_offset = count_start + ngram_idx * gram_len;
+                    int match = 1;
+                    for (idx_t pos = 0; pos < gram_len; ++pos) {
+                        const int64_t token = (int64_t){{ input0 }}[start + pos * stride];
+                        if (token != pool[pool_offset + pos]) {
+                            match = 0;
+                            break;
+                        }
+                    }
+                    if (match) {
+                        const idx_t out_index =
+                            (idx_t)ngram_indexes[ngram_index_offset + ngram_idx];
+                        {{ output }}[out_index] += ({{ c_type }})1;
+                    }
+                }
+            }
+        }
+        ngram_index_offset += num_ngrams;
+    }
+{% if mode_id != 0 %}
+    for (idx_t o = 0; o < output_dim; ++o) {
+        {{ c_type }} value = {{ output }}[o];
+{% if mode_id == 1 %}
+        value = value > {{ zero_literal }} ? {{ one_literal }} : {{ zero_literal }};
+{% endif %}
+{% if weights_values %}
+        value *= weights[o];
+{% else %}
+        value *= {{ one_literal }};
+{% endif %}
+        {{ output }}[o] = value;
+    }
+{% endif %}
+{% else %}
+    const idx_t batch = {{ input_shape[0] }};
+    const idx_t seq_len = {{ input_shape[1] }};
+    for (idx_t b = 0; b < batch; ++b) {
+        for (idx_t o = 0; o < output_dim; ++o) {
+            {{ output }}[b][o] = {{ zero_literal }};
+        }
+        idx_t ngram_index_offset = 0;
+        for (idx_t gram_len = 1; gram_len < min_gram; ++gram_len) {
+            const idx_t count_start = (idx_t)ngram_counts[gram_len - 1];
+            const idx_t count_end =
+                gram_len < ngram_counts_len ? (idx_t)ngram_counts[gram_len] : pool_size;
+            const idx_t num_ngrams = (count_end - count_start) / gram_len;
+            ngram_index_offset += num_ngrams;
+        }
+        for (idx_t gram_len = min_gram; gram_len <= max_gram; ++gram_len) {
+            const idx_t count_start = (idx_t)ngram_counts[gram_len - 1];
+            const idx_t count_end =
+                gram_len < ngram_counts_len ? (idx_t)ngram_counts[gram_len] : pool_size;
+            const idx_t num_ngrams = (count_end - count_start) / gram_len;
+            if (num_ngrams == 0) {
+                continue;
+            }
+            const idx_t skip_limit = gram_len == 1 ? 0 : max_skip;
+            for (idx_t skip = 0; skip <= skip_limit; ++skip) {
+                const idx_t stride = skip + 1;
+                if (seq_len < (gram_len - 1) * stride + 1) {
+                    continue;
+                }
+                const idx_t max_start = seq_len - (gram_len - 1) * stride;
+                for (idx_t start = 0; start < max_start; ++start) {
+                    for (idx_t ngram_idx = 0; ngram_idx < num_ngrams; ++ngram_idx) {
+                        const idx_t pool_offset = count_start + ngram_idx * gram_len;
+                        int match = 1;
+                        for (idx_t pos = 0; pos < gram_len; ++pos) {
+                            const int64_t token = (int64_t){{ input0 }}[b][start + pos * stride];
+                            if (token != pool[pool_offset + pos]) {
+                                match = 0;
+                                break;
+                            }
+                        }
+                        if (match) {
+                            const idx_t out_index =
+                                (idx_t)ngram_indexes[ngram_index_offset + ngram_idx];
+                            {{ output }}[b][out_index] += ({{ c_type }})1;
+                        }
+                    }
+                }
+            }
+            ngram_index_offset += num_ngrams;
+        }
+{% if mode_id != 0 %}
+        for (idx_t o = 0; o < output_dim; ++o) {
+            {{ c_type }} value = {{ output }}[b][o];
+{% if mode_id == 1 %}
+            value = value > {{ zero_literal }} ? {{ one_literal }} : {{ zero_literal }};
+{% endif %}
+{% if weights_values %}
+            value *= weights[o];
+{% else %}
+            value *= {{ one_literal }};
+{% endif %}
+            {{ output }}[b][o] = value;
+        }
+{% endif %}
+    }
+{% endif %}
+}

emx_onnx_cgen/templates/tile_op.c.j2 ADDED Viewed

@@ -0,0 +1,14 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+    const {{ c_type }} *input_data = (const {{ c_type }} *){{ input0 }};
+    {{ c_type }} *output_data = ({{ c_type }} *){{ output }};
+    idx_t output_index = 0;
+{% for dim in output_shape %}
+    for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+        idx_t input_index = {{ input_index_expr }};
+        output_data[output_index] = input_data[input_index];
+        output_index++;
+{% for _ in output_shape %}
+    }
+{% endfor %}
+}

emx_onnx_cgen/templates/topk_op.c.j2 ADDED Viewed

@@ -0,0 +1,50 @@
+static inline int {{ op_name }}_better({{ input_c_type }} a, {{ output_indices_c_type }} ai, {{ input_c_type }} b, {{ output_indices_c_type }} bi) {
+    return {{ compare_expr }};
+}
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in outer_shape %}
+for (idx_t {{ outer_loop_vars[loop.index0] }} = 0; {{ outer_loop_vars[loop.index0] }} < {{ dim }}; ++{{ outer_loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ input_c_type }} best_values[{{ k }}];
+{{ output_indices_c_type }} best_indices[{{ k }}];
+for (idx_t {{ reduce_var }} = 0; {{ reduce_var }} < {{ k }}; ++{{ reduce_var }}) {
+    best_values[{{ reduce_var }}] = {{ input0 }}{{ input_index_expr }};
+    best_indices[{{ reduce_var }}] = ({{ output_indices_c_type }}){{ reduce_var }};
+}
+for (idx_t i = 1; i < {{ k }}; ++i) {
+    idx_t j = i;
+    while (j > 0
+        && {{ op_name }}_better(best_values[j], best_indices[j], best_values[j - 1], best_indices[j - 1])) {
+        {{ input_c_type }} temp_value = best_values[j - 1];
+        {{ output_indices_c_type }} temp_index = best_indices[j - 1];
+        best_values[j - 1] = best_values[j];
+        best_indices[j - 1] = best_indices[j];
+        best_values[j] = temp_value;
+        best_indices[j] = temp_index;
+        --j;
+    }
+}
+for (idx_t {{ reduce_var }} = {{ k }}; {{ reduce_var }} < {{ axis_dim }}; ++{{ reduce_var }}) {
+    {{ input_c_type }} candidate = {{ input0 }}{{ input_index_expr }};
+    {{ output_indices_c_type }} candidate_index = ({{ output_indices_c_type }}){{ reduce_var }};
+    if ({{ op_name }}_better(candidate, candidate_index, best_values[{{ k - 1 }}], best_indices[{{ k - 1 }}])) {
+        idx_t pos = {{ k - 1 }};
+        while (pos > 0
+            && {{ op_name }}_better(candidate, candidate_index, best_values[pos - 1], best_indices[pos - 1])) {
+            best_values[pos] = best_values[pos - 1];
+            best_indices[pos] = best_indices[pos - 1];
+            --pos;
+        }
+        best_values[pos] = candidate;
+        best_indices[pos] = candidate_index;
+    }
+}
+for (idx_t {{ k_var }} = 0; {{ k_var }} < {{ k }}; ++{{ k_var }}) {
+    {{ output_values }}{{ output_index_expr }} = best_values[{{ k_var }}];
+    {{ output_indices }}{{ output_index_expr }} = best_indices[{{ k_var }}];
+}
+{% for _ in outer_shape %}
+}
+{% endfor %}
+}

emx_onnx_cgen/templates/transpose_op.c.j2 ADDED Viewed

@@ -0,0 +1,9 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in output_shape %}
+for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = {{ input0 }}{% for var in input_indices %}[{{ var }}]{% endfor %};
+{% for _ in output_shape %}
+}
+{% endfor %}
+}

emx_onnx_cgen/templates/trilu_op.c.j2 ADDED Viewed

@@ -0,0 +1,33 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+    const {{ c_type }} *input_data = (const {{ c_type }} *){{ input0 }};
+    {{ c_type }} *output_data = ({{ c_type }} *){{ output }};
+    int64_t k = {{ k_value }};
+    {% if k_input %}
+    const {{ k_c_type }} *k_data = (const {{ k_c_type }} *){{ k_input }};
+    k = (int64_t)k_data[0];
+    {% endif %}
+    idx_t rows = {{ rows }};
+    idx_t cols = {{ cols }};
+    idx_t batch_size = {{ batch_size }};
+    for (idx_t batch = 0; batch < batch_size; ++batch) {
+        idx_t base = batch * rows * cols;
+        for (idx_t row = 0; row < rows; ++row) {
+            for (idx_t col = 0; col < cols; ++col) {
+                idx_t offset = base + row * cols + col;
+                {% if upper %}
+                if ((int64_t)col - (int64_t)row >= k) {
+                    output_data[offset] = input_data[offset];
+                } else {
+                    output_data[offset] = {{ zero_literal }};
+                }
+                {% else %}
+                if ((int64_t)row - (int64_t)col >= -k) {
+                    output_data[offset] = input_data[offset];
+                } else {
+                    output_data[offset] = {{ zero_literal }};
+                }
+                {% endif %}
+            }
+        }
+    }
+}

emx_onnx_cgen/templates/unary_op.c.j2 ADDED Viewed

@@ -0,0 +1,23 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in shape %}
+for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+{% if operator == "relu" %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = {{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %} > {{ zero_literal }} ? {{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %} : {{ zero_literal }};
+{% elif operator == "neg" %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = -{{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %};
+{% elif operator == "identity" %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = {{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %};
+{% elif operator == "zero" %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = {{ zero_literal }};
+{% elif operator == "isneginf" %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = isinf({{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %}) && signbit({{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %});
+{% elif operator == "isposinf" %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = isinf({{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %}) && !signbit({{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %});
+{% else %}
+{{ output }}{% for var in loop_vars %}[{{ var }}]{% endfor %} = {{ operator }}({{ input0 }}{% for var in loop_vars %}[{{ var }}]{% endfor %});
+{% endif %}
+{% for _ in shape %}
+}
+{% endfor %}
+}

emx_onnx_cgen/templates/where_op.c.j2 ADDED Viewed

@@ -0,0 +1,9 @@
+static inline void {{ op_name }}({{ dim_args }}{{ params | join(', ') }}) {
+{% for dim in output_shape %}
+for (idx_t {{ loop_vars[loop.index0] }} = 0; {{ loop_vars[loop.index0] }} < {{ dim }}; ++{{ loop_vars[loop.index0] }}) {
+{% endfor %}
+{{ output_expr }} = {{ condition_expr }} ? {{ x_expr }} : {{ y_expr }};
+{% for _ in output_shape %}
+}
+{% endfor %}
+}

emx_onnx_cgen/verification.py CHANGED Viewed

@@ -1,29 +1,69 @@
 from __future__ import annotations
+from typing import TypeAlias
 import numpy as np
 from shared.ulp import ulp_intdiff_float
+WorstUlpDiff: TypeAlias = tuple[tuple[int, ...], float, float]
-def max_ulp_diff(actual: np.ndarray, expected: np.ndarray) -> int:
+def _validate_ulp_inputs(
+    actual: np.ndarray, expected: np.ndarray
+) -> np.dtype | None:
     if actual.shape != expected.shape:
         raise ValueError(
             f"Shape mismatch for ULP calculation: {actual.shape} vs {expected.shape}"
         )
     if not np.issubdtype(expected.dtype, np.floating):
-        return 0
+        return None
     dtype = expected.dtype
     if dtype not in (np.float16, np.float32, np.float64):
         raise ValueError(f"Unsupported floating dtype for ULP calculation: {dtype}")
+    return dtype
+def worst_ulp_diff(
+    actual: np.ndarray,
+    expected: np.ndarray,
+    *,
+    atol_eps: float = 1.0,
+) -> tuple[int, WorstUlpDiff | None]:
+    dtype = _validate_ulp_inputs(actual, expected)
+    if dtype is None:
+        return 0, None
+    if actual.size == 0:
+        return 0, None
     actual_cast = actual.astype(dtype, copy=False)
     expected_cast = expected.astype(dtype, copy=False)
+    abs_tol = np.finfo(dtype).eps * atol_eps
     max_diff = 0
-    for actual_value, expected_value in np.nditer(
-        [actual_cast, expected_cast], flags=["refs_ok"]
-    ):
+    worst: WorstUlpDiff | None = None
+    iterator = np.nditer(
+        [actual_cast, expected_cast], flags=["refs_ok", "multi_index"]
+    )
+    for actual_value, expected_value in iterator:
+        if (
+            abs(float(actual_value[()]) - float(expected_value[()]))
+            <= abs_tol
+        ):
+            continue
         diff = ulp_intdiff_float(actual_value[()], expected_value[()])
         if diff > max_diff:
             max_diff = diff
+            worst = (
+                iterator.multi_index,
+                float(actual_value[()]),
+                float(expected_value[()]),
+            )
+    return max_diff, worst
+def max_ulp_diff(
+    actual: np.ndarray, expected: np.ndarray, *, atol_eps: float = 1.0
+) -> int:
+    max_diff, _ = worst_ulp_diff(actual, expected, atol_eps=atol_eps)
     return max_diff

emx-onnx-cgen 0.3.7__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl

emx-onnx-cgen 0.3.7__py3-none-any.whl → 0.4.1.dev0__py3-none-any.whl