evoasm 0.0.2.pre7 → 0.1.0.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. checksums.yaml +4 -4
  2. data/.gdbinit +41 -0
  3. data/.gitignore +1 -2
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +8 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.md +660 -0
  8. data/Makefile +1 -1
  9. data/README.md +17 -9
  10. data/Rakefile +39 -107
  11. data/bin/gdb +1 -1
  12. data/bin/gdb_loop +4 -0
  13. data/docs/FindingInstructions.md +17 -0
  14. data/docs/JIT.md +14 -0
  15. data/docs/SymbolicRegression.md +102 -0
  16. data/docs/Visualization.md +29 -0
  17. data/docs/examples/bit_insts.rb +44 -0
  18. data/docs/examples/jit.rb +26 -0
  19. data/docs/examples/loss.gif +0 -0
  20. data/docs/examples/program.png +0 -0
  21. data/docs/examples/sym_reg.rb +64 -0
  22. data/docs/examples/vis.rb +38 -0
  23. data/evoasm.gemspec +21 -15
  24. data/ext/evoasm_ext/Rakefile +3 -0
  25. data/ext/evoasm_ext/compile.rake +35 -0
  26. data/ext/evoasm_ext/libevoasm/src/evoasm-alloc.c +226 -0
  27. data/ext/evoasm_ext/libevoasm/src/evoasm-alloc.h +84 -0
  28. data/ext/evoasm_ext/libevoasm/src/evoasm-arch.c +52 -0
  29. data/ext/evoasm_ext/libevoasm/src/evoasm-arch.h +101 -0
  30. data/ext/evoasm_ext/libevoasm/src/evoasm-bitmap.h +158 -0
  31. data/ext/evoasm_ext/libevoasm/src/evoasm-buf.c +204 -0
  32. data/ext/evoasm_ext/libevoasm/src/evoasm-buf.h +109 -0
  33. data/ext/evoasm_ext/libevoasm/src/evoasm-domain.c +124 -0
  34. data/ext/evoasm_ext/libevoasm/src/evoasm-domain.h +279 -0
  35. data/ext/evoasm_ext/libevoasm/src/evoasm-error.c +65 -0
  36. data/ext/evoasm_ext/libevoasm/src/evoasm-error.h +108 -0
  37. data/ext/evoasm_ext/{evoasm-log.c → libevoasm/src/evoasm-log.c} +36 -18
  38. data/ext/evoasm_ext/libevoasm/src/evoasm-log.h +93 -0
  39. data/ext/evoasm_ext/libevoasm/src/evoasm-param.c +22 -0
  40. data/ext/evoasm_ext/libevoasm/src/evoasm-param.h +33 -0
  41. data/ext/evoasm_ext/libevoasm/src/evoasm-pop-params.c +192 -0
  42. data/ext/evoasm_ext/libevoasm/src/evoasm-pop-params.h +60 -0
  43. data/ext/evoasm_ext/libevoasm/src/evoasm-pop.c +1323 -0
  44. data/ext/evoasm_ext/libevoasm/src/evoasm-pop.h +107 -0
  45. data/ext/evoasm_ext/libevoasm/src/evoasm-program-io.c +116 -0
  46. data/ext/evoasm_ext/libevoasm/src/evoasm-program-io.h +60 -0
  47. data/ext/evoasm_ext/libevoasm/src/evoasm-program.c +1827 -0
  48. data/ext/evoasm_ext/libevoasm/src/evoasm-program.h +167 -0
  49. data/ext/evoasm_ext/libevoasm/src/evoasm-rand.c +65 -0
  50. data/ext/evoasm_ext/libevoasm/src/evoasm-rand.h +76 -0
  51. data/ext/evoasm_ext/libevoasm/src/evoasm-signal.c +106 -0
  52. data/ext/evoasm_ext/libevoasm/src/evoasm-signal.h +58 -0
  53. data/ext/evoasm_ext/libevoasm/src/evoasm-util.h +112 -0
  54. data/ext/evoasm_ext/libevoasm/src/evoasm-x64.c +925 -0
  55. data/ext/evoasm_ext/libevoasm/src/evoasm-x64.h +277 -0
  56. data/ext/evoasm_ext/libevoasm/src/evoasm.c +28 -0
  57. data/ext/evoasm_ext/libevoasm/src/evoasm.h +35 -0
  58. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-enums.h +2077 -0
  59. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-insts.c +191203 -0
  60. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-insts.h +1713 -0
  61. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-misc.c +348 -0
  62. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-misc.h +93 -0
  63. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-params.c +51 -0
  64. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-params.h +509 -0
  65. data/lib/evoasm.rb +28 -11
  66. data/lib/evoasm/buffer.rb +105 -0
  67. data/lib/evoasm/capstone.rb +100 -0
  68. data/lib/evoasm/domain.rb +116 -0
  69. data/lib/evoasm/error.rb +37 -16
  70. data/lib/evoasm/exception_error.rb +19 -0
  71. data/lib/evoasm/ffi_ext.rb +53 -0
  72. data/lib/evoasm/libevoasm.rb +286 -0
  73. data/lib/evoasm/libevoasm/x64_enums.rb +1967 -0
  74. data/lib/evoasm/parameter.rb +20 -0
  75. data/lib/evoasm/population.rb +145 -0
  76. data/lib/evoasm/population/parameters.rb +227 -0
  77. data/lib/evoasm/population/plotter.rb +89 -0
  78. data/lib/evoasm/prng.rb +64 -0
  79. data/lib/evoasm/program.rb +195 -12
  80. data/lib/evoasm/program/io.rb +144 -0
  81. data/lib/evoasm/test.rb +8 -0
  82. data/lib/evoasm/version.rb +1 -1
  83. data/lib/evoasm/x64.rb +115 -0
  84. data/lib/evoasm/x64/cpu_state.rb +95 -0
  85. data/lib/evoasm/x64/instruction.rb +109 -0
  86. data/lib/evoasm/x64/operand.rb +156 -0
  87. data/lib/evoasm/x64/parameters.rb +211 -0
  88. data/test/helpers/population_helper.rb +128 -0
  89. data/test/helpers/test_helper.rb +1 -0
  90. data/test/helpers/x64_helper.rb +24 -0
  91. data/test/integration/bitwise_reverse_test.rb +41 -0
  92. data/test/integration/gcd_test.rb +52 -0
  93. data/test/integration/popcnt_test.rb +46 -0
  94. data/test/integration/sym_reg_test.rb +68 -0
  95. data/test/unit/evoasm/buffer_test.rb +48 -0
  96. data/test/unit/evoasm/capstone_test.rb +18 -0
  97. data/test/unit/evoasm/domain_test.rb +55 -0
  98. data/test/unit/evoasm/population/parameters_test.rb +106 -0
  99. data/test/unit/evoasm/population_test.rb +96 -0
  100. data/test/unit/evoasm/prng_test.rb +47 -0
  101. data/test/unit/evoasm/x64/cpu_state_test.rb +73 -0
  102. data/test/unit/evoasm/x64/encoding_test.rb +320 -0
  103. data/test/unit/evoasm/x64/instruction_access_test.rb +177 -0
  104. data/test/unit/evoasm/x64/instruction_encoding_test.rb +780 -0
  105. data/test/unit/evoasm/x64/instruction_test.rb +62 -0
  106. data/test/unit/evoasm/x64/parameters_test.rb +65 -0
  107. data/test/unit/evoasm/x64_test.rb +52 -0
  108. metadata +195 -89
  109. data/Gemfile.rake +0 -8
  110. data/Gemfile.rake.lock +0 -51
  111. data/LICENSE.txt +0 -373
  112. data/data/tables/README.md +0 -19
  113. data/data/tables/x64.csv +0 -1684
  114. data/data/templates/evoasm-x64.c.erb +0 -319
  115. data/data/templates/evoasm-x64.h.erb +0 -126
  116. data/examples/abs.yml +0 -20
  117. data/examples/popcnt.yml +0 -17
  118. data/examples/sym_reg.yml +0 -26
  119. data/exe/evoasm-search +0 -13
  120. data/ext/evoasm_ext/evoasm-alloc.c +0 -145
  121. data/ext/evoasm_ext/evoasm-alloc.h +0 -59
  122. data/ext/evoasm_ext/evoasm-arch.c +0 -44
  123. data/ext/evoasm_ext/evoasm-arch.h +0 -161
  124. data/ext/evoasm_ext/evoasm-bitmap.h +0 -114
  125. data/ext/evoasm_ext/evoasm-buf.c +0 -130
  126. data/ext/evoasm_ext/evoasm-buf.h +0 -47
  127. data/ext/evoasm_ext/evoasm-error.c +0 -31
  128. data/ext/evoasm_ext/evoasm-error.h +0 -75
  129. data/ext/evoasm_ext/evoasm-free-list.c.tmpl +0 -121
  130. data/ext/evoasm_ext/evoasm-free-list.h.tmpl +0 -86
  131. data/ext/evoasm_ext/evoasm-log.h +0 -69
  132. data/ext/evoasm_ext/evoasm-misc.c +0 -23
  133. data/ext/evoasm_ext/evoasm-misc.h +0 -282
  134. data/ext/evoasm_ext/evoasm-param.h +0 -37
  135. data/ext/evoasm_ext/evoasm-search.c +0 -2145
  136. data/ext/evoasm_ext/evoasm-search.h +0 -214
  137. data/ext/evoasm_ext/evoasm-util.h +0 -40
  138. data/ext/evoasm_ext/evoasm-x64.c +0 -275624
  139. data/ext/evoasm_ext/evoasm-x64.h +0 -5436
  140. data/ext/evoasm_ext/evoasm.c +0 -7
  141. data/ext/evoasm_ext/evoasm.h +0 -23
  142. data/ext/evoasm_ext/evoasm_ext.c +0 -1757
  143. data/ext/evoasm_ext/extconf.rb +0 -31
  144. data/lib/evoasm/cli.rb +0 -6
  145. data/lib/evoasm/cli/search.rb +0 -127
  146. data/lib/evoasm/core_ext.rb +0 -1
  147. data/lib/evoasm/core_ext/array.rb +0 -9
  148. data/lib/evoasm/core_ext/integer.rb +0 -10
  149. data/lib/evoasm/core_ext/kwstruct.rb +0 -13
  150. data/lib/evoasm/core_ext/range.rb +0 -5
  151. data/lib/evoasm/examples.rb +0 -27
  152. data/lib/evoasm/gen.rb +0 -8
  153. data/lib/evoasm/gen/enum.rb +0 -169
  154. data/lib/evoasm/gen/name_util.rb +0 -80
  155. data/lib/evoasm/gen/state.rb +0 -176
  156. data/lib/evoasm/gen/state_dsl.rb +0 -152
  157. data/lib/evoasm/gen/strio.rb +0 -27
  158. data/lib/evoasm/gen/translator.rb +0 -1102
  159. data/lib/evoasm/gen/version.rb +0 -5
  160. data/lib/evoasm/gen/x64.rb +0 -237
  161. data/lib/evoasm/gen/x64/funcs.rb +0 -495
  162. data/lib/evoasm/gen/x64/inst.rb +0 -781
  163. data/lib/evoasm/search.rb +0 -40
  164. data/lib/evoasm/tasks/gen_task.rb +0 -86
  165. data/lib/evoasm/tasks/template_task.rb +0 -52
  166. data/test/test_helper.rb +0 -1
  167. data/test/x64/test_helper.rb +0 -19
  168. data/test/x64/x64_test.rb +0 -87
@@ -0,0 +1,107 @@
1
+ /*
2
+ * Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
3
+ *
4
+ * This program is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Affero General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
+
18
+ #pragma once
19
+
20
+ #include <stdalign.h>
21
+ #include "evoasm-error.h"
22
+ #include "evoasm-pop-params.h"
23
+
24
+ typedef struct { /* Two parallel arrays holding loss bookkeeping. */
25
+ evoasm_loss_t *samples; /* loss sample values; indexing scheme not visible in this header — TODO confirm in evoasm-pop.c */
26
+ uint8_t *counters; /* presumably how many samples have been recorded per slot — verify against evoasm-pop.c */
27
+ } evoasm_pop_loss_data_t;
28
+
29
+ typedef struct { /* Program-level data kept as separate arrays: jump offsets and jump conditions. */
30
+ int16_t *jmp_offs; /* signed 16-bit jump offsets */
31
+ uint8_t *jmp_cond; /* jump condition codes; encoding is defined elsewhere — TODO confirm */
32
+ } evoasm_pop_program_data_t;
33
+
34
+ typedef struct { /* Module-level data owned by the population (see evoasm_pop_t.module_data). */
35
+ float *pheromones; /* NOTE(review): meaning and layout not derivable from this header */
36
+ uint16_t *sizes; /* presumably module sizes — TODO confirm */
37
+ evoasm_pop_program_data_t program_data; /* jump offsets/conditions stored for modules */
38
+ } evoasm_pop_module_data_t;
39
+
40
+ typedef struct { /* Kernel-level data: instruction ids plus the architecture-specific parameters for them. */
41
+ evoasm_inst_id_t *insts; /* instruction ids */
42
+ union { /* one parameter array, viewed either typed (x64) or untyped (data) */
43
+ evoasm_x64_basic_params_t *x64;
44
+ void *data;
45
+ } params;
46
+ } evoasm_pop_kernel_data_t;
47
+
48
+ struct evoasm_deme_s { /* State of one deme (sub-population); the struct is aligned to EVOASM_CACHE_LINE_SIZE. */
49
+ evoasm_prng_t prng; /* pseudo-random generator state stored per deme */
50
+ uint16_t *blessed_indiv_idxs; /* individual indices; count kept in n_blessed_indivs (presumably) */
51
+ uint16_t *doomed_indiv_idxs; /* individual indices; count kept in n_doomed_indivs (presumably) */
52
+ evoasm_pop_program_data_t parent_program_data;
53
+ evoasm_pop_kernel_data_t parent_kernel_data;
54
+ evoasm_program_t program; /* embedded program object; NOTE(review): likely a scratch program used for evaluation — confirm */
55
+ uint64_t *error_counters;
56
+ uint64_t error_counter;
57
+ evoasm_pop_loss_data_t loss_data;
58
+ evoasm_pop_program_data_t program_data;
59
+ evoasm_pop_kernel_data_t kernel_data;
60
+ evoasm_loss_t *top_losses;
61
+
62
+ evoasm_loss_t best_loss; /* best_* fields: presumably a snapshot of the best individual seen so far — TODO confirm */
63
+ evoasm_pop_program_data_t best_program_data;
64
+ evoasm_pop_kernel_data_t best_kernel_data;
65
+
66
+ uint16_t n_doomed_indivs;
67
+ uint16_t n_blessed_indivs;
68
+ uint16_t n_examples;
69
+ evoasm_arch_id_t arch_id;
70
+ evoasm_pop_params_t *params; /* same pointer types as evoasm_pop_t.params/domains; ownership not visible here */
71
+ evoasm_domain_t *domains;
72
+ } evoasm_aligned(EVOASM_CACHE_LINE_SIZE) ;
73
+
74
+ typedef struct evoasm_deme_s evoasm_deme_t;
75
+
76
+ typedef struct evoasm_pop_s { /* A population: parameters, domains, an array of demes and module-level data. */
77
+ evoasm_pop_params_t *params; /* parameter object; evoasm_pop_init() takes a pointer of this type */
78
+ evoasm_domain_t *domains;
79
+ evoasm_deme_t *demes; /* array of demes; its length is not stored in this struct */
80
+ evoasm_pop_module_data_t module_data;
81
+ bool seeded : 1; /* presumably set once evoasm_pop_seed() has run — TODO confirm */
82
+ evoasm_loss_t *summary_losses;
83
+
84
+ } evoasm_pop_t;
85
+
86
+ evoasm_success_t /* Initializes `pop` for architecture `arch_id` with `params`; returns a success flag. Implementation is outside this header. */
87
+ evoasm_pop_init(evoasm_pop_t *pop,
88
+ evoasm_arch_id_t arch_id,
89
+ evoasm_pop_params_t *params);
90
+
91
+
92
+ evoasm_success_t /* Evaluates the population; returns a success flag. */
93
+ evoasm_pop_eval(evoasm_pop_t *pop);
94
+
95
+ void /* Advances the population to its next generation; cannot report failure. */
96
+ evoasm_pop_next_gen(evoasm_pop_t *pop);
97
+
98
+
99
+ evoasm_success_t /* Seeds the population; NOTE(review): presumably must precede evoasm_pop_eval() — confirm */
100
+ evoasm_pop_seed(evoasm_pop_t *pop);
101
+
102
+ void /* Releases resources held by `pop`; whether `pop` itself is freed is not visible here. */
103
+ evoasm_pop_destroy(evoasm_pop_t *pop);
104
+
105
+ //void
106
+ //evoasm_pop_inject(evoasm_pop_t *pop, evoasm_indiv_t *indiv, size_t indiv_size, evoasm_loss_t loss);
107
+
@@ -0,0 +1,116 @@
1
+ /*
2
+ * Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
3
+ *
4
+ * This program is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Affero General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
+
18
+ #include "evoasm-alloc.h"
19
+ #include "evoasm-program-io.h"
20
+ #include <stdarg.h>
21
+
22
+ static const char *const _evoasm_example_type_names[] = { /* Printable names indexed by evoasm_program_io_val_type_t (I64, U64, F64 in that order); used in the type-mismatch error message. */
23
+ "i64",
24
+ "u64",
25
+ "f64"
26
+ };
27
+
28
+ evoasm_program_io_t *
29
+ evoasm_program_io_alloc(size_t len) {
30
+ evoasm_program_io_t *program_io = evoasm_malloc(sizeof(evoasm_program_io_t) + len * sizeof(evoasm_program_io_val_t));
31
+ program_io->len = (uint16_t) len;
32
+
33
+ return program_io;
34
+ }
35
+
36
+ evoasm_success_t
37
+ evoasm_program_io_init(evoasm_program_io_t *program_io, size_t arity, ...) {
38
+ va_list args;
39
+ bool retval = true;
40
+
41
+ if(arity > EVOASM_PROGRAM_IO_MAX_ARITY) {
42
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
43
+ "Maximum arity exceeded (%zu > %d)", arity, EVOASM_PROGRAM_IO_MAX_ARITY);
44
+ retval = false;
45
+ goto done;
46
+ }
47
+
48
+ program_io->arity = (uint8_t) arity;
49
+
50
+ va_start(args, arity);
51
+ for(size_t i = 0; i < program_io->len; i++) {
52
+ size_t type_idx = i % arity;
53
+ evoasm_program_io_val_type_t type = va_arg(args, evoasm_program_io_val_type_t);
54
+ evoasm_program_io_val_t val;
55
+ switch(type) {
56
+ case EVOASM_PROGRAM_IO_VAL_TYPE_F64:
57
+ val.f64 = va_arg(args, double);
58
+ break;
59
+ case EVOASM_PROGRAM_IO_VAL_TYPE_I64:
60
+ val.i64 = va_arg(args, int64_t);
61
+ break;
62
+ case EVOASM_PROGRAM_IO_VAL_TYPE_U64:
63
+ val.u64 = va_arg(args, uint64_t);
64
+ break;
65
+ default:
66
+ evoasm_assert_not_reached();
67
+ }
68
+
69
+ program_io->vals[i] = val;
70
+
71
+ if(i >= arity) {
72
+ evoasm_program_io_val_type_t prev_type = program_io->types[type_idx];
73
+
74
+ if(prev_type != type) {
75
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
76
+ "Example value type mismatch (previously %s, now %s)",
77
+ _evoasm_example_type_names[prev_type], _evoasm_example_type_names[type]);
78
+ retval = false;
79
+ goto done;
80
+ }
81
+ }
82
+ program_io->types[type_idx] = type;
83
+ }
84
+
85
+
86
+ done:
87
+ va_end(args);
88
+ return retval;
89
+ }
90
+
91
+ double
92
+ evoasm_program_io_get_value_f64(evoasm_program_io_t *program_io, size_t idx) {
93
+ return program_io->vals[idx].f64;
94
+ }
95
+
96
+ int64_t
97
+ evoasm_program_io_get_value_i64(evoasm_program_io_t *program_io, size_t idx) {
98
+ return program_io->vals[idx].i64;
99
+ }
100
+
101
+ void
102
+ evoasm_program_io_destroy(evoasm_program_io_t *program_io) {
103
+
104
+ }
105
+
106
+ evoasm_program_io_val_type_t
107
+ evoasm_program_io_get_type(evoasm_program_io_t *program_io, size_t idx) {
108
+ return program_io->types[idx % program_io->arity];
109
+ }
110
+
111
+ EVOASM_DEF_FREE_FUNC(program_io) /* macro defined elsewhere; presumably generates the free function for evoasm_program_io_t — TODO confirm */
112
+
113
+ EVOASM_DEF_GETTER(program_io, arity, size_t) /* presumably generates a size_t accessor for the `arity` field — TODO confirm */
114
+
115
+ EVOASM_DEF_GETTER(program_io, len, size_t) /* presumably the same for the `len` field — TODO confirm */
116
+
@@ -0,0 +1,60 @@
1
+ /*
2
+ * Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
3
+ *
4
+ * This program is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Affero General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
+
18
+ #pragma once
19
+
20
+ #include <stdint.h>
21
+
22
+ #define EVOASM_PROGRAM_IO_MAX_ARITY 8
23
+
24
+ typedef enum { /* Tag naming which member of evoasm_program_io_val_t is valid. */
25
+ EVOASM_PROGRAM_IO_VAL_TYPE_I64, /* signed 64-bit integer */
26
+ EVOASM_PROGRAM_IO_VAL_TYPE_U64, /* unsigned 64-bit integer */
27
+ EVOASM_PROGRAM_IO_VAL_TYPE_F64, /* double */
28
+ } evoasm_program_io_val_type_t;
29
+
30
+ typedef union { /* One input/output value; the active member is given by a separate evoasm_program_io_val_type_t. */
31
+ double f64;
32
+ int64_t i64;
33
+ uint64_t u64;
34
+ } evoasm_program_io_val_t;
35
+
36
+ typedef struct { /* Program inputs or outputs: `len` values, with one type stored per column. */
37
+ uint8_t arity; /* number of columns; used as the divisor in EVOASM_PROGRAM_IO_N_EXAMPLES */
38
+ uint16_t len; /* total number of values in `vals` */
39
+ evoasm_program_io_val_type_t types[EVOASM_PROGRAM_IO_MAX_ARITY]; /* one type per column */
40
+ evoasm_program_io_val_t vals[1]; /* declared with one element; NOTE(review): presumably the allocator reserves room for `len` values behind the header — confirm */
41
+ } evoasm_program_io_t;
42
+
43
+ #define EVOASM_PROGRAM_OUTPUT_MAX_ARITY EVOASM_PROGRAM_IO_MAX_ARITY /* outputs share the I/O arity limit */
44
+ #define EVOASM_PROGRAM_INPUT_MAX_ARITY EVOASM_PROGRAM_IO_MAX_ARITY /* inputs share the I/O arity limit */
45
+
46
+ typedef evoasm_program_io_t evoasm_program_output_t; /* outputs and inputs use the same representation */
47
+ typedef evoasm_program_io_t evoasm_program_input_t;
48
+
49
+ #define EVOASM_PROGRAM_IO_N_EXAMPLES(program_io) ((size_t)((program_io)->len / (program_io)->arity)) /* number of complete rows: len / arity (integer division; arity must be non-zero) */
50
+ #define EVOASM_PROGRAM_INPUT_N_TUPLES(program_input) EVOASM_PROGRAM_IO_N_EXAMPLES((evoasm_program_io_t *)program_input) /* NOTE(review): argument is not parenthesized inside the cast */
51
+ #define EVOASM_PROGRAM_OUTPUT_N_TUPLES(program_output) EVOASM_PROGRAM_IO_N_EXAMPLES((evoasm_program_io_t *)program_output) /* NOTE(review): argument is not parenthesized inside the cast */
52
+
53
+ evoasm_program_io_t * /* Allocates a container with room for `len` values; defined in evoasm-program-io.c. */
54
+ evoasm_program_io_alloc(size_t len);
55
+
56
+ void /* Counterpart to init; NOTE(review): this header uses size_t above but only includes <stdint.h> */
57
+ evoasm_program_io_destroy(evoasm_program_io_t *program_io);
58
+
59
/* Destroys a program output. Outputs are evoasm_program_io_t objects, so this
 * forwards to evoasm_program_io_destroy(). The cast names the real typedef
 * and the argument is parenthesized so any expression may be passed. */
#define evoasm_program_output_destroy(program_output) \
  evoasm_program_io_destroy((evoasm_program_io_t *) (program_output))
@@ -0,0 +1,1827 @@
1
+ /*
2
+ * Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
3
+ *
4
+ * This program is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Affero General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
+
18
+ #include "evoasm-signal.h"
19
+ #include "evoasm-program.h"
20
+ #include "evoasm-arch.h"
21
+ #include "evoasm.h"
22
+ #include "evoasm-x64.h"
23
+ #include "evoasm-param.h"
24
+ #include "evoasm-program-io.h"
25
+
26
+
27
+ EVOASM_DEF_LOG_TAG("program")
28
+
29
+ static inline double
30
+ evoasm_program_io_val_to_dbl(evoasm_program_io_val_t io_val, evoasm_program_io_val_type_t io_val_type) {
31
+ switch(io_val_type) {
32
+ case EVOASM_PROGRAM_IO_VAL_TYPE_F64:
33
+ return io_val.f64;
34
+ case EVOASM_PROGRAM_IO_VAL_TYPE_I64:
35
+ return (double) io_val.i64;
36
+ default:
37
+ evoasm_log_fatal("unsupported input/output value type %d", io_val_type);
38
+ evoasm_assert_not_reached();
39
+ }
40
+ }
41
+
42
+ bool
43
+ evoasm_program_destroy(evoasm_program_t *program) {
44
+
45
+ bool retval = true;
46
+
47
+ if(!program->shallow) {
48
+ for(size_t i = 0; i < program->size; i++) {
49
+ evoasm_kernel_t *kernel = &program->kernels[i];
50
+ evoasm_free(kernel->insts);
51
+ switch(program->arch_info->id) {
52
+ case EVOASM_ARCH_X64:
53
+ evoasm_free(kernel->params.x64);
54
+ break;
55
+ default:
56
+ evoasm_assert_not_reached();
57
+ }
58
+ }
59
+
60
+ }
61
+
62
+ evoasm_free(program->jmp_offs);
63
+ evoasm_free(program->jmp_conds);
64
+ evoasm_free(program->kernels);
65
+ evoasm_free(program->recur_counters);
66
+ evoasm_free(program->output_vals);
67
+
68
+ if(program->buf) {
69
+ if(!evoasm_buf_destroy(program->buf)) {
70
+ retval = false;
71
+ }
72
+ }
73
+
74
+ if(program->body_buf) {
75
+ if(!evoasm_buf_destroy(program->body_buf)) {
76
+ retval = false;
77
+ }
78
+ }
79
+
80
+ return retval;
81
+ }
82
+
83
+ #if 0 /* Disabled: evoasm_program_clone() is compiled out. NOTE(review): it refers to evoasm_program_destroy_(), which is not defined in the visible part of this file. */
84
+ evoasm_success_t
85
+ evoasm_program_clone(evoasm_program_t *program, evoasm_program_t *cloned_program) {
86
+ size_t i = 0;
87
+
88
+ *cloned_program = *program;
89
+ cloned_program->reset_rflags = false;
90
+ cloned_program->_input.len = 0;
91
+ cloned_program->_output.len = 0;
92
+ cloned_program->output_vals = NULL;
93
+ cloned_program->buf = NULL;
94
+ cloned_program->body_buf = NULL;
95
+
96
+ /* memory addresses in original buffer point to memory in original program,
97
+ * we need to reemit assembly, this is done in a lazy fashion */
98
+ cloned_program->need_emit = true;
99
+
100
+ EVOASM_TRY(error, evoasm_buf_clone, program->buf, &cloned_program->_buf);
101
+ cloned_program->buf = &cloned_program->_buf;
102
+ EVOASM_TRY(error, evoasm_buf_clone, program->body_buf, &cloned_program->_body_buf);
103
+ cloned_program->body_buf = &cloned_program->_body_buf;
104
+
105
+ size_t program_params_size = sizeof(evoasm_program_params_t);
106
+ cloned_program->params = evoasm_malloc(program_params_size);
107
+
108
+ if(!cloned_program->params) {
109
+ goto error;
110
+ }
111
+
112
+ memcpy(cloned_program->params, program->params, program_params_size);
113
+
114
+ for(; i < program->size; i++) {
115
+ evoasm_kernel_t *orig_kernel = &program->kernels[i];
116
+ evoasm_kernel_t *cloned_kernel = &cloned_program->kernels[i];
117
+ *cloned_kernel = *orig_kernel;
118
+
119
+ size_t params_size =
120
+ sizeof(evoasm_kernel_params_t) + orig_kernel->size * sizeof(evoasm_kernel_param_t);
121
+ cloned_kernel->params = evoasm_malloc(params_size);
122
+ if(!cloned_kernel->params) {
123
+ goto error;
124
+ }
125
+ memcpy(cloned_kernel->params, orig_kernel->params, params_size);
126
+ }
127
+
128
+ return true;
129
+
130
+ error:
131
+ (void) evoasm_program_destroy_(cloned_program, i);
132
+ return false;
133
+ }
134
+ #endif
135
+
136
+ evoasm_buf_t *
137
+ evoasm_program_get_buf(evoasm_program_t *program, bool body) {
138
+ if(body) {
139
+ return program->body_buf;
140
+ } else {
141
+ return program->buf;
142
+ }
143
+ }
144
+
145
+ size_t
146
+ evoasm_program_get_size(evoasm_program_t *program) {
147
+ return program->size;
148
+ }
149
+
150
+ size_t
151
+ evoasm_program_get_kernel_code(evoasm_program_t *program, size_t kernel_idx, const uint8_t **code) {
152
+ evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
153
+ size_t len = (size_t) kernel->buf_end - kernel->buf_start;
154
+ *code = program->body_buf->data + kernel->buf_start;
155
+ return len;
156
+ }
157
+
158
+ size_t
159
+ evoasm_program_get_code(evoasm_program_t *program, bool frame, const uint8_t **code) {
160
+ evoasm_buf_t *buf;
161
+ if(frame) {
162
+ buf = program->buf;
163
+ } else {
164
+ buf = program->body_buf;
165
+ }
166
+ *code = buf->data;
167
+ return buf->pos;
168
+ }
169
+
170
+
171
+ int
172
+ evoasm_program_get_jmp_off(evoasm_program_t *program, size_t pos) {
173
+ return program->jmp_offs[pos];
174
+ }
175
+
176
+
177
+ bool
178
+ evoasm_program_is_input_reg(evoasm_program_t *program, size_t kernel_idx, evoasm_reg_id_t reg_id) {
179
+ evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
180
+ switch(program->arch_info->id) {
181
+ case EVOASM_ARCH_X64:
182
+ return kernel->reg_info.x64.regs[reg_id].input;
183
+ default:
184
+ evoasm_assert_not_reached();
185
+ }
186
+ }
187
+
188
+ bool
189
+ evoasm_program_is_output_reg(evoasm_program_t *program, size_t kernel_idx, evoasm_reg_id_t reg_id) {
190
+ evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
191
+ switch(program->arch_info->id) {
192
+ case EVOASM_ARCH_X64:
193
+ return kernel->reg_info.x64.regs[reg_id].output;
194
+ default:
195
+ evoasm_assert_not_reached();
196
+ }
197
+ }
198
+
199
+ #define EVOASM_PROGRAM_TMP_REG_X64 EVOASM_X64_REG_14 /* scratch register: the emit helpers in this file load addresses into it before a memory access */
200
+
201
+ static evoasm_success_t /* Emits an RFLAGS reset: pushfq, overwrite the pushed quadword at [rsp] with 0, popfq. Returns false if encoding fails. */
202
+ evoasm_program_x64_emit_rflags_reset(evoasm_program_t *program) {
203
+ evoasm_x64_params_t params = {0}; /* never named below; presumably consumed by the EVOASM_X64_SET/ENC macros — confirm */
204
+ evoasm_buf_t *buf = program->buf; /* likewise presumably the macros' emission target — confirm */
205
+
206
+ evoasm_log_debug("emitting RFLAGS reset");
207
+ EVOASM_X64_ENC(pushfq);
208
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_X64_REG_SP); /* memory operand base = stack pointer */
209
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, 0);
210
+ EVOASM_X64_ENC(mov_rm64_imm32);
211
+ EVOASM_X64_ENC(popfq);
212
+
213
+ return true;
214
+ enc_failed: /* no goto in this function targets it; presumably jumped to from inside EVOASM_X64_ENC — confirm */
215
+ return false;
216
+ }
217
+
218
+ static evoasm_used evoasm_success_t
219
+ evoasm_program_x64_emit_mxcsr_reset(evoasm_program_t *program) {
220
+ static uint32_t default_mxcsr_val = 0x1f80;
221
+ evoasm_x64_params_t params = {0};
222
+ evoasm_buf_t *buf = program->buf;
223
+
224
+ evoasm_param_val_t addr_imm = (evoasm_param_val_t) (uintptr_t) &default_mxcsr_val;
225
+ evoasm_x64_reg_id_t reg_tmp0 = EVOASM_PROGRAM_TMP_REG_X64;
226
+
227
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, reg_tmp0);
228
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, addr_imm);
229
+ EVOASM_X64_ENC(mov_r32_imm32);
230
+
231
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, reg_tmp0);
232
+ EVOASM_X64_ENC(ldmxcsr_m32);
233
+
234
+ return true;
235
+ enc_failed:
236
+ return false;
237
+ }
238
+
239
+
240
+ static evoasm_success_t /* Emits stores that copy each output register of the last kernel into program->output_vals for tuple `tuple_idx`. Returns false if encoding fails. */
241
+ evoasm_program_x64_emit_output_store(evoasm_program_t *program,
242
+ size_t tuple_idx) {
243
+
244
+ evoasm_x64_params_t params = {0}; /* never named below; presumably consumed by the EVOASM_X64_SET/ENC macros — confirm */
245
+ evoasm_kernel_t *kernel = &program->kernels[program->size - 1]; /* only the last kernel's outputs are stored */
246
+ evoasm_buf_t *buf = program->buf;
247
+
248
+ for(size_t i = 0; i < kernel->n_output_regs; i++) {
249
+ evoasm_x64_reg_id_t reg_id = kernel->output_regs.x64[i];
250
+ evoasm_program_io_val_t *val_addr = &program->output_vals[(tuple_idx * kernel->n_output_regs) + i]; /* row-major: one row of n_output_regs slots per tuple */
251
+ evoasm_x64_reg_type_t reg_type = evoasm_x64_get_reg_type(reg_id);
252
+
253
+ evoasm_param_val_t addr_imm = (evoasm_param_val_t) (uintptr_t) val_addr;
254
+
255
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64); /* load the slot's absolute address into the scratch register */
256
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, addr_imm);
257
+ EVOASM_X64_ENC(mov_r64_imm64);
258
+
259
+ switch(reg_type) {
260
+ case EVOASM_X64_REG_TYPE_GP: { /* general-purpose register: 64-bit mov to [scratch] */
261
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, reg_id);
262
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
263
+ EVOASM_X64_ENC(mov_rm64_r64);
264
+ break;
265
+ }
266
+ case EVOASM_X64_REG_TYPE_XMM: { /* XMM register: movsd to [scratch] */
267
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, reg_id);
268
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
269
+ EVOASM_X64_ENC(movsd_xmmm64_xmm);
270
+ break;
271
+ }
272
+ default: {
273
+ evoasm_assert_not_reached();
274
+ }
275
+ }
276
+ }
277
+
278
+ return true;
279
+
280
+ enc_failed: /* no goto in this function targets it; presumably jumped to from inside EVOASM_X64_ENC — confirm */
281
+ return false;
282
+ }
283
+
284
+ static evoasm_x64_reg_id_t
285
+ evoasm_kernel_get_operand_reg_id_x64(evoasm_kernel_t *kernel, evoasm_x64_operand_t *op, size_t inst_idx) {
286
+ evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[inst_idx]);
287
+
288
+ if(op->param_idx < inst->n_params) {
289
+ return (evoasm_x64_reg_id_t) evoasm_x64_basic_params_get_(&kernel->params.x64[inst_idx],
290
+ (evoasm_x64_basic_param_id_t) inst->params[op->param_idx].id);
291
+ } else if(op->reg_id < EVOASM_X64_REG_NONE) {
292
+ return (evoasm_x64_reg_id_t) op->reg_id;
293
+ } else {
294
+ evoasm_assert_not_reached();
295
+ return EVOASM_X64_REG_NONE;
296
+ }
297
+ }
298
+
299
+ typedef struct { /* Per-register record of which bits have been written so far. */
300
+ evoasm_bitmap512_t mask; /* one bit per register bit; set by evoasm_x64_reg_liveness_update() */
301
+ } evoasm_x64_reg_liveness_t;
302
+
303
+ static void
304
+ evoasm_x64_reg_liveness_or_mask(evoasm_x64_inst_t *inst, evoasm_x64_operand_t *op, evoasm_x64_basic_params_t *params,
305
+ evoasm_bitmap512_t *mask512) {
306
+ evoasm_bitmap_t *mask = (evoasm_bitmap_t *) mask512;
307
+ switch(op->word) {
308
+ case EVOASM_X64_OPERAND_WORD_LB:
309
+ if(!op->implicit && op->param_idx < inst->n_params &&
310
+ (
311
+ (inst->params[op->param_idx].id == EVOASM_X64_BASIC_PARAM_REG0 && params->reg0_high_byte)
312
+ ||
313
+ (inst->params[op->param_idx].id == EVOASM_X64_BASIC_PARAM_REG1 && params->reg1_high_byte)
314
+ )) {
315
+ goto hb;
316
+ }
317
+ evoasm_bitmap_or64(mask, 0, 0x00ffu);
318
+ break;
319
+ case EVOASM_X64_OPERAND_WORD_HB: {
320
+ hb:
321
+ evoasm_bitmap_or64(mask, 0, 0xff00u);
322
+ break;
323
+ }
324
+ case EVOASM_X64_OPERAND_WORD_W:
325
+ evoasm_bitmap_or64(mask, 0, 0xffffu);
326
+ break;
327
+ case EVOASM_X64_OPERAND_WORD_DW:
328
+ /* 32bit writes clear the whole register */
329
+ if(op->reg_type == EVOASM_X64_REG_TYPE_GP) {
330
+ evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
331
+ } else {
332
+ /* xmm[0..31] does this for example */
333
+ evoasm_bitmap_or64(mask, 0, 0xffffffffu);
334
+ }
335
+ break;
336
+ case EVOASM_X64_OPERAND_WORD_LQW:
337
+ evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
338
+ break;
339
+ case EVOASM_X64_OPERAND_WORD_HQW:
340
+ evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
341
+ break;
342
+ case EVOASM_X64_OPERAND_WORD_DQW:
343
+ evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
344
+ evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
345
+ break;
346
+ case EVOASM_X64_OPERAND_WORD_VW:
347
+ evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
348
+ evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
349
+ evoasm_bitmap_or64(mask, 2, 0xffffffffffffffffull);
350
+ evoasm_bitmap_or64(mask, 3, 0xffffffffffffffffull);
351
+ break;
352
+ default:
353
+ evoasm_assert_not_reached();
354
+ }
355
+ }
356
+
357
+ static void
358
+ evoasm_x64_reg_liveness_update(evoasm_x64_reg_liveness_t *reg_liveness, evoasm_x64_inst_t *inst,
359
+ evoasm_x64_operand_t *op, evoasm_x64_basic_params_t *params) {
360
+ evoasm_x64_reg_liveness_or_mask(inst, op, params, &reg_liveness->mask);
361
+ }
362
+
363
+
364
+ static bool
365
+ evoasm_x64_reg_liveness_is_dirty_read_(evoasm_x64_reg_liveness_t *reg_liveness,
366
+ evoasm_bitmap512_t *mask) {
367
+
368
+ evoasm_bitmap512_andn(mask, &reg_liveness->mask, mask);
369
+ return !evoasm_bitmap512_is_zero(mask);
370
+ }
371
+
372
+ static bool
373
+ evoasm_x64_reg_liveness_is_dirty_read(evoasm_x64_reg_liveness_t *reg_liveness, evoasm_x64_inst_t *inst,
374
+ evoasm_x64_operand_t *op,
375
+ evoasm_x64_basic_params_t *params) {
376
+
377
+ evoasm_bitmap512_t mask = {0};
378
+ evoasm_x64_reg_liveness_or_mask(inst, op, params, &mask);
379
+
380
+ return evoasm_x64_reg_liveness_is_dirty_read_(reg_liveness, &mask);
381
+ }
382
+
383
+
384
+ static bool
385
+ evoasm_kernel_is_writing_inst_x64(evoasm_kernel_t *kernel, size_t inst_idx, evoasm_reg_id_t reg_id,
386
+ evoasm_x64_reg_liveness_t *reg_liveness) {
387
+ evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[inst_idx]);
388
+
389
+ for(size_t i = 0; i < x64_inst->n_operands; i++) {
390
+ evoasm_x64_operand_t *op = &x64_inst->operands[i];
391
+ evoasm_x64_reg_id_t op_reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, inst_idx);
392
+ evoasm_x64_basic_params_t *x64_basic_params = &kernel->params.x64[inst_idx];
393
+
394
+ if(op->written && op_reg_id == reg_id &&
395
+ evoasm_x64_reg_liveness_is_dirty_read(reg_liveness, x64_inst, op, x64_basic_params)) {
396
+ evoasm_x64_reg_liveness_update(reg_liveness, x64_inst, op, x64_basic_params);
397
+ return true;
398
+ }
399
+ }
400
+ return false;
401
+ }
402
+
403
+
404
+ static void
405
+ evoasm_x64_reg_liveness_init(evoasm_x64_reg_liveness_t *reg_liveness) {
406
+ static evoasm_x64_reg_liveness_t zero_reg_liveness = {0};
407
+ *reg_liveness = zero_reg_liveness;
408
+ }
409
+
410
+ static evoasm_success_t
411
+ evoasm_program_x64_prepare_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel) {
412
+ /* NOTE: output register are register that are written to
413
+ * _input registers are register that are read from without
414
+ * a previous write
415
+ */
416
+ evoasm_x64_reg_liveness_t reg_livenesses[EVOASM_X64_REG_NONE];
417
+ for(int i = 0; i < EVOASM_X64_REG_NONE; i++) {
418
+ evoasm_x64_reg_liveness_init(&reg_livenesses[i]);
419
+ }
420
+
421
+ kernel->n_input_regs = 0;
422
+ kernel->n_output_regs = 0;
423
+
424
+ static evoasm_kernel_reg_info_t zero_reg_info = {0};
425
+ kernel->reg_info = zero_reg_info;
426
+
427
+ /* First, handle read ops, so that writing ops do not disturb us */
428
+ for(size_t i = 0; i < kernel->size; i++) {
429
+ evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
430
+ evoasm_x64_basic_params_t *x64_basic_params = &kernel->params.x64[i];
431
+
432
+ for(size_t j = 0; j < x64_inst->n_operands; j++) {
433
+ evoasm_x64_operand_t *op = &x64_inst->operands[j];
434
+
435
+ if((op->read || op->maybe_written) &&
436
+ (op->type == EVOASM_X64_OPERAND_TYPE_REG || op->type == EVOASM_X64_OPERAND_TYPE_RM)) {
437
+
438
+ if(op->reg_type == EVOASM_X64_REG_TYPE_RFLAGS) {
439
+ program->reset_rflags = true;
440
+ } else {
441
+ evoasm_x64_reg_id_t reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) i);
442
+ evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
443
+ evoasm_x64_reg_liveness_t *reg_liveness = &reg_livenesses[reg_id];
444
+
445
+ if(!reg_info->input) {
446
+ // has not been written before, might contain garbage
447
+ bool dirty_read;
448
+
449
+ /* the writer rank check is needed for the following case
450
+ * inst regX (operand1, written), regX (operand2, read)
451
+ *
452
+ * The first operand marks regX as written. The read in the second
453
+ * operand, however, is dirty, since the write has not yet occurred at this point.
454
+ */
455
+
456
+ if(reg_info->written) {
457
+ dirty_read = evoasm_x64_reg_liveness_is_dirty_read(reg_liveness, x64_inst, op, x64_basic_params);
458
+ } else {
459
+ dirty_read = true;
460
+ }
461
+
462
+ if(dirty_read) {
463
+ reg_info->input = true;
464
+ kernel->n_input_regs++;
465
+ }
466
+ }
467
+ }
468
+ }
469
+ }
470
+
471
+ for(size_t j = 0; j < x64_inst->n_operands; j++) {
472
+ evoasm_x64_operand_t *op = &x64_inst->operands[j];
473
+
474
+ if(op->written && (op->type == EVOASM_X64_OPERAND_TYPE_REG || op->type == EVOASM_X64_OPERAND_TYPE_RM)) {
475
+
476
+ if(op->reg_type == EVOASM_X64_REG_TYPE_RFLAGS) {
477
+ kernel->reg_info.x64.written_flags =
478
+ (kernel->reg_info.x64.written_flags | op->written_flags) & EVOASM_X64_RFLAGS_FLAGS_BITSIZE;
479
+ kernel->reg_info.x64.regs[EVOASM_X64_REG_RFLAGS].written = true;
480
+ } else {
481
+ evoasm_x64_reg_id_t reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) i);
482
+ evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
483
+ evoasm_x64_reg_liveness_t *reg_liveness = &reg_livenesses[reg_id];
484
+
485
+ if(!reg_info->written) {
486
+ reg_info->written = true;
487
+ reg_info->output = true;
488
+ kernel->output_regs.x64[kernel->n_output_regs] = reg_id;
489
+ kernel->n_output_regs++;
490
+ }
491
+
492
+ evoasm_x64_reg_liveness_update(reg_liveness, x64_inst, op, x64_basic_params);
493
+ }
494
+ }
495
+ }
496
+ }
497
+
498
+ for(int i = 0; i < kernel->n_output_regs; i++) {
499
+ evoasm_x64_reg_id_t reg_id = kernel->output_regs.x64[i];
500
+ evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
501
+ if(!reg_info->input) {
502
+ evoasm_x64_reg_liveness_t *reg_liveness = &reg_livenesses[reg_id];
503
+
504
+ evoasm_bitmap512_t mask = {0};
505
+
506
+ switch(evoasm_x64_get_reg_type(reg_id)) {
507
+ case EVOASM_X64_REG_TYPE_GP:
508
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 0, 0xffffffffffffffffull);
509
+ break;
510
+ case EVOASM_X64_REG_TYPE_XMM:
511
+ case EVOASM_X64_REG_TYPE_ZMM:
512
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 0, 0xffffffffffffffffull);
513
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 1, 0xffffffffffffffffull);
514
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 2, 0xffffffffffffffffull);
515
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 3, 0xffffffffffffffffull);
516
+ break;
517
+ default:
518
+ evoasm_assert_not_reached();
519
+ }
520
+
521
+ bool dirty_read = evoasm_x64_reg_liveness_is_dirty_read_(reg_liveness, &mask);
522
+ if(dirty_read) {
523
+ reg_info->input = true;
524
+ kernel->n_input_regs++;
525
+ }
526
+ }
527
+ }
528
+
529
+ #ifdef EVOASM_ENABLE_PARANOID_MODE
530
+ {
531
+ size_t n_input_regs = 0;
532
+ for(evoasm_x64_reg_id_t i = (evoasm_x64_reg_id_t) 0; i < EVOASM_X64_REG_NONE; i++) {
533
+ if(kernel->reg_info.x64.regs[i].input) n_input_regs++;
534
+ }
535
+ assert(n_input_regs == kernel->n_input_regs);
536
+ }
537
+ #endif
538
+
539
+ assert(kernel->n_output_regs <= EVOASM_KERNEL_MAX_OUTPUT_REGS);
540
+ assert(kernel->n_input_regs <= EVOASM_KERNEL_MAX_INPUT_REGS);
541
+
542
+ if(kernel->n_output_regs == 0) {
543
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_PROGRAM_ERROR_CODE_NO_OUTPUT, NULL);
544
+ return false;
545
+ }
546
+
547
+ return true;
548
+ }
549
+
550
+ static evoasm_success_t
551
+ evoasm_program_x64_prepare(evoasm_program_t *program) {
552
+ for(size_t i = 0; i < program->size; i++) {
553
+ evoasm_kernel_t *kernel = &program->kernels[i];
554
+ EVOASM_TRY(error, evoasm_program_x64_prepare_kernel, program, kernel);
555
+ }
556
+
557
+ return true;
558
+
559
+ error:
560
+ return false;
561
+
562
+ }
563
+
564
+
565
+ static evoasm_success_t
566
+ evoasm_program_x64_emit_input_reg_load(evoasm_x64_reg_id_t input_reg_id,
567
+ evoasm_buf_t *buf,
568
+ evoasm_program_io_val_t *tuple,
569
+ evoasm_program_io_val_t *loaded_tuple,
570
+ bool force_load) {
571
+
572
+ evoasm_x64_reg_type_t reg_type = evoasm_x64_get_reg_type(input_reg_id);
573
+ evoasm_x64_params_t params = {0};
574
+
575
+ evoasm_log_debug("emitting _input register initialization of register %d to value %"
576
+ PRId64, input_reg_id, tuple->i64);
577
+
578
+ switch(reg_type) {
579
+ case EVOASM_X64_REG_TYPE_GP: {
580
+ if(force_load) {
581
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
582
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) (uintptr_t) &tuple->i64);
583
+ EVOASM_X64_ENC(mov_r64_imm64);
584
+
585
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
586
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
587
+ EVOASM_X64_ENC(mov_r64_rm64);
588
+ } else {
589
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
590
+ /*FIXME: hard-coded tuple type */
591
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) tuple->i64);
592
+ EVOASM_X64_ENC(mov_r64_imm64);
593
+ }
594
+ break;
595
+ }
596
+ case EVOASM_X64_REG_TYPE_XMM: {
597
+ /* load address of tuple into tmp_reg */
598
+ if(loaded_tuple != tuple) {
599
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
600
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) (uintptr_t) &tuple->f64);
601
+ EVOASM_X64_ENC(mov_r64_imm64);
602
+ loaded_tuple = tuple;
603
+ }
604
+
605
+ /* load into xmm via address in tmp_reg */
606
+ /*FIXME: hard-coded tuple type */
607
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
608
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
609
+ EVOASM_X64_ENC(movsd_xmm_xmmm64);
610
+ break;
611
+ }
612
+ default:
613
+ evoasm_log_fatal("non-gpr register type (%d) (unimplemented)", reg_type);
614
+ evoasm_assert_not_reached();
615
+ }
616
+
617
+ return true;
618
+
619
+ enc_failed:
620
+ return false;
621
+ }
622
+
623
+
624
+ static evoasm_success_t
625
+ evoasm_program_x64_emit_input_load(evoasm_program_t *program,
626
+ evoasm_program_io_val_t *input_vals,
627
+ evoasm_program_io_val_type_t *types,
628
+ size_t in_arity,
629
+ bool set_io_mapping) {
630
+
631
+
632
+ evoasm_program_io_val_t *loaded_tuple = NULL;
633
+ evoasm_buf_t *buf = program->buf;
634
+ evoasm_kernel_t *kernel = &program->kernels[0];
635
+
636
+ evoasm_log_debug("n _input regs %d", kernel->n_input_regs);
637
+ #if 0
638
+ for(input_reg_id = (evoasm_x64_reg_id_t) 13; input_reg_id < 19; input_reg_id++) {
639
+ if(input_reg_id == EVOASM_X64_REG_SP) continue;
640
+ evoasm_x64_params_t params = {0};
641
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
642
+ /*FIXME: hard-coded tuple type */
643
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, 0);
644
+ EVOASM_X64_ENC(mov_r64_imm64);
645
+ }
646
+ #endif
647
+
648
+ {
649
+ size_t input_reg_idx = 0;
650
+ for(evoasm_x64_reg_id_t input_reg = (evoasm_x64_reg_id_t) 0; input_reg < EVOASM_X64_REG_NONE; input_reg++) {
651
+ if(!kernel->reg_info.x64.regs[input_reg].input) continue;
652
+
653
+ size_t tuple_idx;
654
+
655
+ if(set_io_mapping) {
656
+ tuple_idx = input_reg_idx++ % in_arity;
657
+ program->reg_inputs.x64[input_reg] = (uint8_t) tuple_idx;
658
+ } else {
659
+ tuple_idx = program->reg_inputs.x64[input_reg];
660
+ }
661
+
662
+ evoasm_program_io_val_t *tuple = &input_vals[tuple_idx];
663
+ EVOASM_TRY(error, evoasm_program_x64_emit_input_reg_load, input_reg, buf, tuple, loaded_tuple, false);
664
+ }
665
+ }
666
+
667
+ #ifdef EVOASM_ENABLE_PARANOID_MODE
668
+ EVOASM_TRY(error, evoasm_x64_emit_push, EVOASM_PROGRAM_TMP_REG_X64, buf);
669
+ for(evoasm_x64_reg_id_t non_input_reg = (evoasm_x64_reg_id_t) EVOASM_X64_REG_A;
670
+ non_input_reg < EVOASM_X64_REG_15; non_input_reg++) {
671
+ if(kernel->reg_info.x64.regs[non_input_reg].input) continue;
672
+ if(non_input_reg == EVOASM_X64_REG_SP) continue;
673
+
674
+ evoasm_program_io_val_t *tuple = &kernel->rand_vals[non_input_reg];
675
+ EVOASM_TRY(error, evoasm_program_x64_emit_input_reg_load, non_input_reg, buf, tuple, NULL, true);
676
+ }
677
+ EVOASM_TRY(error, evoasm_x64_emit_pop, EVOASM_PROGRAM_TMP_REG_X64, buf);
678
+ #endif
679
+
680
+ if(program->reset_rflags) {
681
+ EVOASM_TRY(error, evoasm_program_x64_emit_rflags_reset, program);
682
+ }
683
+ return true;
684
+
685
+ error:
686
+ return false;
687
+ }
688
+
689
+ static evoasm_success_t
690
+ evoasm_program_x64_emit_kernel_transition(evoasm_program_t *program,
691
+ evoasm_kernel_t *from_kernel,
692
+ evoasm_kernel_t *to_kernel,
693
+ evoasm_buf_t *buf,
694
+ size_t trans_idx,
695
+ bool set_io_mapping) {
696
+ size_t input_reg_idx;
697
+ evoasm_x64_reg_id_t input_reg_id;
698
+
699
+ assert(from_kernel->n_output_regs > 0);
700
+
701
+ for(input_reg_id = (evoasm_x64_reg_id_t) 0, input_reg_idx = 0; input_reg_id < EVOASM_X64_REG_NONE; input_reg_id++) {
702
+ if(!to_kernel->reg_info.x64.regs[input_reg_id].input) continue;
703
+
704
+ evoasm_x64_reg_id_t output_reg_id;
705
+
706
+ if(set_io_mapping) {
707
+ size_t output_reg_idx = input_reg_idx % from_kernel->n_output_regs;
708
+ output_reg_id = from_kernel->output_regs.x64[output_reg_idx];
709
+
710
+ from_kernel->reg_info.x64.trans_regs[trans_idx][input_reg_id] = output_reg_id;
711
+ } else {
712
+ output_reg_id = from_kernel->reg_info.x64.trans_regs[trans_idx][input_reg_id];
713
+ }
714
+
715
+ evoasm_x64_reg_type_t output_reg_type = evoasm_x64_get_reg_type(output_reg_id);
716
+ evoasm_x64_reg_type_t input_reg_type = evoasm_x64_get_reg_type(input_reg_id);
717
+ evoasm_x64_params_t params = {0};
718
+
719
+ if(input_reg_id != output_reg_id) {
720
+ if(output_reg_type == EVOASM_X64_REG_TYPE_GP &&
721
+ input_reg_type == EVOASM_X64_REG_TYPE_GP) {
722
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
723
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
724
+ EVOASM_X64_ENC(mov_r64_rm64);
725
+ } else if(output_reg_type == EVOASM_X64_REG_TYPE_XMM &&
726
+ input_reg_type == EVOASM_X64_REG_TYPE_XMM) {
727
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
728
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
729
+ if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
730
+ EVOASM_X64_ENC(vmovdqa_ymm_ymmm256);
731
+ } else {
732
+ EVOASM_X64_ENC(movdqa_xmm_xmmm128);
733
+ }
734
+ } else if(output_reg_type == EVOASM_X64_REG_TYPE_GP &&
735
+ input_reg_type == EVOASM_X64_REG_TYPE_XMM) {
736
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
737
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
738
+ if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
739
+ EVOASM_X64_ENC(vmovq_xmm_rm64);
740
+ } else {
741
+ EVOASM_X64_ENC(movq_xmm_rm64);
742
+ }
743
+ } else if(output_reg_type == EVOASM_X64_REG_TYPE_XMM &&
744
+ input_reg_type == EVOASM_X64_REG_TYPE_GP) {
745
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
746
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
747
+ if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
748
+ EVOASM_X64_ENC(vmovq_rm64_xmm);
749
+ } else {
750
+ EVOASM_X64_ENC(movq_rm64_xmm);
751
+ }
752
+ } else {
753
+ evoasm_assert_not_reached();
754
+ }
755
+ }
756
+ input_reg_idx++;
757
+ }
758
+
759
+ return true;
760
+
761
+ enc_failed:
762
+ return false;
763
+ }
764
+
765
/*
 * Helpers for patching 32-bit relative displacements ("phis") after the
 * fact.
 *
 * EVOASM_BUF_PHI_GET(buf)      pointer to the last four bytes written to buf,
 *                              i.e. the rel32 field of a just-emitted jump.
 * EVOASM_BUF_PHI_SET(label, v) stores into *label the distance from the end
 *                              of the 4-byte field at `label` to address `v`.
 * EVOASM_BUF_POS_ADDR(buf)     address of the current write position of buf.
 *
 * EVOASM_BUF_PHI_SET deliberately has no trailing semicolon so that
 * `if(c) EVOASM_BUF_PHI_SET(a, b); else ...` parses as one statement.
 * All macro arguments are parenthesized so expressions such as `&buf` work.
 */
#define EVOASM_BUF_PHI_GET(buf) ((uint32_t *)((buf)->data + (buf)->pos - 4))
#define EVOASM_BUF_PHI_SET(label, val) \
  do { (*(label) = (uint32_t)((uint8_t *)(val) - ((uint8_t *)(label) + 4))); } while(0)
#define EVOASM_BUF_POS_ADDR(buf) ((buf)->data + (buf)->pos)

/* Number of entries in the conditional-jump table used for branch transitions. */
#define EVOASM_PROGRAM_X64_N_JMP_INSTS 16
771
+
772
+ static evoasm_success_t
773
+ evoasm_program_x64_emit_kernel_transitions(evoasm_program_t *program,
774
+ evoasm_kernel_t *kernel,
775
+ evoasm_kernel_t *next_kernel,
776
+ evoasm_kernel_t *branch_kernel,
777
+ evoasm_buf_t *buf,
778
+ uint32_t **branch_kernel_phi,
779
+ bool set_io_mapping) {
780
+
781
+ static const evoasm_x64_inst_id_t jmp_insts[] = {
782
+ EVOASM_X64_INST_JA_REL32, // 0
783
+ EVOASM_X64_INST_JAE_REL32, // 1
784
+ EVOASM_X64_INST_JB_REL32, // 2
785
+ EVOASM_X64_INST_JBE_REL32, // 3
786
+ EVOASM_X64_INST_JE_REL32, // 4
787
+ EVOASM_X64_INST_JG_REL32, // 5
788
+ EVOASM_X64_INST_JGE_REL32, // 6
789
+ EVOASM_X64_INST_JL_REL32, // 7
790
+ EVOASM_X64_INST_JLE_REL32, // 8
791
+ EVOASM_X64_INST_JNE_REL32, // 9
792
+ EVOASM_X64_INST_JNO_REL32, // 10
793
+ EVOASM_X64_INST_JNP_REL32, // 11
794
+ EVOASM_X64_INST_JNS_REL32, // 12
795
+ EVOASM_X64_INST_JO_REL32, // 13
796
+ EVOASM_X64_INST_JP_REL32, // 14
797
+ EVOASM_X64_INST_JS_REL32, // 15
798
+ };
799
+
800
+ evoasm_x64_params_t params = {0};
801
+ uint32_t *branch_phi = NULL;
802
+ uint32_t *counter_phi = NULL;
803
+
804
+ if(program->recur_limit == 0) goto next_transition;
805
+
806
+ evoasm_inst_id_t jmp_inst_id = jmp_insts[program->jmp_conds[kernel->idx] % EVOASM_PROGRAM_X64_N_JMP_INSTS];
807
+
808
+ if(kernel->reg_info.x64.regs[EVOASM_X64_REG_RFLAGS].written) {
809
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_OF)) {
810
+ if(jmp_inst_id == EVOASM_X64_INST_JO_REL32 || jmp_inst_id == EVOASM_X64_INST_JNO_REL32) goto branch_transition;
811
+ }
812
+
813
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_SF)) {
814
+ if(jmp_inst_id == EVOASM_X64_INST_JS_REL32 || jmp_inst_id == EVOASM_X64_INST_JNS_REL32) goto branch_transition;
815
+ }
816
+
817
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) {
818
+ if(jmp_inst_id == EVOASM_X64_INST_JE_REL32 ||
819
+ jmp_inst_id == EVOASM_X64_INST_JNE_REL32 ||
820
+ jmp_inst_id == EVOASM_X64_INST_JBE_REL32 ||
821
+ jmp_inst_id == EVOASM_X64_INST_JLE_REL32) {
822
+ goto branch_transition;
823
+ }
824
+ }
825
+
826
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_CF)) {
827
+ if(jmp_inst_id == EVOASM_X64_INST_JB_REL32 ||
828
+ jmp_inst_id == EVOASM_X64_INST_JAE_REL32 ||
829
+ jmp_inst_id == EVOASM_X64_INST_JBE_REL32) {
830
+ goto branch_transition;
831
+ }
832
+ }
833
+
834
+ if((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) &&
835
+ (EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_CF))) {
836
+ if(jmp_inst_id == EVOASM_X64_INST_JA_REL32) goto branch_transition;
837
+ }
838
+
839
+ if((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_SF)) &&
840
+ (EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_OF))) {
841
+
842
+ if(jmp_inst_id == EVOASM_X64_INST_JL_REL32 ||
843
+ jmp_inst_id == EVOASM_X64_INST_JGE_REL32 ||
844
+ jmp_inst_id == EVOASM_X64_INST_JLE_REL32 ||
845
+ ((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) &&
846
+ jmp_inst_id == EVOASM_X64_INST_JG_REL32)) {
847
+ goto branch_transition;
848
+ }
849
+ }
850
+
851
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_PF)) {
852
+ if(jmp_inst_id == EVOASM_X64_INST_JP_REL32 || jmp_inst_id == EVOASM_X64_INST_JNP_REL32) goto branch_transition;
853
+ }
854
+ }
855
+ /* kernel does not write to required jump flag, ignore jmp_off and emit next kernel */
856
+ goto next_transition;
857
+
858
+ #if 0
859
+ /*FIXME: only 8bit possible, check and activate if feasable*/
860
+ if(kernel->reg_info.x64.regs[EVOASM_X64_REG_C].written) {
861
+ jmp_insts[possible_jmp_insts_len++] = EVOASM_X64_INST_JECXZ_JRCXZ_REL8;
862
+ }
863
+ #endif
864
+
865
+ branch_transition:
866
+ {
867
+ evoasm_buf_ref_t buf_ref = {
868
+ .data = buf->data,
869
+ .pos = &buf->pos
870
+ };
871
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
872
+ EVOASM_TRY(error, evoasm_x64_enc_, (evoasm_x64_inst_id_t) jmp_inst_id, &params, &buf_ref);
873
+ branch_phi = EVOASM_BUF_PHI_GET(buf);
874
+ assert(*branch_phi == 0xdeadbeef);
875
+
876
+ if(branch_kernel->idx <= kernel->idx) {
877
+ /* back jump, guard with counter */
878
+
879
+ uint32_t *counter = &program->recur_counters[kernel->idx];
880
+ uintptr_t addr_imm = (uintptr_t) counter;
881
+
882
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
883
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) addr_imm);
884
+ EVOASM_X64_ENC(mov_r64_imm64);
885
+
886
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
887
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, program->recur_limit);
888
+ EVOASM_X64_ENC(cmp_rm32_imm32);
889
+
890
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
891
+ EVOASM_X64_ENC(jge_rel32);
892
+
893
+ counter_phi = EVOASM_BUF_PHI_GET(buf);
894
+ assert(*counter_phi == 0xdeadbeef);
895
+
896
+ EVOASM_X64_ENC(inc_rm32);
897
+ }
898
+
899
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transition, program,
900
+ kernel, branch_kernel, buf, 1, set_io_mapping);
901
+
902
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
903
+ EVOASM_X64_ENC(jmp_rel32);
904
+
905
+ *branch_kernel_phi = EVOASM_BUF_PHI_GET(buf);
906
+ assert(**branch_kernel_phi == 0xdeadbeef);
907
+
908
+ if(branch_phi != NULL) {
909
+ EVOASM_BUF_PHI_SET(branch_phi, EVOASM_BUF_POS_ADDR(buf));
910
+ }
911
+
912
+ if(counter_phi != NULL) {
913
+ EVOASM_BUF_PHI_SET(counter_phi, EVOASM_BUF_POS_ADDR(buf));
914
+ }
915
+ }
916
+
917
+ next_transition:
918
+ if(next_kernel != NULL) {
919
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transition, program,
920
+ kernel, next_kernel, buf, 0, set_io_mapping);
921
+ }
922
+
923
+ evoasm_buf_log(buf, EVOASM_LOG_LEVEL_DEBUG);
924
+
925
+ return true;
926
+
927
+
928
+ error:
929
+ enc_failed:
930
+ return false;
931
+ }
932
+
933
+
934
+ static evoasm_success_t
935
+ evoasm_program_x64_emit_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel, evoasm_buf_t *buf) {
936
+ evoasm_buf_ref_t buf_ref = {
937
+ .data = buf->data,
938
+ .pos = &buf->pos
939
+ };
940
+
941
+ assert(kernel->size > 0);
942
+ for(size_t i = 0; i < kernel->size; i++) {
943
+ evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
944
+ program->exception_mask = program->exception_mask | inst->exceptions;
945
+ EVOASM_TRY(error, evoasm_x64_inst_enc_basic_, inst, &kernel->params.x64[i], &buf_ref);
946
+ }
947
+ return true;
948
+ error:
949
+ return false;
950
+ }
951
+
952
+
953
+ static size_t
954
+ evoasm_program_branch_kernel_idx(evoasm_program_t *program, size_t idx) {
955
+ return (size_t) EVOASM_CLAMP((int) idx + program->jmp_offs[idx], 0, program->size - 1);
956
+ }
957
+
958
+ static evoasm_success_t
959
+ evoasm_program_x64_emit_program_kernels(evoasm_program_t *program, bool set_io_mapping) {
960
+ evoasm_buf_t *buf = program->body_buf;
961
+ evoasm_kernel_t *kernel, *next_kernel, *branch_kernel;
962
+ size_t program_size = program->size;
963
+ uint32_t *branch_phis[EVOASM_PROGRAM_MAX_SIZE] = {0};
964
+ uint8_t *kernel_addrs[EVOASM_PROGRAM_MAX_SIZE];
965
+
966
+ evoasm_buf_reset(buf);
967
+
968
+ assert(program_size > 0);
969
+
970
+ for(size_t i = 0; i < program_size; i++) {
971
+ kernel = &program->kernels[i];
972
+
973
+ kernel_addrs[i] = buf->data + buf->pos;
974
+ kernel->buf_start = (uint16_t) buf->pos;
975
+
976
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel, program, kernel, buf);
977
+
978
+ if(i < program_size - 1) {
979
+ next_kernel = &program->kernels[i + 1];
980
+ } else {
981
+ next_kernel = NULL;
982
+ }
983
+
984
+ size_t branch_kernel_idx = evoasm_program_branch_kernel_idx(program, i);
985
+ assert(branch_kernel_idx < program->size);
986
+ branch_kernel = &program->kernels[branch_kernel_idx];
987
+
988
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transitions, program, kernel,
989
+ next_kernel, branch_kernel, buf, &branch_phis[i], set_io_mapping);
990
+
991
+ kernel->buf_end = (uint16_t) buf->pos;
992
+ }
993
+
994
+ for(size_t i = 0; i < program_size; i++) {
995
+ size_t branch_kernel_idx = evoasm_program_branch_kernel_idx(program, i);
996
+ uint32_t *branch_phi = branch_phis[i];
997
+ if(branch_phi != NULL) {
998
+ uint8_t *branch_kernel_addr = kernel_addrs[branch_kernel_idx];
999
+ assert(*branch_phi == 0xdeadbeef);
1000
+ EVOASM_BUF_PHI_SET(branch_phi, branch_kernel_addr);
1001
+ }
1002
+ }
1003
+
1004
+ return true;
1005
+ error:
1006
+ return false;
1007
+ }
1008
+
1009
+ static evoasm_success_t
1010
+ evoasm_program_x64_emit_io_load_store(evoasm_program_t *program,
1011
+ evoasm_program_input_t *input,
1012
+ bool io_mapping) {
1013
+ size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
1014
+
1015
+ evoasm_buf_reset(program->buf);
1016
+ EVOASM_TRY(error, evoasm_x64_emit_func_prolog, EVOASM_X64_ABI_SYSV, program->buf);
1017
+
1018
+ for(size_t i = 0; i < n_tuples; i++) {
1019
+ evoasm_program_io_val_t *input_vals = input->vals + i * input->arity;
1020
+ EVOASM_TRY(error, evoasm_program_x64_emit_input_load, program, input_vals, input->types, input->arity,
1021
+ io_mapping);
1022
+ size_t r = evoasm_buf_append(program->buf, program->body_buf);
1023
+ assert(r == 0);
1024
+ EVOASM_TRY(error, evoasm_program_x64_emit_output_store, program, i);
1025
+ }
1026
+
1027
+ EVOASM_TRY(error, evoasm_x64_emit_func_epilog, EVOASM_X64_ABI_SYSV, program->buf);
1028
+ return true;
1029
+
1030
+ error:
1031
+ return false;
1032
+ }
1033
+
1034
+ static evoasm_success_t
1035
+ evoasm_program_x64_emit(evoasm_program_t *program,
1036
+ evoasm_program_input_t *input,
1037
+ evoasm_program_emit_flags_t emit_flags) {
1038
+
1039
+ bool set_io_mapping = emit_flags & EVOASM_PROGRAM_EMIT_FLAG_SET_IO_MAPPING;
1040
+
1041
+ if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_PREPARE) {
1042
+ EVOASM_TRY(error, evoasm_program_x64_prepare, program);
1043
+ }
1044
+
1045
+ if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_EMIT_KERNELS) {
1046
+ EVOASM_TRY(error, evoasm_program_x64_emit_program_kernels, program, set_io_mapping);
1047
+ }
1048
+
1049
+ if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_EMIT_IO_LOAD_STORE) {
1050
+ EVOASM_TRY(error, evoasm_program_x64_emit_io_load_store, program, input, set_io_mapping);
1051
+ }
1052
+
1053
+ evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_DEBUG);
1054
+
1055
+ return true;
1056
+
1057
+ error:
1058
+ return false;
1059
+ }
1060
+
1061
+
1062
/* Distance metric selector used when filling the distance matrix. */
typedef enum {
  /* Absolute difference between produced and expected value. */
  EVOASM_METRIC_ABSDIFF = 0,
  /* Follows the last real metric. */
  EVOASM_METRIC_NONE
} evoasm_metric;
1066
+
1067
+ static inline void
1068
+ evoasm_program_update_dist_mat(evoasm_program_t *program,
1069
+ evoasm_kernel_t *kernel,
1070
+ evoasm_program_output_t *output,
1071
+ size_t height,
1072
+ size_t tuple_idx,
1073
+ double *dist_mat,
1074
+ evoasm_metric metric) {
1075
+ size_t width = kernel->n_output_regs;
1076
+ evoasm_program_io_val_t *io_vals = output->vals + tuple_idx * output->arity;
1077
+
1078
+ for(size_t i = 0; i < height; i++) {
1079
+ evoasm_program_io_val_t io_val = io_vals[i];
1080
+ evoasm_program_io_val_type_t tuple_type = output->types[i];
1081
+ double io_val_dbl = evoasm_program_io_val_to_dbl(io_val, tuple_type);
1082
+
1083
+ for(size_t j = 0; j < width; j++) {
1084
+ evoasm_program_io_val_t output_val = program->output_vals[tuple_idx * width + j];
1085
+ //uint8_t output_size = program->output_sizes[j];
1086
+ //switch(output_size) {
1087
+ //
1088
+ //}
1089
+ // FIXME: output is essentially just a bitstring and could be anything
1090
+ // an integer (both, signed or unsigned) a float or double.
1091
+ // Moreover, a portion of the output value could
1092
+ // hold the correct answer (e.g. lower 8 or 16 bits etc.).
1093
+ // For now we use the tuple output type and assume signedness.
1094
+ // This needs to be fixed.
1095
+ double output_val_dbl = evoasm_program_io_val_to_dbl(output_val, tuple_type);
1096
+
1097
+ switch(metric) {
1098
+ default:
1099
+ case EVOASM_METRIC_ABSDIFF: {
1100
+ double dist = fabs(output_val_dbl - io_val_dbl);
1101
+ dist_mat[i * width + j] += dist;
1102
+ break;
1103
+ }
1104
+ }
1105
+ }
1106
+ }
1107
+ }
1108
+
1109
+ static void
1110
+ evoasm_program_log_program_output(evoasm_program_t *program,
1111
+ evoasm_kernel_t *kernel,
1112
+ evoasm_program_output_t *output,
1113
+ uint_fast8_t *const matching,
1114
+ evoasm_log_level_t log_level) {
1115
+
1116
+ size_t n_tuples = EVOASM_PROGRAM_OUTPUT_N_TUPLES(output);
1117
+ size_t height = output->arity;
1118
+ size_t width = kernel->n_output_regs;
1119
+
1120
+ evoasm_log(log_level, EVOASM_LOG_TAG, "OUTPUT MATRICES:\n");
1121
+
1122
+ for(size_t i = 0; i < width; i++) {
1123
+ evoasm_log(log_level, EVOASM_LOG_TAG, " %d ", kernel->output_regs.x64[i]);
1124
+ }
1125
+
1126
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
1127
+
1128
+ for(size_t i = 0; i < n_tuples; i++) {
1129
+ for(size_t j = 0; j < height; j++) {
1130
+ for(size_t k = 0; k < width; k++) {
1131
+ bool matched = matching[j] == k;
1132
+ evoasm_program_io_val_t val = program->output_vals[i * width + k];
1133
+
1134
+ if(matched) {
1135
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[1m ");
1136
+ }
1137
+ evoasm_log(log_level, EVOASM_LOG_TAG, " %ld (%f)\t ", val.i64, val.f64);
1138
+ if(matched) {
1139
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[0m ");
1140
+ }
1141
+ }
1142
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
1143
+ }
1144
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
1145
+ }
1146
+ }
1147
+
1148
+ static void
1149
+ evoasm_program_log_dist_dist_mat(evoasm_program_t *program,
1150
+ evoasm_kernel_t *kernel,
1151
+ size_t height,
1152
+ double *dist_mat,
1153
+ uint_fast8_t *matching,
1154
+ evoasm_log_level_t log_level) {
1155
+
1156
+ size_t width = kernel->n_output_regs;
1157
+
1158
+ evoasm_log(log_level, EVOASM_LOG_TAG, "DIST MATRIX: (%zu, %zu)\n", height, width);
1159
+ for(size_t i = 0; i < height; i++) {
1160
+ for(size_t j = 0; j < width; j++) {
1161
+ if(matching[i] == j) {
1162
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[1m ");
1163
+ }
1164
+ evoasm_log(log_level, EVOASM_LOG_TAG, " %.2g\t ", dist_mat[i * width + j]);
1165
+ if(matching[i] == j) {
1166
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[0m ");
1167
+ }
1168
+ }
1169
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
1170
+ }
1171
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
1172
+ }
1173
+
1174
+
1175
+ static inline bool
1176
+ evoasm_program_match(evoasm_program_t *program,
1177
+ size_t width,
1178
+ double *dist_mat,
1179
+ uint_fast8_t *matching) {
1180
+
1181
+ uint_fast8_t best_index = UINT_FAST8_MAX;
1182
+ double best_dist = INFINITY;
1183
+ uint_fast8_t i;
1184
+
1185
+ for(i = 0; i < width; i++) {
1186
+ double v = dist_mat[i];
1187
+ if(v < best_dist) {
1188
+ best_dist = v;
1189
+ best_index = i;
1190
+ }
1191
+ }
1192
+
1193
+ if(evoasm_likely(best_index != UINT_FAST8_MAX)) {
1194
+ *matching = best_index;
1195
+ return true;
1196
+ } else {
1197
+ /*evoasm_program_log_dist_dist_mat(program,
1198
+ 1,
1199
+ dist_mat,
1200
+ matching,
1201
+ EVOASM_LOG_LEVEL_WARN);
1202
+ evoasm_assert_not_reached();*/
1203
+ /*
1204
+ * Might happen if all elements are inf or nan
1205
+ */
1206
+ return false;
1207
+ }
1208
+ }
1209
+
1210
+ static inline void
1211
+ evoasm_program_calc_stable_matching(evoasm_program_t *program,
1212
+ evoasm_kernel_t *kernel,
1213
+ size_t height,
1214
+ double *dist_mat,
1215
+ uint_fast8_t *matching) {
1216
+
1217
+ uint_fast8_t width = (uint_fast8_t) kernel->n_output_regs;
1218
+ uint_fast8_t *inv_matching = evoasm_alloca(width * sizeof(uint_fast8_t));
1219
+ uint_fast8_t i;
1220
+
1221
+ // calculates a stable matching
1222
+ for(i = 0; i < height; i++) {
1223
+ matching[i] = UINT_FAST8_MAX;
1224
+ }
1225
+
1226
+ for(i = 0; i < width; i++) {
1227
+ inv_matching[i] = UINT_FAST8_MAX;
1228
+ }
1229
+
1230
+ while(true) {
1231
+ uint_fast8_t unmatched_index = UINT_FAST8_MAX;
1232
+ uint_fast8_t best_index = UINT_FAST8_MAX;
1233
+ double best_dist = INFINITY;
1234
+
1235
+ for(i = 0; i < height; i++) {
1236
+ if(matching[i] == UINT_FAST8_MAX) {
1237
+ unmatched_index = i;
1238
+ break;
1239
+ }
1240
+ }
1241
+
1242
+ if(unmatched_index == UINT_FAST8_MAX) {
1243
+ break;
1244
+ }
1245
+
1246
+ for(i = 0; i < width; i++) {
1247
+ double v = dist_mat[unmatched_index * width + i];
1248
+ if(v < best_dist) {
1249
+ best_dist = v;
1250
+ best_index = i;
1251
+ }
1252
+ }
1253
+
1254
+ if(evoasm_likely(best_index != UINT_FAST8_MAX)) {
1255
+ if(inv_matching[best_index] == UINT_FAST8_MAX) {
1256
+ inv_matching[best_index] = unmatched_index;
1257
+ matching[unmatched_index] = best_index;
1258
+ } else {
1259
+ if(dist_mat[inv_matching[best_index] * width + best_index] > best_dist) {
1260
+ matching[inv_matching[best_index]] = UINT_FAST8_MAX;
1261
+ inv_matching[best_index] = unmatched_index;
1262
+ matching[unmatched_index] = best_index;
1263
+ } else {
1264
+ //dist_mat[unmatched_index * width + i] = copysign(best_dist, -1.0);
1265
+ dist_mat[unmatched_index * width + i] = INFINITY;
1266
+ }
1267
+ }
1268
+ } else {
1269
+ evoasm_program_log_dist_dist_mat(program,
1270
+ kernel,
1271
+ height,
1272
+ dist_mat,
1273
+ matching,
1274
+ EVOASM_LOG_LEVEL_DEBUG);
1275
+ evoasm_assert_not_reached();
1276
+ }
1277
+ }
1278
+ }
1279
+
1280
+
1281
+ static inline evoasm_loss_t
1282
+ evoasm_program_calc_loss(evoasm_program_t *program,
1283
+ evoasm_kernel_t *kernel,
1284
+ size_t height,
1285
+ double *dist_mat,
1286
+ uint_fast8_t *matching) {
1287
+ size_t width = kernel->n_output_regs;
1288
+ double scale = 1.0 / (double) width;
1289
+ double loss = 0.0;
1290
+
1291
+ for(size_t i = 0; i < height; i++) {
1292
+ loss += (scale * dist_mat[i * width + matching[i]]);
1293
+ }
1294
+
1295
+ return (evoasm_loss_t) loss;
1296
+ }
1297
+
1298
+
1299
+ static evoasm_loss_t
1300
+ evoasm_program_assess(evoasm_program_t *program,
1301
+ evoasm_program_output_t *output) {
1302
+
1303
+ size_t n_tuples = EVOASM_PROGRAM_OUTPUT_N_TUPLES(output);
1304
+ size_t height = output->arity;
1305
+ evoasm_kernel_t *kernel = &program->kernels[program->size - 1];
1306
+ size_t width = kernel->n_output_regs;
1307
+ size_t dist_mat_len = (size_t) (width * height);
1308
+ double *dist_mat = evoasm_alloca(dist_mat_len * sizeof(double));
1309
+ uint_fast8_t *matching = evoasm_alloca(height * sizeof(uint_fast8_t));
1310
+ evoasm_loss_t loss;
1311
+
1312
+ for(size_t i = 0; i < dist_mat_len; i++) {
1313
+ dist_mat[i] = 0.0;
1314
+ }
1315
+
1316
+ if(height == 1) {
1317
+ /* COMMON FAST-PATH */
1318
+ for(size_t i = 0; i < n_tuples; i++) {
1319
+ evoasm_program_update_dist_mat(program, kernel, output, 1, i, dist_mat, EVOASM_METRIC_ABSDIFF);
1320
+ }
1321
+
1322
+ if(evoasm_program_match(program, width, dist_mat, matching)) {
1323
+ loss = evoasm_program_calc_loss(program, kernel, 1, dist_mat, matching);
1324
+ } else {
1325
+ loss = INFINITY;
1326
+ }
1327
+ } else {
1328
+ for(size_t i = 0; i < n_tuples; i++) {
1329
+ evoasm_program_update_dist_mat(program, kernel, output, height, i, dist_mat, EVOASM_METRIC_ABSDIFF);
1330
+ }
1331
+
1332
+ evoasm_program_calc_stable_matching(program, kernel, height, dist_mat, matching);
1333
+ loss = evoasm_program_calc_loss(program, kernel, height, dist_mat, matching);
1334
+ }
1335
+
1336
+
1337
+ for(size_t i = 0; i < height; i++) {
1338
+ switch(program->arch_info->id) {
1339
+ case EVOASM_ARCH_X64: {
1340
+ program->output_regs[i] = kernel->output_regs.x64[matching[i]];
1341
+ break;
1342
+ }
1343
+ default:
1344
+ evoasm_assert_not_reached();
1345
+ }
1346
+ }
1347
+
1348
+ #if EVOASM_LOG_LEVEL <= EVOASM_LOG_LEVEL_DEBUG
1349
+ if(loss == 0.0) {
1350
+ evoasm_program_log_program_output(program,
1351
+ kernel,
1352
+ output,
1353
+ matching,
1354
+ EVOASM_LOG_LEVEL_DEBUG);
1355
+ }
1356
+ #endif
1357
+
1358
+ return loss;
1359
+ }
1360
+
1361
+ static void
1362
+ evoasm_program_reset_recur_counters(evoasm_program_t *program) {
1363
+ memset(program->recur_counters, 0, sizeof(program->recur_counters[0]) * program->size);
1364
+ }
1365
+
1366
+ static inline evoasm_loss_t
1367
+ evoasm_program_eval_(evoasm_program_t *program,
1368
+ evoasm_program_output_t *output) {
1369
+
1370
+ evoasm_kernel_t *last_kernel = &program->kernels[program->size - 1];
1371
+ evoasm_loss_t loss;
1372
+
1373
+ if(evoasm_unlikely(last_kernel->n_output_regs == 0)) {
1374
+ evoasm_log_info("program %p has no output", (void *) program);
1375
+ return INFINITY;
1376
+ }
1377
+
1378
+ evoasm_program_reset_recur_counters(program);
1379
+
1380
+ evoasm_signal_set_exception_mask(program->exception_mask);
1381
+
1382
+ #ifdef EVOASM_ENABLE_PARANOID_MODE
1383
+ for(size_t i = 0; i < program->size; i++) {
1384
+ evoasm_kernel_t *kernel = &program->kernels[i];
1385
+ for(size_t j = 0; j < EVOASM_X64_REG_NONE; j++) {
1386
+ kernel->rand_vals[j].i64 = rand() | (rand() << (rand() % 24));
1387
+ }
1388
+ }
1389
+ #endif
1390
+
1391
+ if(EVOASM_SIGNAL_TRY()) {
1392
+ evoasm_buf_exec(program->buf);
1393
+ loss = evoasm_program_assess(program, output);
1394
+ } else {
1395
+ evoasm_log_debug("program %p signaled", (void *) program);
1396
+ loss = INFINITY;
1397
+ }
1398
+
1399
+ evoasm_signal_clear_exception_mask();
1400
+
1401
+ return loss;
1402
+ }
1403
+
1404
+ evoasm_loss_t
1405
+ evoasm_program_eval(evoasm_program_t *program,
1406
+ evoasm_program_output_t *output) {
1407
+
1408
+ evoasm_loss_t loss = evoasm_program_eval_(program, output);
1409
+
1410
+ #ifdef EVOASM_ENABLE_PARANOID_MODE
1411
+ for(size_t i = 0; i < 10; i++) {
1412
+ evoasm_loss_t loss_ = evoasm_program_eval_(program, output);
1413
+
1414
+ if(loss_ != loss) {
1415
+ evoasm_program_log(program, EVOASM_LOG_LEVEL_WARN);
1416
+ evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_WARN);
1417
+ }
1418
+ assert(loss_ == loss);
1419
+ }
1420
+ #endif
1421
+
1422
+ return loss;
1423
+ }
1424
+
1425
+ static evoasm_program_output_t *
1426
+ evoasm_program_load_output(evoasm_program_t *program,
1427
+ evoasm_kernel_t *kernel,
1428
+ evoasm_program_input_t *input) {
1429
+
1430
+ size_t width = kernel->n_output_regs;
1431
+ evoasm_program_output_t *output = &program->_output;
1432
+ size_t height = output->arity;
1433
+ size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
1434
+ uint_fast8_t *matching = evoasm_alloca(height * sizeof(uint_fast8_t));
1435
+
1436
+ evoasm_program_output_t *load_output = evoasm_program_io_alloc(
1437
+ (uint16_t) (EVOASM_PROGRAM_INPUT_N_TUPLES(input) * height));
1438
+
1439
+ for(size_t i = 0; i < height; i++) {
1440
+ for(size_t j = 0; j < kernel->n_output_regs; j++) {
1441
+ if(program->output_regs[i] == kernel->output_regs.x64[j]) {
1442
+ matching[i] = (uint_fast8_t) j;
1443
+ goto next;
1444
+ }
1445
+ }
1446
+ evoasm_log_fatal("program output reg %d not found in kernel output regs", program->output_regs[i]);
1447
+ evoasm_assert_not_reached();
1448
+ next:;
1449
+ }
1450
+
1451
+ for(size_t i = 0; i < n_tuples; i++) {
1452
+ for(size_t j = 0; j < height; j++) {
1453
+ load_output->vals[i * height + j] = program->output_vals[i * width + matching[j]];
1454
+ }
1455
+ }
1456
+
1457
+ load_output->arity = output->arity;
1458
+ memcpy(load_output->types, output->types, EVOASM_ARY_LEN(output->types));
1459
+
1460
+ #if EVOASM_LOG_LEVEL <= EVOASM_LOG_LEVEL_DEBUG
1461
+ evoasm_program_log_program_output(program,
1462
+ kernel,
1463
+ load_output,
1464
+ matching,
1465
+ EVOASM_LOG_LEVEL_DEBUG);
1466
+ #endif
1467
+ return load_output;
1468
+ }
1469
+
1470
+ evoasm_program_output_t *
1471
+ evoasm_program_run(evoasm_program_t *program,
1472
+ evoasm_program_input_t *input) {
1473
+ evoasm_kernel_t *kernel = &program->kernels[program->size - 1];
1474
+ evoasm_program_output_t *output;
1475
+
1476
+ if(input->arity != program->_input.arity) {
1477
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
1478
+ "arity mismatch (%d for %d)", input->arity, program->_input.arity);
1479
+ return NULL;
1480
+ }
1481
+
1482
+ size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
1483
+ if(n_tuples > program->max_tuples) {
1484
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
1485
+ "Maximum number of input/output tuples exceeded (%zu > %d)", n_tuples, program->max_tuples);
1486
+ return NULL;
1487
+ }
1488
+
1489
+ for(size_t i = 0; i < input->arity; i++) {
1490
+ if(input->types[i] != program->_input.types[i]) {
1491
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
1492
+ "type mismatch (%d != %d)", input->types[i], program->_input.types[i]);
1493
+ return NULL;
1494
+ }
1495
+ }
1496
+
1497
+ evoasm_program_emit_flags_t emit_flags = EVOASM_PROGRAM_EMIT_FLAG_EMIT_IO_LOAD_STORE;
1498
+ if(!evoasm_program_emit(program, input, emit_flags)) {
1499
+ return NULL;
1500
+ }
1501
+
1502
+ evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_DEBUG);
1503
+ evoasm_signal_set_exception_mask(program->exception_mask);
1504
+
1505
+ if(!evoasm_buf_protect(program->buf, EVOASM_MPROT_MODE_RX)) {
1506
+ evoasm_assert_not_reached();
1507
+ }
1508
+
1509
+ evoasm_program_reset_recur_counters(program);
1510
+
1511
+ if(EVOASM_SIGNAL_TRY()) {
1512
+ evoasm_buf_exec(program->buf);
1513
+ output = evoasm_program_load_output(program,
1514
+ kernel,
1515
+ input);
1516
+ } else {
1517
+ evoasm_log_debug("signaled\n");
1518
+ output = NULL;
1519
+ }
1520
+
1521
+ if(!evoasm_buf_protect(program->buf, EVOASM_MPROT_MODE_RW)) {
1522
+ evoasm_assert_not_reached();
1523
+ }
1524
+
1525
+ evoasm_signal_clear_exception_mask();
1526
+
1527
+ return output;
1528
+ }
1529
+
1530
+ evoasm_success_t
1531
+ evoasm_program_emit(evoasm_program_t *program,
1532
+ evoasm_program_input_t *input,
1533
+ evoasm_program_emit_flags_t emit_flags) {
1534
+ switch(program->arch_info->id) {
1535
+ case EVOASM_ARCH_X64: {
1536
+ return evoasm_program_x64_emit(program, input,
1537
+ emit_flags);
1538
+ break;
1539
+ }
1540
+ default:
1541
+ evoasm_assert_not_reached();
1542
+ }
1543
+ }
1544
+
1545
+ static size_t
1546
+ evoasm_program_x64_find_writers_(evoasm_program_t *program, evoasm_kernel_t *kernel, evoasm_reg_id_t reg_id,
1547
+ size_t idx, size_t *writers) {
1548
+ size_t len = 0;
1549
+ for(int i = (int) idx; i >= 0; i--) {
1550
+ evoasm_x64_reg_liveness_t reg_liveness;
1551
+ evoasm_x64_reg_liveness_init(&reg_liveness);
1552
+
1553
+ if(evoasm_kernel_is_writing_inst_x64(kernel, (size_t) i, reg_id, &reg_liveness)) {
1554
+ writers[len++] = (size_t) i;
1555
+ }
1556
+ }
1557
+ return len;
1558
+ }
1559
+
1560
+ static size_t
1561
+ evoasm_program_x64_find_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
1562
+ evoasm_reg_id_t reg_id, size_t idx, size_t *writers) {
1563
+
1564
+ return evoasm_program_x64_find_writers_(program, kernel, reg_id, idx, writers);
1565
+ }
1566
+
1567
+ typedef struct { /* Mark-phase state shared by all kernels while eliminating introns. */
1568
+ bool change; /* Set whenever a pass marks a previously unmarked instruction; the mark loop repeats until it stays false. */
1569
+ evoasm_bitmap1024_t inst_bitmaps[EVOASM_PROGRAM_MAX_SIZE]; /* Indexed by kernel idx; a set bit is the index of an instruction to keep. */
1570
+ evoasm_bitmap256_t output_reg_bitmaps[EVOASM_PROGRAM_MAX_SIZE]; /* Indexed by kernel idx; a set bit is a register id whose writers in that kernel get marked. */
1571
+ } evoasm_program_intron_elimination_ctx;
1572
+
1573
+ static void
1574
+ evoasm_program_x64_mark_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
1575
+ evoasm_reg_id_t reg_id, size_t idx, evoasm_program_intron_elimination_ctx *ctx) {
1576
+ size_t writers[16];
1577
+
1578
+ size_t writers_len = evoasm_program_x64_find_writers(program, kernel, reg_id, idx, writers);
1579
+
1580
+ if(writers_len > 0) {
1581
+ for(size_t i = 0; i < writers_len; i++) {
1582
+ size_t writer_idx = writers[i];
1583
+ evoasm_bitmap_t *inst_bitmap = (evoasm_bitmap_t *) &ctx->inst_bitmaps[kernel->idx];
1584
+ if(evoasm_bitmap_get(inst_bitmap, writer_idx)) continue;
1585
+
1586
+ evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[writer_idx]);
1587
+ evoasm_bitmap_set(inst_bitmap, writer_idx);
1588
+ ctx->change = true;
1589
+
1590
+ for(size_t j = 0; j < x64_inst->n_operands; j++) {
1591
+ evoasm_x64_operand_t *op = &x64_inst->operands[j];
1592
+ evoasm_x64_reg_id_t op_reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) writer_idx);
1593
+
1594
+ if(op->read) {
1595
+ if(writer_idx > 0) {
1596
+ evoasm_program_x64_mark_writers(program, kernel, op_reg_id, writer_idx - 1u, ctx);
1597
+ }
1598
+
1599
+ if(kernel->reg_info.x64.regs[op_reg_id].input) {
1600
+ for(int k = kernel->idx - 1; k >= 0; k--) {
1601
+ size_t trans_idx = SIZE_MAX;
1602
+ if(k + 1 == kernel->idx) {
1603
+ trans_idx = 0;
1604
+ } else if(evoasm_program_branch_kernel_idx(program, (size_t) k) == kernel->idx) {
1605
+ trans_idx = 1;
1606
+ };
1607
+
1608
+ if(trans_idx != SIZE_MAX) {
1609
+ evoasm_kernel_t *trans_kernel = &program->kernels[k];
1610
+
1611
+ for(size_t l = 0; l < EVOASM_X64_REG_NONE; l++) {
1612
+ if(trans_kernel->reg_info.x64.trans_regs[trans_idx][op_reg_id] == l) {
1613
+ evoasm_bitmap_set((evoasm_bitmap_t *) &ctx->output_reg_bitmaps[k], l);
1614
+ }
1615
+ }
1616
+ }
1617
+ }
1618
+ }
1619
+ }
1620
+ }
1621
+ }
1622
+ }
1623
+ }
1624
+
1625
+ static void
1626
+ evoasm_program_mark_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
1627
+ evoasm_reg_id_t reg_id, size_t index, evoasm_program_intron_elimination_ctx *ctx) {
1628
+ switch(program->arch_info->id) {
1629
+ case EVOASM_ARCH_X64: {
1630
+ evoasm_program_x64_mark_writers(program, kernel, reg_id, index, ctx);
1631
+ break;
1632
+ }
1633
+ default:
1634
+ evoasm_assert_not_reached();
1635
+ }
1636
+ }
1637
+
1638
+ static evoasm_success_t
1639
+ evoasm_program_mark_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel,
1640
+ evoasm_program_intron_elimination_ctx *ctx) {
1641
+ for(size_t i = 0; i < EVOASM_X64_REG_NONE; i++) {
1642
+ evoasm_bitmap_t *bitmap = (evoasm_bitmap_t *) &ctx->output_reg_bitmaps[kernel->idx];
1643
+ if(evoasm_bitmap_get(bitmap, i)) {
1644
+ evoasm_program_mark_writers(program, kernel, (evoasm_reg_id_t) i, (size_t) (kernel->size - 1),
1645
+ ctx);
1646
+ }
1647
+ }
1648
+
1649
+ return true;
1650
+ }
1651
+
1652
+ evoasm_success_t
1653
+ evoasm_program_eliminate_introns(evoasm_program_t *program, evoasm_program_t *dst_program) {
1654
+ size_t last_kernel_idx = (size_t) (program->size - 1);
1655
+ evoasm_program_intron_elimination_ctx ctx = {0};
1656
+
1657
+ //evoasm_kernel_t *last_kernel = &program->kernels[last_kernel_idx];
1658
+
1659
+ EVOASM_TRY(error, evoasm_program_init,
1660
+ dst_program,
1661
+ program->arch_info,
1662
+ program->size,
1663
+ program->kernels[0].size,
1664
+ program->max_tuples,
1665
+ program->recur_limit,
1666
+ false);
1667
+
1668
+ evoasm_bitmap_t *output_bitmap = (evoasm_bitmap_t *) &ctx.output_reg_bitmaps[last_kernel_idx];
1669
+ for(size_t i = 0; i < program->_output.arity; i++) {
1670
+ evoasm_bitmap_set(output_bitmap, program->output_regs[i]);
1671
+ }
1672
+
1673
+ do {
1674
+ ctx.change = false;
1675
+ for(int i = (int) last_kernel_idx; i >= 0; i--) {
1676
+ EVOASM_TRY(error, evoasm_program_mark_kernel, program,
1677
+ &program->kernels[i], &ctx);
1678
+ }
1679
+ } while(ctx.change);
1680
+
1681
+ /* sweep */
1682
+ for(size_t i = 0; i <= last_kernel_idx; i++) {
1683
+ evoasm_kernel_t *kernel = &program->kernels[i];
1684
+ evoasm_kernel_t *dst_kernel = &dst_program->kernels[i];
1685
+ evoasm_bitmap_t *inst_bitmap = (evoasm_bitmap_t *) &ctx.inst_bitmaps[i];
1686
+
1687
+ size_t k = 0;
1688
+ for(size_t j = 0; j < kernel->size; j++) {
1689
+ if(evoasm_bitmap_get(inst_bitmap, j)) {
1690
+ dst_kernel->insts[k] = kernel->insts[j];
1691
+ dst_kernel->params.x64[k] = kernel->params.x64[j];
1692
+ k++;
1693
+ }
1694
+ }
1695
+
1696
+ if(dst_kernel != kernel) {
1697
+ dst_kernel->size = (uint16_t) k;
1698
+ dst_kernel->reg_info = kernel->reg_info;
1699
+ dst_kernel->output_regs = kernel->output_regs;
1700
+ dst_kernel->n_input_regs = kernel->n_input_regs;
1701
+ dst_kernel->n_output_regs = kernel->n_output_regs;
1702
+ }
1703
+ }
1704
+
1705
+ if(dst_program != program) {
1706
+ dst_program->_input = program->_input;
1707
+ dst_program->_output = program->_output;
1708
+ memcpy(dst_program->output_regs, program->output_regs, sizeof(program->output_regs));
1709
+ EVOASM_MEMCPY_N(dst_program->jmp_offs, program->jmp_offs, program->size);
1710
+ EVOASM_MEMCPY_N(dst_program->jmp_conds, program->jmp_conds, program->size);
1711
+ }
1712
+
1713
+ evoasm_program_emit_flags_t emit_flags =
1714
+ EVOASM_PROGRAM_EMIT_FLAG_PREPARE |
1715
+ EVOASM_PROGRAM_EMIT_FLAG_EMIT_KERNELS;
1716
+
1717
+ EVOASM_TRY(error, evoasm_program_emit, dst_program, NULL, emit_flags);
1718
+
1719
+ return true;
1720
+ error:
1721
+ return false;
1722
+ }
1723
+
1724
+
1725
+ #define EVOASM_PROGRAM_PROLOG_EPILOG_SIZE UINT32_C(1024) /* Added once to the body size, per tuple, when sizing the executable buffer in evoasm_program_init(); presumably bytes for prolog + epilog code. */
1726
+ #define EVOASM_PROGRAM_TRANSITION_SIZE UINT32_C(512) /* Buffer space reserved per kernel-to-kernel transition (program_size - 1 of them) in evoasm_program_init(). */
1727
+
1728
+
1729
+ evoasm_success_t
1730
+ evoasm_program_init(evoasm_program_t *program,
1731
+ evoasm_arch_info_t *arch_info,
1732
+ size_t program_size,
1733
+ size_t kernel_size,
1734
+ size_t max_tuples,
1735
+ size_t recur_limit,
1736
+ bool shallow) {
1737
+
1738
+ static evoasm_program_t zero_program = {0};
1739
+ size_t n_transitions = program_size - 1u;
1740
+
1741
+ *program = zero_program;
1742
+ program->arch_info = arch_info;
1743
+ program->recur_limit = (uint32_t) recur_limit;
1744
+ program->shallow = shallow;
1745
+ program->size = (uint16_t) program_size;
1746
+ program->max_tuples = (uint16_t) max_tuples;
1747
+
1748
+ size_t body_buf_size =
1749
+ (size_t) (n_transitions * EVOASM_PROGRAM_TRANSITION_SIZE
1750
+ + program_size * kernel_size * program->arch_info->max_inst_len);
1751
+
1752
+ size_t buf_size = max_tuples * (body_buf_size + EVOASM_PROGRAM_PROLOG_EPILOG_SIZE);
1753
+
1754
+ EVOASM_TRY(error, evoasm_buf_init, &program->_buf, EVOASM_BUF_TYPE_MMAP, buf_size);
1755
+ program->buf = &program->_buf;
1756
+
1757
+ EVOASM_TRY(error, evoasm_buf_init, &program->_body_buf, EVOASM_BUF_TYPE_MALLOC, body_buf_size);
1758
+ program->body_buf = &program->_body_buf;
1759
+
1760
+ EVOASM_TRY(error, evoasm_buf_protect, &program->_buf,
1761
+ EVOASM_MPROT_MODE_RWX);
1762
+
1763
+ size_t output_vals_len = max_tuples * EVOASM_KERNEL_MAX_OUTPUT_REGS;
1764
+
1765
+ EVOASM_TRY_ALLOC(error, calloc, program->output_vals, output_vals_len, sizeof(evoasm_program_io_val_t));
1766
+ EVOASM_TRY_ALLOC(error, calloc, program->kernels, program_size, sizeof(evoasm_kernel_t));
1767
+ EVOASM_TRY_ALLOC(error, calloc, program->recur_counters, program_size, sizeof(uint32_t));
1768
+ EVOASM_TRY_ALLOC(error, calloc, program->jmp_conds, program_size, sizeof(uint8_t));
1769
+ EVOASM_TRY_ALLOC(error, calloc, program->jmp_offs, program_size, sizeof(int16_t));
1770
+
1771
+ for(uint16_t i = 0; i < program_size; i++) {
1772
+ evoasm_kernel_t *kernel = &program->kernels[i];
1773
+
1774
+ kernel->idx = i;
1775
+ kernel->size = (uint16_t) kernel_size;
1776
+
1777
+ if(!shallow) {
1778
+ EVOASM_TRY_ALLOC(error, calloc, kernel->insts, kernel_size, sizeof(kernel->insts[0]));
1779
+ switch(program->arch_info->id) {
1780
+ case EVOASM_ARCH_X64: {
1781
+ EVOASM_TRY_ALLOC(error, calloc, kernel->params.x64, kernel_size, sizeof(kernel->params.x64[0]));
1782
+ break;
1783
+ }
1784
+ default:
1785
+ evoasm_assert_not_reached();
1786
+ }
1787
+ }
1788
+ }
1789
+
1790
+
1791
+ return true;
1792
+
1793
+ error:
1794
+ EVOASM_TRY_WARN(evoasm_program_destroy, program);
1795
+ return false;
1796
+ }
1797
+
1798
+ void
1799
+ evoasm_kernel_log(evoasm_kernel_t *kernel, evoasm_arch_id_t arch_id, evoasm_log_level_t log_level) {
1800
+ if(_evoasm_log_level > log_level) return;
1801
+
1802
+ switch(arch_id) {
1803
+ case EVOASM_ARCH_X64:
1804
+ for(size_t i = 0; i < kernel->size; i++) {
1805
+ evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
1806
+ const char *mnem = evoasm_x64_inst_get_mnem(inst);
1807
+ evoasm_log(log_level, EVOASM_LOG_TAG, "%s", mnem);
1808
+ }
1809
+ break;
1810
+ default:
1811
+ evoasm_assert_not_reached();
1812
+ }
1813
+ }
1814
+
1815
+ void
1816
+ evoasm_program_log(evoasm_program_t *program, evoasm_log_level_t log_level) {
1817
+ if(_evoasm_log_level > log_level) return;
1818
+
1819
+ evoasm_log(log_level, EVOASM_LOG_TAG, "Evoasm::Program: size: %d", program->size);
1820
+
1821
+ for(size_t i = 0; i < program->size; i++) {
1822
+ evoasm_kernel_log(&program->kernels[i], (evoasm_arch_id_t) program->arch_info->id, log_level);
1823
+ }
1824
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
1825
+ }
1826
+
1827
+ EVOASM_DEF_ALLOC_FREE_FUNCS(program) /* NOTE(review): macro is defined outside this chunk; presumably generates the alloc/free functions for evoasm_program_t - confirm. */